Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | Download | RSS feed

  1. /**************************************************************************
  2.  *
  3.  * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
  4.  * All Rights Reserved.
  5.  * Copyright 2009 VMware, Inc.  All Rights Reserved.
  6.  *
  7.  * Permission is hereby granted, free of charge, to any person obtaining a
  8.  * copy of this software and associated documentation files (the
  9.  * "Software"), to deal in the Software without restriction, including
  10.  * without limitation the rights to use, copy, modify, merge, publish,
  11.  * distribute, sub license, and/or sell copies of the Software, and to
  12.  * permit persons to whom the Software is furnished to do so, subject to
  13.  * the following conditions:
  14.  *
  15.  * The above copyright notice and this permission notice (including the
  16.  * next paragraph) shall be included in all copies or substantial portions
  17.  * of the Software.
  18.  *
  19.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  20.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  21.  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  22.  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
  23.  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  24.  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  25.  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  26.  *
  27.  **************************************************************************/
  28.  
  29. #include "glheader.h"
  30. #include "imports.h"
  31. #include "program/program.h"
  32. #include "program/prog_parameter.h"
  33. #include "program/prog_cache.h"
  34. #include "program/prog_instruction.h"
  35. #include "program/prog_print.h"
  36. #include "program/prog_statevars.h"
  37. #include "program/programopt.h"
  38. #include "texenvprogram.h"
  39.  
  40.  
  41. /*
  42.  * Note on texture units:
  43.  *
  44.  * The number of texture units supported by fixed-function fragment
  45.  * processing is MAX_TEXTURE_COORD_UNITS, not MAX_TEXTURE_IMAGE_UNITS.
  46.  * That's because there's a one-to-one correspondence between texture
  47.  * coordinates and samplers in fixed-function processing.
  48.  *
  49.  * Since fixed-function vertex processing is limited to MAX_TEXTURE_COORD_UNITS
  50.  * sets of texcoords, so is fixed-function fragment processing.
  51.  *
  52.  * We can safely use ctx->Const.MaxTextureUnits for loop bounds.
  53.  */
  54.  
  55.  
/**
 * Node of a singly linked list (chained through \c next) that pairs a key
 * with a fragment program.
 *
 * NOTE(review): nothing in the visible part of this file references this
 * struct -- confirm it is still used before relying on it.
 */
struct texenvprog_cache_item
{
   GLuint hash;                           /* presumably the hash of *key -- TODO confirm */
   void *key;                             /* opaque key data */
   struct gl_fragment_program *data;      /* program associated with the key */
   struct texenvprog_cache_item *next;    /* next list node */
};
  63.  
  64. static GLboolean
  65. texenv_doing_secondary_color(struct gl_context *ctx)
  66. {
  67.    if (ctx->Light.Enabled &&
  68.        (ctx->Light.Model.ColorControl == GL_SEPARATE_SPECULAR_COLOR))
  69.       return GL_TRUE;
  70.  
  71.    if (ctx->Fog.ColorSumEnabled)
  72.       return GL_TRUE;
  73.  
  74.    return GL_FALSE;
  75. }
  76.  
/**
 * Up to nine instructions per tex unit, plus fog, specular color.
 * emit_op() asserts that the running instruction count stays below this.
 */
#define MAX_INSTRUCTIONS ((MAX_TEXTURE_COORD_UNITS * 9) + 12)

/** Non-zero when the VERBOSE_DISASSEM bit is set in MESA_VERBOSE. */
#define DISASSEM (MESA_VERBOSE & VERBOSE_DISASSEM)
  83.  
/**
 * One combiner argument: where the value comes from (SRC_x) and how it is
 * read (OPR_x).  With GCC the two fields are packed as bitfields; 4 bits
 * hold every SRC_x code (max SRC_UNKNOWN == 15) and 3 bits hold every
 * OPR_x code (max OPR_UNKNOWN == 7).  Other compilers use whole bytes.
 */
struct mode_opt {
#ifdef __GNUC__
   __extension__ GLubyte Source:4;  /**< SRC_x */
   __extension__ GLubyte Operand:3; /**< OPR_x */
#else
   GLubyte Source;  /**< SRC_x */
   GLubyte Operand; /**< OPR_x */
#endif
};
  93.  
/**
 * Compact description of the fixed-function fragment state, filled in by
 * make_state_key().  The per-unit array is deliberately the final member so
 * that the effective key size can stop after the last enabled unit.
 */
struct state_key {
   GLuint nr_enabled_units:8;  /**< index of the highest enabled unit, plus one */
   GLuint enabled_units:8;     /**< bit i set when texture unit i is enabled */
   GLuint separate_specular:1; /**< secondary color is in use */
   GLuint fog_enabled:1;
   GLuint fog_mode:2;          /**< FOG_x */
   GLuint inputs_available:12; /**< FRAG_BIT_x inputs both available and referenced */
   GLuint num_draw_buffers:4;  /**< copied from the draw buffer's color buffer count */

   /* NOTE: This array of structs must be last! (see "keySize" in
    * make_state_key())
    */
   struct {
      GLuint enabled:1;
      GLuint source_index:3;   /**< TEXTURE_x_INDEX */
      GLuint shadow:1;         /**< depth-compare sampling */
      GLuint ScaleShiftRGB:2;
      GLuint ScaleShiftA:2;

      GLuint NumArgsRGB:3;  /**< up to MAX_COMBINER_TERMS */
      GLuint ModeRGB:5;     /**< MODE_x */

      GLuint NumArgsA:3;  /**< up to MAX_COMBINER_TERMS */
      GLuint ModeA:5;     /**< MODE_x */

      GLuint texture_cyl_wrap:1; /**< For gallium test/debug only */

      struct mode_opt OptRGB[MAX_COMBINER_TERMS]; /**< RGB combiner arguments */
      struct mode_opt OptA[MAX_COMBINER_TERMS];   /**< alpha combiner arguments */
   } unit[MAX_TEXTURE_UNITS];
};
  123.  
  124. #define FOG_LINEAR  0
  125. #define FOG_EXP     1
  126. #define FOG_EXP2    2
  127. #define FOG_UNKNOWN 3
  128.  
  129. static GLuint translate_fog_mode( GLenum mode )
  130. {
  131.    switch (mode) {
  132.    case GL_LINEAR: return FOG_LINEAR;
  133.    case GL_EXP: return FOG_EXP;
  134.    case GL_EXP2: return FOG_EXP2;
  135.    default: return FOG_UNKNOWN;
  136.    }
  137. }
  138.  
  139. #define OPR_SRC_COLOR           0
  140. #define OPR_ONE_MINUS_SRC_COLOR 1
  141. #define OPR_SRC_ALPHA           2
  142. #define OPR_ONE_MINUS_SRC_ALPHA 3
  143. #define OPR_ZERO                4
  144. #define OPR_ONE                 5
  145. #define OPR_UNKNOWN             7
  146.  
  147. static GLuint translate_operand( GLenum operand )
  148. {
  149.    switch (operand) {
  150.    case GL_SRC_COLOR: return OPR_SRC_COLOR;
  151.    case GL_ONE_MINUS_SRC_COLOR: return OPR_ONE_MINUS_SRC_COLOR;
  152.    case GL_SRC_ALPHA: return OPR_SRC_ALPHA;
  153.    case GL_ONE_MINUS_SRC_ALPHA: return OPR_ONE_MINUS_SRC_ALPHA;
  154.    case GL_ZERO: return OPR_ZERO;
  155.    case GL_ONE: return OPR_ONE;
  156.    default:
  157.       assert(0);
  158.       return OPR_UNKNOWN;
  159.    }
  160. }
  161.  
  162. #define SRC_TEXTURE  0
  163. #define SRC_TEXTURE0 1
  164. #define SRC_TEXTURE1 2
  165. #define SRC_TEXTURE2 3
  166. #define SRC_TEXTURE3 4
  167. #define SRC_TEXTURE4 5
  168. #define SRC_TEXTURE5 6
  169. #define SRC_TEXTURE6 7
  170. #define SRC_TEXTURE7 8
  171. #define SRC_CONSTANT 9
  172. #define SRC_PRIMARY_COLOR 10
  173. #define SRC_PREVIOUS 11
  174. #define SRC_ZERO     12
  175. #define SRC_UNKNOWN  15
  176.  
  177. static GLuint translate_source( GLenum src )
  178. {
  179.    switch (src) {
  180.    case GL_TEXTURE: return SRC_TEXTURE;
  181.    case GL_TEXTURE0:
  182.    case GL_TEXTURE1:
  183.    case GL_TEXTURE2:
  184.    case GL_TEXTURE3:
  185.    case GL_TEXTURE4:
  186.    case GL_TEXTURE5:
  187.    case GL_TEXTURE6:
  188.    case GL_TEXTURE7: return SRC_TEXTURE0 + (src - GL_TEXTURE0);
  189.    case GL_CONSTANT: return SRC_CONSTANT;
  190.    case GL_PRIMARY_COLOR: return SRC_PRIMARY_COLOR;
  191.    case GL_PREVIOUS: return SRC_PREVIOUS;
  192.    case GL_ZERO:
  193.       return SRC_ZERO;
  194.    default:
  195.       assert(0);
  196.       return SRC_UNKNOWN;
  197.    }
  198. }
  199.  
  200. #define MODE_REPLACE                     0  /* r = a0 */
  201. #define MODE_MODULATE                    1  /* r = a0 * a1 */
  202. #define MODE_ADD                         2  /* r = a0 + a1 */
  203. #define MODE_ADD_SIGNED                  3  /* r = a0 + a1 - 0.5 */
  204. #define MODE_INTERPOLATE                 4  /* r = a0 * a2 + a1 * (1 - a2) */
  205. #define MODE_SUBTRACT                    5  /* r = a0 - a1 */
  206. #define MODE_DOT3_RGB                    6  /* r = a0 . a1 */
  207. #define MODE_DOT3_RGB_EXT                7  /* r = a0 . a1 */
  208. #define MODE_DOT3_RGBA                   8  /* r = a0 . a1 */
  209. #define MODE_DOT3_RGBA_EXT               9  /* r = a0 . a1 */
  210. #define MODE_MODULATE_ADD_ATI           10  /* r = a0 * a2 + a1 */
  211. #define MODE_MODULATE_SIGNED_ADD_ATI    11  /* r = a0 * a2 + a1 - 0.5 */
  212. #define MODE_MODULATE_SUBTRACT_ATI      12  /* r = a0 * a2 - a1 */
  213. #define MODE_ADD_PRODUCTS               13  /* r = a0 * a1 + a2 * a3 */
  214. #define MODE_ADD_PRODUCTS_SIGNED        14  /* r = a0 * a1 + a2 * a3 - 0.5 */
  215. #define MODE_BUMP_ENVMAP_ATI            15  /* special */
  216. #define MODE_UNKNOWN                    16
  217.  
  218. /**
  219.  * Translate GL combiner state into a MODE_x value
  220.  */
  221. static GLuint translate_mode( GLenum envMode, GLenum mode )
  222. {
  223.    switch (mode) {
  224.    case GL_REPLACE: return MODE_REPLACE;
  225.    case GL_MODULATE: return MODE_MODULATE;
  226.    case GL_ADD:
  227.       if (envMode == GL_COMBINE4_NV)
  228.          return MODE_ADD_PRODUCTS;
  229.       else
  230.          return MODE_ADD;
  231.    case GL_ADD_SIGNED:
  232.       if (envMode == GL_COMBINE4_NV)
  233.          return MODE_ADD_PRODUCTS_SIGNED;
  234.       else
  235.          return MODE_ADD_SIGNED;
  236.    case GL_INTERPOLATE: return MODE_INTERPOLATE;
  237.    case GL_SUBTRACT: return MODE_SUBTRACT;
  238.    case GL_DOT3_RGB: return MODE_DOT3_RGB;
  239.    case GL_DOT3_RGB_EXT: return MODE_DOT3_RGB_EXT;
  240.    case GL_DOT3_RGBA: return MODE_DOT3_RGBA;
  241.    case GL_DOT3_RGBA_EXT: return MODE_DOT3_RGBA_EXT;
  242.    case GL_MODULATE_ADD_ATI: return MODE_MODULATE_ADD_ATI;
  243.    case GL_MODULATE_SIGNED_ADD_ATI: return MODE_MODULATE_SIGNED_ADD_ATI;
  244.    case GL_MODULATE_SUBTRACT_ATI: return MODE_MODULATE_SUBTRACT_ATI;
  245.    case GL_BUMP_ENVMAP_ATI: return MODE_BUMP_ENVMAP_ATI;
  246.    default:
  247.       assert(0);
  248.       return MODE_UNKNOWN;
  249.    }
  250. }
  251.  
  252.  
  253. /**
  254.  * Do we need to clamp the results of the given texture env/combine mode?
  255.  * If the inputs to the mode are in [0,1] we don't always have to clamp
  256.  * the results.
  257.  */
  258. static GLboolean
  259. need_saturate( GLuint mode )
  260. {
  261.    switch (mode) {
  262.    case MODE_REPLACE:
  263.    case MODE_MODULATE:
  264.    case MODE_INTERPOLATE:
  265.       return GL_FALSE;
  266.    case MODE_ADD:
  267.    case MODE_ADD_SIGNED:
  268.    case MODE_SUBTRACT:
  269.    case MODE_DOT3_RGB:
  270.    case MODE_DOT3_RGB_EXT:
  271.    case MODE_DOT3_RGBA:
  272.    case MODE_DOT3_RGBA_EXT:
  273.    case MODE_MODULATE_ADD_ATI:
  274.    case MODE_MODULATE_SIGNED_ADD_ATI:
  275.    case MODE_MODULATE_SUBTRACT_ATI:
  276.    case MODE_ADD_PRODUCTS:
  277.    case MODE_ADD_PRODUCTS_SIGNED:
  278.    case MODE_BUMP_ENVMAP_ATI:
  279.       return GL_TRUE;
  280.    default:
  281.       assert(0);
  282.       return GL_FALSE;
  283.    }
  284. }
  285.  
  286.  
  287.  
  288. /**
  289.  * Translate TEXTURE_x_BIT to TEXTURE_x_INDEX.
  290.  */
  291. static GLuint translate_tex_src_bit( GLbitfield bit )
  292. {
  293.    ASSERT(bit);
  294.    return _mesa_ffs(bit) - 1;
  295. }
  296.  
  297.  
/* Masks covering eight consecutive texcoord bits, expressed in vertex
 * attribute bit space and in vertex result bit space respectively.
 */
#define VERT_BIT_TEX_ANY    (0xff << VERT_ATTRIB_TEX0)
#define VERT_RESULT_TEX_ANY (0xff << VERT_RESULT_TEX0)

/**
 * Identify all possible varying inputs.  The fragment program will
 * never reference non-varying inputs, but will track them via state
 * constants instead.
 *
 * This function figures out all the inputs that the fragment program
 * has access to.  The bitmask is later reduced to just those which
 * are actually referenced.
 *
 * Four cases are distinguished, in this order: an overridden vertex
 * program (everything is assumed available), feedback render mode,
 * fixed-function vertex processing, and a user vertex shader/program.
 *
 * \return bitmask of FRAG_BIT_x values.
 */
static GLbitfield get_fp_input_mask( struct gl_context *ctx )
{
   /* _NEW_PROGRAM */
   const GLboolean vertexShader =
      (ctx->Shader.CurrentVertexProgram &&
       ctx->Shader.CurrentVertexProgram->LinkStatus &&
       ctx->Shader.CurrentVertexProgram->VertexProgram);
   const GLboolean vertexProgram = ctx->VertexProgram._Enabled;
   GLbitfield fp_inputs = 0x0;

   if (ctx->VertexProgram._Overriden) {
      /* Somebody's messing with the vertex program and we don't have
       * a clue what's happening.  Assume that it could be producing
       * all possible outputs.
       */
      fp_inputs = ~0;
   }
   else if (ctx->RenderMode == GL_FEEDBACK) {
      /* _NEW_RENDERMODE */
      fp_inputs = (FRAG_BIT_COL0 | FRAG_BIT_TEX0);
   }
   else if (!(vertexProgram || vertexShader) ||
            !ctx->VertexProgram._Current) {
      /* Fixed function vertex logic */
      /* _NEW_ARRAY */
      GLbitfield varying_inputs = ctx->varying_vp_inputs;

      /* These get generated in the setup routine regardless of the
       * vertex program:
       */
      /* _NEW_POINT */
      /* NOTE(review): varying_inputs is tested against VERT_BIT_x masks
       * below, yet a FRAG_BITS_x mask is OR'd in here.  This is only
       * right if the two bit layouts agree for texcoords -- confirm.
       */
      if (ctx->Point.PointSprite)
         varying_inputs |= FRAG_BITS_TEX_ANY;

      /* First look at what values may be computed by the generated
       * vertex program:
       */
      /* _NEW_LIGHT */
      if (ctx->Light.Enabled) {
         fp_inputs |= FRAG_BIT_COL0;

         if (texenv_doing_secondary_color(ctx))
            fp_inputs |= FRAG_BIT_COL1;
      }

      /* _NEW_TEXTURE */
      fp_inputs |= (ctx->Texture._TexGenEnabled |
                    ctx->Texture._TexMatEnabled) << FRAG_ATTRIB_TEX0;

      /* Then look at what might be varying as a result of enabled
       * arrays, etc:
       */
      if (varying_inputs & VERT_BIT_COLOR0)
         fp_inputs |= FRAG_BIT_COL0;
      if (varying_inputs & VERT_BIT_COLOR1)
         fp_inputs |= FRAG_BIT_COL1;

      /* Move the texcoord bits from vertex-attribute positions to
       * fragment-attribute positions.
       */
      fp_inputs |= (((varying_inputs & VERT_BIT_TEX_ANY) >> VERT_ATTRIB_TEX0)
                    << FRAG_ATTRIB_TEX0);

   }
   else {
      /* calculate from vp->outputs */
      struct gl_vertex_program *vprog;
      GLbitfield64 vp_outputs;

      /* Choose GLSL vertex shader over ARB vertex program.  Need this
       * since vertex shader state validation comes after fragment state
       * validation (see additional comments in state.c).
       */
      if (vertexShader)
         vprog = ctx->Shader.CurrentVertexProgram->VertexProgram;
      else
         vprog = ctx->VertexProgram.Current;

      vp_outputs = vprog->Base.OutputsWritten;

      /* These get generated in the setup routine regardless of the
       * vertex program:
       */
      /* _NEW_POINT */
      /* NOTE(review): vp_outputs is tested against VERT_RESULT_x masks
       * below, yet a FRAG_BITS_x mask is OR'd in here -- confirm the two
       * layouts agree for texcoords.
       */
      if (ctx->Point.PointSprite)
         vp_outputs |= FRAG_BITS_TEX_ANY;

      if (vp_outputs & (1 << VERT_RESULT_COL0))
         fp_inputs |= FRAG_BIT_COL0;
      if (vp_outputs & (1 << VERT_RESULT_COL1))
         fp_inputs |= FRAG_BIT_COL1;

      /* Move the texcoord bits from vertex-result positions to
       * fragment-attribute positions.
       */
      fp_inputs |= (((vp_outputs & VERT_RESULT_TEX_ANY) >> VERT_RESULT_TEX0)
                    << FRAG_ATTRIB_TEX0);
   }
   
   return fp_inputs;
}
  405.  
  406.  
/**
 * Examine current texture environment state and generate a unique
 * key to identify it.
 *
 * The key is zeroed first, so every field not written below (including
 * all data for disabled units) is zero.
 *
 * \param ctx  GL context whose texture, light, fog and draw buffer state
 *             is read
 * \param key  receives the key
 * \return number of leading bytes of \p key that are significant: the
 *         fixed header plus the per-unit records up to and including the
 *         highest enabled unit.
 */
static GLuint make_state_key( struct gl_context *ctx,  struct state_key *key )
{
   GLuint i, j;
   /* The primary color is always counted as referenced. */
   GLbitfield inputs_referenced = FRAG_BIT_COL0;
   const GLbitfield inputs_available = get_fp_input_mask( ctx );
   GLuint keySize;

   memset(key, 0, sizeof(*key));

   /* _NEW_TEXTURE */
   for (i = 0; i < ctx->Const.MaxTextureUnits; i++) {
      const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i];
      const struct gl_texture_object *texObj = texUnit->_Current;
      const struct gl_tex_env_combine_state *comb = texUnit->_CurrentCombine;
      GLenum format;

      /* Skip units that are not enabled; their key slot stays zeroed. */
      if (!texUnit->_ReallyEnabled || !texUnit->Enabled)
         continue;

      format = texObj->Image[0][texObj->BaseLevel]->_BaseFormat;

      key->unit[i].enabled = 1;
      key->enabled_units |= (1<<i);
      /* i only grows, so this ends up as highest enabled unit + 1. */
      key->nr_enabled_units = i + 1;
      inputs_referenced |= FRAG_BIT_TEX(i);

      key->unit[i].source_index =
         translate_tex_src_bit(texUnit->_ReallyEnabled);

      /* Shadow sampling: compare mode on a depth or depth/stencil texture. */
      key->unit[i].shadow = ((texObj->CompareMode == GL_COMPARE_R_TO_TEXTURE) &&
                             ((format == GL_DEPTH_COMPONENT) ||
                              (format == GL_DEPTH_STENCIL_EXT)));

      key->unit[i].NumArgsRGB = comb->_NumArgsRGB;
      key->unit[i].NumArgsA = comb->_NumArgsA;

      key->unit[i].ModeRGB =
         translate_mode(texUnit->EnvMode, comb->ModeRGB);
      key->unit[i].ModeA =
         translate_mode(texUnit->EnvMode, comb->ModeA);

      key->unit[i].ScaleShiftRGB = comb->ScaleShiftRGB;
      key->unit[i].ScaleShiftA = comb->ScaleShiftA;

      /* All MAX_COMBINER_TERMS slots are translated, not just the first
       * NumArgs of them.
       */
      for (j = 0; j < MAX_COMBINER_TERMS; j++) {
         key->unit[i].OptRGB[j].Operand = translate_operand(comb->OperandRGB[j]);
         key->unit[i].OptA[j].Operand = translate_operand(comb->OperandA[j]);
         key->unit[i].OptRGB[j].Source = translate_source(comb->SourceRGB[j]);
         key->unit[i].OptA[j].Source = translate_source(comb->SourceA[j]);
      }

      if (key->unit[i].ModeRGB == MODE_BUMP_ENVMAP_ATI) {
         /* requires some special translation: the RGB arguments are
          * overridden to be this unit's texture and the bump target unit's
          * texture.
          */
         key->unit[i].NumArgsRGB = 2;
         key->unit[i].ScaleShiftRGB = 0;
         key->unit[i].OptRGB[0].Operand = OPR_SRC_COLOR;
         key->unit[i].OptRGB[0].Source = SRC_TEXTURE;
         key->unit[i].OptRGB[1].Operand = OPR_SRC_COLOR;
         key->unit[i].OptRGB[1].Source = texUnit->BumpTarget - GL_TEXTURE0 + SRC_TEXTURE0;
       }

      /* this is a back-door for enabling cylindrical texture wrap mode */
      if (texObj->Priority == 0.125)
         key->unit[i].texture_cyl_wrap = 1;
   }

   /* _NEW_LIGHT | _NEW_FOG */
   if (texenv_doing_secondary_color(ctx)) {
      key->separate_specular = 1;
      inputs_referenced |= FRAG_BIT_COL1;
   }

   /* _NEW_FOG */
   if (ctx->Fog.Enabled) {
      key->fog_enabled = 1;
      key->fog_mode = translate_fog_mode(ctx->Fog.Mode);
      inputs_referenced |= FRAG_BIT_FOGC; /* maybe */
   }

   /* _NEW_BUFFERS */
   key->num_draw_buffers = ctx->DrawBuffer->_NumColorDrawBuffers;

   /* Keep only the inputs that are both available and referenced. */
   key->inputs_available = (inputs_available & inputs_referenced);

   /* compute size of state key, ignoring unused texture units */
   keySize = sizeof(*key) - sizeof(key->unit)
      + key->nr_enabled_units * sizeof(key->unit[0]);

   return keySize;
}
  501.  
  502.  
/**
 * Use uregs to represent registers internally, translate to Mesa's
 * expected formats on emit.  
 *
 * NOTE: These are passed by value extensively in this file rather
 * than as usual by pointer reference.  If this disturbs you, try
 * remembering they are just 32bits in size.
 *
 * GCC is smart enough to deal with these dword-sized structures in
 * much the same way as if I had defined them as dwords and was using
 * macros to access and set the fields.  This is much nicer and easier
 * to evolve.
 *
 * The field widths below add up to exactly 32 bits.
 */
struct ureg {
   GLuint file:4;        /* register file (PROGRAM_x) */
   GLuint idx:8;         /* index within the register file */
   GLuint negatebase:1;  /* set: emit_arg() emits NEGATE_XYZW, else NEGATE_NONE */
   GLuint swz:12;        /* swizzle, copied to the instruction by emit_arg() */
   GLuint pad:7;         /* unused; make_ureg() writes zero */
};
  523.  
/**
 * Sentinel meaning "no register"; is_undef() recognizes it by its file.
 * Initializers are positional: file, idx, negatebase, swz, pad.
 */
static const struct ureg undef = {
   PROGRAM_UNDEFINED,
   ~0,   /* truncated to the 8-bit idx field */
   0,
   0,
   0
};
  531.  
  532.  
/** State used to build the fragment program:
 */
struct texenv_fragment_program {
   struct gl_fragment_program *program;  /* program receiving the instructions */
   struct state_key *state;              /* state key the program is built from */

   /* The three masks below hold one bit per temporary register index. */
   GLbitfield alu_temps;        /**< Track texture indirections, see spec.
                                 * Set by emit_arith() for every temp it
                                 * reads or writes.
                                 */
   GLbitfield temps_output;     /**< Track texture indirections, see spec.
                                 * Set by reserve_temp(); release_temps()
                                 * keeps these allocated.
                                 */
   GLbitfield temp_in_use;      /**< Tracks temporary regs which are in use. */
   GLboolean error;             /* set by program_error() */

   struct ureg src_texture[MAX_TEXTURE_COORD_UNITS];  
   /* Reg containing each texture unit's sampled texture color,
    * else undef.
    */

   struct ureg texcoord_tex[MAX_TEXTURE_COORD_UNITS];
   /* Reg containing texcoord for a texture unit,
    * needed for bump mapping, else undef.
    */

   struct ureg src_previous;    /**< Reg containing color from previous
                                 * stage.  May need to be decl'd.
                                 */

   GLuint last_tex_stage;       /**< Number of last enabled texture unit */

   /* Constants registered on first use by get_half(), get_one() and
    * get_zero(); undef until then.
    */
   struct ureg half;
   struct ureg one;
   struct ureg zero;
};
  564.  
  565.  
  566.  
  567. static struct ureg make_ureg(GLuint file, GLuint idx)
  568. {
  569.    struct ureg reg;
  570.    reg.file = file;
  571.    reg.idx = idx;
  572.    reg.negatebase = 0;
  573.    reg.swz = SWIZZLE_NOOP;
  574.    reg.pad = 0;
  575.    return reg;
  576. }
  577.  
  578. static struct ureg swizzle( struct ureg reg, int x, int y, int z, int w )
  579. {
  580.    reg.swz = MAKE_SWIZZLE4(GET_SWZ(reg.swz, x),
  581.                            GET_SWZ(reg.swz, y),
  582.                            GET_SWZ(reg.swz, z),
  583.                            GET_SWZ(reg.swz, w));
  584.  
  585.    return reg;
  586. }
  587.  
  588. static struct ureg swizzle1( struct ureg reg, int x )
  589. {
  590.    return swizzle(reg, x, x, x, x);
  591. }
  592.  
  593. static struct ureg negate( struct ureg reg )
  594. {
  595.    reg.negatebase ^= 1;
  596.    return reg;
  597. }
  598.  
  599. static GLboolean is_undef( struct ureg reg )
  600. {
  601.    return reg.file == PROGRAM_UNDEFINED;
  602. }
  603.  
  604.  
  605. static struct ureg get_temp( struct texenv_fragment_program *p )
  606. {
  607.    GLint bit;
  608.    
  609.    /* First try and reuse temps which have been used already:
  610.     */
  611.    bit = _mesa_ffs( ~p->temp_in_use & p->alu_temps );
  612.  
  613.    /* Then any unused temporary:
  614.     */
  615.    if (!bit)
  616.       bit = _mesa_ffs( ~p->temp_in_use );
  617.  
  618.    if (!bit) {
  619.       _mesa_problem(NULL, "%s: out of temporaries\n", __FILE__);
  620.       exit(1);
  621.    }
  622.  
  623.    if ((GLuint) bit > p->program->Base.NumTemporaries)
  624.       p->program->Base.NumTemporaries = bit;
  625.  
  626.    p->temp_in_use |= 1<<(bit-1);
  627.    return make_ureg(PROGRAM_TEMPORARY, (bit-1));
  628. }
  629.  
  630. static struct ureg get_tex_temp( struct texenv_fragment_program *p )
  631. {
  632.    int bit;
  633.    
  634.    /* First try to find available temp not previously used (to avoid
  635.     * starting a new texture indirection).  According to the spec, the
  636.     * ~p->temps_output isn't necessary, but will keep it there for
  637.     * now:
  638.     */
  639.    bit = _mesa_ffs( ~p->temp_in_use & ~p->alu_temps & ~p->temps_output );
  640.  
  641.    /* Then any unused temporary:
  642.     */
  643.    if (!bit)
  644.       bit = _mesa_ffs( ~p->temp_in_use );
  645.  
  646.    if (!bit) {
  647.       _mesa_problem(NULL, "%s: out of temporaries\n", __FILE__);
  648.       exit(1);
  649.    }
  650.  
  651.    if ((GLuint) bit > p->program->Base.NumTemporaries)
  652.       p->program->Base.NumTemporaries = bit;
  653.  
  654.    p->temp_in_use |= 1<<(bit-1);
  655.    return make_ureg(PROGRAM_TEMPORARY, (bit-1));
  656. }
  657.  
  658.  
  659. /** Mark a temp reg as being no longer allocatable. */
  660. static void reserve_temp( struct texenv_fragment_program *p, struct ureg r )
  661. {
  662.    if (r.file == PROGRAM_TEMPORARY)
  663.       p->temps_output |= (1 << r.idx);
  664. }
  665.  
  666.  
  667. static void release_temps(struct gl_context *ctx, struct texenv_fragment_program *p )
  668. {
  669.    GLuint max_temp = ctx->Const.FragmentProgram.MaxTemps;
  670.  
  671.    /* KW: To support tex_env_crossbar, don't release the registers in
  672.     * temps_output.
  673.     */
  674.    if (max_temp >= sizeof(int) * 8)
  675.       p->temp_in_use = p->temps_output;
  676.    else
  677.       p->temp_in_use = ~((1<<max_temp)-1) | p->temps_output;
  678. }
  679.  
  680.  
  681. static struct ureg register_param5( struct texenv_fragment_program *p,
  682.                                     GLint s0,
  683.                                     GLint s1,
  684.                                     GLint s2,
  685.                                     GLint s3,
  686.                                     GLint s4)
  687. {
  688.    gl_state_index tokens[STATE_LENGTH];
  689.    GLuint idx;
  690.    tokens[0] = s0;
  691.    tokens[1] = s1;
  692.    tokens[2] = s2;
  693.    tokens[3] = s3;
  694.    tokens[4] = s4;
  695.    idx = _mesa_add_state_reference( p->program->Base.Parameters, tokens );
  696.    return make_ureg(PROGRAM_STATE_VAR, idx);
  697. }
  698.  
  699.  
/* Shorthands for register_param5() that pass 0 for the unused trailing
 * tokens.
 */
#define register_param1(p,s0)          register_param5(p,s0,0,0,0,0)
#define register_param2(p,s0,s1)       register_param5(p,s0,s1,0,0,0)
#define register_param3(p,s0,s1,s2)    register_param5(p,s0,s1,s2,0,0)
#define register_param4(p,s0,s1,s2,s3) register_param5(p,s0,s1,s2,s3,0)
  704.  
  705. static GLuint frag_to_vert_attrib( GLuint attrib )
  706. {
  707.    switch (attrib) {
  708.    case FRAG_ATTRIB_COL0: return VERT_ATTRIB_COLOR0;
  709.    case FRAG_ATTRIB_COL1: return VERT_ATTRIB_COLOR1;
  710.    default:
  711.       assert(attrib >= FRAG_ATTRIB_TEX0);
  712.       assert(attrib <= FRAG_ATTRIB_TEX7);
  713.       return attrib - FRAG_ATTRIB_TEX0 + VERT_ATTRIB_TEX0;
  714.    }
  715. }
  716.  
  717.  
  718. static struct ureg register_input( struct texenv_fragment_program *p, GLuint input )
  719. {
  720.    if (p->state->inputs_available & (1<<input)) {
  721.       p->program->Base.InputsRead |= (1 << input);
  722.       return make_ureg(PROGRAM_INPUT, input);
  723.    }
  724.    else {
  725.       GLuint idx = frag_to_vert_attrib( input );
  726.       return register_param3( p, STATE_INTERNAL, STATE_CURRENT_ATTRIB, idx );
  727.    }
  728. }
  729.  
  730.  
  731. static void emit_arg( struct prog_src_register *reg,
  732.                       struct ureg ureg )
  733. {
  734.    reg->File = ureg.file;
  735.    reg->Index = ureg.idx;
  736.    reg->Swizzle = ureg.swz;
  737.    reg->Negate = ureg.negatebase ? NEGATE_XYZW : NEGATE_NONE;
  738.    reg->Abs = GL_FALSE;
  739. }
  740.  
  741. static void emit_dst( struct prog_dst_register *dst,
  742.                       struct ureg ureg, GLuint mask )
  743. {
  744.    dst->File = ureg.file;
  745.    dst->Index = ureg.idx;
  746.    dst->WriteMask = mask;
  747.    dst->CondMask = COND_TR;  /* always pass cond test */
  748.    dst->CondSwizzle = SWIZZLE_NOOP;
  749. }
  750.  
  751. static struct prog_instruction *
  752. emit_op(struct texenv_fragment_program *p,
  753.         enum prog_opcode op,
  754.         struct ureg dest,
  755.         GLuint mask,
  756.         GLboolean saturate,
  757.         struct ureg src0,
  758.         struct ureg src1,
  759.         struct ureg src2 )
  760. {
  761.    const GLuint nr = p->program->Base.NumInstructions++;
  762.    struct prog_instruction *inst = &p->program->Base.Instructions[nr];
  763.  
  764.    assert(nr < MAX_INSTRUCTIONS);
  765.  
  766.    _mesa_init_instructions(inst, 1);
  767.    inst->Opcode = op;
  768.    
  769.    emit_arg( &inst->SrcReg[0], src0 );
  770.    emit_arg( &inst->SrcReg[1], src1 );
  771.    emit_arg( &inst->SrcReg[2], src2 );
  772.    
  773.    inst->SaturateMode = saturate ? SATURATE_ZERO_ONE : SATURATE_OFF;
  774.  
  775.    emit_dst( &inst->DstReg, dest, mask );
  776.  
  777. #if 0
  778.    /* Accounting for indirection tracking:
  779.     */
  780.    if (dest.file == PROGRAM_TEMPORARY)
  781.       p->temps_output |= 1 << dest.idx;
  782. #endif
  783.  
  784.    return inst;
  785. }
  786.    
  787.  
  788. static struct ureg emit_arith( struct texenv_fragment_program *p,
  789.                                enum prog_opcode op,
  790.                                struct ureg dest,
  791.                                GLuint mask,
  792.                                GLboolean saturate,
  793.                                struct ureg src0,
  794.                                struct ureg src1,
  795.                                struct ureg src2 )
  796. {
  797.    emit_op(p, op, dest, mask, saturate, src0, src1, src2);
  798.    
  799.    /* Accounting for indirection tracking:
  800.     */
  801.    if (src0.file == PROGRAM_TEMPORARY)
  802.       p->alu_temps |= 1 << src0.idx;
  803.  
  804.    if (!is_undef(src1) && src1.file == PROGRAM_TEMPORARY)
  805.       p->alu_temps |= 1 << src1.idx;
  806.  
  807.    if (!is_undef(src2) && src2.file == PROGRAM_TEMPORARY)
  808.       p->alu_temps |= 1 << src2.idx;
  809.  
  810.    if (dest.file == PROGRAM_TEMPORARY)
  811.       p->alu_temps |= 1 << dest.idx;
  812.        
  813.    p->program->Base.NumAluInstructions++;
  814.    return dest;
  815. }
  816.  
  817. static struct ureg emit_texld( struct texenv_fragment_program *p,
  818.                                enum prog_opcode op,
  819.                                struct ureg dest,
  820.                                GLuint destmask,
  821.                                GLuint tex_unit,
  822.                                GLuint tex_idx,
  823.                                GLuint tex_shadow,
  824.                                struct ureg coord )
  825. {
  826.    struct prog_instruction *inst = emit_op( p, op,
  827.                                           dest, destmask,
  828.                                           GL_FALSE,     /* don't saturate? */
  829.                                           coord,        /* arg 0? */
  830.                                           undef,
  831.                                           undef);
  832.    
  833.    inst->TexSrcTarget = tex_idx;
  834.    inst->TexSrcUnit = tex_unit;
  835.    inst->TexShadow = tex_shadow;
  836.  
  837.    p->program->Base.NumTexInstructions++;
  838.  
  839.    /* Accounting for indirection tracking:
  840.     */
  841.    reserve_temp(p, dest);
  842.  
  843. #if 0
  844.    /* Is this a texture indirection?
  845.     */
  846.    if ((coord.file == PROGRAM_TEMPORARY &&
  847.         (p->temps_output & (1<<coord.idx))) ||
  848.        (dest.file == PROGRAM_TEMPORARY &&
  849.         (p->alu_temps & (1<<dest.idx)))) {
  850.       p->program->Base.NumTexIndirections++;
  851.       p->temps_output = 1<<coord.idx;
  852.       p->alu_temps = 0;
  853.       assert(0);                /* KW: texture env crossbar */
  854.    }
  855. #endif
  856.  
  857.    return dest;
  858. }
  859.  
  860.  
  861. static struct ureg register_const4f( struct texenv_fragment_program *p,
  862.                                      GLfloat s0,
  863.                                      GLfloat s1,
  864.                                      GLfloat s2,
  865.                                      GLfloat s3)
  866. {
  867.    GLfloat values[4];
  868.    GLuint idx, swizzle;
  869.    struct ureg r;
  870.    values[0] = s0;
  871.    values[1] = s1;
  872.    values[2] = s2;
  873.    values[3] = s3;
  874.    idx = _mesa_add_unnamed_constant( p->program->Base.Parameters, values, 4,
  875.                                      &swizzle );
  876.    r = make_ureg(PROGRAM_CONSTANT, idx);
  877.    r.swz = swizzle;
  878.    return r;
  879. }
  880.  
  /* Convenience wrappers around register_const4f().
   *
   * register_scalar_const() replicates s0 into all four components (the
   * argument expression is expanded four times).  The 1f/2f/3f variants
   * fill the missing x/y/z components with 0 and set w to 1.
   */
  #define register_scalar_const(p, s0) \
     register_const4f(p, s0, s0, s0, s0)
  #define register_const1f(p, s0) \
     register_const4f(p, s0, 0, 0, 1)
  #define register_const2f(p, s0, s1) \
     register_const4f(p, s0, s1, 0, 1)
  #define register_const3f(p, s0, s1, s2) \
     register_const4f(p, s0, s1, s2, 1)
  885.  
  886.  
  887. static struct ureg get_one( struct texenv_fragment_program *p )
  888. {
  889.    if (is_undef(p->one))
  890.       p->one = register_scalar_const(p, 1.0);
  891.    return p->one;
  892. }
  893.  
  894. static struct ureg get_half( struct texenv_fragment_program *p )
  895. {
  896.    if (is_undef(p->half))
  897.       p->half = register_scalar_const(p, 0.5);
  898.    return p->half;
  899. }
  900.  
  901. static struct ureg get_zero( struct texenv_fragment_program *p )
  902. {
  903.    if (is_undef(p->zero))
  904.       p->zero = register_scalar_const(p, 0.0);
  905.    return p->zero;
  906. }
  907.  
  908.  
  909. static void program_error( struct texenv_fragment_program *p, const char *msg )
  910. {
  911.    _mesa_problem(NULL, "%s", msg);
  912.    p->error = 1;
  913. }
  914.  
  915. static struct ureg get_source( struct texenv_fragment_program *p,
  916.                                GLuint src, GLuint unit )
  917. {
  918.    switch (src) {
  919.    case SRC_TEXTURE:
  920.       assert(!is_undef(p->src_texture[unit]));
  921.       return p->src_texture[unit];
  922.  
  923.    case SRC_TEXTURE0:
  924.    case SRC_TEXTURE1:
  925.    case SRC_TEXTURE2:
  926.    case SRC_TEXTURE3:
  927.    case SRC_TEXTURE4:
  928.    case SRC_TEXTURE5:
  929.    case SRC_TEXTURE6:
  930.    case SRC_TEXTURE7:
  931.       assert(!is_undef(p->src_texture[src - SRC_TEXTURE0]));
  932.       return p->src_texture[src - SRC_TEXTURE0];
  933.  
  934.    case SRC_CONSTANT:
  935.       return register_param2(p, STATE_TEXENV_COLOR, unit);
  936.  
  937.    case SRC_PRIMARY_COLOR:
  938.       return register_input(p, FRAG_ATTRIB_COL0);
  939.  
  940.    case SRC_ZERO:
  941.       return get_zero(p);
  942.  
  943.    case SRC_PREVIOUS:
  944.       if (is_undef(p->src_previous))
  945.          return register_input(p, FRAG_ATTRIB_COL0);
  946.       else
  947.          return p->src_previous;
  948.  
  949.    default:
  950.       assert(0);
  951.       return undef;
  952.    }
  953. }
  954.  
  955. static struct ureg emit_combine_source( struct texenv_fragment_program *p,
  956.                                         GLuint mask,
  957.                                         GLuint unit,
  958.                                         GLuint source,
  959.                                         GLuint operand )
  960. {
  961.    struct ureg arg, src, one;
  962.  
  963.    src = get_source(p, source, unit);
  964.  
  965.    switch (operand) {
  966.    case OPR_ONE_MINUS_SRC_COLOR:
  967.       /* Get unused tmp,
  968.        * Emit tmp = 1.0 - arg.xyzw
  969.        */
  970.       arg = get_temp( p );
  971.       one = get_one( p );
  972.       return emit_arith( p, OPCODE_SUB, arg, mask, 0, one, src, undef);
  973.  
  974.    case OPR_SRC_ALPHA:
  975.       if (mask == WRITEMASK_W)
  976.          return src;
  977.       else
  978.          return swizzle1( src, SWIZZLE_W );
  979.    case OPR_ONE_MINUS_SRC_ALPHA:
  980.       /* Get unused tmp,
  981.        * Emit tmp = 1.0 - arg.wwww
  982.        */
  983.       arg = get_temp(p);
  984.       one = get_one(p);
  985.       return emit_arith(p, OPCODE_SUB, arg, mask, 0,
  986.                         one, swizzle1(src, SWIZZLE_W), undef);
  987.    case OPR_ZERO:
  988.       return get_zero(p);
  989.    case OPR_ONE:
  990.       return get_one(p);
  991.    case OPR_SRC_COLOR:
  992.       return src;
  993.    default:
  994.       assert(0);
  995.       return src;
  996.    }
  997. }
  998.  
  999. /**
  1000.  * Check if the RGB and Alpha sources and operands match for the given
  1001.  * texture unit's combinder state.  When the RGB and A sources and
  1002.  * operands match, we can emit fewer instructions.
  1003.  */
  1004. static GLboolean args_match( const struct state_key *key, GLuint unit )
  1005. {
  1006.    GLuint i, numArgs = key->unit[unit].NumArgsRGB;
  1007.  
  1008.    for (i = 0; i < numArgs; i++) {
  1009.       if (key->unit[unit].OptA[i].Source != key->unit[unit].OptRGB[i].Source)
  1010.          return GL_FALSE;
  1011.  
  1012.       switch (key->unit[unit].OptA[i].Operand) {
  1013.       case OPR_SRC_ALPHA:
  1014.          switch (key->unit[unit].OptRGB[i].Operand) {
  1015.          case OPR_SRC_COLOR:
  1016.          case OPR_SRC_ALPHA:
  1017.             break;
  1018.          default:
  1019.             return GL_FALSE;
  1020.          }
  1021.          break;
  1022.       case OPR_ONE_MINUS_SRC_ALPHA:
  1023.          switch (key->unit[unit].OptRGB[i].Operand) {
  1024.          case OPR_ONE_MINUS_SRC_COLOR:
  1025.          case OPR_ONE_MINUS_SRC_ALPHA:
  1026.             break;
  1027.          default:
  1028.             return GL_FALSE;
  1029.          }
  1030.          break;
  1031.       default:
  1032.          return GL_FALSE;       /* impossible */
  1033.       }
  1034.    }
  1035.  
  1036.    return GL_TRUE;
  1037. }
  1038.  
  1039. static struct ureg emit_combine( struct texenv_fragment_program *p,
  1040.                                  struct ureg dest,
  1041.                                  GLuint mask,
  1042.                                  GLboolean saturate,
  1043.                                  GLuint unit,
  1044.                                  GLuint nr,
  1045.                                  GLuint mode,
  1046.                                  const struct mode_opt *opt)
  1047. {
  1048.    struct ureg src[MAX_COMBINER_TERMS];
  1049.    struct ureg tmp, half;
  1050.    GLuint i;
  1051.  
  1052.    assert(nr <= MAX_COMBINER_TERMS);
  1053.  
  1054.    for (i = 0; i < nr; i++)
  1055.       src[i] = emit_combine_source( p, mask, unit, opt[i].Source, opt[i].Operand );
  1056.  
  1057.    switch (mode) {
  1058.    case MODE_REPLACE:
  1059.       if (mask == WRITEMASK_XYZW && !saturate)
  1060.          return src[0];
  1061.       else
  1062.          return emit_arith( p, OPCODE_MOV, dest, mask, saturate, src[0], undef, undef );
  1063.    case MODE_MODULATE:
  1064.       return emit_arith( p, OPCODE_MUL, dest, mask, saturate,
  1065.                          src[0], src[1], undef );
  1066.    case MODE_ADD:
  1067.       return emit_arith( p, OPCODE_ADD, dest, mask, saturate,
  1068.                          src[0], src[1], undef );
  1069.    case MODE_ADD_SIGNED:
  1070.       /* tmp = arg0 + arg1
  1071.        * result = tmp - .5
  1072.        */
  1073.       half = get_half(p);
  1074.       tmp = get_temp( p );
  1075.       emit_arith( p, OPCODE_ADD, tmp, mask, 0, src[0], src[1], undef );
  1076.       emit_arith( p, OPCODE_SUB, dest, mask, saturate, tmp, half, undef );
  1077.       return dest;
  1078.    case MODE_INTERPOLATE:
  1079.       /* Arg0 * (Arg2) + Arg1 * (1-Arg2) -- note arguments are reordered:
  1080.        */
  1081.       return emit_arith( p, OPCODE_LRP, dest, mask, saturate, src[2], src[0], src[1] );
  1082.  
  1083.    case MODE_SUBTRACT:
  1084.       return emit_arith( p, OPCODE_SUB, dest, mask, saturate, src[0], src[1], undef );
  1085.  
  1086.    case MODE_DOT3_RGBA:
  1087.    case MODE_DOT3_RGBA_EXT:
  1088.    case MODE_DOT3_RGB_EXT:
  1089.    case MODE_DOT3_RGB: {
  1090.       struct ureg tmp0 = get_temp( p );
  1091.       struct ureg tmp1 = get_temp( p );
  1092.       struct ureg neg1 = register_scalar_const(p, -1);
  1093.       struct ureg two  = register_scalar_const(p, 2);
  1094.  
  1095.       /* tmp0 = 2*src0 - 1
  1096.        * tmp1 = 2*src1 - 1
  1097.        *
  1098.        * dst = tmp0 dot3 tmp1
  1099.        */
  1100.       emit_arith( p, OPCODE_MAD, tmp0, WRITEMASK_XYZW, 0,
  1101.                   two, src[0], neg1);
  1102.  
  1103.       if (memcmp(&src[0], &src[1], sizeof(struct ureg)) == 0)
  1104.          tmp1 = tmp0;
  1105.       else
  1106.          emit_arith( p, OPCODE_MAD, tmp1, WRITEMASK_XYZW, 0,
  1107.                      two, src[1], neg1);
  1108.       emit_arith( p, OPCODE_DP3, dest, mask, saturate, tmp0, tmp1, undef);
  1109.       return dest;
  1110.    }
  1111.    case MODE_MODULATE_ADD_ATI:
  1112.       /* Arg0 * Arg2 + Arg1 */
  1113.       return emit_arith( p, OPCODE_MAD, dest, mask, saturate,
  1114.                          src[0], src[2], src[1] );
  1115.    case MODE_MODULATE_SIGNED_ADD_ATI: {
  1116.       /* Arg0 * Arg2 + Arg1 - 0.5 */
  1117.       struct ureg tmp0 = get_temp(p);
  1118.       half = get_half(p);
  1119.       emit_arith( p, OPCODE_MAD, tmp0, mask, 0, src[0], src[2], src[1] );
  1120.       emit_arith( p, OPCODE_SUB, dest, mask, saturate, tmp0, half, undef );
  1121.       return dest;
  1122.    }
  1123.    case MODE_MODULATE_SUBTRACT_ATI:
  1124.       /* Arg0 * Arg2 - Arg1 */
  1125.       emit_arith( p, OPCODE_MAD, dest, mask, 0, src[0], src[2], negate(src[1]) );
  1126.       return dest;
  1127.    case MODE_ADD_PRODUCTS:
  1128.       /* Arg0 * Arg1 + Arg2 * Arg3 */
  1129.       {
  1130.          struct ureg tmp0 = get_temp(p);
  1131.          emit_arith( p, OPCODE_MUL, tmp0, mask, 0, src[0], src[1], undef );
  1132.          emit_arith( p, OPCODE_MAD, dest, mask, saturate, src[2], src[3], tmp0 );
  1133.       }
  1134.       return dest;
  1135.    case MODE_ADD_PRODUCTS_SIGNED:
  1136.       /* Arg0 * Arg1 + Arg2 * Arg3 - 0.5 */
  1137.       {
  1138.          struct ureg tmp0 = get_temp(p);
  1139.          half = get_half(p);
  1140.          emit_arith( p, OPCODE_MUL, tmp0, mask, 0, src[0], src[1], undef );
  1141.          emit_arith( p, OPCODE_MAD, tmp0, mask, 0, src[2], src[3], tmp0 );
  1142.          emit_arith( p, OPCODE_SUB, dest, mask, saturate, tmp0, half, undef );
  1143.       }
  1144.       return dest;
  1145.    case MODE_BUMP_ENVMAP_ATI:
  1146.       /* special - not handled here */
  1147.       assert(0);
  1148.       return src[0];
  1149.    default:
  1150.       assert(0);
  1151.       return src[0];
  1152.    }
  1153. }
  1154.  
  1155.  
  1156. /**
  1157.  * Generate instructions for one texture unit's env/combiner mode.
  1158.  */
  1159. static struct ureg
  1160. emit_texenv(struct texenv_fragment_program *p, GLuint unit)
  1161. {
  1162.    const struct state_key *key = p->state;
  1163.    GLboolean rgb_saturate, alpha_saturate;
  1164.    GLuint rgb_shift, alpha_shift;
  1165.    struct ureg out, dest;
  1166.  
  1167.    if (!key->unit[unit].enabled) {
  1168.       return get_source(p, SRC_PREVIOUS, 0);
  1169.    }
  1170.    if (key->unit[unit].ModeRGB == MODE_BUMP_ENVMAP_ATI) {
  1171.       /* this isn't really a env stage delivering a color and handled elsewhere */
  1172.       return get_source(p, SRC_PREVIOUS, 0);
  1173.    }
  1174.    
  1175.    switch (key->unit[unit].ModeRGB) {
  1176.    case MODE_DOT3_RGB_EXT:
  1177.       alpha_shift = key->unit[unit].ScaleShiftA;
  1178.       rgb_shift = 0;
  1179.       break;
  1180.    case MODE_DOT3_RGBA_EXT:
  1181.       alpha_shift = 0;
  1182.       rgb_shift = 0;
  1183.       break;
  1184.    default:
  1185.       rgb_shift = key->unit[unit].ScaleShiftRGB;
  1186.       alpha_shift = key->unit[unit].ScaleShiftA;
  1187.       break;
  1188.    }
  1189.    
  1190.    /* If we'll do rgb/alpha shifting don't saturate in emit_combine().
  1191.     * We don't want to clamp twice.
  1192.     */
  1193.    if (rgb_shift)
  1194.       rgb_saturate = GL_FALSE;  /* saturate after rgb shift */
  1195.    else if (need_saturate(key->unit[unit].ModeRGB))
  1196.       rgb_saturate = GL_TRUE;
  1197.    else
  1198.       rgb_saturate = GL_FALSE;
  1199.  
  1200.    if (alpha_shift)
  1201.       alpha_saturate = GL_FALSE;  /* saturate after alpha shift */
  1202.    else if (need_saturate(key->unit[unit].ModeA))
  1203.       alpha_saturate = GL_TRUE;
  1204.    else
  1205.       alpha_saturate = GL_FALSE;
  1206.  
  1207.    /* If this is the very last calculation (and various other conditions
  1208.     * are met), emit directly to the color output register.  Otherwise,
  1209.     * emit to a temporary register.
  1210.     */
  1211.    if (key->separate_specular ||
  1212.        unit != p->last_tex_stage ||
  1213.        alpha_shift ||
  1214.        key->num_draw_buffers != 1 ||
  1215.        rgb_shift)
  1216.       dest = get_temp( p );
  1217.    else
  1218.       dest = make_ureg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR);
  1219.  
  1220.    /* Emit the RGB and A combine ops
  1221.     */
  1222.    if (key->unit[unit].ModeRGB == key->unit[unit].ModeA &&
  1223.        args_match(key, unit)) {
  1224.       out = emit_combine( p, dest, WRITEMASK_XYZW, rgb_saturate,
  1225.                           unit,
  1226.                           key->unit[unit].NumArgsRGB,
  1227.                           key->unit[unit].ModeRGB,
  1228.                           key->unit[unit].OptRGB);
  1229.    }
  1230.    else if (key->unit[unit].ModeRGB == MODE_DOT3_RGBA_EXT ||
  1231.             key->unit[unit].ModeRGB == MODE_DOT3_RGBA) {
  1232.       out = emit_combine( p, dest, WRITEMASK_XYZW, rgb_saturate,
  1233.                           unit,
  1234.                           key->unit[unit].NumArgsRGB,
  1235.                           key->unit[unit].ModeRGB,
  1236.                           key->unit[unit].OptRGB);
  1237.    }
  1238.    else {
  1239.       /* Need to do something to stop from re-emitting identical
  1240.        * argument calculations here:
  1241.        */
  1242.       out = emit_combine( p, dest, WRITEMASK_XYZ, rgb_saturate,
  1243.                           unit,
  1244.                           key->unit[unit].NumArgsRGB,
  1245.                           key->unit[unit].ModeRGB,
  1246.                           key->unit[unit].OptRGB);
  1247.       out = emit_combine( p, dest, WRITEMASK_W, alpha_saturate,
  1248.                           unit,
  1249.                           key->unit[unit].NumArgsA,
  1250.                           key->unit[unit].ModeA,
  1251.                           key->unit[unit].OptA);
  1252.    }
  1253.  
  1254.    /* Deal with the final shift:
  1255.     */
  1256.    if (alpha_shift || rgb_shift) {
  1257.       struct ureg shift;
  1258.       GLboolean saturate = GL_TRUE;  /* always saturate at this point */
  1259.  
  1260.       if (rgb_shift == alpha_shift) {
  1261.          shift = register_scalar_const(p, (GLfloat)(1<<rgb_shift));
  1262.       }
  1263.       else {
  1264.          shift = register_const4f(p,
  1265.                                   (GLfloat)(1<<rgb_shift),
  1266.                                   (GLfloat)(1<<rgb_shift),
  1267.                                   (GLfloat)(1<<rgb_shift),
  1268.                                   (GLfloat)(1<<alpha_shift));
  1269.       }
  1270.       return emit_arith( p, OPCODE_MUL, dest, WRITEMASK_XYZW,
  1271.                          saturate, out, shift, undef );
  1272.    }
  1273.    else
  1274.       return out;
  1275. }
  1276.  
  1277.  
  1278. /**
  1279.  * Generate instruction for getting a texture source term.
  1280.  */
  1281. static void load_texture( struct texenv_fragment_program *p, GLuint unit )
  1282. {
  1283.    if (is_undef(p->src_texture[unit])) {
  1284.       const GLuint texTarget = p->state->unit[unit].source_index;
  1285.       struct ureg texcoord;
  1286.       struct ureg tmp = get_tex_temp( p );
  1287.  
  1288.       if (is_undef(p->texcoord_tex[unit])) {
  1289.          texcoord = register_input(p, FRAG_ATTRIB_TEX0+unit);
  1290.       }
  1291.       else {
  1292.          /* might want to reuse this reg for tex output actually */
  1293.          texcoord = p->texcoord_tex[unit];
  1294.       }
  1295.  
  1296.       /* TODO: Use D0_MASK_XY where possible.
  1297.        */
  1298.       if (p->state->unit[unit].enabled) {
  1299.          GLboolean shadow = GL_FALSE;
  1300.  
  1301.          if (p->state->unit[unit].shadow) {
  1302.             p->program->Base.ShadowSamplers |= 1 << unit;
  1303.             shadow = GL_TRUE;
  1304.          }
  1305.  
  1306.          p->src_texture[unit] = emit_texld( p, OPCODE_TXP,
  1307.                                             tmp, WRITEMASK_XYZW,
  1308.                                             unit, texTarget, shadow,
  1309.                                             texcoord );
  1310.  
  1311.          p->program->Base.SamplersUsed |= (1 << unit);
  1312.          /* This identity mapping should already be in place
  1313.           * (see _mesa_init_program_struct()) but let's be safe.
  1314.           */
  1315.          p->program->Base.SamplerUnits[unit] = unit;
  1316.       }
  1317.       else
  1318.          p->src_texture[unit] = get_zero(p);
  1319.  
  1320.       if (p->state->unit[unit].texture_cyl_wrap) {
  1321.          /* set flag which is checked by Mesa->Gallium program translation */
  1322.          p->program->Base.InputFlags[0] |= PROG_PARAM_BIT_CYL_WRAP;
  1323.       }
  1324.  
  1325.    }
  1326. }
  1327.  
  1328. static GLboolean load_texenv_source( struct texenv_fragment_program *p,
  1329.                                      GLuint src, GLuint unit )
  1330. {
  1331.    switch (src) {
  1332.    case SRC_TEXTURE:
  1333.       load_texture(p, unit);
  1334.       break;
  1335.  
  1336.    case SRC_TEXTURE0:
  1337.    case SRC_TEXTURE1:
  1338.    case SRC_TEXTURE2:
  1339.    case SRC_TEXTURE3:
  1340.    case SRC_TEXTURE4:
  1341.    case SRC_TEXTURE5:
  1342.    case SRC_TEXTURE6:
  1343.    case SRC_TEXTURE7:      
  1344.       load_texture(p, src - SRC_TEXTURE0);
  1345.       break;
  1346.      
  1347.    default:
  1348.       /* not a texture src - do nothing */
  1349.       break;
  1350.    }
  1351.  
  1352.    return GL_TRUE;
  1353. }
  1354.  
  1355.  
  1356. /**
  1357.  * Generate instructions for loading all texture source terms.
  1358.  */
  1359. static GLboolean
  1360. load_texunit_sources( struct texenv_fragment_program *p, GLuint unit )
  1361. {
  1362.    const struct state_key *key = p->state;
  1363.    GLuint i;
  1364.  
  1365.    for (i = 0; i < key->unit[unit].NumArgsRGB; i++) {
  1366.       load_texenv_source( p, key->unit[unit].OptRGB[i].Source, unit );
  1367.    }
  1368.  
  1369.    for (i = 0; i < key->unit[unit].NumArgsA; i++) {
  1370.       load_texenv_source( p, key->unit[unit].OptA[i].Source, unit );
  1371.    }
  1372.  
  1373.    return GL_TRUE;
  1374. }
  1375.  
  1376. /**
  1377.  * Generate instructions for loading bump map textures.
  1378.  */
  1379. static GLboolean
  1380. load_texunit_bumpmap( struct texenv_fragment_program *p, GLuint unit )
  1381. {
  1382.    const struct state_key *key = p->state;
  1383.    GLuint bumpedUnitNr = key->unit[unit].OptRGB[1].Source - SRC_TEXTURE0;
  1384.    struct ureg texcDst, bumpMapRes;
  1385.    struct ureg constdudvcolor = register_const4f(p, 0.0, 0.0, 0.0, 1.0);
  1386.    struct ureg texcSrc = register_input(p, FRAG_ATTRIB_TEX0 + bumpedUnitNr);
  1387.    struct ureg rotMat0 = register_param3( p, STATE_INTERNAL, STATE_ROT_MATRIX_0, unit );
  1388.    struct ureg rotMat1 = register_param3( p, STATE_INTERNAL, STATE_ROT_MATRIX_1, unit );
  1389.  
  1390.    load_texenv_source( p, unit + SRC_TEXTURE0, unit );
  1391.  
  1392.    bumpMapRes = get_source(p, key->unit[unit].OptRGB[0].Source, unit);
  1393.    texcDst = get_tex_temp( p );
  1394.    p->texcoord_tex[bumpedUnitNr] = texcDst;
  1395.  
  1396.    /* Apply rot matrix and add coords to be available in next phase.
  1397.     * dest = (Arg0.xxxx * rotMat0 + Arg1) + (Arg0.yyyy * rotMat1)
  1398.     * note only 2 coords are affected the rest are left unchanged (mul by 0)
  1399.     */
  1400.    emit_arith( p, OPCODE_MAD, texcDst, WRITEMASK_XYZW, 0,
  1401.                swizzle1(bumpMapRes, SWIZZLE_X), rotMat0, texcSrc );
  1402.    emit_arith( p, OPCODE_MAD, texcDst, WRITEMASK_XYZW, 0,
  1403.                swizzle1(bumpMapRes, SWIZZLE_Y), rotMat1, texcDst );
  1404.  
  1405.    /* Move 0,0,0,1 into bumpmap src if someone (crossbar) is foolish
  1406.     * enough to access this later, should optimize away.
  1407.     */
  1408.    emit_arith( p, OPCODE_MOV, bumpMapRes, WRITEMASK_XYZW, 0,
  1409.                constdudvcolor, undef, undef );
  1410.  
  1411.    return GL_TRUE;
  1412. }
  1413.  
  1414. /**
  1415.  * Generate a new fragment program which implements the context's
  1416.  * current texture env/combine mode.
  1417.  */
  1418. static void
  1419. create_new_program(struct gl_context *ctx, struct state_key *key,
  1420.                    struct gl_fragment_program *program)
  1421. {
  1422.    struct prog_instruction instBuffer[MAX_INSTRUCTIONS];
  1423.    struct texenv_fragment_program p;
  1424.    GLuint unit;
  1425.    struct ureg cf, out;
  1426.    int i;
  1427.  
  1428.    memset(&p, 0, sizeof(p));
  1429.    p.state = key;
  1430.    p.program = program;
  1431.  
  1432.    /* During code generation, use locally-allocated instruction buffer,
  1433.     * then alloc dynamic storage below.
  1434.     */
  1435.    p.program->Base.Instructions = instBuffer;
  1436.    p.program->Base.Target = GL_FRAGMENT_PROGRAM_ARB;
  1437.    p.program->Base.String = NULL;
  1438.    p.program->Base.NumTexIndirections = 1; /* is this right? */
  1439.    p.program->Base.NumTexInstructions = 0;
  1440.    p.program->Base.NumAluInstructions = 0;
  1441.    p.program->Base.NumInstructions = 0;
  1442.    p.program->Base.NumTemporaries = 0;
  1443.    p.program->Base.NumParameters = 0;
  1444.    p.program->Base.NumAttributes = 0;
  1445.    p.program->Base.NumAddressRegs = 0;
  1446.    p.program->Base.Parameters = _mesa_new_parameter_list();
  1447.    p.program->Base.InputsRead = 0x0;
  1448.  
  1449.    if (key->num_draw_buffers == 1)
  1450.       p.program->Base.OutputsWritten = 1 << FRAG_RESULT_COLOR;
  1451.    else {
  1452.       for (i = 0; i < key->num_draw_buffers; i++)
  1453.          p.program->Base.OutputsWritten |= (1 << (FRAG_RESULT_DATA0 + i));
  1454.    }
  1455.  
  1456.    for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) {
  1457.       p.src_texture[unit] = undef;
  1458.       p.texcoord_tex[unit] = undef;
  1459.    }
  1460.  
  1461.    p.src_previous = undef;
  1462.    p.half = undef;
  1463.    p.zero = undef;
  1464.    p.one = undef;
  1465.  
  1466.    p.last_tex_stage = 0;
  1467.    release_temps(ctx, &p);
  1468.  
  1469.    if (key->enabled_units && key->num_draw_buffers) {
  1470.       GLboolean needbumpstage = GL_FALSE;
  1471.  
  1472.       /* Zeroth pass - bump map textures first */
  1473.       for (unit = 0; unit < key->nr_enabled_units; unit++)
  1474.          if (key->unit[unit].enabled &&
  1475.              key->unit[unit].ModeRGB == MODE_BUMP_ENVMAP_ATI) {
  1476.             needbumpstage = GL_TRUE;
  1477.             load_texunit_bumpmap( &p, unit );
  1478.          }
  1479.       if (needbumpstage)
  1480.          p.program->Base.NumTexIndirections++;
  1481.  
  1482.       /* First pass - to support texture_env_crossbar, first identify
  1483.        * all referenced texture sources and emit texld instructions
  1484.        * for each:
  1485.        */
  1486.       for (unit = 0; unit < key->nr_enabled_units; unit++)
  1487.          if (key->unit[unit].enabled) {
  1488.             load_texunit_sources( &p, unit );
  1489.             p.last_tex_stage = unit;
  1490.          }
  1491.  
  1492.       /* Second pass - emit combine instructions to build final color:
  1493.        */
  1494.       for (unit = 0; unit < key->nr_enabled_units; unit++)
  1495.          if (key->unit[unit].enabled) {
  1496.             p.src_previous = emit_texenv( &p, unit );
  1497.             reserve_temp(&p, p.src_previous); /* don't re-use this temp reg */
  1498.             release_temps(ctx, &p);     /* release all temps */
  1499.          }
  1500.    }
  1501.  
  1502.    cf = get_source( &p, SRC_PREVIOUS, 0 );
  1503.  
  1504.    for (i = 0; i < key->num_draw_buffers; i++) {
  1505.       if (key->num_draw_buffers == 1)
  1506.          out = make_ureg( PROGRAM_OUTPUT, FRAG_RESULT_COLOR );
  1507.       else {
  1508.          out = make_ureg( PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i );
  1509.       }
  1510.  
  1511.       if (key->separate_specular) {
  1512.          /* Emit specular add.
  1513.           */
  1514.          struct ureg s = register_input(&p, FRAG_ATTRIB_COL1);
  1515.          emit_arith( &p, OPCODE_ADD, out, WRITEMASK_XYZ, 0, cf, s, undef );
  1516.          emit_arith( &p, OPCODE_MOV, out, WRITEMASK_W, 0, cf, undef, undef );
  1517.       }
  1518.       else if (memcmp(&cf, &out, sizeof(cf)) != 0) {
  1519.          /* Will wind up in here if no texture enabled or a couple of
  1520.           * other scenarios (GL_REPLACE for instance).
  1521.           */
  1522.          emit_arith( &p, OPCODE_MOV, out, WRITEMASK_XYZW, 0, cf, undef, undef );
  1523.       }
  1524.    }
  1525.    /* Finish up:
  1526.     */
  1527.    emit_arith( &p, OPCODE_END, undef, WRITEMASK_XYZW, 0, undef, undef, undef);
  1528.  
  1529.    if (key->fog_enabled) {
  1530.       /* Pull fog mode from struct gl_context, the value in the state key is
  1531.        * a reduced value and not what is expected in FogOption
  1532.        */
  1533.       p.program->FogOption = ctx->Fog.Mode;
  1534.       p.program->Base.InputsRead |= FRAG_BIT_FOGC;
  1535.    }
  1536.    else {
  1537.       p.program->FogOption = GL_NONE;
  1538.    }
  1539.  
  1540.    if (p.program->Base.NumTexIndirections > ctx->Const.FragmentProgram.MaxTexIndirections)
  1541.       program_error(&p, "Exceeded max nr indirect texture lookups");
  1542.  
  1543.    if (p.program->Base.NumTexInstructions > ctx->Const.FragmentProgram.MaxTexInstructions)
  1544.       program_error(&p, "Exceeded max TEX instructions");
  1545.  
  1546.    if (p.program->Base.NumAluInstructions > ctx->Const.FragmentProgram.MaxAluInstructions)
  1547.       program_error(&p, "Exceeded max ALU instructions");
  1548.  
  1549.    ASSERT(p.program->Base.NumInstructions <= MAX_INSTRUCTIONS);
  1550.  
  1551.    /* Allocate final instruction array */
  1552.    p.program->Base.Instructions
  1553.       = _mesa_alloc_instructions(p.program->Base.NumInstructions);
  1554.    if (!p.program->Base.Instructions) {
  1555.       _mesa_error(ctx, GL_OUT_OF_MEMORY,
  1556.                   "generating tex env program");
  1557.       return;
  1558.    }
  1559.    _mesa_copy_instructions(p.program->Base.Instructions, instBuffer,
  1560.                            p.program->Base.NumInstructions);
  1561.  
  1562.    if (key->num_draw_buffers && p.program->FogOption) {
  1563.       _mesa_append_fog_code(ctx, p.program);
  1564.       p.program->FogOption = GL_NONE;
  1565.    }
  1566.  
  1567.  
  1568.    /* Notify driver the fragment program has (actually) changed.
  1569.     */
  1570.    if (ctx->Driver.ProgramStringNotify) {
  1571.       GLboolean ok = ctx->Driver.ProgramStringNotify(ctx,
  1572.                                                      GL_FRAGMENT_PROGRAM_ARB,
  1573.                                                      &p.program->Base);
  1574.       /* Driver should be able to handle any texenv programs as long as
  1575.        * the driver correctly reported max number of texture units correctly,
  1576.        * etc.
  1577.        */
  1578.       ASSERT(ok);
  1579.       (void) ok; /* silence unused var warning */
  1580.    }
  1581.  
  1582.    if (DISASSEM) {
  1583.       _mesa_print_program(&p.program->Base);
  1584.       printf("\n");
  1585.    }
  1586. }
  1587.  
  1588.  
  1589. /**
  1590.  * Return a fragment program which implements the current
  1591.  * fixed-function texture, fog and color-sum operations.
  1592.  */
  1593. struct gl_fragment_program *
  1594. _mesa_get_fixed_func_fragment_program(struct gl_context *ctx)
  1595. {
  1596.    struct gl_fragment_program *prog;
  1597.    struct state_key key;
  1598.    GLuint keySize;
  1599.        
  1600.    keySize = make_state_key(ctx, &key);
  1601.      
  1602.    prog = (struct gl_fragment_program *)
  1603.       _mesa_search_program_cache(ctx->FragmentProgram.Cache,
  1604.                                  &key, keySize);
  1605.  
  1606.    if (!prog) {
  1607.       prog = (struct gl_fragment_program *)
  1608.          ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0);
  1609.  
  1610.       create_new_program(ctx, &key, prog);
  1611.  
  1612.       _mesa_program_cache_insert(ctx, ctx->FragmentProgram.Cache,
  1613.                                  &key, keySize, &prog->Base);
  1614.    }
  1615.  
  1616.    return prog;
  1617. }
  1618.