Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | RSS feed

  1. /**************************************************************************
  2.  *
  3.  * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
  4.  * All Rights Reserved.
  5.  *
  6.  * Permission is hereby granted, free of charge, to any person obtaining a
  7.  * copy of this software and associated documentation files (the
  8.  * "Software"), to deal in the Software without restriction, including
  9.  * without limitation the rights to use, copy, modify, merge, publish,
  10.  * distribute, sub license, and/or sell copies of the Software, and to
  11.  * permit persons to whom the Software is furnished to do so, subject to
  12.  * the following conditions:
  13.  *
  14.  * The above copyright notice and this permission notice (including the
  15.  * next paragraph) shall be included in all copies or substantial portions
  16.  * of the Software.
  17.  *
  18.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20.  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21.  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
  22.  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23.  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24.  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25.  *
  26.  **************************************************************************/
  27.  
  28. /**
  29.  * \file ffvertex_prog.c
  30.  *
  31.  * Create a vertex program to execute the current fixed function T&L pipeline.
  32.  * \author Keith Whitwell
  33.  */
  34.  
  35.  
  36. #include "main/glheader.h"
  37. #include "main/mtypes.h"
  38. #include "main/macros.h"
  39. #include "main/enums.h"
  40. #include "main/ffvertex_prog.h"
  41. #include "program/program.h"
  42. #include "program/prog_cache.h"
  43. #include "program/prog_instruction.h"
  44. #include "program/prog_parameter.h"
  45. #include "program/prog_print.h"
  46. #include "program/prog_statevars.h"
  47.  
  48.  
  /**
   * Number of entries in state_key::unit[].  That array is indexed both by
   * light number and by texture coordinate unit, so it is sized to the
   * larger of the two limits.
   */
  #define NUM_UNITS MAX2(MAX_TEXTURE_COORD_UNITS, MAX_LIGHTS)
  51.  
  52. struct state_key {
  53.    unsigned light_color_material_mask:12;
  54.    unsigned light_global_enabled:1;
  55.    unsigned light_local_viewer:1;
  56.    unsigned light_twoside:1;
  57.    unsigned material_shininess_is_zero:1;
  58.    unsigned need_eye_coords:1;
  59.    unsigned normalize:1;
  60.    unsigned rescale_normals:1;
  61.  
  62.    unsigned fog_source_is_depth:1;
  63.    unsigned fog_distance_mode:2;
  64.    unsigned separate_specular:1;
  65.    unsigned point_attenuated:1;
  66.    unsigned point_array:1;
  67.    unsigned texture_enabled_global:1;
  68.    unsigned fragprog_inputs_read:12;
  69.  
  70.    GLbitfield64 varying_vp_inputs;
  71.  
  72.    struct {
  73.       unsigned light_enabled:1;
  74.       unsigned light_eyepos3_is_zero:1;
  75.       unsigned light_spotcutoff_is_180:1;
  76.       unsigned light_attenuated:1;
  77.       unsigned texunit_really_enabled:1;
  78.       unsigned texmat_enabled:1;
  79.       unsigned coord_replace:1;
  80.       unsigned texgen_enabled:4;
  81.       unsigned texgen_mode0:4;
  82.       unsigned texgen_mode1:4;
  83.       unsigned texgen_mode2:4;
  84.       unsigned texgen_mode3:4;
  85.    } unit[NUM_UNITS];
  86. };
  87.  
  88.  
  /* Small texgen mode codes returned by translate_texgen() and stored in
   * the 4-bit texgen_modeN fields of the state key.
   */
  #define TXG_NONE           0
  #define TXG_OBJ_LINEAR     1
  #define TXG_EYE_LINEAR     2
  #define TXG_SPHERE_MAP     3
  #define TXG_REFLECTION_MAP 4
  #define TXG_NORMAL_MAP     5
  95.  
  96. static GLuint translate_texgen( GLboolean enabled, GLenum mode )
  97. {
  98.    if (!enabled)
  99.       return TXG_NONE;
  100.  
  101.    switch (mode) {
  102.    case GL_OBJECT_LINEAR: return TXG_OBJ_LINEAR;
  103.    case GL_EYE_LINEAR: return TXG_EYE_LINEAR;
  104.    case GL_SPHERE_MAP: return TXG_SPHERE_MAP;
  105.    case GL_REFLECTION_MAP_NV: return TXG_REFLECTION_MAP;
  106.    case GL_NORMAL_MAP_NV: return TXG_NORMAL_MAP;
  107.    default: return TXG_NONE;
  108.    }
  109. }
  110.  
  /* Fog distance mode codes returned by translate_fog_distance_mode() and
   * stored in the 2-bit state_key::fog_distance_mode field.
   */
  #define FDM_EYE_RADIAL    0
  #define FDM_EYE_PLANE     1
  #define FDM_EYE_PLANE_ABS 2
  114.  
  115. static GLuint translate_fog_distance_mode( GLenum mode )
  116. {
  117.    switch (mode) {
  118.    case GL_EYE_RADIAL_NV:
  119.       return FDM_EYE_RADIAL;
  120.    case GL_EYE_PLANE:
  121.       return FDM_EYE_PLANE;
  122.    default: /* shouldn't happen; fall through to a sensible default */
  123.    case GL_EYE_PLANE_ABSOLUTE_NV:
  124.       return FDM_EYE_PLANE_ABS;
  125.    }
  126. }
  127.  
  128. static GLboolean check_active_shininess( struct gl_context *ctx,
  129.                                          const struct state_key *key,
  130.                                          GLuint side )
  131. {
  132.    GLuint attr = MAT_ATTRIB_FRONT_SHININESS + side;
  133.  
  134.    if ((key->varying_vp_inputs & VERT_BIT_COLOR0) &&
  135.        (key->light_color_material_mask & (1 << attr)))
  136.       return GL_TRUE;
  137.  
  138.    if (key->varying_vp_inputs & VERT_ATTRIB_GENERIC(attr))
  139.       return GL_TRUE;
  140.  
  141.    if (ctx->Light.Material.Attrib[attr][0] != 0.0F)
  142.       return GL_TRUE;
  143.  
  144.    return GL_FALSE;
  145. }
  146.  
  147.  
  148. static void make_state_key( struct gl_context *ctx, struct state_key *key )
  149. {
  150.    const struct gl_fragment_program *fp;
  151.    GLuint i;
  152.  
  153.    memset(key, 0, sizeof(struct state_key));
  154.    fp = ctx->FragmentProgram._Current;
  155.  
  156.    /* This now relies on texenvprogram.c being active:
  157.     */
  158.    assert(fp);
  159.  
  160.    key->need_eye_coords = ctx->_NeedEyeCoords;
  161.  
  162.    key->fragprog_inputs_read = fp->Base.InputsRead;
  163.    key->varying_vp_inputs = ctx->varying_vp_inputs;
  164.  
  165.    if (ctx->RenderMode == GL_FEEDBACK) {
  166.       /* make sure the vertprog emits color and tex0 */
  167.       key->fragprog_inputs_read |= (VARYING_BIT_COL0 | VARYING_BIT_TEX0);
  168.    }
  169.  
  170.    key->separate_specular = (ctx->Light.Model.ColorControl ==
  171.                              GL_SEPARATE_SPECULAR_COLOR);
  172.  
  173.    if (ctx->Light.Enabled) {
  174.       key->light_global_enabled = 1;
  175.  
  176.       if (ctx->Light.Model.LocalViewer)
  177.          key->light_local_viewer = 1;
  178.  
  179.       if (ctx->Light.Model.TwoSide)
  180.          key->light_twoside = 1;
  181.  
  182.       if (ctx->Light.ColorMaterialEnabled) {
  183.          key->light_color_material_mask = ctx->Light._ColorMaterialBitmask;
  184.       }
  185.  
  186.       for (i = 0; i < MAX_LIGHTS; i++) {
  187.          struct gl_light *light = &ctx->Light.Light[i];
  188.  
  189.          if (light->Enabled) {
  190.             key->unit[i].light_enabled = 1;
  191.  
  192.             if (light->EyePosition[3] == 0.0)
  193.                key->unit[i].light_eyepos3_is_zero = 1;
  194.  
  195.             if (light->SpotCutoff == 180.0)
  196.                key->unit[i].light_spotcutoff_is_180 = 1;
  197.  
  198.             if (light->ConstantAttenuation != 1.0 ||
  199.                 light->LinearAttenuation != 0.0 ||
  200.                 light->QuadraticAttenuation != 0.0)
  201.                key->unit[i].light_attenuated = 1;
  202.          }
  203.       }
  204.  
  205.       if (check_active_shininess(ctx, key, 0)) {
  206.          key->material_shininess_is_zero = 0;
  207.       }
  208.       else if (key->light_twoside &&
  209.                check_active_shininess(ctx, key, 1)) {
  210.          key->material_shininess_is_zero = 0;
  211.       }
  212.       else {
  213.          key->material_shininess_is_zero = 1;
  214.       }
  215.    }
  216.  
  217.    if (ctx->Transform.Normalize)
  218.       key->normalize = 1;
  219.  
  220.    if (ctx->Transform.RescaleNormals)
  221.       key->rescale_normals = 1;
  222.  
  223.    if (ctx->Fog.FogCoordinateSource == GL_FRAGMENT_DEPTH_EXT) {
  224.       key->fog_source_is_depth = 1;
  225.       key->fog_distance_mode = translate_fog_distance_mode(ctx->Fog.FogDistanceMode);
  226.    }
  227.  
  228.    if (ctx->Point._Attenuated)
  229.       key->point_attenuated = 1;
  230.  
  231.    if (ctx->Array.ArrayObj->VertexAttrib[VERT_ATTRIB_POINT_SIZE].Enabled)
  232.       key->point_array = 1;
  233.  
  234.    if (ctx->Texture._TexGenEnabled ||
  235.        ctx->Texture._TexMatEnabled ||
  236.        ctx->Texture._EnabledUnits)
  237.       key->texture_enabled_global = 1;
  238.  
  239.    for (i = 0; i < MAX_TEXTURE_COORD_UNITS; i++) {
  240.       struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i];
  241.  
  242.       if (texUnit->_ReallyEnabled)
  243.          key->unit[i].texunit_really_enabled = 1;
  244.  
  245.       if (ctx->Point.PointSprite)
  246.          if (ctx->Point.CoordReplace[i])
  247.             key->unit[i].coord_replace = 1;
  248.  
  249.       if (ctx->Texture._TexMatEnabled & ENABLE_TEXMAT(i))
  250.          key->unit[i].texmat_enabled = 1;
  251.  
  252.       if (texUnit->TexGenEnabled) {
  253.          key->unit[i].texgen_enabled = 1;
  254.  
  255.          key->unit[i].texgen_mode0 =
  256.             translate_texgen( texUnit->TexGenEnabled & (1<<0),
  257.                               texUnit->GenS.Mode );
  258.          key->unit[i].texgen_mode1 =
  259.             translate_texgen( texUnit->TexGenEnabled & (1<<1),
  260.                               texUnit->GenT.Mode );
  261.          key->unit[i].texgen_mode2 =
  262.             translate_texgen( texUnit->TexGenEnabled & (1<<2),
  263.                               texUnit->GenR.Mode );
  264.          key->unit[i].texgen_mode3 =
  265.             translate_texgen( texUnit->TexGenEnabled & (1<<3),
  266.                               texUnit->GenQ.Mode );
  267.       }
  268.    }
  269. }
  270.  
  271.  
  272.  
  /* Debugging aid: when non-zero, debug_insn() prints every generated
   * instruction annotated with the function and line in this file that
   * emitted it.
   */
  #define DISASSEM 0
  278.  
  279.  
  280. /* Use uregs to represent registers internally, translate to Mesa's
  281.  * expected formats on emit.
  282.  *
  283.  * NOTE: These are passed by value extensively in this file rather
  284.  * than as usual by pointer reference.  If this disturbs you, try
  285.  * remembering they are just 32bits in size.
  286.  *
  287.  * GCC is smart enough to deal with these dword-sized structures in
  288.  * much the same way as if I had defined them as dwords and was using
  289.  * macros to access and set the fields.  This is much nicer and easier
  290.  * to evolve.
  291.  */
  292. struct ureg {
  293.    GLuint file:4;
  294.    GLint idx:9;      /* relative addressing may be negative */
  295.                      /* sizeof(idx) should == sizeof(prog_src_reg::Index) */
  296.    GLuint negate:1;
  297.    GLuint swz:12;
  298.    GLuint pad:6;
  299. };
  300.  
  301.  
  302. struct tnl_program {
  303.    const struct state_key *state;
  304.    struct gl_vertex_program *program;
  305.    GLint max_inst;  /** number of instructions allocated for program */
  306.    GLboolean mvp_with_dp4;
  307.  
  308.    GLuint temp_in_use;
  309.    GLuint temp_reserved;
  310.  
  311.    struct ureg eye_position;
  312.    struct ureg eye_position_z;
  313.    struct ureg eye_position_normalized;
  314.    struct ureg transformed_normal;
  315.    struct ureg identity;
  316.  
  317.    GLuint materials;
  318.    GLuint color_materials;
  319. };
  320.  
  321.  
  322. static const struct ureg undef = {
  323.    PROGRAM_UNDEFINED,
  324.    0,
  325.    0,
  326.    0,
  327.    0
  328. };
  329.  
  /* Local shorthand for the four swizzle component selectors:
   */
  #define X    SWIZZLE_X
  #define Y    SWIZZLE_Y
  #define Z    SWIZZLE_Z
  #define W    SWIZZLE_W
  336.  
  337.  
  338. /* Construct a ureg:
  339.  */
  340. static struct ureg make_ureg(GLuint file, GLint idx)
  341. {
  342.    struct ureg reg;
  343.    reg.file = file;
  344.    reg.idx = idx;
  345.    reg.negate = 0;
  346.    reg.swz = SWIZZLE_NOOP;
  347.    reg.pad = 0;
  348.    return reg;
  349. }
  350.  
  351.  
  352.  
  353. static struct ureg negate( struct ureg reg )
  354. {
  355.    reg.negate ^= 1;
  356.    return reg;
  357. }
  358.  
  359.  
  360. static struct ureg swizzle( struct ureg reg, int x, int y, int z, int w )
  361. {
  362.    reg.swz = MAKE_SWIZZLE4(GET_SWZ(reg.swz, x),
  363.                            GET_SWZ(reg.swz, y),
  364.                            GET_SWZ(reg.swz, z),
  365.                            GET_SWZ(reg.swz, w));
  366.    return reg;
  367. }
  368.  
  369.  
  370. static struct ureg swizzle1( struct ureg reg, int x )
  371. {
  372.    return swizzle(reg, x, x, x, x);
  373. }
  374.  
  375.  
  376. static struct ureg get_temp( struct tnl_program *p )
  377. {
  378.    int bit = ffs( ~p->temp_in_use );
  379.    if (!bit) {
  380.       _mesa_problem(NULL, "%s: out of temporaries\n", __FILE__);
  381.       exit(1);
  382.    }
  383.  
  384.    if ((GLuint) bit > p->program->Base.NumTemporaries)
  385.       p->program->Base.NumTemporaries = bit;
  386.  
  387.    p->temp_in_use |= 1<<(bit-1);
  388.    return make_ureg(PROGRAM_TEMPORARY, bit-1);
  389. }
  390.  
  391.  
  392. static struct ureg reserve_temp( struct tnl_program *p )
  393. {
  394.    struct ureg temp = get_temp( p );
  395.    p->temp_reserved |= 1<<temp.idx;
  396.    return temp;
  397. }
  398.  
  399.  
  400. static void release_temp( struct tnl_program *p, struct ureg reg )
  401. {
  402.    if (reg.file == PROGRAM_TEMPORARY) {
  403.       p->temp_in_use &= ~(1<<reg.idx);
  404.       p->temp_in_use |= p->temp_reserved; /* can't release reserved temps */
  405.    }
  406. }
  407.  
  408. static void release_temps( struct tnl_program *p )
  409. {
  410.    p->temp_in_use = p->temp_reserved;
  411. }
  412.  
  413.  
  414. static struct ureg register_param5(struct tnl_program *p,
  415.                                    GLint s0,
  416.                                    GLint s1,
  417.                                    GLint s2,
  418.                                    GLint s3,
  419.                                    GLint s4)
  420. {
  421.    gl_state_index tokens[STATE_LENGTH];
  422.    GLint idx;
  423.    tokens[0] = s0;
  424.    tokens[1] = s1;
  425.    tokens[2] = s2;
  426.    tokens[3] = s3;
  427.    tokens[4] = s4;
  428.    idx = _mesa_add_state_reference( p->program->Base.Parameters, tokens );
  429.    return make_ureg(PROGRAM_STATE_VAR, idx);
  430. }
  431.  
  432.  
  /* Shorter forms of register_param5(); the unused trailing tokens are 0. */
  #define register_param1(p,s0)          register_param5(p,s0,0,0,0,0)
  #define register_param2(p,s0,s1)       register_param5(p,s0,s1,0,0,0)
  #define register_param3(p,s0,s1,s2)    register_param5(p,s0,s1,s2,0,0)
  #define register_param4(p,s0,s1,s2,s3) register_param5(p,s0,s1,s2,s3,0)
  437.  
  438.  
  439.  
  440. /**
  441.  * \param input  one of VERT_ATTRIB_x tokens.
  442.  */
  443. static struct ureg register_input( struct tnl_program *p, GLuint input )
  444. {
  445.    assert(input < VERT_ATTRIB_MAX);
  446.  
  447.    if (p->state->varying_vp_inputs & VERT_BIT(input)) {
  448.       p->program->Base.InputsRead |= VERT_BIT(input);
  449.       return make_ureg(PROGRAM_INPUT, input);
  450.    }
  451.    else {
  452.       return register_param3( p, STATE_INTERNAL, STATE_CURRENT_ATTRIB, input );
  453.    }
  454. }
  455.  
  456.  
  457. /**
  458.  * \param input  one of VARYING_SLOT_x tokens.
  459.  */
  460. static struct ureg register_output( struct tnl_program *p, GLuint output )
  461. {
  462.    p->program->Base.OutputsWritten |= BITFIELD64_BIT(output);
  463.    return make_ureg(PROGRAM_OUTPUT, output);
  464. }
  465.  
  466.  
  467. static struct ureg register_const4f( struct tnl_program *p,
  468.                               GLfloat s0,
  469.                               GLfloat s1,
  470.                               GLfloat s2,
  471.                               GLfloat s3)
  472. {
  473.    gl_constant_value values[4];
  474.    GLint idx;
  475.    GLuint swizzle;
  476.    values[0].f = s0;
  477.    values[1].f = s1;
  478.    values[2].f = s2;
  479.    values[3].f = s3;
  480.    idx = _mesa_add_unnamed_constant( p->program->Base.Parameters, values, 4,
  481.                                      &swizzle );
  482.    ASSERT(swizzle == SWIZZLE_NOOP);
  483.    return make_ureg(PROGRAM_CONSTANT, idx);
  484. }
  485.  
  /* Shorter forms of register_const4f().  The 1f/2f/3f variants fill the
   * missing components with 0 and set w to 1; the scalar variant
   * replicates s0 to all four components.
   */
  #define register_const1f(p, s0)         register_const4f(p, s0, 0, 0, 1)
  #define register_scalar_const(p, s0)    register_const4f(p, s0, s0, s0, s0)
  #define register_const2f(p, s0, s1)     register_const4f(p, s0, s1, 0, 1)
  #define register_const3f(p, s0, s1, s2) register_const4f(p, s0, s1, s2, 1)
  490.  
  491. static GLboolean is_undef( struct ureg reg )
  492. {
  493.    return reg.file == PROGRAM_UNDEFINED;
  494. }
  495.  
  496.  
  497. static struct ureg get_identity_param( struct tnl_program *p )
  498. {
  499.    if (is_undef(p->identity))
  500.       p->identity = register_const4f(p, 0,0,0,1);
  501.  
  502.    return p->identity;
  503. }
  504.  
  505. static void register_matrix_param5( struct tnl_program *p,
  506.                                     GLint s0, /* modelview, projection, etc */
  507.                                     GLint s1, /* texture matrix number */
  508.                                     GLint s2, /* first row */
  509.                                     GLint s3, /* last row */
  510.                                     GLint s4, /* inverse, transpose, etc */
  511.                                     struct ureg *matrix )
  512. {
  513.    GLint i;
  514.  
  515.    /* This is a bit sad as the support is there to pull the whole
  516.     * matrix out in one go:
  517.     */
  518.    for (i = 0; i <= s3 - s2; i++)
  519.       matrix[i] = register_param5( p, s0, s1, i, i, s4 );
  520. }
  521.  
  522.  
  523. static void emit_arg( struct prog_src_register *src,
  524.                       struct ureg reg )
  525. {
  526.    src->File = reg.file;
  527.    src->Index = reg.idx;
  528.    src->Swizzle = reg.swz;
  529.    src->Negate = reg.negate ? NEGATE_XYZW : NEGATE_NONE;
  530.    src->Abs = 0;
  531.    src->RelAddr = 0;
  532.    /* Check that bitfield sizes aren't exceeded */
  533.    ASSERT(src->Index == reg.idx);
  534. }
  535.  
  536.  
  537. static void emit_dst( struct prog_dst_register *dst,
  538.                       struct ureg reg, GLuint mask )
  539. {
  540.    dst->File = reg.file;
  541.    dst->Index = reg.idx;
  542.    /* allow zero as a shorthand for xyzw */
  543.    dst->WriteMask = mask ? mask : WRITEMASK_XYZW;
  544.    dst->CondMask = COND_TR;  /* always pass cond test */
  545.    dst->CondSwizzle = SWIZZLE_NOOP;
  546.    /* Check that bitfield sizes aren't exceeded */
  547.    ASSERT(dst->Index == reg.idx);
  548. }
  549.  
  550.  
  551. static void debug_insn( struct prog_instruction *inst, const char *fn,
  552.                         GLuint line )
  553. {
  554.    if (DISASSEM) {
  555.       static const char *last_fn;
  556.  
  557.       if (fn != last_fn) {
  558.          last_fn = fn;
  559.          printf("%s:\n", fn);
  560.       }
  561.  
  562.       printf("%d:\t", line);
  563.       _mesa_print_instruction(inst);
  564.    }
  565. }
  566.  
  567.  
  568. static void emit_op3fn(struct tnl_program *p,
  569.                        enum prog_opcode op,
  570.                        struct ureg dest,
  571.                        GLuint mask,
  572.                        struct ureg src0,
  573.                        struct ureg src1,
  574.                        struct ureg src2,
  575.                        const char *fn,
  576.                        GLuint line)
  577. {
  578.    GLuint nr;
  579.    struct prog_instruction *inst;
  580.  
  581.    assert((GLint) p->program->Base.NumInstructions <= p->max_inst);
  582.  
  583.    if (p->program->Base.NumInstructions == p->max_inst) {
  584.       /* need to extend the program's instruction array */
  585.       struct prog_instruction *newInst;
  586.  
  587.       /* double the size */
  588.       p->max_inst *= 2;
  589.  
  590.       newInst = _mesa_alloc_instructions(p->max_inst);
  591.       if (!newInst) {
  592.          _mesa_error(NULL, GL_OUT_OF_MEMORY, "vertex program build");
  593.          return;
  594.       }
  595.  
  596.       _mesa_copy_instructions(newInst,
  597.                               p->program->Base.Instructions,
  598.                               p->program->Base.NumInstructions);
  599.  
  600.       _mesa_free_instructions(p->program->Base.Instructions,
  601.                               p->program->Base.NumInstructions);
  602.  
  603.       p->program->Base.Instructions = newInst;
  604.    }
  605.  
  606.    nr = p->program->Base.NumInstructions++;
  607.  
  608.    inst = &p->program->Base.Instructions[nr];
  609.    inst->Opcode = (enum prog_opcode) op;
  610.  
  611.    emit_arg( &inst->SrcReg[0], src0 );
  612.    emit_arg( &inst->SrcReg[1], src1 );
  613.    emit_arg( &inst->SrcReg[2], src2 );
  614.  
  615.    emit_dst( &inst->DstReg, dest, mask );
  616.  
  617.    debug_insn(inst, fn, line);
  618. }
  619.  
  620.  
  /* Front ends for emit_op3fn() that pass the calling function and line
   * along for debug_insn().  Unused source operands are set to `undef'.
   */
  #define emit_op3(p, op, dst, mask, src0, src1, src2) \
     emit_op3fn(p, op, dst, mask, src0, src1, src2, __FUNCTION__, __LINE__)

  #define emit_op2(p, op, dst, mask, src0, src1) \
     emit_op3fn(p, op, dst, mask, src0, src1, undef, __FUNCTION__, __LINE__)

  #define emit_op1(p, op, dst, mask, src0) \
     emit_op3fn(p, op, dst, mask, src0, undef, undef, __FUNCTION__, __LINE__)
  629.  
  630.  
  631. static struct ureg make_temp( struct tnl_program *p, struct ureg reg )
  632. {
  633.    if (reg.file == PROGRAM_TEMPORARY &&
  634.        !(p->temp_reserved & (1<<reg.idx)))
  635.       return reg;
  636.    else {
  637.       struct ureg temp = get_temp(p);
  638.       emit_op1(p, OPCODE_MOV, temp, 0, reg);
  639.       return temp;
  640.    }
  641. }
  642.  
  643.  
  644. /* Currently no tracking performed of input/output/register size or
  645.  * active elements.  Could be used to reduce these operations, as
  646.  * could the matrix type.
  647.  */
  648. static void emit_matrix_transform_vec4( struct tnl_program *p,
  649.                                         struct ureg dest,
  650.                                         const struct ureg *mat,
  651.                                         struct ureg src)
  652. {
  653.    emit_op2(p, OPCODE_DP4, dest, WRITEMASK_X, src, mat[0]);
  654.    emit_op2(p, OPCODE_DP4, dest, WRITEMASK_Y, src, mat[1]);
  655.    emit_op2(p, OPCODE_DP4, dest, WRITEMASK_Z, src, mat[2]);
  656.    emit_op2(p, OPCODE_DP4, dest, WRITEMASK_W, src, mat[3]);
  657. }
  658.  
  659.  
  660. /* This version is much easier to implement if writemasks are not
  661.  * supported natively on the target or (like SSE), the target doesn't
  662.  * have a clean/obvious dotproduct implementation.
  663.  */
  664. static void emit_transpose_matrix_transform_vec4( struct tnl_program *p,
  665.                                                   struct ureg dest,
  666.                                                   const struct ureg *mat,
  667.                                                   struct ureg src)
  668. {
  669.    struct ureg tmp;
  670.  
  671.    if (dest.file != PROGRAM_TEMPORARY)
  672.       tmp = get_temp(p);
  673.    else
  674.       tmp = dest;
  675.  
  676.    emit_op2(p, OPCODE_MUL, tmp, 0, swizzle1(src,X), mat[0]);
  677.    emit_op3(p, OPCODE_MAD, tmp, 0, swizzle1(src,Y), mat[1], tmp);
  678.    emit_op3(p, OPCODE_MAD, tmp, 0, swizzle1(src,Z), mat[2], tmp);
  679.    emit_op3(p, OPCODE_MAD, dest, 0, swizzle1(src,W), mat[3], tmp);
  680.  
  681.    if (dest.file != PROGRAM_TEMPORARY)
  682.       release_temp(p, tmp);
  683. }
  684.  
  685.  
  686. static void emit_matrix_transform_vec3( struct tnl_program *p,
  687.                                         struct ureg dest,
  688.                                         const struct ureg *mat,
  689.                                         struct ureg src)
  690. {
  691.    emit_op2(p, OPCODE_DP3, dest, WRITEMASK_X, src, mat[0]);
  692.    emit_op2(p, OPCODE_DP3, dest, WRITEMASK_Y, src, mat[1]);
  693.    emit_op2(p, OPCODE_DP3, dest, WRITEMASK_Z, src, mat[2]);
  694. }
  695.  
  696.  
  697. static void emit_normalize_vec3( struct tnl_program *p,
  698.                                  struct ureg dest,
  699.                                  struct ureg src )
  700. {
  701.    struct ureg tmp = get_temp(p);
  702.    emit_op2(p, OPCODE_DP3, tmp, WRITEMASK_X, src, src);
  703.    emit_op1(p, OPCODE_RSQ, tmp, WRITEMASK_X, tmp);
  704.    emit_op2(p, OPCODE_MUL, dest, 0, src, swizzle1(tmp, X));
  705.    release_temp(p, tmp);
  706. }
  707.  
  708.  
  709. static void emit_passthrough( struct tnl_program *p,
  710.                               GLuint input,
  711.                               GLuint output )
  712. {
  713.    struct ureg out = register_output(p, output);
  714.    emit_op1(p, OPCODE_MOV, out, 0, register_input(p, input));
  715. }
  716.  
  717.  
  718. static struct ureg get_eye_position( struct tnl_program *p )
  719. {
  720.    if (is_undef(p->eye_position)) {
  721.       struct ureg pos = register_input( p, VERT_ATTRIB_POS );
  722.       struct ureg modelview[4];
  723.  
  724.       p->eye_position = reserve_temp(p);
  725.  
  726.       if (p->mvp_with_dp4) {
  727.          register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3,
  728.                                  0, modelview );
  729.  
  730.          emit_matrix_transform_vec4(p, p->eye_position, modelview, pos);
  731.       }
  732.       else {
  733.          register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3,
  734.                                  STATE_MATRIX_TRANSPOSE, modelview );
  735.  
  736.          emit_transpose_matrix_transform_vec4(p, p->eye_position, modelview, pos);
  737.       }
  738.    }
  739.  
  740.    return p->eye_position;
  741. }
  742.  
  743.  
  744. static struct ureg get_eye_position_z( struct tnl_program *p )
  745. {
  746.    if (!is_undef(p->eye_position))
  747.       return swizzle1(p->eye_position, Z);
  748.  
  749.    if (is_undef(p->eye_position_z)) {
  750.       struct ureg pos = register_input( p, VERT_ATTRIB_POS );
  751.       struct ureg modelview[4];
  752.  
  753.       p->eye_position_z = reserve_temp(p);
  754.  
  755.       register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3,
  756.                               0, modelview );
  757.  
  758.       emit_op2(p, OPCODE_DP4, p->eye_position_z, 0, pos, modelview[2]);
  759.    }
  760.  
  761.    return p->eye_position_z;
  762. }
  763.  
  764.  
  765. static struct ureg get_eye_position_normalized( struct tnl_program *p )
  766. {
  767.    if (is_undef(p->eye_position_normalized)) {
  768.       struct ureg eye = get_eye_position(p);
  769.       p->eye_position_normalized = reserve_temp(p);
  770.       emit_normalize_vec3(p, p->eye_position_normalized, eye);
  771.    }
  772.  
  773.    return p->eye_position_normalized;
  774. }
  775.  
  776.  
  777. static struct ureg get_transformed_normal( struct tnl_program *p )
  778. {
  779.    if (is_undef(p->transformed_normal) &&
  780.        !p->state->need_eye_coords &&
  781.        !p->state->normalize &&
  782.        !(p->state->need_eye_coords == p->state->rescale_normals))
  783.    {
  784.       p->transformed_normal = register_input(p, VERT_ATTRIB_NORMAL );
  785.    }
  786.    else if (is_undef(p->transformed_normal))
  787.    {
  788.       struct ureg normal = register_input(p, VERT_ATTRIB_NORMAL );
  789.       struct ureg mvinv[3];
  790.       struct ureg transformed_normal = reserve_temp(p);
  791.  
  792.       if (p->state->need_eye_coords) {
  793.          register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 2,
  794.                                  STATE_MATRIX_INVTRANS, mvinv );
  795.  
  796.          /* Transform to eye space:
  797.           */
  798.          emit_matrix_transform_vec3( p, transformed_normal, mvinv, normal );
  799.          normal = transformed_normal;
  800.       }
  801.  
  802.       /* Normalize/Rescale:
  803.        */
  804.       if (p->state->normalize) {
  805.          emit_normalize_vec3( p, transformed_normal, normal );
  806.          normal = transformed_normal;
  807.       }
  808.       else if (p->state->need_eye_coords == p->state->rescale_normals) {
  809.          /* This is already adjusted for eye/non-eye rendering:
  810.           */
  811.          struct ureg rescale = register_param2(p, STATE_INTERNAL,
  812.                                                STATE_NORMAL_SCALE);
  813.  
  814.          emit_op2( p, OPCODE_MUL, transformed_normal, 0, normal, rescale );
  815.          normal = transformed_normal;
  816.       }
  817.  
  818.       assert(normal.file == PROGRAM_TEMPORARY);
  819.       p->transformed_normal = normal;
  820.    }
  821.  
  822.    return p->transformed_normal;
  823. }
  824.  
  825.  
  826. static void build_hpos( struct tnl_program *p )
  827. {
  828.    struct ureg pos = register_input( p, VERT_ATTRIB_POS );
  829.    struct ureg hpos = register_output( p, VARYING_SLOT_POS );
  830.    struct ureg mvp[4];
  831.  
  832.    if (p->mvp_with_dp4) {
  833.       register_matrix_param5( p, STATE_MVP_MATRIX, 0, 0, 3,
  834.                               0, mvp );
  835.       emit_matrix_transform_vec4( p, hpos, mvp, pos );
  836.    }
  837.    else {
  838.       register_matrix_param5( p, STATE_MVP_MATRIX, 0, 0, 3,
  839.                               STATE_MATRIX_TRANSPOSE, mvp );
  840.       emit_transpose_matrix_transform_vec4( p, hpos, mvp, pos );
  841.    }
  842. }
  843.  
  844.  
  845. static GLuint material_attrib( GLuint side, GLuint property )
  846. {
  847.    return (property - STATE_AMBIENT) * 2 + side;
  848. }
  849.  
  850.  
  851. /**
  852.  * Get a bitmask of which material values vary on a per-vertex basis.
  853.  */
  854. static void set_material_flags( struct tnl_program *p )
  855. {
  856.    p->color_materials = 0;
  857.    p->materials = 0;
  858.  
  859.    if (p->state->varying_vp_inputs & VERT_BIT_COLOR0) {
  860.       p->materials =
  861.          p->color_materials = p->state->light_color_material_mask;
  862.    }
  863.  
  864.    p->materials |= (p->state->varying_vp_inputs >> VERT_ATTRIB_GENERIC0);
  865. }
  866.  
  867.  
  868. static struct ureg get_material( struct tnl_program *p, GLuint side,
  869.                                  GLuint property )
  870. {
  871.    GLuint attrib = material_attrib(side, property);
  872.  
  873.    if (p->color_materials & (1<<attrib))
  874.       return register_input(p, VERT_ATTRIB_COLOR0);
  875.    else if (p->materials & (1<<attrib)) {
  876.       /* Put material values in the GENERIC slots -- they are not used
  877.        * for anything in fixed function mode.
  878.        */
  879.       return register_input( p, attrib + VERT_ATTRIB_GENERIC0 );
  880.    }
  881.    else
  882.       return register_param3( p, STATE_MATERIAL, side, property );
  883. }
  884.  
  885. #define SCENE_COLOR_BITS(side) (( MAT_BIT_FRONT_EMISSION | \
  886.                                    MAT_BIT_FRONT_AMBIENT | \
  887.                                    MAT_BIT_FRONT_DIFFUSE) << (side))
  888.  
  889.  
  890. /**
  891.  * Either return a precalculated constant value or emit code to
  892.  * calculate these values dynamically in the case where material calls
  893.  * are present between begin/end pairs.
  894.  *
  895.  * Probably want to shift this to the program compilation phase - if
  896.  * we always emitted the calculation here, a smart compiler could
  897.  * detect that it was constant (given a certain set of inputs), and
  898.  * lift it out of the main loop.  That way the programs created here
  899.  * would be independent of the vertex_buffer details.
  900.  */
  901. static struct ureg get_scenecolor( struct tnl_program *p, GLuint side )
  902. {
  903.    if (p->materials & SCENE_COLOR_BITS(side)) {
  904.       struct ureg lm_ambient = register_param1(p, STATE_LIGHTMODEL_AMBIENT);
  905.       struct ureg material_emission = get_material(p, side, STATE_EMISSION);
  906.       struct ureg material_ambient = get_material(p, side, STATE_AMBIENT);
  907.       struct ureg material_diffuse = get_material(p, side, STATE_DIFFUSE);
  908.       struct ureg tmp = make_temp(p, material_diffuse);
  909.       emit_op3(p, OPCODE_MAD, tmp, WRITEMASK_XYZ, lm_ambient,
  910.                material_ambient, material_emission);
  911.       return tmp;
  912.    }
  913.    else
  914.       return register_param2( p, STATE_LIGHTMODEL_SCENECOLOR, side );
  915. }
  916.  
  917.  
  918. static struct ureg get_lightprod( struct tnl_program *p, GLuint light,
  919.                                   GLuint side, GLuint property )
  920. {
  921.    GLuint attrib = material_attrib(side, property);
  922.    if (p->materials & (1<<attrib)) {
  923.       struct ureg light_value =
  924.          register_param3(p, STATE_LIGHT, light, property);
  925.       struct ureg material_value = get_material(p, side, property);
  926.       struct ureg tmp = get_temp(p);
  927.       emit_op2(p, OPCODE_MUL, tmp, 0, light_value, material_value);
  928.       return tmp;
  929.    }
  930.    else
  931.       return register_param4(p, STATE_LIGHTPROD, light, side, property);
  932. }
  933.  
  934.  
  935. static struct ureg calculate_light_attenuation( struct tnl_program *p,
  936.                                                 GLuint i,
  937.                                                 struct ureg VPpli,
  938.                                                 struct ureg dist )
  939. {
  940.    struct ureg attenuation = register_param3(p, STATE_LIGHT, i,
  941.                                              STATE_ATTENUATION);
  942.    struct ureg att = undef;
  943.  
  944.    /* Calculate spot attenuation:
  945.     */
  946.    if (!p->state->unit[i].light_spotcutoff_is_180) {
  947.       struct ureg spot_dir_norm = register_param3(p, STATE_INTERNAL,
  948.                                                   STATE_LIGHT_SPOT_DIR_NORMALIZED, i);
  949.       struct ureg spot = get_temp(p);
  950.       struct ureg slt = get_temp(p);
  951.  
  952.       att = get_temp(p);
  953.  
  954.       emit_op2(p, OPCODE_DP3, spot, 0, negate(VPpli), spot_dir_norm);
  955.       emit_op2(p, OPCODE_SLT, slt, 0, swizzle1(spot_dir_norm,W), spot);
  956.       emit_op2(p, OPCODE_POW, spot, 0, spot, swizzle1(attenuation, W));
  957.       emit_op2(p, OPCODE_MUL, att, 0, slt, spot);
  958.  
  959.       release_temp(p, spot);
  960.       release_temp(p, slt);
  961.    }
  962.  
  963.    /* Calculate distance attenuation(See formula (2.4) at glspec 2.1 page 62):
  964.     *
  965.     * Skip the calucation when _dist_ is undefined(light_eyepos3_is_zero)
  966.     */
  967.    if (p->state->unit[i].light_attenuated && !is_undef(dist)) {
  968.       if (is_undef(att))
  969.          att = get_temp(p);
  970.       /* 1/d,d,d,1/d */
  971.       emit_op1(p, OPCODE_RCP, dist, WRITEMASK_YZ, dist);
  972.       /* 1,d,d*d,1/d */
  973.       emit_op2(p, OPCODE_MUL, dist, WRITEMASK_XZ, dist, swizzle1(dist,Y));
  974.       /* 1/dist-atten */
  975.       emit_op2(p, OPCODE_DP3, dist, 0, attenuation, dist);
  976.  
  977.       if (!p->state->unit[i].light_spotcutoff_is_180) {
  978.          /* dist-atten */
  979.          emit_op1(p, OPCODE_RCP, dist, 0, dist);
  980.          /* spot-atten * dist-atten */
  981.          emit_op2(p, OPCODE_MUL, att, 0, dist, att);
  982.       }
  983.       else {
  984.          /* dist-atten */
  985.          emit_op1(p, OPCODE_RCP, att, 0, dist);
  986.       }
  987.    }
  988.  
  989.    return att;
  990. }
  991.  
  992.  
  993. /**
  994.  * Compute:
  995.  *   lit.y = MAX(0, dots.x)
  996.  *   lit.z = SLT(0, dots.x)
  997.  */
  998. static void emit_degenerate_lit( struct tnl_program *p,
  999.                                  struct ureg lit,
  1000.                                  struct ureg dots )
  1001. {
  1002.    struct ureg id = get_identity_param(p);  /* id = {0,0,0,1} */
  1003.  
  1004.    /* Note that lit.x & lit.w will not be examined.  Note also that
  1005.     * dots.xyzw == dots.xxxx.
  1006.     */
  1007.  
  1008.    /* MAX lit, id, dots;
  1009.     */
  1010.    emit_op2(p, OPCODE_MAX, lit, WRITEMASK_XYZW, id, dots);
  1011.  
  1012.    /* result[2] = (in > 0 ? 1 : 0)
  1013.     * SLT lit.z, id.z, dots;   # lit.z = (0 < dots.z) ? 1 : 0
  1014.     */
  1015.    emit_op2(p, OPCODE_SLT, lit, WRITEMASK_Z, swizzle1(id,Z), dots);
  1016. }
  1017.  
  1018.  
  1019. /* Need to add some addtional parameters to allow lighting in object
  1020.  * space - STATE_SPOT_DIRECTION and STATE_HALF_VECTOR implicitly assume eye
  1021.  * space lighting.
  1022.  */
  1023. static void build_lighting( struct tnl_program *p )
  1024. {
  1025.    const GLboolean twoside = p->state->light_twoside;
  1026.    const GLboolean separate = p->state->separate_specular;
  1027.    GLuint nr_lights = 0, count = 0;
  1028.    struct ureg normal = get_transformed_normal(p);
  1029.    struct ureg lit = get_temp(p);
  1030.    struct ureg dots = get_temp(p);
  1031.    struct ureg _col0 = undef, _col1 = undef;
  1032.    struct ureg _bfc0 = undef, _bfc1 = undef;
  1033.    GLuint i;
  1034.  
  1035.    /*
  1036.     * NOTE:
  1037.     * dots.x = dot(normal, VPpli)
  1038.     * dots.y = dot(normal, halfAngle)
  1039.     * dots.z = back.shininess
  1040.     * dots.w = front.shininess
  1041.     */
  1042.  
  1043.    for (i = 0; i < MAX_LIGHTS; i++)
  1044.       if (p->state->unit[i].light_enabled)
  1045.          nr_lights++;
  1046.  
  1047.    set_material_flags(p);
  1048.  
  1049.    {
  1050.       if (!p->state->material_shininess_is_zero) {
  1051.          struct ureg shininess = get_material(p, 0, STATE_SHININESS);
  1052.          emit_op1(p, OPCODE_MOV, dots, WRITEMASK_W, swizzle1(shininess,X));
  1053.          release_temp(p, shininess);
  1054.       }
  1055.  
  1056.       _col0 = make_temp(p, get_scenecolor(p, 0));
  1057.       if (separate)
  1058.          _col1 = make_temp(p, get_identity_param(p));
  1059.       else
  1060.          _col1 = _col0;
  1061.    }
  1062.  
  1063.    if (twoside) {
  1064.       if (!p->state->material_shininess_is_zero) {
  1065.          /* Note that we negate the back-face specular exponent here.
  1066.           * The negation will be un-done later in the back-face code below.
  1067.           */
  1068.          struct ureg shininess = get_material(p, 1, STATE_SHININESS);
  1069.          emit_op1(p, OPCODE_MOV, dots, WRITEMASK_Z,
  1070.                   negate(swizzle1(shininess,X)));
  1071.          release_temp(p, shininess);
  1072.       }
  1073.  
  1074.       _bfc0 = make_temp(p, get_scenecolor(p, 1));
  1075.       if (separate)
  1076.          _bfc1 = make_temp(p, get_identity_param(p));
  1077.       else
  1078.          _bfc1 = _bfc0;
  1079.    }
  1080.  
  1081.    /* If no lights, still need to emit the scenecolor.
  1082.     */
  1083.    {
  1084.       struct ureg res0 = register_output( p, VARYING_SLOT_COL0 );
  1085.       emit_op1(p, OPCODE_MOV, res0, 0, _col0);
  1086.    }
  1087.  
  1088.    if (separate) {
  1089.       struct ureg res1 = register_output( p, VARYING_SLOT_COL1 );
  1090.       emit_op1(p, OPCODE_MOV, res1, 0, _col1);
  1091.    }
  1092.  
  1093.    if (twoside) {
  1094.       struct ureg res0 = register_output( p, VARYING_SLOT_BFC0 );
  1095.       emit_op1(p, OPCODE_MOV, res0, 0, _bfc0);
  1096.    }
  1097.  
  1098.    if (twoside && separate) {
  1099.       struct ureg res1 = register_output( p, VARYING_SLOT_BFC1 );
  1100.       emit_op1(p, OPCODE_MOV, res1, 0, _bfc1);
  1101.    }
  1102.  
  1103.    if (nr_lights == 0) {
  1104.       release_temps(p);
  1105.       return;
  1106.    }
  1107.  
  1108.    for (i = 0; i < MAX_LIGHTS; i++) {
  1109.       if (p->state->unit[i].light_enabled) {
  1110.          struct ureg half = undef;
  1111.          struct ureg att = undef, VPpli = undef;
  1112.          struct ureg dist = undef;
  1113.  
  1114.          count++;
  1115.          if (p->state->unit[i].light_eyepos3_is_zero) {
  1116.              VPpli = register_param3(p, STATE_INTERNAL,
  1117.                                      STATE_LIGHT_POSITION_NORMALIZED, i);
  1118.          } else {
  1119.             struct ureg Ppli = register_param3(p, STATE_INTERNAL,
  1120.                                                STATE_LIGHT_POSITION, i);
  1121.             struct ureg V = get_eye_position(p);
  1122.  
  1123.             VPpli = get_temp(p);
  1124.             dist = get_temp(p);
  1125.  
  1126.             /* Calculate VPpli vector
  1127.              */
  1128.             emit_op2(p, OPCODE_SUB, VPpli, 0, Ppli, V);
  1129.  
  1130.             /* Normalize VPpli.  The dist value also used in
  1131.              * attenuation below.
  1132.              */
  1133.             emit_op2(p, OPCODE_DP3, dist, 0, VPpli, VPpli);
  1134.             emit_op1(p, OPCODE_RSQ, dist, 0, dist);
  1135.             emit_op2(p, OPCODE_MUL, VPpli, 0, VPpli, dist);
  1136.          }
  1137.  
  1138.          /* Calculate attenuation:
  1139.           */
  1140.          att = calculate_light_attenuation(p, i, VPpli, dist);
  1141.          release_temp(p, dist);
  1142.  
  1143.          /* Calculate viewer direction, or use infinite viewer:
  1144.           */
  1145.          if (!p->state->material_shininess_is_zero) {
  1146.             if (p->state->light_local_viewer) {
  1147.                struct ureg eye_hat = get_eye_position_normalized(p);
  1148.                half = get_temp(p);
  1149.                emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat);
  1150.                emit_normalize_vec3(p, half, half);
  1151.             } else if (p->state->unit[i].light_eyepos3_is_zero) {
  1152.                half = register_param3(p, STATE_INTERNAL,
  1153.                                       STATE_LIGHT_HALF_VECTOR, i);
  1154.             } else {
  1155.                struct ureg z_dir = swizzle(get_identity_param(p),X,Y,W,Z);
  1156.                half = get_temp(p);
  1157.                emit_op2(p, OPCODE_ADD, half, 0, VPpli, z_dir);
  1158.                emit_normalize_vec3(p, half, half);
  1159.             }
  1160.          }
  1161.  
  1162.          /* Calculate dot products:
  1163.           */
  1164.          if (p->state->material_shininess_is_zero) {
  1165.             emit_op2(p, OPCODE_DP3, dots, 0, normal, VPpli);
  1166.          }
  1167.          else {
  1168.             emit_op2(p, OPCODE_DP3, dots, WRITEMASK_X, normal, VPpli);
  1169.             emit_op2(p, OPCODE_DP3, dots, WRITEMASK_Y, normal, half);
  1170.          }
  1171.  
  1172.          /* Front face lighting:
  1173.           */
  1174.          {
  1175.             struct ureg ambient = get_lightprod(p, i, 0, STATE_AMBIENT);
  1176.             struct ureg diffuse = get_lightprod(p, i, 0, STATE_DIFFUSE);
  1177.             struct ureg specular = get_lightprod(p, i, 0, STATE_SPECULAR);
  1178.             struct ureg res0, res1;
  1179.             GLuint mask0, mask1;
  1180.  
  1181.             if (count == nr_lights) {
  1182.                if (separate) {
  1183.                   mask0 = WRITEMASK_XYZ;
  1184.                   mask1 = WRITEMASK_XYZ;
  1185.                   res0 = register_output( p, VARYING_SLOT_COL0 );
  1186.                   res1 = register_output( p, VARYING_SLOT_COL1 );
  1187.                }
  1188.                else {
  1189.                   mask0 = 0;
  1190.                   mask1 = WRITEMASK_XYZ;
  1191.                   res0 = _col0;
  1192.                   res1 = register_output( p, VARYING_SLOT_COL0 );
  1193.                }
  1194.             }
  1195.             else {
  1196.                mask0 = 0;
  1197.                mask1 = 0;
  1198.                res0 = _col0;
  1199.                res1 = _col1;
  1200.             }
  1201.  
  1202.             if (!is_undef(att)) {
  1203.                /* light is attenuated by distance */
  1204.                emit_op1(p, OPCODE_LIT, lit, 0, dots);
  1205.                emit_op2(p, OPCODE_MUL, lit, 0, lit, att);
  1206.                emit_op3(p, OPCODE_MAD, _col0, 0, swizzle1(lit,X), ambient, _col0);
  1207.             }
  1208.             else if (!p->state->material_shininess_is_zero) {
  1209.                /* there's a non-zero specular term */
  1210.                emit_op1(p, OPCODE_LIT, lit, 0, dots);
  1211.                emit_op2(p, OPCODE_ADD, _col0, 0, ambient, _col0);
  1212.             }
  1213.             else {
  1214.                /* no attenutation, no specular */
  1215.                emit_degenerate_lit(p, lit, dots);
  1216.                emit_op2(p, OPCODE_ADD, _col0, 0, ambient, _col0);
  1217.             }
  1218.  
  1219.             emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _col0);
  1220.             emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _col1);
  1221.  
  1222.             release_temp(p, ambient);
  1223.             release_temp(p, diffuse);
  1224.             release_temp(p, specular);
  1225.          }
  1226.  
  1227.          /* Back face lighting:
  1228.           */
  1229.          if (twoside) {
  1230.             struct ureg ambient = get_lightprod(p, i, 1, STATE_AMBIENT);
  1231.             struct ureg diffuse = get_lightprod(p, i, 1, STATE_DIFFUSE);
  1232.             struct ureg specular = get_lightprod(p, i, 1, STATE_SPECULAR);
  1233.             struct ureg res0, res1;
  1234.             GLuint mask0, mask1;
  1235.  
  1236.             if (count == nr_lights) {
  1237.                if (separate) {
  1238.                   mask0 = WRITEMASK_XYZ;
  1239.                   mask1 = WRITEMASK_XYZ;
  1240.                   res0 = register_output( p, VARYING_SLOT_BFC0 );
  1241.                   res1 = register_output( p, VARYING_SLOT_BFC1 );
  1242.                }
  1243.                else {
  1244.                   mask0 = 0;
  1245.                   mask1 = WRITEMASK_XYZ;
  1246.                   res0 = _bfc0;
  1247.                   res1 = register_output( p, VARYING_SLOT_BFC0 );
  1248.                }
  1249.             }
  1250.             else {
  1251.                res0 = _bfc0;
  1252.                res1 = _bfc1;
  1253.                mask0 = 0;
  1254.                mask1 = 0;
  1255.             }
  1256.  
  1257.             /* For the back face we need to negate the X and Y component
  1258.              * dot products.  dots.Z has the negated back-face specular
  1259.              * exponent.  We swizzle that into the W position.  This
  1260.              * negation makes the back-face specular term positive again.
  1261.              */
  1262.             dots = negate(swizzle(dots,X,Y,W,Z));
  1263.  
  1264.             if (!is_undef(att)) {
  1265.                emit_op1(p, OPCODE_LIT, lit, 0, dots);
  1266.                emit_op2(p, OPCODE_MUL, lit, 0, lit, att);
  1267.                emit_op3(p, OPCODE_MAD, _bfc0, 0, swizzle1(lit,X), ambient, _bfc0);
  1268.             }
  1269.             else if (!p->state->material_shininess_is_zero) {
  1270.                emit_op1(p, OPCODE_LIT, lit, 0, dots);
  1271.                emit_op2(p, OPCODE_ADD, _bfc0, 0, ambient, _bfc0); /**/
  1272.             }
  1273.             else {
  1274.                emit_degenerate_lit(p, lit, dots);
  1275.                emit_op2(p, OPCODE_ADD, _bfc0, 0, ambient, _bfc0);
  1276.             }
  1277.  
  1278.             emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _bfc0);
  1279.             emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _bfc1);
  1280.             /* restore dots to its original state for subsequent lights
  1281.              * by negating and swizzling again.
  1282.              */
  1283.             dots = negate(swizzle(dots,X,Y,W,Z));
  1284.  
  1285.             release_temp(p, ambient);
  1286.             release_temp(p, diffuse);
  1287.             release_temp(p, specular);
  1288.          }
  1289.  
  1290.          release_temp(p, half);
  1291.          release_temp(p, VPpli);
  1292.          release_temp(p, att);
  1293.       }
  1294.    }
  1295.  
  1296.    release_temps( p );
  1297. }
  1298.  
  1299.  
  1300. static void build_fog( struct tnl_program *p )
  1301. {
  1302.    struct ureg fog = register_output(p, VARYING_SLOT_FOGC);
  1303.    struct ureg input;
  1304.  
  1305.    if (p->state->fog_source_is_depth) {
  1306.  
  1307.       switch (p->state->fog_distance_mode) {
  1308.       case FDM_EYE_RADIAL: /* Z = sqrt(Xe*Xe + Ye*Ye + Ze*Ze) */
  1309.         input = get_eye_position(p);
  1310.         emit_op2(p, OPCODE_DP3, fog, WRITEMASK_X, input, input);
  1311.         emit_op1(p, OPCODE_RSQ, fog, WRITEMASK_X, fog);
  1312.         emit_op1(p, OPCODE_RCP, fog, WRITEMASK_X, fog);
  1313.         break;
  1314.       case FDM_EYE_PLANE: /* Z = Ze */
  1315.         input = get_eye_position_z(p);
  1316.         emit_op1(p, OPCODE_MOV, fog, WRITEMASK_X, input);
  1317.         break;
  1318.       case FDM_EYE_PLANE_ABS: /* Z = abs(Ze) */
  1319.         input = get_eye_position_z(p);
  1320.         emit_op1(p, OPCODE_ABS, fog, WRITEMASK_X, input);
  1321.         break;
  1322.       default: assert(0); break; /* can't happen */
  1323.       }
  1324.  
  1325.    }
  1326.    else {
  1327.       input = swizzle1(register_input(p, VERT_ATTRIB_FOG), X);
  1328.       emit_op1(p, OPCODE_ABS, fog, WRITEMASK_X, input);
  1329.    }
  1330.  
  1331.    emit_op1(p, OPCODE_MOV, fog, WRITEMASK_YZW, get_identity_param(p));
  1332. }
  1333.  
  1334.  
  1335. static void build_reflect_texgen( struct tnl_program *p,
  1336.                                   struct ureg dest,
  1337.                                   GLuint writemask )
  1338. {
  1339.    struct ureg normal = get_transformed_normal(p);
  1340.    struct ureg eye_hat = get_eye_position_normalized(p);
  1341.    struct ureg tmp = get_temp(p);
  1342.  
  1343.    /* n.u */
  1344.    emit_op2(p, OPCODE_DP3, tmp, 0, normal, eye_hat);
  1345.    /* 2n.u */
  1346.    emit_op2(p, OPCODE_ADD, tmp, 0, tmp, tmp);
  1347.    /* (-2n.u)n + u */
  1348.    emit_op3(p, OPCODE_MAD, dest, writemask, negate(tmp), normal, eye_hat);
  1349.  
  1350.    release_temp(p, tmp);
  1351. }
  1352.  
  1353.  
  1354. static void build_sphere_texgen( struct tnl_program *p,
  1355.                                  struct ureg dest,
  1356.                                  GLuint writemask )
  1357. {
  1358.    struct ureg normal = get_transformed_normal(p);
  1359.    struct ureg eye_hat = get_eye_position_normalized(p);
  1360.    struct ureg tmp = get_temp(p);
  1361.    struct ureg half = register_scalar_const(p, .5);
  1362.    struct ureg r = get_temp(p);
  1363.    struct ureg inv_m = get_temp(p);
  1364.    struct ureg id = get_identity_param(p);
  1365.  
  1366.    /* Could share the above calculations, but it would be
  1367.     * a fairly odd state for someone to set (both sphere and
  1368.     * reflection active for different texture coordinate
  1369.     * components.  Of course - if two texture units enable
  1370.     * reflect and/or sphere, things start to tilt in favour
  1371.     * of seperating this out:
  1372.     */
  1373.  
  1374.    /* n.u */
  1375.    emit_op2(p, OPCODE_DP3, tmp, 0, normal, eye_hat);
  1376.    /* 2n.u */
  1377.    emit_op2(p, OPCODE_ADD, tmp, 0, tmp, tmp);
  1378.    /* (-2n.u)n + u */
  1379.    emit_op3(p, OPCODE_MAD, r, 0, negate(tmp), normal, eye_hat);
  1380.    /* r + 0,0,1 */
  1381.    emit_op2(p, OPCODE_ADD, tmp, 0, r, swizzle(id,X,Y,W,Z));
  1382.    /* rx^2 + ry^2 + (rz+1)^2 */
  1383.    emit_op2(p, OPCODE_DP3, tmp, 0, tmp, tmp);
  1384.    /* 2/m */
  1385.    emit_op1(p, OPCODE_RSQ, tmp, 0, tmp);
  1386.    /* 1/m */
  1387.    emit_op2(p, OPCODE_MUL, inv_m, 0, tmp, half);
  1388.    /* r/m + 1/2 */
  1389.    emit_op3(p, OPCODE_MAD, dest, writemask, r, inv_m, half);
  1390.  
  1391.    release_temp(p, tmp);
  1392.    release_temp(p, r);
  1393.    release_temp(p, inv_m);
  1394. }
  1395.  
  1396.  
  1397. static void build_texture_transform( struct tnl_program *p )
  1398. {
  1399.    GLuint i, j;
  1400.  
  1401.    for (i = 0; i < MAX_TEXTURE_COORD_UNITS; i++) {
  1402.  
  1403.       if (!(p->state->fragprog_inputs_read & VARYING_BIT_TEX(i)))
  1404.          continue;
  1405.  
  1406.       if (p->state->unit[i].coord_replace)
  1407.          continue;
  1408.  
  1409.       if (p->state->unit[i].texgen_enabled ||
  1410.           p->state->unit[i].texmat_enabled) {
  1411.  
  1412.          GLuint texmat_enabled = p->state->unit[i].texmat_enabled;
  1413.          struct ureg out = register_output(p, VARYING_SLOT_TEX0 + i);
  1414.          struct ureg out_texgen = undef;
  1415.  
  1416.          if (p->state->unit[i].texgen_enabled) {
  1417.             GLuint copy_mask = 0;
  1418.             GLuint sphere_mask = 0;
  1419.             GLuint reflect_mask = 0;
  1420.             GLuint normal_mask = 0;
  1421.             GLuint modes[4];
  1422.  
  1423.             if (texmat_enabled)
  1424.                out_texgen = get_temp(p);
  1425.             else
  1426.                out_texgen = out;
  1427.  
  1428.             modes[0] = p->state->unit[i].texgen_mode0;
  1429.             modes[1] = p->state->unit[i].texgen_mode1;
  1430.             modes[2] = p->state->unit[i].texgen_mode2;
  1431.             modes[3] = p->state->unit[i].texgen_mode3;
  1432.  
  1433.             for (j = 0; j < 4; j++) {
  1434.                switch (modes[j]) {
  1435.                case TXG_OBJ_LINEAR: {
  1436.                   struct ureg obj = register_input(p, VERT_ATTRIB_POS);
  1437.                   struct ureg plane =
  1438.                      register_param3(p, STATE_TEXGEN, i,
  1439.                                      STATE_TEXGEN_OBJECT_S + j);
  1440.  
  1441.                   emit_op2(p, OPCODE_DP4, out_texgen, WRITEMASK_X << j,
  1442.                            obj, plane );
  1443.                   break;
  1444.                }
  1445.                case TXG_EYE_LINEAR: {
  1446.                   struct ureg eye = get_eye_position(p);
  1447.                   struct ureg plane =
  1448.                      register_param3(p, STATE_TEXGEN, i,
  1449.                                      STATE_TEXGEN_EYE_S + j);
  1450.  
  1451.                   emit_op2(p, OPCODE_DP4, out_texgen, WRITEMASK_X << j,
  1452.                            eye, plane );
  1453.                   break;
  1454.                }
  1455.                case TXG_SPHERE_MAP:
  1456.                   sphere_mask |= WRITEMASK_X << j;
  1457.                   break;
  1458.                case TXG_REFLECTION_MAP:
  1459.                   reflect_mask |= WRITEMASK_X << j;
  1460.                   break;
  1461.                case TXG_NORMAL_MAP:
  1462.                   normal_mask |= WRITEMASK_X << j;
  1463.                   break;
  1464.                case TXG_NONE:
  1465.                   copy_mask |= WRITEMASK_X << j;
  1466.                }
  1467.             }
  1468.  
  1469.             if (sphere_mask) {
  1470.                build_sphere_texgen(p, out_texgen, sphere_mask);
  1471.             }
  1472.  
  1473.             if (reflect_mask) {
  1474.                build_reflect_texgen(p, out_texgen, reflect_mask);
  1475.             }
  1476.  
  1477.             if (normal_mask) {
  1478.                struct ureg normal = get_transformed_normal(p);
  1479.                emit_op1(p, OPCODE_MOV, out_texgen, normal_mask, normal );
  1480.             }
  1481.  
  1482.             if (copy_mask) {
  1483.                struct ureg in = register_input(p, VERT_ATTRIB_TEX0+i);
  1484.                emit_op1(p, OPCODE_MOV, out_texgen, copy_mask, in );
  1485.             }
  1486.          }
  1487.  
  1488.          if (texmat_enabled) {
  1489.             struct ureg texmat[4];
  1490.             struct ureg in = (!is_undef(out_texgen) ?
  1491.                               out_texgen :
  1492.                               register_input(p, VERT_ATTRIB_TEX0+i));
  1493.             if (p->mvp_with_dp4) {
  1494.                register_matrix_param5( p, STATE_TEXTURE_MATRIX, i, 0, 3,
  1495.                                        0, texmat );
  1496.                emit_matrix_transform_vec4( p, out, texmat, in );
  1497.             }
  1498.             else {
  1499.                register_matrix_param5( p, STATE_TEXTURE_MATRIX, i, 0, 3,
  1500.                                        STATE_MATRIX_TRANSPOSE, texmat );
  1501.                emit_transpose_matrix_transform_vec4( p, out, texmat, in );
  1502.             }
  1503.          }
  1504.  
  1505.          release_temps(p);
  1506.       }
  1507.       else {
  1508.          emit_passthrough(p, VERT_ATTRIB_TEX0+i, VARYING_SLOT_TEX0+i);
  1509.       }
  1510.    }
  1511. }
  1512.  
  1513.  
  1514. /**
  1515.  * Point size attenuation computation.
  1516.  */
  1517. static void build_atten_pointsize( struct tnl_program *p )
  1518. {
  1519.    struct ureg eye = get_eye_position_z(p);
  1520.    struct ureg state_size = register_param2(p, STATE_INTERNAL, STATE_POINT_SIZE_CLAMPED);
  1521.    struct ureg state_attenuation = register_param1(p, STATE_POINT_ATTENUATION);
  1522.    struct ureg out = register_output(p, VARYING_SLOT_PSIZ);
  1523.    struct ureg ut = get_temp(p);
  1524.  
  1525.    /* dist = |eyez| */
  1526.    emit_op1(p, OPCODE_ABS, ut, WRITEMASK_Y, swizzle1(eye, Z));
  1527.    /* p1 + dist * (p2 + dist * p3); */
  1528.    emit_op3(p, OPCODE_MAD, ut, WRITEMASK_X, swizzle1(ut, Y),
  1529.                 swizzle1(state_attenuation, Z), swizzle1(state_attenuation, Y));
  1530.    emit_op3(p, OPCODE_MAD, ut, WRITEMASK_X, swizzle1(ut, Y),
  1531.                 ut, swizzle1(state_attenuation, X));
  1532.  
  1533.    /* 1 / sqrt(factor) */
  1534.    emit_op1(p, OPCODE_RSQ, ut, WRITEMASK_X, ut );
  1535.  
  1536. #if 0
  1537.    /* out = pointSize / sqrt(factor) */
  1538.    emit_op2(p, OPCODE_MUL, out, WRITEMASK_X, ut, state_size);
  1539. #else
  1540.    /* this is a good place to clamp the point size since there's likely
  1541.     * no hardware registers to clamp point size at rasterization time.
  1542.     */
  1543.    emit_op2(p, OPCODE_MUL, ut, WRITEMASK_X, ut, state_size);
  1544.    emit_op2(p, OPCODE_MAX, ut, WRITEMASK_X, ut, swizzle1(state_size, Y));
  1545.    emit_op2(p, OPCODE_MIN, out, WRITEMASK_X, ut, swizzle1(state_size, Z));
  1546. #endif
  1547.  
  1548.    release_temp(p, ut);
  1549. }
  1550.  
  1551.  
  1552. /**
  1553.  * Pass-though per-vertex point size, from user's point size array.
  1554.  */
  1555. static void build_array_pointsize( struct tnl_program *p )
  1556. {
  1557.    struct ureg in = register_input(p, VERT_ATTRIB_POINT_SIZE);
  1558.    struct ureg out = register_output(p, VARYING_SLOT_PSIZ);
  1559.    emit_op1(p, OPCODE_MOV, out, WRITEMASK_X, in);
  1560. }
  1561.  
  1562.  
  1563. static void build_tnl_program( struct tnl_program *p )
  1564. {
  1565.    /* Emit the program, starting with the modelview, projection transforms:
  1566.     */
  1567.    build_hpos(p);
  1568.  
  1569.    /* Lighting calculations:
  1570.     */
  1571.    if (p->state->fragprog_inputs_read & (VARYING_BIT_COL0|VARYING_BIT_COL1)) {
  1572.       if (p->state->light_global_enabled)
  1573.          build_lighting(p);
  1574.       else {
  1575.          if (p->state->fragprog_inputs_read & VARYING_BIT_COL0)
  1576.             emit_passthrough(p, VERT_ATTRIB_COLOR0, VARYING_SLOT_COL0);
  1577.  
  1578.          if (p->state->fragprog_inputs_read & VARYING_BIT_COL1)
  1579.             emit_passthrough(p, VERT_ATTRIB_COLOR1, VARYING_SLOT_COL1);
  1580.       }
  1581.    }
  1582.  
  1583.    if (p->state->fragprog_inputs_read & VARYING_BIT_FOGC)
  1584.       build_fog(p);
  1585.  
  1586.    if (p->state->fragprog_inputs_read & VARYING_BITS_TEX_ANY)
  1587.       build_texture_transform(p);
  1588.  
  1589.    if (p->state->point_attenuated)
  1590.       build_atten_pointsize(p);
  1591.    else if (p->state->point_array)
  1592.       build_array_pointsize(p);
  1593.  
  1594.    /* Finish up:
  1595.     */
  1596.    emit_op1(p, OPCODE_END, undef, 0, undef);
  1597.  
  1598.    /* Disassemble:
  1599.     */
  1600.    if (DISASSEM) {
  1601.       printf ("\n");
  1602.    }
  1603. }
  1604.  
  1605.  
  1606. static void
  1607. create_new_program( const struct state_key *key,
  1608.                     struct gl_vertex_program *program,
  1609.                     GLboolean mvp_with_dp4,
  1610.                     GLuint max_temps)
  1611. {
  1612.    struct tnl_program p;
  1613.  
  1614.    memset(&p, 0, sizeof(p));
  1615.    p.state = key;
  1616.    p.program = program;
  1617.    p.eye_position = undef;
  1618.    p.eye_position_z = undef;
  1619.    p.eye_position_normalized = undef;
  1620.    p.transformed_normal = undef;
  1621.    p.identity = undef;
  1622.    p.temp_in_use = 0;
  1623.    p.mvp_with_dp4 = mvp_with_dp4;
  1624.  
  1625.    if (max_temps >= sizeof(int) * 8)
  1626.       p.temp_reserved = 0;
  1627.    else
  1628.       p.temp_reserved = ~((1<<max_temps)-1);
  1629.  
  1630.    /* Start by allocating 32 instructions.
  1631.     * If we need more, we'll grow the instruction array as needed.
  1632.     */
  1633.    p.max_inst = 32;
  1634.    p.program->Base.Instructions = _mesa_alloc_instructions(p.max_inst);
  1635.    p.program->Base.String = NULL;
  1636.    p.program->Base.NumInstructions =
  1637.    p.program->Base.NumTemporaries =
  1638.    p.program->Base.NumParameters =
  1639.    p.program->Base.NumAttributes = p.program->Base.NumAddressRegs = 0;
  1640.    p.program->Base.Parameters = _mesa_new_parameter_list();
  1641.    p.program->Base.InputsRead = 0;
  1642.    p.program->Base.OutputsWritten = 0;
  1643.  
  1644.    build_tnl_program( &p );
  1645. }
  1646.  
  1647.  
  1648. /**
  1649.  * Return a vertex program which implements the current fixed-function
  1650.  * transform/lighting/texgen operations.
  1651.  */
  1652. struct gl_vertex_program *
  1653. _mesa_get_fixed_func_vertex_program(struct gl_context *ctx)
  1654. {
  1655.    struct gl_vertex_program *prog;
  1656.    struct state_key key;
  1657.  
  1658.    /* Grab all the relevent state and put it in a single structure:
  1659.     */
  1660.    make_state_key(ctx, &key);
  1661.  
  1662.    /* Look for an already-prepared program for this state:
  1663.     */
  1664.    prog = gl_vertex_program(
  1665.       _mesa_search_program_cache(ctx->VertexProgram.Cache, &key, sizeof(key)));
  1666.  
  1667.    if (!prog) {
  1668.       /* OK, we'll have to build a new one */
  1669.       if (0)
  1670.          printf("Build new TNL program\n");
  1671.  
  1672.       prog = gl_vertex_program(ctx->Driver.NewProgram(ctx, GL_VERTEX_PROGRAM_ARB, 0));
  1673.       if (!prog)
  1674.          return NULL;
  1675.  
  1676.       create_new_program( &key, prog,
  1677.                           ctx->ShaderCompilerOptions[MESA_SHADER_VERTEX].PreferDP4,
  1678.                           ctx->Const.VertexProgram.MaxTemps );
  1679.  
  1680. #if 0
  1681.       if (ctx->Driver.ProgramStringNotify)
  1682.          ctx->Driver.ProgramStringNotify( ctx, GL_VERTEX_PROGRAM_ARB,
  1683.                                           &prog->Base );
  1684. #endif
  1685.       _mesa_program_cache_insert(ctx, ctx->VertexProgram.Cache,
  1686.                                  &key, sizeof(key), &prog->Base);
  1687.    }
  1688.  
  1689.    return prog;
  1690. }
  1691.