Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | Download | RSS feed

  1. /**************************************************************************
  2.  *
  3.  * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
  4.  * All Rights Reserved.
  5.  *
  6.  * Permission is hereby granted, free of charge, to any person obtaining a
  7.  * copy of this software and associated documentation files (the
  8.  * "Software"), to deal in the Software without restriction, including
  9.  * without limitation the rights to use, copy, modify, merge, publish,
  10.  * distribute, sub license, and/or sell copies of the Software, and to
  11.  * permit persons to whom the Software is furnished to do so, subject to
  12.  * the following conditions:
  13.  *
  14.  * The above copyright notice and this permission notice (including the
  15.  * next paragraph) shall be included in all copies or substantial portions
  16.  * of the Software.
  17.  *
  18.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20.  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21.  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
  22.  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23.  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24.  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25.  *
  26.  **************************************************************************/
  27.  
  28. /**
  29.  * \file ffvertex_prog.c
  30.  *
  31.  * Create a vertex program to execute the current fixed function T&L pipeline.
  32.  * \author Keith Whitwell
  33.  */
  34.  
  35.  
  36. #include "main/glheader.h"
  37. #include "main/mtypes.h"
  38. #include "main/macros.h"
  39. #include "main/enums.h"
  40. #include "main/ffvertex_prog.h"
  41. #include "program/program.h"
  42. #include "program/prog_cache.h"
  43. #include "program/prog_instruction.h"
  44. #include "program/prog_parameter.h"
  45. #include "program/prog_print.h"
  46. #include "program/prog_statevars.h"
  47.  
  48.  
  49. /** Max of number of lights and texture coord units */
  50. #define NUM_UNITS MAX2(MAX_TEXTURE_COORD_UNITS, MAX_LIGHTS)
  51.  
  /* Snapshot of the fixed-function GL state that make_state_key() reads
   * from the context.  It is zeroed with memset() before being filled in.
   */
  52. struct state_key {
  53.    unsigned light_color_material_mask:12;   /* ctx->Light.ColorMaterialBitmask, only when color material is enabled */
  54.    unsigned light_global_enabled:1;         /* ctx->Light.Enabled */
  55.    unsigned light_local_viewer:1;           /* ctx->Light.Model.LocalViewer */
  56.    unsigned light_twoside:1;                /* ctx->Light.Model.TwoSide */
  57.    unsigned material_shininess_is_zero:1;   /* set when check_active_shininess() fails for every side in use */
  58.    unsigned need_eye_coords:1;              /* ctx->_NeedEyeCoords */
  59.    unsigned normalize:1;                    /* ctx->Transform.Normalize */
  60.    unsigned rescale_normals:1;              /* ctx->Transform.RescaleNormals */
  61.  
  62.    unsigned fog_source_is_depth:1;          /* fog coordinate source is GL_FRAGMENT_DEPTH_EXT */
  63.    unsigned separate_specular:1;            /* color control is GL_SEPARATE_SPECULAR_COLOR */
  64.    unsigned point_attenuated:1;             /* ctx->Point._Attenuated */
  65.    unsigned point_array:1;                  /* point size array enabled (FEATURE_point_size_array builds only) */
  66.    unsigned texture_enabled_global:1;       /* any texgen, texture matrix or enabled texture unit */
  67.    unsigned fragprog_inputs_read:12;        /* fp->Base.InputsRead, truncated to the 12 bits of this field */
  68.  
  69.    unsigned varying_vp_inputs;              /* copy of ctx->varying_vp_inputs */
  70.  
  /* Per-index state: the light_* fields are filled for i < MAX_LIGHTS,
   * the texture fields for i < MAX_TEXTURE_COORD_UNITS.
   */
  71.    struct {
  72.       unsigned light_enabled:1;
  73.       unsigned light_eyepos3_is_zero:1;     /* light->EyePosition[3] == 0.0 */
  74.       unsigned light_spotcutoff_is_180:1;   /* light->SpotCutoff == 180.0 */
  75.       unsigned light_attenuated:1;          /* attenuation factors differ from (1, 0, 0) */
  76.       unsigned texunit_really_enabled:1;
  77.       unsigned texmat_enabled:1;
  78.       unsigned coord_replace:1;             /* point sprite on and CoordReplace[i] set */
  79.       unsigned texgen_enabled:4;            /* set to 1 when any texgen bit of the unit is on */
  80.       unsigned texgen_mode0:4;              /* TXG_* code for the S coordinate */
  81.       unsigned texgen_mode1:4;              /* TXG_* code for the T coordinate */
  82.       unsigned texgen_mode2:4;              /* TXG_* code for the R coordinate */
  83.       unsigned texgen_mode3:4;              /* TXG_* code for the Q coordinate */
  84.    } unit[NUM_UNITS];
  85. };
  86.  
  87.  
  /* Compact texgen mode codes returned by translate_texgen() and stored in
   * the 4-bit texgen_modeN fields of struct state_key.
   */
  88. #define TXG_NONE           0
  89. #define TXG_OBJ_LINEAR     1
  90. #define TXG_EYE_LINEAR     2
  91. #define TXG_SPHERE_MAP     3
  92. #define TXG_REFLECTION_MAP 4
  93. #define TXG_NORMAL_MAP     5
  94.  
  95. static GLuint translate_texgen( GLboolean enabled, GLenum mode )
  96. {
  97.    if (!enabled)
  98.       return TXG_NONE;
  99.  
  100.    switch (mode) {
  101.    case GL_OBJECT_LINEAR: return TXG_OBJ_LINEAR;
  102.    case GL_EYE_LINEAR: return TXG_EYE_LINEAR;
  103.    case GL_SPHERE_MAP: return TXG_SPHERE_MAP;
  104.    case GL_REFLECTION_MAP_NV: return TXG_REFLECTION_MAP;
  105.    case GL_NORMAL_MAP_NV: return TXG_NORMAL_MAP;
  106.    default: return TXG_NONE;
  107.    }
  108. }
  109.  
  110.  
  111.  
  112. static GLboolean check_active_shininess( struct gl_context *ctx,
  113.                                          const struct state_key *key,
  114.                                          GLuint side )
  115. {
  116.    GLuint bit = 1 << (MAT_ATTRIB_FRONT_SHININESS + side);
  117.  
  118.    if ((key->varying_vp_inputs & VERT_BIT_COLOR0) &&
  119.        (key->light_color_material_mask & bit))
  120.       return GL_TRUE;
  121.  
  122.    if (key->varying_vp_inputs & (bit << 16))
  123.       return GL_TRUE;
  124.  
  125.    if (ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_SHININESS + side][0] != 0.0F)
  126.       return GL_TRUE;
  127.  
  128.    return GL_FALSE;
  129. }
  130.  
  131.  
  132. static void make_state_key( struct gl_context *ctx, struct state_key *key )
  133. {
  134.    const struct gl_fragment_program *fp;
  135.    GLuint i;
  136.  
  137.    memset(key, 0, sizeof(struct state_key));
  138.    fp = ctx->FragmentProgram._Current;
  139.  
  140.    /* This now relies on texenvprogram.c being active:
  141.     */
  142.    assert(fp);
  143.  
  144.    key->need_eye_coords = ctx->_NeedEyeCoords;
  145.  
  146.    key->fragprog_inputs_read = fp->Base.InputsRead;
  147.    key->varying_vp_inputs = ctx->varying_vp_inputs;
  148.  
  149.    if (ctx->RenderMode == GL_FEEDBACK) {
  150.       /* make sure the vertprog emits color and tex0 */
  151.       key->fragprog_inputs_read |= (FRAG_BIT_COL0 | FRAG_BIT_TEX0);
  152.    }
  153.  
  154.    key->separate_specular = (ctx->Light.Model.ColorControl ==
  155.                              GL_SEPARATE_SPECULAR_COLOR);
  156.  
  157.    if (ctx->Light.Enabled) {
  158.       key->light_global_enabled = 1;
  159.  
  160.       if (ctx->Light.Model.LocalViewer)
  161.          key->light_local_viewer = 1;
  162.  
  163.       if (ctx->Light.Model.TwoSide)
  164.          key->light_twoside = 1;
  165.  
  166.       if (ctx->Light.ColorMaterialEnabled) {
  167.          key->light_color_material_mask = ctx->Light.ColorMaterialBitmask;
  168.       }
  169.  
  170.       for (i = 0; i < MAX_LIGHTS; i++) {
  171.          struct gl_light *light = &ctx->Light.Light[i];
  172.  
  173.          if (light->Enabled) {
  174.             key->unit[i].light_enabled = 1;
  175.  
  176.             if (light->EyePosition[3] == 0.0)
  177.                key->unit[i].light_eyepos3_is_zero = 1;
  178.  
  179.             if (light->SpotCutoff == 180.0)
  180.                key->unit[i].light_spotcutoff_is_180 = 1;
  181.  
  182.             if (light->ConstantAttenuation != 1.0 ||
  183.                 light->LinearAttenuation != 0.0 ||
  184.                 light->QuadraticAttenuation != 0.0)
  185.                key->unit[i].light_attenuated = 1;
  186.          }
  187.       }
  188.  
  189.       if (check_active_shininess(ctx, key, 0)) {
  190.          key->material_shininess_is_zero = 0;
  191.       }
  192.       else if (key->light_twoside &&
  193.                check_active_shininess(ctx, key, 1)) {
  194.          key->material_shininess_is_zero = 0;
  195.       }
  196.       else {
  197.          key->material_shininess_is_zero = 1;
  198.       }
  199.    }
  200.  
  201.    if (ctx->Transform.Normalize)
  202.       key->normalize = 1;
  203.  
  204.    if (ctx->Transform.RescaleNormals)
  205.       key->rescale_normals = 1;
  206.  
  207.    if (ctx->Fog.FogCoordinateSource == GL_FRAGMENT_DEPTH_EXT)
  208.       key->fog_source_is_depth = 1;
  209.  
  210.    if (ctx->Point._Attenuated)
  211.       key->point_attenuated = 1;
  212.  
  213. #if FEATURE_point_size_array
  214.    if (ctx->Array.ArrayObj->PointSize.Enabled)
  215.       key->point_array = 1;
  216. #endif
  217.  
  218.    if (ctx->Texture._TexGenEnabled ||
  219.        ctx->Texture._TexMatEnabled ||
  220.        ctx->Texture._EnabledUnits)
  221.       key->texture_enabled_global = 1;
  222.  
  223.    for (i = 0; i < MAX_TEXTURE_COORD_UNITS; i++) {
  224.       struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i];
  225.  
  226.       if (texUnit->_ReallyEnabled)
  227.          key->unit[i].texunit_really_enabled = 1;
  228.  
  229.       if (ctx->Point.PointSprite)
  230.          if (ctx->Point.CoordReplace[i])
  231.             key->unit[i].coord_replace = 1;
  232.  
  233.       if (ctx->Texture._TexMatEnabled & ENABLE_TEXMAT(i))
  234.          key->unit[i].texmat_enabled = 1;
  235.  
  236.       if (texUnit->TexGenEnabled) {
  237.          key->unit[i].texgen_enabled = 1;
  238.  
  239.          key->unit[i].texgen_mode0 =
  240.             translate_texgen( texUnit->TexGenEnabled & (1<<0),
  241.                               texUnit->GenS.Mode );
  242.          key->unit[i].texgen_mode1 =
  243.             translate_texgen( texUnit->TexGenEnabled & (1<<1),
  244.                               texUnit->GenT.Mode );
  245.          key->unit[i].texgen_mode2 =
  246.             translate_texgen( texUnit->TexGenEnabled & (1<<2),
  247.                               texUnit->GenR.Mode );
  248.          key->unit[i].texgen_mode3 =
  249.             translate_texgen( texUnit->TexGenEnabled & (1<<3),
  250.                               texUnit->GenQ.Mode );
  251.       }
  252.    }
  253. }
  254.  
  255.  
  256.  
  257. /* Very useful debugging tool - produces annotated listing of
  258.  * generated program with line/function references for each
  259.  * instruction back into this file:
  260.  */
  261. #define DISASSEM 0
  262.  
  263.  
  264. /* Use uregs to represent registers internally, translate to Mesa's
  265.  * expected formats on emit.
  266.  *
  267.  * NOTE: These are passed by value extensively in this file rather
  268.  * than as usual by pointer reference.  If this disturbs you, try
  269.  * remembering they are just 32bits in size.
  270.  *
  271.  * GCC is smart enough to deal with these dword-sized structures in
  272.  * much the same way as if I had defined them as dwords and was using
  273.  * macros to access and set the fields.  This is much nicer and easier
  274.  * to evolve.
  275.  */
  /* Register reference used while building the program.  The bitfield
   * widths add up to 32 bits; emit_arg()/emit_dst() convert it into
   * Mesa's prog_src_register/prog_dst_register.
   */
  276. struct ureg {
  277.    GLuint file:4;    /* PROGRAM_* register file */
  278.    GLint idx:9;      /* relative addressing may be negative */
  279.                      /* sizeof(idx) should == sizeof(prog_src_reg::Index) */
  280.    GLuint negate:1;  /* emitted as NEGATE_XYZW when set, NEGATE_NONE otherwise */
  281.    GLuint swz:12;    /* four packed swizzle selectors */
  282.    GLuint pad:6;     /* always written as 0 by make_ureg() */
  283. };
  284.  
  285.  
  /* Working state while a fixed-function vertex program is being built. */
  286. struct tnl_program {
  287.    const struct state_key *state;       /* state key the program is generated for */
  288.    struct gl_vertex_program *program;   /* program receiving instructions and parameters */
  289.    GLint max_inst;  /** number of instructions allocated for program */
  290.    GLboolean mvp_with_dp4;              /* selects the DP4 transform instead of the MUL/MAD one */
  291.  
  292.    GLuint temp_in_use;                  /* bitmask of temporaries currently allocated */
  293.    GLuint temp_reserved;                /* bitmask of temporaries that release_temp(s) will not free */
  294.  
  /* Values computed on first request and cached here; 'undef' until then. */
  295.    struct ureg eye_position;
  296.    struct ureg eye_position_z;
  297.    struct ureg eye_position_normalized;
  298.    struct ureg transformed_normal;
  299.    struct ureg identity;
  300.  
  301.    GLuint materials;                    /* material attributes that vary per vertex (see set_material_flags) */
  302.    GLuint color_materials;              /* subset of 'materials' taken from the COLOR0 input */
  303. };
  304.  
  305.  
  306. static const struct ureg undef = {
  307.    PROGRAM_UNDEFINED,
  308.    0,
  309.    0,
  310.    0,
  311.    0
  312. };
  313.  
  314. /* Local shorthand:
  315.  */
  316. #define X    SWIZZLE_X
  317. #define Y    SWIZZLE_Y
  318. #define Z    SWIZZLE_Z
  319. #define W    SWIZZLE_W
  320.  
  321.  
  322. /* Construct a ureg:
  323.  */
  324. static struct ureg make_ureg(GLuint file, GLint idx)
  325. {
  326.    struct ureg reg;
  327.    reg.file = file;
  328.    reg.idx = idx;
  329.    reg.negate = 0;
  330.    reg.swz = SWIZZLE_NOOP;
  331.    reg.pad = 0;
  332.    return reg;
  333. }
  334.  
  335.  
  336.  
  337. static struct ureg negate( struct ureg reg )
  338. {
  339.    reg.negate ^= 1;
  340.    return reg;
  341. }
  342.  
  343.  
  344. static struct ureg swizzle( struct ureg reg, int x, int y, int z, int w )
  345. {
  346.    reg.swz = MAKE_SWIZZLE4(GET_SWZ(reg.swz, x),
  347.                            GET_SWZ(reg.swz, y),
  348.                            GET_SWZ(reg.swz, z),
  349.                            GET_SWZ(reg.swz, w));
  350.    return reg;
  351. }
  352.  
  353.  
  354. static struct ureg swizzle1( struct ureg reg, int x )
  355. {
  356.    return swizzle(reg, x, x, x, x);
  357. }
  358.  
  359.  
  360. static struct ureg get_temp( struct tnl_program *p )
  361. {
  362.    int bit = _mesa_ffs( ~p->temp_in_use );
  363.    if (!bit) {
  364.       _mesa_problem(NULL, "%s: out of temporaries\n", __FILE__);
  365.       exit(1);
  366.    }
  367.  
  368.    if ((GLuint) bit > p->program->Base.NumTemporaries)
  369.       p->program->Base.NumTemporaries = bit;
  370.  
  371.    p->temp_in_use |= 1<<(bit-1);
  372.    return make_ureg(PROGRAM_TEMPORARY, bit-1);
  373. }
  374.  
  375.  
  376. static struct ureg reserve_temp( struct tnl_program *p )
  377. {
  378.    struct ureg temp = get_temp( p );
  379.    p->temp_reserved |= 1<<temp.idx;
  380.    return temp;
  381. }
  382.  
  383.  
  384. static void release_temp( struct tnl_program *p, struct ureg reg )
  385. {
  386.    if (reg.file == PROGRAM_TEMPORARY) {
  387.       p->temp_in_use &= ~(1<<reg.idx);
  388.       p->temp_in_use |= p->temp_reserved; /* can't release reserved temps */
  389.    }
  390. }
  391.  
  392. static void release_temps( struct tnl_program *p )
  393. {
  394.    p->temp_in_use = p->temp_reserved;
  395. }
  396.  
  397.  
  398. static struct ureg register_param5(struct tnl_program *p,
  399.                                    GLint s0,
  400.                                    GLint s1,
  401.                                    GLint s2,
  402.                                    GLint s3,
  403.                                    GLint s4)
  404. {
  405.    gl_state_index tokens[STATE_LENGTH];
  406.    GLint idx;
  407.    tokens[0] = s0;
  408.    tokens[1] = s1;
  409.    tokens[2] = s2;
  410.    tokens[3] = s3;
  411.    tokens[4] = s4;
  412.    idx = _mesa_add_state_reference( p->program->Base.Parameters, tokens );
  413.    return make_ureg(PROGRAM_STATE_VAR, idx);
  414. }
  415.  
  416.  
  /* Shorthands for register_param5() that pass 0 for the unused trailing
   * state tokens.
   */
  417. #define register_param1(p,s0)          register_param5(p,s0,0,0,0,0)
  418. #define register_param2(p,s0,s1)       register_param5(p,s0,s1,0,0,0)
  419. #define register_param3(p,s0,s1,s2)    register_param5(p,s0,s1,s2,0,0)
  420. #define register_param4(p,s0,s1,s2,s3) register_param5(p,s0,s1,s2,s3,0)
  421.  
  422.  
  423.  
  424. /**
  425.  * \param input  one of VERT_ATTRIB_x tokens.
  426.  */
  427. static struct ureg register_input( struct tnl_program *p, GLuint input )
  428. {
  429.    assert(input < 32);
  430.  
  431.    if (p->state->varying_vp_inputs & (1<<input)) {
  432.       p->program->Base.InputsRead |= (1<<input);
  433.       return make_ureg(PROGRAM_INPUT, input);
  434.    }
  435.    else {
  436.       return register_param3( p, STATE_INTERNAL, STATE_CURRENT_ATTRIB, input );
  437.    }
  438. }
  439.  
  440.  
  441. /**
  442.  * \param input  one of VERT_RESULT_x tokens.
  443.  */
  444. static struct ureg register_output( struct tnl_program *p, GLuint output )
  445. {
  446.    p->program->Base.OutputsWritten |= BITFIELD64_BIT(output);
  447.    return make_ureg(PROGRAM_OUTPUT, output);
  448. }
  449.  
  450.  
  451. static struct ureg register_const4f( struct tnl_program *p,
  452.                               GLfloat s0,
  453.                               GLfloat s1,
  454.                               GLfloat s2,
  455.                               GLfloat s3)
  456. {
  457.    GLfloat values[4];
  458.    GLint idx;
  459.    GLuint swizzle;
  460.    values[0] = s0;
  461.    values[1] = s1;
  462.    values[2] = s2;
  463.    values[3] = s3;
  464.    idx = _mesa_add_unnamed_constant( p->program->Base.Parameters, values, 4,
  465.                                      &swizzle );
  466.    ASSERT(swizzle == SWIZZLE_NOOP);
  467.    return make_ureg(PROGRAM_CONSTANT, idx);
  468. }
  469.  
  /* Shorthands for register_const4f(): the 1f/2f/3f forms fill missing
   * components with 0 and set the fourth to 1; register_scalar_const
   * replicates s0 into all four components.
   */
  470. #define register_const1f(p, s0)         register_const4f(p, s0, 0, 0, 1)
  471. #define register_scalar_const(p, s0)    register_const4f(p, s0, s0, s0, s0)
  472. #define register_const2f(p, s0, s1)     register_const4f(p, s0, s1, 0, 1)
  473. #define register_const3f(p, s0, s1, s2) register_const4f(p, s0, s1, s2, 1)
  474.  
  475. static GLboolean is_undef( struct ureg reg )
  476. {
  477.    return reg.file == PROGRAM_UNDEFINED;
  478. }
  479.  
  480.  
  481. static struct ureg get_identity_param( struct tnl_program *p )
  482. {
  483.    if (is_undef(p->identity))
  484.       p->identity = register_const4f(p, 0,0,0,1);
  485.  
  486.    return p->identity;
  487. }
  488.  
  489. static void register_matrix_param5( struct tnl_program *p,
  490.                                     GLint s0, /* modelview, projection, etc */
  491.                                     GLint s1, /* texture matrix number */
  492.                                     GLint s2, /* first row */
  493.                                     GLint s3, /* last row */
  494.                                     GLint s4, /* inverse, transpose, etc */
  495.                                     struct ureg *matrix )
  496. {
  497.    GLint i;
  498.  
  499.    /* This is a bit sad as the support is there to pull the whole
  500.     * matrix out in one go:
  501.     */
  502.    for (i = 0; i <= s3 - s2; i++)
  503.       matrix[i] = register_param5( p, s0, s1, i, i, s4 );
  504. }
  505.  
  506.  
  507. static void emit_arg( struct prog_src_register *src,
  508.                       struct ureg reg )
  509. {
  510.    src->File = reg.file;
  511.    src->Index = reg.idx;
  512.    src->Swizzle = reg.swz;
  513.    src->Negate = reg.negate ? NEGATE_XYZW : NEGATE_NONE;
  514.    src->Abs = 0;
  515.    src->RelAddr = 0;
  516.    /* Check that bitfield sizes aren't exceeded */
  517.    ASSERT(src->Index == reg.idx);
  518. }
  519.  
  520.  
  521. static void emit_dst( struct prog_dst_register *dst,
  522.                       struct ureg reg, GLuint mask )
  523. {
  524.    dst->File = reg.file;
  525.    dst->Index = reg.idx;
  526.    /* allow zero as a shorthand for xyzw */
  527.    dst->WriteMask = mask ? mask : WRITEMASK_XYZW;
  528.    dst->CondMask = COND_TR;  /* always pass cond test */
  529.    dst->CondSwizzle = SWIZZLE_NOOP;
  530.    dst->CondSrc = 0;
  531.    /* Check that bitfield sizes aren't exceeded */
  532.    ASSERT(dst->Index == reg.idx);
  533. }
  534.  
  535.  
  536. static void debug_insn( struct prog_instruction *inst, const char *fn,
  537.                         GLuint line )
  538. {
  539.    if (DISASSEM) {
  540.       static const char *last_fn;
  541.  
  542.       if (fn != last_fn) {
  543.          last_fn = fn;
  544.          printf("%s:\n", fn);
  545.       }
  546.  
  547.       printf("%d:\t", line);
  548.       _mesa_print_instruction(inst);
  549.    }
  550. }
  551.  
  552.  
  553. static void emit_op3fn(struct tnl_program *p,
  554.                        enum prog_opcode op,
  555.                        struct ureg dest,
  556.                        GLuint mask,
  557.                        struct ureg src0,
  558.                        struct ureg src1,
  559.                        struct ureg src2,
  560.                        const char *fn,
  561.                        GLuint line)
  562. {
  563.    GLuint nr;
  564.    struct prog_instruction *inst;
  565.  
  566.    assert((GLint) p->program->Base.NumInstructions <= p->max_inst);
  567.  
  568.    if (p->program->Base.NumInstructions == p->max_inst) {
  569.       /* need to extend the program's instruction array */
  570.       struct prog_instruction *newInst;
  571.  
  572.       /* double the size */
  573.       p->max_inst *= 2;
  574.  
  575.       newInst = _mesa_alloc_instructions(p->max_inst);
  576.       if (!newInst) {
  577.          _mesa_error(NULL, GL_OUT_OF_MEMORY, "vertex program build");
  578.          return;
  579.       }
  580.  
  581.       _mesa_copy_instructions(newInst,
  582.                               p->program->Base.Instructions,
  583.                               p->program->Base.NumInstructions);
  584.  
  585.       _mesa_free_instructions(p->program->Base.Instructions,
  586.                               p->program->Base.NumInstructions);
  587.  
  588.       p->program->Base.Instructions = newInst;
  589.    }
  590.  
  591.    nr = p->program->Base.NumInstructions++;
  592.  
  593.    inst = &p->program->Base.Instructions[nr];
  594.    inst->Opcode = (enum prog_opcode) op;
  595.    inst->Data = 0;
  596.  
  597.    emit_arg( &inst->SrcReg[0], src0 );
  598.    emit_arg( &inst->SrcReg[1], src1 );
  599.    emit_arg( &inst->SrcReg[2], src2 );
  600.  
  601.    emit_dst( &inst->DstReg, dest, mask );
  602.  
  603.    debug_insn(inst, fn, line);
  604. }
  605.  
  606.  
  /* Wrappers around emit_op3fn() for 3-, 2- and 1-operand instructions.
   * Unused source slots are filled with 'undef', and the calling function
   * and line are passed along for debug_insn().
   */
  607. #define emit_op3(p, op, dst, mask, src0, src1, src2) \
  608.    emit_op3fn(p, op, dst, mask, src0, src1, src2, __FUNCTION__, __LINE__)
  609.  
  610. #define emit_op2(p, op, dst, mask, src0, src1) \
  611.     emit_op3fn(p, op, dst, mask, src0, src1, undef, __FUNCTION__, __LINE__)
  612.  
  613. #define emit_op1(p, op, dst, mask, src0) \
  614.     emit_op3fn(p, op, dst, mask, src0, undef, undef, __FUNCTION__, __LINE__)
  615.  
  616.  
  617. static struct ureg make_temp( struct tnl_program *p, struct ureg reg )
  618. {
  619.    if (reg.file == PROGRAM_TEMPORARY &&
  620.        !(p->temp_reserved & (1<<reg.idx)))
  621.       return reg;
  622.    else {
  623.       struct ureg temp = get_temp(p);
  624.       emit_op1(p, OPCODE_MOV, temp, 0, reg);
  625.       return temp;
  626.    }
  627. }
  628.  
  629.  
  630. /* Currently no tracking performed of input/output/register size or
  631.  * active elements.  Could be used to reduce these operations, as
  632.  * could the matrix type.
  633.  */
  634. static void emit_matrix_transform_vec4( struct tnl_program *p,
  635.                                         struct ureg dest,
  636.                                         const struct ureg *mat,
  637.                                         struct ureg src)
  638. {
  639.    emit_op2(p, OPCODE_DP4, dest, WRITEMASK_X, src, mat[0]);
  640.    emit_op2(p, OPCODE_DP4, dest, WRITEMASK_Y, src, mat[1]);
  641.    emit_op2(p, OPCODE_DP4, dest, WRITEMASK_Z, src, mat[2]);
  642.    emit_op2(p, OPCODE_DP4, dest, WRITEMASK_W, src, mat[3]);
  643. }
  644.  
  645.  
  646. /* This version is much easier to implement if writemasks are not
  647.  * supported natively on the target or (like SSE), the target doesn't
  648.  * have a clean/obvious dotproduct implementation.
  649.  */
  650. static void emit_transpose_matrix_transform_vec4( struct tnl_program *p,
  651.                                                   struct ureg dest,
  652.                                                   const struct ureg *mat,
  653.                                                   struct ureg src)
  654. {
  655.    struct ureg tmp;
  656.  
  657.    if (dest.file != PROGRAM_TEMPORARY)
  658.       tmp = get_temp(p);
  659.    else
  660.       tmp = dest;
  661.  
  662.    emit_op2(p, OPCODE_MUL, tmp, 0, swizzle1(src,X), mat[0]);
  663.    emit_op3(p, OPCODE_MAD, tmp, 0, swizzle1(src,Y), mat[1], tmp);
  664.    emit_op3(p, OPCODE_MAD, tmp, 0, swizzle1(src,Z), mat[2], tmp);
  665.    emit_op3(p, OPCODE_MAD, dest, 0, swizzle1(src,W), mat[3], tmp);
  666.  
  667.    if (dest.file != PROGRAM_TEMPORARY)
  668.       release_temp(p, tmp);
  669. }
  670.  
  671.  
  672. static void emit_matrix_transform_vec3( struct tnl_program *p,
  673.                                         struct ureg dest,
  674.                                         const struct ureg *mat,
  675.                                         struct ureg src)
  676. {
  677.    emit_op2(p, OPCODE_DP3, dest, WRITEMASK_X, src, mat[0]);
  678.    emit_op2(p, OPCODE_DP3, dest, WRITEMASK_Y, src, mat[1]);
  679.    emit_op2(p, OPCODE_DP3, dest, WRITEMASK_Z, src, mat[2]);
  680. }
  681.  
  682.  
  683. static void emit_normalize_vec3( struct tnl_program *p,
  684.                                  struct ureg dest,
  685.                                  struct ureg src )
  686. {
  687. #if 0
  688.    /* XXX use this when drivers are ready for NRM3 */
  689.    emit_op1(p, OPCODE_NRM3, dest, WRITEMASK_XYZ, src);
  690. #else
  691.    struct ureg tmp = get_temp(p);
  692.    emit_op2(p, OPCODE_DP3, tmp, WRITEMASK_X, src, src);
  693.    emit_op1(p, OPCODE_RSQ, tmp, WRITEMASK_X, tmp);
  694.    emit_op2(p, OPCODE_MUL, dest, 0, src, swizzle1(tmp, X));
  695.    release_temp(p, tmp);
  696. #endif
  697. }
  698.  
  699.  
  700. static void emit_passthrough( struct tnl_program *p,
  701.                               GLuint input,
  702.                               GLuint output )
  703. {
  704.    struct ureg out = register_output(p, output);
  705.    emit_op1(p, OPCODE_MOV, out, 0, register_input(p, input));
  706. }
  707.  
  708.  
  709. static struct ureg get_eye_position( struct tnl_program *p )
  710. {
  711.    if (is_undef(p->eye_position)) {
  712.       struct ureg pos = register_input( p, VERT_ATTRIB_POS );
  713.       struct ureg modelview[4];
  714.  
  715.       p->eye_position = reserve_temp(p);
  716.  
  717.       if (p->mvp_with_dp4) {
  718.          register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3,
  719.                                  0, modelview );
  720.  
  721.          emit_matrix_transform_vec4(p, p->eye_position, modelview, pos);
  722.       }
  723.       else {
  724.          register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3,
  725.                                  STATE_MATRIX_TRANSPOSE, modelview );
  726.  
  727.          emit_transpose_matrix_transform_vec4(p, p->eye_position, modelview, pos);
  728.       }
  729.    }
  730.  
  731.    return p->eye_position;
  732. }
  733.  
  734.  
  735. static struct ureg get_eye_position_z( struct tnl_program *p )
  736. {
  737.    if (!is_undef(p->eye_position))
  738.       return swizzle1(p->eye_position, Z);
  739.  
  740.    if (is_undef(p->eye_position_z)) {
  741.       struct ureg pos = register_input( p, VERT_ATTRIB_POS );
  742.       struct ureg modelview[4];
  743.  
  744.       p->eye_position_z = reserve_temp(p);
  745.  
  746.       register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3,
  747.                               0, modelview );
  748.  
  749.       emit_op2(p, OPCODE_DP4, p->eye_position_z, 0, pos, modelview[2]);
  750.    }
  751.  
  752.    return p->eye_position_z;
  753. }
  754.  
  755.  
  756. static struct ureg get_eye_position_normalized( struct tnl_program *p )
  757. {
  758.    if (is_undef(p->eye_position_normalized)) {
  759.       struct ureg eye = get_eye_position(p);
  760.       p->eye_position_normalized = reserve_temp(p);
  761.       emit_normalize_vec3(p, p->eye_position_normalized, eye);
  762.    }
  763.  
  764.    return p->eye_position_normalized;
  765. }
  766.  
  767.  
  768. static struct ureg get_transformed_normal( struct tnl_program *p )
  769. {
  770.    if (is_undef(p->transformed_normal) &&
  771.        !p->state->need_eye_coords &&
  772.        !p->state->normalize &&
  773.        !(p->state->need_eye_coords == p->state->rescale_normals))
  774.    {
  775.       p->transformed_normal = register_input(p, VERT_ATTRIB_NORMAL );
  776.    }
  777.    else if (is_undef(p->transformed_normal))
  778.    {
  779.       struct ureg normal = register_input(p, VERT_ATTRIB_NORMAL );
  780.       struct ureg mvinv[3];
  781.       struct ureg transformed_normal = reserve_temp(p);
  782.  
  783.       if (p->state->need_eye_coords) {
  784.          register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 2,
  785.                                  STATE_MATRIX_INVTRANS, mvinv );
  786.  
  787.          /* Transform to eye space:
  788.           */
  789.          emit_matrix_transform_vec3( p, transformed_normal, mvinv, normal );
  790.          normal = transformed_normal;
  791.       }
  792.  
  793.       /* Normalize/Rescale:
  794.        */
  795.       if (p->state->normalize) {
  796.          emit_normalize_vec3( p, transformed_normal, normal );
  797.          normal = transformed_normal;
  798.       }
  799.       else if (p->state->need_eye_coords == p->state->rescale_normals) {
  800.          /* This is already adjusted for eye/non-eye rendering:
  801.           */
  802.          struct ureg rescale = register_param2(p, STATE_INTERNAL,
  803.                                                STATE_NORMAL_SCALE);
  804.  
  805.          emit_op2( p, OPCODE_MUL, transformed_normal, 0, normal, rescale );
  806.          normal = transformed_normal;
  807.       }
  808.  
  809.       assert(normal.file == PROGRAM_TEMPORARY);
  810.       p->transformed_normal = normal;
  811.    }
  812.  
  813.    return p->transformed_normal;
  814. }
  815.  
  816.  
  817. static void build_hpos( struct tnl_program *p )
  818. {
  819.    struct ureg pos = register_input( p, VERT_ATTRIB_POS );
  820.    struct ureg hpos = register_output( p, VERT_RESULT_HPOS );
  821.    struct ureg mvp[4];
  822.  
  823.    if (p->mvp_with_dp4) {
  824.       register_matrix_param5( p, STATE_MVP_MATRIX, 0, 0, 3,
  825.                               0, mvp );
  826.       emit_matrix_transform_vec4( p, hpos, mvp, pos );
  827.    }
  828.    else {
  829.       register_matrix_param5( p, STATE_MVP_MATRIX, 0, 0, 3,
  830.                               STATE_MATRIX_TRANSPOSE, mvp );
  831.       emit_transpose_matrix_transform_vec4( p, hpos, mvp, pos );
  832.    }
  833. }
  834.  
  835.  
  836. static GLuint material_attrib( GLuint side, GLuint property )
  837. {
  838.    return (property - STATE_AMBIENT) * 2 + side;
  839. }
  840.  
  841.  
  842. /**
  843.  * Get a bitmask of which material values vary on a per-vertex basis.
  844.  */
  845. static void set_material_flags( struct tnl_program *p )
  846. {
  847.    p->color_materials = 0;
  848.    p->materials = 0;
  849.  
  850.    if (p->state->varying_vp_inputs & VERT_BIT_COLOR0) {
  851.       p->materials =
  852.          p->color_materials = p->state->light_color_material_mask;
  853.    }
  854.  
  855.    p->materials |= (p->state->varying_vp_inputs >> 16);
  856. }
  857.  
  858.  
  859. static struct ureg get_material( struct tnl_program *p, GLuint side,
  860.                                  GLuint property )
  861. {
  862.    GLuint attrib = material_attrib(side, property);
  863.  
  864.    if (p->color_materials & (1<<attrib))
  865.       return register_input(p, VERT_ATTRIB_COLOR0);
  866.    else if (p->materials & (1<<attrib)) {
  867.       /* Put material values in the GENERIC slots -- they are not used
  868.        * for anything in fixed function mode.
  869.        */
  870.       return register_input( p, attrib + VERT_ATTRIB_GENERIC0 );
  871.    }
  872.    else
  873.       return register_param3( p, STATE_MATERIAL, side, property );
  874. }
  875.  
  /* Material bits (emission, ambient, diffuse) tested by get_scenecolor();
   * the front-face bits are shifted left by 'side'.
   */
  876. #define SCENE_COLOR_BITS(side) (( MAT_BIT_FRONT_EMISSION | \
  877.                                    MAT_BIT_FRONT_AMBIENT | \
  878.                                    MAT_BIT_FRONT_DIFFUSE) << (side))
  879.  
  880.  
  881. /**
  882.  * Either return a precalculated constant value or emit code to
  883.  * calculate these values dynamically in the case where material calls
  884.  * are present between begin/end pairs.
  885.  *
  886.  * Probably want to shift this to the program compilation phase - if
  887.  * we always emitted the calculation here, a smart compiler could
  888.  * detect that it was constant (given a certain set of inputs), and
  889.  * lift it out of the main loop.  That way the programs created here
  890.  * would be independent of the vertex_buffer details.
  891.  */
  892. static struct ureg get_scenecolor( struct tnl_program *p, GLuint side )
  893. {
  894.    if (p->materials & SCENE_COLOR_BITS(side)) {
  895.       struct ureg lm_ambient = register_param1(p, STATE_LIGHTMODEL_AMBIENT);
  896.       struct ureg material_emission = get_material(p, side, STATE_EMISSION);
  897.       struct ureg material_ambient = get_material(p, side, STATE_AMBIENT);
  898.       struct ureg material_diffuse = get_material(p, side, STATE_DIFFUSE);
  899.       struct ureg tmp = make_temp(p, material_diffuse);
  900.       emit_op3(p, OPCODE_MAD, tmp, WRITEMASK_XYZ, lm_ambient,
  901.                material_ambient, material_emission);
  902.       return tmp;
  903.    }
  904.    else
  905.       return register_param2( p, STATE_LIGHTMODEL_SCENECOLOR, side );
  906. }
  907.  
  908.  
  909. static struct ureg get_lightprod( struct tnl_program *p, GLuint light,
  910.                                   GLuint side, GLuint property )
  911. {
  912.    GLuint attrib = material_attrib(side, property);
  913.    if (p->materials & (1<<attrib)) {
  914.       struct ureg light_value =
  915.          register_param3(p, STATE_LIGHT, light, property);
  916.       struct ureg material_value = get_material(p, side, property);
  917.       struct ureg tmp = get_temp(p);
  918.       emit_op2(p, OPCODE_MUL, tmp, 0, light_value, material_value);
  919.       return tmp;
  920.    }
  921.    else
  922.       return register_param4(p, STATE_LIGHTPROD, light, side, property);
  923. }
  924.  
  925.  
  926. static struct ureg calculate_light_attenuation( struct tnl_program *p,
  927.                                                 GLuint i,
  928.                                                 struct ureg VPpli,
  929.                                                 struct ureg dist )
  930. {
  931.    struct ureg attenuation = register_param3(p, STATE_LIGHT, i,
  932.                                              STATE_ATTENUATION);
  933.    struct ureg att = get_temp(p);
  934.  
  935.    /* Calculate spot attenuation:
  936.     */
  937.    if (!p->state->unit[i].light_spotcutoff_is_180) {
  938.       struct ureg spot_dir_norm = register_param3(p, STATE_INTERNAL,
  939.                                                   STATE_LIGHT_SPOT_DIR_NORMALIZED, i);
  940.       struct ureg spot = get_temp(p);
  941.       struct ureg slt = get_temp(p);
  942.  
  943.       emit_op2(p, OPCODE_DP3, spot, 0, negate(VPpli), spot_dir_norm);
  944.       emit_op2(p, OPCODE_SLT, slt, 0, swizzle1(spot_dir_norm,W), spot);
  945.       emit_op2(p, OPCODE_POW, spot, 0, spot, swizzle1(attenuation, W));
  946.       emit_op2(p, OPCODE_MUL, att, 0, slt, spot);
  947.  
  948.       release_temp(p, spot);
  949.       release_temp(p, slt);
  950.    }
  951.  
  952.    /* Calculate distance attenuation:
  953.     */
  954.    if (p->state->unit[i].light_attenuated) {
  955.       /* 1/d,d,d,1/d */
  956.       emit_op1(p, OPCODE_RCP, dist, WRITEMASK_YZ, dist);
  957.       /* 1,d,d*d,1/d */
  958.       emit_op2(p, OPCODE_MUL, dist, WRITEMASK_XZ, dist, swizzle1(dist,Y));
  959.       /* 1/dist-atten */
  960.       emit_op2(p, OPCODE_DP3, dist, 0, attenuation, dist);
  961.  
  962.       if (!p->state->unit[i].light_spotcutoff_is_180) {
  963.          /* dist-atten */
  964.          emit_op1(p, OPCODE_RCP, dist, 0, dist);
  965.          /* spot-atten * dist-atten */
  966.          emit_op2(p, OPCODE_MUL, att, 0, dist, att);
  967.       }
  968.       else {
  969.          /* dist-atten */
  970.          emit_op1(p, OPCODE_RCP, att, 0, dist);
  971.       }
  972.    }
  973.  
  974.    return att;
  975. }
  976.  
  977.  
  978. /**
  979.  * Compute:
  980.  *   lit.y = MAX(0, dots.x)
  981.  *   lit.z = SLT(0, dots.x)
  982.  */
  983. static void emit_degenerate_lit( struct tnl_program *p,
  984.                                  struct ureg lit,
  985.                                  struct ureg dots )
  986. {
  987.    struct ureg id = get_identity_param(p);  /* id = {0,0,0,1} */
  988.  
  989.    /* Note that lit.x & lit.w will not be examined.  Note also that
  990.     * dots.xyzw == dots.xxxx.
  991.     */
  992.  
  993.    /* MAX lit, id, dots;
  994.     */
  995.    emit_op2(p, OPCODE_MAX, lit, WRITEMASK_XYZW, id, dots);
  996.  
  997.    /* result[2] = (in > 0 ? 1 : 0)
  998.     * SLT lit.z, id.z, dots;   # lit.z = (0 < dots.z) ? 1 : 0
  999.     */
  1000.    emit_op2(p, OPCODE_SLT, lit, WRITEMASK_Z, swizzle1(id,Z), dots);
  1001. }
  1002.  
  1003.  
  1004. /* Need to add some addtional parameters to allow lighting in object
  1005.  * space - STATE_SPOT_DIRECTION and STATE_HALF_VECTOR implicitly assume eye
  1006.  * space lighting.
  1007.  */
  1008. static void build_lighting( struct tnl_program *p )
  1009. {
  1010.    const GLboolean twoside = p->state->light_twoside;
  1011.    const GLboolean separate = p->state->separate_specular;
  1012.    GLuint nr_lights = 0, count = 0;
  1013.    struct ureg normal = get_transformed_normal(p);
  1014.    struct ureg lit = get_temp(p);
  1015.    struct ureg dots = get_temp(p);
  1016.    struct ureg _col0 = undef, _col1 = undef;
  1017.    struct ureg _bfc0 = undef, _bfc1 = undef;
  1018.    GLuint i;
  1019.  
  1020.    /*
  1021.     * NOTE:
  1022.     * dots.x = dot(normal, VPpli)
  1023.     * dots.y = dot(normal, halfAngle)
  1024.     * dots.z = back.shininess
  1025.     * dots.w = front.shininess
  1026.     */
  1027.  
  1028.    for (i = 0; i < MAX_LIGHTS; i++)
  1029.       if (p->state->unit[i].light_enabled)
  1030.          nr_lights++;
  1031.  
  1032.    set_material_flags(p);
  1033.  
  1034.    {
  1035.       if (!p->state->material_shininess_is_zero) {
  1036.          struct ureg shininess = get_material(p, 0, STATE_SHININESS);
  1037.          emit_op1(p, OPCODE_MOV, dots, WRITEMASK_W, swizzle1(shininess,X));
  1038.          release_temp(p, shininess);
  1039.       }
  1040.  
  1041.       _col0 = make_temp(p, get_scenecolor(p, 0));
  1042.       if (separate)
  1043.          _col1 = make_temp(p, get_identity_param(p));
  1044.       else
  1045.          _col1 = _col0;
  1046.    }
  1047.  
  1048.    if (twoside) {
  1049.       if (!p->state->material_shininess_is_zero) {
  1050.          /* Note that we negate the back-face specular exponent here.
  1051.           * The negation will be un-done later in the back-face code below.
  1052.           */
  1053.          struct ureg shininess = get_material(p, 1, STATE_SHININESS);
  1054.          emit_op1(p, OPCODE_MOV, dots, WRITEMASK_Z,
  1055.                   negate(swizzle1(shininess,X)));
  1056.          release_temp(p, shininess);
  1057.       }
  1058.  
  1059.       _bfc0 = make_temp(p, get_scenecolor(p, 1));
  1060.       if (separate)
  1061.          _bfc1 = make_temp(p, get_identity_param(p));
  1062.       else
  1063.          _bfc1 = _bfc0;
  1064.    }
  1065.  
  1066.    /* If no lights, still need to emit the scenecolor.
  1067.     */
  1068.    {
  1069.       struct ureg res0 = register_output( p, VERT_RESULT_COL0 );
  1070.       emit_op1(p, OPCODE_MOV, res0, 0, _col0);
  1071.    }
  1072.  
  1073.    if (separate) {
  1074.       struct ureg res1 = register_output( p, VERT_RESULT_COL1 );
  1075.       emit_op1(p, OPCODE_MOV, res1, 0, _col1);
  1076.    }
  1077.  
  1078.    if (twoside) {
  1079.       struct ureg res0 = register_output( p, VERT_RESULT_BFC0 );
  1080.       emit_op1(p, OPCODE_MOV, res0, 0, _bfc0);
  1081.    }
  1082.  
  1083.    if (twoside && separate) {
  1084.       struct ureg res1 = register_output( p, VERT_RESULT_BFC1 );
  1085.       emit_op1(p, OPCODE_MOV, res1, 0, _bfc1);
  1086.    }
  1087.  
  1088.    if (nr_lights == 0) {
  1089.       release_temps(p);
  1090.       return;
  1091.    }
  1092.  
  1093.    for (i = 0; i < MAX_LIGHTS; i++) {
  1094.       if (p->state->unit[i].light_enabled) {
  1095.          struct ureg half = undef;
  1096.          struct ureg att = undef, VPpli = undef;
  1097.  
  1098.          count++;
  1099.  
  1100.          if (p->state->unit[i].light_eyepos3_is_zero) {
  1101.             /* Can used precomputed constants in this case.
  1102.              * Attenuation never applies to infinite lights.
  1103.              */
  1104.             VPpli = register_param3(p, STATE_INTERNAL,
  1105.                                     STATE_LIGHT_POSITION_NORMALIZED, i);
  1106.  
  1107.             if (!p->state->material_shininess_is_zero) {
  1108.                if (p->state->light_local_viewer) {
  1109.                   struct ureg eye_hat = get_eye_position_normalized(p);
  1110.                   half = get_temp(p);
  1111.                   emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat);
  1112.                   emit_normalize_vec3(p, half, half);
  1113.                }
  1114.                else {
  1115.                   half = register_param3(p, STATE_INTERNAL,
  1116.                                          STATE_LIGHT_HALF_VECTOR, i);
  1117.                }
  1118.             }
  1119.          }
  1120.          else {
  1121.             struct ureg Ppli = register_param3(p, STATE_INTERNAL,
  1122.                                                STATE_LIGHT_POSITION, i);
  1123.             struct ureg V = get_eye_position(p);
  1124.             struct ureg dist = get_temp(p);
  1125.  
  1126.             VPpli = get_temp(p);
  1127.  
  1128.             /* Calculate VPpli vector
  1129.              */
  1130.             emit_op2(p, OPCODE_SUB, VPpli, 0, Ppli, V);
  1131.  
  1132.             /* Normalize VPpli.  The dist value also used in
  1133.              * attenuation below.
  1134.              */
  1135.             emit_op2(p, OPCODE_DP3, dist, 0, VPpli, VPpli);
  1136.             emit_op1(p, OPCODE_RSQ, dist, 0, dist);
  1137.             emit_op2(p, OPCODE_MUL, VPpli, 0, VPpli, dist);
  1138.  
  1139.             /* Calculate attenuation:
  1140.              */
  1141.             if (!p->state->unit[i].light_spotcutoff_is_180 ||
  1142.                 p->state->unit[i].light_attenuated) {
  1143.                att = calculate_light_attenuation(p, i, VPpli, dist);
  1144.             }
  1145.  
  1146.             /* Calculate viewer direction, or use infinite viewer:
  1147.              */
  1148.             if (!p->state->material_shininess_is_zero) {
  1149.                half = get_temp(p);
  1150.  
  1151.                if (p->state->light_local_viewer) {
  1152.                   struct ureg eye_hat = get_eye_position_normalized(p);
  1153.                   emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat);
  1154.                }
  1155.                else {
  1156.                   struct ureg z_dir = swizzle(get_identity_param(p),X,Y,W,Z);
  1157.                   emit_op2(p, OPCODE_ADD, half, 0, VPpli, z_dir);
  1158.                }
  1159.  
  1160.                emit_normalize_vec3(p, half, half);
  1161.             }
  1162.  
  1163.             release_temp(p, dist);
  1164.          }
  1165.  
  1166.          /* Calculate dot products:
  1167.           */
  1168.          if (p->state->material_shininess_is_zero) {
  1169.             emit_op2(p, OPCODE_DP3, dots, 0, normal, VPpli);
  1170.          }
  1171.          else {
  1172.             emit_op2(p, OPCODE_DP3, dots, WRITEMASK_X, normal, VPpli);
  1173.             emit_op2(p, OPCODE_DP3, dots, WRITEMASK_Y, normal, half);
  1174.          }
  1175.  
  1176.          /* Front face lighting:
  1177.           */
  1178.          {
  1179.             struct ureg ambient = get_lightprod(p, i, 0, STATE_AMBIENT);
  1180.             struct ureg diffuse = get_lightprod(p, i, 0, STATE_DIFFUSE);
  1181.             struct ureg specular = get_lightprod(p, i, 0, STATE_SPECULAR);
  1182.             struct ureg res0, res1;
  1183.             GLuint mask0, mask1;
  1184.  
  1185.             if (count == nr_lights) {
  1186.                if (separate) {
  1187.                   mask0 = WRITEMASK_XYZ;
  1188.                   mask1 = WRITEMASK_XYZ;
  1189.                   res0 = register_output( p, VERT_RESULT_COL0 );
  1190.                   res1 = register_output( p, VERT_RESULT_COL1 );
  1191.                }
  1192.                else {
  1193.                   mask0 = 0;
  1194.                   mask1 = WRITEMASK_XYZ;
  1195.                   res0 = _col0;
  1196.                   res1 = register_output( p, VERT_RESULT_COL0 );
  1197.                }
  1198.             }
  1199.             else {
  1200.                mask0 = 0;
  1201.                mask1 = 0;
  1202.                res0 = _col0;
  1203.                res1 = _col1;
  1204.             }
  1205.  
  1206.             if (!is_undef(att)) {
  1207.                /* light is attenuated by distance */
  1208.                emit_op1(p, OPCODE_LIT, lit, 0, dots);
  1209.                emit_op2(p, OPCODE_MUL, lit, 0, lit, att);
  1210.                emit_op3(p, OPCODE_MAD, _col0, 0, swizzle1(lit,X), ambient, _col0);
  1211.             }
  1212.             else if (!p->state->material_shininess_is_zero) {
  1213.                /* there's a non-zero specular term */
  1214.                emit_op1(p, OPCODE_LIT, lit, 0, dots);
  1215.                emit_op2(p, OPCODE_ADD, _col0, 0, ambient, _col0);
  1216.             }
  1217.             else {
  1218.                /* no attenutation, no specular */
  1219.                emit_degenerate_lit(p, lit, dots);
  1220.                emit_op2(p, OPCODE_ADD, _col0, 0, ambient, _col0);
  1221.             }
  1222.  
  1223.             emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _col0);
  1224.             emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _col1);
  1225.  
  1226.             release_temp(p, ambient);
  1227.             release_temp(p, diffuse);
  1228.             release_temp(p, specular);
  1229.          }
  1230.  
  1231.          /* Back face lighting:
  1232.           */
  1233.          if (twoside) {
  1234.             struct ureg ambient = get_lightprod(p, i, 1, STATE_AMBIENT);
  1235.             struct ureg diffuse = get_lightprod(p, i, 1, STATE_DIFFUSE);
  1236.             struct ureg specular = get_lightprod(p, i, 1, STATE_SPECULAR);
  1237.             struct ureg res0, res1;
  1238.             GLuint mask0, mask1;
  1239.  
  1240.             if (count == nr_lights) {
  1241.                if (separate) {
  1242.                   mask0 = WRITEMASK_XYZ;
  1243.                   mask1 = WRITEMASK_XYZ;
  1244.                   res0 = register_output( p, VERT_RESULT_BFC0 );
  1245.                   res1 = register_output( p, VERT_RESULT_BFC1 );
  1246.                }
  1247.                else {
  1248.                   mask0 = 0;
  1249.                   mask1 = WRITEMASK_XYZ;
  1250.                   res0 = _bfc0;
  1251.                   res1 = register_output( p, VERT_RESULT_BFC0 );
  1252.                }
  1253.             }
  1254.             else {
  1255.                res0 = _bfc0;
  1256.                res1 = _bfc1;
  1257.                mask0 = 0;
  1258.                mask1 = 0;
  1259.             }
  1260.  
  1261.             /* For the back face we need to negate the X and Y component
  1262.              * dot products.  dots.Z has the negated back-face specular
  1263.              * exponent.  We swizzle that into the W position.  This
  1264.              * negation makes the back-face specular term positive again.
  1265.              */
  1266.             dots = negate(swizzle(dots,X,Y,W,Z));
  1267.  
  1268.             if (!is_undef(att)) {
  1269.                emit_op1(p, OPCODE_LIT, lit, 0, dots);
  1270.                emit_op2(p, OPCODE_MUL, lit, 0, lit, att);
  1271.                emit_op3(p, OPCODE_MAD, _bfc0, 0, swizzle1(lit,X), ambient, _bfc0);
  1272.             }
  1273.             else if (!p->state->material_shininess_is_zero) {
  1274.                emit_op1(p, OPCODE_LIT, lit, 0, dots);
  1275.                emit_op2(p, OPCODE_ADD, _bfc0, 0, ambient, _bfc0); /**/
  1276.             }
  1277.             else {
  1278.                emit_degenerate_lit(p, lit, dots);
  1279.                emit_op2(p, OPCODE_ADD, _bfc0, 0, ambient, _bfc0);
  1280.             }
  1281.  
  1282.             emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _bfc0);
  1283.             emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _bfc1);
  1284.             /* restore dots to its original state for subsequent lights
  1285.              * by negating and swizzling again.
  1286.              */
  1287.             dots = negate(swizzle(dots,X,Y,W,Z));
  1288.  
  1289.             release_temp(p, ambient);
  1290.             release_temp(p, diffuse);
  1291.             release_temp(p, specular);
  1292.          }
  1293.  
  1294.          release_temp(p, half);
  1295.          release_temp(p, VPpli);
  1296.          release_temp(p, att);
  1297.       }
  1298.    }
  1299.  
  1300.    release_temps( p );
  1301. }
  1302.  
  1303.  
  1304. static void build_fog( struct tnl_program *p )
  1305. {
  1306.    struct ureg fog = register_output(p, VERT_RESULT_FOGC);
  1307.    struct ureg input;
  1308.  
  1309.    if (p->state->fog_source_is_depth) {
  1310.       input = get_eye_position_z(p);
  1311.    }
  1312.    else {
  1313.       input = swizzle1(register_input(p, VERT_ATTRIB_FOG), X);
  1314.    }
  1315.  
  1316.    /* result.fog = {abs(f),0,0,1}; */
  1317.    emit_op1(p, OPCODE_ABS, fog, WRITEMASK_X, input);
  1318.    emit_op1(p, OPCODE_MOV, fog, WRITEMASK_YZW, get_identity_param(p));
  1319. }
  1320.  
  1321.  
  1322. static void build_reflect_texgen( struct tnl_program *p,
  1323.                                   struct ureg dest,
  1324.                                   GLuint writemask )
  1325. {
  1326.    struct ureg normal = get_transformed_normal(p);
  1327.    struct ureg eye_hat = get_eye_position_normalized(p);
  1328.    struct ureg tmp = get_temp(p);
  1329.  
  1330.    /* n.u */
  1331.    emit_op2(p, OPCODE_DP3, tmp, 0, normal, eye_hat);
  1332.    /* 2n.u */
  1333.    emit_op2(p, OPCODE_ADD, tmp, 0, tmp, tmp);
  1334.    /* (-2n.u)n + u */
  1335.    emit_op3(p, OPCODE_MAD, dest, writemask, negate(tmp), normal, eye_hat);
  1336.  
  1337.    release_temp(p, tmp);
  1338. }
  1339.  
  1340.  
  1341. static void build_sphere_texgen( struct tnl_program *p,
  1342.                                  struct ureg dest,
  1343.                                  GLuint writemask )
  1344. {
  1345.    struct ureg normal = get_transformed_normal(p);
  1346.    struct ureg eye_hat = get_eye_position_normalized(p);
  1347.    struct ureg tmp = get_temp(p);
  1348.    struct ureg half = register_scalar_const(p, .5);
  1349.    struct ureg r = get_temp(p);
  1350.    struct ureg inv_m = get_temp(p);
  1351.    struct ureg id = get_identity_param(p);
  1352.  
  1353.    /* Could share the above calculations, but it would be
  1354.     * a fairly odd state for someone to set (both sphere and
  1355.     * reflection active for different texture coordinate
  1356.     * components.  Of course - if two texture units enable
  1357.     * reflect and/or sphere, things start to tilt in favour
  1358.     * of seperating this out:
  1359.     */
  1360.  
  1361.    /* n.u */
  1362.    emit_op2(p, OPCODE_DP3, tmp, 0, normal, eye_hat);
  1363.    /* 2n.u */
  1364.    emit_op2(p, OPCODE_ADD, tmp, 0, tmp, tmp);
  1365.    /* (-2n.u)n + u */
  1366.    emit_op3(p, OPCODE_MAD, r, 0, negate(tmp), normal, eye_hat);
  1367.    /* r + 0,0,1 */
  1368.    emit_op2(p, OPCODE_ADD, tmp, 0, r, swizzle(id,X,Y,W,Z));
  1369.    /* rx^2 + ry^2 + (rz+1)^2 */
  1370.    emit_op2(p, OPCODE_DP3, tmp, 0, tmp, tmp);
  1371.    /* 2/m */
  1372.    emit_op1(p, OPCODE_RSQ, tmp, 0, tmp);
  1373.    /* 1/m */
  1374.    emit_op2(p, OPCODE_MUL, inv_m, 0, tmp, half);
  1375.    /* r/m + 1/2 */
  1376.    emit_op3(p, OPCODE_MAD, dest, writemask, r, inv_m, half);
  1377.  
  1378.    release_temp(p, tmp);
  1379.    release_temp(p, r);
  1380.    release_temp(p, inv_m);
  1381. }
  1382.  
  1383.  
  1384. static void build_texture_transform( struct tnl_program *p )
  1385. {
  1386.    GLuint i, j;
  1387.  
  1388.    for (i = 0; i < MAX_TEXTURE_COORD_UNITS; i++) {
  1389.  
  1390.       if (!(p->state->fragprog_inputs_read & FRAG_BIT_TEX(i)))
  1391.          continue;
  1392.  
  1393.       if (p->state->unit[i].coord_replace)
  1394.          continue;
  1395.  
  1396.       if (p->state->unit[i].texgen_enabled ||
  1397.           p->state->unit[i].texmat_enabled) {
  1398.  
  1399.          GLuint texmat_enabled = p->state->unit[i].texmat_enabled;
  1400.          struct ureg out = register_output(p, VERT_RESULT_TEX0 + i);
  1401.          struct ureg out_texgen = undef;
  1402.  
  1403.          if (p->state->unit[i].texgen_enabled) {
  1404.             GLuint copy_mask = 0;
  1405.             GLuint sphere_mask = 0;
  1406.             GLuint reflect_mask = 0;
  1407.             GLuint normal_mask = 0;
  1408.             GLuint modes[4];
  1409.  
  1410.             if (texmat_enabled)
  1411.                out_texgen = get_temp(p);
  1412.             else
  1413.                out_texgen = out;
  1414.  
  1415.             modes[0] = p->state->unit[i].texgen_mode0;
  1416.             modes[1] = p->state->unit[i].texgen_mode1;
  1417.             modes[2] = p->state->unit[i].texgen_mode2;
  1418.             modes[3] = p->state->unit[i].texgen_mode3;
  1419.  
  1420.             for (j = 0; j < 4; j++) {
  1421.                switch (modes[j]) {
  1422.                case TXG_OBJ_LINEAR: {
  1423.                   struct ureg obj = register_input(p, VERT_ATTRIB_POS);
  1424.                   struct ureg plane =
  1425.                      register_param3(p, STATE_TEXGEN, i,
  1426.                                      STATE_TEXGEN_OBJECT_S + j);
  1427.  
  1428.                   emit_op2(p, OPCODE_DP4, out_texgen, WRITEMASK_X << j,
  1429.                            obj, plane );
  1430.                   break;
  1431.                }
  1432.                case TXG_EYE_LINEAR: {
  1433.                   struct ureg eye = get_eye_position(p);
  1434.                   struct ureg plane =
  1435.                      register_param3(p, STATE_TEXGEN, i,
  1436.                                      STATE_TEXGEN_EYE_S + j);
  1437.  
  1438.                   emit_op2(p, OPCODE_DP4, out_texgen, WRITEMASK_X << j,
  1439.                            eye, plane );
  1440.                   break;
  1441.                }
  1442.                case TXG_SPHERE_MAP:
  1443.                   sphere_mask |= WRITEMASK_X << j;
  1444.                   break;
  1445.                case TXG_REFLECTION_MAP:
  1446.                   reflect_mask |= WRITEMASK_X << j;
  1447.                   break;
  1448.                case TXG_NORMAL_MAP:
  1449.                   normal_mask |= WRITEMASK_X << j;
  1450.                   break;
  1451.                case TXG_NONE:
  1452.                   copy_mask |= WRITEMASK_X << j;
  1453.                }
  1454.             }
  1455.  
  1456.             if (sphere_mask) {
  1457.                build_sphere_texgen(p, out_texgen, sphere_mask);
  1458.             }
  1459.  
  1460.             if (reflect_mask) {
  1461.                build_reflect_texgen(p, out_texgen, reflect_mask);
  1462.             }
  1463.  
  1464.             if (normal_mask) {
  1465.                struct ureg normal = get_transformed_normal(p);
  1466.                emit_op1(p, OPCODE_MOV, out_texgen, normal_mask, normal );
  1467.             }
  1468.  
  1469.             if (copy_mask) {
  1470.                struct ureg in = register_input(p, VERT_ATTRIB_TEX0+i);
  1471.                emit_op1(p, OPCODE_MOV, out_texgen, copy_mask, in );
  1472.             }
  1473.          }
  1474.  
  1475.          if (texmat_enabled) {
  1476.             struct ureg texmat[4];
  1477.             struct ureg in = (!is_undef(out_texgen) ?
  1478.                               out_texgen :
  1479.                               register_input(p, VERT_ATTRIB_TEX0+i));
  1480.             if (p->mvp_with_dp4) {
  1481.                register_matrix_param5( p, STATE_TEXTURE_MATRIX, i, 0, 3,
  1482.                                        0, texmat );
  1483.                emit_matrix_transform_vec4( p, out, texmat, in );
  1484.             }
  1485.             else {
  1486.                register_matrix_param5( p, STATE_TEXTURE_MATRIX, i, 0, 3,
  1487.                                        STATE_MATRIX_TRANSPOSE, texmat );
  1488.                emit_transpose_matrix_transform_vec4( p, out, texmat, in );
  1489.             }
  1490.          }
  1491.  
  1492.          release_temps(p);
  1493.       }
  1494.       else {
  1495.          emit_passthrough(p, VERT_ATTRIB_TEX0+i, VERT_RESULT_TEX0+i);
  1496.       }
  1497.    }
  1498. }
  1499.  
  1500.  
  1501. /**
  1502.  * Point size attenuation computation.
  1503.  */
  1504. static void build_atten_pointsize( struct tnl_program *p )
  1505. {
  1506.    struct ureg eye = get_eye_position_z(p);
  1507.    struct ureg state_size = register_param2(p, STATE_INTERNAL, STATE_POINT_SIZE_CLAMPED);
  1508.    struct ureg state_attenuation = register_param1(p, STATE_POINT_ATTENUATION);
  1509.    struct ureg out = register_output(p, VERT_RESULT_PSIZ);
  1510.    struct ureg ut = get_temp(p);
  1511.  
  1512.    /* dist = |eyez| */
  1513.    emit_op1(p, OPCODE_ABS, ut, WRITEMASK_Y, swizzle1(eye, Z));
  1514.    /* p1 + dist * (p2 + dist * p3); */
  1515.    emit_op3(p, OPCODE_MAD, ut, WRITEMASK_X, swizzle1(ut, Y),
  1516.                 swizzle1(state_attenuation, Z), swizzle1(state_attenuation, Y));
  1517.    emit_op3(p, OPCODE_MAD, ut, WRITEMASK_X, swizzle1(ut, Y),
  1518.                 ut, swizzle1(state_attenuation, X));
  1519.  
  1520.    /* 1 / sqrt(factor) */
  1521.    emit_op1(p, OPCODE_RSQ, ut, WRITEMASK_X, ut );
  1522.  
  1523. #if 0
  1524.    /* out = pointSize / sqrt(factor) */
  1525.    emit_op2(p, OPCODE_MUL, out, WRITEMASK_X, ut, state_size);
  1526. #else
  1527.    /* this is a good place to clamp the point size since there's likely
  1528.     * no hardware registers to clamp point size at rasterization time.
  1529.     */
  1530.    emit_op2(p, OPCODE_MUL, ut, WRITEMASK_X, ut, state_size);
  1531.    emit_op2(p, OPCODE_MAX, ut, WRITEMASK_X, ut, swizzle1(state_size, Y));
  1532.    emit_op2(p, OPCODE_MIN, out, WRITEMASK_X, ut, swizzle1(state_size, Z));
  1533. #endif
  1534.  
  1535.    release_temp(p, ut);
  1536. }
  1537.  
  1538.  
  1539. /**
  1540.  * Pass-though per-vertex point size, from user's point size array.
  1541.  */
  1542. static void build_array_pointsize( struct tnl_program *p )
  1543. {
  1544.    struct ureg in = register_input(p, VERT_ATTRIB_POINT_SIZE);
  1545.    struct ureg out = register_output(p, VERT_RESULT_PSIZ);
  1546.    emit_op1(p, OPCODE_MOV, out, WRITEMASK_X, in);
  1547. }
  1548.  
  1549.  
  1550. static void build_tnl_program( struct tnl_program *p )
  1551. {
  1552.    /* Emit the program, starting with modelviewproject:
  1553.     */
  1554.    build_hpos(p);
  1555.  
  1556.    /* Lighting calculations:
  1557.     */
  1558.    if (p->state->fragprog_inputs_read & (FRAG_BIT_COL0|FRAG_BIT_COL1)) {
  1559.       if (p->state->light_global_enabled)
  1560.          build_lighting(p);
  1561.       else {
  1562.          if (p->state->fragprog_inputs_read & FRAG_BIT_COL0)
  1563.             emit_passthrough(p, VERT_ATTRIB_COLOR0, VERT_RESULT_COL0);
  1564.  
  1565.          if (p->state->fragprog_inputs_read & FRAG_BIT_COL1)
  1566.             emit_passthrough(p, VERT_ATTRIB_COLOR1, VERT_RESULT_COL1);
  1567.       }
  1568.    }
  1569.  
  1570.    if (p->state->fragprog_inputs_read & FRAG_BIT_FOGC)
  1571.       build_fog(p);
  1572.  
  1573.    if (p->state->fragprog_inputs_read & FRAG_BITS_TEX_ANY)
  1574.       build_texture_transform(p);
  1575.  
  1576.    if (p->state->point_attenuated)
  1577.       build_atten_pointsize(p);
  1578.    else if (p->state->point_array)
  1579.       build_array_pointsize(p);
  1580.  
  1581.    /* Finish up:
  1582.     */
  1583.    emit_op1(p, OPCODE_END, undef, 0, undef);
  1584.  
  1585.    /* Disassemble:
  1586.     */
  1587.    if (DISASSEM) {
  1588.       printf ("\n");
  1589.    }
  1590. }
  1591.  
  1592.  
  1593. static void
  1594. create_new_program( const struct state_key *key,
  1595.                     struct gl_vertex_program *program,
  1596.                     GLboolean mvp_with_dp4,
  1597.                     GLuint max_temps)
  1598. {
  1599.    struct tnl_program p;
  1600.  
  1601.    memset(&p, 0, sizeof(p));
  1602.    p.state = key;
  1603.    p.program = program;
  1604.    p.eye_position = undef;
  1605.    p.eye_position_z = undef;
  1606.    p.eye_position_normalized = undef;
  1607.    p.transformed_normal = undef;
  1608.    p.identity = undef;
  1609.    p.temp_in_use = 0;
  1610.    p.mvp_with_dp4 = mvp_with_dp4;
  1611.  
  1612.    if (max_temps >= sizeof(int) * 8)
  1613.       p.temp_reserved = 0;
  1614.    else
  1615.       p.temp_reserved = ~((1<<max_temps)-1);
  1616.  
  1617.    /* Start by allocating 32 instructions.
  1618.     * If we need more, we'll grow the instruction array as needed.
  1619.     */
  1620.    p.max_inst = 32;
  1621.    p.program->Base.Instructions = _mesa_alloc_instructions(p.max_inst);
  1622.    p.program->Base.String = NULL;
  1623.    p.program->Base.NumInstructions =
  1624.    p.program->Base.NumTemporaries =
  1625.    p.program->Base.NumParameters =
  1626.    p.program->Base.NumAttributes = p.program->Base.NumAddressRegs = 0;
  1627.    p.program->Base.Parameters = _mesa_new_parameter_list();
  1628.    p.program->Base.InputsRead = 0;
  1629.    p.program->Base.OutputsWritten = 0;
  1630.  
  1631.    build_tnl_program( &p );
  1632. }
  1633.  
  1634.  
  1635. /**
  1636.  * Return a vertex program which implements the current fixed-function
  1637.  * transform/lighting/texgen operations.
  1638.  * XXX move this into core mesa (main/)
  1639.  */
  1640. struct gl_vertex_program *
  1641. _mesa_get_fixed_func_vertex_program(struct gl_context *ctx)
  1642. {
  1643.    struct gl_vertex_program *prog;
  1644.    struct state_key key;
  1645.  
  1646.    /* Grab all the relevent state and put it in a single structure:
  1647.     */
  1648.    make_state_key(ctx, &key);
  1649.  
  1650.    /* Look for an already-prepared program for this state:
  1651.     */
  1652.    prog = (struct gl_vertex_program *)
  1653.       _mesa_search_program_cache(ctx->VertexProgram.Cache, &key, sizeof(key));
  1654.  
  1655.    if (!prog) {
  1656.       /* OK, we'll have to build a new one */
  1657.       if (0)
  1658.          printf("Build new TNL program\n");
  1659.  
  1660.       prog = (struct gl_vertex_program *)
  1661.          ctx->Driver.NewProgram(ctx, GL_VERTEX_PROGRAM_ARB, 0);
  1662.       if (!prog)
  1663.          return NULL;
  1664.  
  1665.       create_new_program( &key, prog,
  1666.                           ctx->mvp_with_dp4,
  1667.                           ctx->Const.VertexProgram.MaxTemps );
  1668.  
  1669. #if 0
  1670.       if (ctx->Driver.ProgramStringNotify)
  1671.          ctx->Driver.ProgramStringNotify( ctx, GL_VERTEX_PROGRAM_ARB,
  1672.                                           &prog->Base );
  1673. #endif
  1674.       _mesa_program_cache_insert(ctx, ctx->VertexProgram.Cache,
  1675.                                  &key, sizeof(key), &prog->Base);
  1676.    }
  1677.  
  1678.    return prog;
  1679. }
  1680.