Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright (C) 2005-2007  Brian Paul   All Rights Reserved.
  3.  * Copyright (C) 2008  VMware, Inc.   All Rights Reserved.
  4.  * Copyright © 2010 Intel Corporation
  5.  *
  6.  * Permission is hereby granted, free of charge, to any person obtaining a
  7.  * copy of this software and associated documentation files (the "Software"),
  8.  * to deal in the Software without restriction, including without limitation
  9.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10.  * and/or sell copies of the Software, and to permit persons to whom the
  11.  * Software is furnished to do so, subject to the following conditions:
  12.  *
  13.  * The above copyright notice and this permission notice (including the next
  14.  * paragraph) shall be included in all copies or substantial portions of the
  15.  * Software.
  16.  *
  17.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  22.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  23.  * DEALINGS IN THE SOFTWARE.
  24.  */
  25.  
  26. /**
  27.  * \file ir_to_mesa.cpp
  28.  *
  29.  * Translate GLSL IR to Mesa's gl_program representation.
  30.  */
  31.  
  32. #include <stdio.h>
  33. #include "main/compiler.h"
  34. #include "ir.h"
  35. #include "ir_visitor.h"
  36. #include "ir_expression_flattening.h"
  37. #include "ir_uniform.h"
  38. #include "glsl_types.h"
  39. #include "glsl_parser_extras.h"
  40. #include "../glsl/program.h"
  41. #include "ir_optimization.h"
  42. #include "ast.h"
  43. #include "linker.h"
  44.  
  45. #include "main/mtypes.h"
  46. #include "main/shaderapi.h"
  47. #include "main/shaderobj.h"
  48. #include "main/uniforms.h"
  49.  
  50. #include "program/hash_table.h"
  51. #include "program/prog_instruction.h"
  52. #include "program/prog_optimize.h"
  53. #include "program/prog_print.h"
  54. #include "program/program.h"
  55. #include "program/prog_parameter.h"
  56. #include "program/sampler.h"
  57.  
  58.  
  59. static int swizzle_for_size(int size);
  60.  
  61. namespace {
  62.  
  63. class src_reg;
  64. class dst_reg;
  65.  
  66. /**
  67.  * This struct is a corresponding struct to Mesa prog_src_register, with
  68.  * wider fields.
  69.  */
  70. class src_reg {
  71. public:
  72.    src_reg(gl_register_file file, int index, const glsl_type *type)
  73.    {
  74.       this->file = file;
  75.       this->index = index;
  76.       if (type && (type->is_scalar() || type->is_vector() || type->is_matrix()))
  77.          this->swizzle = swizzle_for_size(type->vector_elements);
  78.       else
  79.          this->swizzle = SWIZZLE_XYZW;
  80.       this->negate = 0;
  81.       this->reladdr = NULL;
  82.    }
  83.  
  84.    src_reg()
  85.    {
  86.       this->file = PROGRAM_UNDEFINED;
  87.       this->index = 0;
  88.       this->swizzle = 0;
  89.       this->negate = 0;
  90.       this->reladdr = NULL;
  91.    }
  92.  
  93.    explicit src_reg(dst_reg reg);
  94.  
  95.    gl_register_file file; /**< PROGRAM_* from Mesa */
  96.    int index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */
  97.    GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */
  98.    int negate; /**< NEGATE_XYZW mask from mesa */
  99.    /** Register index should be offset by the integer in this reg. */
  100.    src_reg *reladdr;
  101. };
  102.  
  103. /**
  104.  * Destination operand of an ir_to_mesa_instruction.  All constructors
  105.  * set the COND_TR condition mask.
  106.  */
  107. class dst_reg {
  108. public:
  109.    dst_reg(gl_register_file file, int writemask)
  110.       : file(file), index(0), writemask(writemask), cond_mask(COND_TR),
  111.         reladdr(NULL)
  112.    {
  113.    }
  114.  
  115.    dst_reg()
  116.       : file(PROGRAM_UNDEFINED), index(0), writemask(0), cond_mask(COND_TR),
  117.         reladdr(NULL)
  118.    {
  119.    }
  120.  
  121.    explicit dst_reg(src_reg reg);
  122.  
  123.    gl_register_file file; /**< PROGRAM_* from Mesa */
  124.    int index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */
  125.    int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
  126.    GLuint cond_mask:4;
  127.    /** Register index should be offset by the integer in this reg. */
  128.    src_reg *reladdr;
  129. };
  132.  
  133. } /* anonymous namespace */
  134.  
  135. src_reg::src_reg(dst_reg reg)
  136.    : file(reg.file), index(reg.index), swizzle(SWIZZLE_XYZW), negate(0),
  137.      reladdr(reg.reladdr)
  138. {
  139.    /* Same register and relative address as the destination, read with
  140.     * SWIZZLE_XYZW and no negation.
  141.     */
  142. }
  143.  
  144. dst_reg::dst_reg(src_reg reg)
  145.    : file(reg.file), index(reg.index), writemask(WRITEMASK_XYZW),
  146.      cond_mask(COND_TR), reladdr(reg.reladdr)
  147. {
  148.    /* Same register and relative address as the source; its swizzle and
  149.     * negate are dropped and the writemask is WRITEMASK_XYZW.
  150.     */
  151. }
  152.  
  153. namespace {
  154. /** One entry of the instruction list that ir_to_mesa_visitor::emit() builds. */
  155. class ir_to_mesa_instruction : public exec_node {
  156. public:
  157.    DECLARE_RALLOC_CXX_OPERATORS(ir_to_mesa_instruction)
  158.  
  159.    enum prog_opcode op; /**< opcode handed to emit() */
  160.    dst_reg dst; /**< destination operand */
  161.    src_reg src[3]; /**< sources; the shorter emit() overloads pass undef_src for unused slots */
  162.    /** Pointer to the ir source this tree came from for debugging */
  163.    ir_instruction *ir;
  164.    GLboolean cond_update;
  165.    bool saturate;
  166.    int sampler; /**< sampler index */
  167.    int tex_target; /**< One of TEXTURE_*_INDEX */
  168.    GLboolean tex_shadow;
  169. };
  170. /** Records the register file and index that hold a GLSL variable. */
  171. class variable_storage : public exec_node {
  172. public:
  173.    variable_storage(ir_variable *var, gl_register_file file, int index)
  174.    {
  175.       this->file = file;
  176.       this->index = index;
  177.       this->var = var;
  178.    }
  179.  
  180.    gl_register_file file;
  181.    int index;
  182.    ir_variable *var; /* variable that maps to this, if any */
  183. };
  183. /** Bookkeeping for one function signature; the visitor keeps these in its function_signatures list. */
  184. class function_entry : public exec_node {
  185. public:
  186.    ir_function_signature *sig;
  187.  
  188.    /**
  189.     * Identifier of this function signature used by the program.
  190.     *
  191.     * At the point that Mesa instructions for function calls are
  192.     * generated, we don't know the address of the first instruction of
  193.     * the function body.  So we make the BranchTarget that is called a
  194.     * small integer and rewrite them during set_branchtargets().
  195.     */
  196.    int sig_id;
  197.  
  198.    /**
  199.     * Pointer to first instruction of the function body.
  200.     *
  201.     * Set during function body emits after main() is processed.
  202.     */
  203.    ir_to_mesa_instruction *bgn_inst;
  204.  
  205.    /**
  206.     * Index of the first instruction of the function body in actual
  207.     * Mesa IR.
  208.     *
  209.     * Set after conversion from ir_to_mesa_instruction to prog_instruction.
  210.     */
  211.    int inst;
  212.  
  213.    /** Storage for the return value. */
  214.    src_reg return_reg;
  215. };
  216. /** IR visitor that appends ir_to_mesa_instructions to its \c instructions list. */
  217. class ir_to_mesa_visitor : public ir_visitor {
  218. public:
  219.    ir_to_mesa_visitor();
  220.    ~ir_to_mesa_visitor();
  221.  
  222.    function_entry *current_function;
  223.  
  224.    struct gl_context *ctx;
  225.    struct gl_program *prog;
  226.    struct gl_shader_program *shader_program;
  227.    struct gl_shader_compiler_options *options;
  228.  
  229.    int next_temp; /**< next unassigned temporary index; advanced by get_temp() */
  230.  
  231.    variable_storage *find_variable_storage(const ir_variable *var);
  232.  
  233.    src_reg get_temp(const glsl_type *type);
  234.    void reladdr_to_temp(ir_instruction *ir, src_reg *reg, int *num_reladdr);
  235.  
  236.    src_reg src_reg_for_float(float val);
  237.  
  238.    /**
  239.     * \name Visit methods
  240.     *
  241.     * As typical for the visitor pattern, there must be one \c visit method for
  242.     * each concrete subclass of \c ir_instruction.  Virtual base classes within
  243.     * the hierarchy should not have \c visit methods.
  244.     */
  245.    /*@{*/
  246.    virtual void visit(ir_variable *);
  247.    virtual void visit(ir_loop *);
  248.    virtual void visit(ir_loop_jump *);
  249.    virtual void visit(ir_function_signature *);
  250.    virtual void visit(ir_function *);
  251.    virtual void visit(ir_expression *);
  252.    virtual void visit(ir_swizzle *);
  253.    virtual void visit(ir_dereference_variable  *);
  254.    virtual void visit(ir_dereference_array *);
  255.    virtual void visit(ir_dereference_record *);
  256.    virtual void visit(ir_assignment *);
  257.    virtual void visit(ir_constant *);
  258.    virtual void visit(ir_call *);
  259.    virtual void visit(ir_return *);
  260.    virtual void visit(ir_discard *);
  261.    virtual void visit(ir_texture *);
  262.    virtual void visit(ir_if *);
  263.    virtual void visit(ir_emit_vertex *);
  264.    virtual void visit(ir_end_primitive *);
  265.    /*@}*/
  266.  
  267.    src_reg result; /**< read by callers after accept() to fetch the visited operand's register */
  268.  
  269.    /** List of variable_storage */
  270.    exec_list variables;
  271.  
  272.    /** List of function_entry */
  273.    exec_list function_signatures;
  274.    int next_signature_id;
  275.  
  276.    /** List of ir_to_mesa_instruction */
  277.    exec_list instructions;
  278.  
  279.    /* emit() overloads: each appends one instruction to \c instructions and returns it. */
  280.    ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op);
  281.  
  282.    ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op,
  283.                                 dst_reg dst, src_reg src0);
  284.  
  285.    ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op,
  286.                                 dst_reg dst, src_reg src0, src_reg src1);
  287.  
  288.    ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op,
  289.                                 dst_reg dst,
  290.                                 src_reg src0, src_reg src1, src_reg src2);
  291.  
  292.    /**
  293.     * Emit the correct dot-product instruction for the type of arguments
  294.     */
  295.    ir_to_mesa_instruction * emit_dp(ir_instruction *ir,
  296.                                     dst_reg dst,
  297.                                     src_reg src0,
  298.                                     src_reg src1,
  299.                                     unsigned elements);
  300.  
  301.    void emit_scalar(ir_instruction *ir, enum prog_opcode op,
  302.                     dst_reg dst, src_reg src0);
  303.  
  304.    void emit_scalar(ir_instruction *ir, enum prog_opcode op,
  305.                     dst_reg dst, src_reg src0, src_reg src1);
  306.  
  307.    bool try_emit_mad(ir_expression *ir,
  308.                           int mul_operand);
  309.    bool try_emit_mad_for_and_not(ir_expression *ir,
  310.                                  int mul_operand);
  311.  
  312.    void emit_swz(ir_expression *ir);
  313.  
  314.    bool process_move_condition(ir_rvalue *ir);
  315.  
  316.    void copy_propagate(void);
  317.  
  318.    void *mem_ctx; /**< allocation context passed to new(mem_ctx) */
  319. };
  319.  
  320. } /* anonymous namespace */
  321. /* Placeholder passed by the emit() overloads for source slots an instruction does not use. */
  322. static src_reg undef_src = src_reg(PROGRAM_UNDEFINED, 0, NULL);
  323. /* NOTE(review): dst_reg's second argument is a writemask, yet a SWIZZLE_* value is passed here -- confirm this is intended. */
  324. static dst_reg undef_dst = dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP);
  325. /* Destination of the OPCODE_ARL instructions emitted for relative addressing. */
  326. static dst_reg address_reg = dst_reg(PROGRAM_ADDRESS, WRITEMASK_X);
  327. /** Swizzle reading the first \c size components in order, the last one repeated in the rest. */
  328. static int
  329. swizzle_for_size(int size)
  330. {
  331.    static const int swizzle_by_count[4] = {
  332.       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
  333.       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
  334.       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z),
  335.       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W),
  336.    };
  337.  
  338.    assert((1 <= size) && (size <= 4));
  339.    return swizzle_by_count[size - 1];
  340. }
  341.  
  342. ir_to_mesa_instruction *
  343. ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op,
  344.                          dst_reg dst,
  345.                          src_reg src0, src_reg src1, src_reg src2)
  346. {
  347.    ir_to_mesa_instruction *inst = new(mem_ctx) ir_to_mesa_instruction();
  348.    int num_reladdr = 0;
  349.  
  350.    /* If we have to do relative addressing, we want to load the ARL
  351.     * reg directly for one of the regs, and preload the other reladdr
  352.     * sources into temps.
  353.     */
  354.    num_reladdr += dst.reladdr != NULL;
  355.    num_reladdr += src0.reladdr != NULL;
  356.    num_reladdr += src1.reladdr != NULL;
  357.    num_reladdr += src2.reladdr != NULL;
  358.  
  359.    reladdr_to_temp(ir, &src2, &num_reladdr);
  360.    reladdr_to_temp(ir, &src1, &num_reladdr);
  361.    reladdr_to_temp(ir, &src0, &num_reladdr);
  362.  
  363.    if (dst.reladdr) {
  364.       emit(ir, OPCODE_ARL, address_reg, *dst.reladdr);
  365.       num_reladdr--;
  366.    }
  367.    assert(num_reladdr == 0);
  368.  
  369.    inst->op = op;
  370.    inst->dst = dst;
  371.    inst->src[0] = src0;
  372.    inst->src[1] = src1;
  373.    inst->src[2] = src2;
  374.    inst->ir = ir;
  375.  
  376.    this->instructions.push_tail(inst);
  377.  
  378.    return inst;
  379. }
  380.  
  381. /** Two-source form; the third source slot is passed as undef_src. */
  382. ir_to_mesa_instruction *
  383. ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op,
  384.                          dst_reg dst, src_reg src0, src_reg src1)
  385. {
  386.    return this->emit(ir, op, dst, src0, src1, undef_src);
  387. }
  388. /** One-source form; asserts that the destination writemask is non-zero. */
  389. ir_to_mesa_instruction *
  390. ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op,
  391.                          dst_reg dst, src_reg src0)
  392. {
  393.    assert(dst.writemask != 0);
  394.    return this->emit(ir, op, dst, src0, undef_src, undef_src);
  395. }
  396.  
  397. ir_to_mesa_instruction *
  398. ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op)
  399. {
  400.    return emit(ir, op, undef_dst, undef_src, undef_src, undef_src);
  401. }
  402. /** Emits DP2, DP3 or DP4 according to \c elements, which must be 2, 3 or 4. */
  403. ir_to_mesa_instruction *
  404. ir_to_mesa_visitor::emit_dp(ir_instruction *ir,
  405.                             dst_reg dst, src_reg src0, src_reg src1,
  406.                             unsigned elements)
  407. {
  408.    static const gl_inst_opcode dot_opcodes[] = { OPCODE_DP2, OPCODE_DP3, OPCODE_DP4 };
  409.    /* Catch counts that would index outside the table (e.g. a scalar). */
  410.    assert(elements >= 2 && elements <= 4);
  411.  
  412.    return emit(ir, dot_opcodes[elements - 2], dst, src0, src1);
  413. }
  414.  
  415. /**
  416.  * Emits Mesa scalar opcodes to produce unique answers across channels.
  417.  *
  418.  * Some Mesa opcodes are scalar-only, like ARB_fp/vp.  The src X
  419.  * channel determines the result across all channels.  So to do a vec4
  420.  * of this operation, we want to emit a scalar per source channel used
  421.  * to produce dest channels.
  422.  */
  423. void
  424. ir_to_mesa_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op,
  425.                                 dst_reg dst,
  426.                                 src_reg orig_src0, src_reg orig_src1)
  427. {
  428.    int i, j;
  429.    int done_mask = ~dst.writemask;
  430.  
  431.    /* Mesa RCP is a scalar operation splatting results to all channels,
  432.     * like ARB_fp/vp.  So emit as many RCPs as necessary to cover our
  433.     * dst channels.
  434.     */
  435.    for (i = 0; i < 4; i++) {
  436.       GLuint this_mask = (1 << i);
  437.       ir_to_mesa_instruction *inst;
  438.       src_reg src0 = orig_src0;
  439.       src_reg src1 = orig_src1;
  440.  
  441.       if (done_mask & this_mask)
  442.          continue;
  443.  
  444.       GLuint src0_swiz = GET_SWZ(src0.swizzle, i);
  445.       GLuint src1_swiz = GET_SWZ(src1.swizzle, i);
  446.       for (j = i + 1; j < 4; j++) {
  447.          /* If there is another enabled component in the destination that is
  448.           * derived from the same inputs, generate its value on this pass as
  449.           * well.
  450.           */
  451.          if (!(done_mask & (1 << j)) &&
  452.              GET_SWZ(src0.swizzle, j) == src0_swiz &&
  453.              GET_SWZ(src1.swizzle, j) == src1_swiz) {
  454.             this_mask |= (1 << j);
  455.          }
  456.       }
  457.       src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
  458.                                    src0_swiz, src0_swiz);
  459.       src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz,
  460.                                   src1_swiz, src1_swiz);
  461.  
  462.       inst = emit(ir, op, dst, src0, src1);
  463.       inst->dst.writemask = this_mask;
  464.       done_mask |= this_mask;
  465.    }
  466. }
  467. /** One-source variant of emit_scalar(). */
  468. void
  469. ir_to_mesa_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op,
  470.                                 dst_reg dst, src_reg src0)
  471. {
  472.    /* An XXXX swizzle on the placeholder makes the two-source version
  473.     * group channels by src0 alone. */
  474.    src_reg no_src1 = undef_src;
  475.    no_src1.swizzle = SWIZZLE_XXXX;
  476.    emit_scalar(ir, op, dst, src0, no_src1);
  477. }
  478. /** Adds \c val to the program's parameter list and returns a PROGRAM_CONSTANT source for it. */
  479. src_reg
  480. ir_to_mesa_visitor::src_reg_for_float(float val)
  481. {
  482.    /* The -1 index is a placeholder until the constant has been added. */
  483.    src_reg constant(PROGRAM_CONSTANT, -1, NULL);
  484.    const gl_constant_value *const value = (const gl_constant_value *)&val;
  485.  
  486.    constant.index = _mesa_add_unnamed_constant(this->prog->Parameters,
  487.                                                value, 1, &constant.swizzle);
  488.    return constant;
  489. }
  489.  
  490. static int
  491. type_size(const struct glsl_type *type)
  492. {
  493.    unsigned int i;
  494.    int size;
  495.  
  496.    switch (type->base_type) {
  497.    case GLSL_TYPE_UINT:
  498.    case GLSL_TYPE_INT:
  499.    case GLSL_TYPE_FLOAT:
  500.    case GLSL_TYPE_BOOL:
  501.       if (type->is_matrix()) {
  502.          return type->matrix_columns;
  503.       } else {
  504.          /* Regardless of size of vector, it gets a vec4. This is bad
  505.           * packing for things like floats, but otherwise arrays become a
  506.           * mess.  Hopefully a later pass over the code can pack scalars
  507.           * down if appropriate.
  508.           */
  509.          return 1;
  510.       }
  511.       break;
  512.    case GLSL_TYPE_DOUBLE:
  513.       if (type->is_matrix()) {
  514.          if (type->vector_elements > 2)
  515.             return type->matrix_columns * 2;
  516.          else
  517.             return type->matrix_columns;
  518.       } else {
  519.          if (type->vector_elements > 2)
  520.             return 2;
  521.          else
  522.             return 1;
  523.       }
  524.       break;
  525.    case GLSL_TYPE_ARRAY:
  526.       assert(type->length > 0);
  527.       return type_size(type->fields.array) * type->length;
  528.    case GLSL_TYPE_STRUCT:
  529.       size = 0;
  530.       for (i = 0; i < type->length; i++) {
  531.          size += type_size(type->fields.structure[i].type);
  532.       }
  533.       return size;
  534.    case GLSL_TYPE_SAMPLER:
  535.    case GLSL_TYPE_IMAGE:
  536.       /* Samplers take up one slot in UNIFORMS[], but they're baked in
  537.        * at link time.
  538.        */
  539.       return 1;
  540.    case GLSL_TYPE_ATOMIC_UINT:
  541.    case GLSL_TYPE_VOID:
  542.    case GLSL_TYPE_ERROR:
  543.    case GLSL_TYPE_INTERFACE:
  544.       assert(!"Invalid type in type_size");
  545.       break;
  546.    }
  547.  
  548.    return 0;
  549. }
  550.  
  551. /**
  552.  * Hands out the next run of temporary register numbers for a value of
  553.  * \c type.  These are not SSA values (variable assignments reuse
  554.  * storage); actual register allocation for the Mesa VM happens in a
  555.  * later pass over the Mesa IR.
  556.  */
  557. src_reg
  558. ir_to_mesa_visitor::get_temp(const glsl_type *type)
  559. {
  560.    src_reg temp;
  561.  
  562.    temp.file = PROGRAM_TEMPORARY;
  563.    temp.index = this->next_temp;
  564.    this->next_temp += type_size(type);
  565.    temp.reladdr = NULL;
  566.    temp.negate = 0;
  567.  
  568.    /* Arrays and records get SWIZZLE_NOOP; other types read only the
  569.     * components they have.
  570.     */
  571.    if (type->is_array() || type->is_record())
  572.       temp.swizzle = SWIZZLE_NOOP;
  573.    else
  574.       temp.swizzle = swizzle_for_size(type->vector_elements);
  575.    return temp;
  576. }
  576. /** Returns the storage record made for \c var, or NULL when there is none. */
  577. variable_storage *
  578. ir_to_mesa_visitor::find_variable_storage(const ir_variable *var)
  579. {
  580.    foreach_in_list(variable_storage, candidate, &this->variables) {
  581.       if (var == candidate->var)
  582.          return candidate;
  583.    }
  584.  
  585.    /* No record refers to this variable. */
  586.    return NULL;
  587. }
  587.  
  588. void
  589. ir_to_mesa_visitor::visit(ir_variable *ir)
  590. {
  591.    if (strcmp(ir->name, "gl_FragCoord") == 0) {
  592.       struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
  593.  
  594.       fp->OriginUpperLeft = ir->data.origin_upper_left;
  595.       fp->PixelCenterInteger = ir->data.pixel_center_integer;
  596.    }
  597.  
  598.    if (ir->data.mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) {
  599.       unsigned int i;
  600.       const ir_state_slot *const slots = ir->get_state_slots();
  601.       assert(slots != NULL);
  602.  
  603.       /* Check if this statevar's setup in the STATE file exactly
  604.        * matches how we'll want to reference it as a
  605.        * struct/array/whatever.  If not, then we need to move it into
  606.        * temporary storage and hope that it'll get copy-propagated
  607.        * out.
  608.        */
  609.       for (i = 0; i < ir->get_num_state_slots(); i++) {
  610.          if (slots[i].swizzle != SWIZZLE_XYZW) {
  611.             break;
  612.          }
  613.       }
  614.  
  615.       variable_storage *storage;
  616.       dst_reg dst;
  617.       if (i == ir->get_num_state_slots()) {
  618.          /* We'll set the index later. */
  619.          storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1);
  620.          this->variables.push_tail(storage);
  621.  
  622.          dst = undef_dst;
  623.       } else {
  624.          /* The variable_storage constructor allocates slots based on the size
  625.           * of the type.  However, this had better match the number of state
  626.           * elements that we're going to copy into the new temporary.
  627.           */
  628.          assert((int) ir->get_num_state_slots() == type_size(ir->type));
  629.  
  630.          storage = new(mem_ctx) variable_storage(ir, PROGRAM_TEMPORARY,
  631.                                                  this->next_temp);
  632.          this->variables.push_tail(storage);
  633.          this->next_temp += type_size(ir->type);
  634.  
  635.          dst = dst_reg(src_reg(PROGRAM_TEMPORARY, storage->index, NULL));
  636.       }
  637.  
  638.  
  639.       for (unsigned int i = 0; i < ir->get_num_state_slots(); i++) {
  640.          int index = _mesa_add_state_reference(this->prog->Parameters,
  641.                                                (gl_state_index *)slots[i].tokens);
  642.  
  643.          if (storage->file == PROGRAM_STATE_VAR) {
  644.             if (storage->index == -1) {
  645.                storage->index = index;
  646.             } else {
  647.                assert(index == storage->index + (int)i);
  648.             }
  649.          } else {
  650.             src_reg src(PROGRAM_STATE_VAR, index, NULL);
  651.             src.swizzle = slots[i].swizzle;
  652.             emit(ir, OPCODE_MOV, dst, src);
  653.             /* even a float takes up a whole vec4 reg in a struct/array. */
  654.             dst.index++;
  655.          }
  656.       }
  657.  
  658.       if (storage->file == PROGRAM_TEMPORARY &&
  659.           dst.index != storage->index + (int) ir->get_num_state_slots()) {
  660.          linker_error(this->shader_program,
  661.                       "failed to load builtin uniform `%s' "
  662.                       "(%d/%d regs loaded)\n",
  663.                       ir->name, dst.index - storage->index,
  664.                       type_size(ir->type));
  665.       }
  666.    }
  667. }
  668. /** Wraps the instructions of the loop body in OPCODE_BGNLOOP / OPCODE_ENDLOOP. */
  669. void
  670. ir_to_mesa_visitor::visit(ir_loop *ir)
  671. {
  672.    this->emit(NULL, OPCODE_BGNLOOP);
  673.    visit_exec_list(&ir->body_instructions, this);
  674.    this->emit(NULL, OPCODE_ENDLOOP);
  675. }
  678. /** Emits OPCODE_BRK for a break and OPCODE_CONT for a continue. */
  679. void
  680. ir_to_mesa_visitor::visit(ir_loop_jump *ir)
  681. {
  682.    if (ir->mode == ir_loop_jump::jump_break)
  683.       emit(NULL, OPCODE_BRK);
  684.    else if (ir->mode == ir_loop_jump::jump_continue)
  685.       emit(NULL, OPCODE_CONT);
  686. }
  691.  
  692. /** Not expected to be called: visit(ir_function) walks the body of main() itself. */
  693. void
  694. ir_to_mesa_visitor::visit(ir_function_signature *ir)
  695. {
  696.    (void)ir;
  697.    assert(0);
  698. }
  699. /** Translates the body of main(); every other function is ignored. */
  700. void
  701. ir_to_mesa_visitor::visit(ir_function *ir)
  702. {
  703.    /* We shouldn't see calls to other functions, since they should all
  704.     * be inlined before we get to ir_to_mesa.
  705.     */
  706.    if (strcmp(ir->name, "main") != 0)
  707.       return;
  708.  
  709.    exec_list no_parameters;
  710.    const ir_function_signature *const sig =
  711.       ir->matching_signature(NULL, &no_parameters, false);
  712.    assert(sig);
  713.  
  714.    foreach_in_list(ir_instruction, stmt, &sig->body) {
  715.       stmt->accept(this);
  716.    }
  717. }
  719. /** Emits \c ir as one OPCODE_MAD if operand \c mul_operand is a multiply; else returns false. */
  720. bool
  721. ir_to_mesa_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
  722. {
  723.    ir_expression *const mul = ir->operands[mul_operand]->as_expression();
  724.    if (mul == NULL || mul->operation != ir_binop_mul)
  725.       return false;
  726.  
  727.    /* Visit both factors first, then the other operand of ir. */
  728.    mul->operands[0]->accept(this);
  729.    const src_reg factor0 = this->result;
  730.    mul->operands[1]->accept(this);
  731.    const src_reg factor1 = this->result;
  732.    ir->operands[1 - mul_operand]->accept(this);
  733.    const src_reg addend = this->result;
  734.  
  735.    this->result = get_temp(ir->type);
  736.    emit(ir, OPCODE_MAD, dst_reg(this->result), factor0, factor1, addend);
  737.  
  738.    return true;
  739. }
  742.  
  743. /**
  744.  * Emit AND(a, NOT(b)) as the single instruction MAD(a, -b, a).
  745.  *
  746.  * The logic values are 1.0 for true and 0.0 for false; logical-and is a
  747.  * multiplication, logical-or an addition, and logical-not is (1.0 - x).
  748.  * Hence:
  749.  *
  750.  *     a & !b  =  a * (1 - b)  =  a - (a * b)  =  (a * -b) + a
  751.  *
  752.  * Returns false, without emitting anything, when operand \c try_operand
  753.  * is not a logical-not expression.
  754.  */
  755. bool
  756. ir_to_mesa_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand)
  757. {
  758.    ir_expression *const not_expr = ir->operands[try_operand]->as_expression();
  759.    if (not_expr == NULL || not_expr->operation != ir_unop_logic_not)
  760.       return false;
  761.  
  762.    ir->operands[1 - try_operand]->accept(this);
  763.    const src_reg a = this->result;
  764.  
  765.    not_expr->operands[0]->accept(this);
  766.    src_reg neg_b = this->result;
  767.    neg_b.negate = ~neg_b.negate;
  768.  
  769.    this->result = get_temp(ir->type);
  770.    emit(ir, OPCODE_MAD, dst_reg(this->result), a, neg_b, a);
  771.  
  772.    return true;
  773. }
  782. /** Loads the address register for a relatively addressed \c reg and decrements \c *num_reladdr. */
  783. void
  784. ir_to_mesa_visitor::reladdr_to_temp(ir_instruction *ir,
  785.                                     src_reg *reg, int *num_reladdr)
  786. {
  787.    if (reg->reladdr == NULL)
  788.       return;
  789.  
  790.    emit(ir, OPCODE_ARL, address_reg, *reg->reladdr);
  791.  
  792.    /* Unless this is the last pending operand, move the value into a */
  793.    /* fresh temporary and make \c reg refer to that temporary.       */
  794.    const bool last_pending = (*num_reladdr == 1);
  795.    if (!last_pending) {
  796.       const src_reg copy = get_temp(glsl_type::vec4_type);
  797.       emit(ir, OPCODE_MOV, dst_reg(copy), *reg);
  798.       *reg = copy;
  799.    }
  800.    --*num_reladdr;
  801. }
  801.  
  802. void
  803. ir_to_mesa_visitor::emit_swz(ir_expression *ir)
  804. {
  805.    /* Assume that the vector operator is in a form compatible with OPCODE_SWZ.
  806.     * This means that each of the operands is either an immediate value of -1,
  807.     * 0, or 1, or is a component from one source register (possibly with
  808.     * negation).
  809.     */
  810.    uint8_t components[4] = { 0 };
  811.    bool negate[4] = { false };
  812.    ir_variable *var = NULL;
  813.  
  814.    for (unsigned i = 0; i < ir->type->vector_elements; i++) {
  815.       ir_rvalue *op = ir->operands[i];
  816.  
  817.       assert(op->type->is_scalar());
  818.  
  819.       while (op != NULL) {
  820.          switch (op->ir_type) {
  821.          case ir_type_constant: {
  822.  
  823.             assert(op->type->is_scalar());
  824.  
  825.             const ir_constant *const c = op->as_constant();
  826.             if (c->is_one()) {
  827.                components[i] = SWIZZLE_ONE;
  828.             } else if (c->is_zero()) {
  829.                components[i] = SWIZZLE_ZERO;
  830.             } else if (c->is_negative_one()) {
  831.                components[i] = SWIZZLE_ONE;
  832.                negate[i] = true;
  833.             } else {
  834.                assert(!"SWZ constant must be 0.0 or 1.0.");
  835.             }
  836.  
  837.             op = NULL;
  838.             break;
  839.          }
  840.  
  841.          case ir_type_dereference_variable: {
  842.             ir_dereference_variable *const deref =
  843.                (ir_dereference_variable *) op;
  844.  
  845.             assert((var == NULL) || (deref->var == var));
  846.             components[i] = SWIZZLE_X;
  847.             var = deref->var;
  848.             op = NULL;
  849.             break;
  850.          }
  851.  
  852.          case ir_type_expression: {
  853.             ir_expression *const expr = (ir_expression *) op;
  854.  
  855.             assert(expr->operation == ir_unop_neg);
  856.             negate[i] = true;
  857.  
  858.             op = expr->operands[0];
  859.             break;
  860.          }
  861.  
  862.          case ir_type_swizzle: {
  863.             ir_swizzle *const swiz = (ir_swizzle *) op;
  864.  
  865.             components[i] = swiz->mask.x;
  866.             op = swiz->val;
  867.             break;
  868.          }
  869.  
  870.          default:
  871.             assert(!"Should not get here.");
  872.             return;
  873.          }
  874.       }
  875.    }
  876.  
  877.    assert(var != NULL);
  878.  
  879.    ir_dereference_variable *const deref =
  880.       new(mem_ctx) ir_dereference_variable(var);
  881.  
  882.    this->result.file = PROGRAM_UNDEFINED;
  883.    deref->accept(this);
  884.    if (this->result.file == PROGRAM_UNDEFINED) {
  885.       printf("Failed to get tree for expression operand:\n");
  886.       deref->print();
  887.       printf("\n");
  888.       exit(1);
  889.    }
  890.  
  891.    src_reg src;
  892.  
  893.    src = this->result;
  894.    src.swizzle = MAKE_SWIZZLE4(components[0],
  895.                                components[1],
  896.                                components[2],
  897.                                components[3]);
  898.    src.negate = ((unsigned(negate[0]) << 0)
  899.                  | (unsigned(negate[1]) << 1)
  900.                  | (unsigned(negate[2]) << 2)
  901.                  | (unsigned(negate[3]) << 3));
  902.  
  903.    /* Storage for our result.  Ideally for an assignment we'd be using the
  904.     * actual storage for the result here, instead.
  905.     */
  906.    const src_reg result_src = get_temp(ir->type);
  907.    dst_reg result_dst = dst_reg(result_src);
  908.  
  909.    /* Limit writes to the channels that will be used by result_src later.
  910.     * This does limit this temp's use as a temporary for multi-instruction
  911.     * sequences.
  912.     */
  913.    result_dst.writemask = (1 << ir->type->vector_elements) - 1;
  914.  
  915.    emit(ir, OPCODE_SWZ, result_dst, src);
  916.    this->result = result_src;
  917. }
  918.  
  /**
   * Translate a GLSL IR expression into Mesa IR instructions.
   *
   * The MAD peepholes are tried first and ir_quadop_vector is handed to
   * emit_swz().  Otherwise every operand is visited, a temporary is allocated
   * for the result, and the instruction(s) for ir->operation are emitted.  The
   * source register holding the value is left in this->result.
   */
  919. void
  920. ir_to_mesa_visitor::visit(ir_expression *ir)
  921. {
  922.    unsigned int operand;
  923.    src_reg op[ARRAY_SIZE(ir->operands)];
  924.    src_reg result_src;
  925.    dst_reg result_dst;
  926.  
  927.    /* Quick peephole: Emit OPCODE_MAD(a, b, c) instead of ADD(MUL(a, b), c)
  928.     */
  929.    if (ir->operation == ir_binop_add) {
  930.       if (try_emit_mad(ir, 1))
  931.          return;
  932.       if (try_emit_mad(ir, 0))
  933.          return;
  934.    }
  935.  
  936.    /* Quick peephole: Emit OPCODE_MAD(-a, -b, a) instead of AND(a, NOT(b))
  937.     */
  938.    if (ir->operation == ir_binop_logic_and) {
  939.       if (try_emit_mad_for_and_not(ir, 1))
  940.          return;
  941.       if (try_emit_mad_for_and_not(ir, 0))
  942.          return;
  943.    }
  944.  
  945.    if (ir->operation == ir_quadop_vector) {
  946.       this->emit_swz(ir);
  947.       return;
  948.    }
  949.  
          /* Visit each operand.  A visit that leaves this->result undefined is
           * treated as fatal: the operand is printed and the process exits.
           */
  950.    for (operand = 0; operand < ir->get_num_operands(); operand++) {
  951.       this->result.file = PROGRAM_UNDEFINED;
  952.       ir->operands[operand]->accept(this);
  953.       if (this->result.file == PROGRAM_UNDEFINED) {
  954.          printf("Failed to get tree for expression operand:\n");
  955.          ir->operands[operand]->print();
  956.          printf("\n");
  957.          exit(1);
  958.       }
  959.       op[operand] = this->result;
  960.  
  961.       /* Matrix expression operands should have been broken down to vector
  962.        * operations already.
  963.        */
  964.       assert(!ir->operands[operand]->type->is_matrix());
  965.    }
  966.  
          /* Component count passed to emit_dp() by the all_equal / any_nequal
           * cases below: the wider of the first two operands.
           */
  967.    int vector_elements = ir->operands[0]->type->vector_elements;
  968.    if (ir->operands[1]) {
  969.       vector_elements = MAX2(vector_elements,
  970.                              ir->operands[1]->type->vector_elements);
  971.    }
  972.  
  973.    this->result.file = PROGRAM_UNDEFINED;
  974.  
  975.    /* Storage for our result.  Ideally for an assignment we'd be using
  976.     * the actual storage for the result here, instead.
  977.     */
  978.    result_src = get_temp(ir->type);
  979.    /* convenience for the emit functions below. */
  980.    result_dst = dst_reg(result_src);
  981.    /* Limit writes to the channels that will be used by result_src later.
  982.     * This does limit this temp's use as a temporary for multi-instruction
  983.     * sequences.
  984.     */
  985.    result_dst.writemask = (1 << ir->type->vector_elements) - 1;
  986.  
  987.    switch (ir->operation) {
  988.    case ir_unop_logic_not:
  989.       /* Previously 'SEQ dst, src, 0.0' was used for this.  However, many
  990.        * older GPUs implement SEQ using multiple instructions (i915 uses two
  991.        * SGE instructions and a MUL instruction).  Since our logic values are
  992.        * 0.0 and 1.0, 1-x also implements !x.
  993.        */
  994.       op[0].negate = ~op[0].negate;
  995.       emit(ir, OPCODE_ADD, result_dst, op[0], src_reg_for_float(1.0));
  996.       break;
  997.    case ir_unop_neg:
             /* No instruction is emitted: the operand's negate bits are flipped
              * and the operand itself becomes the result.
              */
  998.       op[0].negate = ~op[0].negate;
  999.       result_src = op[0];
  1000.       break;
  1001.    case ir_unop_abs:
  1002.       emit(ir, OPCODE_ABS, result_dst, op[0]);
  1003.       break;
  1004.    case ir_unop_sign:
  1005.       emit(ir, OPCODE_SSG, result_dst, op[0]);
  1006.       break;
  1007.    case ir_unop_rcp:
  1008.       emit_scalar(ir, OPCODE_RCP, result_dst, op[0]);
  1009.       break;
  1010.  
  1011.    case ir_unop_exp2:
  1012.       emit_scalar(ir, OPCODE_EX2, result_dst, op[0]);
  1013.       break;
  1014.    case ir_unop_exp:
  1015.    case ir_unop_log:
  1016.       assert(!"not reached: should be handled by ir_explog_to_explog2");
  1017.       break;
  1018.    case ir_unop_log2:
  1019.       emit_scalar(ir, OPCODE_LG2, result_dst, op[0]);
  1020.       break;
  1021.    case ir_unop_sin:
  1022.       emit_scalar(ir, OPCODE_SIN, result_dst, op[0]);
  1023.       break;
  1024.    case ir_unop_cos:
  1025.       emit_scalar(ir, OPCODE_COS, result_dst, op[0]);
  1026.       break;
  1027.  
  1028.    case ir_unop_dFdx:
  1029.       emit(ir, OPCODE_DDX, result_dst, op[0]);
  1030.       break;
  1031.    case ir_unop_dFdy:
  1032.       emit(ir, OPCODE_DDY, result_dst, op[0]);
  1033.       break;
  1034.  
  1035.    case ir_unop_saturate: {
  1036.       ir_to_mesa_instruction *inst = emit(ir, OPCODE_MOV,
  1037.                                           result_dst, op[0]);
  1038.       inst->saturate = true;
  1039.       break;
  1040.    }
  1041.    case ir_unop_noise: {
              /* The opcode is selected by offsetting OPCODE_NOISE1 with the
               * operand's component count.
               */
  1042.       const enum prog_opcode opcode =
  1043.          prog_opcode(OPCODE_NOISE1
  1044.                      + (ir->operands[0]->type->vector_elements) - 1);
  1045.       assert((opcode >= OPCODE_NOISE1) && (opcode <= OPCODE_NOISE4));
  1046.  
  1047.       emit(ir, opcode, result_dst, op[0]);
  1048.       break;
  1049.    }
  1050.  
  1051.    case ir_binop_add:
  1052.       emit(ir, OPCODE_ADD, result_dst, op[0], op[1]);
  1053.       break;
  1054.    case ir_binop_sub:
  1055.       emit(ir, OPCODE_SUB, result_dst, op[0], op[1]);
  1056.       break;
  1057.  
  1058.    case ir_binop_mul:
  1059.       emit(ir, OPCODE_MUL, result_dst, op[0], op[1]);
  1060.       break;
  1061.    case ir_binop_div:
  1062.       assert(!"not reached: should be handled by ir_div_to_mul_rcp");
  1063.       break;
  1064.    case ir_binop_mod:
  1065.       /* Floating point should be lowered by MOD_TO_FLOOR in the compiler. */
  1066.       assert(ir->type->is_integer());
              /* NOTE(review): a MUL is emitted here, which is not a modulus.
               * Presumably a placeholder like the integer ops further down --
               * confirm.
               */
  1067.       emit(ir, OPCODE_MUL, result_dst, op[0], op[1]);
  1068.       break;
  1069.  
  1070.    case ir_binop_less:
  1071.       emit(ir, OPCODE_SLT, result_dst, op[0], op[1]);
  1072.       break;
  1073.    case ir_binop_greater:
  1074.       emit(ir, OPCODE_SGT, result_dst, op[0], op[1]);
  1075.       break;
  1076.    case ir_binop_lequal:
  1077.       emit(ir, OPCODE_SLE, result_dst, op[0], op[1]);
  1078.       break;
  1079.    case ir_binop_gequal:
  1080.       emit(ir, OPCODE_SGE, result_dst, op[0], op[1]);
  1081.       break;
  1082.    case ir_binop_equal:
  1083.       emit(ir, OPCODE_SEQ, result_dst, op[0], op[1]);
  1084.       break;
  1085.    case ir_binop_nequal:
  1086.       emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);
  1087.       break;
  1088.    case ir_binop_all_equal:
  1089.       /* "==" operator producing a scalar boolean. */
  1090.       if (ir->operands[0]->type->is_vector() ||
  1091.           ir->operands[1]->type->is_vector()) {
  1092.          src_reg temp = get_temp(glsl_type::vec4_type);
  1093.          emit(ir, OPCODE_SNE, dst_reg(temp), op[0], op[1]);
  1094.  
  1095.          /* After the dot-product, the value will be an integer on the
  1096.           * range [0,4].  The SGE below maps zero to 1.0 and positive values to zero.
  1097.           */
  1098.          emit_dp(ir, result_dst, temp, temp, vector_elements);
  1099.  
  1100.          /* Negating the result of the dot-product gives values on the range
  1101.           * [-4, 0].  Zero becomes 1.0, and negative values become zero.  This
  1102.           * is achieved using SGE.
  1103.           */
  1104.          src_reg sge_src = result_src;
  1105.          sge_src.negate = ~sge_src.negate;
  1106.          emit(ir, OPCODE_SGE, result_dst, sge_src, src_reg_for_float(0.0));
  1107.       } else {
  1108.          emit(ir, OPCODE_SEQ, result_dst, op[0], op[1]);
  1109.       }
  1110.       break;
  1111.    case ir_binop_any_nequal:
  1112.       /* "!=" operator producing a scalar boolean. */
  1113.       if (ir->operands[0]->type->is_vector() ||
  1114.           ir->operands[1]->type->is_vector()) {
  1115.          src_reg temp = get_temp(glsl_type::vec4_type);
  1116.          emit(ir, OPCODE_SNE, dst_reg(temp), op[0], op[1]);
  1117.  
  1118.          /* After the dot-product, the value will be an integer on the
  1119.           * range [0,4].  Zero stays zero, and positive values become 1.0.
  1120.           */
  1121.          ir_to_mesa_instruction *const dp =
  1122.             emit_dp(ir, result_dst, temp, temp, vector_elements);
  1123.          if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
  1124.             /* The clamping to [0,1] can be done for free in the fragment
  1125.              * shader with a saturate.
  1126.              */
  1127.             dp->saturate = true;
  1128.          } else {
  1129.             /* Negating the result of the dot-product gives values on the range
  1130.              * [-4, 0].  Zero stays zero, and negative values become 1.0.  This
  1131.              * is achieved using SLT.
  1132.              */
  1133.             src_reg slt_src = result_src;
  1134.             slt_src.negate = ~slt_src.negate;
  1135.             emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0));
  1136.          }
  1137.       } else {
  1138.          emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);
  1139.       }
  1140.       break;
  1141.  
  1142.    case ir_unop_any: {
  1143.       assert(ir->operands[0]->type->is_vector());
  1144.  
  1145.       /* After the dot-product, the value will be an integer on the
  1146.        * range [0,4].  Zero stays zero, and positive values become 1.0.
  1147.        */
  1148.       ir_to_mesa_instruction *const dp =
  1149.          emit_dp(ir, result_dst, op[0], op[0],
  1150.                  ir->operands[0]->type->vector_elements);
  1151.       if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
  1152.          /* The clamping to [0,1] can be done for free in the fragment
  1153.           * shader with a saturate.
  1154.           */
  1155.          dp->saturate = true;
  1156.       } else {
  1157.          /* Negating the result of the dot-product gives values on the range
  1158.           * [-4, 0].  Zero stays zero, and negative values become 1.0.  This
  1159.           * is achieved using SLT.
  1160.           */
  1161.          src_reg slt_src = result_src;
  1162.          slt_src.negate = ~slt_src.negate;
  1163.          emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0));
  1164.       }
  1165.       break;
  1166.    }
  1167.  
  1168.    case ir_binop_logic_xor:
  1169.       emit(ir, OPCODE_SNE, result_dst, op[0], op[1]);
  1170.       break;
  1171.  
  1172.    case ir_binop_logic_or: {
  1173.       /* After the addition, the value will be an integer on the
  1174.        * range [0,2].  Zero stays zero, and positive values become 1.0.
  1175.        */
  1176.       ir_to_mesa_instruction *add =
  1177.          emit(ir, OPCODE_ADD, result_dst, op[0], op[1]);
  1178.       if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
  1179.          /* The clamping to [0,1] can be done for free in the fragment
  1180.           * shader with a saturate.
  1181.           */
  1182.          add->saturate = true;
  1183.       } else {
  1184.          /* Negating the result of the addition gives values on the range
  1185.           * [-2, 0].  Zero stays zero, and negative values become 1.0.  This
  1186.           * is achieved using SLT.
  1187.           */
  1188.          src_reg slt_src = result_src;
  1189.          slt_src.negate = ~slt_src.negate;
  1190.          emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0));
  1191.       }
  1192.       break;
  1193.    }
  1194.  
  1195.    case ir_binop_logic_and:
  1196.       /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */
  1197.       emit(ir, OPCODE_MUL, result_dst, op[0], op[1]);
  1198.       break;
  1199.  
  1200.    case ir_binop_dot:
  1201.       assert(ir->operands[0]->type->is_vector());
  1202.       assert(ir->operands[0]->type == ir->operands[1]->type);
  1203.       emit_dp(ir, result_dst, op[0], op[1],
  1204.               ir->operands[0]->type->vector_elements);
  1205.       break;
  1206.  
  1207.    case ir_unop_sqrt:
  1208.       /* sqrt(x) = x * rsq(x). */
  1209.       emit_scalar(ir, OPCODE_RSQ, result_dst, op[0]);
  1210.       emit(ir, OPCODE_MUL, result_dst, result_src, op[0]);
  1211.       /* For incoming channels <= 0, set the result to 0. */
  1212.       op[0].negate = ~op[0].negate;
  1213.       emit(ir, OPCODE_CMP, result_dst,
  1214.                           op[0], result_src, src_reg_for_float(0.0));
  1215.       break;
  1216.    case ir_unop_rsq:
  1217.       emit_scalar(ir, OPCODE_RSQ, result_dst, op[0]);
  1218.       break;
  1219.    case ir_unop_i2f:
  1220.    case ir_unop_u2f:
  1221.    case ir_unop_b2f:
  1222.    case ir_unop_b2i:
  1223.    case ir_unop_i2u:
  1224.    case ir_unop_u2i:
  1225.       /* Mesa IR lacks types, ints are stored as truncated floats. */
  1226.       result_src = op[0];
  1227.       break;
  1228.    case ir_unop_f2i:
  1229.    case ir_unop_f2u:
  1230.       emit(ir, OPCODE_TRUNC, result_dst, op[0]);
  1231.       break;
  1232.    case ir_unop_f2b:
  1233.    case ir_unop_i2b:
  1234.       emit(ir, OPCODE_SNE, result_dst,
  1235.                           op[0], src_reg_for_float(0.0));
  1236.       break;
  1237.    case ir_unop_bitcast_f2i: // Ignore these 4, they can't happen here anyway
  1238.    case ir_unop_bitcast_f2u:
  1239.    case ir_unop_bitcast_i2f:
  1240.    case ir_unop_bitcast_u2f:
              /* Nothing is emitted; the result is the temporary allocated above,
               * which no instruction has written.
               */
  1241.       break;
  1242.    case ir_unop_trunc:
  1243.       emit(ir, OPCODE_TRUNC, result_dst, op[0]);
  1244.       break;
  1245.    case ir_unop_ceil:
              /* ceil(x) is computed as -floor(-x): negate the operand, emit FLR,
               * then negate the result register.
               */
  1246.       op[0].negate = ~op[0].negate;
  1247.       emit(ir, OPCODE_FLR, result_dst, op[0]);
  1248.       result_src.negate = ~result_src.negate;
  1249.       break;
  1250.    case ir_unop_floor:
  1251.       emit(ir, OPCODE_FLR, result_dst, op[0]);
  1252.       break;
  1253.    case ir_unop_fract:
  1254.       emit(ir, OPCODE_FRC, result_dst, op[0]);
  1255.       break;
  1256.    case ir_unop_pack_snorm_2x16:
  1257.    case ir_unop_pack_snorm_4x8:
  1258.    case ir_unop_pack_unorm_2x16:
  1259.    case ir_unop_pack_unorm_4x8:
  1260.    case ir_unop_pack_half_2x16:
  1261.    case ir_unop_pack_double_2x32:
  1262.    case ir_unop_unpack_snorm_2x16:
  1263.    case ir_unop_unpack_snorm_4x8:
  1264.    case ir_unop_unpack_unorm_2x16:
  1265.    case ir_unop_unpack_unorm_4x8:
  1266.    case ir_unop_unpack_half_2x16:
  1267.    case ir_unop_unpack_half_2x16_split_x:
  1268.    case ir_unop_unpack_half_2x16_split_y:
  1269.    case ir_unop_unpack_double_2x32:
  1270.    case ir_binop_pack_half_2x16_split:
  1271.    case ir_unop_bitfield_reverse:
  1272.    case ir_unop_bit_count:
  1273.    case ir_unop_find_msb:
  1274.    case ir_unop_find_lsb:
  1275.    case ir_unop_d2f:
  1276.    case ir_unop_f2d:
  1277.    case ir_unop_d2i:
  1278.    case ir_unop_i2d:
  1279.    case ir_unop_d2u:
  1280.    case ir_unop_u2d:
  1281.    case ir_unop_d2b:
  1282.    case ir_unop_frexp_sig:
  1283.    case ir_unop_frexp_exp:
  1284.       assert(!"not supported");
  1285.       break;
  1286.    case ir_binop_min:
  1287.       emit(ir, OPCODE_MIN, result_dst, op[0], op[1]);
  1288.       break;
  1289.    case ir_binop_max:
  1290.       emit(ir, OPCODE_MAX, result_dst, op[0], op[1]);
  1291.       break;
  1292.    case ir_binop_pow:
  1293.       emit_scalar(ir, OPCODE_POW, result_dst, op[0], op[1]);
  1294.       break;
  1295.  
  1296.       /* GLSL 1.30 integer ops are unsupported in Mesa IR, but since
  1297.        * hardware backends have no way to avoid Mesa IR generation
  1298.        * even if they don't use it, we need to emit "something" and
  1299.        * continue.
  1300.        */
  1301.    case ir_binop_lshift:
  1302.    case ir_binop_rshift:
  1303.    case ir_binop_bit_and:
  1304.    case ir_binop_bit_xor:
  1305.    case ir_binop_bit_or:
  1306.       emit(ir, OPCODE_ADD, result_dst, op[0], op[1]);
  1307.       break;
  1308.  
  1309.    case ir_unop_bit_not:
  1310.    case ir_unop_round_even:
  1311.       emit(ir, OPCODE_MOV, result_dst, op[0]);
  1312.       break;
  1313.  
  1314.    case ir_binop_ubo_load:
  1315.       assert(!"not supported");
  1316.       break;
  1317.  
  1318.    case ir_triop_lrp:
  1319.       /* ir_triop_lrp operands are (x, y, a) while
  1320.        * OPCODE_LRP operands are (a, y, x) to match ARB_fragment_program.
  1321.        */
  1322.       emit(ir, OPCODE_LRP, result_dst, op[2], op[1], op[0]);
  1323.       break;
  1324.  
  1325.    case ir_binop_vector_extract:
  1326.    case ir_binop_bfm:
  1327.    case ir_triop_fma:
  1328.    case ir_triop_bfi:
  1329.    case ir_triop_bitfield_extract:
  1330.    case ir_triop_vector_insert:
  1331.    case ir_quadop_bitfield_insert:
  1332.    case ir_binop_ldexp:
  1333.    case ir_triop_csel:
  1334.    case ir_binop_carry:
  1335.    case ir_binop_borrow:
  1336.    case ir_binop_imul_high:
  1337.    case ir_unop_interpolate_at_centroid:
  1338.    case ir_binop_interpolate_at_offset:
  1339.    case ir_binop_interpolate_at_sample:
  1340.    case ir_unop_dFdx_coarse:
  1341.    case ir_unop_dFdx_fine:
  1342.    case ir_unop_dFdy_coarse:
  1343.    case ir_unop_dFdy_fine:
  1344.       assert(!"not supported");
  1345.       break;
  1346.  
  1347.    case ir_quadop_vector:
  1348.       /* This operation should have already been handled.
  1349.        */
  1350.       assert(!"Should not get here.");
  1351.       break;
  1352.    }
  1353.  
           /* Publish the register holding the value; several cases above replace
            * result_src with an operand instead of writing the temporary.
            */
  1354.    this->result = result_src;
  1355. }
  1356.  
  1357.  
  1358. void
  1359. ir_to_mesa_visitor::visit(ir_swizzle *ir)
  1360. {
  1361.    src_reg src;
  1362.    int i;
  1363.    int swizzle[4];
  1364.  
  1365.    /* Note that this is only swizzles in expressions, not those on the left
  1366.     * hand side of an assignment, which do write masking.  See ir_assignment
  1367.     * for that.
  1368.     */
  1369.  
  1370.    ir->val->accept(this);
  1371.    src = this->result;
  1372.    assert(src.file != PROGRAM_UNDEFINED);
  1373.    assert(ir->type->vector_elements > 0);
  1374.  
  1375.    for (i = 0; i < 4; i++) {
  1376.       if (i < ir->type->vector_elements) {
  1377.          switch (i) {
  1378.          case 0:
  1379.             swizzle[i] = GET_SWZ(src.swizzle, ir->mask.x);
  1380.             break;
  1381.          case 1:
  1382.             swizzle[i] = GET_SWZ(src.swizzle, ir->mask.y);
  1383.             break;
  1384.          case 2:
  1385.             swizzle[i] = GET_SWZ(src.swizzle, ir->mask.z);
  1386.             break;
  1387.          case 3:
  1388.             swizzle[i] = GET_SWZ(src.swizzle, ir->mask.w);
  1389.             break;
  1390.          }
  1391.       } else {
  1392.          /* If the type is smaller than a vec4, replicate the last
  1393.           * channel out.
  1394.           */
  1395.          swizzle[i] = swizzle[ir->type->vector_elements - 1];
  1396.       }
  1397.    }
  1398.  
  1399.    src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
  1400.  
  1401.    this->result = src;
  1402. }
  1403.  
  1404. void
  1405. ir_to_mesa_visitor::visit(ir_dereference_variable *ir)
  1406. {
  1407.    variable_storage *entry = find_variable_storage(ir->var);
  1408.    ir_variable *var = ir->var;
  1409.  
  1410.    if (!entry) {
  1411.       switch (var->data.mode) {
  1412.       case ir_var_uniform:
  1413.          entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM,
  1414.                                                var->data.location);
  1415.          this->variables.push_tail(entry);
  1416.          break;
  1417.       case ir_var_shader_in:
  1418.          /* The linker assigns locations for varyings and attributes,
  1419.           * including deprecated builtins (like gl_Color),
  1420.           * user-assigned generic attributes (glBindVertexLocation),
  1421.           * and user-defined varyings.
  1422.           */
  1423.          assert(var->data.location != -1);
  1424.          entry = new(mem_ctx) variable_storage(var,
  1425.                                                PROGRAM_INPUT,
  1426.                                                var->data.location);
  1427.          break;
  1428.       case ir_var_shader_out:
  1429.          assert(var->data.location != -1);
  1430.          entry = new(mem_ctx) variable_storage(var,
  1431.                                                PROGRAM_OUTPUT,
  1432.                                                var->data.location);
  1433.          break;
  1434.       case ir_var_system_value:
  1435.          entry = new(mem_ctx) variable_storage(var,
  1436.                                                PROGRAM_SYSTEM_VALUE,
  1437.                                                var->data.location);
  1438.          break;
  1439.       case ir_var_auto:
  1440.       case ir_var_temporary:
  1441.          entry = new(mem_ctx) variable_storage(var, PROGRAM_TEMPORARY,
  1442.                                                this->next_temp);
  1443.          this->variables.push_tail(entry);
  1444.  
  1445.          next_temp += type_size(var->type);
  1446.          break;
  1447.       }
  1448.  
  1449.       if (!entry) {
  1450.          printf("Failed to make storage for %s\n", var->name);
  1451.          exit(1);
  1452.       }
  1453.    }
  1454.  
  1455.    this->result = src_reg(entry->file, entry->index, var->type);
  1456. }
  1457.  
  1458. void
  1459. ir_to_mesa_visitor::visit(ir_dereference_array *ir)
  1460. {
  1461.    ir_constant *index;
  1462.    src_reg src;
  1463.    int element_size = type_size(ir->type);
  1464.  
  1465.    index = ir->array_index->constant_expression_value();
  1466.  
  1467.    ir->array->accept(this);
  1468.    src = this->result;
  1469.  
  1470.    if (index) {
  1471.       src.index += index->value.i[0] * element_size;
  1472.    } else {
  1473.       /* Variable index array dereference.  It eats the "vec4" of the
  1474.        * base of the array and an index that offsets the Mesa register
  1475.        * index.
  1476.        */
  1477.       ir->array_index->accept(this);
  1478.  
  1479.       src_reg index_reg;
  1480.  
  1481.       if (element_size == 1) {
  1482.          index_reg = this->result;
  1483.       } else {
  1484.          index_reg = get_temp(glsl_type::float_type);
  1485.  
  1486.          emit(ir, OPCODE_MUL, dst_reg(index_reg),
  1487.               this->result, src_reg_for_float(element_size));
  1488.       }
  1489.  
  1490.       /* If there was already a relative address register involved, add the
  1491.        * new and the old together to get the new offset.
  1492.        */
  1493.       if (src.reladdr != NULL)  {
  1494.          src_reg accum_reg = get_temp(glsl_type::float_type);
  1495.  
  1496.          emit(ir, OPCODE_ADD, dst_reg(accum_reg),
  1497.               index_reg, *src.reladdr);
  1498.  
  1499.          index_reg = accum_reg;
  1500.       }
  1501.  
  1502.       src.reladdr = ralloc(mem_ctx, src_reg);
  1503.       memcpy(src.reladdr, &index_reg, sizeof(index_reg));
  1504.    }
  1505.  
  1506.    /* If the type is smaller than a vec4, replicate the last channel out. */
  1507.    if (ir->type->is_scalar() || ir->type->is_vector())
  1508.       src.swizzle = swizzle_for_size(ir->type->vector_elements);
  1509.    else
  1510.       src.swizzle = SWIZZLE_NOOP;
  1511.  
  1512.    this->result = src;
  1513. }
  1514.  
  1515. void
  1516. ir_to_mesa_visitor::visit(ir_dereference_record *ir)
  1517. {
  1518.    unsigned int i;
  1519.    const glsl_type *struct_type = ir->record->type;
  1520.    int offset = 0;
  1521.  
  1522.    ir->record->accept(this);
  1523.  
  1524.    for (i = 0; i < struct_type->length; i++) {
  1525.       if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
  1526.          break;
  1527.       offset += type_size(struct_type->fields.structure[i].type);
  1528.    }
  1529.  
  1530.    /* If the type is smaller than a vec4, replicate the last channel out. */
  1531.    if (ir->type->is_scalar() || ir->type->is_vector())
  1532.       this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
  1533.    else
  1534.       this->result.swizzle = SWIZZLE_NOOP;
  1535.  
  1536.    this->result.index += offset;
  1537. }
  1538.  
  1539. /**
  1540.  * We want to be careful in assignment setup to hit the actual storage
  1541.  * instead of potentially using a temporary like we might with the
  1542.  * ir_dereference handler.
  1543.  */
  1544. static dst_reg
  1545. get_assignment_lhs(ir_dereference *ir, ir_to_mesa_visitor *v)
  1546. {
  1547.    /* The LHS must be a dereference.  If the LHS is a variable indexed array
  1548.     * access of a vector, it must be separated into a series conditional moves
  1549.     * before reaching this point (see ir_vec_index_to_cond_assign).
  1550.     */
  1551.    assert(ir->as_dereference());
  1552.    ir_dereference_array *deref_array = ir->as_dereference_array();
  1553.    if (deref_array) {
  1554.       assert(!deref_array->array->type->is_vector());
  1555.    }
  1556.  
  1557.    /* Use the rvalue deref handler for the most part.  We'll ignore
  1558.     * swizzles in it and write swizzles using writemask, though.
  1559.     */
  1560.    ir->accept(v);
  1561.    return dst_reg(v->result);
  1562. }
  1563.  
  1564. /**
  1565.  * Process the condition of a conditional assignment
  1566.  *
  1567.  * Examines the condition of a conditional assignment to generate the optimal
  1568.  * first operand of a \c CMP instruction.  If the condition is a relational
  1569.  * operator with 0 (e.g., \c ir_binop_less), the value being compared will be
  1570.  * used as the source for the \c CMP instruction.  Otherwise the comparison
  1571.  * is processed to a boolean result, and the boolean result is used as the
  1572.  * operand to the CMP instruction.
  1573.  */
  1574. bool
  1575. ir_to_mesa_visitor::process_move_condition(ir_rvalue *ir)
  1576. {
  1577.    ir_rvalue *src_ir = ir;
  1578.    bool negate = true;
  1579.    bool switch_order = false;
  1580.  
  1581.    ir_expression *const expr = ir->as_expression();
  1582.    if ((expr != NULL) && (expr->get_num_operands() == 2)) {
  1583.       bool zero_on_left = false;
  1584.  
  1585.       if (expr->operands[0]->is_zero()) {
  1586.          src_ir = expr->operands[1];
  1587.          zero_on_left = true;
  1588.       } else if (expr->operands[1]->is_zero()) {
  1589.          src_ir = expr->operands[0];
  1590.          zero_on_left = false;
  1591.       }
  1592.  
  1593.       /*      a is -  0  +            -  0  +
  1594.        * (a <  0)  T  F  F  ( a < 0)  T  F  F
  1595.        * (0 <  a)  F  F  T  (-a < 0)  F  F  T
  1596.        * (a <= 0)  T  T  F  (-a < 0)  F  F  T  (swap order of other operands)
  1597.        * (0 <= a)  F  T  T  ( a < 0)  T  F  F  (swap order of other operands)
  1598.        * (a >  0)  F  F  T  (-a < 0)  F  F  T
  1599.        * (0 >  a)  T  F  F  ( a < 0)  T  F  F
  1600.        * (a >= 0)  F  T  T  ( a < 0)  T  F  F  (swap order of other operands)
  1601.        * (0 >= a)  T  T  F  (-a < 0)  F  F  T  (swap order of other operands)
  1602.        *
  1603.        * Note that exchanging the order of 0 and 'a' in the comparison simply
  1604.        * means that the value of 'a' should be negated.
  1605.        */
  1606.       if (src_ir != ir) {
  1607.          switch (expr->operation) {
  1608.          case ir_binop_less:
  1609.             switch_order = false;
  1610.             negate = zero_on_left;
  1611.             break;
  1612.  
  1613.          case ir_binop_greater:
  1614.             switch_order = false;
  1615.             negate = !zero_on_left;
  1616.             break;
  1617.  
  1618.          case ir_binop_lequal:
  1619.             switch_order = true;
  1620.             negate = !zero_on_left;
  1621.             break;
  1622.  
  1623.          case ir_binop_gequal:
  1624.             switch_order = true;
  1625.             negate = zero_on_left;
  1626.             break;
  1627.  
  1628.          default:
  1629.             /* This isn't the right kind of comparison afterall, so make sure
  1630.              * the whole condition is visited.
  1631.              */
  1632.             src_ir = ir;
  1633.             break;
  1634.          }
  1635.       }
  1636.    }
  1637.  
  1638.    src_ir->accept(this);
  1639.  
  1640.    /* We use the OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the
  1641.     * condition we produced is 0.0 or 1.0.  By flipping the sign, we can
  1642.     * choose which value OPCODE_CMP produces without an extra instruction
  1643.     * computing the condition.
  1644.     */
  1645.    if (negate)
  1646.       this->result.negate = ~this->result.negate;
  1647.  
  1648.    return switch_order;
  1649. }
  1650.  
  1651. void
  1652. ir_to_mesa_visitor::visit(ir_assignment *ir)
  1653. {
  1654.    dst_reg l;
  1655.    src_reg r;
  1656.    int i;
  1657.  
  1658.    ir->rhs->accept(this);
  1659.    r = this->result;
  1660.  
  1661.    l = get_assignment_lhs(ir->lhs, this);
  1662.  
  1663.    /* FINISHME: This should really set to the correct maximal writemask for each
  1664.     * FINISHME: component written (in the loops below).  This case can only
  1665.     * FINISHME: occur for matrices, arrays, and structures.
  1666.     */
  1667.    if (ir->write_mask == 0) {
  1668.       assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
  1669.       l.writemask = WRITEMASK_XYZW;
  1670.    } else if (ir->lhs->type->is_scalar()) {
  1671.       /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the
  1672.        * FINISHME: W component of fragment shader output zero, work correctly.
  1673.        */
  1674.       l.writemask = WRITEMASK_XYZW;
  1675.    } else {
  1676.       int swizzles[4];
  1677.       int first_enabled_chan = 0;
  1678.       int rhs_chan = 0;
  1679.  
  1680.       assert(ir->lhs->type->is_vector());
  1681.       l.writemask = ir->write_mask;
  1682.  
  1683.       for (int i = 0; i < 4; i++) {
  1684.          if (l.writemask & (1 << i)) {
  1685.             first_enabled_chan = GET_SWZ(r.swizzle, i);
  1686.             break;
  1687.          }
  1688.       }
  1689.  
  1690.       /* Swizzle a small RHS vector into the channels being written.
  1691.        *
  1692.        * glsl ir treats write_mask as dictating how many channels are
  1693.        * present on the RHS while Mesa IR treats write_mask as just
  1694.        * showing which channels of the vec4 RHS get written.
  1695.        */
  1696.       for (int i = 0; i < 4; i++) {
  1697.          if (l.writemask & (1 << i))
  1698.             swizzles[i] = GET_SWZ(r.swizzle, rhs_chan++);
  1699.          else
  1700.             swizzles[i] = first_enabled_chan;
  1701.       }
  1702.       r.swizzle = MAKE_SWIZZLE4(swizzles[0], swizzles[1],
  1703.                                 swizzles[2], swizzles[3]);
  1704.    }
  1705.  
  1706.    assert(l.file != PROGRAM_UNDEFINED);
  1707.    assert(r.file != PROGRAM_UNDEFINED);
  1708.  
  1709.    if (ir->condition) {
  1710.       const bool switch_order = this->process_move_condition(ir->condition);
  1711.       src_reg condition = this->result;
  1712.  
  1713.       for (i = 0; i < type_size(ir->lhs->type); i++) {
  1714.          if (switch_order) {
  1715.             emit(ir, OPCODE_CMP, l, condition, src_reg(l), r);
  1716.          } else {
  1717.             emit(ir, OPCODE_CMP, l, condition, r, src_reg(l));
  1718.          }
  1719.  
  1720.          l.index++;
  1721.          r.index++;
  1722.       }
  1723.    } else {
  1724.       for (i = 0; i < type_size(ir->lhs->type); i++) {
  1725.          emit(ir, OPCODE_MOV, l, r);
  1726.          l.index++;
  1727.          r.index++;
  1728.       }
  1729.    }
  1730. }
  1731.  
  1732.  
  1733. void
  1734. ir_to_mesa_visitor::visit(ir_constant *ir)
  1735. {
  1736.    src_reg src;
  1737.    GLfloat stack_vals[4] = { 0 };
  1738.    GLfloat *values = stack_vals;
  1739.    unsigned int i;
  1740.  
  1741.    /* Unfortunately, 4 floats is all we can get into
  1742.     * _mesa_add_unnamed_constant.  So, make a temp to store an
  1743.     * aggregate constant and move each constant value into it.  If we
  1744.     * get lucky, copy propagation will eliminate the extra moves.
  1745.     */
  1746.  
  1747.    if (ir->type->base_type == GLSL_TYPE_STRUCT) {
  1748.       src_reg temp_base = get_temp(ir->type);
  1749.       dst_reg temp = dst_reg(temp_base);
  1750.  
  1751.       foreach_in_list(ir_constant, field_value, &ir->components) {
  1752.          int size = type_size(field_value->type);
  1753.  
  1754.          assert(size > 0);
  1755.  
  1756.          field_value->accept(this);
  1757.          src = this->result;
  1758.  
  1759.          for (i = 0; i < (unsigned int)size; i++) {
  1760.             emit(ir, OPCODE_MOV, temp, src);
  1761.  
  1762.             src.index++;
  1763.             temp.index++;
  1764.          }
  1765.       }
  1766.       this->result = temp_base;
  1767.       return;
  1768.    }
  1769.  
  1770.    if (ir->type->is_array()) {
  1771.       src_reg temp_base = get_temp(ir->type);
  1772.       dst_reg temp = dst_reg(temp_base);
  1773.       int size = type_size(ir->type->fields.array);
  1774.  
  1775.       assert(size > 0);
  1776.  
  1777.       for (i = 0; i < ir->type->length; i++) {
  1778.          ir->array_elements[i]->accept(this);
  1779.          src = this->result;
  1780.          for (int j = 0; j < size; j++) {
  1781.             emit(ir, OPCODE_MOV, temp, src);
  1782.  
  1783.             src.index++;
  1784.             temp.index++;
  1785.          }
  1786.       }
  1787.       this->result = temp_base;
  1788.       return;
  1789.    }
  1790.  
  1791.    if (ir->type->is_matrix()) {
  1792.       src_reg mat = get_temp(ir->type);
  1793.       dst_reg mat_column = dst_reg(mat);
  1794.  
  1795.       for (i = 0; i < ir->type->matrix_columns; i++) {
  1796.          assert(ir->type->base_type == GLSL_TYPE_FLOAT);
  1797.          values = &ir->value.f[i * ir->type->vector_elements];
  1798.  
  1799.          src = src_reg(PROGRAM_CONSTANT, -1, NULL);
  1800.          src.index = _mesa_add_unnamed_constant(this->prog->Parameters,
  1801.                                                 (gl_constant_value *) values,
  1802.                                                 ir->type->vector_elements,
  1803.                                                 &src.swizzle);
  1804.          emit(ir, OPCODE_MOV, mat_column, src);
  1805.  
  1806.          mat_column.index++;
  1807.       }
  1808.  
  1809.       this->result = mat;
  1810.       return;
  1811.    }
  1812.  
  1813.    src.file = PROGRAM_CONSTANT;
  1814.    switch (ir->type->base_type) {
  1815.    case GLSL_TYPE_FLOAT:
  1816.       values = &ir->value.f[0];
  1817.       break;
  1818.    case GLSL_TYPE_UINT:
  1819.       for (i = 0; i < ir->type->vector_elements; i++) {
  1820.          values[i] = ir->value.u[i];
  1821.       }
  1822.       break;
  1823.    case GLSL_TYPE_INT:
  1824.       for (i = 0; i < ir->type->vector_elements; i++) {
  1825.          values[i] = ir->value.i[i];
  1826.       }
  1827.       break;
  1828.    case GLSL_TYPE_BOOL:
  1829.       for (i = 0; i < ir->type->vector_elements; i++) {
  1830.          values[i] = ir->value.b[i];
  1831.       }
  1832.       break;
  1833.    default:
  1834.       assert(!"Non-float/uint/int/bool constant");
  1835.    }
  1836.  
  1837.    this->result = src_reg(PROGRAM_CONSTANT, -1, ir->type);
  1838.    this->result.index = _mesa_add_unnamed_constant(this->prog->Parameters,
  1839.                                                    (gl_constant_value *) values,
  1840.                                                    ir->type->vector_elements,
  1841.                                                    &this->result.swizzle);
  1842. }
  1843.  
/**
 * Function calls are never translated by this visitor: reaching this
 * overload trips an assertion, since every call is expected to have been
 * inlined before translation starts.
 */
void
ir_to_mesa_visitor::visit(ir_call *)
{
   assert(!"ir_to_mesa: All function calls should have been inlined by now.");
}
  1849.  
/**
 * Translate a texture operation into a Mesa texture instruction.
 *
 * The coordinate is copied into a vec4 temporary whose channels are then
 * patched as needed: the projector, shadow comparitor and LOD / LOD bias
 * are each moved into a channel of that temporary before the texture
 * opcode is emitted.  The sampled value is written to a fresh vec4
 * temporary, which is left in this->result.
 *
 * ir_txf_ms, ir_lod, ir_tg4 and ir_query_levels are not supported and trip
 * an assertion.
 */
void
ir_to_mesa_visitor::visit(ir_texture *ir)
{
   src_reg result_src, coord, lod_info, projector, dx, dy;
   dst_reg result_dst, coord_dst;
   ir_to_mesa_instruction *inst = NULL;
   prog_opcode opcode = OPCODE_NOP;

   /* ir_txs is given a constant 0.0 in place of a coordinate; every other
    * operation evaluates its coordinate expression.
    */
   if (ir->op == ir_txs)
      this->result = src_reg_for_float(0.0);
   else
      ir->coordinate->accept(this);

   /* Put our coords in a temp.  We'll need to modify them for shadow,
    * projection, or LOD, so the only case we'd use it as is is if
    * we're doing plain old texturing.  Mesa IR optimization should
    * handle cleaning up our mess in that case.
    */
   coord = get_temp(glsl_type::vec4_type);
   coord_dst = dst_reg(coord);
   emit(ir, OPCODE_MOV, coord_dst, this->result);

   if (ir->projector) {
      ir->projector->accept(this);
      projector = this->result;
   }

   /* Storage for our result.  Ideally for an assignment we'd be using
    * the actual storage for the result here, instead.
    */
   result_src = get_temp(glsl_type::vec4_type);
   result_dst = dst_reg(result_src);

   /* Pick the Mesa opcode and evaluate the operation-specific operands
    * (bias, lod or gradients).
    */
   switch (ir->op) {
   case ir_tex:
   case ir_txs:
      opcode = OPCODE_TEX;
      break;
   case ir_txb:
      opcode = OPCODE_TXB;
      ir->lod_info.bias->accept(this);
      lod_info = this->result;
      break;
   case ir_txf:
      /* Pretend to be TXL so the sampler, coordinate, lod are available */
   case ir_txl:
      opcode = OPCODE_TXL;
      ir->lod_info.lod->accept(this);
      lod_info = this->result;
      break;
   case ir_txd:
      opcode = OPCODE_TXD;
      ir->lod_info.grad.dPdx->accept(this);
      dx = this->result;
      ir->lod_info.grad.dPdy->accept(this);
      dy = this->result;
      break;
   case ir_txf_ms:
      assert(!"Unexpected ir_txf_ms opcode");
      break;
   case ir_lod:
      assert(!"Unexpected ir_lod opcode");
      break;
   case ir_tg4:
      assert(!"Unexpected ir_tg4 opcode");
      break;
   case ir_query_levels:
      assert(!"Unexpected ir_query_levels opcode");
      break;
   }

   const glsl_type *sampler_type = ir->sampler->type;

   if (ir->projector) {
      if (opcode == OPCODE_TEX) {
         /* Slot the projector in as the last component of the coord. */
         coord_dst.writemask = WRITEMASK_W;
         emit(ir, OPCODE_MOV, coord_dst, projector);
         coord_dst.writemask = WRITEMASK_XYZW;
         opcode = OPCODE_TXP;
      } else {
         /* Source view of the coordinate temp that reads W in all channels;
          * W receives 1/projector from the RCP below.
          */
         src_reg coord_w = coord;
         coord_w.swizzle = SWIZZLE_WWWW;

         /* For the other TEX opcodes there's no projective version
          * since the last slot is taken up by lod info.  Do the
          * projective divide now.
          */
         coord_dst.writemask = WRITEMASK_W;
         emit(ir, OPCODE_RCP, coord_dst, projector);

         /* In the case where we have to project the coordinates "by hand,"
          * the shadow comparitor value must also be projected.
          */
         src_reg tmp_src = coord;
         if (ir->shadow_comparitor) {
            /* Slot the shadow value in as the second to last component of the
             * coord.
             */
            ir->shadow_comparitor->accept(this);

            tmp_src = get_temp(glsl_type::vec4_type);
            dst_reg tmp_dst = dst_reg(tmp_src);

            /* Projective division not allowed for array samplers. */
            assert(!sampler_type->sampler_array);

            tmp_dst.writemask = WRITEMASK_Z;
            emit(ir, OPCODE_MOV, tmp_dst, this->result);

            tmp_dst.writemask = WRITEMASK_XY;
            emit(ir, OPCODE_MOV, tmp_dst, coord);
         }

         /* coord.xyz = (coordinate, or coordinate + comparitor) * 1/projector */
         coord_dst.writemask = WRITEMASK_XYZ;
         emit(ir, OPCODE_MUL, coord_dst, tmp_src, coord_w);

         coord_dst.writemask = WRITEMASK_XYZW;
         coord.swizzle = SWIZZLE_XYZW;
      }
   }

   /* If projection is done and the opcode is not OPCODE_TXP, then the shadow
    * comparitor was put in the correct place (and projected) by the code,
    * above, that handles by-hand projection.
    */
   if (ir->shadow_comparitor && (!ir->projector || opcode == OPCODE_TXP)) {
      /* Slot the shadow value in as the second to last component of the
       * coord.
       */
      ir->shadow_comparitor->accept(this);

      /* XXX This will need to be updated for cubemap array samplers. */
      if (sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_2D &&
          sampler_type->sampler_array) {
         coord_dst.writemask = WRITEMASK_W;
      } else {
         coord_dst.writemask = WRITEMASK_Z;
      }

      emit(ir, OPCODE_MOV, coord_dst, this->result);
      coord_dst.writemask = WRITEMASK_XYZW;
   }

   if (opcode == OPCODE_TXL || opcode == OPCODE_TXB) {
      /* Mesa IR stores lod or lod bias in the last channel of the coords. */
      coord_dst.writemask = WRITEMASK_W;
      emit(ir, OPCODE_MOV, coord_dst, lod_info);
      coord_dst.writemask = WRITEMASK_XYZW;
   }

   /* TXD is the only opcode that takes the two gradient operands. */
   if (opcode == OPCODE_TXD)
      inst = emit(ir, opcode, result_dst, coord, dx, dy);
   else
      inst = emit(ir, opcode, result_dst, coord);

   if (ir->shadow_comparitor)
      inst->tex_shadow = GL_TRUE;

   inst->sampler = _mesa_get_sampler_uniform_value(ir->sampler,
                                                   this->shader_program,
                                                   this->prog);

   /* Map the GLSL sampler dimensionality onto a Mesa texture target index.
    * Buffer samplers are unimplemented and leave tex_target untouched.
    */
   switch (sampler_type->sampler_dimensionality) {
   case GLSL_SAMPLER_DIM_1D:
      inst->tex_target = (sampler_type->sampler_array)
         ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX;
      break;
   case GLSL_SAMPLER_DIM_2D:
      inst->tex_target = (sampler_type->sampler_array)
         ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX;
      break;
   case GLSL_SAMPLER_DIM_3D:
      inst->tex_target = TEXTURE_3D_INDEX;
      break;
   case GLSL_SAMPLER_DIM_CUBE:
      inst->tex_target = TEXTURE_CUBE_INDEX;
      break;
   case GLSL_SAMPLER_DIM_RECT:
      inst->tex_target = TEXTURE_RECT_INDEX;
      break;
   case GLSL_SAMPLER_DIM_BUF:
      assert(!"FINISHME: Implement ARB_texture_buffer_object");
      break;
   case GLSL_SAMPLER_DIM_EXTERNAL:
      inst->tex_target = TEXTURE_EXTERNAL_INDEX;
      break;
   default:
      assert(!"Should not get here.");
   }

   this->result = result_src;
}
  2043.  
/**
 * Emit a RET for a return statement.  Only value-less returns are
 * accepted; a return that carries a value trips the assertion.
 */
void
ir_to_mesa_visitor::visit(ir_return *ir)
{
   /* Non-void functions should have been inlined.  We may still emit RETs
    * from main() unless the EmitNoMainReturn option is set.
    */
   assert(!ir->get_value());
   emit(ir, OPCODE_RET);
}
  2053.  
  2054. void
  2055. ir_to_mesa_visitor::visit(ir_discard *ir)
  2056. {
  2057.    if (ir->condition) {
  2058.       ir->condition->accept(this);
  2059.       this->result.negate = ~this->result.negate;
  2060.       emit(ir, OPCODE_KIL, undef_dst, this->result);
  2061.    } else {
  2062.       emit(ir, OPCODE_KIL_NV);
  2063.    }
  2064. }
  2065.  
  2066. void
  2067. ir_to_mesa_visitor::visit(ir_if *ir)
  2068. {
  2069.    ir_to_mesa_instruction *cond_inst, *if_inst;
  2070.    ir_to_mesa_instruction *prev_inst;
  2071.  
  2072.    prev_inst = (ir_to_mesa_instruction *)this->instructions.get_tail();
  2073.  
  2074.    ir->condition->accept(this);
  2075.    assert(this->result.file != PROGRAM_UNDEFINED);
  2076.  
  2077.    if (this->options->EmitCondCodes) {
  2078.       cond_inst = (ir_to_mesa_instruction *)this->instructions.get_tail();
  2079.  
  2080.       /* See if we actually generated any instruction for generating
  2081.        * the condition.  If not, then cook up a move to a temp so we
  2082.        * have something to set cond_update on.
  2083.        */
  2084.       if (cond_inst == prev_inst) {
  2085.          src_reg temp = get_temp(glsl_type::bool_type);
  2086.          cond_inst = emit(ir->condition, OPCODE_MOV, dst_reg(temp), result);
  2087.       }
  2088.       cond_inst->cond_update = GL_TRUE;
  2089.  
  2090.       if_inst = emit(ir->condition, OPCODE_IF);
  2091.       if_inst->dst.cond_mask = COND_NE;
  2092.    } else {
  2093.       if_inst = emit(ir->condition, OPCODE_IF, undef_dst, this->result);
  2094.    }
  2095.  
  2096.    this->instructions.push_tail(if_inst);
  2097.  
  2098.    visit_exec_list(&ir->then_instructions, this);
  2099.  
  2100.    if (!ir->else_instructions.is_empty()) {
  2101.       emit(ir->condition, OPCODE_ELSE);
  2102.       visit_exec_list(&ir->else_instructions, this);
  2103.    }
  2104.  
  2105.    emit(ir->condition, OPCODE_ENDIF);
  2106. }
  2107.  
/**
 * EmitVertex() is never translated by this visitor; reaching this overload
 * trips an assertion.
 */
void
ir_to_mesa_visitor::visit(ir_emit_vertex *)
{
   assert(!"Geometry shaders not supported.");
}
  2113.  
/**
 * EndPrimitive() is never translated by this visitor; reaching this
 * overload trips an assertion.
 */
void
ir_to_mesa_visitor::visit(ir_end_primitive *)
{
   assert(!"Geometry shaders not supported.");
}
  2119.  
/**
 * Set up an empty visitor: no pending result, temporary and signature
 * numbering both starting at 1, no current function, and a fresh ralloc
 * context (released by the destructor) for the visitor's allocations.
 */
ir_to_mesa_visitor::ir_to_mesa_visitor()
{
   result.file = PROGRAM_UNDEFINED;
   next_temp = 1;
   next_signature_id = 1;
   current_function = NULL;
   mem_ctx = ralloc_context(NULL);
}
  2128.  
/**
 * Release the ralloc context created by the constructor, and with it
 * everything that was allocated out of that context.
 */
ir_to_mesa_visitor::~ir_to_mesa_visitor()
{
   ralloc_free(mem_ctx);
}
  2133.  
  2134. static struct prog_src_register
  2135. mesa_src_reg_from_ir_src_reg(src_reg reg)
  2136. {
  2137.    struct prog_src_register mesa_reg;
  2138.  
  2139.    mesa_reg.File = reg.file;
  2140.    assert(reg.index < (1 << INST_INDEX_BITS));
  2141.    mesa_reg.Index = reg.index;
  2142.    mesa_reg.Swizzle = reg.swizzle;
  2143.    mesa_reg.RelAddr = reg.reladdr != NULL;
  2144.    mesa_reg.Negate = reg.negate;
  2145.    mesa_reg.Abs = 0;
  2146.    mesa_reg.HasIndex2 = GL_FALSE;
  2147.    mesa_reg.RelAddr2 = 0;
  2148.    mesa_reg.Index2 = 0;
  2149.  
  2150.    return mesa_reg;
  2151. }
  2152.  
  2153. static void
  2154. set_branchtargets(ir_to_mesa_visitor *v,
  2155.                   struct prog_instruction *mesa_instructions,
  2156.                   int num_instructions)
  2157. {
  2158.    int if_count = 0, loop_count = 0;
  2159.    int *if_stack, *loop_stack;
  2160.    int if_stack_pos = 0, loop_stack_pos = 0;
  2161.    int i, j;
  2162.  
  2163.    for (i = 0; i < num_instructions; i++) {
  2164.       switch (mesa_instructions[i].Opcode) {
  2165.       case OPCODE_IF:
  2166.          if_count++;
  2167.          break;
  2168.       case OPCODE_BGNLOOP:
  2169.          loop_count++;
  2170.          break;
  2171.       case OPCODE_BRK:
  2172.       case OPCODE_CONT:
  2173.          mesa_instructions[i].BranchTarget = -1;
  2174.          break;
  2175.       default:
  2176.          break;
  2177.       }
  2178.    }
  2179.  
  2180.    if_stack = rzalloc_array(v->mem_ctx, int, if_count);
  2181.    loop_stack = rzalloc_array(v->mem_ctx, int, loop_count);
  2182.  
  2183.    for (i = 0; i < num_instructions; i++) {
  2184.       switch (mesa_instructions[i].Opcode) {
  2185.       case OPCODE_IF:
  2186.          if_stack[if_stack_pos] = i;
  2187.          if_stack_pos++;
  2188.          break;
  2189.       case OPCODE_ELSE:
  2190.          mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i;
  2191.          if_stack[if_stack_pos - 1] = i;
  2192.          break;
  2193.       case OPCODE_ENDIF:
  2194.          mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i;
  2195.          if_stack_pos--;
  2196.          break;
  2197.       case OPCODE_BGNLOOP:
  2198.          loop_stack[loop_stack_pos] = i;
  2199.          loop_stack_pos++;
  2200.          break;
  2201.       case OPCODE_ENDLOOP:
  2202.          loop_stack_pos--;
  2203.          /* Rewrite any breaks/conts at this nesting level (haven't
  2204.           * already had a BranchTarget assigned) to point to the end
  2205.           * of the loop.
  2206.           */
  2207.          for (j = loop_stack[loop_stack_pos]; j < i; j++) {
  2208.             if (mesa_instructions[j].Opcode == OPCODE_BRK ||
  2209.                 mesa_instructions[j].Opcode == OPCODE_CONT) {
  2210.                if (mesa_instructions[j].BranchTarget == -1) {
  2211.                   mesa_instructions[j].BranchTarget = i;
  2212.                }
  2213.             }
  2214.          }
  2215.          /* The loop ends point at each other. */
  2216.          mesa_instructions[i].BranchTarget = loop_stack[loop_stack_pos];
  2217.          mesa_instructions[loop_stack[loop_stack_pos]].BranchTarget = i;
  2218.          break;
  2219.       case OPCODE_CAL:
  2220.          foreach_in_list(function_entry, entry, &v->function_signatures) {
  2221.             if (entry->sig_id == mesa_instructions[i].BranchTarget) {
  2222.                mesa_instructions[i].BranchTarget = entry->inst;
  2223.                break;
  2224.             }
  2225.          }
  2226.          break;
  2227.       default:
  2228.          break;
  2229.       }
  2230.    }
  2231. }
  2232.  
  2233. static void
  2234. print_program(struct prog_instruction *mesa_instructions,
  2235.               ir_instruction **mesa_instruction_annotation,
  2236.               int num_instructions)
  2237. {
  2238.    ir_instruction *last_ir = NULL;
  2239.    int i;
  2240.    int indent = 0;
  2241.  
  2242.    for (i = 0; i < num_instructions; i++) {
  2243.       struct prog_instruction *mesa_inst = mesa_instructions + i;
  2244.       ir_instruction *ir = mesa_instruction_annotation[i];
  2245.  
  2246.       fprintf(stdout, "%3d: ", i);
  2247.  
  2248.       if (last_ir != ir && ir) {
  2249.          int j;
  2250.  
  2251.          for (j = 0; j < indent; j++) {
  2252.             fprintf(stdout, " ");
  2253.          }
  2254.          ir->print();
  2255.          printf("\n");
  2256.          last_ir = ir;
  2257.  
  2258.          fprintf(stdout, "     "); /* line number spacing. */
  2259.       }
  2260.  
  2261.       indent = _mesa_fprint_instruction_opt(stdout, mesa_inst, indent,
  2262.                                             PROG_PRINT_DEBUG, NULL);
  2263.    }
  2264. }
  2265.  
namespace {

/**
 * Resource visitor that registers the fields of a uniform variable in a
 * program parameter list and records where the variable starts in it.
 */
class add_uniform_to_shader : public program_resource_visitor {
public:
   /**
    * \param shader_program  Program whose UniformHash / UniformStorage are
    *                        consulted for sampler uniforms.
    * \param params          Parameter list that receives the uniforms.
    * \param shader_type     Stage used to select the per-stage sampler
    *                        information.
    */
   add_uniform_to_shader(struct gl_shader_program *shader_program,
                         struct gl_program_parameter_list *params,
                         gl_shader_stage shader_type)
      : shader_program(shader_program), params(params), idx(-1),
        shader_type(shader_type)
   {
      /* empty */
   }

   /**
    * Process every field of \p var, then store the parameter index of the
    * first processed field in var->data.location (-1 if no field set it).
    */
   void process(ir_variable *var)
   {
      this->idx = -1;
      this->program_resource_visitor::process(var);

      var->data.location = this->idx;
   }

private:
   /* Called for each field of the variable being processed. */
   virtual void visit_field(const glsl_type *type, const char *name,
                            bool row_major);

   struct gl_shader_program *shader_program;
   struct gl_program_parameter_list *params;
   /* Parameter index of the first field seen by the current process()
    * call, or -1 while none has been seen.
    */
   int idx;
   gl_shader_stage shader_type;
};

} /* anonymous namespace */
  2298.  
  2299. void
  2300. add_uniform_to_shader::visit_field(const glsl_type *type, const char *name,
  2301.                                    bool row_major)
  2302. {
  2303.    unsigned int size;
  2304.  
  2305.    (void) row_major;
  2306.  
  2307.    if (type->is_vector() || type->is_scalar()) {
  2308.       size = type->vector_elements;
  2309.       if (type->is_double())
  2310.          size *= 2;
  2311.    } else {
  2312.       size = type_size(type) * 4;
  2313.    }
  2314.  
  2315.    gl_register_file file;
  2316.    if (type->without_array()->is_sampler()) {
  2317.       file = PROGRAM_SAMPLER;
  2318.    } else {
  2319.       file = PROGRAM_UNIFORM;
  2320.    }
  2321.  
  2322.    int index = _mesa_lookup_parameter_index(params, -1, name);
  2323.    if (index < 0) {
  2324.       index = _mesa_add_parameter(params, file, name, size, type->gl_type,
  2325.                                   NULL, NULL);
  2326.  
  2327.       /* Sampler uniform values are stored in prog->SamplerUnits,
  2328.        * and the entry in that array is selected by this index we
  2329.        * store in ParameterValues[].
  2330.        */
  2331.       if (file == PROGRAM_SAMPLER) {
  2332.          unsigned location;
  2333.          const bool found =
  2334.             this->shader_program->UniformHash->get(location,
  2335.                                                    params->Parameters[index].Name);
  2336.          assert(found);
  2337.  
  2338.          if (!found)
  2339.             return;
  2340.  
  2341.          struct gl_uniform_storage *storage =
  2342.             &this->shader_program->UniformStorage[location];
  2343.  
  2344.          assert(storage->sampler[shader_type].active);
  2345.  
  2346.          for (unsigned int j = 0; j < size / 4; j++)
  2347.             params->ParameterValues[index + j][0].f =
  2348.                storage->sampler[shader_type].index + j;
  2349.       }
  2350.    }
  2351.  
  2352.    /* The first part of the uniform that's processed determines the base
  2353.     * location of the whole uniform (for structures).
  2354.     */
  2355.    if (this->idx < 0)
  2356.       this->idx = index;
  2357. }
  2358.  
  2359. /**
  2360.  * Generate the program parameters list for the user uniforms in a shader
  2361.  *
  2362.  * \param shader_program Linked shader program.  This is only used to
  2363.  *                       emit possible link errors to the info log.
  2364.  * \param sh             Shader whose uniforms are to be processed.
  2365.  * \param params         Parameter list to be filled in.
  2366.  */
  2367. void
  2368. _mesa_generate_parameters_list_for_uniforms(struct gl_shader_program
  2369.                                             *shader_program,
  2370.                                             struct gl_shader *sh,
  2371.                                             struct gl_program_parameter_list
  2372.                                             *params)
  2373. {
  2374.    add_uniform_to_shader add(shader_program, params, sh->Stage);
  2375.  
  2376.    foreach_in_list(ir_instruction, node, sh->ir) {
  2377.       ir_variable *var = node->as_variable();
  2378.  
  2379.       if ((var == NULL) || (var->data.mode != ir_var_uniform)
  2380.           || var->is_in_uniform_block() || (strncmp(var->name, "gl_", 3) == 0))
  2381.          continue;
  2382.  
  2383.       add.process(var);
  2384.    }
  2385. }
  2386.  
  2387. void
  2388. _mesa_associate_uniform_storage(struct gl_context *ctx,
  2389.                                 struct gl_shader_program *shader_program,
  2390.                                 struct gl_program_parameter_list *params)
  2391. {
  2392.    /* After adding each uniform to the parameter list, connect the storage for
  2393.     * the parameter with the tracking structure used by the API for the
  2394.     * uniform.
  2395.     */
  2396.    unsigned last_location = unsigned(~0);
  2397.    for (unsigned i = 0; i < params->NumParameters; i++) {
  2398.       if (params->Parameters[i].Type != PROGRAM_UNIFORM)
  2399.          continue;
  2400.  
  2401.       unsigned location;
  2402.       const bool found =
  2403.          shader_program->UniformHash->get(location, params->Parameters[i].Name);
  2404.       assert(found);
  2405.  
  2406.       if (!found)
  2407.          continue;
  2408.  
  2409.       if (location != last_location) {
  2410.          struct gl_uniform_storage *storage =
  2411.             &shader_program->UniformStorage[location];
  2412.          enum gl_uniform_driver_format format = uniform_native;
  2413.  
  2414.          unsigned columns = 0;
  2415.          int dmul = 4 * sizeof(float);
  2416.          switch (storage->type->base_type) {
  2417.          case GLSL_TYPE_UINT:
  2418.             assert(ctx->Const.NativeIntegers);
  2419.             format = uniform_native;
  2420.             columns = 1;
  2421.             break;
  2422.          case GLSL_TYPE_INT:
  2423.             format =
  2424.                (ctx->Const.NativeIntegers) ? uniform_native : uniform_int_float;
  2425.             columns = 1;
  2426.             break;
  2427.  
  2428.          case GLSL_TYPE_DOUBLE:
  2429.             if (storage->type->vector_elements > 2)
  2430.                dmul *= 2;
  2431.             /* fallthrough */
  2432.          case GLSL_TYPE_FLOAT:
  2433.             format = uniform_native;
  2434.             columns = storage->type->matrix_columns;
  2435.             break;
  2436.          case GLSL_TYPE_BOOL:
  2437.             format = uniform_native;
  2438.             columns = 1;
  2439.             break;
  2440.          case GLSL_TYPE_SAMPLER:
  2441.          case GLSL_TYPE_IMAGE:
  2442.             format = uniform_native;
  2443.             columns = 1;
  2444.             break;
  2445.          case GLSL_TYPE_ATOMIC_UINT:
  2446.          case GLSL_TYPE_ARRAY:
  2447.          case GLSL_TYPE_VOID:
  2448.          case GLSL_TYPE_STRUCT:
  2449.          case GLSL_TYPE_ERROR:
  2450.          case GLSL_TYPE_INTERFACE:
  2451.             assert(!"Should not get here.");
  2452.             break;
  2453.          }
  2454.  
  2455.          _mesa_uniform_attach_driver_storage(storage,
  2456.                                              dmul * columns,
  2457.                                              dmul,
  2458.                                              format,
  2459.                                              &params->ParameterValues[i]);
  2460.  
  2461.          /* After attaching the driver's storage to the uniform, propagate any
  2462.           * data from the linker's backing store.  This will cause values from
  2463.           * initializers in the source code to be copied over.
  2464.           */
  2465.          _mesa_propagate_uniforms_to_driver_storage(storage,
  2466.                                                     0,
  2467.                                                     MAX2(1, storage->array_elements));
  2468.  
  2469.          last_location = location;
  2470.       }
  2471.    }
  2472. }
  2473.  
  2474. /*
  2475.  * On a basic block basis, tracks available PROGRAM_TEMPORARY register
  2476.  * channels for copy propagation and updates following instructions to
  2477.  * use the original versions.
  2478.  *
  2479.  * The ir_to_mesa_visitor lazily produces code assuming that this pass
  2480.  * will occur.  As an example, a TXP production before this pass:
  2481.  *
  2482.  * 0: MOV TEMP[1], INPUT[4].xyyy;
  2483.  * 1: MOV TEMP[1].w, INPUT[4].wwww;
  2484.  * 2: TXP TEMP[2], TEMP[1], texture[0], 2D;
  2485.  *
  2486.  * and after:
  2487.  *
  2488.  * 0: MOV TEMP[1], INPUT[4].xyyy;
  2489.  * 1: MOV TEMP[1].w, INPUT[4].wwww;
  2490.  * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
  2491.  *
  2492.  * which allows for dead code elimination on TEMP[1]'s writes.
  2493.  */
  2494. void
  2495. ir_to_mesa_visitor::copy_propagate(void)
  2496. {
  2497.    ir_to_mesa_instruction **acp = rzalloc_array(mem_ctx,
  2498.                                                     ir_to_mesa_instruction *,
  2499.                                                     this->next_temp * 4);
  2500.    int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
  2501.    int level = 0;
  2502.  
  2503.    foreach_in_list(ir_to_mesa_instruction, inst, &this->instructions) {
  2504.       assert(inst->dst.file != PROGRAM_TEMPORARY
  2505.              || inst->dst.index < this->next_temp);
  2506.  
  2507.       /* First, do any copy propagation possible into the src regs. */
  2508.       for (int r = 0; r < 3; r++) {
  2509.          ir_to_mesa_instruction *first = NULL;
  2510.          bool good = true;
  2511.          int acp_base = inst->src[r].index * 4;
  2512.  
  2513.          if (inst->src[r].file != PROGRAM_TEMPORARY ||
  2514.              inst->src[r].reladdr)
  2515.             continue;
  2516.  
  2517.          /* See if we can find entries in the ACP consisting of MOVs
  2518.           * from the same src register for all the swizzled channels
  2519.           * of this src register reference.
  2520.           */
  2521.          for (int i = 0; i < 4; i++) {
  2522.             int src_chan = GET_SWZ(inst->src[r].swizzle, i);
  2523.             ir_to_mesa_instruction *copy_chan = acp[acp_base + src_chan];
  2524.  
  2525.             if (!copy_chan) {
  2526.                good = false;
  2527.                break;
  2528.             }
  2529.  
  2530.             assert(acp_level[acp_base + src_chan] <= level);
  2531.  
  2532.             if (!first) {
  2533.                first = copy_chan;
  2534.             } else {
  2535.                if (first->src[0].file != copy_chan->src[0].file ||
  2536.                    first->src[0].index != copy_chan->src[0].index) {
  2537.                   good = false;
  2538.                   break;
  2539.                }
  2540.             }
  2541.          }
  2542.  
  2543.          if (good) {
  2544.             /* We've now validated that we can copy-propagate to
  2545.              * replace this src register reference.  Do it.
  2546.              */
  2547.             inst->src[r].file = first->src[0].file;
  2548.             inst->src[r].index = first->src[0].index;
  2549.  
  2550.             int swizzle = 0;
  2551.             for (int i = 0; i < 4; i++) {
  2552.                int src_chan = GET_SWZ(inst->src[r].swizzle, i);
  2553.                ir_to_mesa_instruction *copy_inst = acp[acp_base + src_chan];
  2554.                swizzle |= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) <<
  2555.                            (3 * i));
  2556.             }
  2557.             inst->src[r].swizzle = swizzle;
  2558.          }
  2559.       }
  2560.  
  2561.       switch (inst->op) {
  2562.       case OPCODE_BGNLOOP:
  2563.       case OPCODE_ENDLOOP:
  2564.          /* End of a basic block, clear the ACP entirely. */
  2565.          memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
  2566.          break;
  2567.  
  2568.       case OPCODE_IF:
  2569.          ++level;
  2570.          break;
  2571.  
  2572.       case OPCODE_ENDIF:
  2573.       case OPCODE_ELSE:
  2574.          /* Clear all channels written inside the block from the ACP, but
  2575.           * leaving those that were not touched.
  2576.           */
  2577.          for (int r = 0; r < this->next_temp; r++) {
  2578.             for (int c = 0; c < 4; c++) {
  2579.                if (!acp[4 * r + c])
  2580.                   continue;
  2581.  
  2582.                if (acp_level[4 * r + c] >= level)
  2583.                   acp[4 * r + c] = NULL;
  2584.             }
  2585.          }
  2586.          if (inst->op == OPCODE_ENDIF)
  2587.             --level;
  2588.          break;
  2589.  
  2590.       default:
  2591.          /* Continuing the block, clear any written channels from
  2592.           * the ACP.
  2593.           */
  2594.          if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.reladdr) {
  2595.             /* Any temporary might be written, so no copy propagation
  2596.              * across this instruction.
  2597.              */
  2598.             memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
  2599.          } else if (inst->dst.file == PROGRAM_OUTPUT &&
  2600.                     inst->dst.reladdr) {
  2601.             /* Any output might be written, so no copy propagation
  2602.              * from outputs across this instruction.
  2603.              */
  2604.             for (int r = 0; r < this->next_temp; r++) {
  2605.                for (int c = 0; c < 4; c++) {
  2606.                   if (!acp[4 * r + c])
  2607.                      continue;
  2608.  
  2609.                   if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT)
  2610.                      acp[4 * r + c] = NULL;
  2611.                }
  2612.             }
  2613.          } else if (inst->dst.file == PROGRAM_TEMPORARY ||
  2614.                     inst->dst.file == PROGRAM_OUTPUT) {
  2615.             /* Clear where it's used as dst. */
  2616.             if (inst->dst.file == PROGRAM_TEMPORARY) {
  2617.                for (int c = 0; c < 4; c++) {
  2618.                   if (inst->dst.writemask & (1 << c)) {
  2619.                      acp[4 * inst->dst.index + c] = NULL;
  2620.                   }
  2621.                }
  2622.             }
  2623.  
  2624.             /* Clear where it's used as src. */
  2625.             for (int r = 0; r < this->next_temp; r++) {
  2626.                for (int c = 0; c < 4; c++) {
  2627.                   if (!acp[4 * r + c])
  2628.                      continue;
  2629.  
  2630.                   int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c);
  2631.  
  2632.                   if (acp[4 * r + c]->src[0].file == inst->dst.file &&
  2633.                       acp[4 * r + c]->src[0].index == inst->dst.index &&
  2634.                       inst->dst.writemask & (1 << src_chan))
  2635.                   {
  2636.                      acp[4 * r + c] = NULL;
  2637.                   }
  2638.                }
  2639.             }
  2640.          }
  2641.          break;
  2642.       }
  2643.  
  2644.       /* If this is a copy, add it to the ACP. */
  2645.       if (inst->op == OPCODE_MOV &&
  2646.           inst->dst.file == PROGRAM_TEMPORARY &&
  2647.           !(inst->dst.file == inst->src[0].file &&
  2648.             inst->dst.index == inst->src[0].index) &&
  2649.           !inst->dst.reladdr &&
  2650.           !inst->saturate &&
  2651.           !inst->src[0].reladdr &&
  2652.           !inst->src[0].negate) {
  2653.          for (int i = 0; i < 4; i++) {
  2654.             if (inst->dst.writemask & (1 << i)) {
  2655.                acp[4 * inst->dst.index + i] = inst;
  2656.                acp_level[4 * inst->dst.index + i] = level;
  2657.             }
  2658.          }
  2659.       }
  2660.    }
  2661.  
  2662.    ralloc_free(acp_level);
  2663.    ralloc_free(acp);
  2664. }
  2665.  
  2666.  
  2667. /**
  2668.  * Convert a shader's GLSL IR into a Mesa gl_program.
  2669.  */
  2670. static struct gl_program *
  2671. get_mesa_program(struct gl_context *ctx,
  2672.                  struct gl_shader_program *shader_program,
  2673.                  struct gl_shader *shader)
  2674. {
  2675.    ir_to_mesa_visitor v;
  2676.    struct prog_instruction *mesa_instructions, *mesa_inst;
  2677.    ir_instruction **mesa_instruction_annotation;
  2678.    int i;
  2679.    struct gl_program *prog;
  2680.    GLenum target = _mesa_shader_stage_to_program(shader->Stage);
  2681.    const char *target_string = _mesa_shader_stage_to_string(shader->Stage);
  2682.    struct gl_shader_compiler_options *options =
  2683.          &ctx->Const.ShaderCompilerOptions[shader->Stage];
  2684.  
  2685.    validate_ir_tree(shader->ir);
  2686.  
  2687.    prog = ctx->Driver.NewProgram(ctx, target, shader_program->Name);
  2688.    if (!prog)
  2689.       return NULL;
  2690.    prog->Parameters = _mesa_new_parameter_list();
  2691.    v.ctx = ctx;
  2692.    v.prog = prog;
  2693.    v.shader_program = shader_program;
  2694.    v.options = options;
  2695.  
  2696.    _mesa_generate_parameters_list_for_uniforms(shader_program, shader,
  2697.                                                prog->Parameters);
  2698.  
  2699.    /* Emit Mesa IR for main(). */
  2700.    visit_exec_list(shader->ir, &v);
  2701.    v.emit(NULL, OPCODE_END);
  2702.  
  2703.    prog->NumTemporaries = v.next_temp;
  2704.  
  2705.    unsigned num_instructions = v.instructions.length();
  2706.  
  2707.    mesa_instructions =
  2708.       (struct prog_instruction *)calloc(num_instructions,
  2709.                                         sizeof(*mesa_instructions));
  2710.    mesa_instruction_annotation = ralloc_array(v.mem_ctx, ir_instruction *,
  2711.                                               num_instructions);
  2712.  
  2713.    v.copy_propagate();
  2714.  
  2715.    /* Convert ir_mesa_instructions into prog_instructions.
  2716.     */
  2717.    mesa_inst = mesa_instructions;
  2718.    i = 0;
  2719.    foreach_in_list(const ir_to_mesa_instruction, inst, &v.instructions) {
  2720.       mesa_inst->Opcode = inst->op;
  2721.       mesa_inst->CondUpdate = inst->cond_update;
  2722.       if (inst->saturate)
  2723.          mesa_inst->SaturateMode = SATURATE_ZERO_ONE;
  2724.       mesa_inst->DstReg.File = inst->dst.file;
  2725.       mesa_inst->DstReg.Index = inst->dst.index;
  2726.       mesa_inst->DstReg.CondMask = inst->dst.cond_mask;
  2727.       mesa_inst->DstReg.WriteMask = inst->dst.writemask;
  2728.       mesa_inst->DstReg.RelAddr = inst->dst.reladdr != NULL;
  2729.       mesa_inst->SrcReg[0] = mesa_src_reg_from_ir_src_reg(inst->src[0]);
  2730.       mesa_inst->SrcReg[1] = mesa_src_reg_from_ir_src_reg(inst->src[1]);
  2731.       mesa_inst->SrcReg[2] = mesa_src_reg_from_ir_src_reg(inst->src[2]);
  2732.       mesa_inst->TexSrcUnit = inst->sampler;
  2733.       mesa_inst->TexSrcTarget = inst->tex_target;
  2734.       mesa_inst->TexShadow = inst->tex_shadow;
  2735.       mesa_instruction_annotation[i] = inst->ir;
  2736.  
  2737.       /* Set IndirectRegisterFiles. */
  2738.       if (mesa_inst->DstReg.RelAddr)
  2739.          prog->IndirectRegisterFiles |= 1 << mesa_inst->DstReg.File;
  2740.  
  2741.       /* Update program's bitmask of indirectly accessed register files */
  2742.       for (unsigned src = 0; src < 3; src++)
  2743.          if (mesa_inst->SrcReg[src].RelAddr)
  2744.             prog->IndirectRegisterFiles |= 1 << mesa_inst->SrcReg[src].File;
  2745.  
  2746.       switch (mesa_inst->Opcode) {
  2747.       case OPCODE_IF:
  2748.          if (options->MaxIfDepth == 0) {
  2749.             linker_warning(shader_program,
  2750.                            "Couldn't flatten if-statement.  "
  2751.                            "This will likely result in software "
  2752.                            "rasterization.\n");
  2753.          }
  2754.          break;
  2755.       case OPCODE_BGNLOOP:
  2756.          if (options->EmitNoLoops) {
  2757.             linker_warning(shader_program,
  2758.                            "Couldn't unroll loop.  "
  2759.                            "This will likely result in software "
  2760.                            "rasterization.\n");
  2761.          }
  2762.          break;
  2763.       case OPCODE_CONT:
  2764.          if (options->EmitNoCont) {
  2765.             linker_warning(shader_program,
  2766.                            "Couldn't lower continue-statement.  "
  2767.                            "This will likely result in software "
  2768.                            "rasterization.\n");
  2769.          }
  2770.          break;
  2771.       case OPCODE_ARL:
  2772.          prog->NumAddressRegs = 1;
  2773.          break;
  2774.       default:
  2775.          break;
  2776.       }
  2777.  
  2778.       mesa_inst++;
  2779.       i++;
  2780.  
  2781.       if (!shader_program->LinkStatus)
  2782.          break;
  2783.    }
  2784.  
  2785.    if (!shader_program->LinkStatus) {
  2786.       goto fail_exit;
  2787.    }
  2788.  
  2789.    set_branchtargets(&v, mesa_instructions, num_instructions);
  2790.  
  2791.    if (ctx->_Shader->Flags & GLSL_DUMP) {
  2792.       fprintf(stderr, "\n");
  2793.       fprintf(stderr, "GLSL IR for linked %s program %d:\n", target_string,
  2794.               shader_program->Name);
  2795.       _mesa_print_ir(stderr, shader->ir, NULL);
  2796.       fprintf(stderr, "\n");
  2797.       fprintf(stderr, "\n");
  2798.       fprintf(stderr, "Mesa IR for linked %s program %d:\n", target_string,
  2799.               shader_program->Name);
  2800.       print_program(mesa_instructions, mesa_instruction_annotation,
  2801.                     num_instructions);
  2802.       fflush(stderr);
  2803.    }
  2804.  
  2805.    prog->Instructions = mesa_instructions;
  2806.    prog->NumInstructions = num_instructions;
  2807.  
  2808.    /* Setting this to NULL prevents a possible double free in the fail_exit
  2809.     * path (far below).
  2810.     */
  2811.    mesa_instructions = NULL;
  2812.  
  2813.    do_set_program_inouts(shader->ir, prog, shader->Stage);
  2814.  
  2815.    prog->SamplersUsed = shader->active_samplers;
  2816.    prog->ShadowSamplers = shader->shadow_samplers;
  2817.    _mesa_update_shader_textures_used(shader_program, prog);
  2818.  
  2819.    /* Set the gl_FragDepth layout. */
  2820.    if (target == GL_FRAGMENT_PROGRAM_ARB) {
  2821.       struct gl_fragment_program *fp = (struct gl_fragment_program *)prog;
  2822.       fp->FragDepthLayout = shader_program->FragDepthLayout;
  2823.    }
  2824.  
  2825.    _mesa_reference_program(ctx, &shader->Program, prog);
  2826.  
  2827.    if ((ctx->_Shader->Flags & GLSL_NO_OPT) == 0) {
  2828.       _mesa_optimize_program(ctx, prog);
  2829.    }
  2830.  
  2831.    /* This has to be done last.  Any operation that can cause
  2832.     * prog->ParameterValues to get reallocated (e.g., anything that adds a
  2833.     * program constant) has to happen before creating this linkage.
  2834.     */
  2835.    _mesa_associate_uniform_storage(ctx, shader_program, prog->Parameters);
  2836.    if (!shader_program->LinkStatus) {
  2837.       goto fail_exit;
  2838.    }
  2839.  
  2840.    return prog;
  2841.  
  2842. fail_exit:
  2843.    free(mesa_instructions);
  2844.    _mesa_reference_program(ctx, &shader->Program, NULL);
  2845.    return NULL;
  2846. }
  2847.  
  2848. extern "C" {
  2849.  
  2850. /**
  2851.  * Link a shader.
  2852.  * Called via ctx->Driver.LinkShader()
  2853.  * This actually involves converting GLSL IR into Mesa gl_programs with
  2854.  * code lowering and other optimizations.
  2855.  */
  2856. GLboolean
  2857. _mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
  2858. {
  2859.    assert(prog->LinkStatus);
  2860.  
  2861.    for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
  2862.       if (prog->_LinkedShaders[i] == NULL)
  2863.          continue;
  2864.  
  2865.       bool progress;
  2866.       exec_list *ir = prog->_LinkedShaders[i]->ir;
  2867.       const struct gl_shader_compiler_options *options =
  2868.             &ctx->Const.ShaderCompilerOptions[prog->_LinkedShaders[i]->Stage];
  2869.  
  2870.       do {
  2871.          progress = false;
  2872.  
  2873.          /* Lowering */
  2874.          do_mat_op_to_vec(ir);
  2875.          lower_instructions(ir, (MOD_TO_FLOOR | DIV_TO_MUL_RCP | EXP_TO_EXP2
  2876.                                  | LOG_TO_LOG2 | INT_DIV_TO_MUL_RCP
  2877.                                  | ((options->EmitNoPow) ? POW_TO_EXP2 : 0)));
  2878.  
  2879.          progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress;
  2880.  
  2881.          progress = do_common_optimization(ir, true, true,
  2882.                                            options, ctx->Const.NativeIntegers)
  2883.            || progress;
  2884.  
  2885.          progress = lower_quadop_vector(ir, true) || progress;
  2886.  
  2887.          if (options->MaxIfDepth == 0)
  2888.             progress = lower_discard(ir) || progress;
  2889.  
  2890.          progress = lower_if_to_cond_assign(ir, options->MaxIfDepth) || progress;
  2891.  
  2892.          if (options->EmitNoNoise)
  2893.             progress = lower_noise(ir) || progress;
  2894.  
  2895.          /* If there are forms of indirect addressing that the driver
  2896.           * cannot handle, perform the lowering pass.
  2897.           */
  2898.          if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput
  2899.              || options->EmitNoIndirectTemp || options->EmitNoIndirectUniform)
  2900.            progress =
  2901.              lower_variable_index_to_cond_assign(ir,
  2902.                                                  options->EmitNoIndirectInput,
  2903.                                                  options->EmitNoIndirectOutput,
  2904.                                                  options->EmitNoIndirectTemp,
  2905.                                                  options->EmitNoIndirectUniform)
  2906.              || progress;
  2907.  
  2908.          progress = do_vec_index_to_cond_assign(ir) || progress;
  2909.          progress = lower_vector_insert(ir, true) || progress;
  2910.       } while (progress);
  2911.  
  2912.       validate_ir_tree(ir);
  2913.    }
  2914.  
  2915.    for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
  2916.       struct gl_program *linked_prog;
  2917.  
  2918.       if (prog->_LinkedShaders[i] == NULL)
  2919.          continue;
  2920.  
  2921.       linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]);
  2922.  
  2923.       if (linked_prog) {
  2924.          _mesa_copy_linked_program_data((gl_shader_stage) i, prog, linked_prog);
  2925.  
  2926.          _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program,
  2927.                                  linked_prog);
  2928.          if (!ctx->Driver.ProgramStringNotify(ctx,
  2929.                                               _mesa_shader_stage_to_program(i),
  2930.                                               linked_prog)) {
  2931.             return GL_FALSE;
  2932.          }
  2933.       }
  2934.  
  2935.       _mesa_reference_program(ctx, &linked_prog, NULL);
  2936.    }
  2937.  
  2938.    return prog->LinkStatus;
  2939. }
  2940.  
  2941. /**
  2942.  * Link a GLSL shader program.  Called via glLinkProgram().
  2943.  */
  2944. void
  2945. _mesa_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
  2946. {
  2947.    unsigned int i;
  2948.  
  2949.    _mesa_clear_shader_program_data(prog);
  2950.  
  2951.    prog->LinkStatus = GL_TRUE;
  2952.  
  2953.    for (i = 0; i < prog->NumShaders; i++) {
  2954.       if (!prog->Shaders[i]->CompileStatus) {
  2955.          linker_error(prog, "linking with uncompiled shader");
  2956.       }
  2957.    }
  2958.  
  2959.    if (prog->LinkStatus) {
  2960.       link_shaders(ctx, prog);
  2961.    }
  2962.  
  2963.    if (prog->LinkStatus) {
  2964.       if (!ctx->Driver.LinkShader(ctx, prog)) {
  2965.          prog->LinkStatus = GL_FALSE;
  2966.       }
  2967.    }
  2968.  
  2969.    if (ctx->_Shader->Flags & GLSL_DUMP) {
  2970.       if (!prog->LinkStatus) {
  2971.          fprintf(stderr, "GLSL shader program %d failed to link\n", prog->Name);
  2972.       }
  2973.  
  2974.       if (prog->InfoLog && prog->InfoLog[0] != 0) {
  2975.          fprintf(stderr, "GLSL shader program %d info log:\n", prog->Name);
  2976.          fprintf(stderr, "%s\n", prog->InfoLog);
  2977.       }
  2978.    }
  2979. }
  2980.  
  2981. } /* extern "C" */
  2982.