  1. /*
  2.  * Copyright (C) 2005-2007  Brian Paul   All Rights Reserved.
  3.  * Copyright (C) 2008  VMware, Inc.   All Rights Reserved.
  4.  * Copyright © 2010 Intel Corporation
  5.  * Copyright © 2011 Bryan Cain
  6.  *
  7.  * Permission is hereby granted, free of charge, to any person obtaining a
  8.  * copy of this software and associated documentation files (the "Software"),
  9.  * to deal in the Software without restriction, including without limitation
  10.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11.  * and/or sell copies of the Software, and to permit persons to whom the
  12.  * Software is furnished to do so, subject to the following conditions:
  13.  *
  14.  * The above copyright notice and this permission notice (including the next
  15.  * paragraph) shall be included in all copies or substantial portions of the
  16.  * Software.
  17.  *
  18.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  19.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  20.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  21.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  22.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  23.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  24.  * DEALINGS IN THE SOFTWARE.
  25.  */
  26.  
  27. /**
  28.  * \file glsl_to_tgsi.cpp
  29.  *
  30.  * Translate GLSL IR to TGSI.
  31.  */
  32.  
  33. #include <stdio.h>
  34. #include "main/compiler.h"
  35. #include "ir.h"
  36. #include "ir_visitor.h"
  37. #include "ir_expression_flattening.h"
  38. #include "glsl_types.h"
  39. #include "glsl_parser_extras.h"
  40. #include "../glsl/program.h"
  41. #include "ir_optimization.h"
  42. #include "ast.h"
  43.  
  44. #include "main/mtypes.h"
  45. #include "main/shaderobj.h"
  46. #include "program/hash_table.h"
  47.  
  48. extern "C" {
  49. #include "main/shaderapi.h"
  50. #include "main/uniforms.h"
  51. #include "program/prog_instruction.h"
  52. #include "program/prog_optimize.h"
  53. #include "program/prog_print.h"
  54. #include "program/program.h"
  55. #include "program/prog_parameter.h"
  56. #include "program/sampler.h"
  57.  
  58. #include "pipe/p_compiler.h"
  59. #include "pipe/p_context.h"
  60. #include "pipe/p_screen.h"
  61. #include "pipe/p_shader_tokens.h"
  62. #include "pipe/p_state.h"
  63. #include "util/u_math.h"
  64. #include "tgsi/tgsi_ureg.h"
  65. #include "tgsi/tgsi_info.h"
  66. #include "st_context.h"
  67. #include "st_program.h"
  68. #include "st_glsl_to_tgsi.h"
  69. #include "st_mesa_to_tgsi.h"
  70. }
  71.  
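/** Pseudo register file (beyond gl_register_file) used for inline immediates. */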
  72. #define PROGRAM_IMMEDIATE PROGRAM_FILE_MAX
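/** Bitmask of all register files that hold program constants. */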
  73. #define PROGRAM_ANY_CONST ((1 << PROGRAM_LOCAL_PARAM) |  \
  74.                            (1 << PROGRAM_ENV_PARAM) |    \
  75.                            (1 << PROGRAM_STATE_VAR) |    \
  76.                            (1 << PROGRAM_CONSTANT) |     \
  77.                            (1 << PROGRAM_UNIFORM))
  78.  
  79. /**
  80.  * Maximum number of temporary registers.
  81.  *
  82.  * This value is too large for stack-allocated arrays -- arrays of this size
  83.  * would overflow the stack on Windows and likely Mac OS X.
  84.  */
  85. #define MAX_TEMPS         4096
  86.  
  87. /**
  88.  * Maximum number of arrays
  89.  */
  90. #define MAX_ARRAYS        256
  91.  
  92. /* will be 4 for GLSL 4.00 */
  93. #define MAX_GLSL_TEXTURE_OFFSET 1
  94.  
  95. class st_src_reg;
  96. class st_dst_reg;
  97.  
  98. static int swizzle_for_size(int size);
  99.  
  100. /**
  101.  * This class corresponds to TGSI's ureg_src.
  102.  */
  103. class st_src_reg {
  104. public:
  105.    st_src_reg(gl_register_file file, int index, const glsl_type *type)
  106.    {
  107.       this->file = file;
  108.       this->index = index;
  109.       if (type && (type->is_scalar() || type->is_vector() || type->is_matrix()))
  110.          this->swizzle = swizzle_for_size(type->vector_elements);
  111.       else
  112.          this->swizzle = SWIZZLE_XYZW;
  113.       this->negate = 0;
  114.       this->index2D = 0;
  115.       this->type = type ? type->base_type : GLSL_TYPE_ERROR;
  116.       this->reladdr = NULL;
  117.    }
  118.  
  119.    st_src_reg(gl_register_file file, int index, int type)
  120.    {
  121.       this->type = type;
  122.       this->file = file;
  123.       this->index = index;
  124.       this->index2D = 0;
  125.       this->swizzle = SWIZZLE_XYZW;
  126.       this->negate = 0;
  127.       this->reladdr = NULL;
  128.    }
  129.  
  130.    st_src_reg(gl_register_file file, int index, int type, int index2D)
  131.    {
  132.       this->type = type;
  133.       this->file = file;
  134.       this->index = index;
  135.       this->index2D = index2D;
  136.       this->swizzle = SWIZZLE_XYZW;
  137.       this->negate = 0;
  138.       this->reladdr = NULL;
  139.    }
  140.  
  141.    st_src_reg()
  142.    {
  143.       this->type = GLSL_TYPE_ERROR;
  144.       this->file = PROGRAM_UNDEFINED;
  145.       this->index = 0;
  146.       this->index2D = 0;
  147.       this->swizzle = 0;
  148.       this->negate = 0;
  149.       this->reladdr = NULL;
  150.    }
  151.  
  152.    explicit st_src_reg(st_dst_reg reg);
  153.  
  154.    gl_register_file file; /**< PROGRAM_* from Mesa */
  155.    int index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */
  156.    int index2D;
  157.    GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */
  158.    int negate; /**< NEGATE_XYZW mask from mesa */
  159.    int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
  160.    /** Register index should be offset by the integer in this reg. */
  161.    st_src_reg *reladdr;
  162. };
  163.  
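/**
 * Destination-register counterpart of st_src_reg, corresponding to TGSI's
 * ureg_dst.
 */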
  164. class st_dst_reg {
  165. public:
  166.    st_dst_reg(gl_register_file file, int writemask, int type)
  167.    {
  168.       this->file = file;
  169.       this->index = 0;
  170.       this->writemask = writemask;
  171.       this->cond_mask = COND_TR;
  172.       this->reladdr = NULL;
  173.       this->type = type;
  174.    }
  175.  
  176.    st_dst_reg()
  177.    {
  178.       this->type = GLSL_TYPE_ERROR;
  179.       this->file = PROGRAM_UNDEFINED;
  180.       this->index = 0;
  181.       this->writemask = 0;
  182.       this->cond_mask = COND_TR;
  183.       this->reladdr = NULL;
  184.    }
  185.  
  186.    explicit st_dst_reg(st_src_reg reg);
  187.  
  188.    gl_register_file file; /**< PROGRAM_* from Mesa */
  189.    int index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */
  190.    int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
  191.    GLuint cond_mask:4;
  192.    int type; /** GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
  193.    /** Register index should be offset by the integer in this reg. */
  194.    st_src_reg *reladdr;
  195. };
  196.  
  197. st_src_reg::st_src_reg(st_dst_reg reg)
  198. {
  199.    this->type = reg.type;
  200.    this->file = reg.file;
  201.    this->index = reg.index;
  202.    this->swizzle = SWIZZLE_XYZW;
  203.    this->negate = 0;
  204.    this->reladdr = reg.reladdr;
  205.    this->index2D = 0;
  206. }
  207.  
  208. st_dst_reg::st_dst_reg(st_src_reg reg)
  209. {
  210.    this->type = reg.type;
  211.    this->file = reg.file;
  212.    this->index = reg.index;
  213.    this->writemask = WRITEMASK_XYZW;
  214.    this->cond_mask = COND_TR;
  215.    this->reladdr = reg.reladdr;
  216. }
  217.  
  218. class glsl_to_tgsi_instruction : public exec_node {
  219. public:
  220.    /* Callers of this ralloc-based new need not call delete. It's
  221.     * easier to just ralloc_free 'ctx' (or any of its ancestors). */
  222.    static void* operator new(size_t size, void *ctx)
  223.    {
  224.       void *node;
  225.  
  226.       node = rzalloc_size(ctx, size);
  227.       assert(node != NULL);
  228.  
  229.       return node;
  230.    }
  231.  
  232.    unsigned op;
  233.    st_dst_reg dst;
  234.    st_src_reg src[3];
  235.    /** Pointer to the ir source this tree came from for debugging */
  236.    ir_instruction *ir;
  237.    GLboolean cond_update;
  238.    bool saturate;
  239.    int sampler; /**< sampler index */
  240.    int tex_target; /**< One of TEXTURE_*_INDEX */
  241.    GLboolean tex_shadow;
  242.    struct tgsi_texture_offset tex_offsets[MAX_GLSL_TEXTURE_OFFSET];
  243.    unsigned tex_offset_num_offset;
  244.    int dead_mask; /**< Used in dead code elimination */
  245.  
  246.    class function_entry *function; /* Set on TGSI_OPCODE_CAL or TGSI_OPCODE_BGNSUB */
  247. };
  248.  
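/**
 * Mapping from an ir_variable to the register file and index where it is
 * stored.
 */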
  249. class variable_storage : public exec_node {
  250. public:
  251.    variable_storage(ir_variable *var, gl_register_file file, int index)
  252.       : file(file), index(index), var(var)
  253.    {
  254.       /* empty */
  255.    }
  256.  
  257.    gl_register_file file;
  258.    int index;
  259.    ir_variable *var; /* variable that maps to this, if any */
  260. };
  261.  
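/**
 * Entry in the list of immediate constants (the PROGRAM_IMMEDIATE file);
 * its position in that list is the immediate's register index.
 */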
  262. class immediate_storage : public exec_node {
  263. public:
  264.    immediate_storage(gl_constant_value *values, int size, int type)
  265.    {
  266.       memcpy(this->values, values, size * sizeof(gl_constant_value));
  267.       this->size = size;
  268.       this->type = type;
  269.    }
  270.    
  271.    gl_constant_value values[4];
  272.    int size; /**< Number of components (1-4) */
  273.    int type; /**< GL_FLOAT, GL_INT, GL_BOOL, or GL_UNSIGNED_INT */
  274. };
  275.  
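/**
 * Per-signature bookkeeping for a GLSL function that is emitted as a TGSI
 * subroutine.
 */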
  276. class function_entry : public exec_node {
  277. public:
  278.    ir_function_signature *sig;
  279.  
  280.    /**
  281.     * identifier of this function signature used by the program.
  282.     *
  283.     * At the point that TGSI instructions for function calls are
  284.     * generated, we don't know the address of the first instruction of
  285.     * the function body.  So we emit a small integer as the branch target
  286.     * and rewrite it during set_branchtargets().
  287.     */
  288.    int sig_id;
  289.  
  290.    /**
  291.     * Pointer to first instruction of the function body.
  292.     *
  293.     * Set during function body emits after main() is processed.
  294.     */
  295.    glsl_to_tgsi_instruction *bgn_inst;
  296.  
  297.    /**
  298.     * Index of the first instruction of the function body in actual TGSI.
  299.     *
  300.     * Set after conversion from glsl_to_tgsi_instruction to TGSI.
  301.     */
  302.    int inst;
  303.  
  304.    /** Storage for the return value. */
  305.    st_src_reg return_reg;
  306. };
  307.  
  308. struct glsl_to_tgsi_visitor : public ir_visitor {
  309. public:
  310.    glsl_to_tgsi_visitor();
  311.    ~glsl_to_tgsi_visitor();
  312.  
  313.    function_entry *current_function;
  314.  
  315.    struct gl_context *ctx;
  316.    struct gl_program *prog;
  317.    struct gl_shader_program *shader_program;
  318.    struct gl_shader_compiler_options *options;
  319.  
  320.    int next_temp;
  321.  
  322.    unsigned array_sizes[MAX_ARRAYS];
  323.    unsigned next_array;
  324.  
  325.    int num_address_regs;
  326.    int samplers_used;
  327.    bool indirect_addr_consts;
  328.    
  329.    int glsl_version;
  330.    bool native_integers;
  331.    bool have_sqrt;
  332.  
  333.    variable_storage *find_variable_storage(ir_variable *var);
  334.  
  335.    int add_constant(gl_register_file file, gl_constant_value values[4],
  336.                     int size, int datatype, GLuint *swizzle_out);
  337.  
  338.    function_entry *get_function_signature(ir_function_signature *sig);
  339.  
  340.    st_src_reg get_temp(const glsl_type *type);
  341.    void reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int *num_reladdr);
  342.  
  343.    st_src_reg st_src_reg_for_float(float val);
  344.    st_src_reg st_src_reg_for_int(int val);
  345.    st_src_reg st_src_reg_for_type(int type, int val);
  346.  
  347.    /**
  348.     * \name Visit methods
  349.     *
  350.     * As typical for the visitor pattern, there must be one \c visit method for
  351.     * each concrete subclass of \c ir_instruction.  Virtual base classes within
  352.     * the hierarchy should not have \c visit methods.
  353.     */
  354.    /*@{*/
  355.    virtual void visit(ir_variable *);
  356.    virtual void visit(ir_loop *);
  357.    virtual void visit(ir_loop_jump *);
  358.    virtual void visit(ir_function_signature *);
  359.    virtual void visit(ir_function *);
  360.    virtual void visit(ir_expression *);
  361.    virtual void visit(ir_swizzle *);
  362.    virtual void visit(ir_dereference_variable  *);
  363.    virtual void visit(ir_dereference_array *);
  364.    virtual void visit(ir_dereference_record *);
  365.    virtual void visit(ir_assignment *);
  366.    virtual void visit(ir_constant *);
  367.    virtual void visit(ir_call *);
  368.    virtual void visit(ir_return *);
  369.    virtual void visit(ir_discard *);
  370.    virtual void visit(ir_texture *);
  371.    virtual void visit(ir_if *);
  372.    /*@}*/
  373.  
  374.    st_src_reg result;
  375.  
  376.    /** List of variable_storage */
  377.    exec_list variables;
  378.  
  379.    /** List of immediate_storage */
  380.    exec_list immediates;
  381.    unsigned num_immediates;
  382.  
  383.    /** List of function_entry */
  384.    exec_list function_signatures;
  385.    int next_signature_id;
  386.  
  387.    /** List of glsl_to_tgsi_instruction */
  388.    exec_list instructions;
  389.  
  390.    glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op);
  391.  
  392.    glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
  393.                                 st_dst_reg dst, st_src_reg src0);
  394.  
  395.    glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
  396.                                 st_dst_reg dst, st_src_reg src0, st_src_reg src1);
  397.  
  398.    glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
  399.                                 st_dst_reg dst,
  400.                                 st_src_reg src0, st_src_reg src1, st_src_reg src2);
  401.    
  402.    unsigned get_opcode(ir_instruction *ir, unsigned op,
  403.                     st_dst_reg dst,
  404.                     st_src_reg src0, st_src_reg src1);
  405.  
  406.    /**
  407.     * Emit the correct dot-product instruction for the type of arguments
  408.     */
  409.    glsl_to_tgsi_instruction *emit_dp(ir_instruction *ir,
  410.                                      st_dst_reg dst,
  411.                                      st_src_reg src0,
  412.                                      st_src_reg src1,
  413.                                      unsigned elements);
  414.  
  415.    void emit_scalar(ir_instruction *ir, unsigned op,
  416.                     st_dst_reg dst, st_src_reg src0);
  417.  
  418.    void emit_scalar(ir_instruction *ir, unsigned op,
  419.                     st_dst_reg dst, st_src_reg src0, st_src_reg src1);
  420.  
  421.    void try_emit_float_set(ir_instruction *ir, unsigned op, st_dst_reg dst);
  422.  
  423.    void emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0);
  424.  
  425.    void emit_scs(ir_instruction *ir, unsigned op,
  426.                  st_dst_reg dst, const st_src_reg &src);
  427.  
  428.    bool try_emit_mad(ir_expression *ir,
  429.               int mul_operand);
  430.    bool try_emit_mad_for_and_not(ir_expression *ir,
  431.               int mul_operand);
  432.    bool try_emit_sat(ir_expression *ir);
  433.  
  434.    void emit_swz(ir_expression *ir);
  435.  
  436.    bool process_move_condition(ir_rvalue *ir);
  437.  
  438.    void simplify_cmp(void);
  439.  
  440.    void rename_temp_register(int index, int new_index);
  441.    int get_first_temp_read(int index);
  442.    int get_first_temp_write(int index);
  443.    int get_last_temp_read(int index);
  444.    int get_last_temp_write(int index);
  445.  
  446.    void copy_propagate(void);
  447.    void eliminate_dead_code(void);
  448.    int eliminate_dead_code_advanced(void);
  449.    void merge_registers(void);
  450.    void renumber_registers(void);
  451.  
  452.    void emit_block_mov(ir_assignment *ir, const struct glsl_type *type,
  453.                        st_dst_reg *l, st_src_reg *r);
  454.  
  455.    void *mem_ctx;
  456. };
  457.  
  458. static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR);
  459.  
  460. static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR);
  461.  
  462. static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT);
  463.  
  464. static void
  465. fail_link(struct gl_shader_program *prog, const char *fmt, ...) PRINTFLIKE(2, 3);
  466.  
  467. static void
  468. fail_link(struct gl_shader_program *prog, const char *fmt, ...)
  469. {
  470.    va_list args;
  471.    va_start(args, fmt);
  472.    ralloc_vasprintf_append(&prog->InfoLog, fmt, args);
  473.    va_end(args);
  474.  
  475.    prog->LinkStatus = GL_FALSE;
  476. }
  477.  
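/**
 * Return a swizzle for the given vector size that replicates the last
 * component into the unused channels (e.g. size 2 yields XYYY), so reads
 * never reference undefined components.
 */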
  478. static int
  479. swizzle_for_size(int size)
  480. {
  481.    int size_swizzles[4] = {
  482.       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
  483.       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
  484.       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z),
  485.       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W),
  486.    };
  487.  
  488.    assert((size >= 1) && (size <= 4));
  489.    return size_swizzles[size - 1];
  490. }
  491.  
  492. static bool
  493. is_tex_instruction(unsigned opcode)
  494. {
  495.    const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode);
  496.    return info->is_tex;
  497. }
  498.  
  499. static unsigned
  500. num_inst_dst_regs(unsigned opcode)
  501. {
  502.    const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode);
  503.    return info->num_dst;
  504. }
  505.  
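/**
 * Number of source registers an opcode consumes, not counting the sampler
 * operand of texture instructions (the sampler is tracked separately in
 * glsl_to_tgsi_instruction::sampler).
 */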
  506. static unsigned
  507. num_inst_src_regs(unsigned opcode)
  508. {
  509.    const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode);
  510.    return info->is_tex ? info->num_src - 1 : info->num_src;
  511. }
  512.  
  513. glsl_to_tgsi_instruction *
  514. glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
  515.                          st_dst_reg dst,
  516.                          st_src_reg src0, st_src_reg src1, st_src_reg src2)
  517. {
  518.    glsl_to_tgsi_instruction *inst = new(mem_ctx) glsl_to_tgsi_instruction();
  519.    int num_reladdr = 0, i;
  520.    
  521.    op = get_opcode(ir, op, dst, src0, src1);
  522.  
  523.    /* If we have to do relative addressing, we want to load the ARL
  524.     * reg directly for one of the regs, and preload the other reladdr
  525.     * sources into temps.
  526.     */
  527.    num_reladdr += dst.reladdr != NULL;
  528.    num_reladdr += src0.reladdr != NULL;
  529.    num_reladdr += src1.reladdr != NULL;
  530.    num_reladdr += src2.reladdr != NULL;
  531.  
  532.    reladdr_to_temp(ir, &src2, &num_reladdr);
  533.    reladdr_to_temp(ir, &src1, &num_reladdr);
  534.    reladdr_to_temp(ir, &src0, &num_reladdr);
  535.  
  536.    if (dst.reladdr) {
  537.       emit_arl(ir, address_reg, *dst.reladdr);
  538.       num_reladdr--;
  539.    }
  540.    assert(num_reladdr == 0);
  541.  
  542.    inst->op = op;
  543.    inst->dst = dst;
  544.    inst->src[0] = src0;
  545.    inst->src[1] = src1;
  546.    inst->src[2] = src2;
  547.    inst->ir = ir;
  548.    inst->dead_mask = 0;
  549.  
  550.    inst->function = NULL;
  551.    
  552.    if (op == TGSI_OPCODE_ARL || op == TGSI_OPCODE_UARL)
  553.       this->num_address_regs = 1;
  554.    
  555.    /* Update indirect addressing status used by TGSI */
  556.    if (dst.reladdr) {
  557.       switch(dst.file) {
  558.       case PROGRAM_LOCAL_PARAM:
  559.       case PROGRAM_ENV_PARAM:
  560.       case PROGRAM_STATE_VAR:
  561.       case PROGRAM_CONSTANT:
  562.       case PROGRAM_UNIFORM:
  563.          this->indirect_addr_consts = true;
  564.          break;
  565.       case PROGRAM_IMMEDIATE:
  566.          assert(!"immediates should not have indirect addressing");
  567.          break;
  568.       default:
  569.          break;
  570.       }
  571.    }
  572.    else {
  573.       for (i=0; i<3; i++) {
  574.          if(inst->src[i].reladdr) {
  575.             switch(inst->src[i].file) {
  576.             case PROGRAM_LOCAL_PARAM:
  577.             case PROGRAM_ENV_PARAM:
  578.             case PROGRAM_STATE_VAR:
  579.             case PROGRAM_CONSTANT:
  580.             case PROGRAM_UNIFORM:
  581.                this->indirect_addr_consts = true;
  582.                break;
  583.             case PROGRAM_IMMEDIATE:
  584.                assert(!"immediates should not have indirect addressing");
  585.                break;
  586.             default:
  587.                break;
  588.             }
  589.          }
  590.       }
  591.    }
  592.  
  593.    this->instructions.push_tail(inst);
  594.  
  595.    if (native_integers)
  596.       try_emit_float_set(ir, op, dst);
  597.  
  598.    return inst;
  599. }
  600.  
  601.  
  602. glsl_to_tgsi_instruction *
  603. glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
  604.                          st_dst_reg dst, st_src_reg src0, st_src_reg src1)
  605. {
  606.    return emit(ir, op, dst, src0, src1, undef_src);
  607. }
  608.  
  609. glsl_to_tgsi_instruction *
  610. glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
  611.                          st_dst_reg dst, st_src_reg src0)
  612. {
  613.    assert(dst.writemask != 0);
  614.    return emit(ir, op, dst, src0, undef_src, undef_src);
  615. }
  616.  
  617. glsl_to_tgsi_instruction *
  618. glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op)
  619. {
  620.    return emit(ir, op, undef_dst, undef_src, undef_src, undef_src);
  621. }
  622.  
  623.  /**
  624.  * Emits the code to convert the result of float SET instructions to integers.
  625.  */
  626. void
  627. glsl_to_tgsi_visitor::try_emit_float_set(ir_instruction *ir, unsigned op,
  628.                          st_dst_reg dst)
  629. {
  630.    if ((op == TGSI_OPCODE_SEQ ||
  631.         op == TGSI_OPCODE_SNE ||
  632.         op == TGSI_OPCODE_SGE ||
  633.         op == TGSI_OPCODE_SLT))
  634.    {
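      /* Float SET opcodes write 0.0 or 1.0.  Negating that result and
       * converting with F2I yields 0 or -1 (all bits set), the boolean
       * encoding expected when native integers are in use.
       */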
  635.       st_src_reg src = st_src_reg(dst);
  636.       src.negate = ~src.negate;
  637.       dst.type = GLSL_TYPE_FLOAT;
  638.       emit(ir, TGSI_OPCODE_F2I, dst, src);
  639.    }
  640. }
  641.  
  642. /**
  643.  * Determines whether to use an integer, unsigned integer, or float opcode
  644.  * based on the operands and input opcode, then emits the result.
  645.  */
  646. unsigned
  647. glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op,
  648.                          st_dst_reg dst,
  649.                          st_src_reg src0, st_src_reg src1)
  650. {
  651.    int type = GLSL_TYPE_FLOAT;
  652.    
  653.    assert(src0.type != GLSL_TYPE_ARRAY);
  654.    assert(src0.type != GLSL_TYPE_STRUCT);
  655.    assert(src1.type != GLSL_TYPE_ARRAY);
  656.    assert(src1.type != GLSL_TYPE_STRUCT);
  657.  
  658.    if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT)
  659.       type = GLSL_TYPE_FLOAT;
  660.    else if (native_integers)
  661.       type = src0.type == GLSL_TYPE_BOOL ? GLSL_TYPE_INT : src0.type;
  662.  
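/* These macros pick the float, signed-integer, or unsigned-integer variant
 * of an opcode based on the operand type computed above.  case2iu has no
 * float form and resolves to TGSI_OPCODE_LAST, which the assert below
 * catches.
 */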
  663. #define case4(c, f, i, u) \
  664.    case TGSI_OPCODE_##c: \
  665.       if (type == GLSL_TYPE_INT) op = TGSI_OPCODE_##i; \
  666.       else if (type == GLSL_TYPE_UINT) op = TGSI_OPCODE_##u; \
  667.       else op = TGSI_OPCODE_##f; \
  668.       break;
  669. #define case3(f, i, u)  case4(f, f, i, u)
  670. #define case2fi(f, i)   case4(f, f, i, i)
  671. #define case2iu(i, u)   case4(i, LAST, i, u)
  672.    
  673.    switch(op) {
  674.       case2fi(ADD, UADD);
  675.       case2fi(MUL, UMUL);
  676.       case2fi(MAD, UMAD);
  677.       case3(DIV, IDIV, UDIV);
  678.       case3(MAX, IMAX, UMAX);
  679.       case3(MIN, IMIN, UMIN);
  680.       case2iu(MOD, UMOD);
  681.      
  682.       case2fi(SEQ, USEQ);
  683.       case2fi(SNE, USNE);
  684.       case3(SGE, ISGE, USGE);
  685.       case3(SLT, ISLT, USLT);
  686.      
  687.       case2iu(ISHR, USHR);
  688.  
  689.       case2fi(SSG, ISSG);
  690.       case3(ABS, IABS, IABS);
  691.      
  692.       default: break;
  693.    }
  694.    
  695.    assert(op != TGSI_OPCODE_LAST);
  696.    return op;
  697. }
  698.  
  699. glsl_to_tgsi_instruction *
  700. glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir,
  701.                             st_dst_reg dst, st_src_reg src0, st_src_reg src1,
  702.                             unsigned elements)
  703. {
  704.    static const unsigned dot_opcodes[] = {
  705.       TGSI_OPCODE_DP2, TGSI_OPCODE_DP3, TGSI_OPCODE_DP4
  706.    };
  707.  
  708.    return emit(ir, dot_opcodes[elements - 2], dst, src0, src1);
  709. }
  710.  
  711. /**
  712.  * Emits TGSI scalar opcodes to produce unique answers across channels.
  713.  *
  714.  * Some TGSI opcodes are scalar-only, as in ARB_fp/vp: the src X channel
  715.  * determines the result for all channels.  So to apply such an operation
  716.  * to a vec4, we emit one scalar instruction per distinct source channel
  717.  * used to produce the destination channels.
  718.  */
  719. void
  720. glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op,
  721.                                 st_dst_reg dst,
  722.                                 st_src_reg orig_src0, st_src_reg orig_src1)
  723. {
  724.    int i, j;
  725.    int done_mask = ~dst.writemask;
  726.  
  727.    /* TGSI RCP is a scalar operation splatting results to all channels,
  728.     * like ARB_fp/vp.  So emit as many RCPs as necessary to cover our
  729.     * dst channels.
  730.     */
  731.    for (i = 0; i < 4; i++) {
  732.       GLuint this_mask = (1 << i);
  733.       glsl_to_tgsi_instruction *inst;
  734.       st_src_reg src0 = orig_src0;
  735.       st_src_reg src1 = orig_src1;
  736.  
  737.       if (done_mask & this_mask)
  738.          continue;
  739.  
  740.       GLuint src0_swiz = GET_SWZ(src0.swizzle, i);
  741.       GLuint src1_swiz = GET_SWZ(src1.swizzle, i);
  742.       for (j = i + 1; j < 4; j++) {
  743.          /* If there is another enabled component in the destination that is
  744.           * derived from the same inputs, generate its value on this pass as
  745.           * well.
  746.           */
  747.          if (!(done_mask & (1 << j)) &&
  748.              GET_SWZ(src0.swizzle, j) == src0_swiz &&
  749.              GET_SWZ(src1.swizzle, j) == src1_swiz) {
  750.             this_mask |= (1 << j);
  751.          }
  752.       }
  753.       src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
  754.                                    src0_swiz, src0_swiz);
  755.       src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz,
  756.                                   src1_swiz, src1_swiz);
  757.  
  758.       inst = emit(ir, op, dst, src0, src1);
  759.       inst->dst.writemask = this_mask;
  760.       done_mask |= this_mask;
  761.    }
  762. }
  763.  
  764. void
  765. glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op,
  766.                                 st_dst_reg dst, st_src_reg src0)
  767. {
  768.    st_src_reg undef = undef_src;
  769.  
  770.    undef.swizzle = SWIZZLE_XXXX;
  771.  
  772.    emit_scalar(ir, op, dst, src0, undef);
  773. }
  774.  
  775. void
  776. glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir,
  777.                                 st_dst_reg dst, st_src_reg src0)
  778. {
  779.    int op = TGSI_OPCODE_ARL;
  780.  
  781.    if (src0.type == GLSL_TYPE_INT || src0.type == GLSL_TYPE_UINT)
  782.       op = TGSI_OPCODE_UARL;
  783.  
  784.    emit(NULL, op, dst, src0);
  785. }
  786.  
  787. /**
  788.  * Emit a TGSI_OPCODE_SCS instruction
  789.  *
  790.  * The \c SCS opcode functions a bit differently than the other TGSI opcodes.
  791.  * Instead of splatting its result across all four components of the
  792.  * destination, it writes one value to the \c x component and another value to
  793.  * the \c y component.
  794.  *
  795.  * \param ir        IR instruction being processed
  796.  * \param op        Either \c TGSI_OPCODE_SIN or \c TGSI_OPCODE_COS depending
  797.  *                  on which value is desired.
  798.  * \param dst       Destination register
  799.  * \param src       Source register
  800.  */
  801. void
  802. glsl_to_tgsi_visitor::emit_scs(ir_instruction *ir, unsigned op,
  803.                              st_dst_reg dst,
  804.                              const st_src_reg &src)
  805. {
  806.    /* Vertex programs cannot use the SCS opcode.
  807.     */
  808.    if (this->prog->Target == GL_VERTEX_PROGRAM_ARB) {
  809.       emit_scalar(ir, op, dst, src);
  810.       return;
  811.    }
  812.  
  813.    const unsigned component = (op == TGSI_OPCODE_SIN) ? 0 : 1;
  814.    const unsigned scs_mask = (1U << component);
  815.    int done_mask = ~dst.writemask;
  816.    st_src_reg tmp;
  817.  
  818.    assert(op == TGSI_OPCODE_SIN || op == TGSI_OPCODE_COS);
  819.  
  820.    /* If there are components in the destination that differ from the component
  821.     * that will be written by the SCS instruction, we'll need a temporary.
  822.     */
  823.    if (scs_mask != unsigned(dst.writemask)) {
  824.       tmp = get_temp(glsl_type::vec4_type);
  825.    }
  826.  
  827.    for (unsigned i = 0; i < 4; i++) {
  828.       unsigned this_mask = (1U << i);
  829.       st_src_reg src0 = src;
  830.  
  831.       if ((done_mask & this_mask) != 0)
  832.          continue;
  833.  
  834.       /* The source swizzle specifies which component of the source generates
  835.        * sine / cosine for the current component in the destination.  The SCS
  836.        * instruction requires that this value be swizzled to the X component.
  837.        * Replace the current swizzle with a swizzle that puts the source in
  838.        * the X component.
  839.        */
  840.       unsigned src0_swiz = GET_SWZ(src.swizzle, i);
  841.  
  842.       src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
  843.                                    src0_swiz, src0_swiz);
  844.       for (unsigned j = i + 1; j < 4; j++) {
  845.          /* If there is another enabled component in the destination that is
  846.           * derived from the same inputs, generate its value on this pass as
  847.           * well.
  848.           */
  849.          if (!(done_mask & (1 << j)) &&
  850.              GET_SWZ(src0.swizzle, j) == src0_swiz) {
  851.             this_mask |= (1 << j);
  852.          }
  853.       }
  854.  
  855.       if (this_mask != scs_mask) {
  856.          glsl_to_tgsi_instruction *inst;
  857.          st_dst_reg tmp_dst = st_dst_reg(tmp);
  858.  
  859.          /* Emit the SCS instruction.
  860.           */
  861.          inst = emit(ir, TGSI_OPCODE_SCS, tmp_dst, src0);
  862.          inst->dst.writemask = scs_mask;
  863.  
  864.          /* Move the result of the SCS instruction to the desired location in
  865.           * the destination.
  866.           */
  867.          tmp.swizzle = MAKE_SWIZZLE4(component, component,
  868.                                      component, component);
  869.          inst = emit(ir, TGSI_OPCODE_SCS, dst, tmp);
  870.          inst->dst.writemask = this_mask;
  871.       } else {
  872.          /* Emit the SCS instruction to write directly to the destination.
  873.           */
  874.          glsl_to_tgsi_instruction *inst = emit(ir, TGSI_OPCODE_SCS, dst, src0);
  875.          inst->dst.writemask = scs_mask;
  876.       }
  877.  
  878.       done_mask |= this_mask;
  879.    }
  880. }
  881.  
  882. int
  883. glsl_to_tgsi_visitor::add_constant(gl_register_file file,
  884.                              gl_constant_value values[4], int size, int datatype,
  885.                              GLuint *swizzle_out)
  886. {
  887.    if (file == PROGRAM_CONSTANT) {
  888.       return _mesa_add_typed_unnamed_constant(this->prog->Parameters, values,
  889.                                               size, datatype, swizzle_out);
  890.    } else {
  891.       int index = 0;
  892.       immediate_storage *entry;
  893.       assert(file == PROGRAM_IMMEDIATE);
  894.  
  895.       /* Search immediate storage to see if we already have an identical
  896.        * immediate that we can use instead of adding a duplicate entry.
  897.        */
  898.       foreach_iter(exec_list_iterator, iter, this->immediates) {
  899.          entry = (immediate_storage *)iter.get();
  900.          
  901.          if (entry->size == size &&
  902.              entry->type == datatype &&
  903.              !memcmp(entry->values, values, size * sizeof(gl_constant_value))) {
  904.              return index;
  905.          }
  906.          index++;
  907.       }
  908.      
  909.       /* Add this immediate to the list. */
  910.       entry = new(mem_ctx) immediate_storage(values, size, datatype);
  911.       this->immediates.push_tail(entry);
  912.       this->num_immediates++;
  913.       return index;
  914.    }
  915. }
  916.  
  917. st_src_reg
  918. glsl_to_tgsi_visitor::st_src_reg_for_float(float val)
  919. {
  920.    st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_FLOAT);
  921.    union gl_constant_value uval;
  922.  
  923.    uval.f = val;
  924.    src.index = add_constant(src.file, &uval, 1, GL_FLOAT, &src.swizzle);
  925.  
  926.    return src;
  927. }
  928.  
  929. st_src_reg
  930. glsl_to_tgsi_visitor::st_src_reg_for_int(int val)
  931. {
  932.    st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_INT);
  933.    union gl_constant_value uval;
  934.    
  935.    assert(native_integers);
  936.  
  937.    uval.i = val;
  938.    src.index = add_constant(src.file, &uval, 1, GL_INT, &src.swizzle);
  939.  
  940.    return src;
  941. }
  942.  
  943. st_src_reg
  944. glsl_to_tgsi_visitor::st_src_reg_for_type(int type, int val)
  945. {
  946.    if (native_integers)
  947.       return type == GLSL_TYPE_FLOAT ? st_src_reg_for_float(val) :
  948.                                        st_src_reg_for_int(val);
  949.    else
  950.       return st_src_reg_for_float(val);
  951. }
  952.  
  953. static int
  954. type_size(const struct glsl_type *type)
  955. {
  956.    unsigned int i;
  957.    int size;
  958.  
  959.    switch (type->base_type) {
  960.    case GLSL_TYPE_UINT:
  961.    case GLSL_TYPE_INT:
  962.    case GLSL_TYPE_FLOAT:
  963.    case GLSL_TYPE_BOOL:
  964.       if (type->is_matrix()) {
  965.          return type->matrix_columns;
  966.       } else {
  967.          /* Regardless of size of vector, it gets a vec4. This is bad
  968.           * packing for things like floats, but otherwise arrays become a
  969.           * mess.  Hopefully a later pass over the code can pack scalars
  970.           * down if appropriate.
  971.           */
  972.          return 1;
  973.       }
  974.    case GLSL_TYPE_ARRAY:
  975.       assert(type->length > 0);
  976.       return type_size(type->fields.array) * type->length;
  977.    case GLSL_TYPE_STRUCT:
  978.       size = 0;
  979.       for (i = 0; i < type->length; i++) {
  980.          size += type_size(type->fields.structure[i].type);
  981.       }
  982.       return size;
  983.    case GLSL_TYPE_SAMPLER:
  984.       /* Samplers take up one slot in UNIFORMS[], but they're baked in
  985.        * at link time.
  986.        */
  987.       return 1;
  988.    case GLSL_TYPE_INTERFACE:
  989.    case GLSL_TYPE_VOID:
  990.    case GLSL_TYPE_ERROR:
  991.       assert(!"Invalid type in type_size");
  992.       break;
  993.    }
  994.    return 0;
  995. }
  996.  
  997. /**
  998.  * In the initial pass of codegen, we assign temporary numbers to
  999.  * intermediate results.  (not SSA -- variable assignments will reuse
  1000.  * storage).
  1001.  */
  1002. st_src_reg
  1003. glsl_to_tgsi_visitor::get_temp(const glsl_type *type)
  1004. {
  1005.    st_src_reg src;
  1006.  
  1007.    src.type = native_integers ? type->base_type : GLSL_TYPE_FLOAT;
  1008.    src.reladdr = NULL;
  1009.    src.negate = 0;
  1010.  
  1011.    if (!options->EmitNoIndirectTemp &&
  1012.        (type->is_array() || type->is_matrix())) {
  1013.  
  1014.       src.file = PROGRAM_ARRAY;
  1015.       src.index = next_array << 16 | 0x8000;
  1016.       array_sizes[next_array] = type_size(type);
  1017.       ++next_array;
  1018.  
  1019.    } else {
  1020.       src.file = PROGRAM_TEMPORARY;
  1021.       src.index = next_temp;
  1022.       next_temp += type_size(type);
  1023.    }
  1024.  
  1025.    if (type->is_array() || type->is_record()) {
  1026.       src.swizzle = SWIZZLE_NOOP;
  1027.    } else {
  1028.       src.swizzle = swizzle_for_size(type->vector_elements);
  1029.    }
  1030.  
  1031.    return src;
  1032. }
  1033.  
  1034. variable_storage *
  1035. glsl_to_tgsi_visitor::find_variable_storage(ir_variable *var)
  1036. {
  1037.    
  1038.    variable_storage *entry;
  1039.  
  1040.    foreach_iter(exec_list_iterator, iter, this->variables) {
  1041.       entry = (variable_storage *)iter.get();
  1042.  
  1043.       if (entry->var == var)
  1044.          return entry;
  1045.    }
  1046.  
  1047.    return NULL;
  1048. }
  1049.  
  1050. void
  1051. glsl_to_tgsi_visitor::visit(ir_variable *ir)
  1052. {
  1053.    if (strcmp(ir->name, "gl_FragCoord") == 0) {
  1054.       struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
  1055.  
  1056.       fp->OriginUpperLeft = ir->origin_upper_left;
  1057.       fp->PixelCenterInteger = ir->pixel_center_integer;
  1058.    }
  1059.  
  1060.    if (ir->mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) {
  1061.       unsigned int i;
  1062.       const ir_state_slot *const slots = ir->state_slots;
  1063.       assert(ir->state_slots != NULL);
  1064.  
  1065.       /* Check if this statevar's setup in the STATE file exactly
  1066.        * matches how we'll want to reference it as a
  1067.        * struct/array/whatever.  If not, then we need to move it into
  1068.        * temporary storage and hope that it'll get copy-propagated
  1069.        * out.
  1070.        */
  1071.       for (i = 0; i < ir->num_state_slots; i++) {
  1072.          if (slots[i].swizzle != SWIZZLE_XYZW) {
  1073.             break;
  1074.          }
  1075.       }
  1076.  
  1077.       variable_storage *storage;
  1078.       st_dst_reg dst;
  1079.       if (i == ir->num_state_slots) {
  1080.          /* We'll set the index later. */
  1081.          storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1);
  1082.          this->variables.push_tail(storage);
  1083.  
  1084.          dst = undef_dst;
  1085.       } else {
  1086.          /* The variable_storage constructor allocates slots based on the size
  1087.           * of the type.  However, this had better match the number of state
  1088.           * elements that we're going to copy into the new temporary.
  1089.           */
  1090.          assert((int) ir->num_state_slots == type_size(ir->type));
  1091.  
  1092.          dst = st_dst_reg(get_temp(ir->type));
  1093.  
  1094.          storage = new(mem_ctx) variable_storage(ir, dst.file, dst.index);
  1095.  
  1096.          this->variables.push_tail(storage);
  1097.       }
  1098.  
  1099.  
  1100.       for (unsigned int i = 0; i < ir->num_state_slots; i++) {
  1101.          int index = _mesa_add_state_reference(this->prog->Parameters,
  1102.                                                (gl_state_index *)slots[i].tokens);
  1103.  
  1104.          if (storage->file == PROGRAM_STATE_VAR) {
  1105.             if (storage->index == -1) {
  1106.                storage->index = index;
  1107.             } else {
  1108.                assert(index == storage->index + (int)i);
  1109.             }
  1110.          } else {
  1111.                 /* We use GLSL_TYPE_FLOAT here regardless of the actual type of
  1112.                  * the data being moved since MOV does not care about the type of
  1113.                  * data it is moving, and we don't want to declare registers with
  1114.                  * array or struct types.
  1115.                  */
  1116.             st_src_reg src(PROGRAM_STATE_VAR, index, GLSL_TYPE_FLOAT);
  1117.             src.swizzle = slots[i].swizzle;
  1118.             emit(ir, TGSI_OPCODE_MOV, dst, src);
  1119.             /* even a float takes up a whole vec4 reg in a struct/array. */
  1120.             dst.index++;
  1121.          }
  1122.       }
  1123.  
  1124.       if (storage->file == PROGRAM_TEMPORARY &&
  1125.           dst.index != storage->index + (int) ir->num_state_slots) {
  1126.          fail_link(this->shader_program,
  1127.                    "failed to load builtin uniform `%s'  (%d/%d regs loaded)\n",
  1128.                    ir->name, dst.index - storage->index,
  1129.                    type_size(ir->type));
  1130.       }
  1131.    }
  1132. }
  1133.  
  1134. void
  1135. glsl_to_tgsi_visitor::visit(ir_loop *ir)
  1136. {
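   /* When the loop's counter, bounds and increment are known, synthesize the
    * initialization, exit test and increment as ordinary IR and visit it;
    * the body is emitted between BGNLOOP and ENDLOOP.
    */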
  1137.    ir_dereference_variable *counter = NULL;
  1138.  
  1139.    if (ir->counter != NULL)
  1140.       counter = new(ir) ir_dereference_variable(ir->counter);
  1141.  
  1142.    if (ir->from != NULL) {
  1143.       assert(ir->counter != NULL);
  1144.  
  1145.       ir_assignment *a = new(ir) ir_assignment(counter, ir->from, NULL);
  1146.  
  1147.       a->accept(this);
  1148.       delete a;
  1149.    }
  1150.  
  1151.    emit(NULL, TGSI_OPCODE_BGNLOOP);
  1152.  
  1153.    if (ir->to) {
  1154.       ir_expression *e =
  1155.          new(ir) ir_expression(ir->cmp, glsl_type::bool_type,
  1156.                                counter, ir->to);
  1157.       ir_if *if_stmt =  new(ir) ir_if(e);
  1158.  
  1159.       ir_loop_jump *brk = new(ir) ir_loop_jump(ir_loop_jump::jump_break);
  1160.  
  1161.       if_stmt->then_instructions.push_tail(brk);
  1162.  
  1163.       if_stmt->accept(this);
  1164.  
  1165.       delete if_stmt;
  1166.       delete e;
  1167.       delete brk;
  1168.    }
  1169.  
  1170.    visit_exec_list(&ir->body_instructions, this);
  1171.  
  1172.    if (ir->increment) {
  1173.       ir_expression *e =
  1174.          new(ir) ir_expression(ir_binop_add, counter->type,
  1175.                                counter, ir->increment);
  1176.  
  1177.       ir_assignment *a = new(ir) ir_assignment(counter, e, NULL);
  1178.  
  1179.       a->accept(this);
  1180.       delete a;
  1181.       delete e;
  1182.    }
  1183.  
  1184.    emit(NULL, TGSI_OPCODE_ENDLOOP);
  1185. }
  1186.  
  1187. void
  1188. glsl_to_tgsi_visitor::visit(ir_loop_jump *ir)
  1189. {
  1190.    switch (ir->mode) {
  1191.    case ir_loop_jump::jump_break:
  1192.       emit(NULL, TGSI_OPCODE_BRK);
  1193.       break;
  1194.    case ir_loop_jump::jump_continue:
  1195.       emit(NULL, TGSI_OPCODE_CONT);
  1196.       break;
  1197.    }
  1198. }
  1199.  
  1200.  
  1201. void
  1202. glsl_to_tgsi_visitor::visit(ir_function_signature *ir)
  1203. {
  1204.    assert(0);
  1205.    (void)ir;
  1206. }
  1207.  
  1208. void
  1209. glsl_to_tgsi_visitor::visit(ir_function *ir)
  1210. {
  1211.    /* Ignore function bodies other than main() -- we shouldn't see calls to
  1212.     * them since they should all be inlined before we get to glsl_to_tgsi.
  1213.     */
  1214.    if (strcmp(ir->name, "main") == 0) {
  1215.       const ir_function_signature *sig;
  1216.       exec_list empty;
  1217.  
  1218.       sig = ir->matching_signature(&empty);
  1219.  
  1220.       assert(sig);
  1221.  
  1222.       foreach_iter(exec_list_iterator, iter, sig->body) {
  1223.          ir_instruction *ir = (ir_instruction *)iter.get();
  1224.  
  1225.          ir->accept(this);
  1226.       }
  1227.    }
  1228. }
  1229.  
  1230. bool
  1231. glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
  1232. {
  1233.    int nonmul_operand = 1 - mul_operand;
  1234.    st_src_reg a, b, c;
  1235.    st_dst_reg result_dst;
  1236.  
  1237.    ir_expression *expr = ir->operands[mul_operand]->as_expression();
  1238.    if (!expr || expr->operation != ir_binop_mul)
  1239.       return false;
  1240.  
  1241.    expr->operands[0]->accept(this);
  1242.    a = this->result;
  1243.    expr->operands[1]->accept(this);
  1244.    b = this->result;
  1245.    ir->operands[nonmul_operand]->accept(this);
  1246.    c = this->result;
  1247.  
  1248.    this->result = get_temp(ir->type);
  1249.    result_dst = st_dst_reg(this->result);
  1250.    result_dst.writemask = (1 << ir->type->vector_elements) - 1;
  1251.    emit(ir, TGSI_OPCODE_MAD, result_dst, a, b, c);
  1252.  
  1253.    return true;
  1254. }
  1255.  
  1256. /**
  1257.  * Emit MAD(a, -b, a) instead of AND(a, NOT(b))
  1258.  *
  1259.  * The logic values are 1.0 for true and 0.0 for false.  Logical-and is
  1260.  * implemented using multiplication, and logical-or is implemented using
  1261.  * addition.  Logical-not can be implemented as (true - x), or (1.0 - x).
  1262.  * As a result, the logical expression (a & !b) can be rewritten as:
  1263.  *
  1264.  *     - a * !b
  1265.  *     - a * (1 - b)
  1266.  *     - (a * 1) - (a * b)
  1267.  *     - a + -(a * b)
  1268.  *     - a + (a * -b)
  1269.  *
  1270.  * This final expression can be implemented as a single MAD(a, -b, a)
  1271.  * instruction.
  1272.  */
  1273. bool
  1274. glsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand)
  1275. {
  1276.    const int other_operand = 1 - try_operand;
  1277.    st_src_reg a, b;
  1278.  
  1279.    ir_expression *expr = ir->operands[try_operand]->as_expression();
  1280.    if (!expr || expr->operation != ir_unop_logic_not)
  1281.       return false;
  1282.  
  1283.    ir->operands[other_operand]->accept(this);
  1284.    a = this->result;
  1285.    expr->operands[0]->accept(this);
  1286.    b = this->result;
  1287.  
  1288.    b.negate = ~b.negate;
  1289.  
  1290.    this->result = get_temp(ir->type);
  1291.    emit(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), a, b, a);
  1292.  
  1293.    return true;
  1294. }
  1295.  
  1296. bool
  1297. glsl_to_tgsi_visitor::try_emit_sat(ir_expression *ir)
  1298. {
  1299.    /* Emit saturates in the vertex shader only if SM 3.0 is supported.
  1300.     */
  1301.    if (this->prog->Target == GL_VERTEX_PROGRAM_ARB &&
  1302.        !st_context(this->ctx)->has_shader_model3) {
  1303.       return false;
  1304.    }
  1305.  
  1306.    ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
  1307.    if (!sat_src)
  1308.       return false;
  1309.  
  1310.    sat_src->accept(this);
  1311.    st_src_reg src = this->result;
  1312.  
  1313.    /* If we generated an expression instruction into a temporary in
  1314.     * processing the saturate's operand, apply the saturate to that
  1315.     * instruction.  Otherwise, generate a MOV to do the saturate.
  1316.     *
  1317.     * Note that we have to be careful to only do this optimization if
  1318.     * the instruction in question was what generated src->result.  For
  1319.     * example, ir_dereference_array might generate a MUL instruction
  1320.     * to create the reladdr, and return us a src reg using that
  1321.     * reladdr.  That MUL result is not the value we're trying to
  1322.     * saturate.
  1323.     */
  1324.    ir_expression *sat_src_expr = sat_src->as_expression();
  1325.    if (sat_src_expr && (sat_src_expr->operation == ir_binop_mul ||
  1326.                         sat_src_expr->operation == ir_binop_add ||
  1327.                         sat_src_expr->operation == ir_binop_dot)) {
  1328.       glsl_to_tgsi_instruction *new_inst;
  1329.       new_inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
  1330.       new_inst->saturate = true;
  1331.    } else {
  1332.       this->result = get_temp(ir->type);
  1333.       st_dst_reg result_dst = st_dst_reg(this->result);
  1334.       result_dst.writemask = (1 << ir->type->vector_elements) - 1;
  1335.       glsl_to_tgsi_instruction *inst;
  1336.       inst = emit(ir, TGSI_OPCODE_MOV, result_dst, src);
  1337.       inst->saturate = true;
  1338.    }
  1339.  
  1340.    return true;
  1341. }
  1342.  
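/**
 * Resolve relative addressing on a source operand: load the address register
 * for it and, unless this is the last reladdr operand of the instruction,
 * copy the addressed value into a plain temporary so the single address
 * register can be reused for the remaining operands.
 */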
  1343. void
  1344. glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir,
  1345.                                     st_src_reg *reg, int *num_reladdr)
  1346. {
  1347.    if (!reg->reladdr)
  1348.       return;
  1349.  
  1350.    emit_arl(ir, address_reg, *reg->reladdr);
  1351.  
  1352.    if (*num_reladdr != 1) {
  1353.       st_src_reg temp = get_temp(glsl_type::vec4_type);
  1354.  
  1355.       emit(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), *reg);
  1356.       *reg = temp;
  1357.    }
  1358.  
  1359.    (*num_reladdr)--;
  1360. }
  1361.  
  1362. void
  1363. glsl_to_tgsi_visitor::visit(ir_expression *ir)
  1364. {
  1365.    unsigned int operand;
  1366.    st_src_reg op[Elements(ir->operands)];
  1367.    st_src_reg result_src;
  1368.    st_dst_reg result_dst;
  1369.  
  1370.    /* Quick peephole: Emit MAD(a, b, c) instead of ADD(MUL(a, b), c)
  1371.     */
  1372.    if (ir->operation == ir_binop_add) {
  1373.       if (try_emit_mad(ir, 1))
  1374.          return;
  1375.       if (try_emit_mad(ir, 0))
  1376.          return;
  1377.    }
  1378.  
  1379.    /* Quick peephole: Emit MAD(a, -b, a) instead of AND(a, NOT(b))
  1380.     */
  1381.    if (ir->operation == ir_binop_logic_and) {
  1382.       if (try_emit_mad_for_and_not(ir, 1))
  1383.          return;
  1384.       if (try_emit_mad_for_and_not(ir, 0))
  1385.          return;
  1386.    }
  1387.  
  1388.    if (try_emit_sat(ir))
  1389.       return;
  1390.  
  1391.    if (ir->operation == ir_quadop_vector)
  1392.       assert(!"ir_quadop_vector should have been lowered");
  1393.  
  1394.    for (operand = 0; operand < ir->get_num_operands(); operand++) {
  1395.       this->result.file = PROGRAM_UNDEFINED;
  1396.       ir->operands[operand]->accept(this);
  1397.       if (this->result.file == PROGRAM_UNDEFINED) {
  1398.          printf("Failed to get tree for expression operand:\n");
  1399.          ir->operands[operand]->print();
  1400.          printf("\n");
  1401.          exit(1);
  1402.       }
  1403.       op[operand] = this->result;
  1404.  
  1405.       /* Matrix expression operands should have been broken down to vector
  1406.        * operations already.
  1407.        */
  1408.       assert(!ir->operands[operand]->type->is_matrix());
  1409.    }
  1410.  
  1411.    int vector_elements = ir->operands[0]->type->vector_elements;
  1412.    if (ir->operands[1]) {
  1413.       vector_elements = MAX2(vector_elements,
  1414.                              ir->operands[1]->type->vector_elements);
  1415.    }
  1416.  
  1417.    this->result.file = PROGRAM_UNDEFINED;
  1418.  
  1419.    /* Storage for our result.  Ideally for an assignment we'd be using
  1420.     * the actual storage for the result here, instead.
  1421.     */
  1422.    result_src = get_temp(ir->type);
  1423.    /* convenience for the emit functions below. */
  1424.    result_dst = st_dst_reg(result_src);
  1425.    /* Limit writes to the channels that will be used by result_src later.
  1426.     * This does limit this temp's use as a temporary for multi-instruction
  1427.     * sequences.
  1428.     */
  1429.    result_dst.writemask = (1 << ir->type->vector_elements) - 1;
  1430.  
  1431.    switch (ir->operation) {
  1432.    case ir_unop_logic_not:
  1433.       if (result_dst.type != GLSL_TYPE_FLOAT)
  1434.          emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]);
  1435.       else {
  1436.          /* Previously 'SEQ dst, src, 0.0' was used for this.  However, many
  1437.           * older GPUs implement SEQ using multiple instructions (i915 uses two
  1438.           * SGE instructions and a MUL instruction).  Since our logic values are
  1439.           * 0.0 and 1.0, 1-x also implements !x.
  1440.           */
  1441.          op[0].negate = ~op[0].negate;
  1442.          emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], st_src_reg_for_float(1.0));
  1443.       }
  1444.       break;
  1445.    case ir_unop_neg:
  1446.       if (result_dst.type == GLSL_TYPE_INT || result_dst.type == GLSL_TYPE_UINT)
  1447.          emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]);
  1448.       else {
  1449.          op[0].negate = ~op[0].negate;
  1450.          result_src = op[0];
  1451.       }
  1452.       break;
  1453.    case ir_unop_abs:
  1454.       emit(ir, TGSI_OPCODE_ABS, result_dst, op[0]);
  1455.       break;
  1456.    case ir_unop_sign:
  1457.       emit(ir, TGSI_OPCODE_SSG, result_dst, op[0]);
  1458.       break;
  1459.    case ir_unop_rcp:
  1460.       emit_scalar(ir, TGSI_OPCODE_RCP, result_dst, op[0]);
  1461.       break;
  1462.  
  1463.    case ir_unop_exp2:
  1464.       emit_scalar(ir, TGSI_OPCODE_EX2, result_dst, op[0]);
  1465.       break;
  1466.    case ir_unop_exp:
  1467.    case ir_unop_log:
  1468.       assert(!"not reached: should be handled by ir_explog_to_explog2");
  1469.       break;
  1470.    case ir_unop_log2:
  1471.       emit_scalar(ir, TGSI_OPCODE_LG2, result_dst, op[0]);
  1472.       break;
  1473.    case ir_unop_sin:
  1474.       emit_scalar(ir, TGSI_OPCODE_SIN, result_dst, op[0]);
  1475.       break;
  1476.    case ir_unop_cos:
  1477.       emit_scalar(ir, TGSI_OPCODE_COS, result_dst, op[0]);
  1478.       break;
  1479.    case ir_unop_sin_reduced:
  1480.       emit_scs(ir, TGSI_OPCODE_SIN, result_dst, op[0]);
  1481.       break;
  1482.    case ir_unop_cos_reduced:
  1483.       emit_scs(ir, TGSI_OPCODE_COS, result_dst, op[0]);
  1484.       break;
  1485.  
  1486.    case ir_unop_dFdx:
  1487.       emit(ir, TGSI_OPCODE_DDX, result_dst, op[0]);
  1488.       break;
  1489.    case ir_unop_dFdy:
  1490.    {
  1491.       /* The X component contains 1 or -1 depending on whether the framebuffer
  1492.        * is an FBO or the window system buffer, respectively.
  1493.        * It is then multiplied with the source operand of DDY.
  1494.        */
  1495.       static const gl_state_index transform_y_state[STATE_LENGTH]
  1496.          = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM };
  1497.  
  1498.       unsigned transform_y_index =
  1499.          _mesa_add_state_reference(this->prog->Parameters,
  1500.                                    transform_y_state);
  1501.  
  1502.       st_src_reg transform_y = st_src_reg(PROGRAM_STATE_VAR,
  1503.                                           transform_y_index,
  1504.                                           glsl_type::vec4_type);
  1505.       transform_y.swizzle = SWIZZLE_XXXX;
  1506.  
  1507.       st_src_reg temp = get_temp(glsl_type::vec4_type);
  1508.  
  1509.       emit(ir, TGSI_OPCODE_MUL, st_dst_reg(temp), transform_y, op[0]);
  1510.       emit(ir, TGSI_OPCODE_DDY, result_dst, temp);
  1511.       break;
  1512.    }
  1513.  
  1514.    case ir_unop_noise: {
  1515.       /* At some point, a motivated person could add a better
  1516.        * implementation of noise.  Currently not even the nvidia
  1517.        * binary drivers do anything more than this.  In any case, the
  1518.        * place to do this is in the GL state tracker, not the poor
  1519.        * driver.
  1520.        */
  1521.       emit(ir, TGSI_OPCODE_MOV, result_dst, st_src_reg_for_float(0.5));
  1522.       break;
  1523.    }
  1524.  
  1525.    case ir_binop_add:
  1526.       emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
  1527.       break;
  1528.    case ir_binop_sub:
  1529.       emit(ir, TGSI_OPCODE_SUB, result_dst, op[0], op[1]);
  1530.       break;
  1531.  
  1532.    case ir_binop_mul:
  1533.       emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
  1534.       break;
  1535.    case ir_binop_div:
  1536.       if (result_dst.type == GLSL_TYPE_FLOAT)
  1537.          assert(!"not reached: should be handled by ir_div_to_mul_rcp");
  1538.       else
  1539.          emit(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]);
  1540.       break;
  1541.    case ir_binop_mod:
  1542.       if (result_dst.type == GLSL_TYPE_FLOAT)
  1543.          assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
  1544.       else
  1545.          emit(ir, TGSI_OPCODE_MOD, result_dst, op[0], op[1]);
  1546.       break;
  1547.  
  1548.    case ir_binop_less:
  1549.       emit(ir, TGSI_OPCODE_SLT, result_dst, op[0], op[1]);
  1550.       break;
  1551.    case ir_binop_greater:
  1552.       emit(ir, TGSI_OPCODE_SLT, result_dst, op[1], op[0]);
  1553.       break;
  1554.    case ir_binop_lequal:
  1555.       emit(ir, TGSI_OPCODE_SGE, result_dst, op[1], op[0]);
  1556.       break;
  1557.    case ir_binop_gequal:
  1558.       emit(ir, TGSI_OPCODE_SGE, result_dst, op[0], op[1]);
  1559.       break;
  1560.    case ir_binop_equal:
  1561.       emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]);
  1562.       break;
  1563.    case ir_binop_nequal:
  1564.       emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
  1565.       break;
  1566.    case ir_binop_all_equal:
  1567.       /* "==" operator producing a scalar boolean. */
  1568.       if (ir->operands[0]->type->is_vector() ||
  1569.           ir->operands[1]->type->is_vector()) {
  1570.          st_src_reg temp = get_temp(native_integers ?
  1571.                glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) :
  1572.                glsl_type::vec4_type);
  1573.          
  1574.          if (native_integers) {
  1575.             st_dst_reg temp_dst = st_dst_reg(temp);
  1576.             st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp);
  1577.            
  1578.             emit(ir, TGSI_OPCODE_SEQ, st_dst_reg(temp), op[0], op[1]);
  1579.            
  1580.             /* Emit 1-3 AND operations to combine the SEQ results. */
  1581.             switch (ir->operands[0]->type->vector_elements) {
  1582.             case 2:
  1583.                break;
  1584.             case 3:
  1585.                temp_dst.writemask = WRITEMASK_Y;
  1586.                temp1.swizzle = SWIZZLE_YYYY;
  1587.                temp2.swizzle = SWIZZLE_ZZZZ;
  1588.                emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
  1589.                break;
  1590.             case 4:
  1591.                temp_dst.writemask = WRITEMASK_X;
  1592.                temp1.swizzle = SWIZZLE_XXXX;
  1593.                temp2.swizzle = SWIZZLE_YYYY;
  1594.                emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
  1595.                temp_dst.writemask = WRITEMASK_Y;
  1596.                temp1.swizzle = SWIZZLE_ZZZZ;
  1597.                temp2.swizzle = SWIZZLE_WWWW;
  1598.                emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
  1599.             }
  1600.            
  1601.             temp1.swizzle = SWIZZLE_XXXX;
  1602.             temp2.swizzle = SWIZZLE_YYYY;
  1603.             emit(ir, TGSI_OPCODE_AND, result_dst, temp1, temp2);
  1604.          } else {
  1605.             emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
  1606.            
  1607.             /* After the dot-product, the value will be an integer on the
  1608.              * range [0,4].  Zero becomes 1.0, and positive values become zero.
  1609.              */
  1610.             emit_dp(ir, result_dst, temp, temp, vector_elements);
  1611.  
  1612.             /* Negating the result of the dot-product gives values on the range
  1613.              * [-4, 0].  Zero becomes 1.0, and negative values become zero.
  1614.              * This is achieved using SGE.
  1615.              */
  1616.             st_src_reg sge_src = result_src;
  1617.             sge_src.negate = ~sge_src.negate;
  1618.             emit(ir, TGSI_OPCODE_SGE, result_dst, sge_src, st_src_reg_for_float(0.0));
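                    /* Worked example (editor's sketch, assuming identity swizzles):
                     * comparing vec3(1,2,3) with vec3(1,2,4) gives SNE -> (0,0,1),
                     * DP3 -> 1.0; negated to -1.0, SGE(-1.0, 0.0) yields 0.0 (not all
                     * equal).  For identical inputs SNE is all zeros, DP3 -> 0.0, and
                     * SGE(0.0, 0.0) yields 1.0.
                     */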
  1619.          }
  1620.       } else {
  1621.          emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]);
  1622.       }
  1623.       break;
  1624.    case ir_binop_any_nequal:
  1625.       /* "!=" operator producing a scalar boolean. */
  1626.       if (ir->operands[0]->type->is_vector() ||
  1627.           ir->operands[1]->type->is_vector()) {
  1628.          st_src_reg temp = get_temp(native_integers ?
  1629.                glsl_type::get_instance(ir->operands[0]->type->base_type, 4, 1) :
  1630.                glsl_type::vec4_type);
  1631.          emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
  1632.  
  1633.          if (native_integers) {
  1634.             st_dst_reg temp_dst = st_dst_reg(temp);
  1635.             st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp);
  1636.            
  1637.             /* Emit 1-3 OR operations to combine the SNE results. */
  1638.             switch (ir->operands[0]->type->vector_elements) {
  1639.             case 2:
  1640.                break;
  1641.             case 3:
  1642.                temp_dst.writemask = WRITEMASK_Y;
  1643.                temp1.swizzle = SWIZZLE_YYYY;
  1644.                temp2.swizzle = SWIZZLE_ZZZZ;
  1645.                emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2);
  1646.                break;
  1647.             case 4:
  1648.                temp_dst.writemask = WRITEMASK_X;
  1649.                temp1.swizzle = SWIZZLE_XXXX;
  1650.                temp2.swizzle = SWIZZLE_YYYY;
  1651.                emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2);
  1652.                temp_dst.writemask = WRITEMASK_Y;
  1653.                temp1.swizzle = SWIZZLE_ZZZZ;
  1654.                temp2.swizzle = SWIZZLE_WWWW;
  1655.                emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2);
  1656.             }
  1657.            
  1658.             temp1.swizzle = SWIZZLE_XXXX;
  1659.             temp2.swizzle = SWIZZLE_YYYY;
  1660.             emit(ir, TGSI_OPCODE_OR, result_dst, temp1, temp2);
  1661.          } else {
  1662.             /* After the dot-product, the value will be an integer on the
  1663.              * range [0,4].  Zero stays zero, and positive values become 1.0.
  1664.              */
  1665.             glsl_to_tgsi_instruction *const dp =
  1666.                   emit_dp(ir, result_dst, temp, temp, vector_elements);
  1667.             if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
  1668.                /* The clamping to [0,1] can be done for free in the fragment
  1669.                 * shader with a saturate.
  1670.                 */
  1671.                dp->saturate = true;
  1672.             } else {
  1673.                /* Negating the result of the dot-product gives values on the range
  1674.                 * [-4, 0].  Zero stays zero, and negative values become 1.0.  This
  1675.                 * is achieved using SLT.
  1676.                 */
  1677.                st_src_reg slt_src = result_src;
  1678.                slt_src.negate = ~slt_src.negate;
  1679.                emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
  1680.             }
  1681.          }
  1682.       } else {
  1683.          emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
  1684.       }
  1685.       break;
  1686.  
  1687.    case ir_unop_any: {
  1688.       assert(ir->operands[0]->type->is_vector());
  1689.  
  1690.       /* After the dot-product, the value will be an integer on the
  1691.        * range [0,4].  Zero stays zero, and positive values become 1.0.
  1692.        */
  1693.       glsl_to_tgsi_instruction *const dp =
  1694.          emit_dp(ir, result_dst, op[0], op[0],
  1695.                  ir->operands[0]->type->vector_elements);
  1696.       if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
  1697.           result_dst.type == GLSL_TYPE_FLOAT) {
  1698.               /* The clamping to [0,1] can be done for free in the fragment
  1699.                * shader with a saturate.
  1700.                */
  1701.               dp->saturate = true;
  1702.       } else if (result_dst.type == GLSL_TYPE_FLOAT) {
  1703.               /* Negating the result of the dot-product gives values on the range
  1704.                * [-4, 0].  Zero stays zero, and negative values become 1.0.  This
  1705.                * is achieved using SLT.
  1706.                */
  1707.               st_src_reg slt_src = result_src;
  1708.               slt_src.negate = ~slt_src.negate;
  1709.               emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
  1710.       }
  1711.       else {
  1712.          /* Use SNE 0 if integers are being used as boolean values. */
  1713.          emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0));
  1714.       }
  1715.       break;
  1716.    }
  1717.  
  1718.    case ir_binop_logic_xor:
  1719.       if (native_integers)
  1720.          emit(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]);
  1721.       else
  1722.          emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
  1723.       break;
  1724.  
  1725.    case ir_binop_logic_or: {
  1726.       if (native_integers) {
  1727.          /* If integers are used as booleans, we can use an actual "or"
  1728.           * instruction.
  1729.           */
  1730.          assert(native_integers);
  1731.          emit(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]);
  1732.       } else {
  1733.          /* After the addition, the value will be an integer on the
  1734.           * range [0,2].  Zero stays zero, and positive values become 1.0.
  1735.           */
  1736.          glsl_to_tgsi_instruction *add =
  1737.             emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
  1738.          if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
  1739.             /* The clamping to [0,1] can be done for free in the fragment
  1740.              * shader with a saturate if floats are being used as boolean values.
  1741.              */
  1742.             add->saturate = true;
  1743.          } else {
  1744.             /* Negating the result of the addition gives values on the range
  1745.              * [-2, 0].  Zero stays zero, and negative values become 1.0.  This
  1746.              * is achieved using SLT.
  1747.              */
  1748.             st_src_reg slt_src = result_src;
  1749.             slt_src.negate = ~slt_src.negate;
  1750.             emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
  1751.          }
  1752.       }
  1753.       break;
  1754.    }
  1755.  
  1756.    case ir_binop_logic_and:
  1757.       /* If native integers are disabled, the bool args are stored as float 0.0
  1758.        * or 1.0, so "mul" gives us "and".  If they're enabled, just use the
  1759.        * actual AND opcode.
  1760.        */
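             /* e.g. with 0.0/1.0 float booleans, 1.0 * 1.0 = 1.0 and
              * anything * 0.0 = 0.0, which matches logical AND.
              */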
  1761.       if (native_integers)
  1762.          emit(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]);
  1763.       else
  1764.          emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
  1765.       break;
  1766.  
  1767.    case ir_binop_dot:
  1768.       assert(ir->operands[0]->type->is_vector());
  1769.       assert(ir->operands[0]->type == ir->operands[1]->type);
  1770.       emit_dp(ir, result_dst, op[0], op[1],
  1771.               ir->operands[0]->type->vector_elements);
  1772.       break;
  1773.  
  1774.    case ir_unop_sqrt:
  1775.       if (have_sqrt) {
  1776.          emit_scalar(ir, TGSI_OPCODE_SQRT, result_dst, op[0]);
  1777.       }
  1778.       else {
  1779.          /* sqrt(x) = x * rsq(x). */
  1780.          emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]);
  1781.          emit(ir, TGSI_OPCODE_MUL, result_dst, result_src, op[0]);
  1782.          /* For incoming channels <= 0, set the result to 0. */
  1783.          op[0].negate = ~op[0].negate;
  1784.          emit(ir, TGSI_OPCODE_CMP, result_dst,
  1785.               op[0], result_src, st_src_reg_for_float(0.0));
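                /* CMP computes dst = (src0 < 0) ? src1 : src2 per channel (see the
                 * conditional-move handling below), so with op[0] negated, channels
                 * where x > 0 keep x * rsq(x) and channels where x <= 0 become 0.0.
                 */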
  1786.       }
  1787.       break;
  1788.    case ir_unop_rsq:
  1789.       emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]);
  1790.       break;
  1791.    case ir_unop_i2f:
  1792.       if (native_integers) {
  1793.          emit(ir, TGSI_OPCODE_I2F, result_dst, op[0]);
  1794.          break;
  1795.       }
  1796.       /* fallthrough to next case otherwise */
  1797.    case ir_unop_b2f:
  1798.       if (native_integers) {
  1799.          emit(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_float(1.0));
  1800.          break;
  1801.       }
  1802.       /* fallthrough to next case otherwise */
  1803.    case ir_unop_i2u:
  1804.    case ir_unop_u2i:
  1805.       /* Converting between signed and unsigned integers is a no-op. */
  1806.       result_src = op[0];
  1807.       break;
  1808.    case ir_unop_b2i:
  1809.       if (native_integers) {
  1810.          /* Booleans are stored as integers using ~0 for true and 0 for false.
  1811.           * GLSL requires that int(bool) return 1 for true and 0 for false.
  1812.           * This conversion is done with AND, but it could be done with NEG.
  1813.           */
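                /* e.g. true is ~0 (all bits set, i.e. -1 as a signed int):
                 * ~0 & 1 = 1 and INEG(-1) = 1, while false (0) stays 0 either way.
                 */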
  1814.          emit(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_int(1));
  1815.       } else {
  1816.          /* Booleans and integers are both stored as floats when native
  1817.           * integers are disabled.
  1818.           */
  1819.          result_src = op[0];
  1820.       }
  1821.       break;
  1822.    case ir_unop_f2i:
  1823.       if (native_integers)
  1824.          emit(ir, TGSI_OPCODE_F2I, result_dst, op[0]);
  1825.       else
  1826.          emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
  1827.       break;
  1828.    case ir_unop_f2u:
  1829.       if (native_integers)
  1830.          emit(ir, TGSI_OPCODE_F2U, result_dst, op[0]);
  1831.       else
  1832.          emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
  1833.       break;
  1834.    case ir_unop_bitcast_f2i:
  1835.       result_src = op[0];
  1836.       result_src.type = GLSL_TYPE_INT;
  1837.       break;
  1838.    case ir_unop_bitcast_f2u:
  1839.       result_src = op[0];
  1840.       result_src.type = GLSL_TYPE_UINT;
  1841.       break;
  1842.    case ir_unop_bitcast_i2f:
  1843.    case ir_unop_bitcast_u2f:
  1844.       result_src = op[0];
  1845.       result_src.type = GLSL_TYPE_FLOAT;
  1846.       break;
  1847.    case ir_unop_f2b:
  1848.       emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0));
  1849.       break;
  1850.    case ir_unop_i2b:
  1851.       if (native_integers)
  1852.          emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]);
  1853.       else
  1854.          emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0));
  1855.       break;
  1856.    case ir_unop_trunc:
  1857.       emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
  1858.       break;
  1859.    case ir_unop_ceil:
  1860.       emit(ir, TGSI_OPCODE_CEIL, result_dst, op[0]);
  1861.       break;
  1862.    case ir_unop_floor:
  1863.       emit(ir, TGSI_OPCODE_FLR, result_dst, op[0]);
  1864.       break;
  1865.    case ir_unop_round_even:
  1866.       emit(ir, TGSI_OPCODE_ROUND, result_dst, op[0]);
  1867.       break;
  1868.    case ir_unop_fract:
  1869.       emit(ir, TGSI_OPCODE_FRC, result_dst, op[0]);
  1870.       break;
  1871.  
  1872.    case ir_binop_min:
  1873.       emit(ir, TGSI_OPCODE_MIN, result_dst, op[0], op[1]);
  1874.       break;
  1875.    case ir_binop_max:
  1876.       emit(ir, TGSI_OPCODE_MAX, result_dst, op[0], op[1]);
  1877.       break;
  1878.    case ir_binop_pow:
  1879.       emit_scalar(ir, TGSI_OPCODE_POW, result_dst, op[0], op[1]);
  1880.       break;
  1881.  
  1882.    case ir_unop_bit_not:
  1883.       if (native_integers) {
  1884.          emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]);
  1885.          break;
  1886.       }
  1887.    case ir_unop_u2f:
  1888.       if (native_integers) {
  1889.          emit(ir, TGSI_OPCODE_U2F, result_dst, op[0]);
  1890.          break;
  1891.       }
  1892.    case ir_binop_lshift:
  1893.       if (native_integers) {
  1894.          emit(ir, TGSI_OPCODE_SHL, result_dst, op[0], op[1]);
  1895.          break;
  1896.       }
  1897.    case ir_binop_rshift:
  1898.       if (native_integers) {
  1899.          emit(ir, TGSI_OPCODE_ISHR, result_dst, op[0], op[1]);
  1900.          break;
  1901.       }
  1902.    case ir_binop_bit_and:
  1903.       if (native_integers) {
  1904.          emit(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]);
  1905.          break;
  1906.       }
  1907.    case ir_binop_bit_xor:
  1908.       if (native_integers) {
  1909.          emit(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]);
  1910.          break;
  1911.       }
  1912.    case ir_binop_bit_or:
  1913.       if (native_integers) {
  1914.          emit(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]);
  1915.          break;
  1916.       }
  1917.  
  1918.       assert(!"GLSL 1.30 features unsupported");
  1919.       break;
  1920.  
  1921.    case ir_binop_ubo_load: {
  1922.       ir_constant *uniform_block = ir->operands[0]->as_constant();
  1923.       ir_constant *const_offset_ir = ir->operands[1]->as_constant();
  1924.       unsigned const_offset = const_offset_ir ? const_offset_ir->value.u[0] : 0;
  1925.       st_src_reg index_reg = get_temp(glsl_type::uint_type);
  1926.       st_src_reg cbuf;
  1927.  
  1928.       cbuf.type = glsl_type::vec4_type->base_type;
  1929.       cbuf.file = PROGRAM_CONSTANT;
  1930.       cbuf.index = 0;
  1931.       cbuf.index2D = uniform_block->value.u[0] + 1;
  1932.       cbuf.reladdr = NULL;
  1933.       cbuf.negate = 0;
  1934.      
  1935.       assert(ir->type->is_vector() || ir->type->is_scalar());
  1936.  
  1937.       if (const_offset_ir) {
  1938.          index_reg = st_src_reg_for_int(const_offset / 16);
  1939.       } else {
  1940.          emit(ir, TGSI_OPCODE_USHR, st_dst_reg(index_reg), op[1], st_src_reg_for_int(4));
  1941.       }
  1942.  
  1943.       cbuf.swizzle = swizzle_for_size(ir->type->vector_elements);
  1944.       cbuf.swizzle += MAKE_SWIZZLE4(const_offset % 16 / 4,
  1945.                                     const_offset % 16 / 4,
  1946.                                     const_offset % 16 / 4,
  1947.                                     const_offset % 16 / 4);
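             /* Worked example (editor's sketch): a scalar at byte offset 20 lives in
              * vec4 slot 20 / 16 = 1 (handled via index_reg above) at component
              * (20 % 16) / 4 = 1, so the base .xxxx swizzle is shifted to .yyyy.
              */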
  1948.  
  1949.       cbuf.reladdr = ralloc(mem_ctx, st_src_reg);
  1950.       memcpy(cbuf.reladdr, &index_reg, sizeof(index_reg));
  1951.  
  1952.       if (ir->type->base_type == GLSL_TYPE_BOOL) {
  1953.          emit(ir, TGSI_OPCODE_USNE, result_dst, cbuf, st_src_reg_for_int(0));
  1954.       } else {
  1955.          emit(ir, TGSI_OPCODE_MOV, result_dst, cbuf);
  1956.       }
  1957.       break;
  1958.    }
  1959.    case ir_triop_lrp:
  1960.       /* note: we have to reorder the three args here */
  1961.       emit(ir, TGSI_OPCODE_LRP, result_dst, op[2], op[1], op[0]);
  1962.       break;
  1963.    case ir_unop_pack_snorm_2x16:
  1964.    case ir_unop_pack_unorm_2x16:
  1965.    case ir_unop_pack_half_2x16:
  1966.    case ir_unop_pack_snorm_4x8:
  1967.    case ir_unop_pack_unorm_4x8:
  1968.    case ir_unop_unpack_snorm_2x16:
  1969.    case ir_unop_unpack_unorm_2x16:
  1970.    case ir_unop_unpack_half_2x16:
  1971.    case ir_unop_unpack_half_2x16_split_x:
  1972.    case ir_unop_unpack_half_2x16_split_y:
  1973.    case ir_unop_unpack_snorm_4x8:
  1974.    case ir_unop_unpack_unorm_4x8:
  1975.    case ir_binop_pack_half_2x16_split:
  1976.    case ir_unop_bitfield_reverse:
  1977.    case ir_unop_bit_count:
  1978.    case ir_unop_find_msb:
  1979.    case ir_unop_find_lsb:
  1980.    case ir_binop_bfm:
  1981.    case ir_triop_bfi:
  1982.    case ir_triop_bitfield_extract:
  1983.    case ir_quadop_bitfield_insert:
  1984.    case ir_quadop_vector:
  1985.    case ir_binop_vector_extract:
  1986.    case ir_triop_vector_insert:
  1987.       /* This operation is not supported, or should have already been handled.
  1988.        */
  1989.       assert(!"Invalid ir opcode in glsl_to_tgsi_visitor::visit()");
  1990.       break;
  1991.    }
  1992.  
  1993.    this->result = result_src;
  1994. }
  1995.  
  1996.  
  1997. void
  1998. glsl_to_tgsi_visitor::visit(ir_swizzle *ir)
  1999. {
  2000.    st_src_reg src;
  2001.    int i;
  2002.    int swizzle[4];
  2003.  
  2004.    /* Note that this is only swizzles in expressions, not those on the left
  2005.     * hand side of an assignment, which do write masking.  See ir_assignment
  2006.     * for that.
  2007.     */
  2008.  
  2009.    ir->val->accept(this);
  2010.    src = this->result;
  2011.    assert(src.file != PROGRAM_UNDEFINED);
  2012.  
  2013.    for (i = 0; i < 4; i++) {
  2014.       if (i < ir->type->vector_elements) {
  2015.          switch (i) {
  2016.          case 0:
  2017.             swizzle[i] = GET_SWZ(src.swizzle, ir->mask.x);
  2018.             break;
  2019.          case 1:
  2020.             swizzle[i] = GET_SWZ(src.swizzle, ir->mask.y);
  2021.             break;
  2022.          case 2:
  2023.             swizzle[i] = GET_SWZ(src.swizzle, ir->mask.z);
  2024.             break;
  2025.          case 3:
  2026.             swizzle[i] = GET_SWZ(src.swizzle, ir->mask.w);
  2027.             break;
  2028.          }
  2029.       } else {
  2030.          /* If the type is smaller than a vec4, replicate the last
  2031.           * channel out.
  2032.           */
  2033.          swizzle[i] = swizzle[ir->type->vector_elements - 1];
  2034.       }
  2035.    }
  2036.  
  2037.    src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
  2038.  
  2039.    this->result = src;
  2040. }
  2041.  
  2042. void
  2043. glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir)
  2044. {
  2045.    variable_storage *entry = find_variable_storage(ir->var);
  2046.    ir_variable *var = ir->var;
  2047.  
  2048.    if (!entry) {
  2049.       switch (var->mode) {
  2050.       case ir_var_uniform:
  2051.          entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM,
  2052.                                                var->location);
  2053.          this->variables.push_tail(entry);
  2054.          break;
  2055.       case ir_var_shader_in:
  2056.          /* The linker assigns locations for varyings and attributes,
  2057.           * including deprecated builtins (like gl_Color), user-assigned
  2058.           * generic attributes (glBindAttribLocation), and
  2059.           * user-defined varyings.
  2060.           */
  2061.          assert(var->location != -1);
  2062.          entry = new(mem_ctx) variable_storage(var,
  2063.                                                PROGRAM_INPUT,
  2064.                                                var->location);
  2065.          break;
  2066.       case ir_var_shader_out:
  2067.          assert(var->location != -1);
  2068.          entry = new(mem_ctx) variable_storage(var,
  2069.                                                PROGRAM_OUTPUT,
  2070.                                                var->location + var->index);
  2071.          break;
  2072.       case ir_var_system_value:
  2073.          entry = new(mem_ctx) variable_storage(var,
  2074.                                                PROGRAM_SYSTEM_VALUE,
  2075.                                                var->location);
  2076.          break;
  2077.       case ir_var_auto:
  2078.       case ir_var_temporary:
  2079.          st_src_reg src = get_temp(var->type);
  2080.  
  2081.          entry = new(mem_ctx) variable_storage(var, src.file, src.index);
  2082.          this->variables.push_tail(entry);
  2083.  
  2084.          break;
  2085.       }
  2086.  
  2087.       if (!entry) {
  2088.          printf("Failed to make storage for %s\n", var->name);
  2089.          exit(1);
  2090.       }
  2091.    }
  2092.  
  2093.    this->result = st_src_reg(entry->file, entry->index, var->type);
  2094.    if (!native_integers)
  2095.       this->result.type = GLSL_TYPE_FLOAT;
  2096. }
  2097.  
  2098. void
  2099. glsl_to_tgsi_visitor::visit(ir_dereference_array *ir)
  2100. {
  2101.    ir_constant *index;
  2102.    st_src_reg src;
  2103.    int element_size = type_size(ir->type);
  2104.  
  2105.    index = ir->array_index->constant_expression_value();
  2106.  
  2107.    ir->array->accept(this);
  2108.    src = this->result;
  2109.  
  2110.    if (index) {
  2111.       src.index += index->value.i[0] * element_size;
  2112.    } else {
  2113.       /* Variable index array dereference.  It eats the "vec4" of the
  2114.        * base of the array and an index that offsets the TGSI register
  2115.        * index.
  2116.        */
  2117.       ir->array_index->accept(this);
  2118.  
  2119.       st_src_reg index_reg;
  2120.  
  2121.       if (element_size == 1) {
  2122.          index_reg = this->result;
  2123.       } else {
  2124.          index_reg = get_temp(native_integers ?
  2125.                               glsl_type::int_type : glsl_type::float_type);
  2126.  
  2127.          emit(ir, TGSI_OPCODE_MUL, st_dst_reg(index_reg),
  2128.               this->result, st_src_reg_for_type(index_reg.type, element_size));
  2129.       }
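             /* e.g. for an array of mat4 the element size is 4 vec4 slots, so the
              * dynamic index is scaled by 4 and the reladdr set up below selects the
              * first column of the chosen matrix.
              */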
  2130.  
  2131.       /* If there was already a relative address register involved, add the
  2132.        * new and the old together to get the new offset.
  2133.        */
  2134.       if (src.reladdr != NULL) {
  2135.          st_src_reg accum_reg = get_temp(native_integers ?
  2136.                                 glsl_type::int_type : glsl_type::float_type);
  2137.  
  2138.          emit(ir, TGSI_OPCODE_ADD, st_dst_reg(accum_reg),
  2139.               index_reg, *src.reladdr);
  2140.  
  2141.          index_reg = accum_reg;
  2142.       }
  2143.  
  2144.       src.reladdr = ralloc(mem_ctx, st_src_reg);
  2145.       memcpy(src.reladdr, &index_reg, sizeof(index_reg));
  2146.    }
  2147.  
  2148.    /* If the type is smaller than a vec4, replicate the last channel out. */
  2149.    if (ir->type->is_scalar() || ir->type->is_vector())
  2150.       src.swizzle = swizzle_for_size(ir->type->vector_elements);
  2151.    else
  2152.       src.swizzle = SWIZZLE_NOOP;
  2153.  
  2154.    /* Change the register type to the element type of the array. */
  2155.    src.type = ir->type->base_type;
  2156.  
  2157.    this->result = src;
  2158. }
  2159.  
  2160. void
  2161. glsl_to_tgsi_visitor::visit(ir_dereference_record *ir)
  2162. {
  2163.    unsigned int i;
  2164.    const glsl_type *struct_type = ir->record->type;
  2165.    int offset = 0;
  2166.  
  2167.    ir->record->accept(this);
  2168.  
  2169.    for (i = 0; i < struct_type->length; i++) {
  2170.       if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
  2171.          break;
  2172.       offset += type_size(struct_type->fields.structure[i].type);
  2173.    }
  2174.  
  2175.    /* If the type is smaller than a vec4, replicate the last channel out. */
  2176.    if (ir->type->is_scalar() || ir->type->is_vector())
  2177.       this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
  2178.    else
  2179.       this->result.swizzle = SWIZZLE_NOOP;
  2180.  
  2181.    this->result.index += offset;
  2182.    this->result.type = ir->type->base_type;
  2183. }
  2184.  
  2185. /**
  2186.  * We want to be careful in assignment setup to hit the actual storage
  2187.  * instead of potentially using a temporary like we might with the
  2188.  * ir_dereference handler.
  2189.  */
  2190. static st_dst_reg
  2191. get_assignment_lhs(ir_dereference *ir, glsl_to_tgsi_visitor *v)
  2192. {
  2193.    /* The LHS must be a dereference.  If the LHS is a variable indexed array
  2194.     * access of a vector, it must be separated into a series of conditional moves
  2195.     * before reaching this point (see ir_vec_index_to_cond_assign).
  2196.     */
  2197.    assert(ir->as_dereference());
  2198.    ir_dereference_array *deref_array = ir->as_dereference_array();
  2199.    if (deref_array) {
  2200.       assert(!deref_array->array->type->is_vector());
  2201.    }
  2202.  
  2203.    /* Use the rvalue deref handler for the most part.  We'll ignore
  2204.     * swizzles in it and write swizzles using writemask, though.
  2205.     */
  2206.    ir->accept(v);
  2207.    return st_dst_reg(v->result);
  2208. }
  2209.  
  2210. /**
  2211.  * Process the condition of a conditional assignment
  2212.  *
  2213.  * Examines the condition of a conditional assignment to generate the optimal
  2214.  * first operand of a \c CMP instruction.  If the condition is a relational
  2215.  * operator with 0 (e.g., \c ir_binop_less), the value being compared will be
  2216.  * used as the source for the \c CMP instruction.  Otherwise the comparison
  2217.  * is processed to a boolean result, and the boolean result is used as the
  2218.  * operand to the CMP instruction.
  2219.  */
  2220. bool
  2221. glsl_to_tgsi_visitor::process_move_condition(ir_rvalue *ir)
  2222. {
  2223.    ir_rvalue *src_ir = ir;
  2224.    bool negate = true;
  2225.    bool switch_order = false;
  2226.  
  2227.    ir_expression *const expr = ir->as_expression();
  2228.    if ((expr != NULL) && (expr->get_num_operands() == 2)) {
  2229.       bool zero_on_left = false;
  2230.  
  2231.       if (expr->operands[0]->is_zero()) {
  2232.          src_ir = expr->operands[1];
  2233.          zero_on_left = true;
  2234.       } else if (expr->operands[1]->is_zero()) {
  2235.          src_ir = expr->operands[0];
  2236.          zero_on_left = false;
  2237.       }
  2238.  
  2239.       /*      a is -  0  +            -  0  +
  2240.        * (a <  0)  T  F  F  ( a < 0)  T  F  F
  2241.        * (0 <  a)  F  F  T  (-a < 0)  F  F  T
  2242.        * (a <= 0)  T  T  F  (-a < 0)  F  F  T  (swap order of other operands)
  2243.        * (0 <= a)  F  T  T  ( a < 0)  T  F  F  (swap order of other operands)
  2244.        * (a >  0)  F  F  T  (-a < 0)  F  F  T
  2245.        * (0 >  a)  T  F  F  ( a < 0)  T  F  F
  2246.        * (a >= 0)  F  T  T  ( a < 0)  T  F  F  (swap order of other operands)
  2247.        * (0 >= a)  T  T  F  (-a < 0)  F  F  T  (swap order of other operands)
  2248.        *
  2249.        * Note that exchanging the order of 0 and 'a' in the comparison simply
  2250.        * means that the value of 'a' should be negated.
  2251.        */
  2252.       if (src_ir != ir) {
  2253.          switch (expr->operation) {
  2254.          case ir_binop_less:
  2255.             switch_order = false;
  2256.             negate = zero_on_left;
  2257.             break;
  2258.  
  2259.          case ir_binop_greater:
  2260.             switch_order = false;
  2261.             negate = !zero_on_left;
  2262.             break;
  2263.  
  2264.          case ir_binop_lequal:
  2265.             switch_order = true;
  2266.             negate = !zero_on_left;
  2267.             break;
  2268.  
  2269.          case ir_binop_gequal:
  2270.             switch_order = true;
  2271.             negate = zero_on_left;
  2272.             break;
  2273.  
  2274.          default:
  2275.             /* This isn't the right kind of comparison after all, so make sure
  2276.              * the whole condition is visited.
  2277.              */
  2278.             src_ir = ir;
  2279.             break;
  2280.          }
  2281.       }
  2282.    }
  2283.  
  2284.    src_ir->accept(this);
  2285.  
  2286.    /* We use the TGSI_OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the
  2287.     * condition we produced is 0.0 or 1.0.  By flipping the sign, we can
  2288.     * choose which value TGSI_OPCODE_CMP produces without an extra instruction
  2289.     * computing the condition.
  2290.     */
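          /* Editor's sketch of the net effect: for a condition like (a >= 0) with
           * zero on the right, the table above selects switch_order = true and
           * negate = false, so the caller emits CMP dst, a, old_value, new_value;
           * when a >= 0 the test a < 0 fails and the new value is written.
           */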
  2291.    if (negate)
  2292.       this->result.negate = ~this->result.negate;
  2293.  
  2294.    return switch_order;
  2295. }
  2296.  
  2297. void
  2298. glsl_to_tgsi_visitor::emit_block_mov(ir_assignment *ir, const struct glsl_type *type,
  2299.                                      st_dst_reg *l, st_src_reg *r)
  2300. {
  2301.    if (type->base_type == GLSL_TYPE_STRUCT) {
  2302.       for (unsigned int i = 0; i < type->length; i++) {
  2303.          emit_block_mov(ir, type->fields.structure[i].type, l, r);
  2304.       }
  2305.       return;
  2306.    }
  2307.  
  2308.    if (type->is_array()) {
  2309.       for (unsigned int i = 0; i < type->length; i++) {
  2310.          emit_block_mov(ir, type->fields.array, l, r);
  2311.       }
  2312.       return;
  2313.    }
  2314.  
  2315.    if (type->is_matrix()) {
  2316.       const struct glsl_type *vec_type;
  2317.  
  2318.       vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
  2319.                                          type->vector_elements, 1);
  2320.  
  2321.       for (int i = 0; i < type->matrix_columns; i++) {
  2322.          emit_block_mov(ir, vec_type, l, r);
  2323.       }
  2324.       return;
  2325.    }
  2326.  
  2327.    assert(type->is_scalar() || type->is_vector());
  2328.  
  2329.    r->type = type->base_type;
  2330.    emit(ir, TGSI_OPCODE_MOV, *l, *r);
  2331.    l->index++;
  2332.    r->index++;
  2333. }
  2334.  
  2335. void
  2336. glsl_to_tgsi_visitor::visit(ir_assignment *ir)
  2337. {
  2338.    st_dst_reg l;
  2339.    st_src_reg r;
  2340.    int i;
  2341.  
  2342.    ir->rhs->accept(this);
  2343.    r = this->result;
  2344.  
  2345.    l = get_assignment_lhs(ir->lhs, this);
  2346.  
  2347.    /* FINISHME: This should really set to the correct maximal writemask for each
  2348.     * FINISHME: component written (in the loops below).  This case can only
  2349.     * FINISHME: occur for matrices, arrays, and structures.
  2350.     */
  2351.    if (ir->write_mask == 0) {
  2352.       assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
  2353.       l.writemask = WRITEMASK_XYZW;
  2354.    } else if (ir->lhs->type->is_scalar() &&
  2355.               ir->lhs->variable_referenced()->mode == ir_var_shader_out) {
  2356.       /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the
  2357.        * FINISHME: W component of fragment shader output zero, work correctly.
  2358.        */
  2359.       l.writemask = WRITEMASK_XYZW;
  2360.    } else {
  2361.       int swizzles[4];
  2362.       int first_enabled_chan = 0;
  2363.       int rhs_chan = 0;
  2364.  
  2365.       l.writemask = ir->write_mask;
  2366.  
  2367.       for (int i = 0; i < 4; i++) {
  2368.          if (l.writemask & (1 << i)) {
  2369.             first_enabled_chan = GET_SWZ(r.swizzle, i);
  2370.             break;
  2371.          }
  2372.       }
  2373.  
  2374.       /* Swizzle a small RHS vector into the channels being written.
  2375.        *
  2376.        * glsl ir treats write_mask as dictating how many channels are
  2377.        * present on the RHS while TGSI treats write_mask as just
  2378.        * showing which channels of the vec4 RHS get written.
  2379.        */
  2380.       for (int i = 0; i < 4; i++) {
  2381.          if (l.writemask & (1 << i))
  2382.             swizzles[i] = GET_SWZ(r.swizzle, rhs_chan++);
  2383.          else
  2384.             swizzles[i] = first_enabled_chan;
  2385.       }
  2386.       r.swizzle = MAKE_SWIZZLE4(swizzles[0], swizzles[1],
  2387.                                 swizzles[2], swizzles[3]);
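             /* e.g. (editor's example) a vec2 RHS with an .xy swizzle assigned to
              * lhs.yz: first_enabled_chan is Y, the loop yields (y, x, y, y), and the
              * emitted move then writes lhs.y <- rhs.x and lhs.z <- rhs.y.
              */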
  2388.    }
  2389.  
  2390.    assert(l.file != PROGRAM_UNDEFINED);
  2391.    assert(r.file != PROGRAM_UNDEFINED);
  2392.  
  2393.    if (ir->condition) {
  2394.       const bool switch_order = this->process_move_condition(ir->condition);
  2395.       st_src_reg condition = this->result;
  2396.  
  2397.       for (i = 0; i < type_size(ir->lhs->type); i++) {
  2398.          st_src_reg l_src = st_src_reg(l);
  2399.          st_src_reg condition_temp = condition;
  2400.          l_src.swizzle = swizzle_for_size(ir->lhs->type->vector_elements);
  2401.          
  2402.          if (native_integers) {
  2403.             /* This is necessary because TGSI's CMP instruction expects the
  2404.              * condition to be a float, and we store booleans as integers.
  2405.              * TODO: we really want to avoid the i2f path and use UCMP, but
  2406.              * that requires changes to process_move_condition too.
  2407.              */
  2408.             condition_temp = get_temp(glsl_type::vec4_type);
  2409.             condition.negate = 0;
  2410.             emit(ir, TGSI_OPCODE_I2F, st_dst_reg(condition_temp), condition);
  2411.             condition_temp.swizzle = condition.swizzle;
  2412.          }
  2413.          
  2414.          if (switch_order) {
  2415.             emit(ir, TGSI_OPCODE_CMP, l, condition_temp, l_src, r);
  2416.          } else {
  2417.             emit(ir, TGSI_OPCODE_CMP, l, condition_temp, r, l_src);
  2418.          }
  2419.  
  2420.          l.index++;
  2421.          r.index++;
  2422.       }
  2423.    } else if (ir->rhs->as_expression() &&
  2424.               this->instructions.get_tail() &&
  2425.               ir->rhs == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->ir &&
  2426.               type_size(ir->lhs->type) == 1 &&
  2427.               l.writemask == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->dst.writemask) {
  2428.       /* To avoid emitting an extra MOV when assigning an expression to a
  2429.        * variable, emit the last instruction of the expression again, but
  2430.        * replace the destination register with the target of the assignment.
  2431.        * Dead code elimination will remove the original instruction.
  2432.        */
  2433.       glsl_to_tgsi_instruction *inst, *new_inst;
  2434.       inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
  2435.       new_inst = emit(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2]);
  2436.       new_inst->saturate = inst->saturate;
  2437.       inst->dead_mask = inst->dst.writemask;
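             /* Editor's sketch: for `v = a * b;` the expression visit already emitted
              * MUL temp, a, b; rather than adding MOV v, temp we re-emit MUL v, a, b
              * and mark the original instruction's writemask dead so later dead-code
              * elimination can remove it.
              */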
  2438.    } else {
  2439.       emit_block_mov(ir, ir->rhs->type, &l, &r);
  2440.    }
  2441. }
  2442.  
  2443.  
  2444. void
  2445. glsl_to_tgsi_visitor::visit(ir_constant *ir)
  2446. {
  2447.    st_src_reg src;
  2448.    GLfloat stack_vals[4] = { 0 };
  2449.    gl_constant_value *values = (gl_constant_value *) stack_vals;
  2450.    GLenum gl_type = GL_NONE;
  2451.    unsigned int i;
  2452.    static int in_array = 0;
  2453.    gl_register_file file = in_array ? PROGRAM_CONSTANT : PROGRAM_IMMEDIATE;
  2454.  
  2455.    /* Unfortunately, 4 floats is all we can get into
  2456.     * _mesa_add_typed_unnamed_constant.  So, make a temp to store an
  2457.     * aggregate constant and move each constant value into it.  If we
  2458.     * get lucky, copy propagation will eliminate the extra moves.
  2459.     */
  2460.    if (ir->type->base_type == GLSL_TYPE_STRUCT) {
  2461.       st_src_reg temp_base = get_temp(ir->type);
  2462.       st_dst_reg temp = st_dst_reg(temp_base);
  2463.  
  2464.       foreach_iter(exec_list_iterator, iter, ir->components) {
  2465.          ir_constant *field_value = (ir_constant *)iter.get();
  2466.          int size = type_size(field_value->type);
  2467.  
  2468.          assert(size > 0);
  2469.  
  2470.          field_value->accept(this);
  2471.          src = this->result;
  2472.  
  2473.          for (i = 0; i < (unsigned int)size; i++) {
  2474.             emit(ir, TGSI_OPCODE_MOV, temp, src);
  2475.  
  2476.             src.index++;
  2477.             temp.index++;
  2478.          }
  2479.       }
  2480.       this->result = temp_base;
  2481.       return;
  2482.    }
  2483.  
  2484.    if (ir->type->is_array()) {
  2485.       st_src_reg temp_base = get_temp(ir->type);
  2486.       st_dst_reg temp = st_dst_reg(temp_base);
  2487.       int size = type_size(ir->type->fields.array);
  2488.  
  2489.       assert(size > 0);
  2490.       in_array++;
  2491.  
  2492.       for (i = 0; i < ir->type->length; i++) {
  2493.          ir->array_elements[i]->accept(this);
  2494.          src = this->result;
  2495.          for (int j = 0; j < size; j++) {
  2496.             emit(ir, TGSI_OPCODE_MOV, temp, src);
  2497.  
  2498.             src.index++;
  2499.             temp.index++;
  2500.          }
  2501.       }
  2502.       this->result = temp_base;
  2503.       in_array--;
  2504.       return;
  2505.    }
  2506.  
  2507.    if (ir->type->is_matrix()) {
  2508.       st_src_reg mat = get_temp(ir->type);
  2509.       st_dst_reg mat_column = st_dst_reg(mat);
  2510.  
  2511.       for (i = 0; i < ir->type->matrix_columns; i++) {
  2512.          assert(ir->type->base_type == GLSL_TYPE_FLOAT);
  2513.          values = (gl_constant_value *) &ir->value.f[i * ir->type->vector_elements];
  2514.  
  2515.          src = st_src_reg(file, -1, ir->type->base_type);
  2516.          src.index = add_constant(file,
  2517.                                   values,
  2518.                                   ir->type->vector_elements,
  2519.                                   GL_FLOAT,
  2520.                                   &src.swizzle);
  2521.          emit(ir, TGSI_OPCODE_MOV, mat_column, src);
  2522.  
  2523.          mat_column.index++;
  2524.       }
  2525.  
  2526.       this->result = mat;
  2527.       return;
  2528.    }
  2529.  
  2530.    switch (ir->type->base_type) {
  2531.    case GLSL_TYPE_FLOAT:
  2532.       gl_type = GL_FLOAT;
  2533.       for (i = 0; i < ir->type->vector_elements; i++) {
  2534.          values[i].f = ir->value.f[i];
  2535.       }
  2536.       break;
  2537.    case GLSL_TYPE_UINT:
  2538.       gl_type = native_integers ? GL_UNSIGNED_INT : GL_FLOAT;
  2539.       for (i = 0; i < ir->type->vector_elements; i++) {
  2540.          if (native_integers)
  2541.             values[i].u = ir->value.u[i];
  2542.          else
  2543.             values[i].f = ir->value.u[i];
  2544.       }
  2545.       break;
  2546.    case GLSL_TYPE_INT:
  2547.       gl_type = native_integers ? GL_INT : GL_FLOAT;
  2548.       for (i = 0; i < ir->type->vector_elements; i++) {
  2549.          if (native_integers)
  2550.             values[i].i = ir->value.i[i];
  2551.          else
  2552.             values[i].f = ir->value.i[i];
  2553.       }
  2554.       break;
  2555.    case GLSL_TYPE_BOOL:
  2556.       gl_type = native_integers ? GL_BOOL : GL_FLOAT;
  2557.       for (i = 0; i < ir->type->vector_elements; i++) {
  2558.          if (native_integers)
  2559.             values[i].u = ir->value.b[i] ? ~0 : 0;
  2560.          else
  2561.             values[i].f = ir->value.b[i];
  2562.       }
  2563.       break;
  2564.    default:
  2565.       assert(!"Non-float/uint/int/bool constant");
  2566.    }
  2567.  
  2568.    this->result = st_src_reg(file, -1, ir->type);
  2569.    this->result.index = add_constant(file,
  2570.                                      values,
  2571.                                      ir->type->vector_elements,
  2572.                                      gl_type,
  2573.                                      &this->result.swizzle);
  2574. }
  2575.  
  2576. function_entry *
  2577. glsl_to_tgsi_visitor::get_function_signature(ir_function_signature *sig)
  2578. {
  2579.    function_entry *entry;
  2580.  
  2581.    foreach_iter(exec_list_iterator, iter, this->function_signatures) {
  2582.       entry = (function_entry *)iter.get();
  2583.  
  2584.       if (entry->sig == sig)
  2585.          return entry;
  2586.    }
  2587.  
  2588.    entry = ralloc(mem_ctx, function_entry);
  2589.    entry->sig = sig;
  2590.    entry->sig_id = this->next_signature_id++;
  2591.    entry->bgn_inst = NULL;
  2592.  
  2593.    /* Allocate storage for all the parameters. */
  2594.    foreach_iter(exec_list_iterator, iter, sig->parameters) {
  2595.       ir_variable *param = (ir_variable *)iter.get();
  2596.       variable_storage *storage;
  2597.  
  2598.       storage = find_variable_storage(param);
  2599.       assert(!storage);
  2600.  
  2601.       st_src_reg src = get_temp(param->type);
  2602.  
  2603.       storage = new(mem_ctx) variable_storage(param, src.file, src.index);
  2604.       this->variables.push_tail(storage);
  2605.    }
  2606.  
  2607.    if (!sig->return_type->is_void()) {
  2608.       entry->return_reg = get_temp(sig->return_type);
  2609.    } else {
  2610.       entry->return_reg = undef_src;
  2611.    }
  2612.  
  2613.    this->function_signatures.push_tail(entry);
  2614.    return entry;
  2615. }
  2616.  
  2617. void
  2618. glsl_to_tgsi_visitor::visit(ir_call *ir)
  2619. {
  2620.    glsl_to_tgsi_instruction *call_inst;
  2621.    ir_function_signature *sig = ir->callee;
  2622.    function_entry *entry = get_function_signature(sig);
  2623.    int i;
  2624.  
  2625.    /* Process in parameters. */
  2626.    exec_list_iterator sig_iter = sig->parameters.iterator();
  2627.    foreach_iter(exec_list_iterator, iter, *ir) {
  2628.       ir_rvalue *param_rval = (ir_rvalue *)iter.get();
  2629.       ir_variable *param = (ir_variable *)sig_iter.get();
  2630.  
  2631.       if (param->mode == ir_var_function_in ||
  2632.           param->mode == ir_var_function_inout) {
  2633.          variable_storage *storage = find_variable_storage(param);
  2634.          assert(storage);
  2635.  
  2636.          param_rval->accept(this);
  2637.          st_src_reg r = this->result;
  2638.  
  2639.          st_dst_reg l;
  2640.          l.file = storage->file;
  2641.          l.index = storage->index;
  2642.          l.reladdr = NULL;
  2643.          l.writemask = WRITEMASK_XYZW;
  2644.          l.cond_mask = COND_TR;
  2645.  
  2646.          for (i = 0; i < type_size(param->type); i++) {
  2647.             emit(ir, TGSI_OPCODE_MOV, l, r);
  2648.             l.index++;
  2649.             r.index++;
  2650.          }
  2651.       }
  2652.  
  2653.       sig_iter.next();
  2654.    }
  2655.    assert(!sig_iter.has_next());
  2656.  
  2657.    /* Emit call instruction */
  2658.    call_inst = emit(ir, TGSI_OPCODE_CAL);
  2659.    call_inst->function = entry;
  2660.  
  2661.    /* Process out parameters. */
  2662.    sig_iter = sig->parameters.iterator();
  2663.    foreach_iter(exec_list_iterator, iter, *ir) {
  2664.       ir_rvalue *param_rval = (ir_rvalue *)iter.get();
  2665.       ir_variable *param = (ir_variable *)sig_iter.get();
  2666.  
  2667.       if (param->mode == ir_var_function_out ||
  2668.           param->mode == ir_var_function_inout) {
  2669.          variable_storage *storage = find_variable_storage(param);
  2670.          assert(storage);
  2671.  
  2672.          st_src_reg r;
  2673.          r.file = storage->file;
  2674.          r.index = storage->index;
  2675.          r.reladdr = NULL;
  2676.          r.swizzle = SWIZZLE_NOOP;
  2677.          r.negate = 0;
  2678.  
  2679.          param_rval->accept(this);
  2680.          st_dst_reg l = st_dst_reg(this->result);
  2681.  
  2682.          for (i = 0; i < type_size(param->type); i++) {
  2683.             emit(ir, TGSI_OPCODE_MOV, l, r);
  2684.             l.index++;
  2685.             r.index++;
  2686.          }
  2687.       }
  2688.  
  2689.       sig_iter.next();
  2690.    }
  2691.    assert(!sig_iter.has_next());
  2692.  
  2693.    /* Process return value. */
  2694.    this->result = entry->return_reg;
  2695. }
  2696.  
  2697. void
  2698. glsl_to_tgsi_visitor::visit(ir_texture *ir)
  2699. {
  2700.    st_src_reg result_src, coord, cube_sc, lod_info, projector, dx, dy, offset, sample_index;
  2701.    st_dst_reg result_dst, coord_dst, cube_sc_dst;
  2702.    glsl_to_tgsi_instruction *inst = NULL;
  2703.    unsigned opcode = TGSI_OPCODE_NOP;
  2704.    const glsl_type *sampler_type = ir->sampler->type;
  2705.    bool is_cube_array = false;
  2706.  
  2707.    /* if we are a cube array sampler */
  2708.    if ((sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE &&
  2709.         sampler_type->sampler_array)) {
  2710.       is_cube_array = true;
  2711.    }
  2712.  
  2713.    if (ir->coordinate) {
  2714.       ir->coordinate->accept(this);
  2715.  
  2716.       /* Put our coords in a temp.  We'll need to modify them for shadow,
  2717.        * projection, or LOD, so the only case where we'd use it as-is is if
  2718.        * we're doing plain old texturing.  The optimization passes on
  2719.        * glsl_to_tgsi_visitor should handle cleaning up our mess in that case.
  2720.        */
  2721.       coord = get_temp(glsl_type::vec4_type);
  2722.       coord_dst = st_dst_reg(coord);
  2723.       coord_dst.writemask = (1 << ir->coordinate->type->vector_elements) - 1;
  2724.       emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
  2725.    }
  2726.  
  2727.    if (ir->projector) {
  2728.       ir->projector->accept(this);
  2729.       projector = this->result;
  2730.    }
  2731.  
  2732.    /* Storage for our result.  Ideally for an assignment we'd be using
  2733.     * the actual storage for the result here, instead.
  2734.     */
  2735.    result_src = get_temp(ir->type);
  2736.    result_dst = st_dst_reg(result_src);
  2737.  
  2738.    switch (ir->op) {
  2739.    case ir_tex:
  2740.       opcode = (is_cube_array && ir->shadow_comparitor) ? TGSI_OPCODE_TEX2 : TGSI_OPCODE_TEX;
  2741.       if (ir->offset) {
  2742.          ir->offset->accept(this);
  2743.          offset = this->result;
  2744.       }
  2745.       break;
  2746.    case ir_txb:
  2747.       opcode = is_cube_array ? TGSI_OPCODE_TXB2 : TGSI_OPCODE_TXB;
  2748.       ir->lod_info.bias->accept(this);
  2749.       lod_info = this->result;
  2750.       if (ir->offset) {
  2751.          ir->offset->accept(this);
  2752.          offset = this->result;
  2753.       }
  2754.       break;
  2755.    case ir_txl:
  2756.       opcode = is_cube_array ? TGSI_OPCODE_TXL2 : TGSI_OPCODE_TXL;
  2757.       ir->lod_info.lod->accept(this);
  2758.       lod_info = this->result;
  2759.       if (ir->offset) {
  2760.          ir->offset->accept(this);
  2761.          offset = this->result;
  2762.       }
  2763.       break;
  2764.    case ir_txd:
  2765.       opcode = TGSI_OPCODE_TXD;
  2766.       ir->lod_info.grad.dPdx->accept(this);
  2767.       dx = this->result;
  2768.       ir->lod_info.grad.dPdy->accept(this);
  2769.       dy = this->result;
  2770.       if (ir->offset) {
  2771.          ir->offset->accept(this);
  2772.          offset = this->result;
  2773.       }
  2774.       break;
  2775.    case ir_txs:
  2776.       opcode = TGSI_OPCODE_TXQ;
  2777.       ir->lod_info.lod->accept(this);
  2778.       lod_info = this->result;
  2779.       break;
  2780.    case ir_txf:
  2781.       opcode = TGSI_OPCODE_TXF;
  2782.       ir->lod_info.lod->accept(this);
  2783.       lod_info = this->result;
  2784.       if (ir->offset) {
  2785.          ir->offset->accept(this);
  2786.          offset = this->result;
  2787.       }
  2788.       break;
  2789.    case ir_txf_ms:
  2790.       opcode = TGSI_OPCODE_TXF;
  2791.       ir->lod_info.sample_index->accept(this);
  2792.       sample_index = this->result;
  2793.       break;
  2794.    case ir_lod:
  2795.       assert(!"Unexpected ir_lod opcode");
  2796.       break;
  2797.    }
  2798.  
  2799.    if (ir->projector) {
  2800.       if (opcode == TGSI_OPCODE_TEX) {
  2801.          /* Slot the projector in as the last component of the coord. */
  2802.          coord_dst.writemask = WRITEMASK_W;
  2803.          emit(ir, TGSI_OPCODE_MOV, coord_dst, projector);
  2804.          coord_dst.writemask = WRITEMASK_XYZW;
  2805.          opcode = TGSI_OPCODE_TXP;
  2806.       } else {
  2807.          st_src_reg coord_w = coord;
  2808.          coord_w.swizzle = SWIZZLE_WWWW;
  2809.  
  2810.          /* For the other TEX opcodes there's no projective version
  2811.           * since the last slot is taken up by LOD info.  Do the
  2812.           * projective divide now.
  2813.           */
  2814.          coord_dst.writemask = WRITEMASK_W;
  2815.          emit(ir, TGSI_OPCODE_RCP, coord_dst, projector);
  2816.  
  2817.          /* In the case where we have to project the coordinates "by hand,"
  2818.           * the shadow comparator value must also be projected.
  2819.           */
  2820.          st_src_reg tmp_src = coord;
  2821.          if (ir->shadow_comparitor) {
  2822.             /* Slot the shadow value in as the second to last component of the
  2823.              * coord.
  2824.              */
  2825.             ir->shadow_comparitor->accept(this);
  2826.  
  2827.             tmp_src = get_temp(glsl_type::vec4_type);
  2828.             st_dst_reg tmp_dst = st_dst_reg(tmp_src);
  2829.  
  2830.             /* Projective division not allowed for array samplers. */
  2831.             assert(!sampler_type->sampler_array);
  2832.  
  2833.             tmp_dst.writemask = WRITEMASK_Z;
  2834.             emit(ir, TGSI_OPCODE_MOV, tmp_dst, this->result);
  2835.  
  2836.             tmp_dst.writemask = WRITEMASK_XY;
  2837.             emit(ir, TGSI_OPCODE_MOV, tmp_dst, coord);
  2838.          }
  2839.  
  2840.          coord_dst.writemask = WRITEMASK_XYZ;
  2841.          emit(ir, TGSI_OPCODE_MUL, coord_dst, tmp_src, coord_w);
  2842.  
  2843.          coord_dst.writemask = WRITEMASK_XYZW;
  2844.          coord.swizzle = SWIZZLE_XYZW;
  2845.       }
  2846.    }
  2847.  
  2848.    /* If projection is done and the opcode is not TGSI_OPCODE_TXP, then the shadow
  2849.     * comparator was put in the correct place (and projected) by the code
  2850.     * above that handles by-hand projection.
  2851.     */
  2852.    if (ir->shadow_comparitor && (!ir->projector || opcode == TGSI_OPCODE_TXP)) {
  2853.       /* Slot the shadow value in as the second to last component of the
  2854.        * coord.
  2855.        */
  2856.       ir->shadow_comparitor->accept(this);
  2857.  
  2858.       if (is_cube_array) {
  2859.          cube_sc = get_temp(glsl_type::float_type);
  2860.          cube_sc_dst = st_dst_reg(cube_sc);
  2861.          cube_sc_dst.writemask = WRITEMASK_X;
  2862.          emit(ir, TGSI_OPCODE_MOV, cube_sc_dst, this->result);
  2863.          cube_sc_dst.writemask = WRITEMASK_X;
  2864.       }
  2865.       else {
  2866.          if ((sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_2D &&
  2867.               sampler_type->sampler_array) ||
  2868.              sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE) {
  2869.             coord_dst.writemask = WRITEMASK_W;
  2870.          } else {
  2871.             coord_dst.writemask = WRITEMASK_Z;
  2872.          }
  2873.          
  2874.          emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
  2875.          coord_dst.writemask = WRITEMASK_XYZW;
  2876.       }
  2877.    }
  2878.  
  2879.    if (ir->op == ir_txf_ms) {
  2880.       coord_dst.writemask = WRITEMASK_W;
  2881.       emit(ir, TGSI_OPCODE_MOV, coord_dst, sample_index);
  2882.       coord_dst.writemask = WRITEMASK_XYZW;
  2883.    } else if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB ||
  2884.        opcode == TGSI_OPCODE_TXF) {
  2885.       /* TGSI stores LOD or LOD bias in the last channel of the coords. */
  2886.       coord_dst.writemask = WRITEMASK_W;
  2887.       emit(ir, TGSI_OPCODE_MOV, coord_dst, lod_info);
  2888.       coord_dst.writemask = WRITEMASK_XYZW;
  2889.    }
  2890.  
  2891.    if (opcode == TGSI_OPCODE_TXD)
  2892.       inst = emit(ir, opcode, result_dst, coord, dx, dy);
  2893.    else if (opcode == TGSI_OPCODE_TXQ)
  2894.       inst = emit(ir, opcode, result_dst, lod_info);
  2895.    else if (opcode == TGSI_OPCODE_TXF) {
  2896.       inst = emit(ir, opcode, result_dst, coord);
  2897.    } else if (opcode == TGSI_OPCODE_TXL2 || opcode == TGSI_OPCODE_TXB2) {
  2898.       inst = emit(ir, opcode, result_dst, coord, lod_info);
  2899.    } else if (opcode == TGSI_OPCODE_TEX2) {
  2900.       inst = emit(ir, opcode, result_dst, coord, cube_sc);
  2901.    } else
  2902.       inst = emit(ir, opcode, result_dst, coord);
  2903.  
  2904.    if (ir->shadow_comparitor)
  2905.       inst->tex_shadow = GL_TRUE;
  2906.  
  2907.    inst->sampler = _mesa_get_sampler_uniform_value(ir->sampler,
  2908.                                                    this->shader_program,
  2909.                                                    this->prog);
  2910.  
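   /* Note (editorial): at most one texel offset is recorded here, and
    * translate_tex_offset() further below expects it to live in an
    * immediate register.
    */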
  2911.    if (ir->offset) {
  2912.        inst->tex_offset_num_offset = 1;
  2913.        inst->tex_offsets[0].Index = offset.index;
  2914.        inst->tex_offsets[0].File = offset.file;
  2915.        inst->tex_offsets[0].SwizzleX = GET_SWZ(offset.swizzle, 0);
  2916.        inst->tex_offsets[0].SwizzleY = GET_SWZ(offset.swizzle, 1);
  2917.        inst->tex_offsets[0].SwizzleZ = GET_SWZ(offset.swizzle, 2);
  2918.    }
  2919.  
  2920.    switch (sampler_type->sampler_dimensionality) {
  2921.    case GLSL_SAMPLER_DIM_1D:
  2922.       inst->tex_target = (sampler_type->sampler_array)
  2923.          ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX;
  2924.       break;
  2925.    case GLSL_SAMPLER_DIM_2D:
  2926.       inst->tex_target = (sampler_type->sampler_array)
  2927.          ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX;
  2928.       break;
  2929.    case GLSL_SAMPLER_DIM_3D:
  2930.       inst->tex_target = TEXTURE_3D_INDEX;
  2931.       break;
  2932.    case GLSL_SAMPLER_DIM_CUBE:
  2933.       inst->tex_target = (sampler_type->sampler_array)
  2934.          ? TEXTURE_CUBE_ARRAY_INDEX : TEXTURE_CUBE_INDEX;
  2935.       break;
  2936.    case GLSL_SAMPLER_DIM_RECT:
  2937.       inst->tex_target = TEXTURE_RECT_INDEX;
  2938.       break;
  2939.    case GLSL_SAMPLER_DIM_BUF:
  2940.       inst->tex_target = TEXTURE_BUFFER_INDEX;
  2941.       break;
  2942.    case GLSL_SAMPLER_DIM_EXTERNAL:
  2943.       inst->tex_target = TEXTURE_EXTERNAL_INDEX;
  2944.       break;
  2945.    case GLSL_SAMPLER_DIM_MS:
  2946.       inst->tex_target = (sampler_type->sampler_array)
  2947.          ? TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX : TEXTURE_2D_MULTISAMPLE_INDEX;
  2948.       break;
  2949.    default:
  2950.       assert(!"Should not get here.");
  2951.    }
  2952.  
  2953.    this->result = result_src;
  2954. }
  2955.  
  2956. void
  2957. glsl_to_tgsi_visitor::visit(ir_return *ir)
  2958. {
  2959.    if (ir->get_value()) {
  2960.       st_dst_reg l;
  2961.       int i;
  2962.  
  2963.       assert(current_function);
  2964.  
  2965.       ir->get_value()->accept(this);
  2966.       st_src_reg r = this->result;
  2967.  
  2968.       l = st_dst_reg(current_function->return_reg);
  2969.  
  2970.       for (i = 0; i < type_size(current_function->sig->return_type); i++) {
  2971.          emit(ir, TGSI_OPCODE_MOV, l, r);
  2972.          l.index++;
  2973.          r.index++;
  2974.       }
  2975.    }
  2976.  
  2977.    emit(ir, TGSI_OPCODE_RET);
  2978. }
  2979.  
  2980. void
  2981. glsl_to_tgsi_visitor::visit(ir_discard *ir)
  2982. {
  2983.    if (ir->condition) {
  2984.       ir->condition->accept(this);
  2985.       this->result.negate = ~this->result.negate;
  2986.       emit(ir, TGSI_OPCODE_KILL_IF, undef_dst, this->result);
  2987.    } else {
  2988.       /* unconditional kill */
  2989.       emit(ir, TGSI_OPCODE_KILL);
  2990.    }
  2991. }
  2992.  
  2993. void
  2994. glsl_to_tgsi_visitor::visit(ir_if *ir)
  2995. {
  2996.    unsigned if_opcode;
  2997.    glsl_to_tgsi_instruction *if_inst;
  2998.  
  2999.    ir->condition->accept(this);
  3000.    assert(this->result.file != PROGRAM_UNDEFINED);
  3001.  
  3002.    if_opcode = native_integers ? TGSI_OPCODE_UIF : TGSI_OPCODE_IF;
  3003.  
  3004.    if_inst = emit(ir->condition, if_opcode, undef_dst, this->result);
  3005.  
  3006.    this->instructions.push_tail(if_inst);
  3007.  
  3008.    visit_exec_list(&ir->then_instructions, this);
  3009.  
  3010.    if (!ir->else_instructions.is_empty()) {
  3011.       emit(ir->condition, TGSI_OPCODE_ELSE);
  3012.       visit_exec_list(&ir->else_instructions, this);
  3013.    }
  3014.  
  3015.    if_inst = emit(ir->condition, TGSI_OPCODE_ENDIF);
  3016. }
  3017.  
  3018. glsl_to_tgsi_visitor::glsl_to_tgsi_visitor()
  3019. {
  3020.    result.file = PROGRAM_UNDEFINED;
  3021.    next_temp = 1;
  3022.    next_array = 0;
  3023.    next_signature_id = 1;
  3024.    num_immediates = 0;
  3025.    current_function = NULL;
  3026.    num_address_regs = 0;
  3027.    samplers_used = 0;
  3028.    indirect_addr_consts = false;
  3029.    glsl_version = 0;
  3030.    native_integers = false;
  3031.    mem_ctx = ralloc_context(NULL);
  3032.    ctx = NULL;
  3033.    prog = NULL;
  3034.    shader_program = NULL;
  3035.    options = NULL;
  3036. }
  3037.  
  3038. glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor()
  3039. {
  3040.    ralloc_free(mem_ctx);
  3041. }
  3042.  
  3043. extern "C" void free_glsl_to_tgsi_visitor(glsl_to_tgsi_visitor *v)
  3044. {
  3045.    delete v;
  3046. }
  3047.  
  3048.  
  3049. /**
  3050.  * Count resources used by the given GPU program (number of texture
  3051.  * samplers, etc.).
  3052.  */
  3053. static void
  3054. count_resources(glsl_to_tgsi_visitor *v, gl_program *prog)
  3055. {
  3056.    v->samplers_used = 0;
  3057.  
  3058.    foreach_iter(exec_list_iterator, iter, v->instructions) {
  3059.       glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
  3060.  
  3061.       if (is_tex_instruction(inst->op)) {
  3062.          v->samplers_used |= 1 << inst->sampler;
  3063.  
  3064.          if (inst->tex_shadow) {
  3065.             prog->ShadowSamplers |= 1 << inst->sampler;
  3066.          }
  3067.       }
  3068.    }
  3069.    
  3070.    prog->SamplersUsed = v->samplers_used;
  3071.  
  3072.    if (v->shader_program != NULL)
  3073.       _mesa_update_shader_textures_used(v->shader_program, prog);
  3074. }
  3075.  
  3076. static void
  3077. set_uniform_initializer(struct gl_context *ctx, void *mem_ctx,
  3078.                         struct gl_shader_program *shader_program,
  3079.                         const char *name, const glsl_type *type,
  3080.                         ir_constant *val)
  3081. {
  3082.    if (type->is_record()) {
  3083.       ir_constant *field_constant;
  3084.  
  3085.       field_constant = (ir_constant *)val->components.get_head();
  3086.  
  3087.       for (unsigned int i = 0; i < type->length; i++) {
  3088.          const glsl_type *field_type = type->fields.structure[i].type;
  3089.          const char *field_name = ralloc_asprintf(mem_ctx, "%s.%s", name,
  3090.                                             type->fields.structure[i].name);
  3091.          set_uniform_initializer(ctx, mem_ctx, shader_program, field_name,
  3092.                                  field_type, field_constant);
  3093.          field_constant = (ir_constant *)field_constant->next;
  3094.       }
  3095.       return;
  3096.    }
  3097.  
  3098.    unsigned offset;
  3099.    unsigned index = _mesa_get_uniform_location(ctx, shader_program, name,
  3100.                                                &offset);
  3101.    if (offset == GL_INVALID_INDEX) {
  3102.       fail_link(shader_program,
  3103.                 "Couldn't find uniform for initializer %s\n", name);
  3104.       return;
  3105.    }
  3106.    int loc = _mesa_uniform_merge_location_offset(shader_program, index, offset);
  3107.  
  3108.    for (unsigned int i = 0; i < (type->is_array() ? type->length : 1); i++) {
  3109.       ir_constant *element;
  3110.       const glsl_type *element_type;
  3111.       if (type->is_array()) {
  3112.          element = val->array_elements[i];
  3113.          element_type = type->fields.array;
  3114.       } else {
  3115.          element = val;
  3116.          element_type = type;
  3117.       }
  3118.  
  3119.       void *values;
  3120.  
  3121.       if (element_type->base_type == GLSL_TYPE_BOOL) {
  3122.          int *conv = ralloc_array(mem_ctx, int, element_type->components());
  3123.          for (unsigned int j = 0; j < element_type->components(); j++) {
  3124.             conv[j] = element->value.b[j];
  3125.          }
  3126.          values = (void *)conv;
  3127.          element_type = glsl_type::get_instance(GLSL_TYPE_INT,
  3128.                                                 element_type->vector_elements,
  3129.                                                 1);
  3130.       } else {
  3131.          values = &element->value;
  3132.       }
  3133.  
  3134.       if (element_type->is_matrix()) {
  3135.          _mesa_uniform_matrix(ctx, shader_program,
  3136.                               element_type->matrix_columns,
  3137.                               element_type->vector_elements,
  3138.                               loc, 1, GL_FALSE, (GLfloat *)values);
  3139.       } else {
  3140.          _mesa_uniform(ctx, shader_program, loc, element_type->matrix_columns,
  3141.                        values, element_type->gl_type);
  3142.       }
  3143.  
  3144.       loc++;
  3145.    }
  3146. }
  3147.  
  3148. /**
  3149.  * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which
  3150.  * are read from the given src in this instruction.
  3151.  */
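/* A short worked example (illustrative, not from the original source): with
 * dst.writemask = WRITEMASK_XY and src swizzled as .yzzw, the X and Y writes
 * read channels Y and Z of src, so the returned mask is
 * WRITEMASK_Y | WRITEMASK_Z.
 */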
  3152. static int
  3153. get_src_arg_mask(st_dst_reg dst, st_src_reg src)
  3154. {
  3155.    int read_mask = 0, comp;
  3156.  
  3157.    /* Now, given the src swizzle and the written channels, find which
  3158.     * components are actually read
  3159.     */
  3160.    for (comp = 0; comp < 4; ++comp) {
  3161.       const unsigned coord = GET_SWZ(src.swizzle, comp);
  3162.       ASSERT(coord < 4);
  3163.       if (dst.writemask & (1 << comp) && coord <= SWIZZLE_W)
  3164.          read_mask |= 1 << coord;
  3165.    }
  3166.  
  3167.    return read_mask;
  3168. }
  3169.  
  3170. /**
  3171.  * This pass replaces CMP T0, T1 T2 T0 with MOV T0, T2 when the CMP
  3172.  * instruction is the first instruction to write to register T0.  There are
  3173.  * several lowering passes done in GLSL IR (e.g. branches and
  3174.  * relative addressing) that create a large number of conditional assignments
  3175.  * that ir_to_mesa converts to CMP instructions like the one mentioned above.
  3176.  *
  3177.  * Here is why this conversion is safe:
  3178.  * CMP T0, T1 T2 T0 can be expanded to:
  3179.  * if (T1 < 0.0)
  3180.  *      MOV T0, T2;
  3181.  * else
  3182.  *      MOV T0, T0;
  3183.  *
  3184.  * If (T1 < 0.0) evaluates to true then our replacement MOV T0, T2 is the same
  3185.  * as the original program.  If (T1 < 0.0) evaluates to false, executing
  3186.  * MOV T0, T0 will store a garbage value in T0 since T0 is uninitialized.
  3187.  * Therefore, it doesn't matter that we are replacing MOV T0, T0 with MOV T0, T2
  3188.  * because any instruction that was going to read from T0 after this was going
  3189.  * to read a garbage value anyway.
  3190.  */
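/* Illustrative before/after, in the spirit of the examples used for the other
 * passes below (not taken from real compiler output):
 *
 *   before: 0: CMP TEMP[0], TEMP[1], TEMP[2], TEMP[0];
 *   after:  0: MOV TEMP[0], TEMP[2];
 *
 * assuming no earlier instruction has written any channel of TEMP[0].
 */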
  3191. void
  3192. glsl_to_tgsi_visitor::simplify_cmp(void)
  3193. {
  3194.    unsigned *tempWrites;
  3195.    unsigned outputWrites[MAX_PROGRAM_OUTPUTS];
  3196.  
  3197.    tempWrites = new unsigned[MAX_TEMPS];
  3198.    if (!tempWrites) {
  3199.       return;
  3200.    }
  3201.    memset(tempWrites, 0, sizeof(unsigned) * MAX_TEMPS);
  3202.    memset(outputWrites, 0, sizeof(outputWrites));
  3203.  
  3204.    foreach_iter(exec_list_iterator, iter, this->instructions) {
  3205.       glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
  3206.       unsigned prevWriteMask = 0;
  3207.  
  3208.       /* Give up if we encounter relative addressing or flow control. */
  3209.       if (inst->dst.reladdr ||
  3210.           tgsi_get_opcode_info(inst->op)->is_branch ||
  3211.           inst->op == TGSI_OPCODE_BGNSUB ||
  3212.           inst->op == TGSI_OPCODE_CONT ||
  3213.           inst->op == TGSI_OPCODE_END ||
  3214.           inst->op == TGSI_OPCODE_ENDSUB ||
  3215.           inst->op == TGSI_OPCODE_RET) {
  3216.          break;
  3217.       }
  3218.  
  3219.       if (inst->dst.file == PROGRAM_OUTPUT) {
  3220.          assert(inst->dst.index < MAX_PROGRAM_OUTPUTS);
  3221.          prevWriteMask = outputWrites[inst->dst.index];
  3222.          outputWrites[inst->dst.index] |= inst->dst.writemask;
  3223.       } else if (inst->dst.file == PROGRAM_TEMPORARY) {
  3224.          assert(inst->dst.index < MAX_TEMPS);
  3225.          prevWriteMask = tempWrites[inst->dst.index];
  3226.          tempWrites[inst->dst.index] |= inst->dst.writemask;
  3227.       } else
  3228.          continue;
  3229.  
  3230.       /* For a CMP to be considered a conditional write, the destination
  3231.        * register and the third source register (src[2]) must be the same. */
  3232.       if (inst->op == TGSI_OPCODE_CMP
  3233.           && !(inst->dst.writemask & prevWriteMask)
  3234.           && inst->src[2].file == inst->dst.file
  3235.           && inst->src[2].index == inst->dst.index
  3236.           && inst->dst.writemask == get_src_arg_mask(inst->dst, inst->src[2])) {
  3237.  
  3238.          inst->op = TGSI_OPCODE_MOV;
  3239.          inst->src[0] = inst->src[1];
  3240.       }
  3241.    }
  3242.  
  3243.    delete [] tempWrites;
  3244. }
  3245.  
  3246. /* Replaces all references to a temporary register index with another index. */
  3247. void
  3248. glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index)
  3249. {
  3250.    foreach_iter(exec_list_iterator, iter, this->instructions) {
  3251.       glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
  3252.       unsigned j;
  3253.      
  3254.       for (j=0; j < num_inst_src_regs(inst->op); j++) {
  3255.          if (inst->src[j].file == PROGRAM_TEMPORARY &&
  3256.              inst->src[j].index == index) {
  3257.             inst->src[j].index = new_index;
  3258.          }
  3259.       }
  3260.      
  3261.       if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) {
  3262.          inst->dst.index = new_index;
  3263.       }
  3264.    }
  3265. }
  3266.  
  3267. int
  3268. glsl_to_tgsi_visitor::get_first_temp_read(int index)
  3269. {
  3270.    int depth = 0; /* loop depth */
  3271.    int loop_start = -1; /* index of the first active BGNLOOP (if any) */
  3272.    unsigned i = 0, j;
  3273.    
  3274.    foreach_iter(exec_list_iterator, iter, this->instructions) {
  3275.       glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
  3276.      
  3277.       for (j=0; j < num_inst_src_regs(inst->op); j++) {
  3278.          if (inst->src[j].file == PROGRAM_TEMPORARY &&
  3279.              inst->src[j].index == index) {
  3280.             return (depth == 0) ? i : loop_start;
  3281.          }
  3282.       }
  3283.      
  3284.       if (inst->op == TGSI_OPCODE_BGNLOOP) {
  3285.          if(depth++ == 0)
  3286.             loop_start = i;
  3287.       } else if (inst->op == TGSI_OPCODE_ENDLOOP) {
  3288.          if (--depth == 0)
  3289.             loop_start = -1;
  3290.       }
  3291.       assert(depth >= 0);
  3292.      
  3293.       i++;
  3294.    }
  3295.    
  3296.    return -1;
  3297. }
  3298.  
  3299. int
  3300. glsl_to_tgsi_visitor::get_first_temp_write(int index)
  3301. {
  3302.    int depth = 0; /* loop depth */
  3303.    int loop_start = -1; /* index of the first active BGNLOOP (if any) */
  3304.    int i = 0;
  3305.    
  3306.    foreach_iter(exec_list_iterator, iter, this->instructions) {
  3307.       glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
  3308.      
  3309.       if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index) {
  3310.          return (depth == 0) ? i : loop_start;
  3311.       }
  3312.      
  3313.       if (inst->op == TGSI_OPCODE_BGNLOOP) {
  3314.          if(depth++ == 0)
  3315.             loop_start = i;
  3316.       } else if (inst->op == TGSI_OPCODE_ENDLOOP) {
  3317.          if (--depth == 0)
  3318.             loop_start = -1;
  3319.       }
  3320.       assert(depth >= 0);
  3321.      
  3322.       i++;
  3323.    }
  3324.    
  3325.    return -1;
  3326. }
  3327.  
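/* Note on the loop handling in get_last_temp_read()/get_last_temp_write()
 * below: a read or write that happens inside a loop is first recorded with
 * the sentinel value -2; when the outermost ENDLOOP is reached, that sentinel
 * is replaced with the index of the ENDLOOP instruction, so the temporary is
 * treated as live until the loop exits.
 */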
  3328. int
  3329. glsl_to_tgsi_visitor::get_last_temp_read(int index)
  3330. {
  3331.    int depth = 0; /* loop depth */
  3332.    int last = -1; /* index of last instruction that reads the temporary */
  3333.    unsigned i = 0, j;
  3334.    
  3335.    foreach_iter(exec_list_iterator, iter, this->instructions) {
  3336.       glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
  3337.      
  3338.       for (j=0; j < num_inst_src_regs(inst->op); j++) {
  3339.          if (inst->src[j].file == PROGRAM_TEMPORARY &&
  3340.              inst->src[j].index == index) {
  3341.             last = (depth == 0) ? i : -2;
  3342.          }
  3343.       }
  3344.      
  3345.       if (inst->op == TGSI_OPCODE_BGNLOOP)
  3346.          depth++;
  3347.       else if (inst->op == TGSI_OPCODE_ENDLOOP)
  3348.          if (--depth == 0 && last == -2)
  3349.             last = i;
  3350.       assert(depth >= 0);
  3351.      
  3352.       i++;
  3353.    }
  3354.    
  3355.    assert(last >= -1);
  3356.    return last;
  3357. }
  3358.  
  3359. int
  3360. glsl_to_tgsi_visitor::get_last_temp_write(int index)
  3361. {
  3362.    int depth = 0; /* loop depth */
  3363.    int last = -1; /* index of last instruction that writes to the temporary */
  3364.    int i = 0;
  3365.    
  3366.    foreach_iter(exec_list_iterator, iter, this->instructions) {
  3367.       glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
  3368.      
  3369.       if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == index)
  3370.          last = (depth == 0) ? i : -2;
  3371.      
  3372.       if (inst->op == TGSI_OPCODE_BGNLOOP)
  3373.          depth++;
  3374.       else if (inst->op == TGSI_OPCODE_ENDLOOP)
  3375.          if (--depth == 0 && last == -2)
  3376.             last = i;
  3377.       assert(depth >= 0);
  3378.      
  3379.       i++;
  3380.    }
  3381.    
  3382.    assert(last >= -1);
  3383.    return last;
  3384. }
  3385.  
  3386. /*
  3387.  * On a basic block basis, tracks available PROGRAM_TEMPORARY register
  3388.  * channels for copy propagation and updates following instructions to
  3389.  * use the original versions.
  3390.  *
  3391.  * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
  3392.  * will occur.  As an example, a TXP production before this pass:
  3393.  *
  3394.  * 0: MOV TEMP[1], INPUT[4].xyyy;
  3395.  * 1: MOV TEMP[1].w, INPUT[4].wwww;
  3396.  * 2: TXP TEMP[2], TEMP[1], texture[0], 2D;
  3397.  *
  3398.  * and after:
  3399.  *
  3400.  * 0: MOV TEMP[1], INPUT[4].xyyy;
  3401.  * 1: MOV TEMP[1].w, INPUT[4].wwww;
  3402.  * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
  3403.  *
  3404.  * which allows for dead code elimination on TEMP[1]'s writes.
  3405.  */
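/* Implementation note: acp (the "available copy propagation" table) is
 * indexed as acp[4 * temp_index + channel] and points at the MOV instruction
 * whose value is still valid for that temporary channel; acp_level records
 * the if/else nesting level at which each entry was added, so entries written
 * inside a conditional can be dropped at the matching ELSE/ENDIF.
 */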
  3406. void
  3407. glsl_to_tgsi_visitor::copy_propagate(void)
  3408. {
  3409.    glsl_to_tgsi_instruction **acp = rzalloc_array(mem_ctx,
  3410.                                                     glsl_to_tgsi_instruction *,
  3411.                                                     this->next_temp * 4);
  3412.    int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
  3413.    int level = 0;
  3414.  
  3415.    foreach_iter(exec_list_iterator, iter, this->instructions) {
  3416.       glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
  3417.  
  3418.       assert(inst->dst.file != PROGRAM_TEMPORARY
  3419.              || inst->dst.index < this->next_temp);
  3420.  
  3421.       /* First, do any copy propagation possible into the src regs. */
  3422.       for (int r = 0; r < 3; r++) {
  3423.          glsl_to_tgsi_instruction *first = NULL;
  3424.          bool good = true;
  3425.          int acp_base = inst->src[r].index * 4;
  3426.  
  3427.          if (inst->src[r].file != PROGRAM_TEMPORARY ||
  3428.              inst->src[r].reladdr)
  3429.             continue;
  3430.  
  3431.          /* See if we can find entries in the ACP consisting of MOVs
  3432.           * from the same src register for all the swizzled channels
  3433.           * of this src register reference.
  3434.           */
  3435.          for (int i = 0; i < 4; i++) {
  3436.             int src_chan = GET_SWZ(inst->src[r].swizzle, i);
  3437.             glsl_to_tgsi_instruction *copy_chan = acp[acp_base + src_chan];
  3438.  
  3439.             if (!copy_chan) {
  3440.                good = false;
  3441.                break;
  3442.             }
  3443.  
  3444.             assert(acp_level[acp_base + src_chan] <= level);
  3445.  
  3446.             if (!first) {
  3447.                first = copy_chan;
  3448.             } else {
  3449.                if (first->src[0].file != copy_chan->src[0].file ||
  3450.                    first->src[0].index != copy_chan->src[0].index) {
  3451.                   good = false;
  3452.                   break;
  3453.                }
  3454.             }
  3455.          }
  3456.  
  3457.          if (good) {
  3458.             /* We've now validated that we can copy-propagate to
  3459.              * replace this src register reference.  Do it.
  3460.              */
  3461.             inst->src[r].file = first->src[0].file;
  3462.             inst->src[r].index = first->src[0].index;
  3463.  
  3464.             int swizzle = 0;
  3465.             for (int i = 0; i < 4; i++) {
  3466.                int src_chan = GET_SWZ(inst->src[r].swizzle, i);
  3467.                glsl_to_tgsi_instruction *copy_inst = acp[acp_base + src_chan];
  3468.                swizzle |= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) <<
  3469.                            (3 * i));
  3470.             }
  3471.             inst->src[r].swizzle = swizzle;
  3472.          }
  3473.       }
  3474.  
  3475.       switch (inst->op) {
  3476.       case TGSI_OPCODE_BGNLOOP:
  3477.       case TGSI_OPCODE_ENDLOOP:
  3478.          /* End of a basic block, clear the ACP entirely. */
  3479.          memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
  3480.          break;
  3481.  
  3482.       case TGSI_OPCODE_IF:
  3483.       case TGSI_OPCODE_UIF:
  3484.          ++level;
  3485.          break;
  3486.  
  3487.       case TGSI_OPCODE_ENDIF:
  3488.       case TGSI_OPCODE_ELSE:
  3489.          /* Clear from the ACP all channels written inside the block,
  3490.           * leaving those that were not touched.
  3491.           */
  3492.          for (int r = 0; r < this->next_temp; r++) {
  3493.             for (int c = 0; c < 4; c++) {
  3494.                if (!acp[4 * r + c])
  3495.                   continue;
  3496.  
  3497.                if (acp_level[4 * r + c] >= level)
  3498.                   acp[4 * r + c] = NULL;
  3499.             }
  3500.          }
  3501.          if (inst->op == TGSI_OPCODE_ENDIF)
  3502.             --level;
  3503.          break;
  3504.  
  3505.       default:
  3506.          /* Continuing the block, clear any written channels from
  3507.           * the ACP.
  3508.           */
  3509.          if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.reladdr) {
  3510.             /* Any temporary might be written, so no copy propagation
  3511.              * across this instruction.
  3512.              */
  3513.             memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
  3514.          } else if (inst->dst.file == PROGRAM_OUTPUT &&
  3515.                     inst->dst.reladdr) {
  3516.             /* Any output might be written, so no copy propagation
  3517.              * from outputs across this instruction.
  3518.              */
  3519.             for (int r = 0; r < this->next_temp; r++) {
  3520.                for (int c = 0; c < 4; c++) {
  3521.                   if (!acp[4 * r + c])
  3522.                      continue;
  3523.  
  3524.                   if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT)
  3525.                      acp[4 * r + c] = NULL;
  3526.                }
  3527.             }
  3528.          } else if (inst->dst.file == PROGRAM_TEMPORARY ||
  3529.                     inst->dst.file == PROGRAM_OUTPUT) {
  3530.             /* Clear where it's used as dst. */
  3531.             if (inst->dst.file == PROGRAM_TEMPORARY) {
  3532.                for (int c = 0; c < 4; c++) {
  3533.                   if (inst->dst.writemask & (1 << c)) {
  3534.                      acp[4 * inst->dst.index + c] = NULL;
  3535.                   }
  3536.                }
  3537.             }
  3538.  
  3539.             /* Clear where it's used as src. */
  3540.             for (int r = 0; r < this->next_temp; r++) {
  3541.                for (int c = 0; c < 4; c++) {
  3542.                   if (!acp[4 * r + c])
  3543.                      continue;
  3544.  
  3545.                   int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c);
  3546.  
  3547.                   if (acp[4 * r + c]->src[0].file == inst->dst.file &&
  3548.                       acp[4 * r + c]->src[0].index == inst->dst.index &&
  3549.                       inst->dst.writemask & (1 << src_chan))
  3550.                   {
  3551.                      acp[4 * r + c] = NULL;
  3552.                   }
  3553.                }
  3554.             }
  3555.          }
  3556.          break;
  3557.       }
  3558.  
  3559.       /* If this is a copy, add it to the ACP. */
  3560.       if (inst->op == TGSI_OPCODE_MOV &&
  3561.           inst->dst.file == PROGRAM_TEMPORARY &&
  3562.           !(inst->dst.file == inst->src[0].file &&
  3563.              inst->dst.index == inst->src[0].index) &&
  3564.           !inst->dst.reladdr &&
  3565.           !inst->saturate &&
  3566.           !inst->src[0].reladdr &&
  3567.           !inst->src[0].negate) {
  3568.          for (int i = 0; i < 4; i++) {
  3569.             if (inst->dst.writemask & (1 << i)) {
  3570.                acp[4 * inst->dst.index + i] = inst;
  3571.                acp_level[4 * inst->dst.index + i] = level;
  3572.             }
  3573.          }
  3574.       }
  3575.    }
  3576.  
  3577.    ralloc_free(acp_level);
  3578.    ralloc_free(acp);
  3579. }
  3580.  
  3581. /*
  3582.  * Tracks available PROGRAM_TEMPORARY registers for dead code elimination.
  3583.  *
  3584.  * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
  3585.  * will occur.  As an example, a TXP production after copy propagation but
  3586.  * before this pass:
  3587.  *
  3588.  * 0: MOV TEMP[1], INPUT[4].xyyy;
  3589.  * 1: MOV TEMP[1].w, INPUT[4].wwww;
  3590.  * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
  3591.  *
  3592.  * and after this pass:
  3593.  *
  3594.  * 0: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
  3595.  *
  3596.  * FIXME: assumes that all functions are inlined (no support for BGNSUB/ENDSUB)
  3597.  * FIXME: doesn't eliminate all dead code inside of loops; it steps around them
  3598.  */
  3599. void
  3600. glsl_to_tgsi_visitor::eliminate_dead_code(void)
  3601. {
  3602.    int i;
  3603.    
  3604.    for (i=0; i < this->next_temp; i++) {
  3605.       int last_read = get_last_temp_read(i);
  3606.       int j = 0;
  3607.      
  3608.       foreach_iter(exec_list_iterator, iter, this->instructions) {
  3609.          glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
  3610.  
  3611.          if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.index == i &&
  3612.              j > last_read)
  3613.          {
  3614.             iter.remove();
  3615.             delete inst;
  3616.          }
  3617.          
  3618.          j++;
  3619.       }
  3620.    }
  3621. }
  3622.  
  3623. /*
  3624.  * On a basic block basis, tracks available PROGRAM_TEMPORARY registers for dead
  3625.  * code elimination.  This is less primitive than eliminate_dead_code(), as it
  3626.  * is per-channel and can detect consecutive writes without a read between them
  3627.  * as dead code.  However, there is some dead code that can be eliminated by
  3628.  * eliminate_dead_code() but not this function - for example, this function
  3629.  * cannot eliminate an instruction writing to a register that is never read and
  3630.  * is the only instruction writing to that register.
  3631.  *
  3632.  * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
  3633.  * will occur.
  3634.  */
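/* Implementation note: writes[] is indexed the same way as acp[] in
 * copy_propagate() (one slot per temporary channel, 4 * index + channel) and
 * holds the most recent not-yet-read write to that channel; write_level
 * records the if/else nesting depth at which that write happened.
 */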
  3635. int
  3636. glsl_to_tgsi_visitor::eliminate_dead_code_advanced(void)
  3637. {
  3638.    glsl_to_tgsi_instruction **writes = rzalloc_array(mem_ctx,
  3639.                                                      glsl_to_tgsi_instruction *,
  3640.                                                      this->next_temp * 4);
  3641.    int *write_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
  3642.    int level = 0;
  3643.    int removed = 0;
  3644.  
  3645.    foreach_iter(exec_list_iterator, iter, this->instructions) {
  3646.       glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
  3647.  
  3648.       assert(inst->dst.file != PROGRAM_TEMPORARY
  3649.              || inst->dst.index < this->next_temp);
  3650.      
  3651.       switch (inst->op) {
  3652.       case TGSI_OPCODE_BGNLOOP:
  3653.       case TGSI_OPCODE_ENDLOOP:
  3654.       case TGSI_OPCODE_CONT:
  3655.       case TGSI_OPCODE_BRK:
  3656.          /* End of a basic block, clear the write array entirely.
  3657.           *
  3658.           * This keeps us from killing dead code when the writes are
  3659.           * on either side of a loop, even when the register isn't touched
  3660.           * inside the loop.  However, glsl_to_tgsi_visitor doesn't seem to emit
  3661.           * dead code of this type, so it shouldn't make a difference as long as
  3662.           * the dead code elimination pass in the GLSL compiler does its job.
  3663.           */
  3664.          memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
  3665.          break;
  3666.  
  3667.       case TGSI_OPCODE_ENDIF:
  3668.       case TGSI_OPCODE_ELSE:
  3669.          /* Promote the recorded level of all channels written inside the
  3670.           * preceding if or else block to the level above the if/else block.
  3671.           */
  3672.          for (int r = 0; r < this->next_temp; r++) {
  3673.             for (int c = 0; c < 4; c++) {
  3674.                if (!writes[4 * r + c])
  3675.                   continue;
  3676.  
  3677.                if (write_level[4 * r + c] == level)
  3678.                   write_level[4 * r + c] = level - 1;
  3679.             }
  3680.          }
  3681.  
  3682.          if(inst->op == TGSI_OPCODE_ENDIF)
  3683.             --level;
  3684.          
  3685.          break;
  3686.  
  3687.       case TGSI_OPCODE_IF:
  3688.       case TGSI_OPCODE_UIF:
  3689.          ++level;
  3690.          /* fallthrough to default case to mark the condition as read */
  3691.      
  3692.       default:
  3693.          /* Continuing the block, clear any channels from the write array that
  3694.           * are read by this instruction.
  3695.           */
  3696.          for (unsigned i = 0; i < Elements(inst->src); i++) {
  3697.             if (inst->src[i].file == PROGRAM_TEMPORARY && inst->src[i].reladdr){
  3698.                /* Any temporary might be read, so no dead code elimination
  3699.                 * across this instruction.
  3700.                 */
  3701.                memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
  3702.             } else if (inst->src[i].file == PROGRAM_TEMPORARY) {
  3703.                /* Clear where it's used as src. */
  3704.                int src_chans = 1 << GET_SWZ(inst->src[i].swizzle, 0);
  3705.                src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 1);
  3706.                src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 2);
  3707.                src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 3);
  3708.                
  3709.                for (int c = 0; c < 4; c++) {
  3710.                    if (src_chans & (1 << c)) {
  3711.                       writes[4 * inst->src[i].index + c] = NULL;
  3712.                    }
  3713.                }
  3714.             }
  3715.          }
  3716.          break;
  3717.       }
  3718.  
  3719.       /* If this instruction writes to a temporary, add it to the write array.
  3720.        * If there is already an instruction in the write array for one or more
  3721.        * of the channels, flag that channel write as dead.
  3722.        */
  3723.       if (inst->dst.file == PROGRAM_TEMPORARY &&
  3724.           !inst->dst.reladdr &&
  3725.           !inst->saturate) {
  3726.          for (int c = 0; c < 4; c++) {
  3727.             if (inst->dst.writemask & (1 << c)) {
  3728.                if (writes[4 * inst->dst.index + c]) {
  3729.                   if (write_level[4 * inst->dst.index + c] < level)
  3730.                      continue;
  3731.                   else
  3732.                      writes[4 * inst->dst.index + c]->dead_mask |= (1 << c);
  3733.                }
  3734.                writes[4 * inst->dst.index + c] = inst;
  3735.                write_level[4 * inst->dst.index + c] = level;
  3736.             }
  3737.          }
  3738.       }
  3739.    }
  3740.  
  3741.    /* Anything still in the write array at this point is dead code. */
  3742.    for (int r = 0; r < this->next_temp; r++) {
  3743.       for (int c = 0; c < 4; c++) {
  3744.          glsl_to_tgsi_instruction *inst = writes[4 * r + c];
  3745.          if (inst)
  3746.             inst->dead_mask |= (1 << c);
  3747.       }
  3748.    }
  3749.  
  3750.    /* Now actually remove the instructions that are completely dead and update
  3751.     * the writemask of other instructions with dead channels.
  3752.     */
  3753.    foreach_iter(exec_list_iterator, iter, this->instructions) {
  3754.       glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
  3755.      
  3756.       if (!inst->dead_mask || !inst->dst.writemask)
  3757.          continue;
  3758.       else if ((inst->dst.writemask & ~inst->dead_mask) == 0) {
  3759.          iter.remove();
  3760.          delete inst;
  3761.          removed++;
  3762.       } else
  3763.          inst->dst.writemask &= ~(inst->dead_mask);
  3764.    }
  3765.  
  3766.    ralloc_free(write_level);
  3767.    ralloc_free(writes);
  3768.    
  3769.    return removed;
  3770. }
  3771.  
  3772. /* Merges temporary registers together where possible to reduce the number of
  3773.  * registers needed to run a program.
  3774.  *
  3775.  * Produces optimal code only after copy propagation and dead code elimination
  3776.  * have been run. */
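/* For example (illustrative numbers only): if TEMP[2] is last read at
 * instruction 10 and TEMP[5] is first written at instruction 12, every
 * reference to TEMP[5] can be renamed to TEMP[2] without changing the
 * program's behavior, since their live ranges do not overlap.
 */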
  3777. void
  3778. glsl_to_tgsi_visitor::merge_registers(void)
  3779. {
  3780.    int *last_reads = rzalloc_array(mem_ctx, int, this->next_temp);
  3781.    int *first_writes = rzalloc_array(mem_ctx, int, this->next_temp);
  3782.    int i, j;
  3783.    
  3784.    /* Read the indices of the last read and first write to each temp register
  3785.     * into an array so that we don't have to traverse the instruction list as
  3786.     * much. */
  3787.    for (i=0; i < this->next_temp; i++) {
  3788.       last_reads[i] = get_last_temp_read(i);
  3789.       first_writes[i] = get_first_temp_write(i);
  3790.    }
  3791.    
  3792.    /* Start looking for registers with non-overlapping usages that can be
  3793.     * merged together. */
  3794.    for (i=0; i < this->next_temp; i++) {
  3795.       /* Don't touch unused registers. */
  3796.       if (last_reads[i] < 0 || first_writes[i] < 0) continue;
  3797.      
  3798.       for (j=0; j < this->next_temp; j++) {
  3799.          /* Don't touch unused registers. */
  3800.          if (last_reads[j] < 0 || first_writes[j] < 0) continue;
  3801.          
  3802.          /* We can merge the two registers if the first write to j is after or
  3803.           * in the same instruction as the last read from i.  Note that the
  3804.           * register at index i will always be used earlier or at the same time
  3805.           * as the register at index j. */
  3806.          if (first_writes[i] <= first_writes[j] &&
  3807.              last_reads[i] <= first_writes[j])
  3808.          {
  3809.             rename_temp_register(j, i); /* Replace all references to j with i.*/
  3810.            
  3811.             /* Update the first_writes and last_reads arrays with the new
  3812.              * values for the merged register index, and mark the newly unused
  3813.              * register index as such. */
  3814.             last_reads[i] = last_reads[j];
  3815.             first_writes[j] = -1;
  3816.             last_reads[j] = -1;
  3817.          }
  3818.       }
  3819.    }
  3820.    
  3821.    ralloc_free(last_reads);
  3822.    ralloc_free(first_writes);
  3823. }
  3824.  
  3825. /* Reassign indices to temporary registers by reusing unused indices created
  3826.  * by optimization passes. */
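/* For example (hypothetical): if only TEMP[0], TEMP[3] and TEMP[7] are still
 * read after the earlier passes, they are renamed to TEMP[0], TEMP[1] and
 * TEMP[2], and next_temp drops to 3.
 */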
  3827. void
  3828. glsl_to_tgsi_visitor::renumber_registers(void)
  3829. {
  3830.    int i = 0;
  3831.    int new_index = 0;
  3832.    
  3833.    for (i=0; i < this->next_temp; i++) {
  3834.       if (get_first_temp_read(i) < 0) continue;
  3835.       if (i != new_index)
  3836.          rename_temp_register(i, new_index);
  3837.       new_index++;
  3838.    }
  3839.    
  3840.    this->next_temp = new_index;
  3841. }
  3842.  
  3843. /**
  3844.  * Returns a fragment program which implements the current pixel transfer ops.
  3845.  * Based on get_pixel_transfer_program in st_atom_pixeltransfer.c.
  3846.  */
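/* Roughly, the generated program is (sketch, not literal TGSI output):
 *
 *   TEX colorTemp, fragment.texcoord[0], texture[0], 2D;
 *   MAD colorTemp, colorTemp, scale, bias;     # only if scale_and_bias
 *   two TEX lookups via texture[1] + MOV       # only if pixel_maps
 *   ... the original program, with reads of INPUT[COL0] redirected to
 *       colorTemp ...
 */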
  3847. extern "C" void
  3848. get_pixel_transfer_visitor(struct st_fragment_program *fp,
  3849.                            glsl_to_tgsi_visitor *original,
  3850.                            int scale_and_bias, int pixel_maps)
  3851. {
  3852.    glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor();
  3853.    struct st_context *st = st_context(original->ctx);
  3854.    struct gl_program *prog = &fp->Base.Base;
  3855.    struct gl_program_parameter_list *params = _mesa_new_parameter_list();
  3856.    st_src_reg coord, src0;
  3857.    st_dst_reg dst0;
  3858.    glsl_to_tgsi_instruction *inst;
  3859.  
  3860.    /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */
  3861.    v->ctx = original->ctx;
  3862.    v->prog = prog;
  3863.    v->shader_program = NULL;
  3864.    v->glsl_version = original->glsl_version;
  3865.    v->native_integers = original->native_integers;
  3866.    v->options = original->options;
  3867.    v->next_temp = original->next_temp;
  3868.    v->num_address_regs = original->num_address_regs;
  3869.    v->samplers_used = prog->SamplersUsed = original->samplers_used;
  3870.    v->indirect_addr_consts = original->indirect_addr_consts;
  3871.    memcpy(&v->immediates, &original->immediates, sizeof(v->immediates));
  3872.    v->num_immediates = original->num_immediates;
  3873.  
  3874.    /*
  3875.     * Get initial pixel color from the texture.
  3876.     * TEX colorTemp, fragment.texcoord[0], texture[0], 2D;
  3877.     */
  3878.    coord = st_src_reg(PROGRAM_INPUT, VARYING_SLOT_TEX0, glsl_type::vec2_type);
  3879.    src0 = v->get_temp(glsl_type::vec4_type);
  3880.    dst0 = st_dst_reg(src0);
  3881.    inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord);
  3882.    inst->sampler = 0;
  3883.    inst->tex_target = TEXTURE_2D_INDEX;
  3884.  
  3885.    prog->InputsRead |= VARYING_BIT_TEX0;
  3886.    prog->SamplersUsed |= (1 << 0); /* mark sampler 0 as used */
  3887.    v->samplers_used |= (1 << 0);
  3888.  
  3889.    if (scale_and_bias) {
  3890.       static const gl_state_index scale_state[STATE_LENGTH] =
  3891.          { STATE_INTERNAL, STATE_PT_SCALE,
  3892.            (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 };
  3893.       static const gl_state_index bias_state[STATE_LENGTH] =
  3894.          { STATE_INTERNAL, STATE_PT_BIAS,
  3895.            (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 };
  3896.       GLint scale_p, bias_p;
  3897.       st_src_reg scale, bias;
  3898.  
  3899.       scale_p = _mesa_add_state_reference(params, scale_state);
  3900.       bias_p = _mesa_add_state_reference(params, bias_state);
  3901.  
  3902.       /* MAD colorTemp, colorTemp, scale, bias; */
  3903.       scale = st_src_reg(PROGRAM_STATE_VAR, scale_p, GLSL_TYPE_FLOAT);
  3904.       bias = st_src_reg(PROGRAM_STATE_VAR, bias_p, GLSL_TYPE_FLOAT);
  3905.       inst = v->emit(NULL, TGSI_OPCODE_MAD, dst0, src0, scale, bias);
  3906.    }
  3907.  
  3908.    if (pixel_maps) {
  3909.       st_src_reg temp = v->get_temp(glsl_type::vec4_type);
  3910.       st_dst_reg temp_dst = st_dst_reg(temp);
  3911.  
  3912.       assert(st->pixel_xfer.pixelmap_texture);
  3913.  
  3914.       /* With a little effort, we can do four pixel map look-ups with
  3915.        * two TEX instructions:
  3916.        */
  3917.  
  3918.       /* TEX temp.rg, colorTemp.rgba, texture[1], 2D; */
  3919.       temp_dst.writemask = WRITEMASK_XY; /* write R,G */
  3920.       inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0);
  3921.       inst->sampler = 1;
  3922.       inst->tex_target = TEXTURE_2D_INDEX;
  3923.  
  3924.       /* TEX temp.ba, colorTemp.baba, texture[1], 2D; */
  3925.       src0.swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W);
  3926.       temp_dst.writemask = WRITEMASK_ZW; /* write B,A */
  3927.       inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0);
  3928.       inst->sampler = 1;
  3929.       inst->tex_target = TEXTURE_2D_INDEX;
  3930.  
  3931.       prog->SamplersUsed |= (1 << 1); /* mark sampler 1 as used */
  3932.       v->samplers_used |= (1 << 1);
  3933.  
  3934.       /* MOV colorTemp, temp; */
  3935.       inst = v->emit(NULL, TGSI_OPCODE_MOV, dst0, temp);
  3936.    }
  3937.  
  3938.    /* Now copy the instructions from the original glsl_to_tgsi_visitor into the
  3939.     * new visitor. */
  3940.    foreach_iter(exec_list_iterator, iter, original->instructions) {
  3941.       glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
  3942.       glsl_to_tgsi_instruction *newinst;
  3943.       st_src_reg src_regs[3];
  3944.  
  3945.       if (inst->dst.file == PROGRAM_OUTPUT)
  3946.          prog->OutputsWritten |= BITFIELD64_BIT(inst->dst.index);
  3947.  
  3948.       for (int i=0; i<3; i++) {
  3949.          src_regs[i] = inst->src[i];
  3950.          if (src_regs[i].file == PROGRAM_INPUT &&
  3951.              src_regs[i].index == VARYING_SLOT_COL0)
  3952.          {
  3953.             src_regs[i].file = PROGRAM_TEMPORARY;
  3954.             src_regs[i].index = src0.index;
  3955.          }
  3956.          else if (src_regs[i].file == PROGRAM_INPUT)
  3957.             prog->InputsRead |= BITFIELD64_BIT(src_regs[i].index);
  3958.       }
  3959.  
  3960.       newinst = v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]);
  3961.       newinst->tex_target = inst->tex_target;
  3962.    }
  3963.  
  3964.    /* Make modifications to fragment program info. */
  3965.    prog->Parameters = _mesa_combine_parameter_lists(params,
  3966.                                                     original->prog->Parameters);
  3967.    _mesa_free_parameter_list(params);
  3968.    count_resources(v, prog);
  3969.    fp->glsl_to_tgsi = v;
  3970. }
  3971.  
  3972. /**
  3973.  * Make fragment program for glBitmap:
  3974.  *   Sample the texture and kill the fragment if the bit is 0.
  3975.  * This program will be combined with the user's fragment program.
  3976.  *
  3977.  * Based on make_bitmap_fragment_program in st_cb_bitmap.c.
  3978.  */
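/* Roughly, the generated prologue is (sketch):
 *
 *   TEX tmp0, fragment.texcoord[0], texture[samplerIndex], 2D;
 *   KILL_IF -tmp0;     # discard where the bitmap texel is non-zero
 *
 * followed by a copy of the user's fragment program.
 */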
  3979. extern "C" void
  3980. get_bitmap_visitor(struct st_fragment_program *fp,
  3981.                    glsl_to_tgsi_visitor *original, int samplerIndex)
  3982. {
  3983.    glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor();
  3984.    struct st_context *st = st_context(original->ctx);
  3985.    struct gl_program *prog = &fp->Base.Base;
  3986.    st_src_reg coord, src0;
  3987.    st_dst_reg dst0;
  3988.    glsl_to_tgsi_instruction *inst;
  3989.  
  3990.    /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */
  3991.    v->ctx = original->ctx;
  3992.    v->prog = prog;
  3993.    v->shader_program = NULL;
  3994.    v->glsl_version = original->glsl_version;
  3995.    v->native_integers = original->native_integers;
  3996.    v->options = original->options;
  3997.    v->next_temp = original->next_temp;
  3998.    v->num_address_regs = original->num_address_regs;
  3999.    v->samplers_used = prog->SamplersUsed = original->samplers_used;
  4000.    v->indirect_addr_consts = original->indirect_addr_consts;
  4001.    memcpy(&v->immediates, &original->immediates, sizeof(v->immediates));
  4002.    v->num_immediates = original->num_immediates;
  4003.  
  4004.    /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */
  4005.    coord = st_src_reg(PROGRAM_INPUT, VARYING_SLOT_TEX0, glsl_type::vec2_type);
  4006.    src0 = v->get_temp(glsl_type::vec4_type);
  4007.    dst0 = st_dst_reg(src0);
  4008.    inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord);
  4009.    inst->sampler = samplerIndex;
  4010.    inst->tex_target = TEXTURE_2D_INDEX;
  4011.  
  4012.    prog->InputsRead |= VARYING_BIT_TEX0;
  4013.    prog->SamplersUsed |= (1 << samplerIndex); /* mark sampler as used */
  4014.    v->samplers_used |= (1 << samplerIndex);
  4015.  
  4016.    /* KIL if -tmp0 < 0 # texel=0 -> keep / texel!=0 -> discard */
  4017.    src0.negate = NEGATE_XYZW;
  4018.    if (st->bitmap.tex_format == PIPE_FORMAT_L8_UNORM)
  4019.       src0.swizzle = SWIZZLE_XXXX;
  4020.    inst = v->emit(NULL, TGSI_OPCODE_KILL_IF, undef_dst, src0);
  4021.  
  4022.    /* Now copy the instructions from the original glsl_to_tgsi_visitor into the
  4023.     * new visitor. */
  4024.    foreach_iter(exec_list_iterator, iter, original->instructions) {
  4025.       glsl_to_tgsi_instruction *inst = (glsl_to_tgsi_instruction *)iter.get();
  4026.       glsl_to_tgsi_instruction *newinst;
  4027.       st_src_reg src_regs[3];
  4028.  
  4029.       if (inst->dst.file == PROGRAM_OUTPUT)
  4030.          prog->OutputsWritten |= BITFIELD64_BIT(inst->dst.index);
  4031.  
  4032.       for (int i=0; i<3; i++) {
  4033.          src_regs[i] = inst->src[i];
  4034.          if (src_regs[i].file == PROGRAM_INPUT)
  4035.             prog->InputsRead |= BITFIELD64_BIT(src_regs[i].index);
  4036.       }
  4037.  
  4038.       newinst = v->emit(NULL, inst->op, inst->dst, src_regs[0], src_regs[1], src_regs[2]);
  4039.       newinst->tex_target = inst->tex_target;
  4040.    }
  4041.  
  4042.    /* Make modifications to fragment program info. */
  4043.    prog->Parameters = _mesa_clone_parameter_list(original->prog->Parameters);
  4044.    count_resources(v, prog);
  4045.    fp->glsl_to_tgsi = v;
  4046. }
  4047.  
  4048. /* ------------------------- TGSI conversion stuff -------------------------- */
  4049. struct label {
  4050.    unsigned branch_target;
  4051.    unsigned token;
  4052. };
  4053.  
  4054. /**
  4055.  * Intermediate state used during shader translation.
  4056.  */
  4057. struct st_translate {
  4058.    struct ureg_program *ureg;
  4059.  
  4060.    struct ureg_dst temps[MAX_TEMPS];
  4061.    struct ureg_dst arrays[MAX_ARRAYS];
  4062.    struct ureg_src *constants;
  4063.    struct ureg_src *immediates;
  4064.    struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];
  4065.    struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
  4066.    struct ureg_dst address[1];
  4067.    struct ureg_src samplers[PIPE_MAX_SAMPLERS];
  4068.    struct ureg_src systemValues[SYSTEM_VALUE_MAX];
  4069.  
  4070.    unsigned array_sizes[MAX_ARRAYS];
  4071.  
  4072.    const GLuint *inputMapping;
  4073.    const GLuint *outputMapping;
  4074.  
  4075.    /* For every instruction that contains a label (e.g. CALL), keep
  4076.     * details so that we can go back afterwards and emit the correct
  4077.     * tgsi instruction number for each label.
  4078.     */
  4079.    struct label *labels;
  4080.    unsigned labels_size;
  4081.    unsigned labels_count;
  4082.  
  4083.    /* Keep a record of the tgsi instruction number that each mesa
  4084.     * instruction starts at; this will be used to fix up labels after
  4085.     * translation.
  4086.     */
  4087.    unsigned *insn;
  4088.    unsigned insn_size;
  4089.    unsigned insn_count;
  4090.  
  4091.    unsigned procType;  /**< TGSI_PROCESSOR_VERTEX/FRAGMENT */
  4092.  
  4093.    boolean error;
  4094. };
  4095.  
  4096. /** Map Mesa's SYSTEM_VALUE_x to TGSI_SEMANTIC_x */
  4097. static unsigned mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = {
  4098.    TGSI_SEMANTIC_FACE,
  4099.    TGSI_SEMANTIC_VERTEXID,
  4100.    TGSI_SEMANTIC_INSTANCEID
  4101. };
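/* Note: only the entries for the face, vertex-id and instance-id system
 * values are filled in above; the remaining SYSTEM_VALUE_MAX slots stay zero.
 */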
  4102.  
  4103. /**
  4104.  * Make note of a branch to a label in the TGSI code.
  4105.  * After we've emitted all instructions, we'll go over the list
  4106.  * of labels built here and patch the TGSI code with the actual
  4107.  * location of each label.
  4108.  */
  4109. static unsigned *get_label(struct st_translate *t, unsigned branch_target)
  4110. {
  4111.    unsigned i;
  4112.  
  4113.    if (t->labels_count + 1 >= t->labels_size) {
  4114.       t->labels_size = 1 << (util_logbase2(t->labels_size) + 1);
  4115.       t->labels = (struct label *)realloc(t->labels,
  4116.                                           t->labels_size * sizeof(struct label));
  4117.       if (t->labels == NULL) {
  4118.          static unsigned dummy;
  4119.          t->error = TRUE;
  4120.          return &dummy;
  4121.       }
  4122.    }
  4123.  
  4124.    i = t->labels_count++;
  4125.    t->labels[i].branch_target = branch_target;
  4126.    return &t->labels[i].token;
  4127. }
  4128.  
  4129. /**
  4130.  * Called prior to emitting the TGSI code for each instruction.
  4131.  * Allocate additional space for instructions if needed.
  4132.  * Update the insn[] array so the next glsl_to_tgsi_instruction points to
  4133.  * the next TGSI instruction.
  4134.  */
  4135. static void set_insn_start(struct st_translate *t, unsigned start)
  4136. {
  4137.    if (t->insn_count + 1 >= t->insn_size) {
  4138.       t->insn_size = 1 << (util_logbase2(t->insn_size) + 1);
  4139.       t->insn = (unsigned *)realloc(t->insn, t->insn_size * sizeof(t->insn[0]));
  4140.       if (t->insn == NULL) {
  4141.          t->error = TRUE;
  4142.          return;
  4143.       }
  4144.    }
  4145.  
  4146.    t->insn[t->insn_count++] = start;
  4147. }
  4148.  
  4149. /**
  4150.  * Map a glsl_to_tgsi constant/immediate to a TGSI immediate.
  4151.  */
  4152. static struct ureg_src
  4153. emit_immediate(struct st_translate *t,
  4154.                gl_constant_value values[4],
  4155.                int type, int size)
  4156. {
  4157.    struct ureg_program *ureg = t->ureg;
  4158.  
  4159.    switch(type)
  4160.    {
  4161.    case GL_FLOAT:
  4162.       return ureg_DECL_immediate(ureg, &values[0].f, size);
  4163.    case GL_INT:
  4164.       return ureg_DECL_immediate_int(ureg, &values[0].i, size);
  4165.    case GL_UNSIGNED_INT:
  4166.    case GL_BOOL:
  4167.       return ureg_DECL_immediate_uint(ureg, &values[0].u, size);
  4168.    default:
  4169.       assert(!"should not get here - type must be float, int, uint, or bool");
  4170.       return ureg_src_undef();
  4171.    }
  4172. }
  4173.  
  4174. /**
  4175.  * Map a glsl_to_tgsi dst register to a TGSI ureg_dst register.
  4176.  */
  4177. static struct ureg_dst
  4178. dst_register(struct st_translate *t,
  4179.              gl_register_file file,
  4180.              GLuint index)
  4181. {
  4182.    unsigned array;
  4183.  
  4184.    switch(file) {
  4185.    case PROGRAM_UNDEFINED:
  4186.       return ureg_dst_undef();
  4187.  
  4188.    case PROGRAM_TEMPORARY:
  4189.       assert(index >= 0);
  4190.       assert(index < (int) Elements(t->temps));
  4191.  
  4192.       if (ureg_dst_is_undef(t->temps[index]))
  4193.          t->temps[index] = ureg_DECL_local_temporary(t->ureg);
  4194.  
  4195.       return t->temps[index];
  4196.  
  4197.    case PROGRAM_ARRAY:
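      /* PROGRAM_ARRAY indices appear to be packed as (array_id << 16) plus a
       * 16-bit element offset biased by 0x8000 (presumably so that negative
       * relative offsets can be encoded); decode accordingly.
       */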
  4198.       array = index >> 16;
  4199.  
  4200.       assert(array >= 0);
  4201.       assert(array < (int) Elements(t->arrays));
  4202.  
  4203.       if (ureg_dst_is_undef(t->arrays[array]))
  4204.          t->arrays[array] = ureg_DECL_array_temporary(
  4205.             t->ureg, t->array_sizes[array], TRUE);
  4206.  
  4207.       return ureg_dst_array_offset(t->arrays[array],
  4208.                                    (int)(index & 0xFFFF) - 0x8000);
  4209.  
  4210.    case PROGRAM_OUTPUT:
  4211.       if (t->procType == TGSI_PROCESSOR_VERTEX)
  4212.          assert(index < VARYING_SLOT_MAX);
  4213.       else if (t->procType == TGSI_PROCESSOR_FRAGMENT)
  4214.          assert(index < FRAG_RESULT_MAX);
  4215.       else
  4216.          assert(index < VARYING_SLOT_MAX);
  4217.  
  4218.       assert(t->outputMapping[index] < Elements(t->outputs));
  4219.  
  4220.       return t->outputs[t->outputMapping[index]];
  4221.  
  4222.    case PROGRAM_ADDRESS:
  4223.       return t->address[index];
  4224.  
  4225.    default:
  4226.       assert(!"unknown dst register file");
  4227.       return ureg_dst_undef();
  4228.    }
  4229. }
  4230.  
  4231. /**
  4232.  * Map a glsl_to_tgsi src register to a TGSI ureg_src register.
  4233.  */
  4234. static struct ureg_src
  4235. src_register(struct st_translate *t,
  4236.              gl_register_file file,
  4237.              GLint index, GLint index2D)
  4238. {
  4239.    switch(file) {
  4240.    case PROGRAM_UNDEFINED:
  4241.       return ureg_src_undef();
  4242.  
  4243.    case PROGRAM_TEMPORARY:
  4244.    case PROGRAM_ARRAY:
  4245.       return ureg_src(dst_register(t, file, index));
  4246.  
  4247.    case PROGRAM_ENV_PARAM:
  4248.    case PROGRAM_LOCAL_PARAM:
  4249.    case PROGRAM_UNIFORM:
  4250.       assert(index >= 0);
  4251.       return t->constants[index];
  4252.    case PROGRAM_STATE_VAR:
  4253.    case PROGRAM_CONSTANT:       /* i.e., immediate */
  4254.       if (index2D) {
  4255.          struct ureg_src src;
  4256.          src = ureg_src_register(TGSI_FILE_CONSTANT, 0);
  4257.          src.Dimension = 1;
  4258.          src.DimensionIndex = index2D;
  4259.          return src;
  4260.       } else if (index < 0)
  4261.          return ureg_DECL_constant(t->ureg, 0);
  4262.       else
  4263.          return t->constants[index];
  4264.  
  4265.    case PROGRAM_IMMEDIATE:
  4266.       return t->immediates[index];
  4267.  
  4268.    case PROGRAM_INPUT:
  4269.       assert(t->inputMapping[index] < Elements(t->inputs));
  4270.       return t->inputs[t->inputMapping[index]];
  4271.  
  4272.    case PROGRAM_OUTPUT:
  4273.       assert(t->outputMapping[index] < Elements(t->outputs));
  4274.       return ureg_src(t->outputs[t->outputMapping[index]]); /* not needed? */
  4275.  
  4276.    case PROGRAM_ADDRESS:
  4277.       return ureg_src(t->address[index]);
  4278.  
  4279.    case PROGRAM_SYSTEM_VALUE:
  4280.       assert(index < (int) Elements(t->systemValues));
  4281.       return t->systemValues[index];
  4282.  
  4283.    default:
  4284.       assert(!"unknown src register file");
  4285.       return ureg_src_undef();
  4286.    }
  4287. }
  4288.  
  4289. /**
  4290.  * Create a TGSI ureg_dst register from an st_dst_reg.
  4291.  */
  4292. static struct ureg_dst
  4293. translate_dst(struct st_translate *t,
  4294.               const st_dst_reg *dst_reg,
  4295.               bool saturate, bool clamp_color)
  4296. {
  4297.    struct ureg_dst dst = dst_register(t,
  4298.                                       dst_reg->file,
  4299.                                       dst_reg->index);
  4300.  
  4301.    dst = ureg_writemask(dst, dst_reg->writemask);
  4302.    
  4303.    if (saturate)
  4304.       dst = ureg_saturate(dst);
  4305.    else if (clamp_color && dst_reg->file == PROGRAM_OUTPUT) {
  4306.       /* Clamp colors for ARB_color_buffer_float. */
  4307.       switch (t->procType) {
  4308.       case TGSI_PROCESSOR_VERTEX:
  4309.          /* XXX if the geometry shader is present, this must be done there
  4310.           * instead of here. */
  4311.          if (dst_reg->index == VARYING_SLOT_COL0 ||
  4312.              dst_reg->index == VARYING_SLOT_COL1 ||
  4313.              dst_reg->index == VARYING_SLOT_BFC0 ||
  4314.              dst_reg->index == VARYING_SLOT_BFC1) {
  4315.             dst = ureg_saturate(dst);
  4316.          }
  4317.          break;
  4318.  
  4319.       case TGSI_PROCESSOR_FRAGMENT:
  4320.          if (dst_reg->index >= FRAG_RESULT_COLOR) {
  4321.             dst = ureg_saturate(dst);
  4322.          }
  4323.          break;
  4324.       }
  4325.    }
  4326.  
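   /* Relative addressing always goes through the single address register
    * declared in st_translate_program() (num_address_regs is at most 1).
    */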
  4327.    if (dst_reg->reladdr != NULL) {
  4328.       assert(dst_reg->file != PROGRAM_TEMPORARY);
  4329.       dst = ureg_dst_indirect(dst, ureg_src(t->address[0]));
  4330.    }
  4331.  
  4332.    return dst;
  4333. }
  4334.  
  4335. /**
  4336.  * Create a TGSI ureg_src register from an st_src_reg.
  4337.  */
  4338. static struct ureg_src
  4339. translate_src(struct st_translate *t, const st_src_reg *src_reg)
  4340. {
  4341.    struct ureg_src src = src_register(t, src_reg->file, src_reg->index, src_reg->index2D);
  4342.  
  4343.    src = ureg_swizzle(src,
  4344.                       GET_SWZ(src_reg->swizzle, 0) & 0x3,
  4345.                       GET_SWZ(src_reg->swizzle, 1) & 0x3,
  4346.                       GET_SWZ(src_reg->swizzle, 2) & 0x3,
  4347.                       GET_SWZ(src_reg->swizzle, 3) & 0x3);
  4348.  
  4349.    if ((src_reg->negate & 0xf) == NEGATE_XYZW)
  4350.       src = ureg_negate(src);
  4351.  
  4352.    if (src_reg->reladdr != NULL) {
  4353.       assert(src_reg->file != PROGRAM_TEMPORARY);
  4354.       src = ureg_src_indirect(src, ureg_src(t->address[0]));
  4355.    }
  4356.  
  4357.    return src;
  4358. }
  4359.  
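/**
 * Convert an immediate-file texture offset into the TGSI immediate-file
 * form expected by ureg_tex_insn().
 */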
  4360. static struct tgsi_texture_offset
  4361. translate_tex_offset(struct st_translate *t,
  4362.                      const struct tgsi_texture_offset *in_offset)
  4363. {
  4364.    struct tgsi_texture_offset offset;
  4365.    struct ureg_src imm_src;
  4366.  
  4367.    assert(in_offset->File == PROGRAM_IMMEDIATE);
  4368.    imm_src = t->immediates[in_offset->Index];
  4369.  
  4370.    offset.File = imm_src.File;
  4371.    offset.Index = imm_src.Index;
  4372.    offset.SwizzleX = imm_src.SwizzleX;
  4373.    offset.SwizzleY = imm_src.SwizzleY;
  4374.    offset.SwizzleZ = imm_src.SwizzleZ;
  4375.    offset.File = TGSI_FILE_IMMEDIATE;
  4376.    offset.Padding = 0;
  4377.  
  4378.    return offset;
  4379. }
  4380.  
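/**
 * Translate a single glsl_to_tgsi_instruction into a TGSI instruction,
 * handling the special cases for flow-control opcodes (label operand) and
 * texture opcodes (sampler and offset operands).
 */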
  4381. static void
  4382. compile_tgsi_instruction(struct st_translate *t,
  4383.                          const glsl_to_tgsi_instruction *inst,
  4384.                          bool clamp_dst_color_output)
  4385. {
  4386.    struct ureg_program *ureg = t->ureg;
  4387.    GLuint i;
  4388.    struct ureg_dst dst[1];
  4389.    struct ureg_src src[4];
  4390.    struct tgsi_texture_offset texoffsets[MAX_GLSL_TEXTURE_OFFSET];
  4391.  
  4392.    unsigned num_dst;
  4393.    unsigned num_src;
  4394.    unsigned tex_target;
  4395.  
  4396.    num_dst = num_inst_dst_regs(inst->op);
  4397.    num_src = num_inst_src_regs(inst->op);
  4398.  
  4399.    if (num_dst)
  4400.       dst[0] = translate_dst(t,
  4401.                              &inst->dst,
  4402.                              inst->saturate,
  4403.                              clamp_dst_color_output);
  4404.  
  4405.    for (i = 0; i < num_src; i++)
  4406.       src[i] = translate_src(t, &inst->src[i]);
  4407.  
  4408.    switch(inst->op) {
  4409.    case TGSI_OPCODE_BGNLOOP:
  4410.    case TGSI_OPCODE_CAL:
  4411.    case TGSI_OPCODE_ELSE:
  4412.    case TGSI_OPCODE_ENDLOOP:
  4413.    case TGSI_OPCODE_IF:
  4414.    case TGSI_OPCODE_UIF:
  4415.       assert(num_dst == 0);
  4416.       ureg_label_insn(ureg,
  4417.                       inst->op,
  4418.                       src, num_src,
  4419.                       get_label(t,
  4420.                                 inst->op == TGSI_OPCODE_CAL ? inst->function->sig_id : 0));
  4421.       return;
  4422.  
  4423.    case TGSI_OPCODE_TEX:
  4424.    case TGSI_OPCODE_TXB:
  4425.    case TGSI_OPCODE_TXD:
  4426.    case TGSI_OPCODE_TXL:
  4427.    case TGSI_OPCODE_TXP:
  4428.    case TGSI_OPCODE_TXQ:
  4429.    case TGSI_OPCODE_TXF:
  4430.    case TGSI_OPCODE_TEX2:
  4431.    case TGSI_OPCODE_TXB2:
  4432.    case TGSI_OPCODE_TXL2:
  4433.       src[num_src++] = t->samplers[inst->sampler];
  4434.       for (i = 0; i < inst->tex_offset_num_offset; i++) {
  4435.          texoffsets[i] = translate_tex_offset(t, &inst->tex_offsets[i]);
  4436.       }
  4437.       tex_target = st_translate_texture_target(inst->tex_target, inst->tex_shadow);
  4438.  
  4439.       ureg_tex_insn(ureg,
  4440.                     inst->op,
  4441.                     dst, num_dst,
  4442.                     tex_target,
  4443.                     texoffsets, inst->tex_offset_num_offset,
  4444.                     src, num_src);
  4445.       return;
  4446.  
  4447.    case TGSI_OPCODE_SCS:
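      /* SCS writes cosine to X and sine to Y; restrict the writemask accordingly. */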
  4448.       dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY);
  4449.       ureg_insn(ureg, inst->op, dst, num_dst, src, num_src);
  4450.       break;
  4451.  
  4452.    default:
  4453.       ureg_insn(ureg,
  4454.                 inst->op,
  4455.                 dst, num_dst,
  4456.                 src, num_src);
  4457.       break;
  4458.    }
  4459. }
  4460.  
  4461. /**
  4462.  * Emit the TGSI instructions for inverting and adjusting WPOS.
  4463.  * This code is unavoidable because it also depends on whether
  4464.  * an FBO is bound (STATE_FB_WPOS_Y_TRANSFORM).
  4465.  */
  4466. static void
  4467. emit_wpos_adjustment( struct st_translate *t,
  4468.                       const struct gl_program *program,
  4469.                       boolean invert,
  4470.                       GLfloat adjX, GLfloat adjY[2])
  4471. {
  4472.    struct ureg_program *ureg = t->ureg;
  4473.  
  4474.    /* Fragment program uses fragment position input.
  4475.     * Need to replace instances of INPUT[WPOS] with temp T
  4476.     * where T = INPUT[WPOS] but with y inverted.
  4477.     */
  4478.    static const gl_state_index wposTransformState[STATE_LENGTH]
  4479.       = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM,
  4480.           (gl_state_index)0, (gl_state_index)0, (gl_state_index)0 };
  4481.    
  4482.    /* XXX: note we are modifying the incoming shader here!  Need to
  4483.     * do this before emitting the constant decls below, or this
  4484.     * will be missed:
  4485.     */
  4486.    unsigned wposTransConst = _mesa_add_state_reference(program->Parameters,
  4487.                                                        wposTransformState);
  4488.  
  4489.    struct ureg_src wpostrans = ureg_DECL_constant( ureg, wposTransConst );
  4490.    struct ureg_dst wpos_temp = ureg_DECL_temporary( ureg );
  4491.    struct ureg_src wpos_input = t->inputs[t->inputMapping[VARYING_SLOT_POS]];
  4492.  
  4493.    /* First, apply the coordinate shift: */
  4494.    if (adjX || adjY[0] || adjY[1]) {
  4495.       if (adjY[0] != adjY[1]) {
  4496.          /* Adjust the y coordinate by adjY[1] or adjY[0], depending on
  4497.           * whether inversion will actually be applied.  This is decided
  4498.           * by testing the inversion state variable used below, which is
  4499.           * either +1 or -1.
  4500.           */
  4501.          struct ureg_dst adj_temp = ureg_DECL_local_temporary(ureg);
  4502.  
  4503.          ureg_CMP(ureg, adj_temp,
  4504.                   ureg_scalar(wpostrans, invert ? 2 : 0),
  4505.                   ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f),
  4506.                   ureg_imm4f(ureg, adjX, adjY[1], 0.0f, 0.0f));
  4507.          ureg_ADD(ureg, wpos_temp, wpos_input, ureg_src(adj_temp));
  4508.       } else {
  4509.          ureg_ADD(ureg, wpos_temp, wpos_input,
  4510.                   ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f));
  4511.       }
  4512.       wpos_input = ureg_src(wpos_temp);
  4513.    } else {
  4514.       /* MOV wpos_temp, input[wpos]
  4515.        */
  4516.       ureg_MOV( ureg, wpos_temp, wpos_input );
  4517.    }
  4518.  
  4519.    /* Now the conditional y flip: STATE_FB_WPOS_Y_TRANSFORM.xy/zw will be
  4520.     * inversion/identity, or the other way around if we're drawing to an FBO.
  4521.     */
  4522.    if (invert) {
  4523.       /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy
  4524.        */
  4525.       ureg_MAD( ureg,
  4526.                 ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
  4527.                 wpos_input,
  4528.                 ureg_scalar(wpostrans, 0),
  4529.                 ureg_scalar(wpostrans, 1));
  4530.    } else {
  4531.       /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww
  4532.        */
  4533.       ureg_MAD( ureg,
  4534.                 ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
  4535.                 wpos_input,
  4536.                 ureg_scalar(wpostrans, 2),
  4537.                 ureg_scalar(wpostrans, 3));
  4538.    }
  4539.  
  4540.    /* Use wpos_temp as position input from here on:
  4541.     */
  4542.    t->inputs[t->inputMapping[VARYING_SLOT_POS]] = ureg_src(wpos_temp);
  4543. }
  4544.  
  4545.  
  4546. /**
  4547.  * Emit fragment position/coordinate code.
  4548.  */
  4549. static void
  4550. emit_wpos(struct st_context *st,
  4551.           struct st_translate *t,
  4552.           const struct gl_program *program,
  4553.           struct ureg_program *ureg)
  4554. {
  4555.    const struct gl_fragment_program *fp =
  4556.       (const struct gl_fragment_program *) program;
  4557.    struct pipe_screen *pscreen = st->pipe->screen;
  4558.    GLfloat adjX = 0.0f;
  4559.    GLfloat adjY[2] = { 0.0f, 0.0f };
  4560.    boolean invert = FALSE;
  4561.  
  4562.    /* Query the pixel center conventions supported by the pipe driver and set
  4563.     * adjX, adjY to help out if it cannot handle the requested one internally.
  4564.     *
  4565.     * The bias of the y-coordinate depends on whether y-inversion takes place
  4566.     * (adjY[1]) or not (adjY[0]), which is in turn dependent on whether we are
  4567.     * drawing to an FBO (causes additional inversion), and whether the pipe
  4568.     * driver origin and the requested origin differ (the latter condition is
  4569.     * stored in the 'invert' variable).
  4570.     *
  4571.     * For height = 100 (i = integer, h = half-integer, l = lower, u = upper):
  4572.     *
  4573.     * center shift only:
  4574.     * i -> h: +0.5
  4575.     * h -> i: -0.5
  4576.     *
  4577.     * inversion only:
  4578.     * l,i -> u,i: ( 0.0 + 1.0) * -1 + 100 = 99
  4579.     * l,h -> u,h: ( 0.5 + 0.0) * -1 + 100 = 99.5
  4580.     * u,i -> l,i: (99.0 + 1.0) * -1 + 100 = 0
  4581.     * u,h -> l,h: (99.5 + 0.0) * -1 + 100 = 0.5
  4582.     *
  4583.     * inversion and center shift:
  4584.     * l,i -> u,h: ( 0.0 + 0.5) * -1 + 100 = 99.5
  4585.     * l,h -> u,i: ( 0.5 + 0.5) * -1 + 100 = 99
  4586.     * u,i -> l,h: (99.0 + 0.5) * -1 + 100 = 0.5
  4587.     * u,h -> l,i: (99.5 + 0.5) * -1 + 100 = 0
  4588.     */
  4589.    if (fp->OriginUpperLeft) {
  4590.       /* Fragment shader wants origin in upper-left */
  4591.       if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) {
  4592.          /* the driver supports upper-left origin */
  4593.       }
  4594.       else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) {
  4595.          /* the driver supports lower-left origin, need to invert Y */
  4596.          ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
  4597.          invert = TRUE;
  4598.       }
  4599.       else
  4600.          assert(0);
  4601.    }
  4602.    else {
  4603.       /* Fragment shader wants origin in lower-left */
  4604.       if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT))
  4605.          /* the driver supports lower-left origin */
  4606.          ureg_property_fs_coord_origin(ureg, TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
  4607.       else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT))
  4608.          /* the driver supports upper-left origin, need to invert Y */
  4609.          invert = TRUE;
  4610.       else
  4611.          assert(0);
  4612.    }
  4613.    
  4614.    if (fp->PixelCenterInteger) {
  4615.       /* Fragment shader wants pixel center integer */
  4616.       if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
  4617.          /* the driver supports pixel center integer */
  4618.          adjY[1] = 1.0f;
  4619.          ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
  4620.       }
  4621.       else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) {
  4622.          /* the driver supports pixel center half integer, need to bias X,Y */
  4623.          adjX = -0.5f;
  4624.          adjY[0] = -0.5f;
  4625.          adjY[1] = 0.5f;
  4626.       }
  4627.       else
  4628.          assert(0);
  4629.    }
  4630.    else {
  4631.       /* Fragment shader wants pixel center half integer */
  4632.       if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) {
  4633.          /* the driver supports pixel center half integer */
  4634.       }
  4635.       else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
  4636.          /* the driver supports pixel center integer, need to bias X,Y */
  4637.          adjX = adjY[0] = adjY[1] = 0.5f;
  4638.          ureg_property_fs_coord_pixel_center(ureg, TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
  4639.       }
  4640.       else
  4641.          assert(0);
  4642.    }
  4643.  
  4644.    /* we invert after adjustment so that we avoid the MOV to temporary,
  4645.     * and reuse the adjustment ADD instead */
  4646.    emit_wpos_adjustment(t, program, invert, adjX, adjY);
  4647. }
  4648.  
  4649. /**
  4650.  * OpenGL's fragment gl_FrontFace input is 1 for front-facing, 0 for back.
  4651.  * TGSI uses +1 for front, -1 for back.
  4652.  * This function converts the TGSI value to the GL value.  Simply clamping/
  4653.  * saturating the value to [0,1] does the job.
  4654.  */
  4655. static void
  4656. emit_face_var(struct st_translate *t)
  4657. {
  4658.    struct ureg_program *ureg = t->ureg;
  4659.    struct ureg_dst face_temp = ureg_DECL_temporary(ureg);
  4660.    struct ureg_src face_input = t->inputs[t->inputMapping[VARYING_SLOT_FACE]];
  4661.  
  4662.    /* MOV_SAT face_temp, input[face] */
  4663.    face_temp = ureg_saturate(face_temp);
  4664.    ureg_MOV(ureg, face_temp, face_input);
  4665.  
  4666.    /* Use face_temp as face input from here on: */
  4667.    t->inputs[t->inputMapping[VARYING_SLOT_FACE]] = ureg_src(face_temp);
  4668. }
  4669.  
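/**
 * Copy the vertex edge flag input through to the edge flag output,
 * for drivers that want pass-through edge flags.
 */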
  4670. static void
  4671. emit_edgeflags(struct st_translate *t)
  4672. {
  4673.    struct ureg_program *ureg = t->ureg;
  4674.    struct ureg_dst edge_dst = t->outputs[t->outputMapping[VARYING_SLOT_EDGE]];
  4675.    struct ureg_src edge_src = t->inputs[t->inputMapping[VERT_ATTRIB_EDGEFLAG]];
  4676.  
  4677.    ureg_MOV(ureg, edge_dst, edge_src);
  4678. }
  4679.  
  4680. /**
  4681.  * Translate intermediate IR (glsl_to_tgsi_instruction) to TGSI format.
  4682.  * \param program  the program to translate
  4683.  * \param numInputs  number of input registers used
  4684.  * \param inputMapping  maps Mesa fragment program inputs to TGSI generic
  4685.  *                      input indexes
  4686.  * \param inputSemanticName  the TGSI_SEMANTIC flag for each input
  4687.  * \param inputSemanticIndex  the semantic index (ex: which texcoord) for
  4688.  *                            each input
  4689.  * \param interpMode  the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input
  4690.  * \param numOutputs  number of output registers used
  4691.  * \param outputMapping  maps Mesa fragment program outputs to TGSI
  4692.  *                       generic outputs
  4693.  * \param outputSemanticName  the TGSI_SEMANTIC flag for each output
  4694.  * \param outputSemanticIndex  the semantic index (ex: which texcoord) for
  4695.  *                             each output
  4696.  *
  4697.  * \return  PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY
  4698.  */
  4699. extern "C" enum pipe_error
  4700. st_translate_program(
  4701.    struct gl_context *ctx,
  4702.    uint procType,
  4703.    struct ureg_program *ureg,
  4704.    glsl_to_tgsi_visitor *program,
  4705.    const struct gl_program *proginfo,
  4706.    GLuint numInputs,
  4707.    const GLuint inputMapping[],
  4708.    const ubyte inputSemanticName[],
  4709.    const ubyte inputSemanticIndex[],
  4710.    const GLuint interpMode[],
  4711.    const GLboolean is_centroid[],
  4712.    GLuint numOutputs,
  4713.    const GLuint outputMapping[],
  4714.    const ubyte outputSemanticName[],
  4715.    const ubyte outputSemanticIndex[],
  4716.    boolean passthrough_edgeflags,
  4717.    boolean clamp_color)
  4718. {
  4719.    struct st_translate *t;
  4720.    unsigned i;
  4721.    enum pipe_error ret = PIPE_OK;
  4722.  
  4723.    assert(numInputs <= Elements(t->inputs));
  4724.    assert(numOutputs <= Elements(t->outputs));
  4725.  
  4726.    t = CALLOC_STRUCT(st_translate);
  4727.    if (!t) {
  4728.       ret = PIPE_ERROR_OUT_OF_MEMORY;
  4729.       goto out;
  4730.    }
  4731.  
  4732.    memset(t, 0, sizeof *t);
  4733.  
  4734.    t->procType = procType;
  4735.    t->inputMapping = inputMapping;
  4736.    t->outputMapping = outputMapping;
  4737.    t->ureg = ureg;
  4738.  
  4739.    if (program->shader_program) {
  4740.       for (i = 0; i < program->shader_program->NumUserUniformStorage; i++) {
  4741.          struct gl_uniform_storage *const storage =
  4742.                &program->shader_program->UniformStorage[i];
  4743.  
  4744.          _mesa_uniform_detach_all_driver_storage(storage);
  4745.       }
  4746.    }
  4747.  
  4748.    /*
  4749.     * Declare input attributes.
  4750.     */
  4751.    if (procType == TGSI_PROCESSOR_FRAGMENT) {
  4752.       for (i = 0; i < numInputs; i++) {
  4753.          t->inputs[i] = ureg_DECL_fs_input_cyl_centroid(ureg,
  4754.                                                         inputSemanticName[i],
  4755.                                                         inputSemanticIndex[i],
  4756.                                                         interpMode[i], 0,
  4757.                                                         is_centroid[i]);
  4758.       }
  4759.  
  4760.       if (proginfo->InputsRead & VARYING_BIT_POS) {
  4761.          /* Must do this after setting up t->inputs, and before
  4762.           * emitting constant references, below:
  4763.           */
  4764.           emit_wpos(st_context(ctx), t, proginfo, ureg);
  4765.       }
  4766.  
  4767.       if (proginfo->InputsRead & VARYING_BIT_FACE)
  4768.          emit_face_var(t);
  4769.  
  4770.       /*
  4771.        * Declare output attributes.
  4772.        */
  4773.       for (i = 0; i < numOutputs; i++) {
  4774.          switch (outputSemanticName[i]) {
  4775.          case TGSI_SEMANTIC_POSITION:
  4776.             t->outputs[i] = ureg_DECL_output(ureg,
  4777.                                              TGSI_SEMANTIC_POSITION, /* Z/Depth */
  4778.                                              outputSemanticIndex[i]);
  4779.             t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Z);
  4780.             break;
  4781.          case TGSI_SEMANTIC_STENCIL:
  4782.             t->outputs[i] = ureg_DECL_output(ureg,
  4783.                                              TGSI_SEMANTIC_STENCIL, /* Stencil */
  4784.                                              outputSemanticIndex[i]);
  4785.             t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Y);
  4786.             break;
  4787.          case TGSI_SEMANTIC_COLOR:
  4788.             t->outputs[i] = ureg_DECL_output(ureg,
  4789.                                              TGSI_SEMANTIC_COLOR,
  4790.                                              outputSemanticIndex[i]);
  4791.             break;
  4792.          default:
  4793.             assert(!"fragment shader outputs must be POSITION/STENCIL/COLOR");
  4794.             ret = PIPE_ERROR_BAD_INPUT;
  4795.             goto out;
  4796.          }
  4797.       }
  4798.    }
  4799.    else if (procType == TGSI_PROCESSOR_GEOMETRY) {
  4800.       for (i = 0; i < numInputs; i++) {
  4801.          t->inputs[i] = ureg_DECL_gs_input(ureg,
  4802.                                            i,
  4803.                                            inputSemanticName[i],
  4804.                                            inputSemanticIndex[i]);
  4805.       }
  4806.  
  4807.       for (i = 0; i < numOutputs; i++) {
  4808.          t->outputs[i] = ureg_DECL_output(ureg,
  4809.                                           outputSemanticName[i],
  4810.                                           outputSemanticIndex[i]);
  4811.       }
  4812.    }
  4813.    else {
  4814.       assert(procType == TGSI_PROCESSOR_VERTEX);
  4815.  
  4816.       for (i = 0; i < numInputs; i++) {
  4817.          t->inputs[i] = ureg_DECL_vs_input(ureg, i);
  4818.       }
  4819.  
  4820.       for (i = 0; i < numOutputs; i++) {
  4821.          t->outputs[i] = ureg_DECL_output(ureg,
  4822.                                           outputSemanticName[i],
  4823.                                           outputSemanticIndex[i]);
  4824.       }
  4825.       if (passthrough_edgeflags)
  4826.          emit_edgeflags(t);
  4827.    }
  4828.  
  4829.    /* Declare address register.
  4830.     */
  4831.    if (program->num_address_regs > 0) {
  4832.       assert(program->num_address_regs == 1);
  4833.       t->address[0] = ureg_DECL_address(ureg);
  4834.    }
  4835.  
  4836.    /* Declare misc input registers
  4837.     */
  4838.    {
  4839.       GLbitfield sysInputs = proginfo->SystemValuesRead;
  4840.       unsigned numSys = 0;
  4841.       for (i = 0; sysInputs; i++) {
  4842.          if (sysInputs & (1 << i)) {
  4843.             unsigned semName = mesa_sysval_to_semantic[i];
  4844.             t->systemValues[i] = ureg_DECL_system_value(ureg, numSys, semName, 0);
  4845.             if (semName == TGSI_SEMANTIC_INSTANCEID ||
  4846.                 semName == TGSI_SEMANTIC_VERTEXID) {
  4847.                /* From Gallium perspective, these system values are always
  4848.                 * integer, and require native integer support.  However, if
  4849.                 * native integer is supported on the vertex stage but not the
  4850.                 * pixel stage (e.g, i915g + draw), Mesa will generate IR that
  4851.                 * assumes these system values are floats. To resolve the
  4852.                 * inconsistency, we insert a U2F.
  4853.                 */
  4854.                struct st_context *st = st_context(ctx);
  4855.                struct pipe_screen *pscreen = st->pipe->screen;
  4856.                assert(procType == TGSI_PROCESSOR_VERTEX);
  4857.                assert(pscreen->get_shader_param(pscreen, PIPE_SHADER_VERTEX, PIPE_SHADER_CAP_INTEGERS));
  4858.                if (!ctx->Const.NativeIntegers) {
  4859.                   struct ureg_dst temp = ureg_DECL_local_temporary(t->ureg);
  4860.                   ureg_U2F( t->ureg, ureg_writemask(temp, TGSI_WRITEMASK_X), t->systemValues[i]);
  4861.                   t->systemValues[i] = ureg_scalar(ureg_src(temp), 0);
  4862.                }
  4863.             }
  4864.             numSys++;
  4865.             sysInputs &= ~(1 << i);
  4866.          }
  4867.       }
  4868.    }
  4869.  
  4870.    /* Copy over array sizes
  4871.     */
  4872.    memcpy(t->array_sizes, program->array_sizes, sizeof(unsigned) * program->next_array);
  4873.  
  4874.    /* Emit constants and uniforms.  TGSI uses a single index space for these,
  4875.     * so we put all the translated regs in t->constants.
  4876.     */
  4877.    if (proginfo->Parameters) {
  4878.       t->constants = (struct ureg_src *)
  4879.          calloc(proginfo->Parameters->NumParameters, sizeof(t->constants[0]));
  4880.       if (t->constants == NULL) {
  4881.          ret = PIPE_ERROR_OUT_OF_MEMORY;
  4882.          goto out;
  4883.       }
  4884.  
  4885.       for (i = 0; i < proginfo->Parameters->NumParameters; i++) {
  4886.          switch (proginfo->Parameters->Parameters[i].Type) {
  4887.          case PROGRAM_ENV_PARAM:
  4888.          case PROGRAM_LOCAL_PARAM:
  4889.          case PROGRAM_STATE_VAR:
  4890.          case PROGRAM_UNIFORM:
  4891.             t->constants[i] = ureg_DECL_constant(ureg, i);
  4892.             break;
  4893.  
  4894.          /* Emit immediates for PROGRAM_CONSTANT only when there's no indirect
  4895.           * addressing of the const buffer.
  4896.           * FIXME: Be smarter and recognize param arrays:
  4897.           * indirect addressing is only valid within the referenced
  4898.           * array.
  4899.           */
  4900.          case PROGRAM_CONSTANT:
  4901.             if (program->indirect_addr_consts)
  4902.                t->constants[i] = ureg_DECL_constant(ureg, i);
  4903.             else
  4904.                t->constants[i] = emit_immediate(t,
  4905.                                                 proginfo->Parameters->ParameterValues[i],
  4906.                                                 proginfo->Parameters->Parameters[i].DataType,
  4907.                                                 4);
  4908.             break;
  4909.          default:
  4910.             break;
  4911.          }
  4912.       }
  4913.    }
  4914.  
  4915.    if (program->shader_program) {
  4916.       unsigned num_ubos = program->shader_program->NumUniformBlocks;
  4917.  
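      /* Declare each uniform block as a 2D constant buffer.  Buffer index 0
       * is used for the ordinary constants/uniforms declared above, so the
       * blocks start at index 1 (matching the index2D handling in
       * src_register()).
       */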
  4918.       for (i = 0; i < num_ubos; i++) {
  4919.          ureg_DECL_constant2D(t->ureg, 0, program->shader_program->UniformBlocks[i].UniformBufferSize / 4, i + 1);
  4920.       }
  4921.    }
  4922.    
  4923.    /* Emit immediate values.
  4924.     */
  4925.    t->immediates = (struct ureg_src *)
  4926.       calloc(program->num_immediates, sizeof(struct ureg_src));
  4927.    if (t->immediates == NULL) {
  4928.       ret = PIPE_ERROR_OUT_OF_MEMORY;
  4929.       goto out;
  4930.    }
  4931.    i = 0;
  4932.    foreach_iter(exec_list_iterator, iter, program->immediates) {
  4933.       immediate_storage *imm = (immediate_storage *)iter.get();
  4934.       assert(i < program->num_immediates);
  4935.       t->immediates[i++] = emit_immediate(t, imm->values, imm->type, imm->size);
  4936.    }
  4937.    assert(i == program->num_immediates);
  4938.  
  4939.    /* texture samplers */
  4940.    for (i = 0; i < ctx->Const.FragmentProgram.MaxTextureImageUnits; i++) {
  4941.       if (program->samplers_used & (1 << i)) {
  4942.          t->samplers[i] = ureg_DECL_sampler(ureg, i);
  4943.       }
  4944.    }
  4945.  
  4946.    /* Emit each instruction in turn:
  4947.     */
  4948.    foreach_iter(exec_list_iterator, iter, program->instructions) {
  4949.       set_insn_start(t, ureg_get_instruction_number(ureg));
  4950.       compile_tgsi_instruction(t, (glsl_to_tgsi_instruction *)iter.get(),
  4951.                                clamp_color);
  4952.    }
  4953.  
  4954.    /* Fix up all emitted labels:
  4955.     */
  4956.    for (i = 0; i < t->labels_count; i++) {
  4957.       ureg_fixup_label(ureg, t->labels[i].token,
  4958.                        t->insn[t->labels[i].branch_target]);
  4959.    }
  4960.  
  4961.    if (program->shader_program) {
  4962.       /* This has to be done last.  Any operation that can cause
  4963.        * prog->ParameterValues to get reallocated (e.g., anything that adds a
  4964.        * program constant) has to happen before creating this linkage.
  4965.        */
  4966.       for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
  4967.          if (program->shader_program->_LinkedShaders[i] == NULL)
  4968.             continue;
  4969.  
  4970.          _mesa_associate_uniform_storage(ctx, program->shader_program,
  4971.                program->shader_program->_LinkedShaders[i]->Program->Parameters);
  4972.       }
  4973.    }
  4974.  
  4975. out:
  4976.    if (t) {
  4977.       free(t->insn);
  4978.       free(t->labels);
  4979.       free(t->constants);
  4980.       free(t->immediates);
  4981.  
  4982.       if (t->error) {
  4983.          debug_printf("%s: translate error flag set\n", __FUNCTION__);
  4984.       }
  4985.  
  4986.       free(t);
  4987.    }
  4988.  
  4989.    return ret;
  4990. }
  4991. /* ----------------------------- End TGSI code ------------------------------ */
  4992.  
  4993. /**
  4994.  * Convert a shader's GLSL IR into a Mesa gl_program, although without
  4995.  * generating Mesa IR.
  4996.  */
  4997. static struct gl_program *
  4998. get_mesa_program(struct gl_context *ctx,
  4999.                  struct gl_shader_program *shader_program,
  5000.                  struct gl_shader *shader)
  5001. {
  5002.    glsl_to_tgsi_visitor* v;
  5003.    struct gl_program *prog;
  5004.    GLenum target;
  5005.    bool progress;
  5006.    struct gl_shader_compiler_options *options =
  5007.          &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(shader->Type)];
  5008.    struct pipe_screen *pscreen = ctx->st->pipe->screen;
  5009.    unsigned ptarget;
  5010.  
  5011.    switch (shader->Type) {
  5012.    case GL_VERTEX_SHADER:
  5013.       target = GL_VERTEX_PROGRAM_ARB;
  5014.       ptarget = PIPE_SHADER_VERTEX;
  5015.       break;
  5016.    case GL_FRAGMENT_SHADER:
  5017.       target = GL_FRAGMENT_PROGRAM_ARB;
  5018.       ptarget = PIPE_SHADER_FRAGMENT;
  5019.       break;
  5020.    case GL_GEOMETRY_SHADER:
  5021.       target = GL_GEOMETRY_PROGRAM_NV;
  5022.       ptarget = PIPE_SHADER_GEOMETRY;
  5023.       break;
  5024.    default:
  5025.       assert(!"should not be reached");
  5026.       return NULL;
  5027.    }
  5028.  
  5029.    validate_ir_tree(shader->ir);
  5030.  
  5031.    prog = ctx->Driver.NewProgram(ctx, target, shader_program->Name);
  5032.    if (!prog)
  5033.       return NULL;
  5034.    prog->Parameters = _mesa_new_parameter_list();
  5035.    v = new glsl_to_tgsi_visitor();
  5036.    v->ctx = ctx;
  5037.    v->prog = prog;
  5038.    v->shader_program = shader_program;
  5039.    v->options = options;
  5040.    v->glsl_version = ctx->Const.GLSLVersion;
  5041.    v->native_integers = ctx->Const.NativeIntegers;
  5042.  
  5043.    v->have_sqrt = pscreen->get_shader_param(pscreen, ptarget,
  5044.                                             PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED);
  5045.  
  5046.    _mesa_generate_parameters_list_for_uniforms(shader_program, shader,
  5047.                                                prog->Parameters);
  5048.  
  5049.    /* Remove reads from output registers. */
  5050.    lower_output_reads(shader->ir);
  5051.  
  5052.    /* Emit intermediate IR for main(). */
  5053.    visit_exec_list(shader->ir, v);
  5054.  
  5055.    /* Now emit bodies for any functions that were used. */
  5056.    do {
  5057.       progress = GL_FALSE;
  5058.  
  5059.       foreach_iter(exec_list_iterator, iter, v->function_signatures) {
  5060.          function_entry *entry = (function_entry *)iter.get();
  5061.  
  5062.          if (!entry->bgn_inst) {
  5063.             v->current_function = entry;
  5064.  
  5065.             entry->bgn_inst = v->emit(NULL, TGSI_OPCODE_BGNSUB);
  5066.             entry->bgn_inst->function = entry;
  5067.  
  5068.             visit_exec_list(&entry->sig->body, v);
  5069.  
  5070.             glsl_to_tgsi_instruction *last;
  5071.             last = (glsl_to_tgsi_instruction *)v->instructions.get_tail();
  5072.             if (last->op != TGSI_OPCODE_RET)
  5073.                v->emit(NULL, TGSI_OPCODE_RET);
  5074.  
  5075.             glsl_to_tgsi_instruction *end;
  5076.             end = v->emit(NULL, TGSI_OPCODE_ENDSUB);
  5077.             end->function = entry;
  5078.  
  5079.             progress = GL_TRUE;
  5080.          }
  5081.       }
  5082.    } while (progress);
  5083.  
  5084. #if 0
  5085.    /* Print out some information (for debugging purposes) used by the
  5086.     * optimization passes. */
  5087.    for (i=0; i < v->next_temp; i++) {
  5088.       int fr = v->get_first_temp_read(i);
  5089.       int fw = v->get_first_temp_write(i);
  5090.       int lr = v->get_last_temp_read(i);
  5091.       int lw = v->get_last_temp_write(i);
  5092.      
  5093.       printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, fr, fw, lr, lw);
  5094.       assert(fw <= fr);
  5095.    }
  5096. #endif
  5097.  
  5098.    /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */
  5099.    v->simplify_cmp();
  5100.    v->copy_propagate();
  5101.    while (v->eliminate_dead_code_advanced());
  5102.  
  5103.    v->eliminate_dead_code();
  5104.    v->merge_registers();
  5105.    v->renumber_registers();
  5106.    
  5107.    /* Write the END instruction. */
  5108.    v->emit(NULL, TGSI_OPCODE_END);
  5109.  
  5110.    if (ctx->Shader.Flags & GLSL_DUMP) {
  5111.       printf("\n");
  5112.       printf("GLSL IR for linked %s program %d:\n",
  5113.              _mesa_glsl_shader_target_name(shader->Type),
  5114.              shader_program->Name);
  5115.       _mesa_print_ir(shader->ir, NULL);
  5116.       printf("\n");
  5117.       printf("\n");
  5118.       fflush(stdout);
  5119.    }
  5120.  
  5121.    prog->Instructions = NULL;
  5122.    prog->NumInstructions = 0;
  5123.  
  5124.    do_set_program_inouts(shader->ir, prog, shader->Type == GL_FRAGMENT_SHADER);
  5125.    count_resources(v, prog);
  5126.  
  5127.    _mesa_reference_program(ctx, &shader->Program, prog);
  5128.    
  5129.    /* This has to be done last.  Any operation that can cause
  5130.     * prog->ParameterValues to get reallocated (e.g., anything that adds a
  5131.     * program constant) has to happen before creating this linkage.
  5132.     */
  5133.    _mesa_associate_uniform_storage(ctx, shader_program, prog->Parameters);
  5134.    if (!shader_program->LinkStatus) {
  5135.       return NULL;
  5136.    }
  5137.  
  5138.    struct st_vertex_program *stvp;
  5139.    struct st_fragment_program *stfp;
  5140.    struct st_geometry_program *stgp;
  5141.    
  5142.    switch (shader->Type) {
  5143.    case GL_VERTEX_SHADER:
  5144.       stvp = (struct st_vertex_program *)prog;
  5145.       stvp->glsl_to_tgsi = v;
  5146.       break;
  5147.    case GL_FRAGMENT_SHADER:
  5148.       stfp = (struct st_fragment_program *)prog;
  5149.       stfp->glsl_to_tgsi = v;
  5150.       break;
  5151.    case GL_GEOMETRY_SHADER:
  5152.       stgp = (struct st_geometry_program *)prog;
  5153.       stgp->glsl_to_tgsi = v;
  5154.       break;
  5155.    default:
  5156.       assert(!"should not be reached");
  5157.       return NULL;
  5158.    }
  5159.  
  5160.    return prog;
  5161. }
  5162.  
  5163. extern "C" {
  5164.  
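/**
 * Allocate and initialize a new gl_shader object.
 * Only vertex, fragment and geometry shader types are expected here.
 */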
  5165. struct gl_shader *
  5166. st_new_shader(struct gl_context *ctx, GLuint name, GLuint type)
  5167. {
  5168.    struct gl_shader *shader;
  5169.    assert(type == GL_FRAGMENT_SHADER || type == GL_VERTEX_SHADER ||
  5170.           type == GL_GEOMETRY_SHADER_ARB);
  5171.    shader = rzalloc(NULL, struct gl_shader);
  5172.    if (shader) {
  5173.       shader->Type = type;
  5174.       shader->Name = name;
  5175.       _mesa_init_shader(ctx, shader);
  5176.    }
  5177.    return shader;
  5178. }
  5179.  
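/**
 * Allocate and initialize a new gl_shader_program object.
 */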
  5180. struct gl_shader_program *
  5181. st_new_shader_program(struct gl_context *ctx, GLuint name)
  5182. {
  5183.    struct gl_shader_program *shProg;
  5184.    shProg = rzalloc(NULL, struct gl_shader_program);
  5185.    if (shProg) {
  5186.       shProg->Name = name;
  5187.       _mesa_init_shader_program(ctx, shProg);
  5188.    }
  5189.    return shProg;
  5190. }
  5191.  
  5192. /**
  5193.  * Link a shader.
  5194.  * Called via ctx->Driver.LinkShader().
  5195.  * This actually involves converting GLSL IR into an intermediate TGSI-like IR
  5196.  * with code lowering and other optimizations.
  5197.  */
  5198. GLboolean
  5199. st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
  5200. {
  5201.    assert(prog->LinkStatus);
  5202.  
  5203.    for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
  5204.       if (prog->_LinkedShaders[i] == NULL)
  5205.          continue;
  5206.  
  5207.       bool progress;
  5208.       exec_list *ir = prog->_LinkedShaders[i]->ir;
  5209.       const struct gl_shader_compiler_options *options =
  5210.             &ctx->ShaderCompilerOptions[_mesa_shader_type_to_index(prog->_LinkedShaders[i]->Type)];
  5211.  
  5212.       /* If there are forms of indirect addressing that the driver
  5213.        * cannot handle, perform the lowering pass.
  5214.        */
  5215.       if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput ||
  5216.           options->EmitNoIndirectTemp || options->EmitNoIndirectUniform) {
  5217.          lower_variable_index_to_cond_assign(ir,
  5218.                                              options->EmitNoIndirectInput,
  5219.                                              options->EmitNoIndirectOutput,
  5220.                                              options->EmitNoIndirectTemp,
  5221.                                              options->EmitNoIndirectUniform);
  5222.       }
  5223.  
  5224.       if (ctx->Extensions.ARB_shading_language_packing) {
  5225.          unsigned lower_inst = LOWER_PACK_SNORM_2x16 |
  5226.                                LOWER_UNPACK_SNORM_2x16 |
  5227.                                LOWER_PACK_UNORM_2x16 |
  5228.                                LOWER_UNPACK_UNORM_2x16 |
  5229.                                LOWER_PACK_SNORM_4x8 |
  5230.                                LOWER_UNPACK_SNORM_4x8 |
  5231.                                LOWER_UNPACK_UNORM_4x8 |
  5232.                                LOWER_PACK_UNORM_4x8 |
  5233.                                LOWER_PACK_HALF_2x16 |
  5234.                                LOWER_UNPACK_HALF_2x16;
  5235.  
  5236.          lower_packing_builtins(ir, lower_inst);
  5237.       }
  5238.  
  5239.       do_mat_op_to_vec(ir);
  5240.       lower_instructions(ir,
  5241.                          MOD_TO_FRACT |
  5242.                          DIV_TO_MUL_RCP |
  5243.                          EXP_TO_EXP2 |
  5244.                          LOG_TO_LOG2 |
  5245.                          (options->EmitNoPow ? POW_TO_EXP2 : 0) |
  5246.                          (!ctx->Const.NativeIntegers ? INT_DIV_TO_MUL_RCP : 0));
  5247.  
  5248.       lower_ubo_reference(prog->_LinkedShaders[i], ir);
  5249.       do_vec_index_to_cond_assign(ir);
  5250.       lower_vector_insert(ir, true);
  5251.       lower_quadop_vector(ir, false);
  5252.       lower_noise(ir);
  5253.       if (options->MaxIfDepth == 0) {
  5254.          lower_discard(ir);
  5255.       }
  5256.  
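      /* Run the jump-lowering, common-optimization and if-lowering passes
       * repeatedly until none of them makes further progress.
       */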
  5257.       do {
  5258.          progress = false;
  5259.  
  5260.          progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn, options->EmitNoCont, options->EmitNoLoops) || progress;
  5261.  
  5262.          progress = do_common_optimization(ir, true, true,
  5263.                                            options->MaxUnrollIterations, options)
  5264.            || progress;
  5265.  
  5266.          progress = lower_if_to_cond_assign(ir, options->MaxIfDepth) || progress;
  5267.  
  5268.       } while (progress);
  5269.  
  5270.       validate_ir_tree(ir);
  5271.    }
  5272.  
  5273.    for (unsigned i = 0; i < MESA_SHADER_TYPES; i++) {
  5274.       struct gl_program *linked_prog;
  5275.  
  5276.       if (prog->_LinkedShaders[i] == NULL)
  5277.          continue;
  5278.  
  5279.       linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]);
  5280.  
  5281.       if (linked_prog) {
  5282.          _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program,
  5283.                                  linked_prog);
  5284.          if (!ctx->Driver.ProgramStringNotify(ctx,
  5285.                                               _mesa_program_index_to_target(i),
  5286.                                               linked_prog)) {
  5287.             _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program,
  5288.                                     NULL);
  5289.             _mesa_reference_program(ctx, &linked_prog, NULL);
  5290.             return GL_FALSE;
  5291.          }
  5292.       }
  5293.  
  5294.       _mesa_reference_program(ctx, &linked_prog, NULL);
  5295.    }
  5296.  
  5297.    return GL_TRUE;
  5298. }
  5299.  
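/**
 * Translate Mesa's linked transform feedback info into a
 * pipe_stream_output_info, remapping output register indices through
 * outputMapping.
 */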
  5300. void
  5301. st_translate_stream_output_info(glsl_to_tgsi_visitor *glsl_to_tgsi,
  5302.                                 const GLuint outputMapping[],
  5303.                                 struct pipe_stream_output_info *so)
  5304. {
  5305.    unsigned i;
  5306.    struct gl_transform_feedback_info *info =
  5307.       &glsl_to_tgsi->shader_program->LinkedTransformFeedback;
  5308.  
  5309.    for (i = 0; i < info->NumOutputs; i++) {
  5310.       so->output[i].register_index =
  5311.          outputMapping[info->Outputs[i].OutputRegister];
  5312.       so->output[i].start_component = info->Outputs[i].ComponentOffset;
  5313.       so->output[i].num_components = info->Outputs[i].NumComponents;
  5314.       so->output[i].output_buffer = info->Outputs[i].OutputBuffer;
  5315.       so->output[i].dst_offset = info->Outputs[i].DstOffset;
  5316.    }
  5317.  
  5318.    for (i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
  5319.       so->stride[i] = info->BufferStride[i];
  5320.    }
  5321.    so->num_outputs = info->NumOutputs;
  5322. }
  5323.  
  5324. } /* extern "C" */
  5325.