  1. /*
  2.  * Copyright (C) 2005-2007  Brian Paul   All Rights Reserved.
  3.  * Copyright (C) 2008  VMware, Inc.   All Rights Reserved.
  4.  * Copyright © 2010 Intel Corporation
  5.  * Copyright © 2011 Bryan Cain
  6.  *
  7.  * Permission is hereby granted, free of charge, to any person obtaining a
  8.  * copy of this software and associated documentation files (the "Software"),
  9.  * to deal in the Software without restriction, including without limitation
  10.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11.  * and/or sell copies of the Software, and to permit persons to whom the
  12.  * Software is furnished to do so, subject to the following conditions:
  13.  *
  14.  * The above copyright notice and this permission notice (including the next
  15.  * paragraph) shall be included in all copies or substantial portions of the
  16.  * Software.
  17.  *
  18.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  19.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  20.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  21.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  22.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  23.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  24.  * DEALINGS IN THE SOFTWARE.
  25.  */
  26.  
  27. /**
  28.  * \file glsl_to_tgsi.cpp
  29.  *
  30.  * Translate GLSL IR to TGSI.
  31.  */
  32.  
  33. #include "st_glsl_to_tgsi.h"
  34.  
  35. #include "glsl_parser_extras.h"
  36. #include "ir_optimization.h"
  37.  
  38. #include "main/errors.h"
  39. #include "main/shaderobj.h"
  40. #include "main/uniforms.h"
  41. #include "main/shaderapi.h"
  42. #include "program/prog_instruction.h"
  43. #include "program/sampler.h"
  44.  
  45. #include "pipe/p_context.h"
  46. #include "pipe/p_screen.h"
  47. #include "tgsi/tgsi_ureg.h"
  48. #include "tgsi/tgsi_info.h"
  49. #include "util/u_math.h"
  50. #include "util/u_memory.h"
  51. #include "st_program.h"
  52. #include "st_mesa_to_tgsi.h"
  53.  
  54.  
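/* PROGRAM_IMMEDIATE is a pseudo register file (it aliases PROGRAM_FILE_MAX,
 * one past the real Mesa register files) used for values that will be emitted
 * as TGSI immediates; see add_constant() and the visitor's immediates list.
 */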
  55. #define PROGRAM_IMMEDIATE PROGRAM_FILE_MAX
  56. #define PROGRAM_ANY_CONST ((1 << PROGRAM_STATE_VAR) |    \
  57.                            (1 << PROGRAM_CONSTANT) |     \
  58.                            (1 << PROGRAM_UNIFORM))
  59.  
  60. /**
  61.  * Maximum number of arrays
  62.  */
  63. #define MAX_ARRAYS        256
  64.  
  65. #define MAX_GLSL_TEXTURE_OFFSET 4
  66.  
  67. class st_src_reg;
  68. class st_dst_reg;
  69.  
  70. static int swizzle_for_size(int size);
  71.  
  72. /**
  73.  * This class is the glsl_to_tgsi counterpart of TGSI's ureg_src.
  74.  */
  75. class st_src_reg {
  76. public:
  77.    st_src_reg(gl_register_file file, int index, const glsl_type *type)
  78.    {
  79.       this->file = file;
  80.       this->index = index;
  81.       if (type && (type->is_scalar() || type->is_vector() || type->is_matrix()))
  82.          this->swizzle = swizzle_for_size(type->vector_elements);
  83.       else
  84.          this->swizzle = SWIZZLE_XYZW;
  85.       this->negate = 0;
  86.       this->index2D = 0;
  87.       this->type = type ? type->base_type : GLSL_TYPE_ERROR;
  88.       this->reladdr = NULL;
  89.       this->reladdr2 = NULL;
  90.       this->has_index2 = false;
  91.       this->double_reg2 = false;
  92.    }
  93.  
  94.    st_src_reg(gl_register_file file, int index, int type)
  95.    {
  96.       this->type = type;
  97.       this->file = file;
  98.       this->index = index;
  99.       this->index2D = 0;
  100.       this->swizzle = SWIZZLE_XYZW;
  101.       this->negate = 0;
  102.       this->reladdr = NULL;
  103.       this->reladdr2 = NULL;
  104.       this->has_index2 = false;
  105.       this->double_reg2 = false;
  106.    }
  107.  
  108.    st_src_reg(gl_register_file file, int index, int type, int index2D)
  109.    {
  110.       this->type = type;
  111.       this->file = file;
  112.       this->index = index;
  113.       this->index2D = index2D;
  114.       this->swizzle = SWIZZLE_XYZW;
  115.       this->negate = 0;
  116.       this->reladdr = NULL;
  117.       this->reladdr2 = NULL;
  118.       this->has_index2 = false;
  119.       this->double_reg2 = false;
  120.    }
  121.  
  122.    st_src_reg()
  123.    {
  124.       this->type = GLSL_TYPE_ERROR;
  125.       this->file = PROGRAM_UNDEFINED;
  126.       this->index = 0;
  127.       this->index2D = 0;
  128.       this->swizzle = 0;
  129.       this->negate = 0;
  130.       this->reladdr = NULL;
  131.       this->reladdr2 = NULL;
  132.       this->has_index2 = false;
  133.       this->double_reg2 = false;
  134.    }
  135.  
  136.    explicit st_src_reg(st_dst_reg reg);
  137.  
  138.    gl_register_file file; /**< PROGRAM_* from Mesa */
  139.    int index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */
  140.    int index2D;
  141.    GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */
  142.    int negate; /**< NEGATE_XYZW mask from mesa */
  143.    int type; /**< GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
  144.    /** Register index should be offset by the integer in this reg. */
  145.    st_src_reg *reladdr;
  146.    st_src_reg *reladdr2;
  147.    bool has_index2;
  148.    /*
  149.     * Is this the second half of a double register pair?
  150.     * currently used for input mapping only.
  151.     */
  152.    bool double_reg2;
  153. };
  154.  
  155. class st_dst_reg {
  156. public:
  157.    st_dst_reg(gl_register_file file, int writemask, int type, int index)
  158.    {
  159.       this->file = file;
  160.       this->index = index;
  161.       this->writemask = writemask;
  162.       this->cond_mask = COND_TR;
  163.       this->reladdr = NULL;
  164.       this->type = type;
  165.    }
  166.  
  167.    st_dst_reg(gl_register_file file, int writemask, int type)
  168.    {
  169.       this->file = file;
  170.       this->index = 0;
  171.       this->writemask = writemask;
  172.       this->cond_mask = COND_TR;
  173.       this->reladdr = NULL;
  174.       this->type = type;
  175.    }
  176.  
  177.    st_dst_reg()
  178.    {
  179.       this->type = GLSL_TYPE_ERROR;
  180.       this->file = PROGRAM_UNDEFINED;
  181.       this->index = 0;
  182.       this->writemask = 0;
  183.       this->cond_mask = COND_TR;
  184.       this->reladdr = NULL;
  185.    }
  186.  
  187.    explicit st_dst_reg(st_src_reg reg);
  188.  
  189.    gl_register_file file; /**< PROGRAM_* from Mesa */
  190.    int index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */
  191.    int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
  192.    GLuint cond_mask:4;
  193.    int type; /**< GLSL_TYPE_* from GLSL IR (enum glsl_base_type) */
  194.    /** Register index should be offset by the integer in this reg. */
  195.    st_src_reg *reladdr;
  196. };
  197.  
  198. st_src_reg::st_src_reg(st_dst_reg reg)
  199. {
  200.    this->type = reg.type;
  201.    this->file = reg.file;
  202.    this->index = reg.index;
  203.    this->swizzle = SWIZZLE_XYZW;
  204.    this->negate = 0;
  205.    this->reladdr = reg.reladdr;
  206.    this->index2D = 0;
  207.    this->reladdr2 = NULL;
  208.    this->has_index2 = false;
  209.    this->double_reg2 = false;
  210. }
  211.  
  212. st_dst_reg::st_dst_reg(st_src_reg reg)
  213. {
  214.    this->type = reg.type;
  215.    this->file = reg.file;
  216.    this->index = reg.index;
  217.    this->writemask = WRITEMASK_XYZW;
  218.    this->cond_mask = COND_TR;
  219.    this->reladdr = reg.reladdr;
  220. }
  221.  
  222. class glsl_to_tgsi_instruction : public exec_node {
  223. public:
  224.    DECLARE_RALLOC_CXX_OPERATORS(glsl_to_tgsi_instruction)
  225.  
  226.    unsigned op;
  227.    st_dst_reg dst[2];
  228.    st_src_reg src[4];
  229.    /** Pointer to the ir source this tree came from for debugging */
  230.    ir_instruction *ir;
  231.    GLboolean cond_update;
  232.    bool saturate;
  233.    st_src_reg sampler; /**< sampler register */
  234.    int sampler_array_size; /**< 1-based size of sampler array, 1 if not array */
  235.    int tex_target; /**< One of TEXTURE_*_INDEX */
  236.    GLboolean tex_shadow;
  237.  
  238.    st_src_reg tex_offsets[MAX_GLSL_TEXTURE_OFFSET];
  239.    unsigned tex_offset_num_offset;
  240.    int dead_mask; /**< Used in dead code elimination */
  241.  
  242.    class function_entry *function; /* Set on TGSI_OPCODE_CAL or TGSI_OPCODE_BGNSUB */
  243. };
  244.  
  245. class variable_storage : public exec_node {
  246. public:
  247.    variable_storage(ir_variable *var, gl_register_file file, int index)
  248.       : file(file), index(index), var(var)
  249.    {
  250.       /* empty */
  251.    }
  252.  
  253.    gl_register_file file;
  254.    int index;
  255.    ir_variable *var; /* variable that maps to this, if any */
  256. };
  257.  
  258. class immediate_storage : public exec_node {
  259. public:
  260.    immediate_storage(gl_constant_value *values, int size32, int type)
  261.    {
  262.       memcpy(this->values, values, size32 * sizeof(gl_constant_value));
  263.       this->size32 = size32;
  264.       this->type = type;
  265.    }
  266.  
  267.    /* doubles are stored across 2 gl_constant_values */
  268.    gl_constant_value values[4];
  269.    int size32; /**< Number of 32-bit components (1-4) */
  270.    int type; /**< GL_DOUBLE, GL_FLOAT, GL_INT, GL_BOOL, or GL_UNSIGNED_INT */
  271. };
  272.  
  273. class function_entry : public exec_node {
  274. public:
  275.    ir_function_signature *sig;
  276.  
  277.    /**
  278.     * Identifier of this function signature, used by the program.
  279.     *
  280.     * At the point that TGSI instructions for function calls are
  281.     * generated, we don't know the address of the first instruction of
  282.     * the function body.  So we make the called BranchTarget a small
  283.     * integer and rewrite it during set_branchtargets().
  284.     */
  285.    int sig_id;
  286.  
  287.    /**
  288.     * Pointer to first instruction of the function body.
  289.     *
  290.     * Set during function body emits after main() is processed.
  291.     */
  292.    glsl_to_tgsi_instruction *bgn_inst;
  293.  
  294.    /**
  295.     * Index of the first instruction of the function body in actual TGSI.
  296.     *
  297.     * Set after conversion from glsl_to_tgsi_instruction to TGSI.
  298.     */
  299.    int inst;
  300.  
  301.    /** Storage for the return value. */
  302.    st_src_reg return_reg;
  303. };
  304.  
  305. struct glsl_to_tgsi_visitor : public ir_visitor {
  306. public:
  307.    glsl_to_tgsi_visitor();
  308.    ~glsl_to_tgsi_visitor();
  309.  
  310.    function_entry *current_function;
  311.  
  312.    struct gl_context *ctx;
  313.    struct gl_program *prog;
  314.    struct gl_shader_program *shader_program;
  315.    struct gl_shader *shader;
  316.    struct gl_shader_compiler_options *options;
  317.  
  318.    int next_temp;
  319.  
  320.    unsigned array_sizes[MAX_ARRAYS];
  321.    unsigned next_array;
  322.  
  323.    int num_address_regs;
  324.    int samplers_used;
  325.    bool indirect_addr_consts;
  326.    int wpos_transform_const;
  327.  
  328.    int glsl_version;
  329.    bool native_integers;
  330.    bool have_sqrt;
  331.    bool have_fma;
  332.  
  333.    variable_storage *find_variable_storage(ir_variable *var);
  334.  
  335.    int add_constant(gl_register_file file, gl_constant_value values[8],
  336.                     int size, int datatype, GLuint *swizzle_out);
  337.  
  338.    function_entry *get_function_signature(ir_function_signature *sig);
  339.  
  340.    st_src_reg get_temp(const glsl_type *type);
  341.    void reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int *num_reladdr);
  342.  
  343.    st_src_reg st_src_reg_for_double(double val);
  344.    st_src_reg st_src_reg_for_float(float val);
  345.    st_src_reg st_src_reg_for_int(int val);
  346.    st_src_reg st_src_reg_for_type(int type, int val);
  347.  
  348.    /**
  349.     * \name Visit methods
  350.     *
  351.     * As typical for the visitor pattern, there must be one \c visit method for
  352.     * each concrete subclass of \c ir_instruction.  Virtual base classes within
  353.     * the hierarchy should not have \c visit methods.
  354.     */
  355.    /*@{*/
  356.    virtual void visit(ir_variable *);
  357.    virtual void visit(ir_loop *);
  358.    virtual void visit(ir_loop_jump *);
  359.    virtual void visit(ir_function_signature *);
  360.    virtual void visit(ir_function *);
  361.    virtual void visit(ir_expression *);
  362.    virtual void visit(ir_swizzle *);
  363.    virtual void visit(ir_dereference_variable  *);
  364.    virtual void visit(ir_dereference_array *);
  365.    virtual void visit(ir_dereference_record *);
  366.    virtual void visit(ir_assignment *);
  367.    virtual void visit(ir_constant *);
  368.    virtual void visit(ir_call *);
  369.    virtual void visit(ir_return *);
  370.    virtual void visit(ir_discard *);
  371.    virtual void visit(ir_texture *);
  372.    virtual void visit(ir_if *);
  373.    virtual void visit(ir_emit_vertex *);
  374.    virtual void visit(ir_end_primitive *);
  375.    /*@}*/
  376.  
  377.    st_src_reg result;
  378.  
  379.    /** List of variable_storage */
  380.    exec_list variables;
  381.  
  382.    /** List of immediate_storage */
  383.    exec_list immediates;
  384.    unsigned num_immediates;
  385.  
  386.    /** List of function_entry */
  387.    exec_list function_signatures;
  388.    int next_signature_id;
  389.  
  390.    /** List of glsl_to_tgsi_instruction */
  391.    exec_list instructions;
  392.  
  393.    glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op);
  394.  
  395.    glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
  396.                                   st_dst_reg dst, st_src_reg src0);
  397.  
  398.    glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
  399.                                   st_dst_reg dst, st_dst_reg dst1,
  400.                                   st_src_reg src0);
  401.  
  402.    glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
  403.                                   st_dst_reg dst, st_src_reg src0, st_src_reg src1);
  404.  
  405.    glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
  406.                                   st_dst_reg dst,
  407.                                   st_src_reg src0, st_src_reg src1, st_src_reg src2);
  408.  
  409.    glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
  410.                                   st_dst_reg dst,
  411.                                   st_src_reg src0, st_src_reg src1,
  412.                                   st_src_reg src2, st_src_reg src3);
  413.  
  414.    glsl_to_tgsi_instruction *emit(ir_instruction *ir, unsigned op,
  415.                                   st_dst_reg dst, st_dst_reg dst1,
  416.                                   st_src_reg src0, st_src_reg src1,
  417.                                   st_src_reg src2, st_src_reg src3);
  418.  
  419.    unsigned get_opcode(ir_instruction *ir, unsigned op,
  420.                     st_dst_reg dst,
  421.                     st_src_reg src0, st_src_reg src1);
  422.  
  423.    /**
  424.     * Emit the correct dot-product instruction for the type of arguments
  425.     */
  426.    glsl_to_tgsi_instruction *emit_dp(ir_instruction *ir,
  427.                                      st_dst_reg dst,
  428.                                      st_src_reg src0,
  429.                                      st_src_reg src1,
  430.                                      unsigned elements);
  431.  
  432.    void emit_scalar(ir_instruction *ir, unsigned op,
  433.                     st_dst_reg dst, st_src_reg src0);
  434.  
  435.    void emit_scalar(ir_instruction *ir, unsigned op,
  436.                     st_dst_reg dst, st_src_reg src0, st_src_reg src1);
  437.  
  438.    void emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0);
  439.  
  440.    bool try_emit_mad(ir_expression *ir,
  441.               int mul_operand);
  442.    bool try_emit_mad_for_and_not(ir_expression *ir,
  443.               int mul_operand);
  444.  
  445.    void emit_swz(ir_expression *ir);
  446.  
  447.    bool process_move_condition(ir_rvalue *ir);
  448.  
  449.    void simplify_cmp(void);
  450.  
  451.    void rename_temp_register(int index, int new_index);
  452.    int get_first_temp_read(int index);
  453.    int get_first_temp_write(int index);
  454.    int get_last_temp_read(int index);
  455.    int get_last_temp_write(int index);
  456.  
  457.    void copy_propagate(void);
  458.    int eliminate_dead_code(void);
  459.  
  460.    void merge_two_dsts(void);
  461.    void merge_registers(void);
  462.    void renumber_registers(void);
  463.  
  464.    void emit_block_mov(ir_assignment *ir, const struct glsl_type *type,
  465.                        st_dst_reg *l, st_src_reg *r,
  466.                        st_src_reg *cond, bool cond_swap);
  467.  
  468.    void *mem_ctx;
  469. };
  470.  
  471. static st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR);
  472.  
  473. static st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR);
  474.  
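/* Fixed address-register destinations written by emit_arl(): ADDR[0] serves
 * reladdr and ADDR[1] serves reladdr2 (see reladdr_to_temp()); ADDR[2] is set
 * aside for indirect sampler array indexing.
 */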
  475. static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT, 0);
  476. static st_dst_reg address_reg2 = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT, 1);
  477. static st_dst_reg sampler_reladdr = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, GLSL_TYPE_FLOAT, 2);
  478.  
  479. static void
  480. fail_link(struct gl_shader_program *prog, const char *fmt, ...) PRINTFLIKE(2, 3);
  481.  
  482. static void
  483. fail_link(struct gl_shader_program *prog, const char *fmt, ...)
  484. {
  485.    va_list args;
  486.    va_start(args, fmt);
  487.    ralloc_vasprintf_append(&prog->InfoLog, fmt, args);
  488.    va_end(args);
  489.  
  490.    prog->LinkStatus = GL_FALSE;
  491. }
  492.  
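/**
 * Return a 4-component swizzle that reads the first \c size components and
 * replicates the last of them into the remaining channels, e.g. a size of 2
 * yields an XYYY swizzle.
 */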
  493. static int
  494. swizzle_for_size(int size)
  495. {
  496.    static const int size_swizzles[4] = {
  497.       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
  498.       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
  499.       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z),
  500.       MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W),
  501.    };
  502.  
  503.    assert((size >= 1) && (size <= 4));
  504.    return size_swizzles[size - 1];
  505. }
  506.  
  507. static bool
  508. is_tex_instruction(unsigned opcode)
  509. {
  510.    const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode);
  511.    return info->is_tex;
  512. }
  513.  
  514. static unsigned
  515. num_inst_dst_regs(unsigned opcode)
  516. {
  517.    const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode);
  518.    return info->num_dst;
  519. }
  520.  
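/* Number of source operands, not counting the sampler operand of texture
 * instructions (the sampler is tracked separately in
 * glsl_to_tgsi_instruction::sampler).
 */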
  521. static unsigned
  522. num_inst_src_regs(unsigned opcode)
  523. {
  524.    const tgsi_opcode_info* info = tgsi_get_opcode_info(opcode);
  525.    return info->is_tex ? info->num_src - 1 : info->num_src;
  526. }
  527.  
  528. glsl_to_tgsi_instruction *
  529. glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
  530.                            st_dst_reg dst, st_dst_reg dst1,
  531.                            st_src_reg src0, st_src_reg src1,
  532.                            st_src_reg src2, st_src_reg src3)
  533. {
  534.    glsl_to_tgsi_instruction *inst = new(mem_ctx) glsl_to_tgsi_instruction();
  535.    int num_reladdr = 0, i, j;
  536.  
  537.    op = get_opcode(ir, op, dst, src0, src1);
  538.  
  539.    /* If we have to do relative addressing, we want to load the ARL
  540.     * reg directly for one of the regs, and preload the other reladdr
  541.     * sources into temps.
  542.     */
  543.    num_reladdr += dst.reladdr != NULL;
  544.    num_reladdr += dst1.reladdr != NULL;
  545.    num_reladdr += src0.reladdr != NULL || src0.reladdr2 != NULL;
  546.    num_reladdr += src1.reladdr != NULL || src1.reladdr2 != NULL;
  547.    num_reladdr += src2.reladdr != NULL || src2.reladdr2 != NULL;
  548.    num_reladdr += src3.reladdr != NULL || src3.reladdr2 != NULL;
  549.  
  550.    reladdr_to_temp(ir, &src3, &num_reladdr);
  551.    reladdr_to_temp(ir, &src2, &num_reladdr);
  552.    reladdr_to_temp(ir, &src1, &num_reladdr);
  553.    reladdr_to_temp(ir, &src0, &num_reladdr);
  554.  
  555.    if (dst.reladdr) {
  556.       emit_arl(ir, address_reg, *dst.reladdr);
  557.       num_reladdr--;
  558.    }
  559.    if (dst1.reladdr) {
  560.       emit_arl(ir, address_reg, *dst1.reladdr);
  561.       num_reladdr--;
  562.    }
  563.    assert(num_reladdr == 0);
  564.  
  565.    inst->op = op;
  566.    inst->dst[0] = dst;
  567.    inst->dst[1] = dst1;
  568.    inst->src[0] = src0;
  569.    inst->src[1] = src1;
  570.    inst->src[2] = src2;
  571.    inst->src[3] = src3;
  572.    inst->ir = ir;
  573.    inst->dead_mask = 0;
  574.  
  575.    inst->function = NULL;
  576.  
  577.    /* Update indirect addressing status used by TGSI */
  578.    if (dst.reladdr) {
  579.       switch(dst.file) {
  580.       case PROGRAM_STATE_VAR:
  581.       case PROGRAM_CONSTANT:
  582.       case PROGRAM_UNIFORM:
  583.          this->indirect_addr_consts = true;
  584.          break;
  585.       case PROGRAM_IMMEDIATE:
  586.          assert(!"immediates should not have indirect addressing");
  587.          break;
  588.       default:
  589.          break;
  590.       }
  591.    }
  592.    else {
  593.       for (i = 0; i < 4; i++) {
  594.          if(inst->src[i].reladdr) {
  595.             switch(inst->src[i].file) {
  596.             case PROGRAM_STATE_VAR:
  597.             case PROGRAM_CONSTANT:
  598.             case PROGRAM_UNIFORM:
  599.                this->indirect_addr_consts = true;
  600.                break;
  601.             case PROGRAM_IMMEDIATE:
  602.                assert(!"immediates should not have indirect addressing");
  603.                break;
  604.             default:
  605.                break;
  606.             }
  607.          }
  608.       }
  609.    }
  610.  
  611.    this->instructions.push_tail(inst);
  612.  
  613.    /*
  614.     * This section handles double processing.  GLSL represents doubles
  615.     * as single-channel values; however, most HW and TGSI represent
  616.     * doubles as pairs of register channels.
  617.     *
  618.     * So we have to fix up the destination writemask/index and src
  619.     * swizzle/indexes.  Dest writemasks need to translate from a single-
  620.     * channel writemask to a dual-channel one, and also modify the index
  621.     * if we are touching the Z,W fields in the pre-translated writemask.
  622.     *
  623.     * Src channels have similar index modifications, along with swizzle
  624.     * changes so we pick the XY, ZW pairs from the correct index.
  625.     *
  626.     * GLSL [0].x -> TGSI [0].xy
  627.     * GLSL [0].y -> TGSI [0].zw
  628.     * GLSL [0].z -> TGSI [1].xy
  629.     * GLSL [0].w -> TGSI [1].zw
  630.     */
  631.    if (inst->dst[0].type == GLSL_TYPE_DOUBLE || inst->dst[1].type == GLSL_TYPE_DOUBLE ||
  632.        inst->src[0].type == GLSL_TYPE_DOUBLE) {
  633.       glsl_to_tgsi_instruction *dinst = NULL;
  634.       int initial_src_swz[4], initial_src_idx[4];
  635.       int initial_dst_idx[2], initial_dst_writemask[2];
  636.       /* select the writemask for dst0 or dst1 */
  637.       unsigned writemask = inst->dst[0].file == PROGRAM_UNDEFINED ? inst->dst[1].writemask : inst->dst[0].writemask;
  638.  
  639.       /* copy out the writemask, index and swizzles for all src/dsts. */
  640.       for (j = 0; j < 2; j++) {
  641.          initial_dst_writemask[j] = inst->dst[j].writemask;
  642.          initial_dst_idx[j] = inst->dst[j].index;
  643.       }
  644.  
  645.       for (j = 0; j < 4; j++) {
  646.          initial_src_swz[j] = inst->src[j].swizzle;
  647.          initial_src_idx[j] = inst->src[j].index;
  648.       }
  649.  
  650.       /*
  651.        * Scan all the components in the dst writemask and
  652.        * generate an instruction for each of them if required.
  653.        */
  654.       while (writemask) {
  655.  
  656.          int i = u_bit_scan(&writemask);
  657.  
  658.          /* the first time through, reuse the instruction emitted above */
  659.          if (dinst == NULL) {
  660.             dinst = inst;
  661.          } else {
  662.             /* create a new instruction for each subsequent component */
  663.             dinst = new(mem_ctx) glsl_to_tgsi_instruction();
  664.             *dinst = *inst;
  665.             dinst->next = NULL;
  666.             dinst->prev = NULL;
  667.             this->instructions.push_tail(dinst);
  668.          }
  669.  
  670.          /* modify the destination if we are splitting */
  671.          for (j = 0; j < 2; j++) {
  672.             if (dinst->dst[j].type == GLSL_TYPE_DOUBLE) {
  673.                dinst->dst[j].writemask = (i & 1) ? WRITEMASK_ZW : WRITEMASK_XY;
  674.                dinst->dst[j].index = initial_dst_idx[j];
  675.                if (i > 1)
  676.                   dinst->dst[j].index++;
  677.             } else {
  678.                /* if we aren't writing to a double, just get the bit of the initial writemask
  679.                   for this channel */
  680.                dinst->dst[j].writemask = initial_dst_writemask[j] & (1 << i);
  681.             }
  682.          }
  683.  
  684.          /* modify the src registers */
  685.          for (j = 0; j < 4; j++) {
  686.             int swz = GET_SWZ(initial_src_swz[j], i);
  687.  
  688.             if (dinst->src[j].type == GLSL_TYPE_DOUBLE) {
  689.                dinst->src[j].index = initial_src_idx[j];
  690.                if (swz > 1) {
  691.                   dinst->src[j].double_reg2 = true;
  692.                   dinst->src[j].index++;
  693.                }
  694.  
  695.                if (swz & 1)
  696.                   dinst->src[j].swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W);
  697.                else
  698.                   dinst->src[j].swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_X, SWIZZLE_Y);
  699.  
  700.             } else {
  701.                /* some opcodes are special-cased in what they use as sources:
  702.                   F2D takes a float src0, DLDEXP takes an integer src1 */
  703.                if (op == TGSI_OPCODE_F2D ||
  704.                    op == TGSI_OPCODE_DLDEXP ||
  705.                    (op == TGSI_OPCODE_UCMP && dinst->dst[0].type == GLSL_TYPE_DOUBLE)) {
  706.                   dinst->src[j].swizzle = MAKE_SWIZZLE4(swz, swz, swz, swz);
  707.                }
  708.             }
  709.          }
  710.       }
  711.       inst = dinst;
  712.    }
  713.  
  714.  
  715.    return inst;
  716. }
  717.  
  718. glsl_to_tgsi_instruction *
  719. glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
  720.                            st_dst_reg dst,
  721.                            st_src_reg src0, st_src_reg src1,
  722.                            st_src_reg src2, st_src_reg src3)
  723. {
  724.    return emit(ir, op, dst, undef_dst, src0, src1, src2, src3);
  725. }
  726.  
  727. glsl_to_tgsi_instruction *
  728. glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
  729.                            st_dst_reg dst, st_src_reg src0,
  730.                            st_src_reg src1, st_src_reg src2)
  731. {
  732.    return emit(ir, op, dst, undef_dst, src0, src1, src2, undef_src);
  733. }
  734.  
  735. glsl_to_tgsi_instruction *
  736. glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
  737.                            st_dst_reg dst, st_src_reg src0, st_src_reg src1)
  738. {
  739.    return emit(ir, op, dst, undef_dst, src0, src1, undef_src, undef_src);
  740. }
  741.  
  742. glsl_to_tgsi_instruction *
  743. glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
  744.                            st_dst_reg dst, st_src_reg src0)
  745. {
  746.    assert(dst.writemask != 0);
  747.    return emit(ir, op, dst, undef_dst, src0, undef_src, undef_src, undef_src);
  748. }
  749.  
  750. glsl_to_tgsi_instruction *
  751. glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op,
  752.                            st_dst_reg dst, st_dst_reg dst1, st_src_reg src0)
  753. {
  754.    return emit(ir, op, dst, dst1, src0, undef_src, undef_src, undef_src);
  755. }
  756.  
  757. glsl_to_tgsi_instruction *
  758. glsl_to_tgsi_visitor::emit(ir_instruction *ir, unsigned op)
  759. {
  760.    return emit(ir, op, undef_dst, undef_dst, undef_src, undef_src, undef_src, undef_src);
  761. }
  762.  
  763. /**
  764.  * Determines whether to use an integer, unsigned integer, or float opcode
  765.  * based on the operands and the input opcode, and returns the opcode to use.
  766.  */
  767. unsigned
  768. glsl_to_tgsi_visitor::get_opcode(ir_instruction *ir, unsigned op,
  769.                                  st_dst_reg dst,
  770.                                  st_src_reg src0, st_src_reg src1)
  771. {
  772.    int type = GLSL_TYPE_FLOAT;
  773.  
  774.    if (op == TGSI_OPCODE_MOV)
  775.        return op;
  776.  
  777.    assert(src0.type != GLSL_TYPE_ARRAY);
  778.    assert(src0.type != GLSL_TYPE_STRUCT);
  779.    assert(src1.type != GLSL_TYPE_ARRAY);
  780.    assert(src1.type != GLSL_TYPE_STRUCT);
  781.  
  782.    if (src0.type == GLSL_TYPE_DOUBLE || src1.type == GLSL_TYPE_DOUBLE)
  783.       type = GLSL_TYPE_DOUBLE;
  784.    else if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT)
  785.       type = GLSL_TYPE_FLOAT;
  786.    else if (native_integers)
  787.       type = src0.type == GLSL_TYPE_BOOL ? GLSL_TYPE_INT : src0.type;
  788.  
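/* The caseN macros below select the integer/unsigned/double variant of a
 * "canonical" float opcode according to the operand type computed above.
 * case2iu() has no float variant; it expands to TGSI_OPCODE_LAST, which the
 * assert at the end of this function will catch.
 */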
  789. #define case5(c, f, i, u, d)                    \
  790.    case TGSI_OPCODE_##c: \
  791.       if (type == GLSL_TYPE_DOUBLE)           \
  792.          op = TGSI_OPCODE_##d; \
  793.       else if (type == GLSL_TYPE_INT)       \
  794.          op = TGSI_OPCODE_##i; \
  795.       else if (type == GLSL_TYPE_UINT) \
  796.          op = TGSI_OPCODE_##u; \
  797.       else \
  798.          op = TGSI_OPCODE_##f; \
  799.       break;
  800.  
  801. #define case4(c, f, i, u)                    \
  802.    case TGSI_OPCODE_##c: \
  803.       if (type == GLSL_TYPE_INT) \
  804.          op = TGSI_OPCODE_##i; \
  805.       else if (type == GLSL_TYPE_UINT) \
  806.          op = TGSI_OPCODE_##u; \
  807.       else \
  808.          op = TGSI_OPCODE_##f; \
  809.       break;
  810.  
  811. #define case3(f, i, u)  case4(f, f, i, u)
  812. #define case4d(f, i, u, d)  case5(f, f, i, u, d)
  813. #define case3fid(f, i, d) case5(f, f, i, i, d)
  814. #define case2fi(f, i)   case4(f, f, i, i)
  815. #define case2iu(i, u)   case4(i, LAST, i, u)
  816.  
  817. #define casecomp(c, f, i, u, d)                   \
  818.    case TGSI_OPCODE_##c: \
  819.       if (type == GLSL_TYPE_DOUBLE) \
  820.          op = TGSI_OPCODE_##d; \
  821.       else if (type == GLSL_TYPE_INT)       \
  822.          op = TGSI_OPCODE_##i; \
  823.       else if (type == GLSL_TYPE_UINT) \
  824.          op = TGSI_OPCODE_##u; \
  825.       else if (native_integers) \
  826.          op = TGSI_OPCODE_##f; \
  827.       else \
  828.          op = TGSI_OPCODE_##c; \
  829.       break;
  830.  
  831.    switch(op) {
  832.       case3fid(ADD, UADD, DADD);
  833.       case3fid(MUL, UMUL, DMUL);
  834.       case3fid(MAD, UMAD, DMAD);
  835.       case3fid(FMA, UMAD, DFMA);
  836.       case3(DIV, IDIV, UDIV);
  837.       case4d(MAX, IMAX, UMAX, DMAX);
  838.       case4d(MIN, IMIN, UMIN, DMIN);
  839.       case2iu(MOD, UMOD);
  840.  
  841.       casecomp(SEQ, FSEQ, USEQ, USEQ, DSEQ);
  842.       casecomp(SNE, FSNE, USNE, USNE, DSNE);
  843.       casecomp(SGE, FSGE, ISGE, USGE, DSGE);
  844.       casecomp(SLT, FSLT, ISLT, USLT, DSLT);
  845.  
  846.       case2iu(ISHR, USHR);
  847.  
  848.       case3fid(SSG, ISSG, DSSG);
  849.       case3fid(ABS, IABS, DABS);
  850.  
  851.       case2iu(IBFE, UBFE);
  852.       case2iu(IMSB, UMSB);
  853.       case2iu(IMUL_HI, UMUL_HI);
  854.  
  855.       case3fid(SQRT, SQRT, DSQRT);
  856.  
  857.       case3fid(RCP, RCP, DRCP);
  858.       case3fid(RSQ, RSQ, DRSQ);
  859.  
  860.       case3fid(FRC, FRC, DFRAC);
  861.       case3fid(TRUNC, TRUNC, DTRUNC);
  862.       case3fid(CEIL, CEIL, DCEIL);
  863.       case3fid(FLR, FLR, DFLR);
  864.       case3fid(ROUND, ROUND, DROUND);
  865.  
  866.       default: break;
  867.    }
  868.  
  869.    assert(op != TGSI_OPCODE_LAST);
  870.    return op;
  871. }
  872.  
  873. glsl_to_tgsi_instruction *
  874. glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir,
  875.                               st_dst_reg dst, st_src_reg src0, st_src_reg src1,
  876.                               unsigned elements)
  877. {
  878.    static const unsigned dot_opcodes[] = {
  879.       TGSI_OPCODE_DP2, TGSI_OPCODE_DP3, TGSI_OPCODE_DP4
  880.    };
  881.  
  882.    return emit(ir, dot_opcodes[elements - 2], dst, src0, src1);
  883. }
  884.  
  885. /**
  886.  * Emits TGSI scalar opcodes to produce unique answers across channels.
  887.  *
  888.  * Some TGSI opcodes are scalar-only, like ARB_fp/vp.  The src X
  889.  * channel determines the result across all channels.  So to apply such an
  890.  * operation to a vec4, we emit one scalar instruction per distinct source
  891.  * channel used to produce the dest channels.
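 *
 * For example, RCP of a source swizzled .xxyy with a full XYZW writemask
 * emits only two instructions: one computing .xy from src.x and one
 * computing .zw from src.y.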
  892.  */
  893. void
  894. glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op,
  895.                                   st_dst_reg dst,
  896.                                   st_src_reg orig_src0, st_src_reg orig_src1)
  897. {
  898.    int i, j;
  899.    int done_mask = ~dst.writemask;
  900.  
  901.    /* TGSI RCP is a scalar operation splatting results to all channels,
  902.     * like ARB_fp/vp.  So emit as many RCPs as necessary to cover our
  903.     * dst channels.
  904.     */
  905.    for (i = 0; i < 4; i++) {
  906.       GLuint this_mask = (1 << i);
  907.       st_src_reg src0 = orig_src0;
  908.       st_src_reg src1 = orig_src1;
  909.  
  910.       if (done_mask & this_mask)
  911.          continue;
  912.  
  913.       GLuint src0_swiz = GET_SWZ(src0.swizzle, i);
  914.       GLuint src1_swiz = GET_SWZ(src1.swizzle, i);
  915.       for (j = i + 1; j < 4; j++) {
  916.          /* If there is another enabled component in the destination that is
  917.           * derived from the same inputs, generate its value on this pass as
  918.           * well.
  919.           */
  920.          if (!(done_mask & (1 << j)) &&
  921.              GET_SWZ(src0.swizzle, j) == src0_swiz &&
  922.              GET_SWZ(src1.swizzle, j) == src1_swiz) {
  923.             this_mask |= (1 << j);
  924.          }
  925.       }
  926.       src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz,
  927.                                    src0_swiz, src0_swiz);
  928.       src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz,
  929.                                    src1_swiz, src1_swiz);
  930.  
  931.       dst.writemask = this_mask;
  932.       emit(ir, op, dst, src0, src1);
  933.       done_mask |= this_mask;
  934.    }
  935. }
  936.  
  937. void
  938. glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, unsigned op,
  939.                                   st_dst_reg dst, st_src_reg src0)
  940. {
  941.    st_src_reg undef = undef_src;
  942.  
  943.    undef.swizzle = SWIZZLE_XXXX;
  944.  
  945.    emit_scalar(ir, op, dst, src0, undef);
  946. }
  947.  
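/* Load an address register from src0, using UARL for integer sources and ARL
 * otherwise, and grow num_address_regs to cover dst.index.
 */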
  948. void
  949. glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir,
  950.                                st_dst_reg dst, st_src_reg src0)
  951. {
  952.    int op = TGSI_OPCODE_ARL;
  953.  
  954.    if (src0.type == GLSL_TYPE_INT || src0.type == GLSL_TYPE_UINT)
  955.       op = TGSI_OPCODE_UARL;
  956.  
  957.    assert(dst.file == PROGRAM_ADDRESS);
  958.    if (dst.index >= this->num_address_regs)
  959.       this->num_address_regs = dst.index + 1;
  960.  
  961.    emit(NULL, op, dst, src0);
  962. }
  963.  
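/**
 * Add a constant value to the program.  PROGRAM_CONSTANT values go into the
 * Mesa parameter list; PROGRAM_IMMEDIATE values are appended to
 * this->immediates, reusing an existing entry when an identical immediate is
 * already stored.  Returns the index of the value within its file.
 */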
  964. int
  965. glsl_to_tgsi_visitor::add_constant(gl_register_file file,
  966.                                    gl_constant_value values[8], int size, int datatype,
  967.                                    GLuint *swizzle_out)
  968. {
  969.    if (file == PROGRAM_CONSTANT) {
  970.       return _mesa_add_typed_unnamed_constant(this->prog->Parameters, values,
  971.                                               size, datatype, swizzle_out);
  972.    }
  973.  
  974.    assert(file == PROGRAM_IMMEDIATE);
  975.  
  976.    int index = 0;
  977.    immediate_storage *entry;
  978.    int size32 = size * (datatype == GL_DOUBLE ? 2 : 1);
  979.    int i;
  980.  
  981.    /* Search immediate storage to see if we already have an identical
  982.     * immediate that we can use instead of adding a duplicate entry.
  983.     */
  984.    foreach_in_list(immediate_storage, entry, &this->immediates) {
  985.       immediate_storage *tmp = entry;
  986.  
  987.       for (i = 0; i * 4 < size32; i++) {
  988.          int slot_size = MIN2(size32 - (i * 4), 4);
  989.          if (tmp->type != datatype || tmp->size32 != slot_size)
  990.             break;
  991.          if (memcmp(tmp->values, &values[i * 4],
  992.                     slot_size * sizeof(gl_constant_value)))
  993.             break;
  994.  
  995.          /* Everything matches, keep going until the full size is matched */
  996.          tmp = (immediate_storage *)tmp->next;
  997.       }
  998.  
  999.       /* The full value matched */
  1000.       if (i * 4 >= size32)
  1001.          return index;
  1002.  
  1003.       index++;
  1004.    }
  1005.  
  1006.    for (i = 0; i * 4 < size32; i++) {
  1007.       int slot_size = MIN2(size32 - (i * 4), 4);
  1008.       /* Add this immediate to the list. */
  1009.       entry = new(mem_ctx) immediate_storage(&values[i * 4], slot_size, datatype);
  1010.       this->immediates.push_tail(entry);
  1011.       this->num_immediates++;
  1012.    }
  1013.    return index;
  1014. }
  1015.  
  1016. st_src_reg
  1017. glsl_to_tgsi_visitor::st_src_reg_for_float(float val)
  1018. {
  1019.    st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_FLOAT);
  1020.    union gl_constant_value uval;
  1021.  
  1022.    uval.f = val;
  1023.    src.index = add_constant(src.file, &uval, 1, GL_FLOAT, &src.swizzle);
  1024.  
  1025.    return src;
  1026. }
  1027.  
  1028. st_src_reg
  1029. glsl_to_tgsi_visitor::st_src_reg_for_double(double val)
  1030. {
  1031.    st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_DOUBLE);
  1032.    union gl_constant_value uval[2];
  1033.  
  1034.    uval[0].u = *(uint32_t *)&val;
  1035.    uval[1].u = *(((uint32_t *)&val) + 1);
  1036.    src.index = add_constant(src.file, uval, 1, GL_DOUBLE, &src.swizzle);
  1037.  
  1038.    return src;
  1039. }
  1040.  
  1041. st_src_reg
  1042. glsl_to_tgsi_visitor::st_src_reg_for_int(int val)
  1043. {
  1044.    st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_INT);
  1045.    union gl_constant_value uval;
  1046.  
  1047.    assert(native_integers);
  1048.  
  1049.    uval.i = val;
  1050.    src.index = add_constant(src.file, &uval, 1, GL_INT, &src.swizzle);
  1051.  
  1052.    return src;
  1053. }
  1054.  
  1055. st_src_reg
  1056. glsl_to_tgsi_visitor::st_src_reg_for_type(int type, int val)
  1057. {
  1058.    if (native_integers)
  1059.       return type == GLSL_TYPE_FLOAT ? st_src_reg_for_float(val) :
  1060.                                        st_src_reg_for_int(val);
  1061.    else
  1062.       return st_src_reg_for_float(val);
  1063. }
  1064.  
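/**
 * Return the number of vec4 slots a variable of the given type occupies in a
 * PROGRAM_* register file.
 */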
  1065. static int
  1066. type_size(const struct glsl_type *type)
  1067. {
  1068.    unsigned int i;
  1069.    int size;
  1070.  
  1071.    switch (type->base_type) {
  1072.    case GLSL_TYPE_UINT:
  1073.    case GLSL_TYPE_INT:
  1074.    case GLSL_TYPE_FLOAT:
  1075.    case GLSL_TYPE_BOOL:
  1076.       if (type->is_matrix()) {
  1077.          return type->matrix_columns;
  1078.       } else {
  1079.          /* Regardless of size of vector, it gets a vec4. This is bad
  1080.           * packing for things like floats, but otherwise arrays become a
  1081.           * mess.  Hopefully a later pass over the code can pack scalars
  1082.           * down if appropriate.
  1083.           */
  1084.          return 1;
  1085.       }
  1086.       break;
  1087.    case GLSL_TYPE_DOUBLE:
  1088.       if (type->is_matrix()) {
  1089.          if (type->vector_elements <= 2)
  1090.             return type->matrix_columns;
  1091.          else
  1092.             return type->matrix_columns * 2;
  1093.       } else {
  1094.          /* A double or dvec2 fits in one vec4; dvec3 and dvec4 need
  1095.           * two vec4s.
  1096.           */
  1097.          if (type->vector_elements <= 2)
  1098.             return 1;
  1099.          else
  1100.             return 2;
  1101.       }
  1102.       break;
  1103.    case GLSL_TYPE_ARRAY:
  1104.       assert(type->length > 0);
  1105.       return type_size(type->fields.array) * type->length;
  1106.    case GLSL_TYPE_STRUCT:
  1107.       size = 0;
  1108.       for (i = 0; i < type->length; i++) {
  1109.          size += type_size(type->fields.structure[i].type);
  1110.       }
  1111.       return size;
  1112.    case GLSL_TYPE_SAMPLER:
  1113.    case GLSL_TYPE_IMAGE:
  1114.       /* Samplers take up one slot in UNIFORMS[], but they're baked in
  1115.        * at link time.
  1116.        */
  1117.       return 1;
  1118.    case GLSL_TYPE_ATOMIC_UINT:
  1119.    case GLSL_TYPE_INTERFACE:
  1120.    case GLSL_TYPE_VOID:
  1121.    case GLSL_TYPE_ERROR:
  1122.       assert(!"Invalid type in type_size");
  1123.       break;
  1124.    }
  1125.    return 0;
  1126. }
  1127.  
  1128. /**
  1129.  * In the initial pass of codegen, we assign temporary numbers to
  1130.  * intermediate results.  (not SSA -- variable assignments will reuse
  1131.  * storage).
  1132.  */
  1133. st_src_reg
  1134. glsl_to_tgsi_visitor::get_temp(const glsl_type *type)
  1135. {
  1136.    st_src_reg src;
  1137.  
  1138.    src.type = native_integers ? type->base_type : GLSL_TYPE_FLOAT;
  1139.    src.reladdr = NULL;
  1140.    src.negate = 0;
  1141.  
  1142.    if (!options->EmitNoIndirectTemp &&
  1143.        (type->is_array() || type->is_matrix())) {
  1144.  
  1145.       src.file = PROGRAM_ARRAY;
  1146.       src.index = next_array << 16 | 0x8000;
  1147.       array_sizes[next_array] = type_size(type);
  1148.       ++next_array;
  1149.  
  1150.    } else {
  1151.       src.file = PROGRAM_TEMPORARY;
  1152.       src.index = next_temp;
  1153.       next_temp += type_size(type);
  1154.    }
  1155.  
  1156.    if (type->is_array() || type->is_record()) {
  1157.       src.swizzle = SWIZZLE_NOOP;
  1158.    } else {
  1159.       src.swizzle = swizzle_for_size(type->vector_elements);
  1160.    }
  1161.  
  1162.    return src;
  1163. }
  1164.  
  1165. variable_storage *
  1166. glsl_to_tgsi_visitor::find_variable_storage(ir_variable *var)
  1167. {
  1168.  
  1169.    foreach_in_list(variable_storage, entry, &this->variables) {
  1170.       if (entry->var == var)
  1171.          return entry;
  1172.    }
  1173.  
  1174.    return NULL;
  1175. }
  1176.  
  1177. void
  1178. glsl_to_tgsi_visitor::visit(ir_variable *ir)
  1179. {
  1180.    if (strcmp(ir->name, "gl_FragCoord") == 0) {
  1181.       struct gl_fragment_program *fp = (struct gl_fragment_program *)this->prog;
  1182.  
  1183.       fp->OriginUpperLeft = ir->data.origin_upper_left;
  1184.       fp->PixelCenterInteger = ir->data.pixel_center_integer;
  1185.    }
  1186.  
  1187.    if (ir->data.mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) {
  1188.       unsigned int i;
  1189.       const ir_state_slot *const slots = ir->get_state_slots();
  1190.       assert(slots != NULL);
  1191.  
  1192.       /* Check if this statevar's setup in the STATE file exactly
  1193.        * matches how we'll want to reference it as a
  1194.        * struct/array/whatever.  If not, then we need to move it into
  1195.        * temporary storage and hope that it'll get copy-propagated
  1196.        * out.
  1197.        */
  1198.       for (i = 0; i < ir->get_num_state_slots(); i++) {
  1199.          if (slots[i].swizzle != SWIZZLE_XYZW) {
  1200.             break;
  1201.          }
  1202.       }
  1203.  
  1204.       variable_storage *storage;
  1205.       st_dst_reg dst;
  1206.       if (i == ir->get_num_state_slots()) {
  1207.          /* We'll set the index later. */
  1208.          storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1);
  1209.          this->variables.push_tail(storage);
  1210.  
  1211.          dst = undef_dst;
  1212.       } else {
  1213.          /* The variable_storage constructor allocates slots based on the size
  1214.           * of the type.  However, this had better match the number of state
  1215.           * elements that we're going to copy into the new temporary.
  1216.           */
  1217.          assert((int) ir->get_num_state_slots() == type_size(ir->type));
  1218.  
  1219.          dst = st_dst_reg(get_temp(ir->type));
  1220.  
  1221.          storage = new(mem_ctx) variable_storage(ir, dst.file, dst.index);
  1222.  
  1223.          this->variables.push_tail(storage);
  1224.       }
  1225.  
  1226.  
  1227.       for (unsigned int i = 0; i < ir->get_num_state_slots(); i++) {
  1228.          int index = _mesa_add_state_reference(this->prog->Parameters,
  1229.                                                (gl_state_index *)slots[i].tokens);
  1230.  
  1231.          if (storage->file == PROGRAM_STATE_VAR) {
  1232.             if (storage->index == -1) {
  1233.                storage->index = index;
  1234.             } else {
  1235.                assert(index == storage->index + (int)i);
  1236.             }
  1237.          } else {
  1238.             /* We use GLSL_TYPE_FLOAT here regardless of the actual type of
  1239.              * the data being moved since MOV does not care about the type of
  1240.              * data it is moving, and we don't want to declare registers with
  1241.              * array or struct types.
  1242.              */
  1243.             st_src_reg src(PROGRAM_STATE_VAR, index, GLSL_TYPE_FLOAT);
  1244.             src.swizzle = slots[i].swizzle;
  1245.             emit(ir, TGSI_OPCODE_MOV, dst, src);
  1246.             /* even a float takes up a whole vec4 reg in a struct/array. */
  1247.             dst.index++;
  1248.          }
  1249.       }
  1250.  
  1251.       if (storage->file == PROGRAM_TEMPORARY &&
  1252.           dst.index != storage->index + (int) ir->get_num_state_slots()) {
  1253.          fail_link(this->shader_program,
  1254.                   "failed to load builtin uniform `%s'  (%d/%d regs loaded)\n",
  1255.                   ir->name, dst.index - storage->index,
  1256.                   type_size(ir->type));
  1257.       }
  1258.    }
  1259. }
  1260.  
  1261. void
  1262. glsl_to_tgsi_visitor::visit(ir_loop *ir)
  1263. {
  1264.    emit(NULL, TGSI_OPCODE_BGNLOOP);
  1265.  
  1266.    visit_exec_list(&ir->body_instructions, this);
  1267.  
  1268.    emit(NULL, TGSI_OPCODE_ENDLOOP);
  1269. }
  1270.  
  1271. void
  1272. glsl_to_tgsi_visitor::visit(ir_loop_jump *ir)
  1273. {
  1274.    switch (ir->mode) {
  1275.    case ir_loop_jump::jump_break:
  1276.       emit(NULL, TGSI_OPCODE_BRK);
  1277.       break;
  1278.    case ir_loop_jump::jump_continue:
  1279.       emit(NULL, TGSI_OPCODE_CONT);
  1280.       break;
  1281.    }
  1282. }
  1283.  
  1284.  
  1285. void
  1286. glsl_to_tgsi_visitor::visit(ir_function_signature *ir)
  1287. {
  1288.    assert(0);
  1289.    (void)ir;
  1290. }
  1291.  
  1292. void
  1293. glsl_to_tgsi_visitor::visit(ir_function *ir)
  1294. {
  1295.    /* Ignore function bodies other than main() -- we shouldn't see calls to
  1296.     * them since they should all be inlined before we get to glsl_to_tgsi.
  1297.     */
  1298.    if (strcmp(ir->name, "main") == 0) {
  1299.       const ir_function_signature *sig;
  1300.       exec_list empty;
  1301.  
  1302.       sig = ir->matching_signature(NULL, &empty, false);
  1303.  
  1304.       assert(sig);
  1305.  
  1306.       foreach_in_list(ir_instruction, ir, &sig->body) {
  1307.          ir->accept(this);
  1308.       }
  1309.    }
  1310. }
  1311.  
  1312. bool
  1313. glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand)
  1314. {
  1315.    int nonmul_operand = 1 - mul_operand;
  1316.    st_src_reg a, b, c;
  1317.    st_dst_reg result_dst;
  1318.  
  1319.    ir_expression *expr = ir->operands[mul_operand]->as_expression();
  1320.    if (!expr || expr->operation != ir_binop_mul)
  1321.       return false;
  1322.  
  1323.    expr->operands[0]->accept(this);
  1324.    a = this->result;
  1325.    expr->operands[1]->accept(this);
  1326.    b = this->result;
  1327.    ir->operands[nonmul_operand]->accept(this);
  1328.    c = this->result;
  1329.  
  1330.    this->result = get_temp(ir->type);
  1331.    result_dst = st_dst_reg(this->result);
  1332.    result_dst.writemask = (1 << ir->type->vector_elements) - 1;
  1333.    emit(ir, TGSI_OPCODE_MAD, result_dst, a, b, c);
  1334.  
  1335.    return true;
  1336. }
  1337.  
  1338. /**
  1339.  * Emit MAD(a, -b, a) instead of AND(a, NOT(b))
  1340.  *
  1341.  * The logic values are 1.0 for true and 0.0 for false.  Logical-and is
  1342.  * implemented using multiplication, and logical-or is implemented using
  1343.  * addition.  Logical-not can be implemented as (true - x), or (1.0 - x).
  1344.  * As a result, the logical expression (a & !b) can be rewritten as:
  1345.  *
  1346.  *     - a * !b
  1347.  *     - a * (1 - b)
  1348.  *     - (a * 1) - (a * b)
  1349.  *     - a + -(a * b)
  1350.  *     - a + (a * -b)
  1351.  *
  1352.  * This final expression can be implemented as a single MAD(a, -b, a)
  1353.  * instruction.
  1354.  */
  1355. bool
  1356. glsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand)
  1357. {
  1358.    const int other_operand = 1 - try_operand;
  1359.    st_src_reg a, b;
  1360.  
  1361.    ir_expression *expr = ir->operands[try_operand]->as_expression();
  1362.    if (!expr || expr->operation != ir_unop_logic_not)
  1363.       return false;
  1364.  
  1365.    ir->operands[other_operand]->accept(this);
  1366.    a = this->result;
  1367.    expr->operands[0]->accept(this);
  1368.    b = this->result;
  1369.  
  1370.    b.negate = ~b.negate;
  1371.  
  1372.    this->result = get_temp(ir->type);
  1373.    emit(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), a, b, a);
  1374.  
  1375.    return true;
  1376. }
  1377.  
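/* Resolve relative addressing on a source register: emit the ARL(s) for its
 * reladdr/reladdr2 and, unless this is the last remaining reladdr operand
 * (which can keep using the address registers directly), copy the value into
 * a plain temporary.
 */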
  1378. void
  1379. glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir,
  1380.                                       st_src_reg *reg, int *num_reladdr)
  1381. {
  1382.    if (!reg->reladdr && !reg->reladdr2)
  1383.       return;
  1384.  
  1385.    if (reg->reladdr) emit_arl(ir, address_reg, *reg->reladdr);
  1386.    if (reg->reladdr2) emit_arl(ir, address_reg2, *reg->reladdr2);
  1387.  
  1388.    if (*num_reladdr != 1) {
  1389.       st_src_reg temp = get_temp(glsl_type::vec4_type);
  1390.  
  1391.       emit(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), *reg);
  1392.       *reg = temp;
  1393.    }
  1394.  
  1395.    (*num_reladdr)--;
  1396. }
  1397.  
  1398. void
  1399. glsl_to_tgsi_visitor::visit(ir_expression *ir)
  1400. {
  1401.    unsigned int operand;
  1402.    st_src_reg op[ARRAY_SIZE(ir->operands)];
  1403.    st_src_reg result_src;
  1404.    st_dst_reg result_dst;
  1405.  
  1406.    /* Quick peephole: Emit MAD(a, b, c) instead of ADD(MUL(a, b), c)
  1407.     */
  1408.    if (ir->operation == ir_binop_add) {
  1409.       if (try_emit_mad(ir, 1))
  1410.          return;
  1411.       if (try_emit_mad(ir, 0))
  1412.          return;
  1413.    }
  1414.  
  1415.    /* Quick peephole: Emit MAD(a, -b, a) instead of AND(a, NOT(b))
  1416.     */
  1417.    if (!native_integers && ir->operation == ir_binop_logic_and) {
  1418.       if (try_emit_mad_for_and_not(ir, 1))
  1419.          return;
  1420.       if (try_emit_mad_for_and_not(ir, 0))
  1421.          return;
  1422.    }
  1423.  
  1424.    if (ir->operation == ir_quadop_vector)
  1425.       assert(!"ir_quadop_vector should have been lowered");
  1426.  
  1427.    for (operand = 0; operand < ir->get_num_operands(); operand++) {
  1428.       this->result.file = PROGRAM_UNDEFINED;
  1429.       ir->operands[operand]->accept(this);
  1430.       if (this->result.file == PROGRAM_UNDEFINED) {
  1431.          printf("Failed to get tree for expression operand:\n");
  1432.          ir->operands[operand]->print();
  1433.          printf("\n");
  1434.          exit(1);
  1435.       }
  1436.       op[operand] = this->result;
  1437.  
  1438.       /* Matrix expression operands should have been broken down to vector
  1439.        * operations already.
  1440.        */
  1441.       assert(!ir->operands[operand]->type->is_matrix());
  1442.    }
  1443.  
  1444.    int vector_elements = ir->operands[0]->type->vector_elements;
  1445.    if (ir->operands[1]) {
  1446.       vector_elements = MAX2(vector_elements,
  1447.                              ir->operands[1]->type->vector_elements);
  1448.    }
  1449.  
  1450.    this->result.file = PROGRAM_UNDEFINED;
  1451.  
  1452.    /* Storage for our result.  Ideally for an assignment we'd be using
  1453.     * the actual storage for the result here, instead.
  1454.     */
  1455.    result_src = get_temp(ir->type);
  1456.    /* convenience for the emit functions below. */
  1457.    result_dst = st_dst_reg(result_src);
  1458.    /* Limit writes to the channels that will be used by result_src later.
  1459.     * This does limit this temp's use as a temporary for multi-instruction
  1460.     * sequences.
  1461.     */
  1462.    result_dst.writemask = (1 << ir->type->vector_elements) - 1;
  1463.  
  1464.    switch (ir->operation) {
  1465.    case ir_unop_logic_not:
  1466.       if (result_dst.type != GLSL_TYPE_FLOAT)
  1467.          emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]);
  1468.       else {
  1469.          /* Previously 'SEQ dst, src, 0.0' was used for this.  However, many
  1470.           * older GPUs implement SEQ using multiple instructions (i915 uses two
  1471.           * SGE instructions and a MUL instruction).  Since our logic values are
  1472.           * 0.0 and 1.0, 1-x also implements !x.
  1473.           */
  1474.          op[0].negate = ~op[0].negate;
  1475.          emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], st_src_reg_for_float(1.0));
  1476.       }
  1477.       break;
  1478.    case ir_unop_neg:
  1479.       if (result_dst.type == GLSL_TYPE_INT || result_dst.type == GLSL_TYPE_UINT)
  1480.          emit(ir, TGSI_OPCODE_INEG, result_dst, op[0]);
  1481.       else if (result_dst.type == GLSL_TYPE_DOUBLE)
  1482.          emit(ir, TGSI_OPCODE_DNEG, result_dst, op[0]);
  1483.       else {
  1484.          op[0].negate = ~op[0].negate;
  1485.          result_src = op[0];
  1486.       }
  1487.       break;
  1488.    case ir_unop_abs:
  1489.       emit(ir, TGSI_OPCODE_ABS, result_dst, op[0]);
  1490.       break;
  1491.    case ir_unop_sign:
  1492.       emit(ir, TGSI_OPCODE_SSG, result_dst, op[0]);
  1493.       break;
  1494.    case ir_unop_rcp:
  1495.       emit_scalar(ir, TGSI_OPCODE_RCP, result_dst, op[0]);
  1496.       break;
  1497.  
  1498.    case ir_unop_exp2:
  1499.       emit_scalar(ir, TGSI_OPCODE_EX2, result_dst, op[0]);
  1500.       break;
  1501.    case ir_unop_exp:
  1502.    case ir_unop_log:
  1503.       assert(!"not reached: should be handled by ir_explog_to_explog2");
  1504.       break;
  1505.    case ir_unop_log2:
  1506.       emit_scalar(ir, TGSI_OPCODE_LG2, result_dst, op[0]);
  1507.       break;
  1508.    case ir_unop_sin:
  1509.       emit_scalar(ir, TGSI_OPCODE_SIN, result_dst, op[0]);
  1510.       break;
  1511.    case ir_unop_cos:
  1512.       emit_scalar(ir, TGSI_OPCODE_COS, result_dst, op[0]);
  1513.       break;
  1514.    case ir_unop_saturate: {
  1515.       glsl_to_tgsi_instruction *inst;
  1516.       inst = emit(ir, TGSI_OPCODE_MOV, result_dst, op[0]);
  1517.       inst->saturate = true;
  1518.       break;
  1519.    }
  1520.  
  1521.    case ir_unop_dFdx:
  1522.    case ir_unop_dFdx_coarse:
  1523.       emit(ir, TGSI_OPCODE_DDX, result_dst, op[0]);
  1524.       break;
  1525.    case ir_unop_dFdx_fine:
  1526.       emit(ir, TGSI_OPCODE_DDX_FINE, result_dst, op[0]);
  1527.       break;
  1528.    case ir_unop_dFdy:
  1529.    case ir_unop_dFdy_coarse:
  1530.    case ir_unop_dFdy_fine:
  1531.    {
  1532.       /* The X component contains 1 or -1 depending on whether the framebuffer
  1533.        * is a FBO or the window system buffer, respectively.
  1534.        * It is then multiplied by the source operand of DDY.
  1535.        */
  1536.       static const gl_state_index transform_y_state[STATE_LENGTH]
  1537.          = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM };
  1538.  
  1539.       unsigned transform_y_index =
  1540.          _mesa_add_state_reference(this->prog->Parameters,
  1541.                                    transform_y_state);
  1542.  
  1543.       st_src_reg transform_y = st_src_reg(PROGRAM_STATE_VAR,
  1544.                                           transform_y_index,
  1545.                                           glsl_type::vec4_type);
  1546.       transform_y.swizzle = SWIZZLE_XXXX;
  1547.  
  1548.       st_src_reg temp = get_temp(glsl_type::vec4_type);
  1549.  
  1550.       emit(ir, TGSI_OPCODE_MUL, st_dst_reg(temp), transform_y, op[0]);
  1551.       emit(ir, ir->operation == ir_unop_dFdy_fine ?
  1552.            TGSI_OPCODE_DDY_FINE : TGSI_OPCODE_DDY, result_dst, temp);
  1553.       break;
  1554.    }
  1555.  
  1556.    case ir_unop_frexp_sig:
  1557.       emit(ir, TGSI_OPCODE_DFRACEXP, result_dst, undef_dst, op[0]);
  1558.       break;
  1559.  
  1560.    case ir_unop_frexp_exp:
  1561.       emit(ir, TGSI_OPCODE_DFRACEXP, undef_dst, result_dst, op[0]);
  1562.       break;
  1563.  
  1564.    case ir_unop_noise: {
  1565.       /* At some point, a motivated person could add a better
  1566.        * implementation of noise.  Currently not even the nvidia
  1567.        * binary drivers do anything more than this.  In any case, the
  1568.        * place to do this is in the GL state tracker, not the poor
  1569.        * driver.
  1570.        */
  1571.       emit(ir, TGSI_OPCODE_MOV, result_dst, st_src_reg_for_float(0.5));
  1572.       break;
  1573.    }
  1574.  
  1575.    case ir_binop_add:
  1576.       emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
  1577.       break;
  1578.    case ir_binop_sub:
  1579.       emit(ir, TGSI_OPCODE_SUB, result_dst, op[0], op[1]);
  1580.       break;
  1581.  
  1582.    case ir_binop_mul:
  1583.       emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
  1584.       break;
  1585.    case ir_binop_div:
  1586.       if (result_dst.type == GLSL_TYPE_FLOAT || result_dst.type == GLSL_TYPE_DOUBLE)
  1587.          assert(!"not reached: should be handled by ir_div_to_mul_rcp");
  1588.       else
  1589.          emit(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]);
  1590.       break;
  1591.    case ir_binop_mod:
  1592.       if (result_dst.type == GLSL_TYPE_FLOAT)
  1593.          assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
  1594.       else
  1595.          emit(ir, TGSI_OPCODE_MOD, result_dst, op[0], op[1]);
  1596.       break;
  1597.  
  1598.    case ir_binop_less:
  1599.       emit(ir, TGSI_OPCODE_SLT, result_dst, op[0], op[1]);
  1600.       break;
  1601.    case ir_binop_greater:
  1602.       emit(ir, TGSI_OPCODE_SLT, result_dst, op[1], op[0]);
  1603.       break;
  1604.    case ir_binop_lequal:
  1605.       emit(ir, TGSI_OPCODE_SGE, result_dst, op[1], op[0]);
  1606.       break;
  1607.    case ir_binop_gequal:
  1608.       emit(ir, TGSI_OPCODE_SGE, result_dst, op[0], op[1]);
  1609.       break;
  1610.    case ir_binop_equal:
  1611.       emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]);
  1612.       break;
  1613.    case ir_binop_nequal:
  1614.       emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
  1615.       break;
  1616.    case ir_binop_all_equal:
  1617.       /* "==" operator producing a scalar boolean. */
  1618.       if (ir->operands[0]->type->is_vector() ||
  1619.           ir->operands[1]->type->is_vector()) {
  1620.          st_src_reg temp = get_temp(native_integers ?
  1621.                                     glsl_type::uvec4_type :
  1622.                                     glsl_type::vec4_type);
  1623.  
  1624.          if (native_integers) {
  1625.             st_dst_reg temp_dst = st_dst_reg(temp);
  1626.             st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp);
  1627.  
  1628.             emit(ir, TGSI_OPCODE_SEQ, st_dst_reg(temp), op[0], op[1]);
  1629.  
  1630.             /* Emit 1-3 AND operations to combine the SEQ results. */
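                    /* e.g. for vec4 operands: temp.x = seq.x & seq.y and
                     * temp.y = seq.z & seq.w, then result = temp.x & temp.y.
                     */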
  1631.             switch (ir->operands[0]->type->vector_elements) {
  1632.             case 2:
  1633.                break;
  1634.             case 3:
  1635.                temp_dst.writemask = WRITEMASK_Y;
  1636.                temp1.swizzle = SWIZZLE_YYYY;
  1637.                temp2.swizzle = SWIZZLE_ZZZZ;
  1638.                emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
  1639.                break;
  1640.             case 4:
  1641.                temp_dst.writemask = WRITEMASK_X;
  1642.                temp1.swizzle = SWIZZLE_XXXX;
  1643.                temp2.swizzle = SWIZZLE_YYYY;
  1644.                emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
  1645.                temp_dst.writemask = WRITEMASK_Y;
  1646.                temp1.swizzle = SWIZZLE_ZZZZ;
  1647.                temp2.swizzle = SWIZZLE_WWWW;
  1648.                emit(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2);
  1649.             }
  1650.  
  1651.             temp1.swizzle = SWIZZLE_XXXX;
  1652.             temp2.swizzle = SWIZZLE_YYYY;
  1653.             emit(ir, TGSI_OPCODE_AND, result_dst, temp1, temp2);
  1654.          } else {
  1655.             emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
  1656.  
  1657.             /* After the dot-product of the SNE results, the value will be
  1658.              * an integer on the range [0,4]; zero means every component matched.
  1659.              */
  1660.             emit_dp(ir, result_dst, temp, temp, vector_elements);
  1661.  
  1662.             /* Negating the result of the dot-product gives values on the range
  1663.              * [-4, 0].  Zero becomes 1.0, and negative values become zero.
  1664.              * This is achieved using SGE.
  1665.              */
  1666.             st_src_reg sge_src = result_src;
  1667.             sge_src.negate = ~sge_src.negate;
  1668.             emit(ir, TGSI_OPCODE_SGE, result_dst, sge_src, st_src_reg_for_float(0.0));
  1669.          }
  1670.       } else {
  1671.          emit(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]);
  1672.       }
  1673.       break;
  1674.    case ir_binop_any_nequal:
  1675.       /* "!=" operator producing a scalar boolean. */
  1676.       if (ir->operands[0]->type->is_vector() ||
  1677.           ir->operands[1]->type->is_vector()) {
  1678.          st_src_reg temp = get_temp(native_integers ?
  1679.                                     glsl_type::uvec4_type :
  1680.                                     glsl_type::vec4_type);
  1681.          emit(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]);
  1682.  
  1683.          if (native_integers) {
  1684.             st_dst_reg temp_dst = st_dst_reg(temp);
  1685.             st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp);
  1686.  
  1687.             /* Emit 1-3 OR operations to combine the SNE results. */
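                    /* e.g. for vec4 operands: temp.x = sne.x | sne.y and
                     * temp.y = sne.z | sne.w, then result = temp.x | temp.y.
                     */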
  1688.             switch (ir->operands[0]->type->vector_elements) {
  1689.             case 2:
  1690.                break;
  1691.             case 3:
  1692.                temp_dst.writemask = WRITEMASK_Y;
  1693.                temp1.swizzle = SWIZZLE_YYYY;
  1694.                temp2.swizzle = SWIZZLE_ZZZZ;
  1695.                emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2);
  1696.                break;
  1697.             case 4:
  1698.                temp_dst.writemask = WRITEMASK_X;
  1699.                temp1.swizzle = SWIZZLE_XXXX;
  1700.                temp2.swizzle = SWIZZLE_YYYY;
  1701.                emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2);
  1702.                temp_dst.writemask = WRITEMASK_Y;
  1703.                temp1.swizzle = SWIZZLE_ZZZZ;
  1704.                temp2.swizzle = SWIZZLE_WWWW;
  1705.                emit(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2);
  1706.             }
  1707.  
  1708.             temp1.swizzle = SWIZZLE_XXXX;
  1709.             temp2.swizzle = SWIZZLE_YYYY;
  1710.             emit(ir, TGSI_OPCODE_OR, result_dst, temp1, temp2);
  1711.          } else {
  1712.             /* After the dot-product, the value will be an integer on the
  1713.              * range [0,4].  Zero stays zero, and positive values become 1.0.
  1714.              */
  1715.             glsl_to_tgsi_instruction *const dp =
  1716.                   emit_dp(ir, result_dst, temp, temp, vector_elements);
  1717.             if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
  1718.                /* The clamping to [0,1] can be done for free in the fragment
  1719.                 * shader with a saturate.
  1720.                 */
  1721.                dp->saturate = true;
  1722.             } else {
  1723.                /* Negating the result of the dot-product gives values on the range
  1724.                 * [-4, 0].  Zero stays zero, and negative values become 1.0.  This
  1725.                 * is achieved using SLT.
  1726.                 */
  1727.                st_src_reg slt_src = result_src;
  1728.                slt_src.negate = ~slt_src.negate;
  1729.                emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
  1730.             }
  1731.          }
  1732.       } else {
  1733.          emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
  1734.       }
  1735.       break;
  1736.  
  1737.    case ir_unop_any: {
  1738.       assert(ir->operands[0]->type->is_vector());
  1739.  
  1740.       if (native_integers) {
  1741.          int dst_swizzle = 0, op0_swizzle, i;
  1742.          st_src_reg accum = op[0];
  1743.  
  1744.          op0_swizzle = op[0].swizzle;
  1745.          accum.swizzle = MAKE_SWIZZLE4(GET_SWZ(op0_swizzle, 0),
  1746.                                        GET_SWZ(op0_swizzle, 0),
  1747.                                        GET_SWZ(op0_swizzle, 0),
  1748.                                        GET_SWZ(op0_swizzle, 0));
  1749.          for (i = 0; i < 4; i++) {
  1750.             if (result_dst.writemask & (1 << i)) {
  1751.                dst_swizzle = MAKE_SWIZZLE4(i, i, i, i);
  1752.                break;
  1753.             }
  1754.          }
  1755.          assert(i != 4);
  1756.          assert(ir->operands[0]->type->is_boolean());
  1757.  
  1758.          /* OR all the components together, since they should be either 0 or ~0
  1759.           */
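                  /* e.g. for a bvec3 this computes result = (b.x | b.z) | b.y
                   * through the fall-through cases below.
                   */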
  1760.          switch (ir->operands[0]->type->vector_elements) {
  1761.          case 4:
  1762.             op[0].swizzle = MAKE_SWIZZLE4(GET_SWZ(op0_swizzle, 3),
  1763.                                           GET_SWZ(op0_swizzle, 3),
  1764.                                           GET_SWZ(op0_swizzle, 3),
  1765.                                           GET_SWZ(op0_swizzle, 3));
  1766.             emit(ir, TGSI_OPCODE_OR, result_dst, accum, op[0]);
  1767.             accum = st_src_reg(result_dst);
  1768.             accum.swizzle = dst_swizzle;
  1769.             /* fallthrough */
  1770.          case 3:
  1771.             op[0].swizzle = MAKE_SWIZZLE4(GET_SWZ(op0_swizzle, 2),
  1772.                                           GET_SWZ(op0_swizzle, 2),
  1773.                                           GET_SWZ(op0_swizzle, 2),
  1774.                                           GET_SWZ(op0_swizzle, 2));
  1775.             emit(ir, TGSI_OPCODE_OR, result_dst, accum, op[0]);
  1776.             accum = st_src_reg(result_dst);
  1777.             accum.swizzle = dst_swizzle;
  1778.             /* fallthrough */
  1779.          case 2:
  1780.             op[0].swizzle = MAKE_SWIZZLE4(GET_SWZ(op0_swizzle, 1),
  1781.                                           GET_SWZ(op0_swizzle, 1),
  1782.                                           GET_SWZ(op0_swizzle, 1),
  1783.                                           GET_SWZ(op0_swizzle, 1));
  1784.             emit(ir, TGSI_OPCODE_OR, result_dst, accum, op[0]);
  1785.             break;
  1786.          default:
  1787.             assert(!"Unexpected vector size");
  1788.             break;
  1789.          }
  1790.       } else {
  1791.          /* After the dot-product, the value will be an integer on the
  1792.           * range [0,4].  Zero stays zero, and positive values become 1.0.
  1793.           */
  1794.          glsl_to_tgsi_instruction *const dp =
  1795.             emit_dp(ir, result_dst, op[0], op[0],
  1796.                     ir->operands[0]->type->vector_elements);
  1797.          if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
  1798.              result_dst.type == GLSL_TYPE_FLOAT) {
  1799.             /* The clamping to [0,1] can be done for free in the fragment
  1800.              * shader with a saturate.
  1801.              */
  1802.             dp->saturate = true;
  1803.          } else if (result_dst.type == GLSL_TYPE_FLOAT) {
  1804.             /* Negating the result of the dot-product gives values on the range
  1805.              * [-4, 0].  Zero stays zero, and negative values become 1.0.  This
  1806.              * is achieved using SLT.
  1807.              */
  1808.             st_src_reg slt_src = result_src;
  1809.             slt_src.negate = ~slt_src.negate;
  1810.             emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
  1811.          }
  1812.          else {
  1813.             /* Use SNE 0 if integers are being used as boolean values. */
  1814.             emit(ir, TGSI_OPCODE_SNE, result_dst, result_src, st_src_reg_for_int(0));
  1815.          }
  1816.       }
  1817.       break;
  1818.    }
  1819.  
  1820.    case ir_binop_logic_xor:
  1821.       if (native_integers)
  1822.          emit(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]);
  1823.       else
  1824.          emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]);
  1825.       break;
  1826.  
  1827.    case ir_binop_logic_or: {
  1828.       if (native_integers) {
  1829.          /* If integers are used as booleans, we can use an actual "or"
  1830.           * instruction.
  1831.           */
  1832.          assert(native_integers);
  1833.          emit(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]);
  1834.       } else {
  1835.          /* After the addition, the value will be an integer on the
  1836.           * range [0,2].  Zero stays zero, and positive values become 1.0.
  1837.           */
  1838.          glsl_to_tgsi_instruction *add =
  1839.             emit(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]);
  1840.          if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
  1841.             /* The clamping to [0,1] can be done for free in the fragment
  1842.              * shader with a saturate if floats are being used as boolean values.
  1843.              */
  1844.             add->saturate = true;
  1845.          } else {
  1846.             /* Negating the result of the addition gives values on the range
  1847.              * [-2, 0].  Zero stays zero, and negative values become 1.0.  This
  1848.              * is achieved using SLT.
  1849.              */
  1850.             st_src_reg slt_src = result_src;
  1851.             slt_src.negate = ~slt_src.negate;
  1852.             emit(ir, TGSI_OPCODE_SLT, result_dst, slt_src, st_src_reg_for_float(0.0));
  1853.          }
  1854.       }
  1855.       break;
  1856.    }
  1857.  
  1858.    case ir_binop_logic_and:
  1859.       /* If native integers are disabled, the bool args are stored as float 0.0
  1860.        * or 1.0, so "mul" gives us "and".  If they're enabled, just use the
  1861.        * actual AND opcode.
  1862.        */
  1863.       if (native_integers)
  1864.          emit(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]);
  1865.       else
  1866.          emit(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]);
  1867.       break;
  1868.  
  1869.    case ir_binop_dot:
  1870.       assert(ir->operands[0]->type->is_vector());
  1871.       assert(ir->operands[0]->type == ir->operands[1]->type);
  1872.       emit_dp(ir, result_dst, op[0], op[1],
  1873.               ir->operands[0]->type->vector_elements);
  1874.       break;
  1875.  
  1876.    case ir_unop_sqrt:
  1877.       if (have_sqrt) {
  1878.          emit_scalar(ir, TGSI_OPCODE_SQRT, result_dst, op[0]);
  1879.       } else {
  1880.          /* sqrt(x) = x * rsq(x). */
  1881.          emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]);
  1882.          emit(ir, TGSI_OPCODE_MUL, result_dst, result_src, op[0]);
  1883.          /* For incoming channels <= 0, set the result to 0. */
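                 /* TGSI CMP writes src1 where src0 < 0 and src2 elsewhere, so
                  * with op[0] negated the x * rsq(x) value is kept only for
                  * channels where x > 0.
                  */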
  1884.          op[0].negate = ~op[0].negate;
  1885.          emit(ir, TGSI_OPCODE_CMP, result_dst,
  1886.               op[0], result_src, st_src_reg_for_float(0.0));
  1887.       }
  1888.       break;
  1889.    case ir_unop_rsq:
  1890.       emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]);
  1891.       break;
  1892.    case ir_unop_i2f:
  1893.       if (native_integers) {
  1894.          emit(ir, TGSI_OPCODE_I2F, result_dst, op[0]);
  1895.          break;
  1896.       }
  1897.       /* fallthrough to next case otherwise */
  1898.    case ir_unop_b2f:
  1899.       if (native_integers) {
  1900.          emit(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_float(1.0));
  1901.          break;
  1902.       }
  1903.       /* fallthrough to next case otherwise */
  1904.    case ir_unop_i2u:
  1905.    case ir_unop_u2i:
  1906.       /* Converting between signed and unsigned integers is a no-op. */
  1907.       result_src = op[0];
  1908.       break;
  1909.    case ir_unop_b2i:
  1910.       if (native_integers) {
  1911.          /* Booleans are stored as integers using ~0 for true and 0 for false.
  1912.           * GLSL requires that int(bool) return 1 for true and 0 for false.
  1913.           * This conversion is done with AND, but it could be done with NEG.
  1914.           */
  1915.          emit(ir, TGSI_OPCODE_AND, result_dst, op[0], st_src_reg_for_int(1));
  1916.       } else {
  1917.          /* Booleans and integers are both stored as floats when native
  1918.           * integers are disabled.
  1919.           */
  1920.          result_src = op[0];
  1921.       }
  1922.       break;
  1923.    case ir_unop_f2i:
  1924.       if (native_integers)
  1925.          emit(ir, TGSI_OPCODE_F2I, result_dst, op[0]);
  1926.       else
  1927.          emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
  1928.       break;
  1929.    case ir_unop_f2u:
  1930.       if (native_integers)
  1931.          emit(ir, TGSI_OPCODE_F2U, result_dst, op[0]);
  1932.       else
  1933.          emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
  1934.       break;
  1935.    case ir_unop_bitcast_f2i:
  1936.       result_src = op[0];
  1937.       result_src.type = GLSL_TYPE_INT;
  1938.       break;
  1939.    case ir_unop_bitcast_f2u:
  1940.       result_src = op[0];
  1941.       result_src.type = GLSL_TYPE_UINT;
  1942.       break;
  1943.    case ir_unop_bitcast_i2f:
  1944.    case ir_unop_bitcast_u2f:
  1945.       result_src = op[0];
  1946.       result_src.type = GLSL_TYPE_FLOAT;
  1947.       break;
  1948.    case ir_unop_f2b:
  1949.       emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0));
  1950.       break;
  1951.    case ir_unop_d2b:
  1952.       emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_double(0.0));
  1953.       break;
  1954.    case ir_unop_i2b:
  1955.       if (native_integers)
  1956.          emit(ir, TGSI_OPCODE_USNE, result_dst, op[0], st_src_reg_for_int(0));
  1957.       else
  1958.          emit(ir, TGSI_OPCODE_SNE, result_dst, op[0], st_src_reg_for_float(0.0));
  1959.       break;
  1960.    case ir_unop_trunc:
  1961.       emit(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]);
  1962.       break;
  1963.    case ir_unop_ceil:
  1964.       emit(ir, TGSI_OPCODE_CEIL, result_dst, op[0]);
  1965.       break;
  1966.    case ir_unop_floor:
  1967.       emit(ir, TGSI_OPCODE_FLR, result_dst, op[0]);
  1968.       break;
  1969.    case ir_unop_round_even:
  1970.       emit(ir, TGSI_OPCODE_ROUND, result_dst, op[0]);
  1971.       break;
  1972.    case ir_unop_fract:
  1973.       emit(ir, TGSI_OPCODE_FRC, result_dst, op[0]);
  1974.       break;
  1975.  
  1976.    case ir_binop_min:
  1977.       emit(ir, TGSI_OPCODE_MIN, result_dst, op[0], op[1]);
  1978.       break;
  1979.    case ir_binop_max:
  1980.       emit(ir, TGSI_OPCODE_MAX, result_dst, op[0], op[1]);
  1981.       break;
  1982.    case ir_binop_pow:
  1983.       emit_scalar(ir, TGSI_OPCODE_POW, result_dst, op[0], op[1]);
  1984.       break;
  1985.  
  1986.    case ir_unop_bit_not:
  1987.       if (native_integers) {
  1988.          emit(ir, TGSI_OPCODE_NOT, result_dst, op[0]);
  1989.          break;
  1990.       }
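              /* fallthrough to next case otherwise */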
  1991.    case ir_unop_u2f:
  1992.       if (native_integers) {
  1993.          emit(ir, TGSI_OPCODE_U2F, result_dst, op[0]);
  1994.          break;
  1995.       }
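              /* fallthrough to next case otherwise */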
  1996.    case ir_binop_lshift:
  1997.       if (native_integers) {
  1998.          emit(ir, TGSI_OPCODE_SHL, result_dst, op[0], op[1]);
  1999.          break;
  2000.       }
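              /* fallthrough to next case otherwise */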
  2001.    case ir_binop_rshift:
  2002.       if (native_integers) {
  2003.          emit(ir, TGSI_OPCODE_ISHR, result_dst, op[0], op[1]);
  2004.          break;
  2005.       }
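              /* fallthrough to next case otherwise */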
  2006.    case ir_binop_bit_and:
  2007.       if (native_integers) {
  2008.          emit(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]);
  2009.          break;
  2010.       }
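              /* fallthrough to next case otherwise */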
  2011.    case ir_binop_bit_xor:
  2012.       if (native_integers) {
  2013.          emit(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]);
  2014.          break;
  2015.       }
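              /* fallthrough to next case otherwise */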
  2016.    case ir_binop_bit_or:
  2017.       if (native_integers) {
  2018.          emit(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]);
  2019.          break;
  2020.       }
  2021.  
  2022.       assert(!"GLSL 1.30 features unsupported");
  2023.       break;
  2024.  
  2025.    case ir_binop_ubo_load: {
  2026.       ir_constant *const_uniform_block = ir->operands[0]->as_constant();
  2027.       ir_constant *const_offset_ir = ir->operands[1]->as_constant();
  2028.       unsigned const_offset = const_offset_ir ? const_offset_ir->value.u[0] : 0;
  2029.       unsigned const_block = const_uniform_block ? const_uniform_block->value.u[0] + 1 : 0;
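              /* Constant buffer 0 holds the ordinary uniforms, so UBO binding N
               * is accessed as constant buffer N + 1 (hence the "+ 1" above).
               */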
  2030.       st_src_reg index_reg = get_temp(glsl_type::uint_type);
  2031.       st_src_reg cbuf;
  2032.  
  2033.       cbuf.type = ir->type->base_type;
  2034.       cbuf.file = PROGRAM_CONSTANT;
  2035.       cbuf.index = 0;
  2036.       cbuf.reladdr = NULL;
  2037.       cbuf.negate = 0;
  2038.  
  2039.       assert(ir->type->is_vector() || ir->type->is_scalar());
  2040.  
  2041.       if (const_offset_ir) {
  2042.          /* Constant index into constant buffer */
  2043.          cbuf.reladdr = NULL;
  2044.          cbuf.index = const_offset / 16;
  2045.       }
  2046.       else {
  2047.          /* Relative/variable index into constant buffer */
  2048.          emit(ir, TGSI_OPCODE_USHR, st_dst_reg(index_reg), op[1],
  2049.               st_src_reg_for_int(4));
  2050.          cbuf.reladdr = ralloc(mem_ctx, st_src_reg);
  2051.          memcpy(cbuf.reladdr, &index_reg, sizeof(index_reg));
  2052.       }
  2053.  
  2054.       if (const_uniform_block) {
  2055.          /* Constant constant buffer */
  2056.          cbuf.reladdr2 = NULL;
  2057.          cbuf.index2D = const_block;
  2058.          cbuf.has_index2 = true;
  2059.       }
  2060.       else {
  2061.          /* Relative/variable constant buffer */
  2062.          cbuf.reladdr2 = ralloc(mem_ctx, st_src_reg);
  2063.          cbuf.index2D = 1;
  2064.          memcpy(cbuf.reladdr2, &op[0], sizeof(st_src_reg));
  2065.          cbuf.has_index2 = true;
  2066.       }
  2067.  
  2068.       cbuf.swizzle = swizzle_for_size(ir->type->vector_elements);
  2069.       if (cbuf.type == GLSL_TYPE_DOUBLE)
  2070.          cbuf.swizzle += MAKE_SWIZZLE4(const_offset % 16 / 8,
  2071.                                        const_offset % 16 / 8,
  2072.                                        const_offset % 16 / 8,
  2073.                                        const_offset % 16 / 8);
  2074.       else
  2075.          cbuf.swizzle += MAKE_SWIZZLE4(const_offset % 16 / 4,
  2076.                                        const_offset % 16 / 4,
  2077.                                        const_offset % 16 / 4,
  2078.                                        const_offset % 16 / 4);
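              /* e.g. a float at byte offset 20 reads CONST[1] (20 / 16) starting
               * at channel Y (20 % 16 / 4 == 1); doubles select an 8-byte,
               * two-channel half of the vec4 instead.
               */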
  2079.  
  2080.       if (ir->type->base_type == GLSL_TYPE_BOOL) {
  2081.          emit(ir, TGSI_OPCODE_USNE, result_dst, cbuf, st_src_reg_for_int(0));
  2082.       } else {
  2083.          emit(ir, TGSI_OPCODE_MOV, result_dst, cbuf);
  2084.       }
  2085.       break;
  2086.    }
  2087.    case ir_triop_lrp:
  2088.       /* note: we have to reorder the three args here */
  2089.       emit(ir, TGSI_OPCODE_LRP, result_dst, op[2], op[1], op[0]);
  2090.       break;
  2091.    case ir_triop_csel:
  2092.       if (this->ctx->Const.NativeIntegers)
  2093.          emit(ir, TGSI_OPCODE_UCMP, result_dst, op[0], op[1], op[2]);
  2094.       else {
  2095.          op[0].negate = ~op[0].negate;
  2096.          emit(ir, TGSI_OPCODE_CMP, result_dst, op[0], op[1], op[2]);
  2097.       }
  2098.       break;
  2099.    case ir_triop_bitfield_extract:
  2100.       emit(ir, TGSI_OPCODE_IBFE, result_dst, op[0], op[1], op[2]);
  2101.       break;
  2102.    case ir_quadop_bitfield_insert:
  2103.       emit(ir, TGSI_OPCODE_BFI, result_dst, op[0], op[1], op[2], op[3]);
  2104.       break;
  2105.    case ir_unop_bitfield_reverse:
  2106.       emit(ir, TGSI_OPCODE_BREV, result_dst, op[0]);
  2107.       break;
  2108.    case ir_unop_bit_count:
  2109.       emit(ir, TGSI_OPCODE_POPC, result_dst, op[0]);
  2110.       break;
  2111.    case ir_unop_find_msb:
  2112.       emit(ir, TGSI_OPCODE_IMSB, result_dst, op[0]);
  2113.       break;
  2114.    case ir_unop_find_lsb:
  2115.       emit(ir, TGSI_OPCODE_LSB, result_dst, op[0]);
  2116.       break;
  2117.    case ir_binop_imul_high:
  2118.       emit(ir, TGSI_OPCODE_IMUL_HI, result_dst, op[0], op[1]);
  2119.       break;
  2120.    case ir_triop_fma:
  2121.       /* In theory, MAD is incorrect here: fma() must not round the intermediate product, while MAD may. */
  2122.       if (have_fma)
  2123.          emit(ir, TGSI_OPCODE_FMA, result_dst, op[0], op[1], op[2]);
  2124.       else
  2125.          emit(ir, TGSI_OPCODE_MAD, result_dst, op[0], op[1], op[2]);
  2126.       break;
  2127.    case ir_unop_interpolate_at_centroid:
  2128.       emit(ir, TGSI_OPCODE_INTERP_CENTROID, result_dst, op[0]);
  2129.       break;
  2130.    case ir_binop_interpolate_at_offset:
  2131.       emit(ir, TGSI_OPCODE_INTERP_OFFSET, result_dst, op[0], op[1]);
  2132.       break;
  2133.    case ir_binop_interpolate_at_sample:
  2134.       emit(ir, TGSI_OPCODE_INTERP_SAMPLE, result_dst, op[0], op[1]);
  2135.       break;
  2136.  
  2137.    case ir_unop_d2f:
  2138.       emit(ir, TGSI_OPCODE_D2F, result_dst, op[0]);
  2139.       break;
  2140.    case ir_unop_f2d:
  2141.       emit(ir, TGSI_OPCODE_F2D, result_dst, op[0]);
  2142.       break;
  2143.    case ir_unop_d2i:
  2144.       emit(ir, TGSI_OPCODE_D2I, result_dst, op[0]);
  2145.       break;
  2146.    case ir_unop_i2d:
  2147.       emit(ir, TGSI_OPCODE_I2D, result_dst, op[0]);
  2148.       break;
  2149.    case ir_unop_d2u:
  2150.       emit(ir, TGSI_OPCODE_D2U, result_dst, op[0]);
  2151.       break;
  2152.    case ir_unop_u2d:
  2153.       emit(ir, TGSI_OPCODE_U2D, result_dst, op[0]);
  2154.       break;
  2155.    case ir_unop_unpack_double_2x32:
  2156.    case ir_unop_pack_double_2x32:
  2157.       emit(ir, TGSI_OPCODE_MOV, result_dst, op[0]);
  2158.       break;
  2159.  
  2160.    case ir_binop_ldexp:
  2161.       if (ir->operands[0]->type->base_type == GLSL_TYPE_DOUBLE) {
  2162.          emit(ir, TGSI_OPCODE_DLDEXP, result_dst, op[0], op[1]);
  2163.       } else {
  2164.          assert(!"Invalid ldexp for non-double opcode in glsl_to_tgsi_visitor::visit()");
  2165.       }
  2166.       break;
  2167.  
  2168.    case ir_unop_pack_snorm_2x16:
  2169.    case ir_unop_pack_unorm_2x16:
  2170.    case ir_unop_pack_half_2x16:
  2171.    case ir_unop_pack_snorm_4x8:
  2172.    case ir_unop_pack_unorm_4x8:
  2173.  
  2174.    case ir_unop_unpack_snorm_2x16:
  2175.    case ir_unop_unpack_unorm_2x16:
  2176.    case ir_unop_unpack_half_2x16:
  2177.    case ir_unop_unpack_half_2x16_split_x:
  2178.    case ir_unop_unpack_half_2x16_split_y:
  2179.    case ir_unop_unpack_snorm_4x8:
  2180.    case ir_unop_unpack_unorm_4x8:
  2181.  
  2182.    case ir_binop_pack_half_2x16_split:
  2183.    case ir_binop_bfm:
  2184.    case ir_triop_bfi:
  2185.    case ir_quadop_vector:
  2186.    case ir_binop_vector_extract:
  2187.    case ir_triop_vector_insert:
  2188.    case ir_binop_carry:
  2189.    case ir_binop_borrow:
  2190.       /* This operation is not supported, or should have already been handled.
  2191.        */
  2192.       assert(!"Invalid ir opcode in glsl_to_tgsi_visitor::visit()");
  2193.       break;
  2194.    }
  2195.  
  2196.    this->result = result_src;
  2197. }
  2198.  
  2199.  
  2200. void
  2201. glsl_to_tgsi_visitor::visit(ir_swizzle *ir)
  2202. {
  2203.    st_src_reg src;
  2204.    int i;
  2205.    int swizzle[4];
  2206.  
  2207.    /* Note that this is only swizzles in expressions, not those on the left
  2208.     * hand side of an assignment, which do write masking.  See ir_assignment
  2209.     * for that.
  2210.     */
  2211.  
  2212.    ir->val->accept(this);
  2213.    src = this->result;
  2214.    assert(src.file != PROGRAM_UNDEFINED);
  2215.    assert(ir->type->vector_elements > 0);
  2216.  
  2217.    for (i = 0; i < 4; i++) {
  2218.       if (i < ir->type->vector_elements) {
  2219.          switch (i) {
  2220.          case 0:
  2221.             swizzle[i] = GET_SWZ(src.swizzle, ir->mask.x);
  2222.             break;
  2223.          case 1:
  2224.             swizzle[i] = GET_SWZ(src.swizzle, ir->mask.y);
  2225.             break;
  2226.          case 2:
  2227.             swizzle[i] = GET_SWZ(src.swizzle, ir->mask.z);
  2228.             break;
  2229.          case 3:
  2230.             swizzle[i] = GET_SWZ(src.swizzle, ir->mask.w);
  2231.             break;
  2232.          }
  2233.       } else {
  2234.          /* If the type is smaller than a vec4, replicate the last
  2235.           * channel out.
  2236.           */
  2237.          swizzle[i] = swizzle[ir->type->vector_elements - 1];
  2238.       }
  2239.    }
  2240.  
  2241.    src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
  2242.  
  2243.    this->result = src;
  2244. }
  2245.  
  2246. void
  2247. glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir)
  2248. {
  2249.    variable_storage *entry = find_variable_storage(ir->var);
  2250.    ir_variable *var = ir->var;
  2251.  
  2252.    if (!entry) {
  2253.       switch (var->data.mode) {
  2254.       case ir_var_uniform:
  2255.          entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM,
  2256.                                                var->data.location);
  2257.          this->variables.push_tail(entry);
  2258.          break;
  2259.       case ir_var_shader_in:
  2260.          /* The linker assigns locations for varyings and attributes,
  2261.           * including deprecated builtins (like gl_Color), user-assigned
  2262.           * generic attributes (glBindAttribLocation), and
  2263.           * user-defined varyings.
  2264.           */
  2265.          assert(var->data.location != -1);
  2266.          entry = new(mem_ctx) variable_storage(var,
  2267.                                                PROGRAM_INPUT,
  2268.                                                var->data.location);
  2269.          break;
  2270.       case ir_var_shader_out:
  2271.          assert(var->data.location != -1);
  2272.          entry = new(mem_ctx) variable_storage(var,
  2273.                                                PROGRAM_OUTPUT,
  2274.                                                var->data.location
  2275.                                                + var->data.index);
  2276.          break;
  2277.       case ir_var_system_value:
  2278.          entry = new(mem_ctx) variable_storage(var,
  2279.                                                PROGRAM_SYSTEM_VALUE,
  2280.                                                var->data.location);
  2281.          break;
  2282.       case ir_var_auto:
  2283.       case ir_var_temporary:
  2284.          st_src_reg src = get_temp(var->type);
  2285.  
  2286.          entry = new(mem_ctx) variable_storage(var, src.file, src.index);
  2287.          this->variables.push_tail(entry);
  2288.  
  2289.          break;
  2290.       }
  2291.  
  2292.       if (!entry) {
  2293.          printf("Failed to make storage for %s\n", var->name);
  2294.          exit(1);
  2295.       }
  2296.    }
  2297.  
  2298.    this->result = st_src_reg(entry->file, entry->index, var->type);
  2299.    if (!native_integers)
  2300.       this->result.type = GLSL_TYPE_FLOAT;
  2301. }
  2302.  
  2303. void
  2304. glsl_to_tgsi_visitor::visit(ir_dereference_array *ir)
  2305. {
  2306.    ir_constant *index;
  2307.    st_src_reg src;
  2308.    int element_size = type_size(ir->type);
  2309.    bool is_2D_input;
  2310.  
  2311.    index = ir->array_index->constant_expression_value();
  2312.  
  2313.    ir->array->accept(this);
  2314.    src = this->result;
  2315.  
  2316.    is_2D_input = this->prog->Target == GL_GEOMETRY_PROGRAM_NV &&
  2317.                  src.file == PROGRAM_INPUT &&
  2318.                  ir->array->ir_type != ir_type_dereference_array;
  2319.  
  2320.    if (is_2D_input)
  2321.       element_size = 1;
  2322.  
  2323.    if (index) {
  2324.       if (is_2D_input) {
  2325.          src.index2D = index->value.i[0];
  2326.          src.has_index2 = true;
  2327.       } else
  2328.          src.index += index->value.i[0] * element_size;
  2329.    } else {
  2330.       /* Variable index array dereference.  It eats the "vec4" of the
  2331.        * base of the array and an index that offsets the TGSI register
  2332.        * index.
  2333.        */
  2334.       ir->array_index->accept(this);
  2335.  
  2336.       st_src_reg index_reg;
  2337.  
  2338.       if (element_size == 1) {
  2339.          index_reg = this->result;
  2340.       } else {
  2341.          index_reg = get_temp(native_integers ?
  2342.                               glsl_type::int_type : glsl_type::float_type);
  2343.  
  2344.          emit(ir, TGSI_OPCODE_MUL, st_dst_reg(index_reg),
  2345.               this->result, st_src_reg_for_type(index_reg.type, element_size));
  2346.       }
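              /* e.g. for an array of mat4 (element_size == 4) the index is
               * scaled by 4 so the relative address lands on the first column
               * of the selected element.
               */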
  2347.  
  2348.       /* If there was already a relative address register involved, add the
  2349.        * new and the old together to get the new offset.
  2350.        */
  2351.       if (!is_2D_input && src.reladdr != NULL) {
  2352.          st_src_reg accum_reg = get_temp(native_integers ?
  2353.                                 glsl_type::int_type : glsl_type::float_type);
  2354.  
  2355.          emit(ir, TGSI_OPCODE_ADD, st_dst_reg(accum_reg),
  2356.               index_reg, *src.reladdr);
  2357.  
  2358.          index_reg = accum_reg;
  2359.       }
  2360.  
  2361.       if (is_2D_input) {
  2362.          src.reladdr2 = ralloc(mem_ctx, st_src_reg);
  2363.          memcpy(src.reladdr2, &index_reg, sizeof(index_reg));
  2364.          src.index2D = 0;
  2365.          src.has_index2 = true;
  2366.       } else {
  2367.          src.reladdr = ralloc(mem_ctx, st_src_reg);
  2368.          memcpy(src.reladdr, &index_reg, sizeof(index_reg));
  2369.       }
  2370.    }
  2371.  
  2372.    /* If the type is smaller than a vec4, replicate the last channel out. */
  2373.    if (ir->type->is_scalar() || ir->type->is_vector())
  2374.       src.swizzle = swizzle_for_size(ir->type->vector_elements);
  2375.    else
  2376.       src.swizzle = SWIZZLE_NOOP;
  2377.  
  2378.    /* Change the register type to the element type of the array. */
  2379.    src.type = ir->type->base_type;
  2380.  
  2381.    this->result = src;
  2382. }
  2383.  
  2384. void
  2385. glsl_to_tgsi_visitor::visit(ir_dereference_record *ir)
  2386. {
  2387.    unsigned int i;
  2388.    const glsl_type *struct_type = ir->record->type;
  2389.    int offset = 0;
  2390.  
  2391.    ir->record->accept(this);
  2392.  
  2393.    for (i = 0; i < struct_type->length; i++) {
  2394.       if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
  2395.          break;
  2396.       offset += type_size(struct_type->fields.structure[i].type);
  2397.    }
  2398.  
  2399.    /* If the type is smaller than a vec4, replicate the last channel out. */
  2400.    if (ir->type->is_scalar() || ir->type->is_vector())
  2401.       this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
  2402.    else
  2403.       this->result.swizzle = SWIZZLE_NOOP;
  2404.  
  2405.    this->result.index += offset;
  2406.    this->result.type = ir->type->base_type;
  2407. }
  2408.  
  2409. /**
  2410.  * We want to be careful in assignment setup to hit the actual storage
  2411.  * instead of potentially using a temporary like we might with the
  2412.  * ir_dereference handler.
  2413.  */
  2414. static st_dst_reg
  2415. get_assignment_lhs(ir_dereference *ir, glsl_to_tgsi_visitor *v)
  2416. {
  2417.    /* The LHS must be a dereference.  If the LHS is a variable indexed array
  2418.     * access of a vector, it must be separated into a series of conditional moves
  2419.     * before reaching this point (see ir_vec_index_to_cond_assign).
  2420.     */
  2421.    assert(ir->as_dereference());
  2422.    ir_dereference_array *deref_array = ir->as_dereference_array();
  2423.    if (deref_array) {
  2424.       assert(!deref_array->array->type->is_vector());
  2425.    }
  2426.  
  2427.    /* Use the rvalue deref handler for the most part.  We'll ignore
  2428.     * swizzles in it and write swizzles using writemask, though.
  2429.     */
  2430.    ir->accept(v);
  2431.    return st_dst_reg(v->result);
  2432. }
  2433.  
  2434. /**
  2435.  * Process the condition of a conditional assignment
  2436.  *
  2437.  * Examines the condition of a conditional assignment to generate the optimal
  2438.  * first operand of a \c CMP instruction.  If the condition is a relational
  2439.  * operator with 0 (e.g., \c ir_binop_less), the value being compared will be
  2440.  * used as the source for the \c CMP instruction.  Otherwise the comparison
  2441.  * is processed to a boolean result, and the boolean result is used as the
  2442.  * operand to the CMP instruction.
  2443.  */
  2444. bool
  2445. glsl_to_tgsi_visitor::process_move_condition(ir_rvalue *ir)
  2446. {
  2447.    ir_rvalue *src_ir = ir;
  2448.    bool negate = true;
  2449.    bool switch_order = false;
  2450.  
  2451.    ir_expression *const expr = ir->as_expression();
  2452.  
  2453.    if (native_integers) {
  2454.       if ((expr != NULL) && (expr->get_num_operands() == 2)) {
  2455.          enum glsl_base_type type = expr->operands[0]->type->base_type;
  2456.          if (type == GLSL_TYPE_INT || type == GLSL_TYPE_UINT ||
  2457.              type == GLSL_TYPE_BOOL) {
  2458.             if (expr->operation == ir_binop_equal) {
  2459.                if (expr->operands[0]->is_zero()) {
  2460.                   src_ir = expr->operands[1];
  2461.                   switch_order = true;
  2462.                }
  2463.                else if (expr->operands[1]->is_zero()) {
  2464.                   src_ir = expr->operands[0];
  2465.                   switch_order = true;
  2466.                }
  2467.             }
  2468.             else if (expr->operation == ir_binop_nequal) {
  2469.                if (expr->operands[0]->is_zero()) {
  2470.                   src_ir = expr->operands[1];
  2471.                }
  2472.                else if (expr->operands[1]->is_zero()) {
  2473.                   src_ir = expr->operands[0];
  2474.                }
  2475.             }
  2476.          }
  2477.       }
  2478.  
  2479.       src_ir->accept(this);
  2480.       return switch_order;
  2481.    }
  2482.  
  2483.    if ((expr != NULL) && (expr->get_num_operands() == 2)) {
  2484.       bool zero_on_left = false;
  2485.  
  2486.       if (expr->operands[0]->is_zero()) {
  2487.          src_ir = expr->operands[1];
  2488.          zero_on_left = true;
  2489.       } else if (expr->operands[1]->is_zero()) {
  2490.          src_ir = expr->operands[0];
  2491.          zero_on_left = false;
  2492.       }
  2493.  
  2494.       /*      a is -  0  +            -  0  +
  2495.        * (a <  0)  T  F  F  ( a < 0)  T  F  F
  2496.        * (0 <  a)  F  F  T  (-a < 0)  F  F  T
  2497.        * (a <= 0)  T  T  F  (-a < 0)  F  F  T  (swap order of other operands)
  2498.        * (0 <= a)  F  T  T  ( a < 0)  T  F  F  (swap order of other operands)
  2499.        * (a >  0)  F  F  T  (-a < 0)  F  F  T
  2500.        * (0 >  a)  T  F  F  ( a < 0)  T  F  F
  2501.        * (a >= 0)  F  T  T  ( a < 0)  T  F  F  (swap order of other operands)
  2502.        * (0 >= a)  T  T  F  (-a < 0)  F  F  T  (swap order of other operands)
  2503.        *
  2504.        * Note that exchanging the order of 0 and 'a' in the comparison simply
  2505.        * means that the value of 'a' should be negated.
  2506.        */
  2507.       if (src_ir != ir) {
  2508.          switch (expr->operation) {
  2509.          case ir_binop_less:
  2510.             switch_order = false;
  2511.             negate = zero_on_left;
  2512.             break;
  2513.  
  2514.          case ir_binop_greater:
  2515.             switch_order = false;
  2516.             negate = !zero_on_left;
  2517.             break;
  2518.  
  2519.          case ir_binop_lequal:
  2520.             switch_order = true;
  2521.             negate = !zero_on_left;
  2522.             break;
  2523.  
  2524.          case ir_binop_gequal:
  2525.             switch_order = true;
  2526.             negate = zero_on_left;
  2527.             break;
  2528.  
  2529.          default:
  2530.             /* This isn't the right kind of comparison after all, so make sure
  2531.              * the whole condition is visited.
  2532.              */
  2533.             src_ir = ir;
  2534.             break;
  2535.          }
  2536.       }
  2537.    }
  2538.  
  2539.    src_ir->accept(this);
  2540.  
  2541.    /* We use the TGSI_OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the
  2542.     * condition we produced is 0.0 or 1.0.  By flipping the sign, we can
  2543.     * choose which value TGSI_OPCODE_CMP produces without an extra instruction
  2544.     * computing the condition.
  2545.     */
  2546.    if (negate)
  2547.       this->result.negate = ~this->result.negate;
  2548.  
  2549.    return switch_order;
  2550. }
  2551.  
  2552. void
  2553. glsl_to_tgsi_visitor::emit_block_mov(ir_assignment *ir, const struct glsl_type *type,
  2554.                                      st_dst_reg *l, st_src_reg *r,
  2555.                                      st_src_reg *cond, bool cond_swap)
  2556. {
  2557.    if (type->base_type == GLSL_TYPE_STRUCT) {
  2558.       for (unsigned int i = 0; i < type->length; i++) {
  2559.          emit_block_mov(ir, type->fields.structure[i].type, l, r,
  2560.                         cond, cond_swap);
  2561.       }
  2562.       return;
  2563.    }
  2564.  
  2565.    if (type->is_array()) {
  2566.       for (unsigned int i = 0; i < type->length; i++) {
  2567.          emit_block_mov(ir, type->fields.array, l, r, cond, cond_swap);
  2568.       }
  2569.       return;
  2570.    }
  2571.  
  2572.    if (type->is_matrix()) {
  2573.       const struct glsl_type *vec_type;
  2574.  
  2575.       vec_type = glsl_type::get_instance(GLSL_TYPE_FLOAT,
  2576.                                          type->vector_elements, 1);
  2577.  
  2578.       for (int i = 0; i < type->matrix_columns; i++) {
  2579.          emit_block_mov(ir, vec_type, l, r, cond, cond_swap);
  2580.       }
  2581.       return;
  2582.    }
  2583.  
  2584.    assert(type->is_scalar() || type->is_vector());
  2585.  
  2586.    r->type = type->base_type;
  2587.    if (cond) {
  2588.       st_src_reg l_src = st_src_reg(*l);
  2589.       l_src.swizzle = swizzle_for_size(type->vector_elements);
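              /* Select between the incoming value and the destination's current
               * contents; cond_swap flips which of the two the condition keeps.
               */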
  2590.  
  2591.       if (native_integers) {
  2592.          emit(ir, TGSI_OPCODE_UCMP, *l, *cond,
  2593.               cond_swap ? l_src : *r,
  2594.               cond_swap ? *r : l_src);
  2595.       } else {
  2596.          emit(ir, TGSI_OPCODE_CMP, *l, *cond,
  2597.               cond_swap ? l_src : *r,
  2598.               cond_swap ? *r : l_src);
  2599.       }
  2600.    } else {
  2601.       emit(ir, TGSI_OPCODE_MOV, *l, *r);
  2602.    }
  2603.    l->index++;
  2604.    r->index++;
  2605. }
  2606.  
  2607. void
  2608. glsl_to_tgsi_visitor::visit(ir_assignment *ir)
  2609. {
  2610.    st_dst_reg l;
  2611.    st_src_reg r;
  2612.  
  2613.    ir->rhs->accept(this);
  2614.    r = this->result;
  2615.  
  2616.    l = get_assignment_lhs(ir->lhs, this);
  2617.  
  2618.    /* FINISHME: This should really set to the correct maximal writemask for each
  2619.     * FINISHME: component written (in the loops below).  This case can only
  2620.     * FINISHME: occur for matrices, arrays, and structures.
  2621.     */
  2622.    if (ir->write_mask == 0) {
  2623.       assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector());
  2624.       l.writemask = WRITEMASK_XYZW;
  2625.    } else if (ir->lhs->type->is_scalar() &&
  2626.               !ir->lhs->type->is_double() &&
  2627.               ir->lhs->variable_referenced()->data.mode == ir_var_shader_out) {
  2628.       /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the
  2629.        * FINISHME: W component of fragment shader output zero, work correctly.
  2630.        */
  2631.       l.writemask = WRITEMASK_XYZW;
  2632.    } else {
  2633.       int swizzles[4];
  2634.       int first_enabled_chan = 0;
  2635.       int rhs_chan = 0;
  2636.  
  2637.       l.writemask = ir->write_mask;
  2638.  
  2639.       for (int i = 0; i < 4; i++) {
  2640.          if (l.writemask & (1 << i)) {
  2641.             first_enabled_chan = GET_SWZ(r.swizzle, i);
  2642.             break;
  2643.          }
  2644.       }
  2645.  
  2646.       /* Swizzle a small RHS vector into the channels being written.
  2647.        *
  2648.        * glsl ir treats write_mask as dictating how many channels are
  2649.        * present on the RHS while TGSI treats write_mask as just
  2650.        * showing which channels of the vec4 RHS get written.
  2651.        */
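              /* For example, writing a vec2 RHS (swizzle XYYY) with write_mask
               * .yz yields a swizzle where channel Y reads rhs.x and channel Z
               * reads rhs.y; masked-out channels simply repeat
               * first_enabled_chan.
               */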
  2652.       for (int i = 0; i < 4; i++) {
  2653.          if (l.writemask & (1 << i))
  2654.             swizzles[i] = GET_SWZ(r.swizzle, rhs_chan++);
  2655.          else
  2656.             swizzles[i] = first_enabled_chan;
  2657.       }
  2658.       r.swizzle = MAKE_SWIZZLE4(swizzles[0], swizzles[1],
  2659.                                 swizzles[2], swizzles[3]);
  2660.    }
  2661.  
  2662.    assert(l.file != PROGRAM_UNDEFINED);
  2663.    assert(r.file != PROGRAM_UNDEFINED);
  2664.  
  2665.    if (ir->condition) {
  2666.       const bool switch_order = this->process_move_condition(ir->condition);
  2667.       st_src_reg condition = this->result;
  2668.  
  2669.       emit_block_mov(ir, ir->lhs->type, &l, &r, &condition, switch_order);
  2670.    } else if (ir->rhs->as_expression() &&
  2671.               this->instructions.get_tail() &&
  2672.               ir->rhs == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->ir &&
  2673.               type_size(ir->lhs->type) == 1 &&
  2674.               l.writemask == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->dst[0].writemask) {
  2675.       /* To avoid emitting an extra MOV when assigning an expression to a
  2676.        * variable, emit the last instruction of the expression again, but
  2677.        * replace the destination register with the target of the assignment.
  2678.        * Dead code elimination will remove the original instruction.
  2679.        */
  2680.       glsl_to_tgsi_instruction *inst, *new_inst;
  2681.       inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail();
  2682.       new_inst = emit(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2]);
  2683.       new_inst->saturate = inst->saturate;
  2684.       inst->dead_mask = inst->dst[0].writemask;
  2685.    } else {
  2686.       emit_block_mov(ir, ir->rhs->type, &l, &r, NULL, false);
  2687.    }
  2688. }
  2689.  
  2690.  
  2691. void
  2692. glsl_to_tgsi_visitor::visit(ir_constant *ir)
  2693. {
  2694.    st_src_reg src;
  2695.    GLdouble stack_vals[4] = { 0 };
  2696.    gl_constant_value *values = (gl_constant_value *) stack_vals;
  2697.    GLenum gl_type = GL_NONE;
  2698.    unsigned int i;
  2699.    static int in_array = 0;
  2700.    gl_register_file file = in_array ? PROGRAM_CONSTANT : PROGRAM_IMMEDIATE;
  2701.  
  2702.    /* Unfortunately, 4 floats is all we can get into
  2703.     * _mesa_add_typed_unnamed_constant.  So, make a temp to store an
  2704.     * aggregate constant and move each constant value into it.  If we
  2705.     * get lucky, copy propagation will eliminate the extra moves.
  2706.     */
  2707.    if (ir->type->base_type == GLSL_TYPE_STRUCT) {
  2708.       st_src_reg temp_base = get_temp(ir->type);
  2709.       st_dst_reg temp = st_dst_reg(temp_base);
  2710.  
  2711.       foreach_in_list(ir_constant, field_value, &ir->components) {
  2712.          int size = type_size(field_value->type);
  2713.  
  2714.          assert(size > 0);
  2715.  
  2716.          field_value->accept(this);
  2717.          src = this->result;
  2718.  
  2719.          for (i = 0; i < (unsigned int)size; i++) {
  2720.             emit(ir, TGSI_OPCODE_MOV, temp, src);
  2721.  
  2722.             src.index++;
  2723.             temp.index++;
  2724.          }
  2725.       }
  2726.       this->result = temp_base;
  2727.       return;
  2728.    }
  2729.  
  2730.    if (ir->type->is_array()) {
  2731.       st_src_reg temp_base = get_temp(ir->type);
  2732.       st_dst_reg temp = st_dst_reg(temp_base);
  2733.       int size = type_size(ir->type->fields.array);
  2734.  
  2735.       assert(size > 0);
  2736.       in_array++;
  2737.  
  2738.       for (i = 0; i < ir->type->length; i++) {
  2739.          ir->array_elements[i]->accept(this);
  2740.          src = this->result;
  2741.          for (int j = 0; j < size; j++) {
  2742.             emit(ir, TGSI_OPCODE_MOV, temp, src);
  2743.  
  2744.             src.index++;
  2745.             temp.index++;
  2746.          }
  2747.       }
  2748.       this->result = temp_base;
  2749.       in_array--;
  2750.       return;
  2751.    }
  2752.  
  2753.    if (ir->type->is_matrix()) {
  2754.       st_src_reg mat = get_temp(ir->type);
  2755.       st_dst_reg mat_column = st_dst_reg(mat);
  2756.  
  2757.       for (i = 0; i < ir->type->matrix_columns; i++) {
  2758.          assert(ir->type->base_type == GLSL_TYPE_FLOAT);
  2759.          values = (gl_constant_value *) &ir->value.f[i * ir->type->vector_elements];
  2760.  
  2761.          src = st_src_reg(file, -1, ir->type->base_type);
  2762.          src.index = add_constant(file,
  2763.                                   values,
  2764.                                   ir->type->vector_elements,
  2765.                                   GL_FLOAT,
  2766.                                   &src.swizzle);
  2767.          emit(ir, TGSI_OPCODE_MOV, mat_column, src);
  2768.  
  2769.          mat_column.index++;
  2770.       }
  2771.  
  2772.       this->result = mat;
  2773.       return;
  2774.    }
  2775.  
  2776.    switch (ir->type->base_type) {
  2777.    case GLSL_TYPE_FLOAT:
  2778.       gl_type = GL_FLOAT;
  2779.       for (i = 0; i < ir->type->vector_elements; i++) {
  2780.          values[i].f = ir->value.f[i];
  2781.       }
  2782.       break;
  2783.    case GLSL_TYPE_DOUBLE:
  2784.       gl_type = GL_DOUBLE;
  2785.       for (i = 0; i < ir->type->vector_elements; i++) {
  2786.          values[i * 2].i = *(uint32_t *)&ir->value.d[i];
  2787.          values[i * 2 + 1].i = *(((uint32_t *)&ir->value.d[i]) + 1);
  2788.       }
  2789.       break;
  2790.    case GLSL_TYPE_UINT:
  2791.       gl_type = native_integers ? GL_UNSIGNED_INT : GL_FLOAT;
  2792.       for (i = 0; i < ir->type->vector_elements; i++) {
  2793.          if (native_integers)
  2794.             values[i].u = ir->value.u[i];
  2795.          else
  2796.             values[i].f = ir->value.u[i];
  2797.       }
  2798.       break;
  2799.    case GLSL_TYPE_INT:
  2800.       gl_type = native_integers ? GL_INT : GL_FLOAT;
  2801.       for (i = 0; i < ir->type->vector_elements; i++) {
  2802.          if (native_integers)
  2803.             values[i].i = ir->value.i[i];
  2804.          else
  2805.             values[i].f = ir->value.i[i];
  2806.       }
  2807.       break;
  2808.    case GLSL_TYPE_BOOL:
  2809.       gl_type = native_integers ? GL_BOOL : GL_FLOAT;
  2810.       for (i = 0; i < ir->type->vector_elements; i++) {
  2811.          values[i].u = ir->value.b[i] ? ctx->Const.UniformBooleanTrue : 0;
  2812.       }
  2813.       break;
  2814.    default:
  2815.       assert(!"Non-float/uint/int/bool constant");
  2816.    }
  2817.  
  2818.    this->result = st_src_reg(file, -1, ir->type);
  2819.    this->result.index = add_constant(file,
  2820.                                      values,
  2821.                                      ir->type->vector_elements,
  2822.                                      gl_type,
  2823.                                      &this->result.swizzle);
  2824. }
  2825.  
  2826. function_entry *
  2827. glsl_to_tgsi_visitor::get_function_signature(ir_function_signature *sig)
  2828. {
  2829.    foreach_in_list_use_after(function_entry, entry, &this->function_signatures) {
  2830.       if (entry->sig == sig)
  2831.          return entry;
  2832.    }
  2833.  
  2834.    entry = ralloc(mem_ctx, function_entry);
  2835.    entry->sig = sig;
  2836.    entry->sig_id = this->next_signature_id++;
  2837.    entry->bgn_inst = NULL;
  2838.  
  2839.    /* Allocate storage for all the parameters. */
  2840.    foreach_in_list(ir_variable, param, &sig->parameters) {
  2841.       variable_storage *storage;
  2842.  
  2843.       storage = find_variable_storage(param);
  2844.       assert(!storage);
  2845.  
  2846.       st_src_reg src = get_temp(param->type);
  2847.  
  2848.       storage = new(mem_ctx) variable_storage(param, src.file, src.index);
  2849.       this->variables.push_tail(storage);
  2850.    }
  2851.  
  2852.    if (!sig->return_type->is_void()) {
  2853.       entry->return_reg = get_temp(sig->return_type);
  2854.    } else {
  2855.       entry->return_reg = undef_src;
  2856.    }
  2857.  
  2858.    this->function_signatures.push_tail(entry);
  2859.    return entry;
  2860. }
  2861.  
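/**
 * Translate a GLSL function call: copy "in"/"inout" arguments into the
 * parameters' temporary storage, emit TGSI_OPCODE_CAL pointing at the
 * callee's function_entry, copy "out"/"inout" parameters back, and expose
 * the callee's return register as this->result.
 */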
  2862. void
  2863. glsl_to_tgsi_visitor::visit(ir_call *ir)
  2864. {
  2865.    glsl_to_tgsi_instruction *call_inst;
  2866.    ir_function_signature *sig = ir->callee;
  2867.    function_entry *entry = get_function_signature(sig);
  2868.    int i;
  2869.  
  2870.    /* Process in parameters. */
  2871.    foreach_two_lists(formal_node, &sig->parameters,
  2872.                      actual_node, &ir->actual_parameters) {
  2873.       ir_rvalue *param_rval = (ir_rvalue *) actual_node;
  2874.       ir_variable *param = (ir_variable *) formal_node;
  2875.  
  2876.       if (param->data.mode == ir_var_function_in ||
  2877.           param->data.mode == ir_var_function_inout) {
  2878.          variable_storage *storage = find_variable_storage(param);
  2879.          assert(storage);
  2880.  
  2881.          param_rval->accept(this);
  2882.          st_src_reg r = this->result;
  2883.  
  2884.          st_dst_reg l;
  2885.          l.file = storage->file;
  2886.          l.index = storage->index;
  2887.          l.reladdr = NULL;
  2888.          l.writemask = WRITEMASK_XYZW;
  2889.          l.cond_mask = COND_TR;
  2890.  
  2891.          for (i = 0; i < type_size(param->type); i++) {
  2892.             emit(ir, TGSI_OPCODE_MOV, l, r);
  2893.             l.index++;
  2894.             r.index++;
  2895.          }
  2896.       }
  2897.    }
  2898.  
  2899.    /* Emit call instruction */
  2900.    call_inst = emit(ir, TGSI_OPCODE_CAL);
  2901.    call_inst->function = entry;
  2902.  
  2903.    /* Process out parameters. */
  2904.    foreach_two_lists(formal_node, &sig->parameters,
  2905.                      actual_node, &ir->actual_parameters) {
  2906.       ir_rvalue *param_rval = (ir_rvalue *) actual_node;
  2907.       ir_variable *param = (ir_variable *) formal_node;
  2908.  
  2909.       if (param->data.mode == ir_var_function_out ||
  2910.           param->data.mode == ir_var_function_inout) {
  2911.          variable_storage *storage = find_variable_storage(param);
  2912.          assert(storage);
  2913.  
  2914.          st_src_reg r;
  2915.          r.file = storage->file;
  2916.          r.index = storage->index;
  2917.          r.reladdr = NULL;
  2918.          r.swizzle = SWIZZLE_NOOP;
  2919.          r.negate = 0;
  2920.  
  2921.          param_rval->accept(this);
  2922.          st_dst_reg l = st_dst_reg(this->result);
  2923.  
  2924.          for (i = 0; i < type_size(param->type); i++) {
  2925.             emit(ir, TGSI_OPCODE_MOV, l, r);
  2926.             l.index++;
  2927.             r.index++;
  2928.          }
  2929.       }
  2930.    }
  2931.  
  2932.    /* Process return value. */
  2933.    this->result = entry->return_reg;
  2934. }
  2935.  
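/**
 * Translate a GLSL texturing operation into the corresponding TGSI texture
 * opcode.  The coordinate is copied into a temporary so that the projector,
 * shadow comparator, LOD/bias or sample index can be folded into the channels
 * the chosen opcode expects; a non-constant sampler array index is loaded via
 * emit_arl and attached to the instruction as sampler.reladdr.
 */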
  2936. void
  2937. glsl_to_tgsi_visitor::visit(ir_texture *ir)
  2938. {
  2939.    st_src_reg result_src, coord, cube_sc, lod_info, projector, dx, dy;
  2940.    st_src_reg offset[MAX_GLSL_TEXTURE_OFFSET], sample_index, component;
  2941.    st_src_reg levels_src;
  2942.    st_dst_reg result_dst, coord_dst, cube_sc_dst;
  2943.    glsl_to_tgsi_instruction *inst = NULL;
  2944.    unsigned opcode = TGSI_OPCODE_NOP;
  2945.    const glsl_type *sampler_type = ir->sampler->type;
  2946.    ir_rvalue *sampler_index =
  2947.       _mesa_get_sampler_array_nonconst_index(ir->sampler);
  2948.    bool is_cube_array = false;
  2949.    unsigned i;
  2950.  
  2951.    /* if we are a cube array sampler */
  2952.    if ((sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE &&
  2953.         sampler_type->sampler_array)) {
  2954.       is_cube_array = true;
  2955.    }
  2956.  
  2957.    if (ir->coordinate) {
  2958.       ir->coordinate->accept(this);
  2959.  
   2960.       /* Put our coords in a temp.  We'll need to modify them for shadow,
   2961.        * projection, or LOD, so the only case we'd use them unmodified is
   2962.        * plain old texturing.  The optimization passes on glsl_to_tgsi_visitor
   2963.        * should handle cleaning up our mess in that case.
  2964.        */
  2965.       coord = get_temp(glsl_type::vec4_type);
  2966.       coord_dst = st_dst_reg(coord);
  2967.       coord_dst.writemask = (1 << ir->coordinate->type->vector_elements) - 1;
  2968.       emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
  2969.    }
  2970.  
  2971.    if (ir->projector) {
  2972.       ir->projector->accept(this);
  2973.       projector = this->result;
  2974.    }
  2975.  
  2976.    /* Storage for our result.  Ideally for an assignment we'd be using
  2977.     * the actual storage for the result here, instead.
  2978.     */
  2979.    result_src = get_temp(ir->type);
  2980.    result_dst = st_dst_reg(result_src);
  2981.  
  2982.    switch (ir->op) {
  2983.    case ir_tex:
  2984.       opcode = (is_cube_array && ir->shadow_comparitor) ? TGSI_OPCODE_TEX2 : TGSI_OPCODE_TEX;
  2985.       if (ir->offset) {
  2986.          ir->offset->accept(this);
  2987.          offset[0] = this->result;
  2988.       }
  2989.       break;
  2990.    case ir_txb:
  2991.       if (is_cube_array ||
  2992.           sampler_type == glsl_type::samplerCubeShadow_type) {
  2993.          opcode = TGSI_OPCODE_TXB2;
  2994.       }
  2995.       else {
  2996.          opcode = TGSI_OPCODE_TXB;
  2997.       }
  2998.       ir->lod_info.bias->accept(this);
  2999.       lod_info = this->result;
  3000.       if (ir->offset) {
  3001.          ir->offset->accept(this);
  3002.          offset[0] = this->result;
  3003.       }
  3004.       break;
  3005.    case ir_txl:
  3006.       opcode = is_cube_array ? TGSI_OPCODE_TXL2 : TGSI_OPCODE_TXL;
  3007.       ir->lod_info.lod->accept(this);
  3008.       lod_info = this->result;
  3009.       if (ir->offset) {
  3010.          ir->offset->accept(this);
  3011.          offset[0] = this->result;
  3012.       }
  3013.       break;
  3014.    case ir_txd:
  3015.       opcode = TGSI_OPCODE_TXD;
  3016.       ir->lod_info.grad.dPdx->accept(this);
  3017.       dx = this->result;
  3018.       ir->lod_info.grad.dPdy->accept(this);
  3019.       dy = this->result;
  3020.       if (ir->offset) {
  3021.          ir->offset->accept(this);
  3022.          offset[0] = this->result;
  3023.       }
  3024.       break;
  3025.    case ir_txs:
  3026.       opcode = TGSI_OPCODE_TXQ;
  3027.       ir->lod_info.lod->accept(this);
  3028.       lod_info = this->result;
  3029.       break;
  3030.    case ir_query_levels:
  3031.       opcode = TGSI_OPCODE_TXQ;
  3032.       lod_info = undef_src;
  3033.       levels_src = get_temp(ir->type);
  3034.       break;
  3035.    case ir_txf:
  3036.       opcode = TGSI_OPCODE_TXF;
  3037.       ir->lod_info.lod->accept(this);
  3038.       lod_info = this->result;
  3039.       if (ir->offset) {
  3040.          ir->offset->accept(this);
  3041.          offset[0] = this->result;
  3042.       }
  3043.       break;
  3044.    case ir_txf_ms:
  3045.       opcode = TGSI_OPCODE_TXF;
  3046.       ir->lod_info.sample_index->accept(this);
  3047.       sample_index = this->result;
  3048.       break;
  3049.    case ir_tg4:
  3050.       opcode = TGSI_OPCODE_TG4;
  3051.       ir->lod_info.component->accept(this);
  3052.       component = this->result;
  3053.       if (ir->offset) {
  3054.          ir->offset->accept(this);
  3055.          if (ir->offset->type->base_type == GLSL_TYPE_ARRAY) {
  3056.             const glsl_type *elt_type = ir->offset->type->fields.array;
  3057.             for (i = 0; i < ir->offset->type->length; i++) {
  3058.                offset[i] = this->result;
  3059.                offset[i].index += i * type_size(elt_type);
  3060.                offset[i].type = elt_type->base_type;
  3061.                offset[i].swizzle = swizzle_for_size(elt_type->vector_elements);
  3062.             }
  3063.          } else {
  3064.             offset[0] = this->result;
  3065.          }
  3066.       }
  3067.       break;
  3068.    case ir_lod:
  3069.       opcode = TGSI_OPCODE_LODQ;
  3070.       break;
  3071.    }
  3072.  
  3073.    if (ir->projector) {
  3074.       if (opcode == TGSI_OPCODE_TEX) {
  3075.          /* Slot the projector in as the last component of the coord. */
  3076.          coord_dst.writemask = WRITEMASK_W;
  3077.          emit(ir, TGSI_OPCODE_MOV, coord_dst, projector);
  3078.          coord_dst.writemask = WRITEMASK_XYZW;
  3079.          opcode = TGSI_OPCODE_TXP;
  3080.       } else {
  3081.          st_src_reg coord_w = coord;
  3082.          coord_w.swizzle = SWIZZLE_WWWW;
  3083.  
  3084.          /* For the other TEX opcodes there's no projective version
  3085.           * since the last slot is taken up by LOD info.  Do the
  3086.           * projective divide now.
  3087.           */
  3088.          coord_dst.writemask = WRITEMASK_W;
  3089.          emit(ir, TGSI_OPCODE_RCP, coord_dst, projector);
  3090.  
  3091.          /* In the case where we have to project the coordinates "by hand,"
  3092.           * the shadow comparator value must also be projected.
  3093.           */
  3094.          st_src_reg tmp_src = coord;
  3095.          if (ir->shadow_comparitor) {
  3096.             /* Slot the shadow value in as the second to last component of the
  3097.              * coord.
  3098.              */
  3099.             ir->shadow_comparitor->accept(this);
  3100.  
  3101.             tmp_src = get_temp(glsl_type::vec4_type);
  3102.             st_dst_reg tmp_dst = st_dst_reg(tmp_src);
  3103.  
  3104.             /* Projective division not allowed for array samplers. */
  3105.             assert(!sampler_type->sampler_array);
  3106.  
  3107.             tmp_dst.writemask = WRITEMASK_Z;
  3108.             emit(ir, TGSI_OPCODE_MOV, tmp_dst, this->result);
  3109.  
  3110.             tmp_dst.writemask = WRITEMASK_XY;
  3111.             emit(ir, TGSI_OPCODE_MOV, tmp_dst, coord);
  3112.          }
  3113.  
  3114.          coord_dst.writemask = WRITEMASK_XYZ;
  3115.          emit(ir, TGSI_OPCODE_MUL, coord_dst, tmp_src, coord_w);
  3116.  
  3117.          coord_dst.writemask = WRITEMASK_XYZW;
  3118.          coord.swizzle = SWIZZLE_XYZW;
  3119.       }
  3120.    }
  3121.  
   3122.    /* If projection is done and the opcode is not TGSI_OPCODE_TXP, then the
   3123.     * shadow comparator was already put in the correct place (and projected)
   3124.     * by the by-hand projection code above.
  3125.     */
  3126.    if (ir->shadow_comparitor && (!ir->projector || opcode == TGSI_OPCODE_TXP)) {
  3127.       /* Slot the shadow value in as the second to last component of the
  3128.        * coord.
  3129.        */
  3130.       ir->shadow_comparitor->accept(this);
  3131.  
  3132.       if (is_cube_array) {
  3133.          cube_sc = get_temp(glsl_type::float_type);
  3134.          cube_sc_dst = st_dst_reg(cube_sc);
  3135.          cube_sc_dst.writemask = WRITEMASK_X;
  3136.          emit(ir, TGSI_OPCODE_MOV, cube_sc_dst, this->result);
  3137.          cube_sc_dst.writemask = WRITEMASK_X;
  3138.       }
  3139.       else {
  3140.          if ((sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_2D &&
  3141.               sampler_type->sampler_array) ||
  3142.              sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE) {
  3143.             coord_dst.writemask = WRITEMASK_W;
  3144.          } else {
  3145.             coord_dst.writemask = WRITEMASK_Z;
  3146.          }
  3147.          emit(ir, TGSI_OPCODE_MOV, coord_dst, this->result);
  3148.          coord_dst.writemask = WRITEMASK_XYZW;
  3149.       }
  3150.    }
  3151.  
  3152.    if (ir->op == ir_txf_ms) {
  3153.       coord_dst.writemask = WRITEMASK_W;
  3154.       emit(ir, TGSI_OPCODE_MOV, coord_dst, sample_index);
  3155.       coord_dst.writemask = WRITEMASK_XYZW;
  3156.    } else if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB ||
  3157.        opcode == TGSI_OPCODE_TXF) {
  3158.       /* TGSI stores LOD or LOD bias in the last channel of the coords. */
  3159.       coord_dst.writemask = WRITEMASK_W;
  3160.       emit(ir, TGSI_OPCODE_MOV, coord_dst, lod_info);
  3161.       coord_dst.writemask = WRITEMASK_XYZW;
  3162.    }
  3163.  
  3164.    if (sampler_index) {
  3165.       sampler_index->accept(this);
  3166.       emit_arl(ir, sampler_reladdr, this->result);
  3167.    }
  3168.  
  3169.    if (opcode == TGSI_OPCODE_TXD)
  3170.       inst = emit(ir, opcode, result_dst, coord, dx, dy);
  3171.    else if (opcode == TGSI_OPCODE_TXQ) {
  3172.       if (ir->op == ir_query_levels) {
   3173.          /* the number of levels is stored in W */
  3174.          inst = emit(ir, opcode, st_dst_reg(levels_src), lod_info);
  3175.          result_dst.writemask = WRITEMASK_X;
  3176.          levels_src.swizzle = SWIZZLE_WWWW;
  3177.          emit(ir, TGSI_OPCODE_MOV, result_dst, levels_src);
  3178.       } else
  3179.          inst = emit(ir, opcode, result_dst, lod_info);
  3180.    } else if (opcode == TGSI_OPCODE_TXF) {
  3181.       inst = emit(ir, opcode, result_dst, coord);
  3182.    } else if (opcode == TGSI_OPCODE_TXL2 || opcode == TGSI_OPCODE_TXB2) {
  3183.       inst = emit(ir, opcode, result_dst, coord, lod_info);
  3184.    } else if (opcode == TGSI_OPCODE_TEX2) {
  3185.       inst = emit(ir, opcode, result_dst, coord, cube_sc);
  3186.    } else if (opcode == TGSI_OPCODE_TG4) {
  3187.       if (is_cube_array && ir->shadow_comparitor) {
  3188.          inst = emit(ir, opcode, result_dst, coord, cube_sc);
  3189.       } else {
  3190.          inst = emit(ir, opcode, result_dst, coord, component);
  3191.       }
  3192.    } else
  3193.       inst = emit(ir, opcode, result_dst, coord);
  3194.  
  3195.    if (ir->shadow_comparitor)
  3196.       inst->tex_shadow = GL_TRUE;
  3197.  
  3198.    inst->sampler.index = _mesa_get_sampler_uniform_value(ir->sampler,
  3199.                                                          this->shader_program,
  3200.                                                          this->prog);
  3201.    if (sampler_index) {
  3202.       inst->sampler.reladdr = ralloc(mem_ctx, st_src_reg);
  3203.       memcpy(inst->sampler.reladdr, &sampler_reladdr, sizeof(sampler_reladdr));
  3204.       inst->sampler_array_size =
  3205.          ir->sampler->as_dereference_array()->array->type->array_size();
  3206.    } else {
  3207.       inst->sampler_array_size = 1;
  3208.    }
  3209.  
  3210.    if (ir->offset) {
  3211.       for (i = 0; i < MAX_GLSL_TEXTURE_OFFSET && offset[i].file != PROGRAM_UNDEFINED; i++)
  3212.          inst->tex_offsets[i] = offset[i];
  3213.       inst->tex_offset_num_offset = i;
  3214.    }
  3215.  
  3216.    switch (sampler_type->sampler_dimensionality) {
  3217.    case GLSL_SAMPLER_DIM_1D:
  3218.       inst->tex_target = (sampler_type->sampler_array)
  3219.          ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX;
  3220.       break;
  3221.    case GLSL_SAMPLER_DIM_2D:
  3222.       inst->tex_target = (sampler_type->sampler_array)
  3223.          ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX;
  3224.       break;
  3225.    case GLSL_SAMPLER_DIM_3D:
  3226.       inst->tex_target = TEXTURE_3D_INDEX;
  3227.       break;
  3228.    case GLSL_SAMPLER_DIM_CUBE:
  3229.       inst->tex_target = (sampler_type->sampler_array)
  3230.          ? TEXTURE_CUBE_ARRAY_INDEX : TEXTURE_CUBE_INDEX;
  3231.       break;
  3232.    case GLSL_SAMPLER_DIM_RECT:
  3233.       inst->tex_target = TEXTURE_RECT_INDEX;
  3234.       break;
  3235.    case GLSL_SAMPLER_DIM_BUF:
  3236.       inst->tex_target = TEXTURE_BUFFER_INDEX;
  3237.       break;
  3238.    case GLSL_SAMPLER_DIM_EXTERNAL:
  3239.       inst->tex_target = TEXTURE_EXTERNAL_INDEX;
  3240.       break;
  3241.    case GLSL_SAMPLER_DIM_MS:
  3242.       inst->tex_target = (sampler_type->sampler_array)
  3243.          ? TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX : TEXTURE_2D_MULTISAMPLE_INDEX;
  3244.       break;
  3245.    default:
  3246.       assert(!"Should not get here.");
  3247.    }
  3248.  
  3249.    this->result = result_src;
  3250. }
  3251.  
  3252. void
  3253. glsl_to_tgsi_visitor::visit(ir_return *ir)
  3254. {
  3255.    if (ir->get_value()) {
  3256.       st_dst_reg l;
  3257.       int i;
  3258.  
  3259.       assert(current_function);
  3260.  
  3261.       ir->get_value()->accept(this);
  3262.       st_src_reg r = this->result;
  3263.  
  3264.       l = st_dst_reg(current_function->return_reg);
  3265.  
  3266.       for (i = 0; i < type_size(current_function->sig->return_type); i++) {
  3267.          emit(ir, TGSI_OPCODE_MOV, l, r);
  3268.          l.index++;
  3269.          r.index++;
  3270.       }
  3271.    }
  3272.  
  3273.    emit(ir, TGSI_OPCODE_RET);
  3274. }
  3275.  
  3276. void
  3277. glsl_to_tgsi_visitor::visit(ir_discard *ir)
  3278. {
  3279.    if (ir->condition) {
  3280.       ir->condition->accept(this);
  3281.       st_src_reg condition = this->result;
  3282.  
  3283.       /* Convert the bool condition to a float so we can negate. */
  3284.       if (native_integers) {
  3285.          st_src_reg temp = get_temp(ir->condition->type);
  3286.          emit(ir, TGSI_OPCODE_AND, st_dst_reg(temp),
  3287.               condition, st_src_reg_for_float(1.0));
  3288.          condition = temp;
  3289.       }
  3290.  
  3291.       condition.negate = ~condition.negate;
  3292.       emit(ir, TGSI_OPCODE_KILL_IF, undef_dst, condition);
  3293.    } else {
   3294.       /* unconditional kill */
  3295.       emit(ir, TGSI_OPCODE_KILL);
  3296.    }
  3297. }
  3298.  
  3299. void
  3300. glsl_to_tgsi_visitor::visit(ir_if *ir)
  3301. {
  3302.    unsigned if_opcode;
  3303.    glsl_to_tgsi_instruction *if_inst;
  3304.  
  3305.    ir->condition->accept(this);
  3306.    assert(this->result.file != PROGRAM_UNDEFINED);
  3307.  
  3308.    if_opcode = native_integers ? TGSI_OPCODE_UIF : TGSI_OPCODE_IF;
  3309.  
  3310.    if_inst = emit(ir->condition, if_opcode, undef_dst, this->result);
  3311.  
  3312.    this->instructions.push_tail(if_inst);
  3313.  
  3314.    visit_exec_list(&ir->then_instructions, this);
  3315.  
  3316.    if (!ir->else_instructions.is_empty()) {
  3317.       emit(ir->condition, TGSI_OPCODE_ELSE);
  3318.       visit_exec_list(&ir->else_instructions, this);
  3319.    }
  3320.  
  3321.    if_inst = emit(ir->condition, TGSI_OPCODE_ENDIF);
  3322. }
  3323.  
  3324.  
  3325. void
  3326. glsl_to_tgsi_visitor::visit(ir_emit_vertex *ir)
  3327. {
  3328.    assert(this->prog->Target == GL_GEOMETRY_PROGRAM_NV);
  3329.  
  3330.    ir->stream->accept(this);
  3331.    emit(ir, TGSI_OPCODE_EMIT, undef_dst, this->result);
  3332. }
  3333.  
  3334. void
  3335. glsl_to_tgsi_visitor::visit(ir_end_primitive *ir)
  3336. {
  3337.    assert(this->prog->Target == GL_GEOMETRY_PROGRAM_NV);
  3338.  
  3339.    ir->stream->accept(this);
  3340.    emit(ir, TGSI_OPCODE_ENDPRIM, undef_dst, this->result);
  3341. }
  3342.  
  3343. glsl_to_tgsi_visitor::glsl_to_tgsi_visitor()
  3344. {
  3345.    result.file = PROGRAM_UNDEFINED;
  3346.    next_temp = 1;
  3347.    next_array = 0;
  3348.    next_signature_id = 1;
  3349.    num_immediates = 0;
  3350.    current_function = NULL;
  3351.    num_address_regs = 0;
  3352.    samplers_used = 0;
  3353.    indirect_addr_consts = false;
  3354.    wpos_transform_const = -1;
  3355.    glsl_version = 0;
  3356.    native_integers = false;
  3357.    mem_ctx = ralloc_context(NULL);
  3358.    ctx = NULL;
  3359.    prog = NULL;
  3360.    shader_program = NULL;
  3361.    shader = NULL;
  3362.    options = NULL;
  3363.    have_sqrt = false;
  3364.    have_fma = false;
  3365. }
  3366.  
  3367. glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor()
  3368. {
  3369.    ralloc_free(mem_ctx);
  3370. }
  3371.  
  3372. extern "C" void free_glsl_to_tgsi_visitor(glsl_to_tgsi_visitor *v)
  3373. {
  3374.    delete v;
  3375. }
  3376.  
  3377.  
  3378. /**
  3379.  * Count resources used by the given gpu program (number of texture
  3380.  * samplers, etc).
  3381.  */
  3382. static void
  3383. count_resources(glsl_to_tgsi_visitor *v, gl_program *prog)
  3384. {
  3385.    v->samplers_used = 0;
  3386.  
  3387.    foreach_in_list(glsl_to_tgsi_instruction, inst, &v->instructions) {
  3388.       if (is_tex_instruction(inst->op)) {
  3389.          for (int i = 0; i < inst->sampler_array_size; i++) {
  3390.             v->samplers_used |= 1 << (inst->sampler.index + i);
  3391.  
  3392.             if (inst->tex_shadow) {
  3393.                prog->ShadowSamplers |= 1 << (inst->sampler.index + i);
  3394.             }
  3395.          }
  3396.       }
  3397.    }
  3398.    prog->SamplersUsed = v->samplers_used;
  3399.  
  3400.    if (v->shader_program != NULL)
  3401.       _mesa_update_shader_textures_used(v->shader_program, prog);
  3402. }
  3403.  
  3404. /**
  3405.  * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which
  3406.  * are read from the given src in this instruction
  3407.  */
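/* For example, with dst.writemask = WRITEMASK_XY and a ".yzww" swizzle on
 * src, swizzle components 0 and 1 select channels Y and Z, so the function
 * returns WRITEMASK_Y | WRITEMASK_Z.
 */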
  3408. static int
  3409. get_src_arg_mask(st_dst_reg dst, st_src_reg src)
  3410. {
  3411.    int read_mask = 0, comp;
  3412.  
  3413.    /* Now, given the src swizzle and the written channels, find which
  3414.     * components are actually read
  3415.     */
  3416.    for (comp = 0; comp < 4; ++comp) {
  3417.       const unsigned coord = GET_SWZ(src.swizzle, comp);
  3418.       assert(coord < 4);
  3419.       if (dst.writemask & (1 << comp) && coord <= SWIZZLE_W)
  3420.          read_mask |= 1 << coord;
  3421.    }
  3422.  
  3423.    return read_mask;
  3424. }
  3425.  
  3426. /**
  3427.  * This pass replaces CMP T0, T1 T2 T0 with MOV T0, T2 when the CMP
  3428.  * instruction is the first instruction to write to register T0.  There are
  3429.  * several lowering passes done in GLSL IR (e.g. branches and
  3430.  * relative addressing) that create a large number of conditional assignments
  3431.  * that ir_to_mesa converts to CMP instructions like the one mentioned above.
  3432.  *
  3433.  * Here is why this conversion is safe:
  3434.  * CMP T0, T1 T2 T0 can be expanded to:
  3435.  * if (T1 < 0.0)
  3436.  *   MOV T0, T2;
  3437.  * else
  3438.  *   MOV T0, T0;
  3439.  *
  3440.  * If (T1 < 0.0) evaluates to true then our replacement MOV T0, T2 is the same
  3441.  * as the original program.  If (T1 < 0.0) evaluates to false, executing
  3442.  * MOV T0, T0 will store a garbage value in T0 since T0 is uninitialized.
  3443.  * Therefore, it doesn't matter that we are replacing MOV T0, T0 with MOV T0, T2
  3444.  * because any instruction that was going to read from T0 after this was going
  3445.  * to read a garbage value anyway.
  3446.  */
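/* Concretely, if the first write to TEMP[0] in a program is
 *
 *    CMP TEMP[0], TEMP[1], TEMP[2], TEMP[0];
 *
 * this pass rewrites it as
 *
 *    MOV TEMP[0], TEMP[2];
 *
 * provided the write mask matches the channels of TEMP[0] read by the third
 * operand.
 */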
  3447. void
  3448. glsl_to_tgsi_visitor::simplify_cmp(void)
  3449. {
  3450.    int tempWritesSize = 0;
  3451.    unsigned *tempWrites = NULL;
  3452.    unsigned outputWrites[MAX_PROGRAM_OUTPUTS];
  3453.  
  3454.    memset(outputWrites, 0, sizeof(outputWrites));
  3455.  
  3456.    foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
  3457.       unsigned prevWriteMask = 0;
  3458.  
  3459.       /* Give up if we encounter relative addressing or flow control. */
  3460.       if (inst->dst[0].reladdr ||
  3461.           inst->dst[1].reladdr ||
  3462.           tgsi_get_opcode_info(inst->op)->is_branch ||
  3463.           inst->op == TGSI_OPCODE_BGNSUB ||
  3464.           inst->op == TGSI_OPCODE_CONT ||
  3465.           inst->op == TGSI_OPCODE_END ||
  3466.           inst->op == TGSI_OPCODE_ENDSUB ||
  3467.           inst->op == TGSI_OPCODE_RET) {
  3468.          break;
  3469.       }
  3470.  
  3471.       if (inst->dst[0].file == PROGRAM_OUTPUT) {
  3472.          assert(inst->dst[0].index < MAX_PROGRAM_OUTPUTS);
  3473.          prevWriteMask = outputWrites[inst->dst[0].index];
  3474.          outputWrites[inst->dst[0].index] |= inst->dst[0].writemask;
  3475.       } else if (inst->dst[0].file == PROGRAM_TEMPORARY) {
  3476.          if (inst->dst[0].index >= tempWritesSize) {
  3477.             const int inc = 4096;
  3478.  
  3479.             tempWrites = (unsigned*)
  3480.                          realloc(tempWrites,
  3481.                                  (tempWritesSize + inc) * sizeof(unsigned));
  3482.             if (!tempWrites)
  3483.                return;
  3484.  
  3485.             memset(tempWrites + tempWritesSize, 0, inc * sizeof(unsigned));
  3486.             tempWritesSize += inc;
  3487.          }
  3488.  
  3489.          prevWriteMask = tempWrites[inst->dst[0].index];
  3490.          tempWrites[inst->dst[0].index] |= inst->dst[0].writemask;
  3491.       } else
  3492.          continue;
  3493.  
  3494.       /* For a CMP to be considered a conditional write, the destination
  3495.        * register and source register two must be the same. */
  3496.       if (inst->op == TGSI_OPCODE_CMP
  3497.           && !(inst->dst[0].writemask & prevWriteMask)
  3498.           && inst->src[2].file == inst->dst[0].file
  3499.           && inst->src[2].index == inst->dst[0].index
  3500.           && inst->dst[0].writemask == get_src_arg_mask(inst->dst[0], inst->src[2])) {
  3501.  
  3502.          inst->op = TGSI_OPCODE_MOV;
  3503.          inst->src[0] = inst->src[1];
  3504.       }
  3505.    }
  3506.  
  3507.    free(tempWrites);
  3508. }
  3509.  
  3510. /* Replaces all references to a temporary register index with another index. */
  3511. void
  3512. glsl_to_tgsi_visitor::rename_temp_register(int index, int new_index)
  3513. {
  3514.    foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
  3515.       unsigned j;
  3516.  
  3517.       for (j = 0; j < num_inst_src_regs(inst->op); j++) {
  3518.          if (inst->src[j].file == PROGRAM_TEMPORARY &&
  3519.              inst->src[j].index == index) {
  3520.             inst->src[j].index = new_index;
  3521.          }
  3522.       }
  3523.  
  3524.       for (j = 0; j < inst->tex_offset_num_offset; j++) {
  3525.          if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY &&
  3526.              inst->tex_offsets[j].index == index) {
  3527.             inst->tex_offsets[j].index = new_index;
  3528.          }
  3529.       }
  3530.  
  3531.       for (j = 0; j < num_inst_dst_regs(inst->op); j++) {
  3532.          if (inst->dst[j].file == PROGRAM_TEMPORARY && inst->dst[j].index == index) {
  3533.             inst->dst[j].index = new_index;
  3534.          }
  3535.       }
  3536.    }
  3537. }
  3538.  
  3539. int
  3540. glsl_to_tgsi_visitor::get_first_temp_read(int index)
  3541. {
  3542.    int depth = 0; /* loop depth */
  3543.    int loop_start = -1; /* index of the first active BGNLOOP (if any) */
  3544.    unsigned i = 0, j;
  3545.  
  3546.    foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
  3547.       for (j = 0; j < num_inst_src_regs(inst->op); j++) {
  3548.          if (inst->src[j].file == PROGRAM_TEMPORARY &&
  3549.              inst->src[j].index == index) {
  3550.             return (depth == 0) ? i : loop_start;
  3551.          }
  3552.       }
  3553.       for (j = 0; j < inst->tex_offset_num_offset; j++) {
  3554.          if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY &&
  3555.              inst->tex_offsets[j].index == index) {
  3556.             return (depth == 0) ? i : loop_start;
  3557.          }
  3558.       }
  3559.       if (inst->op == TGSI_OPCODE_BGNLOOP) {
   3560.          if (depth++ == 0)
  3561.             loop_start = i;
  3562.       } else if (inst->op == TGSI_OPCODE_ENDLOOP) {
  3563.          if (--depth == 0)
  3564.             loop_start = -1;
  3565.       }
  3566.       assert(depth >= 0);
  3567.       i++;
  3568.    }
  3569.    return -1;
  3570. }
  3571.  
  3572. int
  3573. glsl_to_tgsi_visitor::get_first_temp_write(int index)
  3574. {
  3575.    int depth = 0; /* loop depth */
  3576.    int loop_start = -1; /* index of the first active BGNLOOP (if any) */
  3577.    int i = 0;
  3578.    unsigned j;
  3579.  
  3580.    foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
  3581.       for (j = 0; j < num_inst_dst_regs(inst->op); j++) {
  3582.          if (inst->dst[j].file == PROGRAM_TEMPORARY && inst->dst[j].index == index) {
  3583.             return (depth == 0) ? i : loop_start;
  3584.          }
  3585.       }
  3586.       if (inst->op == TGSI_OPCODE_BGNLOOP) {
   3587.          if (depth++ == 0)
  3588.             loop_start = i;
  3589.       } else if (inst->op == TGSI_OPCODE_ENDLOOP) {
  3590.          if (--depth == 0)
  3591.             loop_start = -1;
  3592.       }
  3593.       assert(depth >= 0);
  3594.       i++;
  3595.    }
  3596.    return -1;
  3597. }
  3598.  
  3599. int
  3600. glsl_to_tgsi_visitor::get_last_temp_read(int index)
  3601. {
  3602.    int depth = 0; /* loop depth */
  3603.    int last = -1; /* index of last instruction that reads the temporary */
  3604.    unsigned i = 0, j;
  3605.  
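   /* A read that happens inside a loop is recorded as -2 and later promoted
    * to the index of the matching ENDLOOP, since the temporary may be read
    * again on a following iteration.
    */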
  3606.    foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
  3607.       for (j = 0; j < num_inst_src_regs(inst->op); j++) {
  3608.          if (inst->src[j].file == PROGRAM_TEMPORARY &&
  3609.              inst->src[j].index == index) {
  3610.             last = (depth == 0) ? i : -2;
  3611.          }
  3612.       }
  3613.       for (j = 0; j < inst->tex_offset_num_offset; j++) {
  3614.           if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY &&
  3615.               inst->tex_offsets[j].index == index)
  3616.               last = (depth == 0) ? i : -2;
  3617.       }
  3618.       if (inst->op == TGSI_OPCODE_BGNLOOP)
  3619.          depth++;
  3620.       else if (inst->op == TGSI_OPCODE_ENDLOOP)
  3621.          if (--depth == 0 && last == -2)
  3622.             last = i;
  3623.       assert(depth >= 0);
  3624.       i++;
  3625.    }
  3626.    assert(last >= -1);
  3627.    return last;
  3628. }
  3629.  
  3630. int
  3631. glsl_to_tgsi_visitor::get_last_temp_write(int index)
  3632. {
  3633.    int depth = 0; /* loop depth */
  3634.    int last = -1; /* index of last instruction that writes to the temporary */
  3635.    int i = 0;
  3636.    unsigned j;
  3637.  
  3638.    foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
  3639.       for (j = 0; j < num_inst_dst_regs(inst->op); j++) {
  3640.          if (inst->dst[j].file == PROGRAM_TEMPORARY && inst->dst[j].index == index)
  3641.             last = (depth == 0) ? i : -2;
  3642.       }
  3643.  
  3644.       if (inst->op == TGSI_OPCODE_BGNLOOP)
  3645.          depth++;
  3646.       else if (inst->op == TGSI_OPCODE_ENDLOOP)
  3647.          if (--depth == 0 && last == -2)
  3648.             last = i;
  3649.       assert(depth >= 0);
  3650.       i++;
  3651.    }
  3652.    assert(last >= -1);
  3653.    return last;
  3654. }
  3655.  
  3656. /*
  3657.  * On a basic block basis, tracks available PROGRAM_TEMPORARY register
  3658.  * channels for copy propagation and updates following instructions to
  3659.  * use the original versions.
  3660.  *
  3661.  * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
  3662.  * will occur.  As an example, a TXP production before this pass:
  3663.  *
  3664.  * 0: MOV TEMP[1], INPUT[4].xyyy;
  3665.  * 1: MOV TEMP[1].w, INPUT[4].wwww;
  3666.  * 2: TXP TEMP[2], TEMP[1], texture[0], 2D;
  3667.  *
  3668.  * and after:
  3669.  *
  3670.  * 0: MOV TEMP[1], INPUT[4].xyyy;
  3671.  * 1: MOV TEMP[1].w, INPUT[4].wwww;
  3672.  * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
  3673.  *
  3674.  * which allows for dead code elimination on TEMP[1]'s writes.
  3675.  */
  3676. void
  3677. glsl_to_tgsi_visitor::copy_propagate(void)
  3678. {
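   /* acp[] ("available copy propagation") has one slot per temporary-register
    * channel: acp[4 * reg + chan] points at the MOV whose copy of that channel
    * is still valid, and acp_level[] records the if/else nesting depth at
    * which the copy was recorded.
    */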
  3679.    glsl_to_tgsi_instruction **acp = rzalloc_array(mem_ctx,
  3680.                                                   glsl_to_tgsi_instruction *,
  3681.                                                   this->next_temp * 4);
  3682.    int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
  3683.    int level = 0;
  3684.  
  3685.    foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
  3686.       assert(inst->dst[0].file != PROGRAM_TEMPORARY
  3687.              || inst->dst[0].index < this->next_temp);
  3688.  
  3689.       /* First, do any copy propagation possible into the src regs. */
  3690.       for (int r = 0; r < 3; r++) {
  3691.          glsl_to_tgsi_instruction *first = NULL;
  3692.          bool good = true;
  3693.          int acp_base = inst->src[r].index * 4;
  3694.  
  3695.          if (inst->src[r].file != PROGRAM_TEMPORARY ||
  3696.              inst->src[r].reladdr ||
  3697.              inst->src[r].reladdr2)
  3698.             continue;
  3699.  
  3700.          /* See if we can find entries in the ACP consisting of MOVs
  3701.           * from the same src register for all the swizzled channels
  3702.           * of this src register reference.
  3703.           */
  3704.          for (int i = 0; i < 4; i++) {
  3705.             int src_chan = GET_SWZ(inst->src[r].swizzle, i);
  3706.             glsl_to_tgsi_instruction *copy_chan = acp[acp_base + src_chan];
  3707.  
  3708.             if (!copy_chan) {
  3709.                good = false;
  3710.                break;
  3711.             }
  3712.  
  3713.             assert(acp_level[acp_base + src_chan] <= level);
  3714.  
  3715.             if (!first) {
  3716.                first = copy_chan;
  3717.             } else {
  3718.                if (first->src[0].file != copy_chan->src[0].file ||
  3719.                    first->src[0].index != copy_chan->src[0].index ||
  3720.                    first->src[0].double_reg2 != copy_chan->src[0].double_reg2 ||
  3721.                    first->src[0].index2D != copy_chan->src[0].index2D) {
  3722.                   good = false;
  3723.                   break;
  3724.                }
  3725.             }
  3726.          }
  3727.  
  3728.          if (good) {
  3729.             /* We've now validated that we can copy-propagate to
  3730.              * replace this src register reference.  Do it.
  3731.              */
  3732.             inst->src[r].file = first->src[0].file;
  3733.             inst->src[r].index = first->src[0].index;
  3734.             inst->src[r].index2D = first->src[0].index2D;
  3735.             inst->src[r].has_index2 = first->src[0].has_index2;
  3736.             inst->src[r].double_reg2 = first->src[0].double_reg2;
  3737.  
  3738.             int swizzle = 0;
  3739.             for (int i = 0; i < 4; i++) {
  3740.                int src_chan = GET_SWZ(inst->src[r].swizzle, i);
  3741.                glsl_to_tgsi_instruction *copy_inst = acp[acp_base + src_chan];
  3742.                swizzle |= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) << (3 * i));
  3743.             }
  3744.             inst->src[r].swizzle = swizzle;
  3745.          }
  3746.       }
  3747.  
  3748.       switch (inst->op) {
  3749.       case TGSI_OPCODE_BGNLOOP:
  3750.       case TGSI_OPCODE_ENDLOOP:
  3751.          /* End of a basic block, clear the ACP entirely. */
  3752.          memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
  3753.          break;
  3754.  
  3755.       case TGSI_OPCODE_IF:
  3756.       case TGSI_OPCODE_UIF:
  3757.          ++level;
  3758.          break;
  3759.  
  3760.       case TGSI_OPCODE_ENDIF:
  3761.       case TGSI_OPCODE_ELSE:
   3762.          /* Clear from the ACP all channels written inside the block, but
   3763.           * leave those that were not touched.
  3764.           */
  3765.          for (int r = 0; r < this->next_temp; r++) {
  3766.             for (int c = 0; c < 4; c++) {
  3767.                if (!acp[4 * r + c])
  3768.                   continue;
  3769.  
  3770.                if (acp_level[4 * r + c] >= level)
  3771.                   acp[4 * r + c] = NULL;
  3772.             }
  3773.          }
  3774.          if (inst->op == TGSI_OPCODE_ENDIF)
  3775.             --level;
  3776.          break;
  3777.  
  3778.       default:
  3779.          /* Continuing the block, clear any written channels from
  3780.           * the ACP.
  3781.           */
  3782.          for (int d = 0; d < 2; d++) {
  3783.             if (inst->dst[d].file == PROGRAM_TEMPORARY && inst->dst[d].reladdr) {
  3784.                /* Any temporary might be written, so no copy propagation
  3785.                 * across this instruction.
  3786.                 */
  3787.                memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
  3788.             } else if (inst->dst[d].file == PROGRAM_OUTPUT &&
  3789.                        inst->dst[d].reladdr) {
  3790.                /* Any output might be written, so no copy propagation
  3791.                 * from outputs across this instruction.
  3792.                 */
  3793.                for (int r = 0; r < this->next_temp; r++) {
  3794.                   for (int c = 0; c < 4; c++) {
  3795.                      if (!acp[4 * r + c])
  3796.                         continue;
  3797.  
  3798.                      if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT)
  3799.                         acp[4 * r + c] = NULL;
  3800.                   }
  3801.                }
  3802.             } else if (inst->dst[d].file == PROGRAM_TEMPORARY ||
  3803.                        inst->dst[d].file == PROGRAM_OUTPUT) {
  3804.                /* Clear where it's used as dst. */
  3805.                if (inst->dst[d].file == PROGRAM_TEMPORARY) {
  3806.                   for (int c = 0; c < 4; c++) {
  3807.                      if (inst->dst[d].writemask & (1 << c))
  3808.                         acp[4 * inst->dst[d].index + c] = NULL;
  3809.                   }
  3810.                }
  3811.  
  3812.                /* Clear where it's used as src. */
  3813.                for (int r = 0; r < this->next_temp; r++) {
  3814.                   for (int c = 0; c < 4; c++) {
  3815.                      if (!acp[4 * r + c])
  3816.                         continue;
  3817.  
  3818.                      int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c);
  3819.  
  3820.                      if (acp[4 * r + c]->src[0].file == inst->dst[d].file &&
  3821.                          acp[4 * r + c]->src[0].index == inst->dst[d].index &&
  3822.                          inst->dst[d].writemask & (1 << src_chan)) {
  3823.                         acp[4 * r + c] = NULL;
  3824.                      }
  3825.                   }
  3826.                }
  3827.             }
  3828.          }
  3829.          break;
  3830.       }
  3831.  
  3832.       /* If this is a copy, add it to the ACP. */
  3833.       if (inst->op == TGSI_OPCODE_MOV &&
  3834.           inst->dst[0].file == PROGRAM_TEMPORARY &&
  3835.           !(inst->dst[0].file == inst->src[0].file &&
  3836.              inst->dst[0].index == inst->src[0].index) &&
  3837.           !inst->dst[0].reladdr &&
  3838.           !inst->saturate &&
  3839.           inst->src[0].file != PROGRAM_ARRAY &&
  3840.           !inst->src[0].reladdr &&
  3841.           !inst->src[0].reladdr2 &&
  3842.           !inst->src[0].negate) {
  3843.          for (int i = 0; i < 4; i++) {
  3844.             if (inst->dst[0].writemask & (1 << i)) {
  3845.                acp[4 * inst->dst[0].index + i] = inst;
  3846.                acp_level[4 * inst->dst[0].index + i] = level;
  3847.             }
  3848.          }
  3849.       }
  3850.    }
  3851.  
  3852.    ralloc_free(acp_level);
  3853.    ralloc_free(acp);
  3854. }
  3855.  
  3856. /*
  3857.  * On a basic block basis, tracks available PROGRAM_TEMPORARY registers for dead
  3858.  * code elimination.
  3859.  *
  3860.  * The glsl_to_tgsi_visitor lazily produces code assuming that this pass
  3861.  * will occur.  As an example, a TXP production after copy propagation but
  3862.  * before this pass:
  3863.  *
  3864.  * 0: MOV TEMP[1], INPUT[4].xyyy;
  3865.  * 1: MOV TEMP[1].w, INPUT[4].wwww;
  3866.  * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
  3867.  *
  3868.  * and after this pass:
  3869.  *
  3870.  * 0: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
  3871.  */
  3872. int
  3873. glsl_to_tgsi_visitor::eliminate_dead_code(void)
  3874. {
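   /* writes[4 * reg + chan] tracks the most recent write to each temporary
    * channel that has not been read since, and write_level[] records its
    * if/else nesting depth.  A tracked write that is superseded at the same
    * or a deeper level, or that is never read before the end of the program,
    * gets that channel flagged in its dead_mask.
    */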
  3875.    glsl_to_tgsi_instruction **writes = rzalloc_array(mem_ctx,
  3876.                                                      glsl_to_tgsi_instruction *,
  3877.                                                      this->next_temp * 4);
  3878.    int *write_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
  3879.    int level = 0;
  3880.    int removed = 0;
  3881.  
  3882.    foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) {
  3883.       assert(inst->dst[0].file != PROGRAM_TEMPORARY
  3884.              || inst->dst[0].index < this->next_temp);
  3885.  
  3886.       switch (inst->op) {
  3887.       case TGSI_OPCODE_BGNLOOP:
  3888.       case TGSI_OPCODE_ENDLOOP:
  3889.       case TGSI_OPCODE_CONT:
  3890.       case TGSI_OPCODE_BRK:
  3891.          /* End of a basic block, clear the write array entirely.
  3892.           *
  3893.           * This keeps us from killing dead code when the writes are
  3894.           * on either side of a loop, even when the register isn't touched
  3895.           * inside the loop.  However, glsl_to_tgsi_visitor doesn't seem to emit
  3896.           * dead code of this type, so it shouldn't make a difference as long as
  3897.           * the dead code elimination pass in the GLSL compiler does its job.
  3898.           */
  3899.          memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
  3900.          break;
  3901.  
  3902.       case TGSI_OPCODE_ENDIF:
  3903.       case TGSI_OPCODE_ELSE:
  3904.          /* Promote the recorded level of all channels written inside the
  3905.           * preceding if or else block to the level above the if/else block.
  3906.           */
  3907.          for (int r = 0; r < this->next_temp; r++) {
  3908.             for (int c = 0; c < 4; c++) {
  3909.                if (!writes[4 * r + c])
  3910.                   continue;
  3911.  
  3912.                if (write_level[4 * r + c] == level)
  3913.                   write_level[4 * r + c] = level-1;
  3914.             }
  3915.          }
   3916.          if (inst->op == TGSI_OPCODE_ENDIF)
  3917.             --level;
  3918.          break;
  3919.  
  3920.       case TGSI_OPCODE_IF:
  3921.       case TGSI_OPCODE_UIF:
  3922.          ++level;
  3923.          /* fallthrough to default case to mark the condition as read */
  3924.       default:
  3925.          /* Continuing the block, clear any channels from the write array that
  3926.           * are read by this instruction.
  3927.           */
  3928.          for (unsigned i = 0; i < ARRAY_SIZE(inst->src); i++) {
  3929.             if (inst->src[i].file == PROGRAM_TEMPORARY && inst->src[i].reladdr){
  3930.                /* Any temporary might be read, so no dead code elimination
  3931.                 * across this instruction.
  3932.                 */
  3933.                memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
  3934.             } else if (inst->src[i].file == PROGRAM_TEMPORARY) {
  3935.                /* Clear where it's used as src. */
  3936.                int src_chans = 1 << GET_SWZ(inst->src[i].swizzle, 0);
  3937.                src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 1);
  3938.                src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 2);
  3939.                src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 3);
  3940.  
  3941.                for (int c = 0; c < 4; c++) {
  3942.                   if (src_chans & (1 << c))
  3943.                      writes[4 * inst->src[i].index + c] = NULL;
  3944.                }
  3945.             }
  3946.          }
  3947.          for (unsigned i = 0; i < inst->tex_offset_num_offset; i++) {
  3948.             if (inst->tex_offsets[i].file == PROGRAM_TEMPORARY && inst->tex_offsets[i].reladdr){
  3949.                /* Any temporary might be read, so no dead code elimination
  3950.                 * across this instruction.
  3951.                 */
  3952.                memset(writes, 0, sizeof(*writes) * this->next_temp * 4);
  3953.             } else if (inst->tex_offsets[i].file == PROGRAM_TEMPORARY) {
  3954.                /* Clear where it's used as src. */
  3955.                int src_chans = 1 << GET_SWZ(inst->tex_offsets[i].swizzle, 0);
  3956.                src_chans |= 1 << GET_SWZ(inst->tex_offsets[i].swizzle, 1);
  3957.                src_chans |= 1 << GET_SWZ(inst->tex_offsets[i].swizzle, 2);
  3958.                src_chans |= 1 << GET_SWZ(inst->tex_offsets[i].swizzle, 3);
  3959.  
  3960.                for (int c = 0; c < 4; c++) {
  3961.                   if (src_chans & (1 << c))
  3962.                      writes[4 * inst->tex_offsets[i].index + c] = NULL;
  3963.                }
  3964.             }
  3965.          }
  3966.          break;
  3967.       }
  3968.  
  3969.       /* If this instruction writes to a temporary, add it to the write array.
  3970.        * If there is already an instruction in the write array for one or more
  3971.        * of the channels, flag that channel write as dead.
  3972.        */
  3973.       for (unsigned i = 0; i < ARRAY_SIZE(inst->dst); i++) {
  3974.          if (inst->dst[i].file == PROGRAM_TEMPORARY &&
  3975.              !inst->dst[i].reladdr &&
  3976.              !inst->saturate) {
  3977.             for (int c = 0; c < 4; c++) {
  3978.                if (inst->dst[i].writemask & (1 << c)) {
  3979.                   if (writes[4 * inst->dst[i].index + c]) {
  3980.                      if (write_level[4 * inst->dst[i].index + c] < level)
  3981.                         continue;
  3982.                      else
  3983.                         writes[4 * inst->dst[i].index + c]->dead_mask |= (1 << c);
  3984.                   }
  3985.                   writes[4 * inst->dst[i].index + c] = inst;
  3986.                   write_level[4 * inst->dst[i].index + c] = level;
  3987.                }
  3988.             }
  3989.          }
  3990.       }
  3991.    }
  3992.  
  3993.    /* Anything still in the write array at this point is dead code. */
  3994.    for (int r = 0; r < this->next_temp; r++) {
  3995.       for (int c = 0; c < 4; c++) {
  3996.          glsl_to_tgsi_instruction *inst = writes[4 * r + c];
  3997.          if (inst)
  3998.             inst->dead_mask |= (1 << c);
  3999.       }
  4000.    }
  4001.  
  4002.    /* Now actually remove the instructions that are completely dead and update
  4003.     * the writemask of other instructions with dead channels.
  4004.     */
  4005.    foreach_in_list_safe(glsl_to_tgsi_instruction, inst, &this->instructions) {
  4006.       if (!inst->dead_mask || !inst->dst[0].writemask)
  4007.          continue;
  4008.       else if ((inst->dst[0].writemask & ~inst->dead_mask) == 0) {
  4009.          inst->remove();
  4010.          delete inst;
  4011.          removed++;
  4012.       } else {
  4013.          if (inst->dst[0].type == GLSL_TYPE_DOUBLE) {
  4014.             if (inst->dead_mask == WRITEMASK_XY ||
  4015.                 inst->dead_mask == WRITEMASK_ZW)
  4016.                inst->dst[0].writemask &= ~(inst->dead_mask);
  4017.          } else
  4018.             inst->dst[0].writemask &= ~(inst->dead_mask);
  4019.       }
  4020.    }
  4021.  
  4022.    ralloc_free(write_level);
  4023.    ralloc_free(writes);
  4024.  
  4025.    return removed;
  4026. }
  4027.  
   4028. /* Merge the two halves of a DFRACEXP (one destination each) into a single instruction. */
  4029. void
  4030. glsl_to_tgsi_visitor::merge_two_dsts(void)
  4031. {
  4032.    foreach_in_list_safe(glsl_to_tgsi_instruction, inst, &this->instructions) {
  4033.       glsl_to_tgsi_instruction *inst2;
  4034.       bool merged;
  4035.       if (num_inst_dst_regs(inst->op) != 2)
  4036.          continue;
  4037.  
  4038.       if (inst->dst[0].file != PROGRAM_UNDEFINED &&
  4039.           inst->dst[1].file != PROGRAM_UNDEFINED)
  4040.          continue;
  4041.  
  4042.       inst2 = (glsl_to_tgsi_instruction *) inst->next;
  4043.       do {
  4044.  
  4045.          if (inst->src[0].file == inst2->src[0].file &&
  4046.              inst->src[0].index == inst2->src[0].index &&
  4047.              inst->src[0].type == inst2->src[0].type &&
  4048.              inst->src[0].swizzle == inst2->src[0].swizzle)
  4049.             break;
  4050.          inst2 = (glsl_to_tgsi_instruction *) inst2->next;
  4051.       } while (inst2);
  4052.  
  4053.       if (!inst2)
  4054.          continue;
  4055.       merged = false;
  4056.       if (inst->dst[0].file == PROGRAM_UNDEFINED) {
  4057.          merged = true;
  4058.          inst->dst[0] = inst2->dst[0];
  4059.       } else if (inst->dst[1].file == PROGRAM_UNDEFINED) {
  4060.          inst->dst[1] = inst2->dst[1];
  4061.          merged = true;
  4062.       }
  4063.  
  4064.       if (merged) {
  4065.          inst2->remove();
  4066.          delete inst2;
  4067.       }
  4068.    }
  4069. }
  4070.  
  4071. /* Merges temporary registers together where possible to reduce the number of
  4072.  * registers needed to run a program.
  4073.  *
  4074.  * Produces optimal code only after copy propagation and dead code elimination
  4075.  * have been run. */
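/* For example, if TEMP[3] is first written at instruction 2 and last read at
 * instruction 10, while TEMP[7] is first written at instruction 12, the two
 * live ranges do not overlap and every reference to TEMP[7] is renamed to
 * TEMP[3].
 */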
  4076. void
  4077. glsl_to_tgsi_visitor::merge_registers(void)
  4078. {
  4079.    int *last_reads = rzalloc_array(mem_ctx, int, this->next_temp);
  4080.    int *first_writes = rzalloc_array(mem_ctx, int, this->next_temp);
  4081.    int i, j;
  4082.  
  4083.    /* Read the indices of the last read and first write to each temp register
  4084.     * into an array so that we don't have to traverse the instruction list as
  4085.     * much. */
  4086.    for (i = 0; i < this->next_temp; i++) {
  4087.       last_reads[i] = get_last_temp_read(i);
  4088.       first_writes[i] = get_first_temp_write(i);
  4089.    }
  4090.  
  4091.    /* Start looking for registers with non-overlapping usages that can be
  4092.     * merged together. */
  4093.    for (i = 0; i < this->next_temp; i++) {
  4094.       /* Don't touch unused registers. */
  4095.       if (last_reads[i] < 0 || first_writes[i] < 0) continue;
  4096.  
  4097.       for (j = 0; j < this->next_temp; j++) {
  4098.          /* Don't touch unused registers. */
  4099.          if (last_reads[j] < 0 || first_writes[j] < 0) continue;
  4100.  
  4101.          /* We can merge the two registers if the first write to j is after or
  4102.           * in the same instruction as the last read from i.  Note that the
  4103.           * register at index i will always be used earlier or at the same time
  4104.           * as the register at index j. */
  4105.          if (first_writes[i] <= first_writes[j] &&
  4106.              last_reads[i] <= first_writes[j]) {
  4107.             rename_temp_register(j, i); /* Replace all references to j with i.*/
  4108.  
  4109.             /* Update the first_writes and last_reads arrays with the new
  4110.              * values for the merged register index, and mark the newly unused
  4111.              * register index as such. */
  4112.             last_reads[i] = last_reads[j];
  4113.             first_writes[j] = -1;
  4114.             last_reads[j] = -1;
  4115.          }
  4116.       }
  4117.    }
  4118.  
  4119.    ralloc_free(last_reads);
  4120.    ralloc_free(first_writes);
  4121. }
  4122.  
  4123. /* Reassign indices to temporary registers by reusing unused indices created
  4124.  * by optimization passes. */
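/* For example, if only TEMP[0], TEMP[2] and TEMP[5] are still read after the
 * passes above, they become TEMP[0], TEMP[1] and TEMP[2] and next_temp drops
 * to 3.
 */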
  4125. void
  4126. glsl_to_tgsi_visitor::renumber_registers(void)
  4127. {
  4128.    int i = 0;
  4129.    int new_index = 0;
  4130.  
  4131.    for (i = 0; i < this->next_temp; i++) {
  4132.       if (get_first_temp_read(i) < 0) continue;
  4133.       if (i != new_index)
  4134.          rename_temp_register(i, new_index);
  4135.       new_index++;
  4136.    }
  4137.  
  4138.    this->next_temp = new_index;
  4139. }
  4140.  
  4141. /**
   4142.  * Builds a fragment program which implements the current pixel transfer ops.
  4143.  * Based on get_pixel_transfer_program in st_atom_pixeltransfer.c.
  4144.  */
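/*
 * The generated program samples the source color (TEX), optionally applies
 * scale and bias (MAD with STATE_PT_SCALE / STATE_PT_BIAS state), optionally
 * performs the four pixel-map lookups with two TEX instructions, and then
 * appends the original program's instructions with reads of COL0 redirected
 * to the sampled color temporary.
 */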
  4145. extern "C" void
  4146. get_pixel_transfer_visitor(struct st_fragment_program *fp,
  4147.                            glsl_to_tgsi_visitor *original,
  4148.                            int scale_and_bias, int pixel_maps)
  4149. {
  4150.    glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor();
  4151.    struct st_context *st = st_context(original->ctx);
  4152.    struct gl_program *prog = &fp->Base.Base;
  4153.    struct gl_program_parameter_list *params = _mesa_new_parameter_list();
  4154.    st_src_reg coord, src0;
  4155.    st_dst_reg dst0;
  4156.    glsl_to_tgsi_instruction *inst;
  4157.  
   4158.    /* Copy attributes of the glsl_to_tgsi_visitor from the original shader. */
  4159.    v->ctx = original->ctx;
  4160.    v->prog = prog;
  4161.    v->shader_program = NULL;
  4162.    v->shader = NULL;
  4163.    v->glsl_version = original->glsl_version;
  4164.    v->native_integers = original->native_integers;
  4165.    v->options = original->options;
  4166.    v->next_temp = original->next_temp;
  4167.    v->num_address_regs = original->num_address_regs;
  4168.    v->samplers_used = prog->SamplersUsed = original->samplers_used;
  4169.    v->indirect_addr_consts = original->indirect_addr_consts;
  4170.    memcpy(&v->immediates, &original->immediates, sizeof(v->immediates));
  4171.    v->num_immediates = original->num_immediates;
  4172.  
  4173.    /*
  4174.     * Get initial pixel color from the texture.
  4175.     * TEX colorTemp, fragment.texcoord[0], texture[0], 2D;
  4176.     */
  4177.    coord = st_src_reg(PROGRAM_INPUT, VARYING_SLOT_TEX0, glsl_type::vec2_type);
  4178.    src0 = v->get_temp(glsl_type::vec4_type);
  4179.    dst0 = st_dst_reg(src0);
  4180.    inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord);
  4181.    inst->sampler_array_size = 1;
  4182.    inst->tex_target = TEXTURE_2D_INDEX;
  4183.  
  4184.    prog->InputsRead |= VARYING_BIT_TEX0;
  4185.    prog->SamplersUsed |= (1 << 0); /* mark sampler 0 as used */
  4186.    v->samplers_used |= (1 << 0);
  4187.  
  4188.    if (scale_and_bias) {
  4189.       static const gl_state_index scale_state[STATE_LENGTH] =
  4190.          { STATE_INTERNAL, STATE_PT_SCALE,
  4191.            (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 };
  4192.       static const gl_state_index bias_state[STATE_LENGTH] =
  4193.          { STATE_INTERNAL, STATE_PT_BIAS,
  4194.            (gl_state_index) 0, (gl_state_index) 0, (gl_state_index) 0 };
  4195.       GLint scale_p, bias_p;
  4196.       st_src_reg scale, bias;
  4197.  
  4198.       scale_p = _mesa_add_state_reference(params, scale_state);
  4199.       bias_p = _mesa_add_state_reference(params, bias_state);
  4200.  
  4201.       /* MAD colorTemp, colorTemp, scale, bias; */
  4202.       scale = st_src_reg(PROGRAM_STATE_VAR, scale_p, GLSL_TYPE_FLOAT);
  4203.       bias = st_src_reg(PROGRAM_STATE_VAR, bias_p, GLSL_TYPE_FLOAT);
  4204.       inst = v->emit(NULL, TGSI_OPCODE_MAD, dst0, src0, scale, bias);
  4205.    }
  4206.  
  4207.    if (pixel_maps) {
  4208.       st_src_reg temp = v->get_temp(glsl_type::vec4_type);
  4209.       st_dst_reg temp_dst = st_dst_reg(temp);
  4210.  
  4211.       assert(st->pixel_xfer.pixelmap_texture);
  4212.  
  4213.       /* With a little effort, we can do four pixel map look-ups with
  4214.        * two TEX instructions:
  4215.        */
  4216.  
  4217.       /* TEX temp.rg, colorTemp.rgba, texture[1], 2D; */
  4218.       temp_dst.writemask = WRITEMASK_XY; /* write R,G */
  4219.       inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0);
  4220.       inst->sampler.index = 1;
  4221.       inst->sampler_array_size = 1;
  4222.       inst->tex_target = TEXTURE_2D_INDEX;
  4223.  
  4224.       /* TEX temp.ba, colorTemp.baba, texture[1], 2D; */
  4225.       src0.swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W);
  4226.       temp_dst.writemask = WRITEMASK_ZW; /* write B,A */
  4227.       inst = v->emit(NULL, TGSI_OPCODE_TEX, temp_dst, src0);
  4228.       inst->sampler.index = 1;
  4229.       inst->sampler_array_size = 1;
  4230.       inst->tex_target = TEXTURE_2D_INDEX;
  4231.  
  4232.       prog->SamplersUsed |= (1 << 1); /* mark sampler 1 as used */
  4233.       v->samplers_used |= (1 << 1);
  4234.  
  4235.       /* MOV colorTemp, temp; */
  4236.       inst = v->emit(NULL, TGSI_OPCODE_MOV, dst0, temp);
  4237.    }
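
   /* For reference, the prologue built above corresponds roughly to this
    * TGSI sequence (illustrative only; register numbers vary, and the MAD
    * and pixel-map lookups are emitted only when scale_and_bias /
    * pixel_maps are set):
    *
    *   TEX TEMP[c], IN[texcoord0], SAMP[0], 2D
    *   MAD TEMP[c], TEMP[c], CONST[scale], CONST[bias]
    *   TEX TEMP[t].xy, TEMP[c], SAMP[1], 2D
    *   TEX TEMP[t].zw, TEMP[c].zwzw, SAMP[1], 2D
    *   MOV TEMP[c], TEMP[t]
    */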
  4238.  
  4239.    /* Now copy the instructions from the original glsl_to_tgsi_visitor into the
  4240.     * new visitor. */
  4241.    foreach_in_list(glsl_to_tgsi_instruction, inst, &original->instructions) {
  4242.       glsl_to_tgsi_instruction *newinst;
  4243.       st_src_reg src_regs[3];
  4244.  
  4245.       if (inst->dst[0].file == PROGRAM_OUTPUT)
  4246.          prog->OutputsWritten |= BITFIELD64_BIT(inst->dst[0].index);
  4247.  
  4248.       for (int i = 0; i < 3; i++) {
  4249.          src_regs[i] = inst->src[i];
  4250.          if (src_regs[i].file == PROGRAM_INPUT &&
  4251.              src_regs[i].index == VARYING_SLOT_COL0) {
  4252.             src_regs[i].file = PROGRAM_TEMPORARY;
  4253.             src_regs[i].index = src0.index;
  4254.          }
  4255.          else if (src_regs[i].file == PROGRAM_INPUT)
  4256.             prog->InputsRead |= BITFIELD64_BIT(src_regs[i].index);
  4257.       }
  4258.  
  4259.       newinst = v->emit(NULL, inst->op, inst->dst[0], src_regs[0], src_regs[1], src_regs[2]);
  4260.       newinst->tex_target = inst->tex_target;
  4261.       newinst->sampler_array_size = inst->sampler_array_size;
  4262.    }
  4263.  
  4264.    /* Make modifications to fragment program info. */
  4265.    prog->Parameters = _mesa_combine_parameter_lists(params,
  4266.                                                     original->prog->Parameters);
  4267.    _mesa_free_parameter_list(params);
  4268.    count_resources(v, prog);
  4269.    fp->glsl_to_tgsi = v;
  4270. }
  4271.  
  4272. /**
  4273.  * Make fragment program for glBitmap:
  4274.  *   Sample the texture and kill the fragment if the bit is 0.
  4275.  * This program will be combined with the user's fragment program.
  4276.  *
  4277.  * Based on make_bitmap_fragment_program in st_cb_bitmap.c.
  4278.  */
  4279. extern "C" void
  4280. get_bitmap_visitor(struct st_fragment_program *fp,
  4281.                    glsl_to_tgsi_visitor *original, int samplerIndex)
  4282. {
  4283.    glsl_to_tgsi_visitor *v = new glsl_to_tgsi_visitor();
  4284.    struct st_context *st = st_context(original->ctx);
  4285.    struct gl_program *prog = &fp->Base.Base;
  4286.    st_src_reg coord, src0;
  4287.    st_dst_reg dst0;
  4288.    glsl_to_tgsi_instruction *inst;
  4289.  
  4290.    /* Copy attributes of the glsl_to_tgsi_visitor in the original shader. */
  4291.    v->ctx = original->ctx;
  4292.    v->prog = prog;
  4293.    v->shader_program = NULL;
  4294.    v->shader = NULL;
  4295.    v->glsl_version = original->glsl_version;
  4296.    v->native_integers = original->native_integers;
  4297.    v->options = original->options;
  4298.    v->next_temp = original->next_temp;
  4299.    v->num_address_regs = original->num_address_regs;
  4300.    v->samplers_used = prog->SamplersUsed = original->samplers_used;
  4301.    v->indirect_addr_consts = original->indirect_addr_consts;
  4302.    memcpy(&v->immediates, &original->immediates, sizeof(v->immediates));
  4303.    v->num_immediates = original->num_immediates;
  4304.  
  4305.    /* TEX tmp0, fragment.texcoord[0], texture[0], 2D; */
  4306.    coord = st_src_reg(PROGRAM_INPUT, VARYING_SLOT_TEX0, glsl_type::vec2_type);
  4307.    src0 = v->get_temp(glsl_type::vec4_type);
  4308.    dst0 = st_dst_reg(src0);
  4309.    inst = v->emit(NULL, TGSI_OPCODE_TEX, dst0, coord);
  4310.    inst->sampler.index = samplerIndex;
  4311.    inst->sampler_array_size = 1;
  4312.    inst->tex_target = TEXTURE_2D_INDEX;
  4313.  
  4314.    prog->InputsRead |= VARYING_BIT_TEX0;
  4315.    prog->SamplersUsed |= (1 << samplerIndex); /* mark sampler as used */
  4316.    v->samplers_used |= (1 << samplerIndex);
  4317.  
  4318.    /* KIL if -tmp0 < 0 # texel=0 -> keep / texel!=0 -> discard */
  4319.    src0.negate = NEGATE_XYZW;
  4320.    if (st->bitmap.tex_format == PIPE_FORMAT_L8_UNORM)
  4321.       src0.swizzle = SWIZZLE_XXXX;
  4322.    inst = v->emit(NULL, TGSI_OPCODE_KILL_IF, undef_dst, src0);
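
   /* At this point the bitmap prologue is, roughly (illustrative only;
    * register numbers vary, and the .xxxx swizzle is applied only for
    * PIPE_FORMAT_L8_UNORM bitmaps):
    *
    *   TEX TEMP[t], IN[texcoord0], SAMP[samplerIndex], 2D
    *   KILL_IF -TEMP[t]
    *
    * The user's fragment program instructions are appended below.
    */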
  4323.  
  4324.    /* Now copy the instructions from the original glsl_to_tgsi_visitor into the
  4325.     * new visitor. */
  4326.    foreach_in_list(glsl_to_tgsi_instruction, inst, &original->instructions) {
  4327.       glsl_to_tgsi_instruction *newinst;
  4328.       st_src_reg src_regs[3];
  4329.  
  4330.       if (inst->dst[0].file == PROGRAM_OUTPUT)
  4331.          prog->OutputsWritten |= BITFIELD64_BIT(inst->dst[0].index);
  4332.  
  4333.       for (int i = 0; i < 3; i++) {
  4334.          src_regs[i] = inst->src[i];
  4335.          if (src_regs[i].file == PROGRAM_INPUT)
  4336.             prog->InputsRead |= BITFIELD64_BIT(src_regs[i].index);
  4337.       }
  4338.  
  4339.       newinst = v->emit(NULL, inst->op, inst->dst[0], src_regs[0], src_regs[1], src_regs[2]);
  4340.       newinst->tex_target = inst->tex_target;
  4341.       newinst->sampler_array_size = inst->sampler_array_size;
  4342.    }
  4343.  
  4344.    /* Make modifications to fragment program info. */
  4345.    prog->Parameters = _mesa_clone_parameter_list(original->prog->Parameters);
  4346.    count_resources(v, prog);
  4347.    fp->glsl_to_tgsi = v;
  4348. }
  4349.  
  4350. /* ------------------------- TGSI conversion stuff -------------------------- */
  4351. struct label {
  4352.    unsigned branch_target;
  4353.    unsigned token;
  4354. };
  4355.  
  4356. /**
  4357.  * Intermediate state used during shader translation.
  4358.  */
  4359. struct st_translate {
  4360.    struct ureg_program *ureg;
  4361.  
  4362.    unsigned temps_size;
  4363.    struct ureg_dst *temps;
  4364.  
  4365.    struct ureg_dst arrays[MAX_ARRAYS];
  4366.    struct ureg_src *constants;
  4367.    int num_constants;
  4368.    struct ureg_src *immediates;
  4369.    int num_immediates;
  4370.    struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];
  4371.    struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
  4372.    struct ureg_dst address[3];
  4373.    struct ureg_src samplers[PIPE_MAX_SAMPLERS];
  4374.    struct ureg_src systemValues[SYSTEM_VALUE_MAX];
  4375.    struct tgsi_texture_offset tex_offsets[MAX_GLSL_TEXTURE_OFFSET];
  4376.    unsigned array_sizes[MAX_ARRAYS];
  4377.  
  4378.    const GLuint *inputMapping;
  4379.    const GLuint *outputMapping;
  4380.  
  4381.    /* For every instruction that contains a label (e.g. CALL), keep
  4382.     * details so that we can go back afterwards and emit the correct
  4383.     * tgsi instruction number for each label.
  4384.     */
  4385.    struct label *labels;
  4386.    unsigned labels_size;
  4387.    unsigned labels_count;
  4388.  
  4389.    /* Keep a record of the tgsi instruction number that each mesa
  4390.     * instruction starts at; this is used to fix up labels after
  4391.     * translation.
  4392.     */
  4393.    unsigned *insn;
  4394.    unsigned insn_size;
  4395.    unsigned insn_count;
  4396.  
  4397.    unsigned procType;  /**< TGSI_PROCESSOR_VERTEX/GEOMETRY/FRAGMENT */
  4398.  
  4399.    boolean error;
  4400. };
  4401.  
  4402. /** Map Mesa's SYSTEM_VALUE_x to TGSI_SEMANTIC_x */
  4403. const unsigned _mesa_sysval_to_semantic[SYSTEM_VALUE_MAX] = {
  4404.    /* Vertex shader
  4405.     */
  4406.    TGSI_SEMANTIC_VERTEXID,
  4407.    TGSI_SEMANTIC_INSTANCEID,
  4408.    TGSI_SEMANTIC_VERTEXID_NOBASE,
  4409.    TGSI_SEMANTIC_BASEVERTEX,
  4410.  
  4411.    /* Geometry shader
  4412.     */
  4413.    TGSI_SEMANTIC_INVOCATIONID,
  4414.  
  4415.    /* Fragment shader
  4416.     */
  4417.    TGSI_SEMANTIC_FACE,
  4418.    TGSI_SEMANTIC_SAMPLEID,
  4419.    TGSI_SEMANTIC_SAMPLEPOS,
  4420.    TGSI_SEMANTIC_SAMPLEMASK,
  4421. };
  4422.  
  4423. /**
  4424.  * Make note of a branch to a label in the TGSI code.
  4425.  * After we've emitted all instructions, we'll go over the list
  4426.  * of labels built here and patch the TGSI code with the actual
  4427.  * location of each label.
  4428.  */
  4429. static unsigned *get_label(struct st_translate *t, unsigned branch_target)
  4430. {
  4431.    unsigned i;
  4432.  
  4433.    if (t->labels_count + 1 >= t->labels_size) {
  4434.       t->labels_size = 1 << (util_logbase2(t->labels_size) + 1);
  4435.       t->labels = (struct label *)realloc(t->labels,
  4436.                                           t->labels_size * sizeof(struct label));
  4437.       if (t->labels == NULL) {
  4438.          static unsigned dummy;
  4439.          t->error = TRUE;
  4440.          return &dummy;
  4441.       }
  4442.    }
  4443.  
  4444.    i = t->labels_count++;
  4445.    t->labels[i].branch_target = branch_target;
  4446.    return &t->labels[i].token;
  4447. }
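
/* A minimal usage sketch (taken from compile_tgsi_instruction and the fixup
 * loop in st_translate_program below): emit a branch whose target is not yet
 * known, then patch it once all instructions have been emitted.
 *
 *    ureg_label_insn(ureg, TGSI_OPCODE_CAL, src, num_src,
 *                    get_label(t, sig_id));          // records the token slot
 *    ...
 *    for (i = 0; i < t->labels_count; i++)           // after emission
 *       ureg_fixup_label(ureg, t->labels[i].token,
 *                        t->insn[t->labels[i].branch_target]);
 */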
  4448.  
  4449. /**
  4450.  * Called prior to emitting the TGSI code for each instruction.
  4451.  * Allocate additional space for instructions if needed.
  4452.  * Update the insn[] array so the next glsl_to_tgsi_instruction points to
  4453.  * the next TGSI instruction.
  4454.  */
  4455. static void set_insn_start(struct st_translate *t, unsigned start)
  4456. {
  4457.    if (t->insn_count + 1 >= t->insn_size) {
  4458.       t->insn_size = 1 << (util_logbase2(t->insn_size) + 1);
  4459.       t->insn = (unsigned *)realloc(t->insn, t->insn_size * sizeof(t->insn[0]));
  4460.       if (t->insn == NULL) {
  4461.          t->error = TRUE;
  4462.          return;
  4463.       }
  4464.    }
  4465.  
  4466.    t->insn[t->insn_count++] = start;
  4467. }
  4468.  
  4469. /**
  4470.  * Map a glsl_to_tgsi constant/immediate to a TGSI immediate.
  4471.  */
  4472. static struct ureg_src
  4473. emit_immediate(struct st_translate *t,
  4474.                gl_constant_value values[4],
  4475.                int type, int size)
  4476. {
  4477.    struct ureg_program *ureg = t->ureg;
  4478.  
  4479.    switch(type)
  4480.    {
  4481.    case GL_FLOAT:
  4482.       return ureg_DECL_immediate(ureg, &values[0].f, size);
  4483.    case GL_DOUBLE:
  4484.       return ureg_DECL_immediate_f64(ureg, (double *)&values[0].f, size);
  4485.    case GL_INT:
  4486.       return ureg_DECL_immediate_int(ureg, &values[0].i, size);
  4487.    case GL_UNSIGNED_INT:
  4488.    case GL_BOOL:
  4489.       return ureg_DECL_immediate_uint(ureg, &values[0].u, size);
  4490.    default:
  4491.       assert(!"should not get here - type must be float, double, int, uint, or bool");
  4492.       return ureg_src_undef();
  4493.    }
  4494. }
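
/* Illustrative example (values and register number assumed): a vec4 float
 * constant such as (0.5, 0.5, 0.5, 0.5) passed here with type GL_FLOAT and
 * size 4 becomes a ureg immediate declaration, roughly
 *
 *    IMM[0] FLT32 { 0.5000, 0.5000, 0.5000, 0.5000 }
 *
 * in the dumped TGSI; later instructions reference it via t->immediates[].
 */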
  4495.  
  4496. /**
  4497.  * Map a glsl_to_tgsi dst register to a TGSI ureg_dst register.
  4498.  */
  4499. static struct ureg_dst
  4500. dst_register(struct st_translate *t,
  4501.              gl_register_file file,
  4502.              GLuint index)
  4503. {
  4504.    unsigned array;
  4505.  
  4506.    switch(file) {
  4507.    case PROGRAM_UNDEFINED:
  4508.       return ureg_dst_undef();
  4509.  
  4510.    case PROGRAM_TEMPORARY:
  4511.       /* Allocate space for temporaries on demand. */
  4512.       if (index >= t->temps_size) {
  4513.          const int inc = 4096;
  4514.  
  4515.          t->temps = (struct ureg_dst*)
  4516.                     realloc(t->temps,
  4517.                             (t->temps_size + inc) * sizeof(struct ureg_dst));
  4518.          if (!t->temps)
  4519.             return ureg_dst_undef();
  4520.  
  4521.          memset(t->temps + t->temps_size, 0, inc * sizeof(struct ureg_dst));
  4522.          t->temps_size += inc;
  4523.       }
  4524.  
  4525.       if (ureg_dst_is_undef(t->temps[index]))
  4526.          t->temps[index] = ureg_DECL_local_temporary(t->ureg);
  4527.  
  4528.       return t->temps[index];
  4529.  
  4530.    case PROGRAM_ARRAY:
  4531.       array = index >> 16;
  4532.  
  4533.       assert(array < ARRAY_SIZE(t->arrays));
  4534.  
  4535.       if (ureg_dst_is_undef(t->arrays[array]))
  4536.          t->arrays[array] = ureg_DECL_array_temporary(
  4537.             t->ureg, t->array_sizes[array], TRUE);
  4538.  
  4539.       return ureg_dst_array_offset(t->arrays[array],
  4540.                                    (int)(index & 0xFFFF) - 0x8000);
  4541.  
  4542.    case PROGRAM_OUTPUT:
  4543.       if (t->procType == TGSI_PROCESSOR_VERTEX)
  4544.          assert(index < VARYING_SLOT_MAX);
  4545.       else if (t->procType == TGSI_PROCESSOR_FRAGMENT)
  4546.          assert(index < FRAG_RESULT_MAX);
  4547.       else
  4548.          assert(index < VARYING_SLOT_MAX);
  4549.  
  4550.       assert(t->outputMapping[index] < ARRAY_SIZE(t->outputs));
  4551.  
  4552.       return t->outputs[t->outputMapping[index]];
  4553.  
  4554.    case PROGRAM_ADDRESS:
  4555.       return t->address[index];
  4556.  
  4557.    default:
  4558.       assert(!"unknown dst register file");
  4559.       return ureg_dst_undef();
  4560.    }
  4561. }
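
/* Note on the PROGRAM_ARRAY encoding decoded above (sketch, based on the
 * decoding used throughout this file): the register index packs the array id
 * in the upper 16 bits and a biased element offset in the lower 16 bits:
 *
 *    array   = index >> 16;
 *    element = (index & 0xFFFF) - 0x8000;
 *
 * e.g. an index of ((2 << 16) | 0x8003) addresses element 3 of array 2.
 */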
  4562.  
  4563. /**
  4564.  * Map a glsl_to_tgsi src register to a TGSI ureg_src register.
  4565.  */
  4566. static struct ureg_src
  4567. src_register(struct st_translate *t, const st_src_reg *reg)
  4568. {
  4569.    int index = reg->index;
  4570.    int double_reg2 = reg->double_reg2 ? 1 : 0;
  4571.  
  4572.    switch(reg->file) {
  4573.    case PROGRAM_UNDEFINED:
  4574.       return ureg_imm4f(t->ureg, 0, 0, 0, 0);
  4575.  
  4576.    case PROGRAM_TEMPORARY:
  4577.    case PROGRAM_ARRAY:
  4578.       return ureg_src(dst_register(t, reg->file, reg->index));
  4579.  
  4580.    case PROGRAM_UNIFORM:
  4581.       assert(reg->index >= 0);
  4582.       return reg->index < t->num_constants ?
  4583.                t->constants[reg->index] : ureg_imm4f(t->ureg, 0, 0, 0, 0);
  4584.    case PROGRAM_STATE_VAR:
  4585.    case PROGRAM_CONSTANT:       /* ie, immediate */
  4586.       if (reg->has_index2)
  4587.          return ureg_src_register(TGSI_FILE_CONSTANT, reg->index);
  4588.       else
  4589.          return reg->index >= 0 && reg->index < t->num_constants ?
  4590.                   t->constants[reg->index] : ureg_imm4f(t->ureg, 0, 0, 0, 0);
  4591.  
  4592.    case PROGRAM_IMMEDIATE:
  4593.       assert(reg->index >= 0 && reg->index < t->num_immediates);
  4594.       return t->immediates[reg->index];
  4595.  
  4596.    case PROGRAM_INPUT:
  4597.       /* GLSL inputs are 64-bit containers, so we have to
  4598.        * map back to the original index and add the offset after
  4599.        * mapping. */
  4600.       index -= double_reg2;
  4601.       assert(t->inputMapping[index] < ARRAY_SIZE(t->inputs));
  4602.       return t->inputs[t->inputMapping[index] + double_reg2];
  4603.  
  4604.    case PROGRAM_OUTPUT:
  4605.       assert(t->outputMapping[reg->index] < ARRAY_SIZE(t->outputs));
  4606.       return ureg_src(t->outputs[t->outputMapping[reg->index]]); /* not needed? */
  4607.  
  4608.    case PROGRAM_ADDRESS:
  4609.       return ureg_src(t->address[reg->index]);
  4610.  
  4611.    case PROGRAM_SYSTEM_VALUE:
  4612.       assert(reg->index < (int) ARRAY_SIZE(t->systemValues));
  4613.       return t->systemValues[reg->index];
  4614.  
  4615.    default:
  4616.       assert(!"unknown src register file");
  4617.       return ureg_src_undef();
  4618.    }
  4619. }
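
/* Example for the PROGRAM_INPUT case above (assuming the usual layout where a
 * double-precision input spans two consecutive registers): the second half is
 * flagged with double_reg2, so its index is first mapped back to the first
 * half for the inputMapping[] lookup and the +1 offset is re-applied after
 * mapping, i.e. effectively
 *
 *    t->inputs[t->inputMapping[index - 1] + 1]
 */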
  4620.  
  4621. /**
  4622.  * Create a TGSI ureg_dst register from an st_dst_reg.
  4623.  */
  4624. static struct ureg_dst
  4625. translate_dst(struct st_translate *t,
  4626.               const st_dst_reg *dst_reg,
  4627.               bool saturate, bool clamp_color)
  4628. {
  4629.    struct ureg_dst dst = dst_register(t,
  4630.                                       dst_reg->file,
  4631.                                       dst_reg->index);
  4632.  
  4633.    if (dst.File == TGSI_FILE_NULL)
  4634.       return dst;
  4635.  
  4636.    dst = ureg_writemask(dst, dst_reg->writemask);
  4637.  
  4638.    if (saturate)
  4639.       dst = ureg_saturate(dst);
  4640.    else if (clamp_color && dst_reg->file == PROGRAM_OUTPUT) {
  4641.       /* Clamp colors for ARB_color_buffer_float. */
  4642.       switch (t->procType) {
  4643.       case TGSI_PROCESSOR_VERTEX:
  4644.          /* This can only occur with a compatibility profile, which doesn't
  4645.           * support geometry shaders. */
  4646.          if (dst_reg->index == VARYING_SLOT_COL0 ||
  4647.              dst_reg->index == VARYING_SLOT_COL1 ||
  4648.              dst_reg->index == VARYING_SLOT_BFC0 ||
  4649.              dst_reg->index == VARYING_SLOT_BFC1) {
  4650.             dst = ureg_saturate(dst);
  4651.          }
  4652.          break;
  4653.  
  4654.       case TGSI_PROCESSOR_FRAGMENT:
  4655.          if (dst_reg->index == FRAG_RESULT_COLOR ||
  4656.              dst_reg->index >= FRAG_RESULT_DATA0) {
  4657.             dst = ureg_saturate(dst);
  4658.          }
  4659.          break;
  4660.       }
  4661.    }
  4662.  
  4663.    if (dst_reg->reladdr != NULL) {
  4664.       assert(dst_reg->file != PROGRAM_TEMPORARY);
  4665.       dst = ureg_dst_indirect(dst, ureg_src(t->address[0]));
  4666.    }
  4667.  
  4668.    return dst;
  4669. }
  4670.  
  4671. /**
  4672.  * Create a TGSI ureg_src register from an st_src_reg.
  4673.  */
  4674. static struct ureg_src
  4675. translate_src(struct st_translate *t, const st_src_reg *src_reg)
  4676. {
  4677.    struct ureg_src src = src_register(t, src_reg);
  4678.  
  4679.    if (src_reg->has_index2) {
  4680.       /* 2D indexes occur with geometry shader inputs (attrib, vertex)
  4681.        * and UBO constant buffers (buffer, position).
  4682.        */
  4683.       if (src_reg->reladdr2)
  4684.          src = ureg_src_dimension_indirect(src, ureg_src(t->address[1]),
  4685.                                            src_reg->index2D);
  4686.       else
  4687.          src = ureg_src_dimension(src, src_reg->index2D);
  4688.    }
  4689.  
  4690.    src = ureg_swizzle(src,
  4691.                       GET_SWZ(src_reg->swizzle, 0) & 0x3,
  4692.                       GET_SWZ(src_reg->swizzle, 1) & 0x3,
  4693.                       GET_SWZ(src_reg->swizzle, 2) & 0x3,
  4694.                       GET_SWZ(src_reg->swizzle, 3) & 0x3);
  4695.  
  4696.    if ((src_reg->negate & 0xf) == NEGATE_XYZW)
  4697.       src = ureg_negate(src);
  4698.  
  4699.    if (src_reg->reladdr != NULL) {
  4700.       assert(src_reg->file != PROGRAM_TEMPORARY);
  4701.       src = ureg_src_indirect(src, ureg_src(t->address[0]));
  4702.    }
  4703.  
  4704.    return src;
  4705. }
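
/* Illustrative example (assumed operand): an st_src_reg with
 * swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, SWIZZLE_Z, SWIZZLE_W) and
 * negate = NEGATE_XYZW is translated to roughly
 *
 *    ureg_negate(ureg_swizzle(src, 2, 3, 2, 3))
 *
 * which prints as "-TEMP[n].zwzw" in the resulting TGSI.
 */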
  4706.  
  4707. static struct tgsi_texture_offset
  4708. translate_tex_offset(struct st_translate *t,
  4709.                      const st_src_reg *in_offset, int idx)
  4710. {
  4711.    struct tgsi_texture_offset offset;
  4712.    struct ureg_src imm_src;
  4713.    struct ureg_dst dst;
  4714.    int array;
  4715.  
  4716.    switch (in_offset->file) {
  4717.    case PROGRAM_IMMEDIATE:
  4718.       assert(in_offset->index >= 0 && in_offset->index < t->num_immediates);
  4719.       imm_src = t->immediates[in_offset->index];
  4720.  
  4721.       offset.File = imm_src.File;
  4722.       offset.Index = imm_src.Index;
  4723.       offset.SwizzleX = imm_src.SwizzleX;
  4724.       offset.SwizzleY = imm_src.SwizzleY;
  4725.       offset.SwizzleZ = imm_src.SwizzleZ;
  4726.       offset.Padding = 0;
  4727.       break;
  4728.    case PROGRAM_TEMPORARY:
  4729.       imm_src = ureg_src(t->temps[in_offset->index]);
  4730.       offset.File = imm_src.File;
  4731.       offset.Index = imm_src.Index;
  4732.       offset.SwizzleX = GET_SWZ(in_offset->swizzle, 0);
  4733.       offset.SwizzleY = GET_SWZ(in_offset->swizzle, 1);
  4734.       offset.SwizzleZ = GET_SWZ(in_offset->swizzle, 2);
  4735.       offset.Padding = 0;
  4736.       break;
  4737.    case PROGRAM_ARRAY:
  4738.       array = in_offset->index >> 16;
  4739.  
  4740.       assert(array >= 0);
  4741.       assert(array < (int) ARRAY_SIZE(t->arrays));
  4742.  
  4743.       dst = t->arrays[array];
  4744.       offset.File = dst.File;
  4745.       offset.Index = dst.Index + (in_offset->index & 0xFFFF) - 0x8000;
  4746.       offset.SwizzleX = GET_SWZ(in_offset->swizzle, 0);
  4747.       offset.SwizzleY = GET_SWZ(in_offset->swizzle, 1);
  4748.       offset.SwizzleZ = GET_SWZ(in_offset->swizzle, 2);
  4749.       offset.Padding = 0;
  4750.       break;
  4751.    default:
  4752.       break;
  4753.    }
  4754.    return offset;
  4755. }
  4756.  
  4757. static void
  4758. compile_tgsi_instruction(struct st_translate *t,
  4759.                          const glsl_to_tgsi_instruction *inst,
  4760.                          bool clamp_dst_color_output)
  4761. {
  4762.    struct ureg_program *ureg = t->ureg;
  4763.    GLuint i;
  4764.    struct ureg_dst dst[2];
  4765.    struct ureg_src src[4];
  4766.    struct tgsi_texture_offset texoffsets[MAX_GLSL_TEXTURE_OFFSET];
  4767.  
  4768.    unsigned num_dst;
  4769.    unsigned num_src;
  4770.    unsigned tex_target;
  4771.  
  4772.    num_dst = num_inst_dst_regs(inst->op);
  4773.    num_src = num_inst_src_regs(inst->op);
  4774.  
  4775.    for (i = 0; i < num_dst; i++)
  4776.       dst[i] = translate_dst(t,
  4777.                              &inst->dst[i],
  4778.                              inst->saturate,
  4779.                              clamp_dst_color_output);
  4780.  
  4781.    for (i = 0; i < num_src; i++)
  4782.       src[i] = translate_src(t, &inst->src[i]);
  4783.  
  4784.    switch(inst->op) {
  4785.    case TGSI_OPCODE_BGNLOOP:
  4786.    case TGSI_OPCODE_CAL:
  4787.    case TGSI_OPCODE_ELSE:
  4788.    case TGSI_OPCODE_ENDLOOP:
  4789.    case TGSI_OPCODE_IF:
  4790.    case TGSI_OPCODE_UIF:
  4791.       assert(num_dst == 0);
  4792.       ureg_label_insn(ureg,
  4793.                       inst->op,
  4794.                       src, num_src,
  4795.                       get_label(t,
  4796.                                 inst->op == TGSI_OPCODE_CAL ? inst->function->sig_id : 0));
  4797.       return;
  4798.  
  4799.    case TGSI_OPCODE_TEX:
  4800.    case TGSI_OPCODE_TXB:
  4801.    case TGSI_OPCODE_TXD:
  4802.    case TGSI_OPCODE_TXL:
  4803.    case TGSI_OPCODE_TXP:
  4804.    case TGSI_OPCODE_TXQ:
  4805.    case TGSI_OPCODE_TXF:
  4806.    case TGSI_OPCODE_TEX2:
  4807.    case TGSI_OPCODE_TXB2:
  4808.    case TGSI_OPCODE_TXL2:
  4809.    case TGSI_OPCODE_TG4:
  4810.    case TGSI_OPCODE_LODQ:
  4811.       src[num_src] = t->samplers[inst->sampler.index];
  4812.       assert(src[num_src].File != TGSI_FILE_NULL);
  4813.       if (inst->sampler.reladdr)
  4814.          src[num_src] =
  4815.             ureg_src_indirect(src[num_src], ureg_src(t->address[2]));
  4816.       num_src++;
  4817.       for (i = 0; i < inst->tex_offset_num_offset; i++) {
  4818.          texoffsets[i] = translate_tex_offset(t, &inst->tex_offsets[i], i);
  4819.       }
  4820.       tex_target = st_translate_texture_target(inst->tex_target, inst->tex_shadow);
  4821.  
  4822.       ureg_tex_insn(ureg,
  4823.                     inst->op,
  4824.                     dst, num_dst,
  4825.                     tex_target,
  4826.                     texoffsets, inst->tex_offset_num_offset,
  4827.                     src, num_src);
  4828.       return;
  4829.  
  4830.    case TGSI_OPCODE_SCS:
  4831.       dst[0] = ureg_writemask(dst[0], TGSI_WRITEMASK_XY);
  4832.       ureg_insn(ureg, inst->op, dst, num_dst, src, num_src);
  4833.       break;
  4834.  
  4835.    default:
  4836.       ureg_insn(ureg,
  4837.                 inst->op,
  4838.                 dst, num_dst,
  4839.                 src, num_src);
  4840.       break;
  4841.    }
  4842. }
  4843.  
  4844. /**
  4845.  * Emit the TGSI instructions for inverting and adjusting WPOS.
  4846.  * This code is unavoidable because it also depends on whether
  4847.  * a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM).
  4848.  */
  4849. static void
  4850. emit_wpos_adjustment( struct st_translate *t,
  4851.                       int wpos_transform_const,
  4852.                       boolean invert,
  4853.                       GLfloat adjX, GLfloat adjY[2])
  4854. {
  4855.    struct ureg_program *ureg = t->ureg;
  4856.  
  4857.    assert(wpos_transform_const >= 0);
  4858.  
  4859.    /* Fragment program uses fragment position input.
  4860.     * Need to replace instances of INPUT[WPOS] with temp T
  4861.     * where T = INPUT[WPOS] with its Y coordinate inverted.
  4862.     */
  4863.    struct ureg_src wpostrans = ureg_DECL_constant(ureg, wpos_transform_const);
  4864.    struct ureg_dst wpos_temp = ureg_DECL_temporary( ureg );
  4865.    struct ureg_src wpos_input = t->inputs[t->inputMapping[VARYING_SLOT_POS]];
  4866.  
  4867.    /* First, apply the coordinate shift: */
  4868.    if (adjX || adjY[0] || adjY[1]) {
  4869.       if (adjY[0] != adjY[1]) {
  4870.          /* Adjust the y coordinate by adjY[1] or adjY[0] respectively
  4871.           * depending on whether inversion is actually going to be applied
  4872.           * or not, which is determined by testing against the inversion
  4873.           * state variable used below, which will be either +1 or -1.
  4874.           */
  4875.          struct ureg_dst adj_temp = ureg_DECL_local_temporary(ureg);
  4876.  
  4877.          ureg_CMP(ureg, adj_temp,
  4878.                   ureg_scalar(wpostrans, invert ? 2 : 0),
  4879.                   ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f),
  4880.                   ureg_imm4f(ureg, adjX, adjY[1], 0.0f, 0.0f));
  4881.          ureg_ADD(ureg, wpos_temp, wpos_input, ureg_src(adj_temp));
  4882.       } else {
  4883.          ureg_ADD(ureg, wpos_temp, wpos_input,
  4884.                   ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f));
  4885.       }
  4886.       wpos_input = ureg_src(wpos_temp);
  4887.    } else {
  4888.       /* MOV wpos_temp, input[wpos]
  4889.        */
  4890.       ureg_MOV( ureg, wpos_temp, wpos_input );
  4891.    }
  4892.  
  4893.    /* Now the conditional y flip: STATE_FB_WPOS_Y_TRANSFORM.xy/zw will be
  4894.     * inversion/identity, or the other way around if we're drawing to an FBO.
  4895.     */
  4896.    if (invert) {
  4897.       /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy
  4898.        */
  4899.       ureg_MAD( ureg,
  4900.                 ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
  4901.                 wpos_input,
  4902.                 ureg_scalar(wpostrans, 0),
  4903.                 ureg_scalar(wpostrans, 1));
  4904.    } else {
  4905.       /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww
  4906.        */
  4907.       ureg_MAD( ureg,
  4908.                 ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y ),
  4909.                 wpos_input,
  4910.                 ureg_scalar(wpostrans, 2),
  4911.                 ureg_scalar(wpostrans, 3));
  4912.    }
  4913.  
  4914.    /* Use wpos_temp as position input from here on:
  4915.     */
  4916.    t->inputs[t->inputMapping[VARYING_SLOT_POS]] = ureg_src(wpos_temp);
  4917. }
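
/* For the inverted case, the adjustment above amounts to the following
 * sketch (constant and temp numbers assumed; the ADD is only present when a
 * coordinate shift is needed):
 *
 *    ADD TEMP[w], IN[wpos], IMM[adj]
 *    MAD TEMP[w].y, TEMP[w], CONST[c].xxxx, CONST[c].yyyy
 *
 * i.e. y' = y * scale + translate, where CONST[c] is the
 * STATE_FB_WPOS_Y_TRANSFORM vector referenced by wpos_transform_const.
 */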
  4918.  
  4919.  
  4920. /**
  4921.  * Emit fragment position/coordinate code.
  4922.  */
  4923. static void
  4924. emit_wpos(struct st_context *st,
  4925.           struct st_translate *t,
  4926.           const struct gl_program *program,
  4927.           struct ureg_program *ureg,
  4928.           int wpos_transform_const)
  4929. {
  4930.    const struct gl_fragment_program *fp =
  4931.       (const struct gl_fragment_program *) program;
  4932.    struct pipe_screen *pscreen = st->pipe->screen;
  4933.    GLfloat adjX = 0.0f;
  4934.    GLfloat adjY[2] = { 0.0f, 0.0f };
  4935.    boolean invert = FALSE;
  4936.  
  4937.    /* Query the pixel center conventions supported by the pipe driver and set
  4938.     * adjX, adjY to help out if it cannot handle the requested one internally.
  4939.     *
  4940.     * The bias of the y-coordinate depends on whether y-inversion takes place
  4941.     * (adjY[1]) or not (adjY[0]), which is in turn dependent on whether we are
  4942.     * drawing to an FBO (causes additional inversion), and whether the pipe
  4943.     * driver origin and the requested origin differ (the latter condition is
  4944.     * stored in the 'invert' variable).
  4945.     *
  4946.     * For height = 100 (i = integer, h = half-integer, l = lower, u = upper):
  4947.     *
  4948.     * center shift only:
  4949.     * i -> h: +0.5
  4950.     * h -> i: -0.5
  4951.     *
  4952.     * inversion only:
  4953.     * l,i -> u,i: ( 0.0 + 1.0) * -1 + 100 = 99
  4954.     * l,h -> u,h: ( 0.5 + 0.0) * -1 + 100 = 99.5
  4955.     * u,i -> l,i: (99.0 + 1.0) * -1 + 100 = 0
  4956.     * u,h -> l,h: (99.5 + 0.0) * -1 + 100 = 0.5
  4957.     *
  4958.     * inversion and center shift:
  4959.     * l,i -> u,h: ( 0.0 + 0.5) * -1 + 100 = 99.5
  4960.     * l,h -> u,i: ( 0.5 + 0.5) * -1 + 100 = 99
  4961.     * u,i -> l,h: (99.0 + 0.5) * -1 + 100 = 0.5
  4962.     * u,h -> l,i: (99.5 + 0.5) * -1 + 100 = 0
  4963.     */
  4964.    if (fp->OriginUpperLeft) {
  4965.       /* Fragment shader wants origin in upper-left */
  4966.       if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) {
  4967.          /* the driver supports upper-left origin */
  4968.       }
  4969.       else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) {
  4970.          /* the driver supports lower-left origin, need to invert Y */
  4971.          ureg_property(ureg, TGSI_PROPERTY_FS_COORD_ORIGIN,
  4972.                        TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
  4973.          invert = TRUE;
  4974.       }
  4975.       else
  4976.          assert(0);
  4977.    }
  4978.    else {
  4979.       /* Fragment shader wants origin in lower-left */
  4980.       if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT))
  4981.          /* the driver supports lower-left origin */
  4982.          ureg_property(ureg, TGSI_PROPERTY_FS_COORD_ORIGIN,
  4983.                        TGSI_FS_COORD_ORIGIN_LOWER_LEFT);
  4984.       else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT))
  4985.          /* the driver supports upper-left origin, need to invert Y */
  4986.          invert = TRUE;
  4987.       else
  4988.          assert(0);
  4989.    }
  4990.  
  4991.    if (fp->PixelCenterInteger) {
  4992.       /* Fragment shader wants pixel center integer */
  4993.       if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
  4994.          /* the driver supports pixel center integer */
  4995.          adjY[1] = 1.0f;
  4996.          ureg_property(ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER,
  4997.                        TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
  4998.       }
  4999.       else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) {
  5000.          /* the driver supports pixel center half integer, need to bias X,Y */
  5001.          adjX = -0.5f;
  5002.          adjY[0] = -0.5f;
  5003.          adjY[1] = 0.5f;
  5004.       }
  5005.       else
  5006.          assert(0);
  5007.    }
  5008.    else {
  5009.       /* Fragment shader wants pixel center half integer */
  5010.       if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) {
  5011.          /* the driver supports pixel center half integer */
  5012.       }
  5013.       else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) {
  5014.          /* the driver supports pixel center integer, need to bias X,Y */
  5015.          adjX = adjY[0] = adjY[1] = 0.5f;
  5016.          ureg_property(ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER,
  5017.                        TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
  5018.       }
  5019.       else
  5020.          assert(0);
  5021.    }
  5022.  
  5023.    /* we invert after adjustment so that we avoid the MOV to temporary,
  5024.     * and reuse the adjustment ADD instead */
  5025.    emit_wpos_adjustment(t, wpos_transform_const, invert, adjX, adjY);
  5026. }
  5027.  
  5028. /**
  5029.  * OpenGL's fragment gl_FrontFace input is 1 for front-facing, 0 for back.
  5030.  * TGSI uses +1 for front, -1 for back.
  5031.  * This function converts the TGSI value to the GL value.  Simply clamping/
  5032.  * saturating the value to [0,1] does the job.
  5033.  */
  5034. static void
  5035. emit_face_var(struct gl_context *ctx, struct st_translate *t)
  5036. {
  5037.    struct ureg_program *ureg = t->ureg;
  5038.    struct ureg_dst face_temp = ureg_DECL_temporary(ureg);
  5039.    struct ureg_src face_input = t->inputs[t->inputMapping[VARYING_SLOT_FACE]];
  5040.  
  5041.    if (ctx->Const.NativeIntegers) {
  5042.       ureg_FSGE(ureg, face_temp, face_input, ureg_imm1f(ureg, 0));
  5043.    }
  5044.    else {
  5045.       /* MOV_SAT face_temp, input[face] */
  5046.       ureg_MOV(ureg, ureg_saturate(face_temp), face_input);
  5047.    }
  5048.  
  5049.    /* Use face_temp as face input from here on: */
  5050.    t->inputs[t->inputMapping[VARYING_SLOT_FACE]] = ureg_src(face_temp);
  5051. }
  5052.  
  5053. static void
  5054. emit_edgeflags(struct st_translate *t)
  5055. {
  5056.    struct ureg_program *ureg = t->ureg;
  5057.    struct ureg_dst edge_dst = t->outputs[t->outputMapping[VARYING_SLOT_EDGE]];
  5058.    struct ureg_src edge_src = t->inputs[t->inputMapping[VERT_ATTRIB_EDGEFLAG]];
  5059.  
  5060.    ureg_MOV(ureg, edge_dst, edge_src);
  5061. }
  5062.  
  5063. /**
  5064.  * Translate intermediate IR (glsl_to_tgsi_instruction) to TGSI format.
  5065.  * \param program  the program to translate
  5066.  * \param numInputs  number of input registers used
  5067.  * \param inputMapping  maps Mesa fragment program inputs to TGSI generic
  5068.  *                      input indexes
  5069.  * \param inputSemanticName  the TGSI_SEMANTIC flag for each input
  5070.  * \param inputSemanticIndex  the semantic index (ex: which texcoord) for
  5071.  *                            each input
  5072.  * \param interpMode  the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input
  5073.  * \param interpLocation the TGSI_INTERPOLATE_LOC_* location for each input
  5074.  * \param numOutputs  number of output registers used
  5075.  * \param outputMapping  maps Mesa fragment program outputs to TGSI
  5076.  *                       generic outputs
  5077.  * \param outputSemanticName  the TGSI_SEMANTIC flag for each output
  5078.  * \param outputSemanticIndex  the semantic index (ex: which texcoord) for
  5079.  *                             each output
  5080.  *
  5081.  * \return  PIPE_OK, PIPE_ERROR_OUT_OF_MEMORY or PIPE_ERROR_BAD_INPUT
  5082.  */
  5083. extern "C" enum pipe_error
  5084. st_translate_program(
  5085.    struct gl_context *ctx,
  5086.    uint procType,
  5087.    struct ureg_program *ureg,
  5088.    glsl_to_tgsi_visitor *program,
  5089.    const struct gl_program *proginfo,
  5090.    GLuint numInputs,
  5091.    const GLuint inputMapping[],
  5092.    const ubyte inputSemanticName[],
  5093.    const ubyte inputSemanticIndex[],
  5094.    const GLuint interpMode[],
  5095.    const GLuint interpLocation[],
  5096.    GLuint numOutputs,
  5097.    const GLuint outputMapping[],
  5098.    const ubyte outputSemanticName[],
  5099.    const ubyte outputSemanticIndex[],
  5100.    boolean passthrough_edgeflags,
  5101.    boolean clamp_color)
  5102. {
  5103.    struct st_translate *t;
  5104.    unsigned i;
  5105.    enum pipe_error ret = PIPE_OK;
  5106.  
  5107.    assert(numInputs <= ARRAY_SIZE(t->inputs));
  5108.    assert(numOutputs <= ARRAY_SIZE(t->outputs));
  5109.  
  5110.    assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_FRONT_FACE] ==
  5111.           TGSI_SEMANTIC_FACE);
  5112.    assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_VERTEX_ID] ==
  5113.           TGSI_SEMANTIC_VERTEXID);
  5114.    assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_INSTANCE_ID] ==
  5115.           TGSI_SEMANTIC_INSTANCEID);
  5116.    assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_SAMPLE_ID] ==
  5117.           TGSI_SEMANTIC_SAMPLEID);
  5118.    assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_SAMPLE_POS] ==
  5119.           TGSI_SEMANTIC_SAMPLEPOS);
  5120.    assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_SAMPLE_MASK_IN] ==
  5121.           TGSI_SEMANTIC_SAMPLEMASK);
  5122.    assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_INVOCATION_ID] ==
  5123.           TGSI_SEMANTIC_INVOCATIONID);
  5124.    assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_VERTEX_ID_ZERO_BASE] ==
  5125.           TGSI_SEMANTIC_VERTEXID_NOBASE);
  5126.    assert(_mesa_sysval_to_semantic[SYSTEM_VALUE_BASE_VERTEX] ==
  5127.           TGSI_SEMANTIC_BASEVERTEX);
  5128.  
  5129.    t = CALLOC_STRUCT(st_translate);
  5130.    if (!t) {
  5131.       ret = PIPE_ERROR_OUT_OF_MEMORY;
  5132.       goto out;
  5133.    }
  5134.  
  5135.    memset(t, 0, sizeof *t);
  5136.  
  5137.    t->procType = procType;
  5138.    t->inputMapping = inputMapping;
  5139.    t->outputMapping = outputMapping;
  5140.    t->ureg = ureg;
  5141.  
  5142.    /*
  5143.     * Declare input attributes.
  5144.     */
  5145.    if (procType == TGSI_PROCESSOR_FRAGMENT) {
  5146.       for (i = 0; i < numInputs; i++) {
  5147.          t->inputs[i] = ureg_DECL_fs_input_cyl_centroid(ureg,
  5148.                                                         inputSemanticName[i],
  5149.                                                         inputSemanticIndex[i],
  5150.                                                         interpMode[i], 0,
  5151.                                                         interpLocation[i]);
  5152.       }
  5153.  
  5154.       if (proginfo->InputsRead & VARYING_BIT_POS) {
  5155.           /* Must do this after setting up t->inputs. */
  5156.           emit_wpos(st_context(ctx), t, proginfo, ureg,
  5157.                     program->wpos_transform_const);
  5158.       }
  5159.  
  5160.       if (proginfo->InputsRead & VARYING_BIT_FACE)
  5161.          emit_face_var(ctx, t);
  5162.  
  5163.       /*
  5164.        * Declare output attributes.
  5165.        */
  5166.       for (i = 0; i < numOutputs; i++) {
  5167.          switch (outputSemanticName[i]) {
  5168.          case TGSI_SEMANTIC_POSITION:
  5169.             t->outputs[i] = ureg_DECL_output(ureg,
  5170.                                              TGSI_SEMANTIC_POSITION, /* Z/Depth */
  5171.                                              outputSemanticIndex[i]);
  5172.             t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Z);
  5173.             break;
  5174.          case TGSI_SEMANTIC_STENCIL:
  5175.             t->outputs[i] = ureg_DECL_output(ureg,
  5176.                                              TGSI_SEMANTIC_STENCIL, /* Stencil */
  5177.                                              outputSemanticIndex[i]);
  5178.             t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Y);
  5179.             break;
  5180.          case TGSI_SEMANTIC_COLOR:
  5181.             t->outputs[i] = ureg_DECL_output(ureg,
  5182.                                              TGSI_SEMANTIC_COLOR,
  5183.                                              outputSemanticIndex[i]);
  5184.             break;
  5185.          case TGSI_SEMANTIC_SAMPLEMASK:
  5186.             t->outputs[i] = ureg_DECL_output(ureg,
  5187.                                              TGSI_SEMANTIC_SAMPLEMASK,
  5188.                                              outputSemanticIndex[i]);
  5189.             /* TODO: If we ever support more than 32 samples, this will have
  5190.              * to become an array.
  5191.              */
  5192.             t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_X);
  5193.             break;
  5194.          default:
  5195.             assert(!"fragment shader outputs must be POSITION/STENCIL/COLOR/SAMPLEMASK");
  5196.             ret = PIPE_ERROR_BAD_INPUT;
  5197.             goto out;
  5198.          }
  5199.       }
  5200.    }
  5201.    else if (procType == TGSI_PROCESSOR_GEOMETRY) {
  5202.       for (i = 0; i < numInputs; i++) {
  5203.          t->inputs[i] = ureg_DECL_gs_input(ureg,
  5204.                                            i,
  5205.                                            inputSemanticName[i],
  5206.                                            inputSemanticIndex[i]);
  5207.       }
  5208.  
  5209.       for (i = 0; i < numOutputs; i++) {
  5210.          t->outputs[i] = ureg_DECL_output(ureg,
  5211.                                           outputSemanticName[i],
  5212.                                           outputSemanticIndex[i]);
  5213.       }
  5214.    }
  5215.    else {
  5216.       assert(procType == TGSI_PROCESSOR_VERTEX);
  5217.  
  5218.       for (i = 0; i < numInputs; i++) {
  5219.          t->inputs[i] = ureg_DECL_vs_input(ureg, i);
  5220.       }
  5221.  
  5222.       for (i = 0; i < numOutputs; i++) {
  5223.          t->outputs[i] = ureg_DECL_output(ureg,
  5224.                                           outputSemanticName[i],
  5225.                                           outputSemanticIndex[i]);
  5226.          if (outputSemanticName[i] == TGSI_SEMANTIC_FOG) {
  5227.             /* force register to contain a fog coordinate in the form (F, 0, 0, 1). */
  5228.             ureg_MOV(ureg,
  5229.                      ureg_writemask(t->outputs[i], TGSI_WRITEMASK_YZW),
  5230.                      ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f));
  5231.             t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_X);
  5232.          }
  5233.       }
  5234.       if (passthrough_edgeflags)
  5235.          emit_edgeflags(t);
  5236.    }
  5237.  
  5238.    /* Declare address register.
  5239.     */
  5240.    if (program->num_address_regs > 0) {
  5241.       assert(program->num_address_regs <= 3);
  5242.       for (int i = 0; i < program->num_address_regs; i++)
  5243.          t->address[i] = ureg_DECL_address(ureg);
  5244.    }
  5245.  
  5246.    /* Declare misc input registers
  5247.     */
  5248.    {
  5249.       GLbitfield sysInputs = proginfo->SystemValuesRead;
  5250.       unsigned numSys = 0;
  5251.       for (i = 0; sysInputs; i++) {
  5252.          if (sysInputs & (1 << i)) {
  5253.             unsigned semName = _mesa_sysval_to_semantic[i];
  5254.             t->systemValues[i] = ureg_DECL_system_value(ureg, numSys, semName, 0);
  5255.             if (semName == TGSI_SEMANTIC_INSTANCEID ||
  5256.                 semName == TGSI_SEMANTIC_VERTEXID) {
  5257.                /* From Gallium perspective, these system values are always
  5258.                 * integer, and require native integer support.  However, if
  5259.                 * native integer is supported on the vertex stage but not the
  5260.                 * pixel stage (e.g., i915g + draw), Mesa will generate IR that
  5261.                 * assumes these system values are floats. To resolve the
  5262.                 * inconsistency, we insert a U2F.
  5263.                 */
  5264.                struct st_context *st = st_context(ctx);
  5265.                struct pipe_screen *pscreen = st->pipe->screen;
  5266.                assert(procType == TGSI_PROCESSOR_VERTEX);
  5267.                assert(pscreen->get_shader_param(pscreen, PIPE_SHADER_VERTEX, PIPE_SHADER_CAP_INTEGERS));
  5268.                if (!ctx->Const.NativeIntegers) {
  5269.                   struct ureg_dst temp = ureg_DECL_local_temporary(t->ureg);
  5270.                   ureg_U2F( t->ureg, ureg_writemask(temp, TGSI_WRITEMASK_X), t->systemValues[i]);
  5271.                   t->systemValues[i] = ureg_scalar(ureg_src(temp), 0);
  5272.                }
  5273.             }
  5274.             numSys++;
  5275.             sysInputs &= ~(1 << i);
  5276.          }
  5277.       }
  5278.    }
  5279.  
  5280.    /* Copy over array sizes
  5281.     */
  5282.    memcpy(t->array_sizes, program->array_sizes, sizeof(unsigned) * program->next_array);
  5283.  
  5284.    /* Emit constants and uniforms.  TGSI uses a single index space for these,
  5285.     * so we put all the translated regs in t->constants.
  5286.     */
  5287.    if (proginfo->Parameters) {
  5288.       t->constants = (struct ureg_src *)
  5289.          calloc(proginfo->Parameters->NumParameters, sizeof(t->constants[0]));
  5290.       if (t->constants == NULL) {
  5291.          ret = PIPE_ERROR_OUT_OF_MEMORY;
  5292.          goto out;
  5293.       }
  5294.       t->num_constants = proginfo->Parameters->NumParameters;
  5295.  
  5296.       for (i = 0; i < proginfo->Parameters->NumParameters; i++) {
  5297.          switch (proginfo->Parameters->Parameters[i].Type) {
  5298.          case PROGRAM_STATE_VAR:
  5299.          case PROGRAM_UNIFORM:
  5300.             t->constants[i] = ureg_DECL_constant(ureg, i);
  5301.             break;
  5302.  
  5303.          /* Emit immediates for PROGRAM_CONSTANT only when there's no indirect
  5304.           * addressing of the const buffer.
  5305.           * FIXME: Be smarter and recognize param arrays:
  5306.           * indirect addressing is only valid within the referenced
  5307.           * array.
  5308.           */
  5309.          case PROGRAM_CONSTANT:
  5310.             if (program->indirect_addr_consts)
  5311.                t->constants[i] = ureg_DECL_constant(ureg, i);
  5312.             else
  5313.                t->constants[i] = emit_immediate(t,
  5314.                                                 proginfo->Parameters->ParameterValues[i],
  5315.                                                 proginfo->Parameters->Parameters[i].DataType,
  5316.                                                 4);
  5317.             break;
  5318.          default:
  5319.             break;
  5320.          }
  5321.       }
  5322.    }
  5323.  
  5324.    if (program->shader) {
  5325.       unsigned num_ubos = program->shader->NumUniformBlocks;
  5326.  
  5327.       for (i = 0; i < num_ubos; i++) {
  5328.          unsigned size = program->shader->UniformBlocks[i].UniformBufferSize;
  5329.          unsigned num_const_vecs = (size + 15) / 16;
  5330.          unsigned first, last;
  5331.          assert(num_const_vecs > 0);
  5332.          first = 0;
  5333.          last = num_const_vecs > 0 ? num_const_vecs - 1 : 0;
  5334.          ureg_DECL_constant2D(t->ureg, first, last, i + 1);
  5335.       }
  5336.    }
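
   /* Worked example (block size assumed): a uniform block of 40 bytes yields
    * (40 + 15) / 16 = 3 constant vectors, so the declaration above becomes
    *
    *    ureg_DECL_constant2D(t->ureg, 0, 2, i + 1);
    *
    * i.e. CONST[i+1][0..2]; dimension 0 is left for the default constant
    * buffer declared from proginfo->Parameters.
    */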
  5337.  
  5338.    /* Emit immediate values.
  5339.     */
  5340.    t->immediates = (struct ureg_src *)
  5341.       calloc(program->num_immediates, sizeof(struct ureg_src));
  5342.    if (t->immediates == NULL) {
  5343.       ret = PIPE_ERROR_OUT_OF_MEMORY;
  5344.       goto out;
  5345.    }
  5346.    t->num_immediates = program->num_immediates;
  5347.  
  5348.    i = 0;
  5349.    foreach_in_list(immediate_storage, imm, &program->immediates) {
  5350.       assert(i < program->num_immediates);
  5351.       t->immediates[i++] = emit_immediate(t, imm->values, imm->type, imm->size32);
  5352.    }
  5353.    assert(i == program->num_immediates);
  5354.  
  5355.    /* texture samplers */
  5356.    for (i = 0; i < ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits; i++) {
  5357.       if (program->samplers_used & (1 << i)) {
  5358.          t->samplers[i] = ureg_DECL_sampler(ureg, i);
  5359.       }
  5360.    }
  5361.  
  5362.    /* Emit each instruction in turn:
  5363.     */
  5364.    foreach_in_list(glsl_to_tgsi_instruction, inst, &program->instructions) {
  5365.       set_insn_start(t, ureg_get_instruction_number(ureg));
  5366.       compile_tgsi_instruction(t, inst, clamp_color);
  5367.    }
  5368.  
  5369.    /* Fix up all emitted labels:
  5370.     */
  5371.    for (i = 0; i < t->labels_count; i++) {
  5372.       ureg_fixup_label(ureg, t->labels[i].token,
  5373.                        t->insn[t->labels[i].branch_target]);
  5374.    }
  5375.  
  5376. out:
  5377.    if (t) {
  5378.       free(t->temps);
  5379.       free(t->insn);
  5380.       free(t->labels);
  5381.       free(t->constants);
  5382.       t->num_constants = 0;
  5383.       free(t->immediates);
  5384.       t->num_immediates = 0;
  5385.  
  5386.       if (t->error) {
  5387.          debug_printf("%s: translate error flag set\n", __func__);
  5388.       }
  5389.  
  5390.       FREE(t);
  5391.    }
  5392.  
  5393.    return ret;
  5394. }
  5395. /* ----------------------------- End TGSI code ------------------------------ */
  5396.  
  5397.  
  5398. static unsigned
  5399. shader_stage_to_ptarget(gl_shader_stage stage)
  5400. {
  5401.    switch (stage) {
  5402.    case MESA_SHADER_VERTEX:
  5403.       return PIPE_SHADER_VERTEX;
  5404.    case MESA_SHADER_FRAGMENT:
  5405.       return PIPE_SHADER_FRAGMENT;
  5406.    case MESA_SHADER_GEOMETRY:
  5407.       return PIPE_SHADER_GEOMETRY;
  5408.    case MESA_SHADER_COMPUTE:
  5409.       return PIPE_SHADER_COMPUTE;
  5410.    }
  5411.  
  5412.    assert(!"should not be reached");
  5413.    return PIPE_SHADER_VERTEX;
  5414. }
  5415.  
  5416.  
  5417. /**
  5418.  * Convert a shader's GLSL IR into a Mesa gl_program, but without
  5419.  * generating Mesa IR.
  5420.  */
  5421. static struct gl_program *
  5422. get_mesa_program(struct gl_context *ctx,
  5423.                  struct gl_shader_program *shader_program,
  5424.                  struct gl_shader *shader)
  5425. {
  5426.    glsl_to_tgsi_visitor* v;
  5427.    struct gl_program *prog;
  5428.    GLenum target = _mesa_shader_stage_to_program(shader->Stage);
  5429.    bool progress;
  5430.    struct gl_shader_compiler_options *options =
  5431.          &ctx->Const.ShaderCompilerOptions[_mesa_shader_enum_to_shader_stage(shader->Type)];
  5432.    struct pipe_screen *pscreen = ctx->st->pipe->screen;
  5433.    unsigned ptarget = shader_stage_to_ptarget(shader->Stage);
  5434.  
  5435.    validate_ir_tree(shader->ir);
  5436.  
  5437.    prog = ctx->Driver.NewProgram(ctx, target, shader_program->Name);
  5438.    if (!prog)
  5439.       return NULL;
  5440.    prog->Parameters = _mesa_new_parameter_list();
  5441.    v = new glsl_to_tgsi_visitor();
  5442.    v->ctx = ctx;
  5443.    v->prog = prog;
  5444.    v->shader_program = shader_program;
  5445.    v->shader = shader;
  5446.    v->options = options;
  5447.    v->glsl_version = ctx->Const.GLSLVersion;
  5448.    v->native_integers = ctx->Const.NativeIntegers;
  5449.  
  5450.    v->have_sqrt = pscreen->get_shader_param(pscreen, ptarget,
  5451.                                             PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED);
  5452.    v->have_fma = pscreen->get_shader_param(pscreen, ptarget,
  5453.                                            PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED);
  5454.  
  5455.    _mesa_copy_linked_program_data(shader->Stage, shader_program, prog);
  5456.    _mesa_generate_parameters_list_for_uniforms(shader_program, shader,
  5457.                                                prog->Parameters);
  5458.  
  5459.    /* Remove reads from output registers. */
  5460.    lower_output_reads(shader->ir);
  5461.  
  5462.    /* Emit intermediate IR for main(). */
  5463.    visit_exec_list(shader->ir, v);
  5464.  
  5465.    /* Now emit bodies for any functions that were used. */
  5466.    do {
  5467.       progress = GL_FALSE;
  5468.  
  5469.       foreach_in_list(function_entry, entry, &v->function_signatures) {
  5470.          if (!entry->bgn_inst) {
  5471.             v->current_function = entry;
  5472.  
  5473.             entry->bgn_inst = v->emit(NULL, TGSI_OPCODE_BGNSUB);
  5474.             entry->bgn_inst->function = entry;
  5475.  
  5476.             visit_exec_list(&entry->sig->body, v);
  5477.  
  5478.             glsl_to_tgsi_instruction *last;
  5479.             last = (glsl_to_tgsi_instruction *)v->instructions.get_tail();
  5480.             if (last->op != TGSI_OPCODE_RET)
  5481.                v->emit(NULL, TGSI_OPCODE_RET);
  5482.  
  5483.             glsl_to_tgsi_instruction *end;
  5484.             end = v->emit(NULL, TGSI_OPCODE_ENDSUB);
  5485.             end->function = entry;
  5486.  
  5487.             progress = GL_TRUE;
  5488.          }
  5489.       }
  5490.    } while (progress);
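
   /* Each function body emitted by the loop above is bracketed roughly as
    * follows (sketch; the body itself is program-dependent):
    *
    *   BGNSUB
    *   ...body of entry->sig...
    *   RET      (appended only if the body does not already end in RET)
    *   ENDSUB
    */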
  5491.  
  5492. #if 0
  5493.    /* Print out some information (for debugging purposes) used by the
  5494.     * optimization passes. */
  5495.    for (i = 0; i < v->next_temp; i++) {
  5496.       int fr = v->get_first_temp_read(i);
  5497.       int fw = v->get_first_temp_write(i);
  5498.       int lr = v->get_last_temp_read(i);
  5499.       int lw = v->get_last_temp_write(i);
  5500.  
  5501.       printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, fr, fw, lr, lw);
  5502.       assert(fw <= fr);
  5503.    }
  5504. #endif
  5505.  
  5506.    /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */
  5507.    v->simplify_cmp();
  5508.    v->copy_propagate();
  5509.    while (v->eliminate_dead_code());
  5510.  
  5511.    v->merge_two_dsts();
  5512.    v->merge_registers();
  5513.    v->renumber_registers();
  5514.  
  5515.    /* Write the END instruction. */
  5516.    v->emit(NULL, TGSI_OPCODE_END);
  5517.  
  5518.    if (ctx->_Shader->Flags & GLSL_DUMP) {
  5519.       _mesa_log("\n");
  5520.       _mesa_log("GLSL IR for linked %s program %d:\n",
  5521.              _mesa_shader_stage_to_string(shader->Stage),
  5522.              shader_program->Name);
  5523.       _mesa_print_ir(_mesa_get_log_file(), shader->ir, NULL);
  5524.       _mesa_log("\n\n");
  5525.    }
  5526.  
  5527.    prog->Instructions = NULL;
  5528.    prog->NumInstructions = 0;
  5529.  
  5530.    do_set_program_inouts(shader->ir, prog, shader->Stage);
  5531.    count_resources(v, prog);
  5532.  
  5533.    /* This must be done before the uniform storage is associated. */
  5534.    if (shader->Type == GL_FRAGMENT_SHADER &&
  5535.        prog->InputsRead & VARYING_BIT_POS){
  5536.       static const gl_state_index wposTransformState[STATE_LENGTH] = {
  5537.          STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM
  5538.       };
  5539.  
  5540.       v->wpos_transform_const = _mesa_add_state_reference(prog->Parameters,
  5541.                                                           wposTransformState);
  5542.    }
  5543.  
  5544.    _mesa_reference_program(ctx, &shader->Program, prog);
  5545.  
  5546.    /* This has to be done last.  Any operation that can cause
  5547.     * prog->ParameterValues to get reallocated (e.g., anything that adds a
  5548.     * program constant) has to happen before creating this linkage.
  5549.     */
  5550.    _mesa_associate_uniform_storage(ctx, shader_program, prog->Parameters);
  5551.    if (!shader_program->LinkStatus) {
  5552.       free_glsl_to_tgsi_visitor(v);
  5553.       return NULL;
  5554.    }
  5555.  
  5556.    struct st_vertex_program *stvp;
  5557.    struct st_fragment_program *stfp;
  5558.    struct st_geometry_program *stgp;
  5559.  
  5560.    switch (shader->Type) {
  5561.    case GL_VERTEX_SHADER:
  5562.       stvp = (struct st_vertex_program *)prog;
  5563.       stvp->glsl_to_tgsi = v;
  5564.       break;
  5565.    case GL_FRAGMENT_SHADER:
  5566.       stfp = (struct st_fragment_program *)prog;
  5567.       stfp->glsl_to_tgsi = v;
  5568.       break;
  5569.    case GL_GEOMETRY_SHADER:
  5570.       stgp = (struct st_geometry_program *)prog;
  5571.       stgp->glsl_to_tgsi = v;
  5572.       break;
  5573.    default:
  5574.       assert(!"should not be reached");
  5575.       return NULL;
  5576.    }
  5577.  
  5578.    return prog;
  5579. }
  5580.  
  5581. extern "C" {
  5582.  
  5583. /**
  5584.  * Link a shader.
  5585.  * Called via ctx->Driver.LinkShader()
  5586.  * This actually involves converting GLSL IR into an intermediate TGSI-like IR
  5587.  * with code lowering and other optimizations.
  5588.  */
GLboolean
st_link_shader(struct gl_context *ctx, struct gl_shader_program *prog)
{
   struct pipe_screen *pscreen = ctx->st->pipe->screen;
   assert(prog->LinkStatus);

   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
      if (prog->_LinkedShaders[i] == NULL)
         continue;

      bool progress;
      exec_list *ir = prog->_LinkedShaders[i]->ir;
      gl_shader_stage stage =
         _mesa_shader_enum_to_shader_stage(prog->_LinkedShaders[i]->Type);
      const struct gl_shader_compiler_options *options =
            &ctx->Const.ShaderCompilerOptions[stage];
      unsigned ptarget = shader_stage_to_ptarget(stage);
      bool have_dround = pscreen->get_shader_param(pscreen, ptarget,
                                                   PIPE_SHADER_CAP_TGSI_DROUND_SUPPORTED);
      bool have_dfrexp = pscreen->get_shader_param(pscreen, ptarget,
                                                   PIPE_SHADER_CAP_TGSI_DFRACEXP_DLDEXP_SUPPORTED);

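      /* A sketch of the effect of the pass invoked just below:
       * lower_variable_index_to_cond_assign() rewrites a dynamically indexed
       * access such as
       *
       *    x = a[i];
       *
       * into a chain of conditional assignments that compare i against each
       * constant index of a, so that back-ends without indirect addressing
       * on the affected register file can still run the shader.  (Sketch
       * only; the exact expansion is up to the lowering pass.)
       */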
      /* If there are forms of indirect addressing that the driver
       * cannot handle, perform the lowering pass.
       */
      if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput ||
          options->EmitNoIndirectTemp || options->EmitNoIndirectUniform) {
         lower_variable_index_to_cond_assign(ir,
                                             options->EmitNoIndirectInput,
                                             options->EmitNoIndirectOutput,
                                             options->EmitNoIndirectTemp,
                                             options->EmitNoIndirectUniform);
      }

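      /* When ARB_shading_language_packing is exposed, the GLSL pack/unpack
       * built-ins (packSnorm2x16(), unpackHalf2x16(), and so on) are lowered
       * below into ordinary integer and floating-point arithmetic rather
       * than being passed through as dedicated opcodes.
       */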
      if (ctx->Extensions.ARB_shading_language_packing) {
         unsigned lower_inst = LOWER_PACK_SNORM_2x16 |
                               LOWER_UNPACK_SNORM_2x16 |
                               LOWER_PACK_UNORM_2x16 |
                               LOWER_UNPACK_UNORM_2x16 |
                               LOWER_PACK_SNORM_4x8 |
                               LOWER_UNPACK_SNORM_4x8 |
                               LOWER_UNPACK_UNORM_4x8 |
                               LOWER_PACK_UNORM_4x8 |
                               LOWER_PACK_HALF_2x16 |
                               LOWER_UNPACK_HALF_2x16;

         lower_packing_builtins(ir, lower_inst);
      }

      if (!pscreen->get_param(pscreen, PIPE_CAP_TEXTURE_GATHER_OFFSETS))
         lower_offset_arrays(ir);
      do_mat_op_to_vec(ir);
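      /* lower_instructions() replaces individual IR operations with
       * equivalent sequences that the TGSI path handles more uniformly; for
       * example, POW_TO_EXP2 turns pow(x, y) into exp2(y * log2(x)) and
       * DIV_TO_MUL_RCP turns a / b into a * (1.0 / b).  The conditional
       * flags are only set when the driver caps or compiler options say the
       * native operation is unavailable.
       */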
      lower_instructions(ir,
                         MOD_TO_FLOOR |
                         DIV_TO_MUL_RCP |
                         EXP_TO_EXP2 |
                         LOG_TO_LOG2 |
                         LDEXP_TO_ARITH |
                         (have_dfrexp ? 0 : DFREXP_DLDEXP_TO_ARITH) |
                         CARRY_TO_ARITH |
                         BORROW_TO_ARITH |
                         (have_dround ? 0 : DOPS_TO_DFRAC) |
                         (options->EmitNoPow ? POW_TO_EXP2 : 0) |
                         (!ctx->Const.NativeIntegers ? INT_DIV_TO_MUL_RCP : 0) |
                         (options->EmitNoSat ? SAT_TO_CLAMP : 0));

      lower_ubo_reference(prog->_LinkedShaders[i], ir);
      do_vec_index_to_cond_assign(ir);
      lower_vector_insert(ir, true);
      lower_quadop_vector(ir, false);
      lower_noise(ir);
      if (options->MaxIfDepth == 0) {
         lower_discard(ir);
      }

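      /* Iterate jump lowering, the common optimization passes and
       * if-flattening until none of them makes further progress, i.e. until
       * the IR for this stage reaches a fixed point.
       */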
      do {
         progress = false;

         progress = do_lower_jumps(ir, true, true, options->EmitNoMainReturn,
                                   options->EmitNoCont, options->EmitNoLoops)
            || progress;

         progress = do_common_optimization(ir, true, true, options,
                                           ctx->Const.NativeIntegers)
            || progress;

         progress = lower_if_to_cond_assign(ir, options->MaxIfDepth) || progress;

      } while (progress);

      validate_ir_tree(ir);
   }

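   /* Second pass: turn each linked GLSL shader into a gl_program carrying
    * the glsl_to_tgsi_visitor built above, and hand it to the driver.  If
    * the driver rejects a program in ProgramStringNotify(), the whole link
    * fails.
    */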
   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
      struct gl_program *linked_prog;

      if (prog->_LinkedShaders[i] == NULL)
         continue;

      linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]);

      if (linked_prog) {
         _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program,
                                 linked_prog);
         if (!ctx->Driver.ProgramStringNotify(ctx,
                                              _mesa_shader_stage_to_program(i),
                                              linked_prog)) {
            _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program,
                                    NULL);
            _mesa_reference_program(ctx, &linked_prog, NULL);
            return GL_FALSE;
         }
      }

      _mesa_reference_program(ctx, &linked_prog, NULL);
   }

   return GL_TRUE;
}

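/**
 * Copy the linked transform feedback info of a shader program into the
 * gallium pipe_stream_output_info consumed by the driver, remapping each
 * recorded output register through outputMapping.
 */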
void
st_translate_stream_output_info(glsl_to_tgsi_visitor *glsl_to_tgsi,
                                const GLuint outputMapping[],
                                struct pipe_stream_output_info *so)
{
   unsigned i;
   struct gl_transform_feedback_info *info =
      &glsl_to_tgsi->shader_program->LinkedTransformFeedback;

   for (i = 0; i < info->NumOutputs; i++) {
      so->output[i].register_index =
         outputMapping[info->Outputs[i].OutputRegister];
      so->output[i].start_component = info->Outputs[i].ComponentOffset;
      so->output[i].num_components = info->Outputs[i].NumComponents;
      so->output[i].output_buffer = info->Outputs[i].OutputBuffer;
      so->output[i].dst_offset = info->Outputs[i].DstOffset;
      so->output[i].stream = info->Outputs[i].StreamId;
   }

   for (i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {
      so->stride[i] = info->BufferStride[i];
   }
   so->num_outputs = info->NumOutputs;
}

} /* extern "C" */