Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | RSS feed

  1. /* Copyright © 2011 Intel Corporation
  2.  *
  3.  * Permission is hereby granted, free of charge, to any person obtaining a
  4.  * copy of this software and associated documentation files (the "Software"),
  5.  * to deal in the Software without restriction, including without limitation
  6.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  7.  * and/or sell copies of the Software, and to permit persons to whom the
  8.  * Software is furnished to do so, subject to the following conditions:
  9.  *
  10.  * The above copyright notice and this permission notice (including the next
  11.  * paragraph) shall be included in all copies or substantial portions of the
  12.  * Software.
  13.  *
  14.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  15.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  16.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  17.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  18.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  19.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  20.  * IN THE SOFTWARE.
  21.  */
  22.  
  23. #include "brw_vec4.h"
  24.  
  25. extern "C" {
  26. #include "brw_eu.h"
  27. #include "main/macros.h"
  28. #include "program/prog_print.h"
  29. #include "program/prog_parameter.h"
  30. };
  31.  
  32. namespace brw {
  33.  
  34. struct brw_reg
  35. vec4_instruction::get_dst(void)
  36. {
  37.    struct brw_reg brw_reg;
  38.  
  39.    switch (dst.file) {
  40.    case GRF:
  41.       brw_reg = brw_vec8_grf(dst.reg + dst.reg_offset, 0);
  42.       brw_reg = retype(brw_reg, dst.type);
  43.       brw_reg.dw1.bits.writemask = dst.writemask;
  44.       break;
  45.  
  46.    case MRF:
  47.       brw_reg = brw_message_reg(dst.reg + dst.reg_offset);
  48.       brw_reg = retype(brw_reg, dst.type);
  49.       brw_reg.dw1.bits.writemask = dst.writemask;
  50.       break;
  51.  
  52.    case HW_REG:
  53.       brw_reg = dst.fixed_hw_reg;
  54.       break;
  55.  
  56.    case BAD_FILE:
  57.       brw_reg = brw_null_reg();
  58.       break;
  59.  
  60.    default:
  61.       assert(!"not reached");
  62.       brw_reg = brw_null_reg();
  63.       break;
  64.    }
  65.    return brw_reg;
  66. }
  67.  
  68. struct brw_reg
  69. vec4_instruction::get_src(int i)
  70. {
  71.    struct brw_reg brw_reg;
  72.  
  73.    switch (src[i].file) {
  74.    case GRF:
  75.       brw_reg = brw_vec8_grf(src[i].reg + src[i].reg_offset, 0);
  76.       brw_reg = retype(brw_reg, src[i].type);
  77.       brw_reg.dw1.bits.swizzle = src[i].swizzle;
  78.       if (src[i].abs)
  79.          brw_reg = brw_abs(brw_reg);
  80.       if (src[i].negate)
  81.          brw_reg = negate(brw_reg);
  82.       break;
  83.  
  84.    case IMM:
  85.       switch (src[i].type) {
  86.       case BRW_REGISTER_TYPE_F:
  87.          brw_reg = brw_imm_f(src[i].imm.f);
  88.          break;
  89.       case BRW_REGISTER_TYPE_D:
  90.          brw_reg = brw_imm_d(src[i].imm.i);
  91.          break;
  92.       case BRW_REGISTER_TYPE_UD:
  93.          brw_reg = brw_imm_ud(src[i].imm.u);
  94.          break;
  95.       default:
  96.          assert(!"not reached");
  97.          brw_reg = brw_null_reg();
  98.          break;
  99.       }
  100.       break;
  101.  
  102.    case UNIFORM:
  103.       brw_reg = stride(brw_vec4_grf(1 + (src[i].reg + src[i].reg_offset) / 2,
  104.                                     ((src[i].reg + src[i].reg_offset) % 2) * 4),
  105.                        0, 4, 1);
  106.       brw_reg = retype(brw_reg, src[i].type);
  107.       brw_reg.dw1.bits.swizzle = src[i].swizzle;
  108.       if (src[i].abs)
  109.          brw_reg = brw_abs(brw_reg);
  110.       if (src[i].negate)
  111.          brw_reg = negate(brw_reg);
  112.  
  113.       /* This should have been moved to pull constants. */
  114.       assert(!src[i].reladdr);
  115.       break;
  116.  
  117.    case HW_REG:
  118.       brw_reg = src[i].fixed_hw_reg;
  119.       break;
  120.  
  121.    case BAD_FILE:
  122.       /* Probably unused. */
  123.       brw_reg = brw_null_reg();
  124.       break;
  125.    case ATTR:
  126.    default:
  127.       assert(!"not reached");
  128.       brw_reg = brw_null_reg();
  129.       break;
  130.    }
  131.  
  132.    return brw_reg;
  133. }
  134.  
  135. vec4_generator::vec4_generator(struct brw_context *brw,
  136.                                struct gl_shader_program *shader_prog,
  137.                                struct gl_program *prog,
  138.                                void *mem_ctx,
  139.                                bool debug_flag)
  140.    : brw(brw), shader_prog(shader_prog), prog(prog), mem_ctx(mem_ctx),
  141.      debug_flag(debug_flag)
  142. {
  143.    shader = shader_prog ? shader_prog->_LinkedShaders[MESA_SHADER_VERTEX] : NULL;
  144.  
  145.    p = rzalloc(mem_ctx, struct brw_compile);
  146.    brw_init_compile(brw, p, mem_ctx);
  147. }
  148.  
/* The destructor body is empty; nothing is released explicitly here.
 * NOTE(review): the brw_compile obtained from rzalloc() in the constructor
 * is presumably freed together with mem_ctx by the owner -- confirm.
 */
vec4_generator::~vec4_generator()
{
}
  152.  
  153. void
  154. vec4_generator::generate_math1_gen4(vec4_instruction *inst,
  155.                                     struct brw_reg dst,
  156.                                     struct brw_reg src)
  157. {
  158.    brw_math(p,
  159.             dst,
  160.             brw_math_function(inst->opcode),
  161.             inst->base_mrf,
  162.             src,
  163.             BRW_MATH_DATA_VECTOR,
  164.             BRW_MATH_PRECISION_FULL);
  165. }
  166.  
  167. static void
  168. check_gen6_math_src_arg(struct brw_reg src)
  169. {
  170.    /* Source swizzles are ignored. */
  171.    assert(!src.abs);
  172.    assert(!src.negate);
  173.    assert(src.dw1.bits.swizzle == BRW_SWIZZLE_XYZW);
  174. }
  175.  
  176. void
  177. vec4_generator::generate_math1_gen6(vec4_instruction *inst,
  178.                                     struct brw_reg dst,
  179.                                     struct brw_reg src)
  180. {
  181.    /* Can't do writemask because math can't be align16. */
  182.    assert(dst.dw1.bits.writemask == WRITEMASK_XYZW);
  183.    check_gen6_math_src_arg(src);
  184.  
  185.    brw_set_access_mode(p, BRW_ALIGN_1);
  186.    brw_math(p,
  187.             dst,
  188.             brw_math_function(inst->opcode),
  189.             inst->base_mrf,
  190.             src,
  191.             BRW_MATH_DATA_SCALAR,
  192.             BRW_MATH_PRECISION_FULL);
  193.    brw_set_access_mode(p, BRW_ALIGN_16);
  194. }
  195.  
  196. void
  197. vec4_generator::generate_math2_gen7(vec4_instruction *inst,
  198.                                     struct brw_reg dst,
  199.                                     struct brw_reg src0,
  200.                                     struct brw_reg src1)
  201. {
  202.    brw_math2(p,
  203.              dst,
  204.              brw_math_function(inst->opcode),
  205.              src0, src1);
  206. }
  207.  
  208. void
  209. vec4_generator::generate_math2_gen6(vec4_instruction *inst,
  210.                                     struct brw_reg dst,
  211.                                     struct brw_reg src0,
  212.                                     struct brw_reg src1)
  213. {
  214.    /* Can't do writemask because math can't be align16. */
  215.    assert(dst.dw1.bits.writemask == WRITEMASK_XYZW);
  216.    /* Source swizzles are ignored. */
  217.    check_gen6_math_src_arg(src0);
  218.    check_gen6_math_src_arg(src1);
  219.  
  220.    brw_set_access_mode(p, BRW_ALIGN_1);
  221.    brw_math2(p,
  222.              dst,
  223.              brw_math_function(inst->opcode),
  224.              src0, src1);
  225.    brw_set_access_mode(p, BRW_ALIGN_16);
  226. }
  227.  
  228. void
  229. vec4_generator::generate_math2_gen4(vec4_instruction *inst,
  230.                                     struct brw_reg dst,
  231.                                     struct brw_reg src0,
  232.                                     struct brw_reg src1)
  233. {
  234.    /* From the Ironlake PRM, Volume 4, Part 1, Section 6.1.13
  235.     * "Message Payload":
  236.     *
  237.     * "Operand0[7].  For the INT DIV functions, this operand is the
  238.     *  denominator."
  239.     *  ...
  240.     * "Operand1[7].  For the INT DIV functions, this operand is the
  241.     *  numerator."
  242.     */
  243.    bool is_int_div = inst->opcode != SHADER_OPCODE_POW;
  244.    struct brw_reg &op0 = is_int_div ? src1 : src0;
  245.    struct brw_reg &op1 = is_int_div ? src0 : src1;
  246.  
  247.    brw_push_insn_state(p);
  248.    brw_set_saturate(p, false);
  249.    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
  250.    brw_MOV(p, retype(brw_message_reg(inst->base_mrf + 1), op1.type), op1);
  251.    brw_pop_insn_state(p);
  252.  
  253.    brw_math(p,
  254.             dst,
  255.             brw_math_function(inst->opcode),
  256.             inst->base_mrf,
  257.             op0,
  258.             BRW_MATH_DATA_VECTOR,
  259.             BRW_MATH_PRECISION_FULL);
  260. }
  261.  
  262. void
  263. vec4_generator::generate_tex(vec4_instruction *inst,
  264.                              struct brw_reg dst,
  265.                              struct brw_reg src)
  266. {
  267.    int msg_type = -1;
  268.  
  269.    if (brw->gen >= 5) {
  270.       switch (inst->opcode) {
  271.       case SHADER_OPCODE_TEX:
  272.       case SHADER_OPCODE_TXL:
  273.          if (inst->shadow_compare) {
  274.             msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE;
  275.          } else {
  276.             msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
  277.          }
  278.          break;
  279.       case SHADER_OPCODE_TXD:
  280.          if (inst->shadow_compare) {
  281.             /* Gen7.5+.  Otherwise, lowered by brw_lower_texture_gradients(). */
  282.             assert(brw->is_haswell);
  283.             msg_type = HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE;
  284.          } else {
  285.             msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
  286.          }
  287.          break;
  288.       case SHADER_OPCODE_TXF:
  289.          msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
  290.          break;
  291.       case SHADER_OPCODE_TXF_MS:
  292.          if (brw->gen >= 7)
  293.             msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS;
  294.          else
  295.             msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
  296.          break;
  297.       case SHADER_OPCODE_TXS:
  298.          msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
  299.          break;
  300.       default:
  301.          assert(!"should not get here: invalid VS texture opcode");
  302.          break;
  303.       }
  304.    } else {
  305.       switch (inst->opcode) {
  306.       case SHADER_OPCODE_TEX:
  307.       case SHADER_OPCODE_TXL:
  308.          if (inst->shadow_compare) {
  309.             msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD_COMPARE;
  310.             assert(inst->mlen == 3);
  311.          } else {
  312.             msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD;
  313.             assert(inst->mlen == 2);
  314.          }
  315.          break;
  316.       case SHADER_OPCODE_TXD:
  317.          /* There is no sample_d_c message; comparisons are done manually. */
  318.          msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS;
  319.          assert(inst->mlen == 4);
  320.          break;
  321.       case SHADER_OPCODE_TXF:
  322.          msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_LD;
  323.          assert(inst->mlen == 2);
  324.          break;
  325.       case SHADER_OPCODE_TXS:
  326.          msg_type = BRW_SAMPLER_MESSAGE_SIMD4X2_RESINFO;
  327.          assert(inst->mlen == 2);
  328.          break;
  329.       default:
  330.          assert(!"should not get here: invalid VS texture opcode");
  331.          break;
  332.       }
  333.    }
  334.  
  335.    assert(msg_type != -1);
  336.  
  337.    /* Load the message header if present.  If there's a texture offset, we need
  338.     * to set it up explicitly and load the offset bitfield.  Otherwise, we can
  339.     * use an implied move from g0 to the first message register.
  340.     */
  341.    if (inst->texture_offset) {
  342.       /* Explicitly set up the message header by copying g0 to the MRF. */
  343.       brw_push_insn_state(p);
  344.       brw_set_mask_control(p, BRW_MASK_DISABLE);
  345.       brw_MOV(p, retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD),
  346.                  retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
  347.  
  348.       /* Then set the offset bits in DWord 2. */
  349.       brw_set_access_mode(p, BRW_ALIGN_1);
  350.       brw_MOV(p,
  351.               retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, inst->base_mrf, 2),
  352.                      BRW_REGISTER_TYPE_UD),
  353.               brw_imm_uw(inst->texture_offset));
  354.       brw_pop_insn_state(p);
  355.    } else if (inst->header_present) {
  356.       /* Set up an implied move from g0 to the MRF. */
  357.       src = brw_vec8_grf(0, 0);
  358.    }
  359.  
  360.    uint32_t return_format;
  361.  
  362.    switch (dst.type) {
  363.    case BRW_REGISTER_TYPE_D:
  364.       return_format = BRW_SAMPLER_RETURN_FORMAT_SINT32;
  365.       break;
  366.    case BRW_REGISTER_TYPE_UD:
  367.       return_format = BRW_SAMPLER_RETURN_FORMAT_UINT32;
  368.       break;
  369.    default:
  370.       return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32;
  371.       break;
  372.    }
  373.  
  374.    brw_SAMPLE(p,
  375.               dst,
  376.               inst->base_mrf,
  377.               src,
  378.               SURF_INDEX_VS_TEXTURE(inst->sampler),
  379.               inst->sampler,
  380.               msg_type,
  381.               1, /* response length */
  382.               inst->mlen,
  383.               inst->header_present,
  384.               BRW_SAMPLER_SIMD_MODE_SIMD4X2,
  385.               return_format);
  386. }
  387.  
  388. void
  389. vec4_generator::generate_urb_write(vec4_instruction *inst)
  390. {
  391.    brw_urb_WRITE(p,
  392.                  brw_null_reg(), /* dest */
  393.                  inst->base_mrf, /* starting mrf reg nr */
  394.                  brw_vec8_grf(0, 0), /* src */
  395.                  false,         /* allocate */
  396.                  true,          /* used */
  397.                  inst->mlen,
  398.                  0,             /* response len */
  399.                  inst->eot,     /* eot */
  400.                  inst->eot,     /* writes complete */
  401.                  inst->offset,  /* urb destination offset */
  402.                  BRW_URB_SWIZZLE_INTERLEAVE);
  403. }
  404.  
  405. void
  406. vec4_generator::generate_oword_dual_block_offsets(struct brw_reg m1,
  407.                                                   struct brw_reg index)
  408. {
  409.    int second_vertex_offset;
  410.  
  411.    if (brw->gen >= 6)
  412.       second_vertex_offset = 1;
  413.    else
  414.       second_vertex_offset = 16;
  415.  
  416.    m1 = retype(m1, BRW_REGISTER_TYPE_D);
  417.  
  418.    /* Set up M1 (message payload).  Only the block offsets in M1.0 and
  419.     * M1.4 are used, and the rest are ignored.
  420.     */
  421.    struct brw_reg m1_0 = suboffset(vec1(m1), 0);
  422.    struct brw_reg m1_4 = suboffset(vec1(m1), 4);
  423.    struct brw_reg index_0 = suboffset(vec1(index), 0);
  424.    struct brw_reg index_4 = suboffset(vec1(index), 4);
  425.  
  426.    brw_push_insn_state(p);
  427.    brw_set_mask_control(p, BRW_MASK_DISABLE);
  428.    brw_set_access_mode(p, BRW_ALIGN_1);
  429.  
  430.    brw_MOV(p, m1_0, index_0);
  431.  
  432.    if (index.file == BRW_IMMEDIATE_VALUE) {
  433.       index_4.dw1.ud += second_vertex_offset;
  434.       brw_MOV(p, m1_4, index_4);
  435.    } else {
  436.       brw_ADD(p, m1_4, index_4, brw_imm_d(second_vertex_offset));
  437.    }
  438.  
  439.    brw_pop_insn_state(p);
  440. }
  441.  
  442. void
  443. vec4_generator::generate_scratch_read(vec4_instruction *inst,
  444.                                       struct brw_reg dst,
  445.                                       struct brw_reg index)
  446. {
  447.    struct brw_reg header = brw_vec8_grf(0, 0);
  448.  
  449.    gen6_resolve_implied_move(p, &header, inst->base_mrf);
  450.  
  451.    generate_oword_dual_block_offsets(brw_message_reg(inst->base_mrf + 1),
  452.                                      index);
  453.  
  454.    uint32_t msg_type;
  455.  
  456.    if (brw->gen >= 6)
  457.       msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
  458.    else if (brw->gen == 5 || brw->is_g4x)
  459.       msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
  460.    else
  461.       msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
  462.  
  463.    /* Each of the 8 channel enables is considered for whether each
  464.     * dword is written.
  465.     */
  466.    struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
  467.    brw_set_dest(p, send, dst);
  468.    brw_set_src0(p, send, header);
  469.    if (brw->gen < 6)
  470.       send->header.destreg__conditionalmod = inst->base_mrf;
  471.    brw_set_dp_read_message(p, send,
  472.                            255, /* binding table index: stateless access */
  473.                            BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
  474.                            msg_type,
  475.                            BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
  476.                            2, /* mlen */
  477.                            true, /* header_present */
  478.                            1 /* rlen */);
  479. }
  480.  
  481. void
  482. vec4_generator::generate_scratch_write(vec4_instruction *inst,
  483.                                        struct brw_reg dst,
  484.                                        struct brw_reg src,
  485.                                        struct brw_reg index)
  486. {
  487.    struct brw_reg header = brw_vec8_grf(0, 0);
  488.    bool write_commit;
  489.  
  490.    /* If the instruction is predicated, we'll predicate the send, not
  491.     * the header setup.
  492.     */
  493.    brw_set_predicate_control(p, false);
  494.  
  495.    gen6_resolve_implied_move(p, &header, inst->base_mrf);
  496.  
  497.    generate_oword_dual_block_offsets(brw_message_reg(inst->base_mrf + 1),
  498.                                      index);
  499.  
  500.    brw_MOV(p,
  501.            retype(brw_message_reg(inst->base_mrf + 2), BRW_REGISTER_TYPE_D),
  502.            retype(src, BRW_REGISTER_TYPE_D));
  503.  
  504.    uint32_t msg_type;
  505.  
  506.    if (brw->gen >= 7)
  507.       msg_type = GEN7_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE;
  508.    else if (brw->gen == 6)
  509.       msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE;
  510.    else
  511.       msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE;
  512.  
  513.    brw_set_predicate_control(p, inst->predicate);
  514.  
  515.    /* Pre-gen6, we have to specify write commits to ensure ordering
  516.     * between reads and writes within a thread.  Afterwards, that's
  517.     * guaranteed and write commits only matter for inter-thread
  518.     * synchronization.
  519.     */
  520.    if (brw->gen >= 6) {
  521.       write_commit = false;
  522.    } else {
  523.       /* The visitor set up our destination register to be g0.  This
  524.        * means that when the next read comes along, we will end up
  525.        * reading from g0 and causing a block on the write commit.  For
  526.        * write-after-read, we are relying on the value of the previous
  527.        * read being used (and thus blocking on completion) before our
  528.        * write is executed.  This means we have to be careful in
  529.        * instruction scheduling to not violate this assumption.
  530.        */
  531.       write_commit = true;
  532.    }
  533.  
  534.    /* Each of the 8 channel enables is considered for whether each
  535.     * dword is written.
  536.     */
  537.    struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
  538.    brw_set_dest(p, send, dst);
  539.    brw_set_src0(p, send, header);
  540.    if (brw->gen < 6)
  541.       send->header.destreg__conditionalmod = inst->base_mrf;
  542.    brw_set_dp_write_message(p, send,
  543.                             255, /* binding table index: stateless access */
  544.                             BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
  545.                             msg_type,
  546.                             3, /* mlen */
  547.                             true, /* header present */
  548.                             false, /* not a render target write */
  549.                             write_commit, /* rlen */
  550.                             false, /* eot */
  551.                             write_commit);
  552. }
  553.  
  554. void
  555. vec4_generator::generate_pull_constant_load(vec4_instruction *inst,
  556.                                             struct brw_reg dst,
  557.                                             struct brw_reg index,
  558.                                             struct brw_reg offset)
  559. {
  560.    assert(brw->gen <= 7);
  561.    assert(index.file == BRW_IMMEDIATE_VALUE &&
  562.           index.type == BRW_REGISTER_TYPE_UD);
  563.    uint32_t surf_index = index.dw1.ud;
  564.  
  565.    struct brw_reg header = brw_vec8_grf(0, 0);
  566.  
  567.    gen6_resolve_implied_move(p, &header, inst->base_mrf);
  568.  
  569.    brw_MOV(p, retype(brw_message_reg(inst->base_mrf + 1), BRW_REGISTER_TYPE_D),
  570.            offset);
  571.  
  572.    uint32_t msg_type;
  573.  
  574.    if (brw->gen >= 6)
  575.       msg_type = GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
  576.    else if (brw->gen == 5 || brw->is_g4x)
  577.       msg_type = G45_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
  578.    else
  579.       msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ;
  580.  
  581.    /* Each of the 8 channel enables is considered for whether each
  582.     * dword is written.
  583.     */
  584.    struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
  585.    brw_set_dest(p, send, dst);
  586.    brw_set_src0(p, send, header);
  587.    if (brw->gen < 6)
  588.       send->header.destreg__conditionalmod = inst->base_mrf;
  589.    brw_set_dp_read_message(p, send,
  590.                            surf_index,
  591.                            BRW_DATAPORT_OWORD_DUAL_BLOCK_1OWORD,
  592.                            msg_type,
  593.                            BRW_DATAPORT_READ_TARGET_DATA_CACHE,
  594.                            2, /* mlen */
  595.                            true, /* header_present */
  596.                            1 /* rlen */);
  597. }
  598.  
  599. void
  600. vec4_generator::generate_pull_constant_load_gen7(vec4_instruction *inst,
  601.                                                  struct brw_reg dst,
  602.                                                  struct brw_reg surf_index,
  603.                                                  struct brw_reg offset)
  604. {
  605.    assert(surf_index.file == BRW_IMMEDIATE_VALUE &&
  606.           surf_index.type == BRW_REGISTER_TYPE_UD);
  607.  
  608.    brw_instruction *insn = brw_next_insn(p, BRW_OPCODE_SEND);
  609.    brw_set_dest(p, insn, dst);
  610.    brw_set_src0(p, insn, offset);
  611.    brw_set_sampler_message(p, insn,
  612.                            surf_index.dw1.ud,
  613.                            0, /* LD message ignores sampler unit */
  614.                            GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
  615.                            1, /* rlen */
  616.                            1, /* mlen */
  617.                            false, /* no header */
  618.                            BRW_SAMPLER_SIMD_MODE_SIMD4X2,
  619.                            0);
  620. }
  621.  
  622. /**
  623.  * Generate assembly for a Vec4 IR instruction.
  624.  *
  625.  * \param instruction The Vec4 IR instruction to generate code for.
  626.  * \param dst         The destination register.
  627.  * \param src         An array of up to three source registers.
  628.  */
  629. void
  630. vec4_generator::generate_vec4_instruction(vec4_instruction *instruction,
  631.                                           struct brw_reg dst,
  632.                                           struct brw_reg *src)
  633. {
  634.    vec4_instruction *inst = (vec4_instruction *) instruction;
  635.  
  636.    switch (inst->opcode) {
  637.    case BRW_OPCODE_MOV:
  638.       brw_MOV(p, dst, src[0]);
  639.       break;
  640.    case BRW_OPCODE_ADD:
  641.       brw_ADD(p, dst, src[0], src[1]);
  642.       break;
  643.    case BRW_OPCODE_MUL:
  644.       brw_MUL(p, dst, src[0], src[1]);
  645.       break;
  646.    case BRW_OPCODE_MACH:
  647.       brw_set_acc_write_control(p, 1);
  648.       brw_MACH(p, dst, src[0], src[1]);
  649.       brw_set_acc_write_control(p, 0);
  650.       break;
  651.  
  652.    case BRW_OPCODE_MAD:
  653.       brw_MAD(p, dst, src[0], src[1], src[2]);
  654.       break;
  655.  
  656.    case BRW_OPCODE_FRC:
  657.       brw_FRC(p, dst, src[0]);
  658.       break;
  659.    case BRW_OPCODE_RNDD:
  660.       brw_RNDD(p, dst, src[0]);
  661.       break;
  662.    case BRW_OPCODE_RNDE:
  663.       brw_RNDE(p, dst, src[0]);
  664.       break;
  665.    case BRW_OPCODE_RNDZ:
  666.       brw_RNDZ(p, dst, src[0]);
  667.       break;
  668.  
  669.    case BRW_OPCODE_AND:
  670.       brw_AND(p, dst, src[0], src[1]);
  671.       break;
  672.    case BRW_OPCODE_OR:
  673.       brw_OR(p, dst, src[0], src[1]);
  674.       break;
  675.    case BRW_OPCODE_XOR:
  676.       brw_XOR(p, dst, src[0], src[1]);
  677.       break;
  678.    case BRW_OPCODE_NOT:
  679.       brw_NOT(p, dst, src[0]);
  680.       break;
  681.    case BRW_OPCODE_ASR:
  682.       brw_ASR(p, dst, src[0], src[1]);
  683.       break;
  684.    case BRW_OPCODE_SHR:
  685.       brw_SHR(p, dst, src[0], src[1]);
  686.       break;
  687.    case BRW_OPCODE_SHL:
  688.       brw_SHL(p, dst, src[0], src[1]);
  689.       break;
  690.  
  691.    case BRW_OPCODE_CMP:
  692.       brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
  693.       break;
  694.    case BRW_OPCODE_SEL:
  695.       brw_SEL(p, dst, src[0], src[1]);
  696.       break;
  697.  
  698.    case BRW_OPCODE_DPH:
  699.       brw_DPH(p, dst, src[0], src[1]);
  700.       break;
  701.  
  702.    case BRW_OPCODE_DP4:
  703.       brw_DP4(p, dst, src[0], src[1]);
  704.       break;
  705.  
  706.    case BRW_OPCODE_DP3:
  707.       brw_DP3(p, dst, src[0], src[1]);
  708.       break;
  709.  
  710.    case BRW_OPCODE_DP2:
  711.       brw_DP2(p, dst, src[0], src[1]);
  712.       break;
  713.  
  714.    case BRW_OPCODE_F32TO16:
  715.       brw_F32TO16(p, dst, src[0]);
  716.       break;
  717.  
  718.    case BRW_OPCODE_F16TO32:
  719.       brw_F16TO32(p, dst, src[0]);
  720.       break;
  721.  
  722.    case BRW_OPCODE_LRP:
  723.       brw_LRP(p, dst, src[0], src[1], src[2]);
  724.       break;
  725.  
  726.    case BRW_OPCODE_BFREV:
  727.       /* BFREV only supports UD type for src and dst. */
  728.       brw_BFREV(p, retype(dst, BRW_REGISTER_TYPE_UD),
  729.                    retype(src[0], BRW_REGISTER_TYPE_UD));
  730.       break;
  731.    case BRW_OPCODE_FBH:
  732.       /* FBH only supports UD type for dst. */
  733.       brw_FBH(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
  734.       break;
  735.    case BRW_OPCODE_FBL:
  736.       /* FBL only supports UD type for dst. */
  737.       brw_FBL(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
  738.       break;
  739.    case BRW_OPCODE_CBIT:
  740.       /* CBIT only supports UD type for dst. */
  741.       brw_CBIT(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]);
  742.       break;
  743.  
  744.    case BRW_OPCODE_BFE:
  745.       brw_BFE(p, dst, src[0], src[1], src[2]);
  746.       break;
  747.  
  748.    case BRW_OPCODE_BFI1:
  749.       brw_BFI1(p, dst, src[0], src[1]);
  750.       break;
  751.    case BRW_OPCODE_BFI2:
  752.       brw_BFI2(p, dst, src[0], src[1], src[2]);
  753.       break;
  754.  
  755.    case BRW_OPCODE_IF:
  756.       if (inst->src[0].file != BAD_FILE) {
  757.          /* The instruction has an embedded compare (only allowed on gen6) */
  758.          assert(brw->gen == 6);
  759.          gen6_IF(p, inst->conditional_mod, src[0], src[1]);
  760.       } else {
  761.          struct brw_instruction *brw_inst = brw_IF(p, BRW_EXECUTE_8);
  762.          brw_inst->header.predicate_control = inst->predicate;
  763.       }
  764.       break;
  765.  
  766.    case BRW_OPCODE_ELSE:
  767.       brw_ELSE(p);
  768.       break;
  769.    case BRW_OPCODE_ENDIF:
  770.       brw_ENDIF(p);
  771.       break;
  772.  
  773.    case BRW_OPCODE_DO:
  774.       brw_DO(p, BRW_EXECUTE_8);
  775.       break;
  776.  
  777.    case BRW_OPCODE_BREAK:
  778.       brw_BREAK(p);
  779.       brw_set_predicate_control(p, BRW_PREDICATE_NONE);
  780.       break;
  781.    case BRW_OPCODE_CONTINUE:
  782.       /* FINISHME: We need to write the loop instruction support still. */
  783.       if (brw->gen >= 6)
  784.          gen6_CONT(p);
  785.       else
  786.          brw_CONT(p);
  787.       brw_set_predicate_control(p, BRW_PREDICATE_NONE);
  788.       break;
  789.  
  790.    case BRW_OPCODE_WHILE:
  791.       brw_WHILE(p);
  792.       break;
  793.  
  794.    case SHADER_OPCODE_RCP:
  795.    case SHADER_OPCODE_RSQ:
  796.    case SHADER_OPCODE_SQRT:
  797.    case SHADER_OPCODE_EXP2:
  798.    case SHADER_OPCODE_LOG2:
  799.    case SHADER_OPCODE_SIN:
  800.    case SHADER_OPCODE_COS:
  801.       if (brw->gen == 6) {
  802.          generate_math1_gen6(inst, dst, src[0]);
  803.       } else {
  804.          /* Also works for Gen7. */
  805.          generate_math1_gen4(inst, dst, src[0]);
  806.       }
  807.       break;
  808.  
  809.    case SHADER_OPCODE_POW:
  810.    case SHADER_OPCODE_INT_QUOTIENT:
  811.    case SHADER_OPCODE_INT_REMAINDER:
  812.       if (brw->gen >= 7) {
  813.          generate_math2_gen7(inst, dst, src[0], src[1]);
  814.       } else if (brw->gen == 6) {
  815.          generate_math2_gen6(inst, dst, src[0], src[1]);
  816.       } else {
  817.          generate_math2_gen4(inst, dst, src[0], src[1]);
  818.       }
  819.       break;
  820.  
  821.    case SHADER_OPCODE_TEX:
  822.    case SHADER_OPCODE_TXD:
  823.    case SHADER_OPCODE_TXF:
  824.    case SHADER_OPCODE_TXF_MS:
  825.    case SHADER_OPCODE_TXL:
  826.    case SHADER_OPCODE_TXS:
  827.       generate_tex(inst, dst, src[0]);
  828.       break;
  829.  
  830.    case VS_OPCODE_URB_WRITE:
  831.       generate_urb_write(inst);
  832.       break;
  833.  
  834.    case VS_OPCODE_SCRATCH_READ:
  835.       generate_scratch_read(inst, dst, src[0]);
  836.       break;
  837.  
  838.    case VS_OPCODE_SCRATCH_WRITE:
  839.       generate_scratch_write(inst, dst, src[0], src[1]);
  840.       break;
  841.  
  842.    case VS_OPCODE_PULL_CONSTANT_LOAD:
  843.       generate_pull_constant_load(inst, dst, src[0], src[1]);
  844.       break;
  845.  
  846.    case VS_OPCODE_PULL_CONSTANT_LOAD_GEN7:
  847.       generate_pull_constant_load_gen7(inst, dst, src[0], src[1]);
  848.       break;
  849.  
  850.    case SHADER_OPCODE_SHADER_TIME_ADD:
  851.       brw_shader_time_add(p, src[0], SURF_INDEX_VS_SHADER_TIME);
  852.       break;
  853.  
  854.    default:
  855.       if (inst->opcode < (int) ARRAY_SIZE(opcode_descs)) {
  856.          _mesa_problem(ctx, "Unsupported opcode in `%s' in VS\n",
  857.                        opcode_descs[inst->opcode].name);
  858.       } else {
  859.          _mesa_problem(ctx, "Unsupported opcode %d in VS", inst->opcode);
  860.       }
  861.       abort();
  862.    }
  863. }
  864.  
  865. void
  866. vec4_generator::generate_code(exec_list *instructions)
  867. {
  868.    int last_native_insn_offset = 0;
  869.    const char *last_annotation_string = NULL;
  870.    const void *last_annotation_ir = NULL;
  871.  
  872.    if (unlikely(debug_flag)) {
  873.       if (shader) {
  874.          printf("Native code for vertex shader %d:\n", shader_prog->Name);
  875.       } else {
  876.          printf("Native code for vertex program %d:\n", prog->Id);
  877.       }
  878.    }
  879.  
  880.    foreach_list(node, instructions) {
  881.       vec4_instruction *inst = (vec4_instruction *)node;
  882.       struct brw_reg src[3], dst;
  883.  
  884.       if (unlikely(debug_flag)) {
  885.          if (last_annotation_ir != inst->ir) {
  886.             last_annotation_ir = inst->ir;
  887.             if (last_annotation_ir) {
  888.                printf("   ");
  889.                if (shader) {
  890.                   ((ir_instruction *) last_annotation_ir)->print();
  891.                } else {
  892.                   const prog_instruction *vpi;
  893.                   vpi = (const prog_instruction *) inst->ir;
  894.                   printf("%d: ", (int)(vpi - prog->Instructions));
  895.                   _mesa_fprint_instruction_opt(stdout, vpi, 0,
  896.                                                PROG_PRINT_DEBUG, NULL);
  897.                }
  898.                printf("\n");
  899.             }
  900.          }
  901.          if (last_annotation_string != inst->annotation) {
  902.             last_annotation_string = inst->annotation;
  903.             if (last_annotation_string)
  904.                printf("   %s\n", last_annotation_string);
  905.          }
  906.       }
  907.  
  908.       for (unsigned int i = 0; i < 3; i++) {
  909.          src[i] = inst->get_src(i);
  910.       }
  911.       dst = inst->get_dst();
  912.  
  913.       brw_set_conditionalmod(p, inst->conditional_mod);
  914.       brw_set_predicate_control(p, inst->predicate);
  915.       brw_set_predicate_inverse(p, inst->predicate_inverse);
  916.       brw_set_saturate(p, inst->saturate);
  917.       brw_set_mask_control(p, inst->force_writemask_all);
  918.  
  919.       unsigned pre_emit_nr_insn = p->nr_insn;
  920.  
  921.       generate_vec4_instruction(inst, dst, src);
  922.  
  923.       if (inst->no_dd_clear || inst->no_dd_check) {
  924.          assert(p->nr_insn == pre_emit_nr_insn + 1 ||
  925.                 !"no_dd_check or no_dd_clear set for IR emitting more "
  926.                 "than 1 instruction");
  927.  
  928.          struct brw_instruction *last = &p->store[pre_emit_nr_insn];
  929.  
  930.          if (inst->no_dd_clear)
  931.             last->header.dependency_control |= BRW_DEPENDENCY_NOTCLEARED;
  932.          if (inst->no_dd_check)
  933.             last->header.dependency_control |= BRW_DEPENDENCY_NOTCHECKED;
  934.       }
  935.  
  936.       if (unlikely(debug_flag)) {
  937.          brw_dump_compile(p, stdout,
  938.                           last_native_insn_offset, p->next_insn_offset);
  939.       }
  940.  
  941.       last_native_insn_offset = p->next_insn_offset;
  942.    }
  943.  
  944.    if (unlikely(debug_flag)) {
  945.       printf("\n");
  946.    }
  947.  
  948.    brw_set_uip_jip(p);
  949.  
  950.    /* OK, while the INTEL_DEBUG=vs above is very nice for debugging VS
  951.     * emit issues, it doesn't get the jump distances into the output,
  952.     * which is often something we want to debug.  So this is here in
  953.     * case you're doing that.
  954.     */
  955.    if (0 && unlikely(debug_flag)) {
  956.       brw_dump_compile(p, stdout, 0, p->next_insn_offset);
  957.    }
  958. }
  959.  
  960. const unsigned *
  961. vec4_generator::generate_assembly(exec_list *instructions,
  962.                                   unsigned *assembly_size)
  963. {
  964.    brw_set_access_mode(p, BRW_ALIGN_16);
  965.    generate_code(instructions);
  966.    return brw_get_program(p, assembly_size);
  967. }
  968.  
  969. } /* namespace brw */
  970.