
  1. /*
  2.  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
  3.  Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
  4.  develop this 3D driver.
  5.  
  6.  Permission is hereby granted, free of charge, to any person obtaining
  7.  a copy of this software and associated documentation files (the
  8.  "Software"), to deal in the Software without restriction, including
  9.  without limitation the rights to use, copy, modify, merge, publish,
  10.  distribute, sublicense, and/or sell copies of the Software, and to
  11.  permit persons to whom the Software is furnished to do so, subject to
  12.  the following conditions:
  13.  
  14.  The above copyright notice and this permission notice (including the
  15.  next paragraph) shall be included in all copies or substantial
  16.  portions of the Software.
  17.  
  18.  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
  19.  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20.  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
  21.  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
  22.  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
  23.  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
  24.  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25.  
  26.  **********************************************************************/
  27.  /*
  28.   * Authors:
  29.   *   Keith Whitwell <keith@tungstengraphics.com>
  30.   */
  31.      
  32.  
  33. #include "brw_context.h"
  34. #include "brw_defines.h"
  35. #include "brw_eu.h"
  36.  
  37. #include "glsl/ralloc.h"
  38.  
  39. /***********************************************************************
  40.  * Internal helper for constructing instructions
  41.  */
  42.  
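/* Pick an execution size for an instruction from the register region it
 * operates on: a width-8 region in a compressed (SIMD16) program runs as
 * EXECUTE_16; otherwise the BRW_WIDTH_* encoding is reused directly, since it
 * matches the BRW_EXECUTE_* encoding.
 */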
  43. static void guess_execution_size(struct brw_compile *p,
  44.                                  struct brw_instruction *insn,
  45.                                  struct brw_reg reg)
  46. {
  47.    if (reg.width == BRW_WIDTH_8 && p->compressed)
  48.       insn->header.execution_size = BRW_EXECUTE_16;
  49.    else
  50.       insn->header.execution_size = reg.width;  /* note - definitions are compatible */
  51. }
  52.  
  53.  
  54. /**
  55.  * Prior to Sandybridge, the SEND instruction accepted non-MRF source
  56.  * registers, implicitly moving the operand to a message register.
  57.  *
  58.  * On Sandybridge, this is no longer the case.  This function performs the
  59.  * explicit move; it should be called before emitting a SEND instruction.
  60.  */
  61. void
  62. gen6_resolve_implied_move(struct brw_compile *p,
  63.                           struct brw_reg *src,
  64.                           GLuint msg_reg_nr)
  65. {
  66.    struct brw_context *brw = p->brw;
  67.    if (brw->gen < 6)
  68.       return;
  69.  
  70.    if (src->file == BRW_MESSAGE_REGISTER_FILE)
  71.       return;
  72.  
  73.    if (src->file != BRW_ARCHITECTURE_REGISTER_FILE || src->nr != BRW_ARF_NULL) {
  74.       brw_push_insn_state(p);
  75.       brw_set_mask_control(p, BRW_MASK_DISABLE);
  76.       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
  77.       brw_MOV(p, retype(brw_message_reg(msg_reg_nr), BRW_REGISTER_TYPE_UD),
  78.               retype(*src, BRW_REGISTER_TYPE_UD));
  79.       brw_pop_insn_state(p);
  80.    }
  81.    *src = brw_message_reg(msg_reg_nr);
  82. }
  83.  
  84. static void
  85. gen7_convert_mrf_to_grf(struct brw_compile *p, struct brw_reg *reg)
  86. {
  87.    /* From the Ivybridge PRM, Volume 4 Part 3, page 218 ("send"):
  88.     * "The send with EOT should use register space R112-R127 for <src>. This is
  89.     *  to enable loading of a new thread into the same slot while the message
  90.     *  with EOT for current thread is pending dispatch."
  91.     *
  92.     * Since we're pretending to have 16 MRFs anyway, we may as well use the
  93.     * registers required for messages with EOT.
  94.     */
  95.    struct brw_context *brw = p->brw;
  96.    if (brw->gen == 7 && reg->file == BRW_MESSAGE_REGISTER_FILE) {
  97.       reg->file = BRW_GENERAL_REGISTER_FILE;
  98.       reg->nr += GEN7_MRF_HACK_START;
  99.    }
  100. }
  101.  
  102.  
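/* Encode the destination operand into bits1 of the instruction: register file,
 * type and address mode, plus either the register/subregister number and
 * horizontal stride (direct addressing) or an indirect address offset.  In
 * align16 mode the subregister is expressed in 16-byte units and a writemask
 * is stored instead of a stride.  The execution size is then derived from the
 * destination width via guess_execution_size().
 */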
  103. void
  104. brw_set_dest(struct brw_compile *p, struct brw_instruction *insn,
  105.              struct brw_reg dest)
  106. {
  107.    if (dest.file != BRW_ARCHITECTURE_REGISTER_FILE &&
  108.        dest.file != BRW_MESSAGE_REGISTER_FILE)
  109.       assert(dest.nr < 128);
  110.  
  111.    gen7_convert_mrf_to_grf(p, &dest);
  112.  
  113.    insn->bits1.da1.dest_reg_file = dest.file;
  114.    insn->bits1.da1.dest_reg_type = dest.type;
  115.    insn->bits1.da1.dest_address_mode = dest.address_mode;
  116.  
  117.    if (dest.address_mode == BRW_ADDRESS_DIRECT) {  
  118.       insn->bits1.da1.dest_reg_nr = dest.nr;
  119.  
  120.       if (insn->header.access_mode == BRW_ALIGN_1) {
  121.          insn->bits1.da1.dest_subreg_nr = dest.subnr;
  122.          if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
  123.             dest.hstride = BRW_HORIZONTAL_STRIDE_1;
  124.          insn->bits1.da1.dest_horiz_stride = dest.hstride;
  125.       }
  126.       else {
  127.          insn->bits1.da16.dest_subreg_nr = dest.subnr / 16;
  128.          insn->bits1.da16.dest_writemask = dest.dw1.bits.writemask;
  129.          /* From the Ivybridge PRM, Vol 4, Part 3, Section 5.2.4.1:
  130.           *    Although Dst.HorzStride is a don't care for Align16, HW needs
  131.           *    this to be programmed as "01".
  132.           */
  133.          insn->bits1.da16.dest_horiz_stride = 1;
  134.       }
  135.    }
  136.    else {
  137.       insn->bits1.ia1.dest_subreg_nr = dest.subnr;
  138.  
  139.       /* These are different sizes in align1 vs align16:
  140.        */
  141.       if (insn->header.access_mode == BRW_ALIGN_1) {
  142.          insn->bits1.ia1.dest_indirect_offset = dest.dw1.bits.indirect_offset;
  143.          if (dest.hstride == BRW_HORIZONTAL_STRIDE_0)
  144.             dest.hstride = BRW_HORIZONTAL_STRIDE_1;
  145.          insn->bits1.ia1.dest_horiz_stride = dest.hstride;
  146.       }
  147.       else {
  148.          insn->bits1.ia16.dest_indirect_offset = dest.dw1.bits.indirect_offset;
  149.          /* Even though this is ignored in align16, HW still needs it programmed as '01'. */
  150.          insn->bits1.ia16.dest_horiz_stride = 1;
  151.       }
  152.    }
  153.  
  154.    /* NEW: Set the execution size based on dest.width and
  155.     * insn->compression_control:
  156.     */
  157.    guess_execution_size(p, insn, dest);
  158. }
  159.  
  160. extern int reg_type_size[];
  161.  
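/* Sanity-check a register region against the hardware restrictions.  For
 * immediate vectors this verifies the word-stride rule from section 3.3.6;
 * otherwise the encoded hstride/vstride/width/execution-size fields are
 * decoded into element counts and the numbered restrictions from section
 * 3.3.10 ("Register Region Restrictions") are asserted below.
 */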
  162. static void
  163. validate_reg(struct brw_instruction *insn, struct brw_reg reg)
  164. {
  165.    int hstride_for_reg[] = {0, 1, 2, 4};
  166.    int vstride_for_reg[] = {0, 1, 2, 4, 8, 16, 32, 64, 128, 256};
  167.    int width_for_reg[] = {1, 2, 4, 8, 16};
  168.    int execsize_for_reg[] = {1, 2, 4, 8, 16};
  169.    int width, hstride, vstride, execsize;
  170.  
  171.    if (reg.file == BRW_IMMEDIATE_VALUE) {
  172.       /* 3.3.6: Region Parameters.  Restriction: Immediate vectors
  173.        * mean the destination has to be 128-bit aligned and the
  174.        * destination horiz stride has to be a word.
  175.        */
  176.       if (reg.type == BRW_REGISTER_TYPE_V) {
  177.          assert(hstride_for_reg[insn->bits1.da1.dest_horiz_stride] *
  178.                 reg_type_size[insn->bits1.da1.dest_reg_type] == 2);
  179.       }
  180.  
  181.       return;
  182.    }
  183.  
  184.    if (reg.file == BRW_ARCHITECTURE_REGISTER_FILE &&
  185.        reg.nr == BRW_ARF_NULL)
  186.       return;
  187.  
  188.    assert(reg.hstride >= 0 && reg.hstride < Elements(hstride_for_reg));
  189.    hstride = hstride_for_reg[reg.hstride];
  190.  
  191.    if (reg.vstride == 0xf) {
  192.       vstride = -1;
  193.    } else {
  194.       assert(reg.vstride >= 0 && reg.vstride < Elements(vstride_for_reg));
  195.       vstride = vstride_for_reg[reg.vstride];
  196.    }
  197.  
  198.    assert(reg.width >= 0 && reg.width < Elements(width_for_reg));
  199.    width = width_for_reg[reg.width];
  200.  
  201.    assert(insn->header.execution_size >= 0 &&
  202.           insn->header.execution_size < Elements(execsize_for_reg));
  203.    execsize = execsize_for_reg[insn->header.execution_size];
  204.  
  205.    /* Restrictions from 3.3.10: Register Region Restrictions. */
  206.    /* 3. */
  207.    assert(execsize >= width);
  208.  
  209.    /* 4. */
  210.    if (execsize == width && hstride != 0) {
  211.       assert(vstride == -1 || vstride == width * hstride);
  212.    }
  213.  
  214.    /* 5. */
  215.    if (execsize == width && hstride == 0) {
  216.       /* no restriction on vstride. */
  217.    }
  218.  
  219.    /* 6. */
  220.    if (width == 1) {
  221.       assert(hstride == 0);
  222.    }
  223.  
  224.    /* 7. */
  225.    if (execsize == 1 && width == 1) {
  226.       assert(hstride == 0);
  227.       assert(vstride == 0);
  228.    }
  229.  
  230.    /* 8. */
  231.    if (vstride == 0 && hstride == 0) {
  232.       assert(width == 1);
  233.    }
  234.  
  235.    /* 10. Check destination issues. */
  236. }
  237.  
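/* Encode source operand 0.  Immediate values are stored in bits3 and also
 * force src1's register file/type fields; register sources get either a
 * vstride/width/hstride region description (align1) or a channel swizzle
 * (align16).  A width-1 source in an execution-size-1 instruction is
 * collapsed to the scalar region <0;1,0>.
 */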
  238. void
  239. brw_set_src0(struct brw_compile *p, struct brw_instruction *insn,
  240.              struct brw_reg reg)
  241. {
  242.    struct brw_context *brw = p->brw;
  243.  
  244.    if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE)
  245.       assert(reg.nr < 128);
  246.  
  247.    gen7_convert_mrf_to_grf(p, &reg);
  248.  
  249.    if (brw->gen >= 6 && (insn->header.opcode == BRW_OPCODE_SEND ||
  250.                            insn->header.opcode == BRW_OPCODE_SENDC)) {
  251.       /* Any source modifiers or regions will be ignored, since this just
  252.        * identifies the MRF/GRF to start reading the message contents from.
  253.        * Check for some likely failures.
  254.        */
  255.       assert(!reg.negate);
  256.       assert(!reg.abs);
  257.       assert(reg.address_mode == BRW_ADDRESS_DIRECT);
  258.    }
  259.  
  260.    validate_reg(insn, reg);
  261.  
  262.    insn->bits1.da1.src0_reg_file = reg.file;
  263.    insn->bits1.da1.src0_reg_type = reg.type;
  264.    insn->bits2.da1.src0_abs = reg.abs;
  265.    insn->bits2.da1.src0_negate = reg.negate;
  266.    insn->bits2.da1.src0_address_mode = reg.address_mode;
  267.  
  268.    if (reg.file == BRW_IMMEDIATE_VALUE) {
  269.       insn->bits3.ud = reg.dw1.ud;
  270.    
  271.       /* Required to set some fields in src1 as well:
  272.        */
  273.       insn->bits1.da1.src1_reg_file = 0; /* arf */
  274.       insn->bits1.da1.src1_reg_type = reg.type;
  275.    }
  276.    else
  277.    {
  278.       if (reg.address_mode == BRW_ADDRESS_DIRECT) {
  279.          if (insn->header.access_mode == BRW_ALIGN_1) {
  280.             insn->bits2.da1.src0_subreg_nr = reg.subnr;
  281.             insn->bits2.da1.src0_reg_nr = reg.nr;
  282.          }
  283.          else {
  284.             insn->bits2.da16.src0_subreg_nr = reg.subnr / 16;
  285.             insn->bits2.da16.src0_reg_nr = reg.nr;
  286.          }
  287.       }
  288.       else {
  289.          insn->bits2.ia1.src0_subreg_nr = reg.subnr;
  290.  
  291.          if (insn->header.access_mode == BRW_ALIGN_1) {
  292.             insn->bits2.ia1.src0_indirect_offset = reg.dw1.bits.indirect_offset;
  293.          }
  294.          else {
  295.             insn->bits2.ia16.src0_subreg_nr = reg.dw1.bits.indirect_offset;
  296.          }
  297.       }
  298.  
  299.       if (insn->header.access_mode == BRW_ALIGN_1) {
  300.          if (reg.width == BRW_WIDTH_1 &&
  301.              insn->header.execution_size == BRW_EXECUTE_1) {
  302.             insn->bits2.da1.src0_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
  303.             insn->bits2.da1.src0_width = BRW_WIDTH_1;
  304.             insn->bits2.da1.src0_vert_stride = BRW_VERTICAL_STRIDE_0;
  305.          }
  306.          else {
  307.             insn->bits2.da1.src0_horiz_stride = reg.hstride;
  308.             insn->bits2.da1.src0_width = reg.width;
  309.             insn->bits2.da1.src0_vert_stride = reg.vstride;
  310.          }
  311.       }
  312.       else {
  313.          insn->bits2.da16.src0_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
  314.          insn->bits2.da16.src0_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
  315.          insn->bits2.da16.src0_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
  316.          insn->bits2.da16.src0_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
  317.  
  318.          /* This is an oddity of the fact we're using the same
  319.           * descriptions for registers in align_16 as align_1:
  320.           */
  321.          if (reg.vstride == BRW_VERTICAL_STRIDE_8)
  322.             insn->bits2.da16.src0_vert_stride = BRW_VERTICAL_STRIDE_4;
  323.          else
  324.             insn->bits2.da16.src0_vert_stride = reg.vstride;
  325.       }
  326.    }
  327. }
  328.  
  329.  
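/* Encode source operand 1.  Unlike src0 it can never be an MRF or use
 * register-indirect addressing, and at most one of the two sources may be an
 * immediate.
 */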
  330. void brw_set_src1(struct brw_compile *p,
  331.                   struct brw_instruction *insn,
  332.                   struct brw_reg reg)
  333. {
  334.    assert(reg.file != BRW_MESSAGE_REGISTER_FILE);
  335.  
  336.    if (reg.file != BRW_ARCHITECTURE_REGISTER_FILE)
  337.       assert(reg.nr < 128);
  338.  
  339.    gen7_convert_mrf_to_grf(p, &reg);
  340.  
  341.    validate_reg(insn, reg);
  342.  
  343.    insn->bits1.da1.src1_reg_file = reg.file;
  344.    insn->bits1.da1.src1_reg_type = reg.type;
  345.    insn->bits3.da1.src1_abs = reg.abs;
  346.    insn->bits3.da1.src1_negate = reg.negate;
  347.  
  348.    /* Only src1 can be immediate in two-argument instructions.
  349.     */
  350.    assert(insn->bits1.da1.src0_reg_file != BRW_IMMEDIATE_VALUE);
  351.  
  352.    if (reg.file == BRW_IMMEDIATE_VALUE) {
  353.       insn->bits3.ud = reg.dw1.ud;
  354.    }
  355.    else {
  356.       /* This is a hardware restriction, which may or may not be lifted
  357.        * in the future:
  358.        */
  359.       assert (reg.address_mode == BRW_ADDRESS_DIRECT);
  360.       /* assert (reg.file == BRW_GENERAL_REGISTER_FILE); */
  361.  
  362.       if (insn->header.access_mode == BRW_ALIGN_1) {
  363.          insn->bits3.da1.src1_subreg_nr = reg.subnr;
  364.          insn->bits3.da1.src1_reg_nr = reg.nr;
  365.       }
  366.       else {
  367.          insn->bits3.da16.src1_subreg_nr = reg.subnr / 16;
  368.          insn->bits3.da16.src1_reg_nr = reg.nr;
  369.       }
  370.  
  371.       if (insn->header.access_mode == BRW_ALIGN_1) {
  372.          if (reg.width == BRW_WIDTH_1 &&
  373.              insn->header.execution_size == BRW_EXECUTE_1) {
  374.             insn->bits3.da1.src1_horiz_stride = BRW_HORIZONTAL_STRIDE_0;
  375.             insn->bits3.da1.src1_width = BRW_WIDTH_1;
  376.             insn->bits3.da1.src1_vert_stride = BRW_VERTICAL_STRIDE_0;
  377.          }
  378.          else {
  379.             insn->bits3.da1.src1_horiz_stride = reg.hstride;
  380.             insn->bits3.da1.src1_width = reg.width;
  381.             insn->bits3.da1.src1_vert_stride = reg.vstride;
  382.          }
  383.       }
  384.       else {
  385.          insn->bits3.da16.src1_swz_x = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_X);
  386.          insn->bits3.da16.src1_swz_y = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Y);
  387.          insn->bits3.da16.src1_swz_z = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_Z);
  388.          insn->bits3.da16.src1_swz_w = BRW_GET_SWZ(reg.dw1.bits.swizzle, BRW_CHANNEL_W);
  389.  
  390.          /* This is an oddity of the fact we're using the same
  391.           * descriptions for registers in align_16 as align_1:
  392.           */
  393.          if (reg.vstride == BRW_VERTICAL_STRIDE_8)
  394.             insn->bits3.da16.src1_vert_stride = BRW_VERTICAL_STRIDE_4;
  395.          else
  396.             insn->bits3.da16.src1_vert_stride = reg.vstride;
  397.       }
  398.    }
  399. }
  400.  
  401. /**
  402.  * Set the Message Descriptor and Extended Message Descriptor fields
  403.  * for SEND messages.
  404.  *
  405.  * \note This zeroes out the Function Control bits, so it must be called
  406.  *       \b before filling out any message-specific data.  Callers can
  407.  *       choose not to fill in irrelevant bits; they will be zero.
  408.  */
  409. static void
  410. brw_set_message_descriptor(struct brw_compile *p,
  411.                            struct brw_instruction *inst,
  412.                            enum brw_message_target sfid,
  413.                            unsigned msg_length,
  414.                            unsigned response_length,
  415.                            bool header_present,
  416.                            bool end_of_thread)
  417. {
  418.    struct brw_context *brw = p->brw;
  419.  
  420.    brw_set_src1(p, inst, brw_imm_d(0));
  421.  
  422.    if (brw->gen >= 5) {
  423.       inst->bits3.generic_gen5.header_present = header_present;
  424.       inst->bits3.generic_gen5.response_length = response_length;
  425.       inst->bits3.generic_gen5.msg_length = msg_length;
  426.       inst->bits3.generic_gen5.end_of_thread = end_of_thread;
  427.  
  428.       if (brw->gen >= 6) {
  429.          /* On Gen6+ Message target/SFID goes in bits 27:24 of the header */
  430.          inst->header.destreg__conditionalmod = sfid;
  431.       } else {
  432.          /* Set Extended Message Descriptor (ex_desc) */
  433.          inst->bits2.send_gen5.sfid = sfid;
  434.          inst->bits2.send_gen5.end_of_thread = end_of_thread;
  435.       }
  436.    } else {
  437.       inst->bits3.generic.response_length = response_length;
  438.       inst->bits3.generic.msg_length = msg_length;
  439.       inst->bits3.generic.msg_target = sfid;
  440.       inst->bits3.generic.end_of_thread = end_of_thread;
  441.    }
  442. }
  443.  
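/* Fill in the message descriptor for a SEND to the math shared function.
 * Message and response lengths are inferred from the math function: the
 * two-operand functions (POW and the integer-divide variants) send two
 * registers, and SINCOS / INT_DIV_QUOTIENT_AND_REMAINDER write two back.
 * The instruction's saturate bit is copied into the descriptor and then
 * cleared on the instruction itself.
 */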
  444. static void brw_set_math_message( struct brw_compile *p,
  445.                                   struct brw_instruction *insn,
  446.                                   GLuint function,
  447.                                   GLuint integer_type,
  448.                                   bool low_precision,
  449.                                   GLuint dataType )
  450. {
  451.    struct brw_context *brw = p->brw;
  452.    unsigned msg_length;
  453.    unsigned response_length;
  454.  
  455.    /* Infer message length from the function */
  456.    switch (function) {
  457.    case BRW_MATH_FUNCTION_POW:
  458.    case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT:
  459.    case BRW_MATH_FUNCTION_INT_DIV_REMAINDER:
  460.    case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
  461.       msg_length = 2;
  462.       break;
  463.    default:
  464.       msg_length = 1;
  465.       break;
  466.    }
  467.  
  468.    /* Infer response length from the function */
  469.    switch (function) {
  470.    case BRW_MATH_FUNCTION_SINCOS:
  471.    case BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
  472.       response_length = 2;
  473.       break;
  474.    default:
  475.       response_length = 1;
  476.       break;
  477.    }
  478.  
  479.  
  480.    brw_set_message_descriptor(p, insn, BRW_SFID_MATH,
  481.                               msg_length, response_length, false, false);
  482.    if (brw->gen == 5) {
  483.       insn->bits3.math_gen5.function = function;
  484.       insn->bits3.math_gen5.int_type = integer_type;
  485.       insn->bits3.math_gen5.precision = low_precision;
  486.       insn->bits3.math_gen5.saturate = insn->header.saturate;
  487.       insn->bits3.math_gen5.data_type = dataType;
  488.       insn->bits3.math_gen5.snapshot = 0;
  489.    } else {
  490.       insn->bits3.math.function = function;
  491.       insn->bits3.math.int_type = integer_type;
  492.       insn->bits3.math.precision = low_precision;
  493.       insn->bits3.math.saturate = insn->header.saturate;
  494.       insn->bits3.math.data_type = dataType;
  495.    }
  496.    insn->header.saturate = 0;
  497. }
  498.  
  499.  
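/* Descriptor for a URB FF_SYNC message: opcode 1 with a single-register
 * payload and a header; the offset, swizzle, used and complete fields are not
 * used by FF_SYNC and are cleared.
 */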
  500. static void brw_set_ff_sync_message(struct brw_compile *p,
  501.                                     struct brw_instruction *insn,
  502.                                     bool allocate,
  503.                                     GLuint response_length,
  504.                                     bool end_of_thread)
  505. {
  506.    brw_set_message_descriptor(p, insn, BRW_SFID_URB,
  507.                               1, response_length, true, end_of_thread);
  508.    insn->bits3.urb_gen5.opcode = 1; /* FF_SYNC */
  509.    insn->bits3.urb_gen5.offset = 0; /* Not used by FF_SYNC */
  510.    insn->bits3.urb_gen5.swizzle_control = 0; /* Not used by FF_SYNC */
  511.    insn->bits3.urb_gen5.allocate = allocate;
  512.    insn->bits3.urb_gen5.used = 0; /* Not used by FF_SYNC */
  513.    insn->bits3.urb_gen5.complete = 0; /* Not used by FF_SYNC */
  514. }
  515.  
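/* Descriptor for a URB write.  The field layout differs per generation:
 * gen7 uses the URB_WRITE_HWORD opcode and does not support transposed
 * swizzling, gen5/gen6 use the urb_gen5 layout, and original gen4 uses the
 * plain urb layout.
 */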
  516. static void brw_set_urb_message( struct brw_compile *p,
  517.                                  struct brw_instruction *insn,
  518.                                  bool allocate,
  519.                                  bool used,
  520.                                  GLuint msg_length,
  521.                                  GLuint response_length,
  522.                                  bool end_of_thread,
  523.                                  bool complete,
  524.                                  GLuint offset,
  525.                                  GLuint swizzle_control )
  526. {
  527.    struct brw_context *brw = p->brw;
  528.  
  529.    brw_set_message_descriptor(p, insn, BRW_SFID_URB,
  530.                               msg_length, response_length, true, end_of_thread);
  531.    if (brw->gen == 7) {
  532.       insn->bits3.urb_gen7.opcode = 0;  /* URB_WRITE_HWORD */
  533.       insn->bits3.urb_gen7.offset = offset;
  534.       assert(swizzle_control != BRW_URB_SWIZZLE_TRANSPOSE);
  535.       insn->bits3.urb_gen7.swizzle_control = swizzle_control;
  536.       /* per_slot_offset = 0 makes it ignore offsets in message header */
  537.       insn->bits3.urb_gen7.per_slot_offset = 0;
  538.       insn->bits3.urb_gen7.complete = complete;
  539.    } else if (brw->gen >= 5) {
  540.       insn->bits3.urb_gen5.opcode = 0;  /* URB_WRITE */
  541.       insn->bits3.urb_gen5.offset = offset;
  542.       insn->bits3.urb_gen5.swizzle_control = swizzle_control;
  543.       insn->bits3.urb_gen5.allocate = allocate;
  544.       insn->bits3.urb_gen5.used = used; /* ? */
  545.       insn->bits3.urb_gen5.complete = complete;
  546.    } else {
  547.       insn->bits3.urb.opcode = 0;       /* ? */
  548.       insn->bits3.urb.offset = offset;
  549.       insn->bits3.urb.swizzle_control = swizzle_control;
  550.       insn->bits3.urb.allocate = allocate;
  551.       insn->bits3.urb.used = used;      /* ? */
  552.       insn->bits3.urb.complete = complete;
  553.    }
  554. }
  555.  
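/* Descriptor for a dataport write.  The shared function ID depends on the
 * generation: gen7+ routes render-target writes to the render cache and
 * everything else to the data cache, gen6 sends all writes through the render
 * cache, and earlier parts use the dedicated dataport-write SFID.
 */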
  556. void
  557. brw_set_dp_write_message(struct brw_compile *p,
  558.                          struct brw_instruction *insn,
  559.                          GLuint binding_table_index,
  560.                          GLuint msg_control,
  561.                          GLuint msg_type,
  562.                          GLuint msg_length,
  563.                          bool header_present,
  564.                          GLuint last_render_target,
  565.                          GLuint response_length,
  566.                          GLuint end_of_thread,
  567.                          GLuint send_commit_msg)
  568. {
  569.    struct brw_context *brw = p->brw;
  570.    unsigned sfid;
  571.  
  572.    if (brw->gen >= 7) {
  573.       /* Use the Render Cache for RT writes; otherwise use the Data Cache */
  574.       if (msg_type == GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE)
  575.          sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
  576.       else
  577.          sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
  578.    } else if (brw->gen == 6) {
  579.       /* Use the render cache for all write messages. */
  580.       sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
  581.    } else {
  582.       sfid = BRW_SFID_DATAPORT_WRITE;
  583.    }
  584.  
  585.    brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
  586.                               header_present, end_of_thread);
  587.  
  588.    if (brw->gen >= 7) {
  589.       insn->bits3.gen7_dp.binding_table_index = binding_table_index;
  590.       insn->bits3.gen7_dp.msg_control = msg_control;
  591.       insn->bits3.gen7_dp.last_render_target = last_render_target;
  592.       insn->bits3.gen7_dp.msg_type = msg_type;
  593.    } else if (brw->gen == 6) {
  594.       insn->bits3.gen6_dp.binding_table_index = binding_table_index;
  595.       insn->bits3.gen6_dp.msg_control = msg_control;
  596.       insn->bits3.gen6_dp.last_render_target = last_render_target;
  597.       insn->bits3.gen6_dp.msg_type = msg_type;
  598.       insn->bits3.gen6_dp.send_commit_msg = send_commit_msg;
  599.    } else if (brw->gen == 5) {
  600.       insn->bits3.dp_write_gen5.binding_table_index = binding_table_index;
  601.       insn->bits3.dp_write_gen5.msg_control = msg_control;
  602.       insn->bits3.dp_write_gen5.last_render_target = last_render_target;
  603.       insn->bits3.dp_write_gen5.msg_type = msg_type;
  604.       insn->bits3.dp_write_gen5.send_commit_msg = send_commit_msg;
  605.    } else {
  606.       insn->bits3.dp_write.binding_table_index = binding_table_index;
  607.       insn->bits3.dp_write.msg_control = msg_control;
  608.       insn->bits3.dp_write.last_render_target = last_render_target;
  609.       insn->bits3.dp_write.msg_type = msg_type;
  610.       insn->bits3.dp_write.send_commit_msg = send_commit_msg;
  611.    }
  612. }
  613.  
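/* Descriptor for a dataport read.  As with writes, the SFID is
 * generation-dependent: gen7+ always uses the data cache, while on gen6 the
 * caller's target_cache selects between the render and sampler caches.
 */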
  614. void
  615. brw_set_dp_read_message(struct brw_compile *p,
  616.                         struct brw_instruction *insn,
  617.                         GLuint binding_table_index,
  618.                         GLuint msg_control,
  619.                         GLuint msg_type,
  620.                         GLuint target_cache,
  621.                         GLuint msg_length,
  622.                         bool header_present,
  623.                         GLuint response_length)
  624. {
  625.    struct brw_context *brw = p->brw;
  626.    unsigned sfid;
  627.  
  628.    if (brw->gen >= 7) {
  629.       sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
  630.    } else if (brw->gen == 6) {
  631.       if (target_cache == BRW_DATAPORT_READ_TARGET_RENDER_CACHE)
  632.          sfid = GEN6_SFID_DATAPORT_RENDER_CACHE;
  633.       else
  634.          sfid = GEN6_SFID_DATAPORT_SAMPLER_CACHE;
  635.    } else {
  636.       sfid = BRW_SFID_DATAPORT_READ;
  637.    }
  638.  
  639.    brw_set_message_descriptor(p, insn, sfid, msg_length, response_length,
  640.                               header_present, false);
  641.  
  642.    if (brw->gen >= 7) {
  643.       insn->bits3.gen7_dp.binding_table_index = binding_table_index;
  644.       insn->bits3.gen7_dp.msg_control = msg_control;
  645.       insn->bits3.gen7_dp.last_render_target = 0;
  646.       insn->bits3.gen7_dp.msg_type = msg_type;
  647.    } else if (brw->gen == 6) {
  648.       insn->bits3.gen6_dp.binding_table_index = binding_table_index;
  649.       insn->bits3.gen6_dp.msg_control = msg_control;
  650.       insn->bits3.gen6_dp.last_render_target = 0;
  651.       insn->bits3.gen6_dp.msg_type = msg_type;
  652.       insn->bits3.gen6_dp.send_commit_msg = 0;
  653.    } else if (brw->gen == 5) {
  654.       insn->bits3.dp_read_gen5.binding_table_index = binding_table_index;
  655.       insn->bits3.dp_read_gen5.msg_control = msg_control;
  656.       insn->bits3.dp_read_gen5.msg_type = msg_type;
  657.       insn->bits3.dp_read_gen5.target_cache = target_cache;
  658.    } else if (brw->is_g4x) {
  659.       insn->bits3.dp_read_g4x.binding_table_index = binding_table_index; /*0:7*/
  660.       insn->bits3.dp_read_g4x.msg_control = msg_control;  /*8:10*/
  661.       insn->bits3.dp_read_g4x.msg_type = msg_type;  /*11:13*/
  662.       insn->bits3.dp_read_g4x.target_cache = target_cache;  /*14:15*/
  663.    } else {
  664.       insn->bits3.dp_read.binding_table_index = binding_table_index; /*0:7*/
  665.       insn->bits3.dp_read.msg_control = msg_control;  /*8:11*/
  666.       insn->bits3.dp_read.msg_type = msg_type;  /*12:13*/
  667.       insn->bits3.dp_read.target_cache = target_cache;  /*14:15*/
  668.    }
  669. }
  670.  
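/* Descriptor for a sampler-engine message.  The field layout differs per
 * generation; the return_format field is only encoded on original gen4 parts.
 */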
  671. void
  672. brw_set_sampler_message(struct brw_compile *p,
  673.                         struct brw_instruction *insn,
  674.                         GLuint binding_table_index,
  675.                         GLuint sampler,
  676.                         GLuint msg_type,
  677.                         GLuint response_length,
  678.                         GLuint msg_length,
  679.                         GLuint header_present,
  680.                         GLuint simd_mode,
  681.                         GLuint return_format)
  682. {
  683.    struct brw_context *brw = p->brw;
  684.  
  685.    brw_set_message_descriptor(p, insn, BRW_SFID_SAMPLER, msg_length,
  686.                               response_length, header_present, false);
  687.  
  688.    if (brw->gen >= 7) {
  689.       insn->bits3.sampler_gen7.binding_table_index = binding_table_index;
  690.       insn->bits3.sampler_gen7.sampler = sampler;
  691.       insn->bits3.sampler_gen7.msg_type = msg_type;
  692.       insn->bits3.sampler_gen7.simd_mode = simd_mode;
  693.    } else if (brw->gen >= 5) {
  694.       insn->bits3.sampler_gen5.binding_table_index = binding_table_index;
  695.       insn->bits3.sampler_gen5.sampler = sampler;
  696.       insn->bits3.sampler_gen5.msg_type = msg_type;
  697.       insn->bits3.sampler_gen5.simd_mode = simd_mode;
  698.    } else if (brw->is_g4x) {
  699.       insn->bits3.sampler_g4x.binding_table_index = binding_table_index;
  700.       insn->bits3.sampler_g4x.sampler = sampler;
  701.       insn->bits3.sampler_g4x.msg_type = msg_type;
  702.    } else {
  703.       insn->bits3.sampler.binding_table_index = binding_table_index;
  704.       insn->bits3.sampler.sampler = sampler;
  705.       insn->bits3.sampler.msg_type = msg_type;
  706.       insn->bits3.sampler.return_format = return_format;
  707.    }
  708. }
  709.  
  710.  
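/* Append a new 128-bit instruction to the store, doubling the store when it
 * fills up.  The new slot is seeded from p->current, which holds the default
 * header state (predication, execution size, etc.), and one-shot state such
 * as the conditional modifier is then cleared from p->current.  The emitters
 * below all follow the same pattern:
 *
 *    insn = next_insn(p, opcode);
 *    brw_set_dest(p, insn, dest);
 *    brw_set_src0(p, insn, src0);
 *    brw_set_src1(p, insn, src1);      (two-source instructions only)
 */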
  711. #define next_insn brw_next_insn
  712. struct brw_instruction *
  713. brw_next_insn(struct brw_compile *p, GLuint opcode)
  714. {
  715.    struct brw_instruction *insn;
  716.  
  717.    if (p->nr_insn + 1 > p->store_size) {
  718.       if (0)
  719.          printf("increasing the store size to %d\n", p->store_size << 1);
  720.       p->store_size <<= 1;
  721.       p->store = reralloc(p->mem_ctx, p->store,
  722.                           struct brw_instruction, p->store_size);
  723.       if (!p->store)
  724.          assert(!"realloc eu store memory failed");
  725.    }
  726.  
  727.    p->next_insn_offset += 16;
  728.    insn = &p->store[p->nr_insn++];
  729.    memcpy(insn, p->current, sizeof(*insn));
  730.  
  731.    /* Reset this one-shot flag:
  732.     */
  733.  
  734.    if (p->current->header.destreg__conditionalmod) {
  735.       p->current->header.destreg__conditionalmod = 0;
  736.       p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
  737.    }
  738.  
  739.    insn->header.opcode = opcode;
  740.    return insn;
  741. }
  742.  
  743. static struct brw_instruction *brw_alu1( struct brw_compile *p,
  744.                                          GLuint opcode,
  745.                                          struct brw_reg dest,
  746.                                          struct brw_reg src )
  747. {
  748.    struct brw_instruction *insn = next_insn(p, opcode);
  749.    brw_set_dest(p, insn, dest);
  750.    brw_set_src0(p, insn, src);
  751.    return insn;
  752. }
  753.  
  754. static struct brw_instruction *brw_alu2(struct brw_compile *p,
  755.                                         GLuint opcode,
  756.                                         struct brw_reg dest,
  757.                                         struct brw_reg src0,
  758.                                         struct brw_reg src1 )
  759. {
  760.    struct brw_instruction *insn = next_insn(p, opcode);  
  761.    brw_set_dest(p, insn, dest);
  762.    brw_set_src0(p, insn, src0);
  763.    brw_set_src1(p, insn, src1);
  764.    return insn;
  765. }
  766.  
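/* Three-source instructions express subregister numbers in 32-bit (dword)
 * units.  For a scalar (vstride 0) source the replicated channel is chosen by
 * the swizzle, so fold the swizzle's first channel into the subregister
 * number.
 */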
  767. static int
  768. get_3src_subreg_nr(struct brw_reg reg)
  769. {
  770.    if (reg.vstride == BRW_VERTICAL_STRIDE_0) {
  771.       assert(brw_is_single_value_swizzle(reg.dw1.bits.swizzle));
  772.       return reg.subnr / 4 + BRW_GET_SWZ(reg.dw1.bits.swizzle, 0);
  773.    } else {
  774.       return reg.subnr / 4;
  775.    }
  776. }
  777.  
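/* Emit a three-source (align16-only) instruction such as MAD, LRP, BFE or
 * BFI2.  All three sources must be directly addressed GRFs; each gets a
 * swizzle, abs/negate bits and a "replicate control" bit for scalar
 * (vstride 0) operands.  On gen7+ a single type derived from dest.type is
 * used for the destination and all three sources.
 */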
  778. static struct brw_instruction *brw_alu3(struct brw_compile *p,
  779.                                         GLuint opcode,
  780.                                         struct brw_reg dest,
  781.                                         struct brw_reg src0,
  782.                                         struct brw_reg src1,
  783.                                         struct brw_reg src2)
  784. {
  785.    struct brw_context *brw = p->brw;
  786.    struct brw_instruction *insn = next_insn(p, opcode);
  787.  
  788.    gen7_convert_mrf_to_grf(p, &dest);
  789.  
  790.    assert(insn->header.access_mode == BRW_ALIGN_16);
  791.  
  792.    assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
  793.           dest.file == BRW_MESSAGE_REGISTER_FILE);
  794.    assert(dest.nr < 128);
  795.    assert(dest.address_mode == BRW_ADDRESS_DIRECT);
  796.    assert(dest.type == BRW_REGISTER_TYPE_F ||
  797.           dest.type == BRW_REGISTER_TYPE_D ||
  798.           dest.type == BRW_REGISTER_TYPE_UD);
  799.    insn->bits1.da3src.dest_reg_file = (dest.file == BRW_MESSAGE_REGISTER_FILE);
  800.    insn->bits1.da3src.dest_reg_nr = dest.nr;
  801.    insn->bits1.da3src.dest_subreg_nr = dest.subnr / 16;
  802.    insn->bits1.da3src.dest_writemask = dest.dw1.bits.writemask;
  803.    guess_execution_size(p, insn, dest);
  804.  
  805.    assert(src0.file == BRW_GENERAL_REGISTER_FILE);
  806.    assert(src0.address_mode == BRW_ADDRESS_DIRECT);
  807.    assert(src0.nr < 128);
  808.    insn->bits2.da3src.src0_swizzle = src0.dw1.bits.swizzle;
  809.    insn->bits2.da3src.src0_subreg_nr = get_3src_subreg_nr(src0);
  810.    insn->bits2.da3src.src0_reg_nr = src0.nr;
  811.    insn->bits1.da3src.src0_abs = src0.abs;
  812.    insn->bits1.da3src.src0_negate = src0.negate;
  813.    insn->bits2.da3src.src0_rep_ctrl = src0.vstride == BRW_VERTICAL_STRIDE_0;
  814.  
  815.    assert(src1.file == BRW_GENERAL_REGISTER_FILE);
  816.    assert(src1.address_mode == BRW_ADDRESS_DIRECT);
  817.    assert(src1.nr < 128);
  818.    insn->bits2.da3src.src1_swizzle = src1.dw1.bits.swizzle;
  819.    insn->bits2.da3src.src1_subreg_nr_low = get_3src_subreg_nr(src1) & 0x3;
  820.    insn->bits3.da3src.src1_subreg_nr_high = get_3src_subreg_nr(src1) >> 2;
  821.    insn->bits2.da3src.src1_rep_ctrl = src1.vstride == BRW_VERTICAL_STRIDE_0;
  822.    insn->bits3.da3src.src1_reg_nr = src1.nr;
  823.    insn->bits1.da3src.src1_abs = src1.abs;
  824.    insn->bits1.da3src.src1_negate = src1.negate;
  825.  
  826.    assert(src2.file == BRW_GENERAL_REGISTER_FILE);
  827.    assert(src2.address_mode == BRW_ADDRESS_DIRECT);
  828.    assert(src2.nr < 128);
  829.    insn->bits3.da3src.src2_swizzle = src2.dw1.bits.swizzle;
  830.    insn->bits3.da3src.src2_subreg_nr = get_3src_subreg_nr(src2);
  831.    insn->bits3.da3src.src2_rep_ctrl = src2.vstride == BRW_VERTICAL_STRIDE_0;
  832.    insn->bits3.da3src.src2_reg_nr = src2.nr;
  833.    insn->bits1.da3src.src2_abs = src2.abs;
  834.    insn->bits1.da3src.src2_negate = src2.negate;
  835.  
  836.    if (brw->gen >= 7) {
  837.       /* Set both the source and destination types based on dest.type,
  838.        * ignoring the source register types.  The MAD and LRP emitters ensure
  839.        * that all four types are float.  The BFE and BFI2 emitters, however,
  840.        * may send us mixed D and UD types and want us to ignore that and use
  841.        * the destination type.
  842.        */
  843.       switch (dest.type) {
  844.       case BRW_REGISTER_TYPE_F:
  845.          insn->bits1.da3src.src_type = BRW_3SRC_TYPE_F;
  846.          insn->bits1.da3src.dst_type = BRW_3SRC_TYPE_F;
  847.          break;
  848.       case BRW_REGISTER_TYPE_D:
  849.          insn->bits1.da3src.src_type = BRW_3SRC_TYPE_D;
  850.          insn->bits1.da3src.dst_type = BRW_3SRC_TYPE_D;
  851.          break;
  852.       case BRW_REGISTER_TYPE_UD:
  853.          insn->bits1.da3src.src_type = BRW_3SRC_TYPE_UD;
  854.          insn->bits1.da3src.dst_type = BRW_3SRC_TYPE_UD;
  855.          break;
  856.       }
  857.    }
  858.  
  859.    return insn;
  860. }
  861.  
  862.  
  863. /***********************************************************************
  864.  * Convenience routines.
  865.  */
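/* The ALU1/ALU2/ALU3 macros below stamp out the public one-, two- and
 * three-source emitters as thin wrappers around brw_alu1/2/3; ALU3F adds
 * float-type assertions for MAD and LRP.  For example, ALU1(MOV) expands to:
 *
 *    struct brw_instruction *brw_MOV(struct brw_compile *p,
 *                  struct brw_reg dest,
 *                  struct brw_reg src0)
 *    {
 *       return brw_alu1(p, BRW_OPCODE_MOV, dest, src0);
 *    }
 */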
  866. #define ALU1(OP)                                        \
  867. struct brw_instruction *brw_##OP(struct brw_compile *p, \
  868.               struct brw_reg dest,                      \
  869.               struct brw_reg src0)                      \
  870. {                                                       \
  871.    return brw_alu1(p, BRW_OPCODE_##OP, dest, src0);     \
  872. }
  873.  
  874. #define ALU2(OP)                                        \
  875. struct brw_instruction *brw_##OP(struct brw_compile *p, \
  876.               struct brw_reg dest,                      \
  877.               struct brw_reg src0,                      \
  878.               struct brw_reg src1)                      \
  879. {                                                       \
  880.    return brw_alu2(p, BRW_OPCODE_##OP, dest, src0, src1);       \
  881. }
  882.  
  883. #define ALU3(OP)                                        \
  884. struct brw_instruction *brw_##OP(struct brw_compile *p, \
  885.               struct brw_reg dest,                      \
  886.               struct brw_reg src0,                      \
  887.               struct brw_reg src1,                      \
  888.               struct brw_reg src2)                      \
  889. {                                                       \
  890.    return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \
  891. }
  892.  
  893. #define ALU3F(OP)                                               \
  894. struct brw_instruction *brw_##OP(struct brw_compile *p,         \
  895.                                  struct brw_reg dest,           \
  896.                                  struct brw_reg src0,           \
  897.                                  struct brw_reg src1,           \
  898.                                  struct brw_reg src2)           \
  899. {                                                               \
  900.    assert(dest.type == BRW_REGISTER_TYPE_F);                    \
  901.    assert(src0.type == BRW_REGISTER_TYPE_F);                    \
  902.    assert(src1.type == BRW_REGISTER_TYPE_F);                    \
  903.    assert(src2.type == BRW_REGISTER_TYPE_F);                    \
  904.    return brw_alu3(p, BRW_OPCODE_##OP, dest, src0, src1, src2); \
  905. }
  906.  
  907. /* Rounding operations (other than RNDD) require two instructions - the first
  908.  * stores a rounded value (possibly the wrong way) in the dest register, but
  909.  * also sets a per-channel "increment bit" in the flag register.  A predicated
  910.  * add of 1.0 fixes dest to contain the desired result.
  911.  *
  912.  * Sandybridge and later appear to round correctly without an ADD.
  913.  */
  914. #define ROUND(OP)                                                             \
  915. void brw_##OP(struct brw_compile *p,                                          \
  916.               struct brw_reg dest,                                            \
  917.               struct brw_reg src)                                             \
  918. {                                                                             \
  919.    struct brw_instruction *rnd, *add;                                         \
  920.    rnd = next_insn(p, BRW_OPCODE_##OP);                                       \
  921.    brw_set_dest(p, rnd, dest);                                                \
  922.    brw_set_src0(p, rnd, src);                                                 \
  923.                                                                               \
  924.    if (p->brw->gen < 6) {                                                     \
  925.       /* turn on round-increments */                                          \
  926.       rnd->header.destreg__conditionalmod = BRW_CONDITIONAL_R;                \
  927.       add = brw_ADD(p, dest, dest, brw_imm_f(1.0f));                          \
  928.       add->header.predicate_control = BRW_PREDICATE_NORMAL;                   \
  929.    }                                                                          \
  930. }
  931.  
  932.  
  933. ALU1(MOV)
  934. ALU2(SEL)
  935. ALU1(NOT)
  936. ALU2(AND)
  937. ALU2(OR)
  938. ALU2(XOR)
  939. ALU2(SHR)
  940. ALU2(SHL)
  941. ALU2(RSR)
  942. ALU2(RSL)
  943. ALU2(ASR)
  944. ALU1(F32TO16)
  945. ALU1(F16TO32)
  946. ALU1(FRC)
  947. ALU1(RNDD)
  948. ALU2(MAC)
  949. ALU2(MACH)
  950. ALU1(LZD)
  951. ALU2(DP4)
  952. ALU2(DPH)
  953. ALU2(DP3)
  954. ALU2(DP2)
  955. ALU2(LINE)
  956. ALU2(PLN)
  957. ALU3F(MAD)
  958. ALU3F(LRP)
  959. ALU1(BFREV)
  960. ALU3(BFE)
  961. ALU2(BFI1)
  962. ALU3(BFI2)
  963. ALU1(FBH)
  964. ALU1(FBL)
  965. ALU1(CBIT)
  966.  
  967. ROUND(RNDZ)
  968. ROUND(RNDE)
  969.  
  970.  
  971. struct brw_instruction *brw_ADD(struct brw_compile *p,
  972.                                 struct brw_reg dest,
  973.                                 struct brw_reg src0,
  974.                                 struct brw_reg src1)
  975. {
  976.    /* 6.2.2: add */
  977.    if (src0.type == BRW_REGISTER_TYPE_F ||
  978.        (src0.file == BRW_IMMEDIATE_VALUE &&
  979.         src0.type == BRW_REGISTER_TYPE_VF)) {
  980.       assert(src1.type != BRW_REGISTER_TYPE_UD);
  981.       assert(src1.type != BRW_REGISTER_TYPE_D);
  982.    }
  983.  
  984.    if (src1.type == BRW_REGISTER_TYPE_F ||
  985.        (src1.file == BRW_IMMEDIATE_VALUE &&
  986.         src1.type == BRW_REGISTER_TYPE_VF)) {
  987.       assert(src0.type != BRW_REGISTER_TYPE_UD);
  988.       assert(src0.type != BRW_REGISTER_TYPE_D);
  989.    }
  990.  
  991.    return brw_alu2(p, BRW_OPCODE_ADD, dest, src0, src1);
  992. }
  993.  
  994. struct brw_instruction *brw_AVG(struct brw_compile *p,
  995.                                 struct brw_reg dest,
  996.                                 struct brw_reg src0,
  997.                                 struct brw_reg src1)
  998. {
  999.    assert(dest.type == src0.type);
  1000.    assert(src0.type == src1.type);
  1001.    switch (src0.type) {
  1002.    case BRW_REGISTER_TYPE_B:
  1003.    case BRW_REGISTER_TYPE_UB:
  1004.    case BRW_REGISTER_TYPE_W:
  1005.    case BRW_REGISTER_TYPE_UW:
  1006.    case BRW_REGISTER_TYPE_D:
  1007.    case BRW_REGISTER_TYPE_UD:
  1008.       break;
  1009.    default:
  1010.       assert(!"Bad type for brw_AVG");
  1011.    }
  1012.  
  1013.    return brw_alu2(p, BRW_OPCODE_AVG, dest, src0, src1);
  1014. }
  1015.  
  1016. struct brw_instruction *brw_MUL(struct brw_compile *p,
  1017.                                 struct brw_reg dest,
  1018.                                 struct brw_reg src0,
  1019.                                 struct brw_reg src1)
  1020. {
  1021.    /* 6.32.38: mul */
  1022.    if (src0.type == BRW_REGISTER_TYPE_D ||
  1023.        src0.type == BRW_REGISTER_TYPE_UD ||
  1024.        src1.type == BRW_REGISTER_TYPE_D ||
  1025.        src1.type == BRW_REGISTER_TYPE_UD) {
  1026.       assert(dest.type != BRW_REGISTER_TYPE_F);
  1027.    }
  1028.  
  1029.    if (src0.type == BRW_REGISTER_TYPE_F ||
  1030.        (src0.file == BRW_IMMEDIATE_VALUE &&
  1031.         src0.type == BRW_REGISTER_TYPE_VF)) {
  1032.       assert(src1.type != BRW_REGISTER_TYPE_UD);
  1033.       assert(src1.type != BRW_REGISTER_TYPE_D);
  1034.    }
  1035.  
  1036.    if (src1.type == BRW_REGISTER_TYPE_F ||
  1037.        (src1.file == BRW_IMMEDIATE_VALUE &&
  1038.         src1.type == BRW_REGISTER_TYPE_VF)) {
  1039.       assert(src0.type != BRW_REGISTER_TYPE_UD);
  1040.       assert(src0.type != BRW_REGISTER_TYPE_D);
  1041.    }
  1042.  
  1043.    assert(src0.file != BRW_ARCHITECTURE_REGISTER_FILE ||
  1044.           src0.nr != BRW_ARF_ACCUMULATOR);
  1045.    assert(src1.file != BRW_ARCHITECTURE_REGISTER_FILE ||
  1046.           src1.nr != BRW_ARF_ACCUMULATOR);
  1047.  
  1048.    return brw_alu2(p, BRW_OPCODE_MUL, dest, src0, src1);
  1049. }
  1050.  
  1051.  
  1052. void brw_NOP(struct brw_compile *p)
  1053. {
  1054.    struct brw_instruction *insn = next_insn(p, BRW_OPCODE_NOP);  
  1055.    brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
  1056.    brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
  1057.    brw_set_src1(p, insn, brw_imm_ud(0x0));
  1058. }
  1059.  
  1060.  
  1061.  
  1062.  
  1063.  
  1064. /***********************************************************************
  1065.  * Comparisons, if/else/endif
  1066.  */
  1067.  
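/* Emit a JMPI (jump indexed) instruction.  Compression and channel masking
 * are disabled, and the default predication state is cleared from p->current
 * afterwards so it does not apply to subsequent instructions.
 */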
  1068. struct brw_instruction *brw_JMPI(struct brw_compile *p,
  1069.                                  struct brw_reg dest,
  1070.                                  struct brw_reg src0,
  1071.                                  struct brw_reg src1)
  1072. {
  1073.    struct brw_instruction *insn = brw_alu2(p, BRW_OPCODE_JMPI, dest, src0, src1);
  1074.  
  1075.    insn->header.execution_size = 1;
  1076.    insn->header.compression_control = BRW_COMPRESSION_NONE;
  1077.    insn->header.mask_control = BRW_MASK_DISABLE;
  1078.  
  1079.    p->current->header.predicate_control = BRW_PREDICATE_NONE;
  1080.  
  1081.    return insn;
  1082. }
  1083.  
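/* The if-stack records store-relative indices (not pointers, since p->store
 * may be reallocated) of IF/ELSE instructions that still need their jump
 * targets patched, growing the array by doubling as needed.
 */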
  1084. static void
  1085. push_if_stack(struct brw_compile *p, struct brw_instruction *inst)
  1086. {
  1087.    p->if_stack[p->if_stack_depth] = inst - p->store;
  1088.  
  1089.    p->if_stack_depth++;
  1090.    if (p->if_stack_array_size <= p->if_stack_depth) {
  1091.       p->if_stack_array_size *= 2;
  1092.       p->if_stack = reralloc(p->mem_ctx, p->if_stack, int,
  1093.                              p->if_stack_array_size);
  1094.    }
  1095. }
  1096.  
  1097. static struct brw_instruction *
  1098. pop_if_stack(struct brw_compile *p)
  1099. {
  1100.    p->if_stack_depth--;
  1101.    return &p->store[p->if_stack[p->if_stack_depth]];
  1102. }
  1103.  
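/* The loop stack mirrors the if-stack for loop blocks; if_depth_in_loop
 * additionally counts how many IF blocks have been opened inside the current
 * loop level (see brw_IF), and is reset whenever a new loop is pushed.
 */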
  1104. static void
  1105. push_loop_stack(struct brw_compile *p, struct brw_instruction *inst)
  1106. {
  1107.    if (p->loop_stack_array_size < p->loop_stack_depth) {
  1108.       p->loop_stack_array_size *= 2;
  1109.       p->loop_stack = reralloc(p->mem_ctx, p->loop_stack, int,
  1110.                                p->loop_stack_array_size);
  1111.       p->if_depth_in_loop = reralloc(p->mem_ctx, p->if_depth_in_loop, int,
  1112.                                      p->loop_stack_array_size);
  1113.    }
  1114.  
  1115.    p->loop_stack[p->loop_stack_depth] = inst - p->store;
  1116.    p->loop_stack_depth++;
  1117.    p->if_depth_in_loop[p->loop_stack_depth] = 0;
  1118. }
  1119.  
  1120. static struct brw_instruction *
  1121. get_inner_do_insn(struct brw_compile *p)
  1122. {
  1123.    return &p->store[p->loop_stack[p->loop_stack_depth - 1]];
  1124. }
  1125.  
  1126. /* EU takes the value from the flag register and pushes it onto some
  1127.  * sort of a stack (presumably merging with any flag value already on
  1128.  * the stack).  Within an if block, the flags at the top of the stack
  1129.  * control execution on each channel of the unit, eg. on each of the
  1130.  * control execution on each channel of the unit, e.g. on each of the
  1131.  *
  1132.  * When the matching 'else' instruction is reached (presumably by
  1133.  * countdown of the instruction count patched in by our ELSE/ENDIF
  1134.  * functions), the relevant flags are inverted.
  1135.  *
  1136.  * When the matching 'endif' instruction is reached, the flags are
  1137.  * popped off.  If the stack is now empty, normal execution resumes.
  1138.  */
  1139. struct brw_instruction *
  1140. brw_IF(struct brw_compile *p, GLuint execute_size)
  1141. {
  1142.    struct brw_context *brw = p->brw;
  1143.    struct brw_instruction *insn;
  1144.  
  1145.    insn = next_insn(p, BRW_OPCODE_IF);
  1146.  
  1147.    /* Override the defaults for this instruction:
  1148.     */
  1149.    if (brw->gen < 6) {
  1150.       brw_set_dest(p, insn, brw_ip_reg());
  1151.       brw_set_src0(p, insn, brw_ip_reg());
  1152.       brw_set_src1(p, insn, brw_imm_d(0x0));
  1153.    } else if (brw->gen == 6) {
  1154.       brw_set_dest(p, insn, brw_imm_w(0));
  1155.       insn->bits1.branch_gen6.jump_count = 0;
  1156.       brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
  1157.       brw_set_src1(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
  1158.    } else {
  1159.       brw_set_dest(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
  1160.       brw_set_src0(p, insn, vec1(retype(brw_null_reg(), BRW_REGISTER_TYPE_D)));
  1161.       brw_set_src1(p, insn, brw_imm_ud(0));
  1162.       insn->bits3.break_cont.jip = 0;
  1163.       insn->bits3.break_cont.uip = 0;
  1164.    }
  1165.  
  1166.    insn->header.execution_size = execute_size;
  1167.    insn->header.compression_control = BRW_COMPRESSION_NONE;
  1168.    insn->header.predicate_control = BRW_PREDICATE_NORMAL;
  1169.    insn->header.mask_control = BRW_MASK_ENABLE;
  1170.    if (!p->single_program_flow)
  1171.       insn->header.thread_control = BRW_THREAD_SWITCH;
  1172.  
  1173.    p->current->header.predicate_control = BRW_PREDICATE_NONE;
  1174.  
  1175.    push_if_stack(p, insn);
  1176.    p->if_depth_in_loop[p->loop_stack_depth]++;
  1177.    return insn;
  1178. }
  1179.  
  1180. /* This function is only used for gen6-style IF instructions with an
  1181.  * embedded comparison (conditional modifier).  It is not used on gen7.
  1182.  */
  1183. struct brw_instruction *
  1184. gen6_IF(struct brw_compile *p, uint32_t conditional,
  1185.         struct brw_reg src0, struct brw_reg src1)
  1186. {
  1187.    struct brw_instruction *insn;
  1188.  
  1189.    insn = next_insn(p, BRW_OPCODE_IF);
  1190.  
  1191.    brw_set_dest(p, insn, brw_imm_w(0));
  1192.    if (p->compressed) {
  1193.       insn->header.execution_size = BRW_EXECUTE_16;
  1194.    } else {
  1195.       insn->header.execution_size = BRW_EXECUTE_8;
  1196.    }
  1197.    insn->bits1.branch_gen6.jump_count = 0;
  1198.    brw_set_src0(p, insn, src0);
  1199.    brw_set_src1(p, insn, src1);
  1200.  
  1201.    assert(insn->header.compression_control == BRW_COMPRESSION_NONE);
  1202.    assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
  1203.    insn->header.destreg__conditionalmod = conditional;
  1204.  
  1205.    if (!p->single_program_flow)
  1206.       insn->header.thread_control = BRW_THREAD_SWITCH;
  1207.  
  1208.    push_if_stack(p, insn);
  1209.    return insn;
  1210. }
  1211.  
  1212. /**
  1213.  * In single-program-flow (SPF) mode, convert IF and ELSE into ADDs.
  1214.  */
  1215. static void
  1216. convert_IF_ELSE_to_ADD(struct brw_compile *p,
  1217.                        struct brw_instruction *if_inst,
  1218.                        struct brw_instruction *else_inst)
  1219. {
  1220.    /* The next instruction (where the ENDIF would be, if it existed) */
  1221.    struct brw_instruction *next_inst = &p->store[p->nr_insn];
  1222.  
  1223.    assert(p->single_program_flow);
  1224.    assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
  1225.    assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
  1226.    assert(if_inst->header.execution_size == BRW_EXECUTE_1);
  1227.  
  1228.    /* Convert IF to an ADD instruction that moves the instruction pointer
  1229.     * to the first instruction of the ELSE block.  If there is no ELSE
  1230.     * block, point to where ENDIF would be.  Reverse the predicate.
  1231.     *
  1232.     * There's no need to execute an ENDIF since we don't need to do any
  1233.     * stack operations, and if we're currently executing, we just want to
  1234.     * continue normally.
  1235.     */
  1236.    if_inst->header.opcode = BRW_OPCODE_ADD;
  1237.    if_inst->header.predicate_inverse = 1;
  1238.  
  1239.    if (else_inst != NULL) {
  1240.       /* Convert ELSE to an ADD instruction that points where the ENDIF
  1241.        * would be.
  1242.        */
  1243.       else_inst->header.opcode = BRW_OPCODE_ADD;
  1244.  
  1245.       if_inst->bits3.ud = (else_inst - if_inst + 1) * 16;
  1246.       else_inst->bits3.ud = (next_inst - else_inst) * 16;
  1247.    } else {
  1248.       if_inst->bits3.ud = (next_inst - if_inst) * 16;
  1249.    }
  1250. }
  1251.  
  1252. /**
  1253.  * Patch IF and ELSE instructions with appropriate jump targets.
  1254.  */
  1255. static void
  1256. patch_IF_ELSE(struct brw_compile *p,
  1257.               struct brw_instruction *if_inst,
  1258.               struct brw_instruction *else_inst,
  1259.               struct brw_instruction *endif_inst)
  1260. {
  1261.    struct brw_context *brw = p->brw;
  1262.  
  1263.    /* We shouldn't be patching IF and ELSE instructions in single program flow
  1264.     * mode when gen < 6, because in single program flow mode on those
  1265.     * platforms, we convert flow control instructions to conditional ADDs that
  1266.     * operate on IP (see brw_ENDIF).
  1267.     *
  1268.     * However, on Gen6, writing to IP doesn't work in single program flow mode
  1269.     * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
  1270.     * not be updated by non-flow control instructions.").  And on later
  1271.     * platforms, there is no significant benefit to converting control flow
  1272.     * instructions to conditional ADDs.  So we do patch IF and ELSE
  1273.     * instructions in single program flow mode on those platforms.
  1274.     */
  1275.    if (brw->gen < 6)
  1276.       assert(!p->single_program_flow);
  1277.  
  1278.    assert(if_inst != NULL && if_inst->header.opcode == BRW_OPCODE_IF);
  1279.    assert(endif_inst != NULL);
  1280.    assert(else_inst == NULL || else_inst->header.opcode == BRW_OPCODE_ELSE);
  1281.  
  1282.    unsigned br = 1;
  1283.    /* The jump count is in units of 64-bit chunks, so one 128-bit instruction
  1284.     * requires 2 chunks.
  1285.     */
  1286.    if (brw->gen >= 5)
  1287.       br = 2;
  1288.  
  1289.    assert(endif_inst->header.opcode == BRW_OPCODE_ENDIF);
  1290.    endif_inst->header.execution_size = if_inst->header.execution_size;
  1291.  
  1292.    if (else_inst == NULL) {
  1293.       /* Patch IF -> ENDIF */
  1294.       if (brw->gen < 6) {
  1295.          /* Turn it into an IFF, which means no mask stack operations for
  1296.           * all-false and jumping past the ENDIF.
  1297.           */
  1298.          if_inst->header.opcode = BRW_OPCODE_IFF;
  1299.          if_inst->bits3.if_else.jump_count = br * (endif_inst - if_inst + 1);
  1300.          if_inst->bits3.if_else.pop_count = 0;
  1301.          if_inst->bits3.if_else.pad0 = 0;
  1302.       } else if (brw->gen == 6) {
  1303.          /* As of gen6, there is no IFF and IF must point to the ENDIF. */
  1304.          if_inst->bits1.branch_gen6.jump_count = br * (endif_inst - if_inst);
  1305.       } else {
  1306.          if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
  1307.          if_inst->bits3.break_cont.jip = br * (endif_inst - if_inst);
  1308.       }
  1309.    } else {
  1310.       else_inst->header.execution_size = if_inst->header.execution_size;
  1311.  
  1312.       /* Patch IF -> ELSE */
  1313.       if (brw->gen < 6) {
  1314.          if_inst->bits3.if_else.jump_count = br * (else_inst - if_inst);
  1315.          if_inst->bits3.if_else.pop_count = 0;
  1316.          if_inst->bits3.if_else.pad0 = 0;
  1317.       } else if (brw->gen == 6) {
  1318.          if_inst->bits1.branch_gen6.jump_count = br * (else_inst - if_inst + 1);
  1319.       }
  1320.  
  1321.       /* Patch ELSE -> ENDIF */
  1322.       if (brw->gen < 6) {
  1323.          /* BRW_OPCODE_ELSE pre-gen6 should point just past the
  1324.           * matching ENDIF.
  1325.           */
  1326.          else_inst->bits3.if_else.jump_count = br*(endif_inst - else_inst + 1);
  1327.          else_inst->bits3.if_else.pop_count = 1;
  1328.          else_inst->bits3.if_else.pad0 = 0;
  1329.       } else if (brw->gen == 6) {
  1330.          /* BRW_OPCODE_ELSE on gen6 should point to the matching ENDIF. */
  1331.          else_inst->bits1.branch_gen6.jump_count = br*(endif_inst - else_inst);
  1332.       } else {
  1333.          /* The IF instruction's JIP should point just past the ELSE */
  1334.          if_inst->bits3.break_cont.jip = br * (else_inst - if_inst + 1);
  1335.          /* The IF instruction's UIP and ELSE's JIP should point to ENDIF */
  1336.          if_inst->bits3.break_cont.uip = br * (endif_inst - if_inst);
  1337.          else_inst->bits3.break_cont.jip = br * (endif_inst - else_inst);
  1338.       }
  1339.    }
  1340. }
  1341.  
  1342. void
  1343. brw_ELSE(struct brw_compile *p)
  1344. {
  1345.    struct brw_context *brw = p->brw;
  1346.    struct brw_instruction *insn;
  1347.  
  1348.    insn = next_insn(p, BRW_OPCODE_ELSE);
  1349.  
  1350.    if (brw->gen < 6) {
  1351.       brw_set_dest(p, insn, brw_ip_reg());
  1352.       brw_set_src0(p, insn, brw_ip_reg());
  1353.       brw_set_src1(p, insn, brw_imm_d(0x0));
  1354.    } else if (brw->gen == 6) {
  1355.       brw_set_dest(p, insn, brw_imm_w(0));
  1356.       insn->bits1.branch_gen6.jump_count = 0;
  1357.       brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
  1358.       brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
  1359.    } else {
  1360.       brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
  1361.       brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
  1362.       brw_set_src1(p, insn, brw_imm_ud(0));
  1363.       insn->bits3.break_cont.jip = 0;
  1364.       insn->bits3.break_cont.uip = 0;
  1365.    }
  1366.  
  1367.    insn->header.compression_control = BRW_COMPRESSION_NONE;
  1368.    insn->header.mask_control = BRW_MASK_ENABLE;
  1369.    if (!p->single_program_flow)
  1370.       insn->header.thread_control = BRW_THREAD_SWITCH;
  1371.  
  1372.    push_if_stack(p, insn);
  1373. }
  1374.  
  1375. void
  1376. brw_ENDIF(struct brw_compile *p)
  1377. {
  1378.    struct brw_context *brw = p->brw;
  1379.    struct brw_instruction *insn = NULL;
  1380.    struct brw_instruction *else_inst = NULL;
  1381.    struct brw_instruction *if_inst = NULL;
  1382.    struct brw_instruction *tmp;
  1383.    bool emit_endif = true;
  1384.  
  1385.    /* In single program flow mode, we can express IF and ELSE instructions
  1386.     * equivalently as ADD instructions that operate on IP.  On platforms prior
  1387.     * to Gen6, flow control instructions cause an implied thread switch, so
  1388.     * this is a significant savings.
  1389.     *
  1390.     * However, on Gen6, writing to IP doesn't work in single program flow mode
  1391.     * (see the SandyBridge PRM, Volume 4 part 2, p79: "When SPF is ON, IP may
  1392.     * not be updated by non-flow control instructions.").  And on later
  1393.     * platforms, there is no significant benefit to converting control flow
  1394.     * instructions to conditional ADDs.  So we only do this trick on Gen4 and
  1395.     * Gen5.
  1396.     */
  1397.    if (brw->gen < 6 && p->single_program_flow)
  1398.       emit_endif = false;
  1399.  
  1400.    /*
  1401.     * A single next_insn() may change the base address of the instruction
  1402.     * store memory (p->store), so call it first, before referencing the
  1403.     * instruction store pointer through an index.
  1404.     */
  1405.    if (emit_endif)
  1406.       insn = next_insn(p, BRW_OPCODE_ENDIF);
  1407.  
  1408.    /* Pop the IF and (optional) ELSE instructions from the stack */
  1409.    p->if_depth_in_loop[p->loop_stack_depth]--;
  1410.    tmp = pop_if_stack(p);
  1411.    if (tmp->header.opcode == BRW_OPCODE_ELSE) {
  1412.       else_inst = tmp;
  1413.       tmp = pop_if_stack(p);
  1414.    }
  1415.    if_inst = tmp;
  1416.  
  1417.    if (!emit_endif) {
  1418.       /* ENDIF is useless; don't bother emitting it. */
  1419.       convert_IF_ELSE_to_ADD(p, if_inst, else_inst);
  1420.       return;
  1421.    }
  1422.  
  1423.    if (brw->gen < 6) {
  1424.       brw_set_dest(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
  1425.       brw_set_src0(p, insn, retype(brw_vec4_grf(0,0), BRW_REGISTER_TYPE_UD));
  1426.       brw_set_src1(p, insn, brw_imm_d(0x0));
  1427.    } else if (brw->gen == 6) {
  1428.       brw_set_dest(p, insn, brw_imm_w(0));
  1429.       brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
  1430.       brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
  1431.    } else {
  1432.       brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
  1433.       brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
  1434.       brw_set_src1(p, insn, brw_imm_ud(0));
  1435.    }
  1436.  
  1437.    insn->header.compression_control = BRW_COMPRESSION_NONE;
  1438.    insn->header.mask_control = BRW_MASK_ENABLE;
  1439.    insn->header.thread_control = BRW_THREAD_SWITCH;
  1440.  
  1441.    /* Also pop an item off the stack in the endif instruction: */
  1442.    if (brw->gen < 6) {
  1443.       insn->bits3.if_else.jump_count = 0;
  1444.       insn->bits3.if_else.pop_count = 1;
  1445.       insn->bits3.if_else.pad0 = 0;
  1446.    } else if (brw->gen == 6) {
  1447.       insn->bits1.branch_gen6.jump_count = 2;
  1448.    } else {
  1449.       insn->bits3.break_cont.jip = 2;
  1450.    }
  1451.    patch_IF_ELSE(p, if_inst, else_inst, insn);
  1452. }
  1453.  
  1454. struct brw_instruction *brw_BREAK(struct brw_compile *p)
  1455. {
  1456.    struct brw_context *brw = p->brw;
  1457.    struct brw_instruction *insn;
  1458.  
  1459.    insn = next_insn(p, BRW_OPCODE_BREAK);
  1460.    if (brw->gen >= 6) {
  1461.       brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
  1462.       brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
  1463.       brw_set_src1(p, insn, brw_imm_d(0x0));
  1464.    } else {
  1465.       brw_set_dest(p, insn, brw_ip_reg());
  1466.       brw_set_src0(p, insn, brw_ip_reg());
  1467.       brw_set_src1(p, insn, brw_imm_d(0x0));
  1468.       insn->bits3.if_else.pad0 = 0;
  1469.       insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth];
  1470.    }
  1471.    insn->header.compression_control = BRW_COMPRESSION_NONE;
  1472.    insn->header.execution_size = BRW_EXECUTE_8;
  1473.  
  1474.    return insn;
  1475. }
  1476.  
  1477. struct brw_instruction *gen6_CONT(struct brw_compile *p)
  1478. {
  1479.    struct brw_instruction *insn;
  1480.  
  1481.    insn = next_insn(p, BRW_OPCODE_CONTINUE);
  1484.    brw_set_dest(p, insn, brw_ip_reg());
  1485.    brw_set_src0(p, insn, brw_ip_reg());
  1486.    brw_set_src1(p, insn, brw_imm_d(0x0));
  1487.  
  1488.    insn->header.compression_control = BRW_COMPRESSION_NONE;
  1489.    insn->header.execution_size = BRW_EXECUTE_8;
  1490.    return insn;
  1491. }
  1492.  
  1493. struct brw_instruction *brw_CONT(struct brw_compile *p)
  1494. {
  1495.    struct brw_instruction *insn;
  1496.    insn = next_insn(p, BRW_OPCODE_CONTINUE);
  1497.    brw_set_dest(p, insn, brw_ip_reg());
  1498.    brw_set_src0(p, insn, brw_ip_reg());
  1499.    brw_set_src1(p, insn, brw_imm_d(0x0));
  1500.    insn->header.compression_control = BRW_COMPRESSION_NONE;
  1501.    insn->header.execution_size = BRW_EXECUTE_8;
  1502.    /* insn->header.mask_control = BRW_MASK_DISABLE; */
  1503.    insn->bits3.if_else.pad0 = 0;
  1504.    insn->bits3.if_else.pop_count = p->if_depth_in_loop[p->loop_stack_depth];
  1505.    return insn;
  1506. }
  1507.  
  1508. struct brw_instruction *gen6_HALT(struct brw_compile *p)
  1509. {
  1510.    struct brw_instruction *insn;
  1511.  
  1512.    insn = next_insn(p, BRW_OPCODE_HALT);
  1513.    brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
  1514.    brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
  1515.    brw_set_src1(p, insn, brw_imm_d(0x0)); /* UIP and JIP, updated later. */
  1516.  
  1517.    if (p->compressed) {
  1518.       insn->header.execution_size = BRW_EXECUTE_16;
  1519.    } else {
  1520.       insn->header.compression_control = BRW_COMPRESSION_NONE;
  1521.       insn->header.execution_size = BRW_EXECUTE_8;
  1522.    }
  1523.    return insn;
  1524. }
  1525.  
  1526. /* DO/WHILE loop:
  1527.  *
  1528.  * The DO/WHILE is just an unterminated loop -- break or continue are
  1529.  * used for control within the loop.  There are a few ways this can be
  1530.  * encoded.
  1531.  *
  1532.  * For uniform control flow, the WHILE is just a jump, so ADD ip, ip,
  1533.  * jip and no DO instruction.
  1534.  *
  1535.  * For non-uniform control flow pre-gen6, there's a DO instruction to
  1536.  * push the mask, and a WHILE to jump back, and BREAK to get out and
  1537.  * pop the mask.
  1538.  *
  1539.  * For gen6, there's no more mask stack, so no need for DO.  WHILE
  1540.  * just points back to the first instruction of the loop.
  1541.  */
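        /* A hypothetical usage sketch (the execution size and the predication
         * of BREAK/CONT are illustrative, not taken from this file): a caller
         * emitting a non-uniform loop with these helpers does roughly
         *
         *    brw_DO(p, BRW_EXECUTE_8);
         *    ... loop body, usually containing predicated ...
         *    brw_CONT(p);     // jump back to the top of the loop
         *    brw_BREAK(p);    // leave the loop (pops the mask stack pre-gen6)
         *    ... end of body ...
         *    brw_WHILE(p);    // closes the loop; patches BREAK/CONT pre-gen6
         */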
  1542. struct brw_instruction *brw_DO(struct brw_compile *p, GLuint execute_size)
  1543. {
  1544.    struct brw_context *brw = p->brw;
  1545.  
  1546.    if (brw->gen >= 6 || p->single_program_flow) {
  1547.       push_loop_stack(p, &p->store[p->nr_insn]);
  1548.       return &p->store[p->nr_insn];
  1549.    } else {
  1550.       struct brw_instruction *insn = next_insn(p, BRW_OPCODE_DO);
  1551.  
  1552.       push_loop_stack(p, insn);
  1553.  
  1554.       /* Override the defaults for this instruction:
  1555.        */
  1556.       brw_set_dest(p, insn, brw_null_reg());
  1557.       brw_set_src0(p, insn, brw_null_reg());
  1558.       brw_set_src1(p, insn, brw_null_reg());
  1559.  
  1560.       insn->header.compression_control = BRW_COMPRESSION_NONE;
  1561.       insn->header.execution_size = execute_size;
  1562.       insn->header.predicate_control = BRW_PREDICATE_NONE;
  1563.       /* insn->header.mask_control = BRW_MASK_ENABLE; */
  1564.       /* insn->header.mask_control = BRW_MASK_DISABLE; */
  1565.  
  1566.       return insn;
  1567.    }
  1568. }
  1569.  
  1570. /**
  1571.  * For pre-gen6, we patch BREAK/CONT instructions to point at the WHILE
  1572.  * instruction here.
  1573.  *
  1574.  * For gen6+, see brw_set_uip_jip(), which doesn't care so much about the loop
  1575.  * nesting, since it can always just point to the end of the block/current loop.
  1576.  */
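        /* For example, on gen5 (br == 2) a BREAK located three instructions
         * before the WHILE gets jump_count = 2 * (3 + 1) = 8, while a CONTINUE
         * at the same spot gets jump_count = 2 * 3 = 6.
         */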
  1577. static void
  1578. brw_patch_break_cont(struct brw_compile *p, struct brw_instruction *while_inst)
  1579. {
  1580.    struct brw_context *brw = p->brw;
  1581.    struct brw_instruction *do_inst = get_inner_do_insn(p);
  1582.    struct brw_instruction *inst;
  1583.    int br = (brw->gen == 5) ? 2 : 1;
  1584.  
  1585.    for (inst = while_inst - 1; inst != do_inst; inst--) {
  1586.       /* If the jump count is != 0, that means that this instruction has already
  1587.        * been patched because it's part of a loop inside of the one we're
  1588.        * patching.
  1589.        */
  1590.       if (inst->header.opcode == BRW_OPCODE_BREAK &&
  1591.           inst->bits3.if_else.jump_count == 0) {
  1592.          inst->bits3.if_else.jump_count = br * ((while_inst - inst) + 1);
  1593.       } else if (inst->header.opcode == BRW_OPCODE_CONTINUE &&
  1594.                  inst->bits3.if_else.jump_count == 0) {
  1595.          inst->bits3.if_else.jump_count = br * (while_inst - inst);
  1596.       }
  1597.    }
  1598. }
  1599.  
  1600. struct brw_instruction *brw_WHILE(struct brw_compile *p)
  1601. {
  1602.    struct brw_context *brw = p->brw;
  1603.    struct brw_instruction *insn, *do_insn;
  1604.    GLuint br = 1;
  1605.  
  1606.    if (brw->gen >= 5)
  1607.       br = 2;
  1608.  
  1609.    if (brw->gen >= 7) {
  1610.       insn = next_insn(p, BRW_OPCODE_WHILE);
  1611.       do_insn = get_inner_do_insn(p);
  1612.  
  1613.       brw_set_dest(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
  1614.       brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
  1615.       brw_set_src1(p, insn, brw_imm_ud(0));
  1616.       insn->bits3.break_cont.jip = br * (do_insn - insn);
  1617.  
  1618.       insn->header.execution_size = BRW_EXECUTE_8;
  1619.    } else if (brw->gen == 6) {
  1620.       insn = next_insn(p, BRW_OPCODE_WHILE);
  1621.       do_insn = get_inner_do_insn(p);
  1622.  
  1623.       brw_set_dest(p, insn, brw_imm_w(0));
  1624.       insn->bits1.branch_gen6.jump_count = br * (do_insn - insn);
  1625.       brw_set_src0(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
  1626.       brw_set_src1(p, insn, retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
  1627.  
  1628.       insn->header.execution_size = BRW_EXECUTE_8;
  1629.    } else {
  1630.       if (p->single_program_flow) {
  1631.          insn = next_insn(p, BRW_OPCODE_ADD);
  1632.          do_insn = get_inner_do_insn(p);
  1633.  
  1634.          brw_set_dest(p, insn, brw_ip_reg());
  1635.          brw_set_src0(p, insn, brw_ip_reg());
  1636.          brw_set_src1(p, insn, brw_imm_d((do_insn - insn) * 16));
  1637.          insn->header.execution_size = BRW_EXECUTE_1;
  1638.       } else {
  1639.          insn = next_insn(p, BRW_OPCODE_WHILE);
  1640.          do_insn = get_inner_do_insn(p);
  1641.  
  1642.          assert(do_insn->header.opcode == BRW_OPCODE_DO);
  1643.  
  1644.          brw_set_dest(p, insn, brw_ip_reg());
  1645.          brw_set_src0(p, insn, brw_ip_reg());
  1646.          brw_set_src1(p, insn, brw_imm_d(0));
  1647.  
  1648.          insn->header.execution_size = do_insn->header.execution_size;
  1649.          insn->bits3.if_else.jump_count = br * (do_insn - insn + 1);
  1650.          insn->bits3.if_else.pop_count = 0;
  1651.          insn->bits3.if_else.pad0 = 0;
  1652.  
  1653.          brw_patch_break_cont(p, insn);
  1654.       }
  1655.    }
  1656.    insn->header.compression_control = BRW_COMPRESSION_NONE;
  1657.    p->current->header.predicate_control = BRW_PREDICATE_NONE;
  1658.  
  1659.    p->loop_stack_depth--;
  1660.  
  1661.    return insn;
  1662. }
  1663.  
  1664.  
  1665. /* FORWARD JUMPS:
  1666.  */
  1667. void brw_land_fwd_jump(struct brw_compile *p, int jmp_insn_idx)
  1668. {
  1669.    struct brw_context *brw = p->brw;
  1670.    struct brw_instruction *jmp_insn = &p->store[jmp_insn_idx];
  1671.    GLuint jmpi = 1;
  1672.  
  1673.    if (brw->gen >= 5)
  1674.       jmpi = 2;
  1675.  
  1676.    assert(jmp_insn->header.opcode == BRW_OPCODE_JMPI);
  1677.    assert(jmp_insn->bits1.da1.src1_reg_file == BRW_IMMEDIATE_VALUE);
  1678.  
  1679.    jmp_insn->bits3.ud = jmpi * (p->nr_insn - jmp_insn_idx - 1);
  1680. }
  1681.  
  1682.  
  1683.  
  1684. /* To integrate with the above, it makes sense that the comparison
  1685.  * instruction should populate the flag register.  It might be simpler
  1686.  * just to use the flag reg for most WM tasks?
  1687.  */
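        /* A hypothetical usage sketch (the operand registers are illustrative):
         * comparing into the null register both writes the flag and enables
         * predication for the instructions emitted afterwards:
         *
         *    brw_CMP(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_D),
         *            BRW_CONDITIONAL_GE, src0, src1);
         *    brw_MOV(p, dst, src);   // now predicated, per channel, on src0 >= src1
         */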
  1688. void brw_CMP(struct brw_compile *p,
  1689.              struct brw_reg dest,
  1690.              GLuint conditional,
  1691.              struct brw_reg src0,
  1692.              struct brw_reg src1)
  1693. {
  1694.    struct brw_context *brw = p->brw;
  1695.    struct brw_instruction *insn = next_insn(p, BRW_OPCODE_CMP);
  1696.  
  1697.    insn->header.destreg__conditionalmod = conditional;
  1698.    brw_set_dest(p, insn, dest);
  1699.    brw_set_src0(p, insn, src0);
  1700.    brw_set_src1(p, insn, src1);
  1701.  
  1702. /*    guess_execution_size(insn, src0); */
  1703.  
  1704.  
  1705.    /* Make it so that future instructions will use the computed flag
  1706.     * value until brw_set_predicate_control_flag_value() is called
  1707.     * again.  
  1708.     */
  1709.    if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
  1710.        dest.nr == 0) {
  1711.       p->current->header.predicate_control = BRW_PREDICATE_NORMAL;
  1712.       p->flag_value = 0xff;
  1713.    }
  1714.  
  1715.    /* Item WaCMPInstNullDstForcesThreadSwitch in the Haswell Bspec workarounds
  1716.     * page says:
  1717.     *    "Any CMP instruction with a null destination must use a {switch}."
  1718.     *
  1719.     * It also applies to other Gen7 platforms (IVB, BYT) even though it isn't
  1720.     * mentioned on their work-arounds pages.
  1721.     */
  1722.    if (brw->gen == 7) {
  1723.       if (dest.file == BRW_ARCHITECTURE_REGISTER_FILE &&
  1724.           dest.nr == BRW_ARF_NULL) {
  1725.          insn->header.thread_control = BRW_THREAD_SWITCH;
  1726.       }
  1727.    }
  1728. }
  1729.  
  1730. /* Issue a 'wait' instruction on notification register n1; the host can
  1731.    program MMIO to wake up the thread. */
  1732. void brw_WAIT (struct brw_compile *p)
  1733. {
  1734.    struct brw_instruction *insn = next_insn(p, BRW_OPCODE_WAIT);
  1735.    struct brw_reg src = brw_notification_1_reg();
  1736.  
  1737.    brw_set_dest(p, insn, src);
  1738.    brw_set_src0(p, insn, src);
  1739.    brw_set_src1(p, insn, brw_null_reg());
  1740.    insn->header.execution_size = 0; /* must be BRW_EXECUTE_1 */
  1741.    insn->header.predicate_control = 0;
  1742.    insn->header.compression_control = 0;
  1743. }
  1744.  
  1745.  
  1746. /***********************************************************************
  1747.  * Helpers for the various SEND message types:
  1748.  */
  1749.  
  1750. /** Extended math function, float[8].
  1751.  */
  1752. void brw_math( struct brw_compile *p,
  1753.                struct brw_reg dest,
  1754.                GLuint function,
  1755.                GLuint msg_reg_nr,
  1756.                struct brw_reg src,
  1757.                GLuint data_type,
  1758.                GLuint precision )
  1759. {
  1760.    struct brw_context *brw = p->brw;
  1761.  
  1762.    if (brw->gen >= 6) {
  1763.       struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
  1764.  
  1765.       assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
  1766.              (brw->gen >= 7 && dest.file == BRW_MESSAGE_REGISTER_FILE));
  1767.       assert(src.file == BRW_GENERAL_REGISTER_FILE);
  1768.  
  1769.       assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
  1770.       if (brw->gen == 6)
  1771.          assert(src.hstride == BRW_HORIZONTAL_STRIDE_1);
  1772.  
  1773.       /* Source modifiers are ignored for extended math instructions on Gen6. */
  1774.       if (brw->gen == 6) {
  1775.          assert(!src.negate);
  1776.          assert(!src.abs);
  1777.       }
  1778.  
  1779.       if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
  1780.           function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
  1781.           function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
  1782.          assert(src.type != BRW_REGISTER_TYPE_F);
  1783.       } else {
  1784.          assert(src.type == BRW_REGISTER_TYPE_F);
  1785.       }
  1786.  
  1787.       /* Math is the same ISA format as other opcodes, except that CondModifier
  1788.        * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
  1789.        */
  1790.       insn->header.destreg__conditionalmod = function;
  1791.  
  1792.       brw_set_dest(p, insn, dest);
  1793.       brw_set_src0(p, insn, src);
  1794.       brw_set_src1(p, insn, brw_null_reg());
  1795.    } else {
  1796.       struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
  1797.  
  1798.       /* Example code doesn't set predicate_control for send
  1799.        * instructions.
  1800.        */
  1801.       insn->header.predicate_control = 0;
  1802.       insn->header.destreg__conditionalmod = msg_reg_nr;
  1803.  
  1804.       brw_set_dest(p, insn, dest);
  1805.       brw_set_src0(p, insn, src);
  1806.       brw_set_math_message(p,
  1807.                            insn,
  1808.                            function,
  1809.                            src.type == BRW_REGISTER_TYPE_D,
  1810.                            precision,
  1811.                            data_type);
  1812.    }
  1813. }
  1814.  
  1815. /** Extended math function, float[8].
  1816.  */
  1817. void brw_math2(struct brw_compile *p,
  1818.                struct brw_reg dest,
  1819.                GLuint function,
  1820.                struct brw_reg src0,
  1821.                struct brw_reg src1)
  1822. {
  1823.    struct brw_context *brw = p->brw;
  1824.    struct brw_instruction *insn = next_insn(p, BRW_OPCODE_MATH);
  1825.  
  1826.    assert(dest.file == BRW_GENERAL_REGISTER_FILE ||
  1827.           (brw->gen >= 7 && dest.file == BRW_MESSAGE_REGISTER_FILE));
  1828.    assert(src0.file == BRW_GENERAL_REGISTER_FILE);
  1829.    assert(src1.file == BRW_GENERAL_REGISTER_FILE);
  1830.  
  1831.    assert(dest.hstride == BRW_HORIZONTAL_STRIDE_1);
  1832.    if (brw->gen == 6) {
  1833.       assert(src0.hstride == BRW_HORIZONTAL_STRIDE_1);
  1834.       assert(src1.hstride == BRW_HORIZONTAL_STRIDE_1);
  1835.    }
  1836.  
  1837.    if (function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT ||
  1838.        function == BRW_MATH_FUNCTION_INT_DIV_REMAINDER ||
  1839.        function == BRW_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER) {
  1840.       assert(src0.type != BRW_REGISTER_TYPE_F);
  1841.       assert(src1.type != BRW_REGISTER_TYPE_F);
  1842.    } else {
  1843.       assert(src0.type == BRW_REGISTER_TYPE_F);
  1844.       assert(src1.type == BRW_REGISTER_TYPE_F);
  1845.    }
  1846.  
  1847.    /* Source modifiers are ignored for extended math instructions on Gen6. */
  1848.    if (brw->gen == 6) {
  1849.       assert(!src0.negate);
  1850.       assert(!src0.abs);
  1851.       assert(!src1.negate);
  1852.       assert(!src1.abs);
  1853.    }
  1854.  
  1855.    /* Math is the same ISA format as other opcodes, except that CondModifier
  1856.     * becomes FC[3:0] and ThreadCtrl becomes FC[5:4].
  1857.     */
  1858.    insn->header.destreg__conditionalmod = function;
  1859.  
  1860.    brw_set_dest(p, insn, dest);
  1861.    brw_set_src0(p, insn, src0);
  1862.    brw_set_src1(p, insn, src1);
  1863. }
  1864.  
  1865.  
  1866. /**
  1867.  * Write a block of OWORDs (half a GRF each) from the scratch buffer,
  1868.  * using a constant offset per channel.
  1869.  *
  1870.  * The offset must be aligned to oword size (16 bytes).  Used for
  1871.  * register spilling.
  1872.  */
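        /* A hypothetical usage sketch (the GRF/MRF numbers and the offset are
         * illustrative): spilling one GRF means staging the data in the MRF
         * that follows the header and then issuing the write:
         *
         *    brw_MOV(p, retype(brw_message_reg(2), BRW_REGISTER_TYPE_UD),
         *            retype(brw_vec8_grf(12, 0), BRW_REGISTER_TYPE_UD));
         *    brw_oword_block_write_scratch(p, brw_message_reg(1), 1, 32);
         */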
  1873. void brw_oword_block_write_scratch(struct brw_compile *p,
  1874.                                    struct brw_reg mrf,
  1875.                                    int num_regs,
  1876.                                    GLuint offset)
  1877. {
  1878.    struct brw_context *brw = p->brw;
  1879.    uint32_t msg_control, msg_type;
  1880.    int mlen;
  1881.  
  1882.    if (brw->gen >= 6)
  1883.       offset /= 16;
  1884.  
  1885.    mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
  1886.  
  1887.    if (num_regs == 1) {
  1888.       msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
  1889.       mlen = 2;
  1890.    } else {
  1891.       msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
  1892.       mlen = 3;
  1893.    }
  1894.  
  1895.    /* Set up the message header.  This is g0, with g0.2 filled with
  1896.     * the offset.  We don't want to leave our offset around in g0 or
  1897.     * it'll screw up texture samples, so set it up inside the message
  1898.     * reg.
  1899.     */
  1900.    {
  1901.       brw_push_insn_state(p);
  1902.       brw_set_mask_control(p, BRW_MASK_DISABLE);
  1903.       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
  1904.  
  1905.       brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
  1906.  
  1907.       /* set message header global offset field (reg 0, element 2) */
  1908.       brw_MOV(p,
  1909.               retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
  1910.                                   mrf.nr,
  1911.                                   2), BRW_REGISTER_TYPE_UD),
  1912.               brw_imm_ud(offset));
  1913.  
  1914.       brw_pop_insn_state(p);
  1915.    }
  1916.  
  1917.    {
  1918.       struct brw_reg dest;
  1919.       struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
  1920.       int send_commit_msg;
  1921.       struct brw_reg src_header = retype(brw_vec8_grf(0, 0),
  1922.                                          BRW_REGISTER_TYPE_UW);
  1923.  
  1924.       if (insn->header.compression_control != BRW_COMPRESSION_NONE) {
  1925.          insn->header.compression_control = BRW_COMPRESSION_NONE;
  1926.          src_header = vec16(src_header);
  1927.       }
  1928.       assert(insn->header.predicate_control == BRW_PREDICATE_NONE);
  1929.       insn->header.destreg__conditionalmod = mrf.nr;
  1930.  
  1931.       /* Until gen6, writes followed by reads from the same location
  1932.        * are not guaranteed to be ordered unless write_commit is set.
  1933.        * If set, then a no-op write is issued to the destination
  1934.        * register to set a dependency, and a read from the destination
  1935.        * can be used to ensure the ordering.
  1936.        *
  1937.        * For gen6, only writes between different threads need ordering
  1938.        * protection.  Our use of DP writes is all about register
  1939.        * spilling within a thread.
  1940.        */
  1941.       if (brw->gen >= 6) {
  1942.          dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
  1943.          send_commit_msg = 0;
  1944.       } else {
  1945.          dest = src_header;
  1946.          send_commit_msg = 1;
  1947.       }
  1948.  
  1949.       brw_set_dest(p, insn, dest);
  1950.       if (brw->gen >= 6) {
  1951.          brw_set_src0(p, insn, mrf);
  1952.       } else {
  1953.          brw_set_src0(p, insn, brw_null_reg());
  1954.       }
  1955.  
  1956.       if (brw->gen >= 6)
  1957.          msg_type = GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
  1958.       else
  1959.          msg_type = BRW_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE;
  1960.  
  1961.       brw_set_dp_write_message(p,
  1962.                                insn,
  1963.                                255, /* binding table index (255=stateless) */
  1964.                                msg_control,
  1965.                                msg_type,
  1966.                                mlen,
  1967.                                true, /* header_present */
  1968.                                0, /* not a render target */
  1969.                                send_commit_msg, /* response_length */
  1970.                                0, /* eot */
  1971.                                send_commit_msg);
  1972.    }
  1973. }
  1974.  
  1975.  
  1976. /**
  1977.  * Read a block of owords (half a GRF each) from the scratch buffer
  1978.  * using a constant index per channel.
  1979.  *
  1980.  * Offset must be aligned to oword size (16 bytes).  Used for register
  1981.  * spilling.
  1982.  */
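        /* A hypothetical usage sketch (register numbers and offset are
         * illustrative): unspilling a value from the same kind of scratch slot
         * back into g12:
         *
         *    brw_oword_block_read_scratch(p, brw_vec8_grf(12, 0),
         *                                 brw_message_reg(1), 1, 32);
         */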
  1983. void
  1984. brw_oword_block_read_scratch(struct brw_compile *p,
  1985.                              struct brw_reg dest,
  1986.                              struct brw_reg mrf,
  1987.                              int num_regs,
  1988.                              GLuint offset)
  1989. {
  1990.    struct brw_context *brw = p->brw;
  1991.    uint32_t msg_control;
  1992.    int rlen;
  1993.  
  1994.    if (brw->gen >= 6)
  1995.       offset /= 16;
  1996.  
  1997.    mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
  1998.    dest = retype(dest, BRW_REGISTER_TYPE_UW);
  1999.  
  2000.    if (num_regs == 1) {
  2001.       msg_control = BRW_DATAPORT_OWORD_BLOCK_2_OWORDS;
  2002.       rlen = 1;
  2003.    } else {
  2004.       msg_control = BRW_DATAPORT_OWORD_BLOCK_4_OWORDS;
  2005.       rlen = 2;
  2006.    }
  2007.  
  2008.    {
  2009.       brw_push_insn_state(p);
  2010.       brw_set_compression_control(p, BRW_COMPRESSION_NONE);
  2011.       brw_set_mask_control(p, BRW_MASK_DISABLE);
  2012.  
  2013.       brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
  2014.  
  2015.       /* set message header global offset field (reg 0, element 2) */
  2016.       brw_MOV(p,
  2017.               retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
  2018.                                   mrf.nr,
  2019.                                   2), BRW_REGISTER_TYPE_UD),
  2020.               brw_imm_ud(offset));
  2021.  
  2022.       brw_pop_insn_state(p);
  2023.    }
  2024.  
  2025.    {
  2026.       struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
  2027.  
  2028.       assert(insn->header.predicate_control == 0);
  2029.       insn->header.compression_control = BRW_COMPRESSION_NONE;
  2030.       insn->header.destreg__conditionalmod = mrf.nr;
  2031.  
  2032.       brw_set_dest(p, insn, dest);      /* UW? */
  2033.       if (brw->gen >= 6) {
  2034.          brw_set_src0(p, insn, mrf);
  2035.       } else {
  2036.          brw_set_src0(p, insn, brw_null_reg());
  2037.       }
  2038.  
  2039.       brw_set_dp_read_message(p,
  2040.                               insn,
  2041.                               255, /* binding table index (255=stateless) */
  2042.                               msg_control,
  2043.                               BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ, /* msg_type */
  2044.                               BRW_DATAPORT_READ_TARGET_RENDER_CACHE,
  2045.                               1, /* msg_length */
  2046.                               true, /* header_present */
  2047.                               rlen);
  2048.    }
  2049. }
  2050.  
  2051. /**
  2052.  * Read a float[4] vector from the data port Data Cache (const buffer).
  2053.  * Location (in buffer) should be a multiple of 16.
  2054.  * Used for fetching shader constants.
  2055.  */
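        /* A hypothetical usage sketch (the destination register, byte offset
         * and binding table index are illustrative): fetching one float[4]
         * constant located 48 bytes into the bound constant buffer:
         *
         *    brw_oword_block_read(p, brw_vec8_grf(8, 0), brw_message_reg(1),
         *                         48, surf_index);
         */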
  2056. void brw_oword_block_read(struct brw_compile *p,
  2057.                           struct brw_reg dest,
  2058.                           struct brw_reg mrf,
  2059.                           uint32_t offset,
  2060.                           uint32_t bind_table_index)
  2061. {
  2062.    struct brw_context *brw = p->brw;
  2063.  
  2064.    /* On newer hardware, offset is in units of owords. */
  2065.    if (brw->gen >= 6)
  2066.       offset /= 16;
  2067.  
  2068.    mrf = retype(mrf, BRW_REGISTER_TYPE_UD);
  2069.  
  2070.    brw_push_insn_state(p);
  2071.    brw_set_predicate_control(p, BRW_PREDICATE_NONE);
  2072.    brw_set_compression_control(p, BRW_COMPRESSION_NONE);
  2073.    brw_set_mask_control(p, BRW_MASK_DISABLE);
  2074.  
  2075.    brw_MOV(p, mrf, retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD));
  2076.  
  2077.    /* set message header global offset field (reg 0, element 2) */
  2078.    brw_MOV(p,
  2079.            retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE,
  2080.                                mrf.nr,
  2081.                                2), BRW_REGISTER_TYPE_UD),
  2082.            brw_imm_ud(offset));
  2083.  
  2084.    struct brw_instruction *insn = next_insn(p, BRW_OPCODE_SEND);
  2085.    insn->header.destreg__conditionalmod = mrf.nr;
  2086.  
  2087.    /* cast dest to a uword[8] vector */
  2088.    dest = retype(vec8(dest), BRW_REGISTER_TYPE_UW);
  2089.  
  2090.    brw_set_dest(p, insn, dest);
  2091.    if (brw->gen >= 6) {
  2092.       brw_set_src0(p, insn, mrf);
  2093.    } else {
  2094.       brw_set_src0(p, insn, brw_null_reg());
  2095.    }
  2096.  
  2097.    brw_set_dp_read_message(p,
  2098.                            insn,
  2099.                            bind_table_index,
  2100.                            BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW,
  2101.                            BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ,
  2102.                            BRW_DATAPORT_READ_TARGET_DATA_CACHE,
  2103.                            1, /* msg_length */
  2104.                            true, /* header_present */
  2105.                            1); /* response_length (1 reg, 2 owords!) */
  2106.  
  2107.    brw_pop_insn_state(p);
  2108. }
  2109.  
  2110.  
  2111. void brw_fb_WRITE(struct brw_compile *p,
  2112.                   int dispatch_width,
  2113.                   GLuint msg_reg_nr,
  2114.                   struct brw_reg src0,
  2115.                   GLuint msg_control,
  2116.                   GLuint binding_table_index,
  2117.                   GLuint msg_length,
  2118.                   GLuint response_length,
  2119.                   bool eot,
  2120.                   bool header_present)
  2121. {
  2122.    struct brw_context *brw = p->brw;
  2123.    struct brw_instruction *insn;
  2124.    GLuint msg_type;
  2125.    struct brw_reg dest;
  2126.  
  2127.    if (dispatch_width == 16)
  2128.       dest = retype(vec16(brw_null_reg()), BRW_REGISTER_TYPE_UW);
  2129.    else
  2130.       dest = retype(vec8(brw_null_reg()), BRW_REGISTER_TYPE_UW);
  2131.  
  2132.    if (brw->gen >= 6) {
  2133.       insn = next_insn(p, BRW_OPCODE_SENDC);
  2134.    } else {
  2135.       insn = next_insn(p, BRW_OPCODE_SEND);
  2136.    }
  2137.    /* The execution mask is ignored for render target writes. */
  2138.    insn->header.predicate_control = 0;
  2139.    insn->header.compression_control = BRW_COMPRESSION_NONE;
  2140.  
  2141.    if (brw->gen >= 6) {
  2142.       /* headerless version, just submit color payload */
  2143.       src0 = brw_message_reg(msg_reg_nr);
  2144.  
  2145.       msg_type = GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
  2146.    } else {
  2147.       insn->header.destreg__conditionalmod = msg_reg_nr;
  2148.  
  2149.       msg_type = BRW_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE;
  2150.    }
  2151.  
  2152.    brw_set_dest(p, insn, dest);
  2153.    brw_set_src0(p, insn, src0);
  2154.    brw_set_dp_write_message(p,
  2155.                             insn,
  2156.                             binding_table_index,
  2157.                             msg_control,
  2158.                             msg_type,
  2159.                             msg_length,
  2160.                             header_present,
  2161.                             eot, /* last render target write */
  2162.                             response_length,
  2163.                             eot,
  2164.                             0 /* send_commit_msg */);
  2165. }
  2166.  
  2167.  
  2168. /**
  2169.  * Texture sample instruction.
  2170.  * Note: the msg_type plus msg_length values determine exactly what kind
  2171.  * of sampling operation is performed.  See volume 4, page 161 of docs.
  2172.  */
  2173. void brw_SAMPLE(struct brw_compile *p,
  2174.                 struct brw_reg dest,
  2175.                 GLuint msg_reg_nr,
  2176.                 struct brw_reg src0,
  2177.                 GLuint binding_table_index,
  2178.                 GLuint sampler,
  2179.                 GLuint msg_type,
  2180.                 GLuint response_length,
  2181.                 GLuint msg_length,
  2182.                 GLuint header_present,
  2183.                 GLuint simd_mode,
  2184.                 GLuint return_format)
  2185. {
  2186.    struct brw_context *brw = p->brw;
  2187.    struct brw_instruction *insn;
  2188.  
  2189.    gen6_resolve_implied_move(p, &src0, msg_reg_nr);
  2190.  
  2191.    insn = next_insn(p, BRW_OPCODE_SEND);
  2192.    insn->header.predicate_control = 0; /* XXX */
  2193.    insn->header.compression_control = BRW_COMPRESSION_NONE;
  2194.    if (brw->gen < 6)
  2195.       insn->header.destreg__conditionalmod = msg_reg_nr;
  2196.  
  2197.    brw_set_dest(p, insn, dest);
  2198.    brw_set_src0(p, insn, src0);
  2199.    brw_set_sampler_message(p, insn,
  2200.                            binding_table_index,
  2201.                            sampler,
  2202.                            msg_type,
  2203.                            response_length,
  2204.                            msg_length,
  2205.                            header_present,
  2206.                            simd_mode,
  2207.                            return_format);
  2208. }
  2209.  
  2210. /* All these variables are pretty confusing - we might be better off
  2211.  * using bitmasks and macros for this, in the old style.  Or perhaps
  2212.  * just having the caller instantiate the fields in dword3 itself.
  2213.  */
  2214. void brw_urb_WRITE(struct brw_compile *p,
  2215.                    struct brw_reg dest,
  2216.                    GLuint msg_reg_nr,
  2217.                    struct brw_reg src0,
  2218.                    bool allocate,
  2219.                    bool used,
  2220.                    GLuint msg_length,
  2221.                    GLuint response_length,
  2222.                    bool eot,
  2223.                    bool writes_complete,
  2224.                    GLuint offset,
  2225.                    GLuint swizzle)
  2226. {
  2227.    struct brw_context *brw = p->brw;
  2228.    struct brw_instruction *insn;
  2229.  
  2230.    gen6_resolve_implied_move(p, &src0, msg_reg_nr);
  2231.  
  2232.    if (brw->gen == 7) {
  2233.       /* Enable Channel Masks in the URB_WRITE_HWORD message header */
  2234.       brw_push_insn_state(p);
  2235.       brw_set_access_mode(p, BRW_ALIGN_1);
  2236.       brw_set_mask_control(p, BRW_MASK_DISABLE);
  2237.       brw_OR(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, msg_reg_nr, 5),
  2238.                        BRW_REGISTER_TYPE_UD),
  2239.                 retype(brw_vec1_grf(0, 5), BRW_REGISTER_TYPE_UD),
  2240.                 brw_imm_ud(0xff00));
  2241.       brw_pop_insn_state(p);
  2242.    }
  2243.  
  2244.    insn = next_insn(p, BRW_OPCODE_SEND);
  2245.  
  2246.    assert(msg_length < BRW_MAX_MRF);
  2247.  
  2248.    brw_set_dest(p, insn, dest);
  2249.    brw_set_src0(p, insn, src0);
  2250.    brw_set_src1(p, insn, brw_imm_d(0));
  2251.  
  2252.    if (brw->gen < 6)
  2253.       insn->header.destreg__conditionalmod = msg_reg_nr;
  2254.  
  2255.    brw_set_urb_message(p,
  2256.                        insn,
  2257.                        allocate,
  2258.                        used,
  2259.                        msg_length,
  2260.                        response_length,
  2261.                        eot,
  2262.                        writes_complete,
  2263.                        offset,
  2264.                        swizzle);
  2265. }
  2266.  
  2267. static int
  2268. next_ip(struct brw_compile *p, int ip)
  2269. {
  2270.    struct brw_instruction *insn = (void *)p->store + ip;
  2271.  
  2272.    if (insn->header.cmpt_control)
  2273.       return ip + 8;
  2274.    else
  2275.       return ip + 16;
  2276. }
  2277.  
  2278. static int
  2279. brw_find_next_block_end(struct brw_compile *p, int start)
  2280. {
  2281.    int ip;
  2282.    void *store = p->store;
  2283.  
  2284.    for (ip = next_ip(p, start); ip < p->next_insn_offset; ip = next_ip(p, ip)) {
  2285.       struct brw_instruction *insn = store + ip;
  2286.  
  2287.       switch (insn->header.opcode) {
  2288.       case BRW_OPCODE_ENDIF:
  2289.       case BRW_OPCODE_ELSE:
  2290.       case BRW_OPCODE_WHILE:
  2291.       case BRW_OPCODE_HALT:
  2292.          return ip;
  2293.       }
  2294.    }
  2295.  
  2296.    return 0;
  2297. }
  2298.  
  2299. /* There is no DO instruction on gen6, so to find the end of the loop
  2300.  * we have to see if the loop is jumping back before our start
  2301.  * instruction.
  2302.  */
  2303. static int
  2304. brw_find_loop_end(struct brw_compile *p, int start)
  2305. {
  2306.    struct brw_context *brw = p->brw;
  2307.    int ip;
  2308.    int scale = 8;
  2309.    void *store = p->store;
  2310.  
  2311.    /* Always start after the instruction (such as a WHILE) we're trying to fix
  2312.     * up.
  2313.     */
  2314.    for (ip = next_ip(p, start); ip < p->next_insn_offset; ip = next_ip(p, ip)) {
  2315.       struct brw_instruction *insn = store + ip;
  2316.  
  2317.       if (insn->header.opcode == BRW_OPCODE_WHILE) {
  2318.          int jip = brw->gen == 6 ? insn->bits1.branch_gen6.jump_count
  2319.                                    : insn->bits3.break_cont.jip;
  2320.          if (ip + jip * scale <= start)
  2321.             return ip;
  2322.       }
  2323.    }
  2324.    assert(!"not reached");
  2325.    return start;
  2326. }
  2327.  
  2328. /* After program generation, go back and update the UIP and JIP of
  2329.  * BREAK, CONT, and HALT instructions to their correct locations.
  2330.  */
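        /* For example, with scale == 8 a BREAK whose enclosing block ends four
         * uncompacted (16-byte) instructions later gets
         * jip = (4 * 16) / 8 = 8.
         */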
  2331. void
  2332. brw_set_uip_jip(struct brw_compile *p)
  2333. {
  2334.    struct brw_context *brw = p->brw;
  2335.    int ip;
  2336.    int scale = 8;
  2337.    void *store = p->store;
  2338.  
  2339.    if (brw->gen < 6)
  2340.       return;
  2341.  
  2342.    for (ip = 0; ip < p->next_insn_offset; ip = next_ip(p, ip)) {
  2343.       struct brw_instruction *insn = store + ip;
  2344.  
  2345.       if (insn->header.cmpt_control) {
  2346.          /* Fixups for compacted BREAK/CONTINUE not supported yet. */
  2347.          assert(insn->header.opcode != BRW_OPCODE_BREAK &&
  2348.                 insn->header.opcode != BRW_OPCODE_CONTINUE &&
  2349.                 insn->header.opcode != BRW_OPCODE_HALT);
  2350.          continue;
  2351.       }
  2352.  
  2353.       int block_end_ip = brw_find_next_block_end(p, ip);
  2354.       switch (insn->header.opcode) {
  2355.       case BRW_OPCODE_BREAK:
  2356.          assert(block_end_ip != 0);
  2357.          insn->bits3.break_cont.jip = (block_end_ip - ip) / scale;
  2358.          /* Gen7 UIP points to WHILE; Gen6 points just after it */
  2359.          insn->bits3.break_cont.uip =
  2360.             (brw_find_loop_end(p, ip) - ip +
  2361.              (brw->gen == 6 ? 16 : 0)) / scale;
  2362.          break;
  2363.       case BRW_OPCODE_CONTINUE:
  2364.          assert(block_end_ip != 0);
  2365.          insn->bits3.break_cont.jip = (block_end_ip - ip) / scale;
  2366.          insn->bits3.break_cont.uip =
  2367.             (brw_find_loop_end(p, ip) - ip) / scale;
  2368.  
  2369.          assert(insn->bits3.break_cont.uip != 0);
  2370.          assert(insn->bits3.break_cont.jip != 0);
  2371.          break;
  2372.  
  2373.       case BRW_OPCODE_ENDIF:
  2374.          if (block_end_ip == 0)
  2375.             insn->bits3.break_cont.jip = 2;
  2376.          else
  2377.             insn->bits3.break_cont.jip = (block_end_ip - ip) / scale;
  2378.          break;
  2379.  
  2380.       case BRW_OPCODE_HALT:
  2381.          /* From the Sandy Bridge PRM (volume 4, part 2, section 8.3.19):
  2382.           *
  2383.           *    "In case of the halt instruction not inside any conditional
  2384.           *     code block, the value of <JIP> and <UIP> should be the
  2385.           *     same. In case of the halt instruction inside conditional code
  2386.           *     block, the <UIP> should be the end of the program, and the
  2387.           *     <JIP> should be end of the most inner conditional code block."
  2388.           *
  2389.           * The uip will have already been set by whoever set up the
  2390.           * instruction.
  2391.           */
  2392.          if (block_end_ip == 0) {
  2393.             insn->bits3.break_cont.jip = insn->bits3.break_cont.uip;
  2394.          } else {
  2395.             insn->bits3.break_cont.jip = (block_end_ip - ip) / scale;
  2396.          }
  2397.          assert(insn->bits3.break_cont.uip != 0);
  2398.          assert(insn->bits3.break_cont.jip != 0);
  2399.          break;
  2400.       }
  2401.    }
  2402. }
  2403.  
  2404. void brw_ff_sync(struct brw_compile *p,
  2405.                    struct brw_reg dest,
  2406.                    GLuint msg_reg_nr,
  2407.                    struct brw_reg src0,
  2408.                    bool allocate,
  2409.                    GLuint response_length,
  2410.                    bool eot)
  2411. {
  2412.    struct brw_context *brw = p->brw;
  2413.    struct brw_instruction *insn;
  2414.  
  2415.    gen6_resolve_implied_move(p, &src0, msg_reg_nr);
  2416.  
  2417.    insn = next_insn(p, BRW_OPCODE_SEND);
  2418.    brw_set_dest(p, insn, dest);
  2419.    brw_set_src0(p, insn, src0);
  2420.    brw_set_src1(p, insn, brw_imm_d(0));
  2421.  
  2422.    if (brw->gen < 6)
  2423.       insn->header.destreg__conditionalmod = msg_reg_nr;
  2424.  
  2425.    brw_set_ff_sync_message(p,
  2426.                            insn,
  2427.                            allocate,
  2428.                            response_length,
  2429.                            eot);
  2430. }
  2431.  
  2432. /**
  2433.  * Emit the SEND instruction necessary to generate stream output data on Gen6
  2434.  * (for transform feedback).
  2435.  *
  2436.  * If send_commit_msg is true, this is the last piece of stream output data
  2437.  * from this thread, so send the data as a committed write.  According to the
  2438.  * Sandy Bridge PRM (volume 2 part 1, section 4.5.1):
  2439.  *
  2440.  *   "Prior to End of Thread with a URB_WRITE, the kernel must ensure all
  2441.  *   writes are complete by sending the final write as a committed write."
  2442.  */
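        /* A hypothetical usage sketch (the registers and binding table index
         * are illustrative): for the final piece of stream output data, the
         * commit return lands in dest because send_commit_msg doubles as the
         * response length:
         *
         *    brw_svb_write(p, brw_vec8_grf(10, 0), 1, brw_vec8_grf(4, 0),
         *                  binding_table_index, true);
         */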
  2443. void
  2444. brw_svb_write(struct brw_compile *p,
  2445.               struct brw_reg dest,
  2446.               GLuint msg_reg_nr,
  2447.               struct brw_reg src0,
  2448.               GLuint binding_table_index,
  2449.               bool   send_commit_msg)
  2450. {
  2451.    struct brw_instruction *insn;
  2452.  
  2453.    gen6_resolve_implied_move(p, &src0, msg_reg_nr);
  2454.  
  2455.    insn = next_insn(p, BRW_OPCODE_SEND);
  2456.    brw_set_dest(p, insn, dest);
  2457.    brw_set_src0(p, insn, src0);
  2458.    brw_set_src1(p, insn, brw_imm_d(0));
  2459.    brw_set_dp_write_message(p, insn,
  2460.                             binding_table_index,
  2461.                             0, /* msg_control: ignored */
  2462.                             GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE,
  2463.                             1, /* msg_length */
  2464.                             true, /* header_present */
  2465.                             0, /* last_render_target: ignored */
  2466.                             send_commit_msg, /* response_length */
  2467.                             0, /* end_of_thread */
  2468.                             send_commit_msg); /* send_commit_msg */
  2469. }
  2470.  
  2471. /**
  2472.  * This instruction is generated as a single-channel align1 instruction by
  2473.  * both the VS and FS stages when using INTEL_DEBUG=shader_time.
  2474.  *
  2475.  * We can't use the typed atomic op in the FS because that has the execution
  2476.  * mask ANDed with the pixel mask, but we just want to write the one dword for
  2477.  * all the pixels.
  2478.  *
  2479.  * We don't use the SIMD4x2 atomic ops in the VS because we want to just write
  2480.  * one u32.  So we use the same untyped atomic write message as the pixel
  2481.  * shader.
  2482.  *
  2483.  * The untyped atomic operation requires a BUFFER surface type with RAW
  2484.  * format, and is only accessible through the legacy DATA_CACHE dataport
  2485.  * messages.
  2486.  */
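        /* A hypothetical usage sketch (the payload register and surface index
         * are illustrative): once the offset and value have been loaded into
         * the two payload registers, one atomic add is emitted against the
         * shader-time surface:
         *
         *    brw_shader_time_add(p, payload_reg, shader_time_surf_index);
         */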
  2487. void brw_shader_time_add(struct brw_compile *p,
  2488.                          struct brw_reg payload,
  2489.                          uint32_t surf_index)
  2490. {
  2491.    struct brw_context *brw = p->brw;
  2492.    assert(brw->gen >= 7);
  2493.  
  2494.    brw_push_insn_state(p);
  2495.    brw_set_access_mode(p, BRW_ALIGN_1);
  2496.    brw_set_mask_control(p, BRW_MASK_DISABLE);
  2497.    struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND);
  2498.    brw_pop_insn_state(p);
  2499.  
  2500.    /* We use brw_vec1_reg and unmasked because we want to increment the given
  2501.     * offset only once.
  2502.     */
  2503.    brw_set_dest(p, send, brw_vec1_reg(BRW_ARCHITECTURE_REGISTER_FILE,
  2504.                                       BRW_ARF_NULL, 0));
  2505.    brw_set_src0(p, send, brw_vec1_reg(payload.file,
  2506.                                       payload.nr, 0));
  2507.  
  2508.    uint32_t sfid, msg_type;
  2509.    if (brw->is_haswell) {
  2510.       sfid = HSW_SFID_DATAPORT_DATA_CACHE_1;
  2511.       msg_type = HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP;
  2512.    } else {
  2513.       sfid = GEN7_SFID_DATAPORT_DATA_CACHE;
  2514.       msg_type = GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP;
  2515.    }
  2516.  
  2517.    bool header_present = false;
  2518.    bool eot = false;
  2519.    uint32_t mlen = 2; /* offset, value */
  2520.    uint32_t rlen = 0;
  2521.    brw_set_message_descriptor(p, send, sfid, mlen, rlen, header_present, eot);
  2522.  
  2523.    send->bits3.ud |= msg_type << 14;
  2524.    send->bits3.ud |= 0 << 13; /* no return data */
  2525.    send->bits3.ud |= 1 << 12; /* SIMD8 mode */
  2526.    send->bits3.ud |= BRW_AOP_ADD << 8;
  2527.    send->bits3.ud |= surf_index << 0;
  2528. }
  2529.