Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright © 2014 Intel Corporation
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8.  * and/or sell copies of the Software, and to permit persons to whom the
  9.  * Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice (including the next
  12.  * paragraph) shall be included in all copies or substantial portions of the
  13.  * Software.
  14.  *
  15.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21.  * IN THE SOFTWARE.
  22.  *
  23.  * This code is based on original work by Ilia Mirkin.
  24.  */
  25.  
  26. /**
  27.  * \file gen6_gs_visitor.cpp
  28.  *
  29.  * Gen6 geometry shader implementation
  30.  */
  31.  
  32. #include "gen6_gs_visitor.h"
  33.  
/* Multiplier applied to BRW_VARYING_SLOT_COUNT when sizing the attribute map
 * in setup_payload(). Judging by the name, this is the largest number of
 * input vertices a geometry shader invocation can receive -- TODO confirm.
 */
const unsigned MAX_GS_INPUT_VERTICES = 6;
  35.  
  36. namespace brw {
  37.  
  38. void
  39. gen6_gs_visitor::assign_binding_table_offsets()
  40. {
  41.    /* In gen6 we reserve the first BRW_MAX_SOL_BINDINGS entries for transform
  42.     * feedback surfaces.
  43.     */
  44.    assign_common_binding_table_offsets(BRW_MAX_SOL_BINDINGS);
  45. }
  46.  
  47. void
  48. gen6_gs_visitor::emit_prolog()
  49. {
  50.    vec4_gs_visitor::emit_prolog();
  51.  
  52.    /* Gen6 geometry shaders require to allocate an initial VUE handle via
  53.     * FF_SYNC message, however the documentation remarks that only one thread
  54.     * can write to the URB simultaneously and the FF_SYNC message provides the
  55.     * synchronization mechanism for this, so using this message effectively
  56.     * stalls the thread until it is its turn to write to the URB. Because of
  57.     * this, the best way to implement geometry shader algorithms in gen6 is to
  58.     * execute the algorithm before the FF_SYNC message to maximize parallelism.
  59.     *
  60.     * To achieve this we buffer the geometry shader outputs for each emitted
  61.     * vertex in vertex_output during operation. Then, when we have processed
  62.     * the last vertex (that is, at thread end time), we send the FF_SYNC
  63.     * message to allocate the initial VUE handle and write all buffered vertex
  64.     * data to the URB in one go.
  65.     *
  66.     * For each emitted vertex, vertex_output will hold vue_map.num_slots
  67.     * data items plus one additional item to hold required flags
  68.     * (PrimType, PrimStart, PrimEnd, as expected by the URB_WRITE message)
  69.     * which come right after the data items for that vertex. Vertex data and
  70.     * flags for the next vertex come right after the data items and flags for
  71.     * the previous vertex.
  72.     */
  73.    this->current_annotation = "gen6 prolog";
  74.    this->vertex_output = src_reg(this,
  75.                                  glsl_type::uint_type,
  76.                                  (prog_data->vue_map.num_slots + 1) *
  77.                                  c->gp->program.VerticesOut);
  78.    this->vertex_output_offset = src_reg(this, glsl_type::uint_type);
  79.    emit(MOV(dst_reg(this->vertex_output_offset), src_reg(0u)));
  80.  
  81.    /* MRF 1 will be the header for all messages (FF_SYNC and URB_WRITES),
  82.     * so initialize it once to R0.
  83.     */
  84.    vec4_instruction *inst = emit(MOV(dst_reg(MRF, 1),
  85.                                      retype(brw_vec8_grf(0, 0),
  86.                                             BRW_REGISTER_TYPE_UD)));
  87.    inst->force_writemask_all = true;
  88.  
  89.    /* This will be used as a temporary to store writeback data of FF_SYNC
  90.     * and URB_WRITE messages.
  91.     */
  92.    this->temp = src_reg(this, glsl_type::uint_type);
  93.  
  94.    /* This will be used to know when we are processing the first vertex of
  95.     * a primitive. We will set this to URB_WRITE_PRIM_START only when we know
  96.     * that we are processing the first vertex in the primitive and to zero
  97.     * otherwise. This way we can use its value directly in the URB write
  98.     * headers.
  99.     */
  100.    this->first_vertex = src_reg(this, glsl_type::uint_type);
  101.    emit(MOV(dst_reg(this->first_vertex), URB_WRITE_PRIM_START));
  102.  
  103.    /* The FF_SYNC message requires to know the number of primitives generated,
  104.     * so keep a counter for this.
  105.     */
  106.    this->prim_count = src_reg(this, glsl_type::uint_type);
  107.    emit(MOV(dst_reg(this->prim_count), 0u));
  108.  
  109.    if (c->prog_data.gen6_xfb_enabled) {
  110.       /* Create a virtual register to hold destination indices in SOL */
  111.       this->destination_indices = src_reg(this, glsl_type::uvec4_type);
  112.       /* Create a virtual register to hold number of written primitives */
  113.       this->sol_prim_written = src_reg(this, glsl_type::uint_type);
  114.       /* Create a virtual register to hold Streamed Vertex Buffer Indices */
  115.       this->svbi = src_reg(this, glsl_type::uvec4_type);
  116.       /* Create a virtual register to hold max values of SVBI */
  117.       this->max_svbi = src_reg(this, glsl_type::uvec4_type);
  118.       emit(MOV(dst_reg(this->max_svbi),
  119.                src_reg(retype(brw_vec1_grf(1, 4), BRW_REGISTER_TYPE_UD))));
  120.  
  121.       xfb_setup();
  122.    }
  123.  
  124.    /* PrimitveID is delivered in r0.1 of the thread payload. If the program
  125.     * needs it we have to move it to a separate register where we can map
  126.     * the atttribute.
  127.     *
  128.     * Notice that we cannot use a virtual register for this, because we need to
  129.     * map all input attributes to hardware registers in setup_payload(),
  130.     * which happens before virtual registers are mapped to hardware registers.
  131.     * We could work around that issue if we were able to compute the first
  132.     * non-payload register here and move the PrimitiveID information to that
  133.     * register, but we can't because at this point we don't know the final
  134.     * number uniforms that will be included in the payload.
  135.     *
  136.     * So, what we do is to place PrimitiveID information in r1, which is always
  137.     * delivered as part of the payload, but its only populated with data
  138.     * relevant for transform feedback when we set GEN6_GS_SVBI_PAYLOAD_ENABLE
  139.     * in the 3DSTATE_GS state packet. That information can be obtained by other
  140.     * means though, so we can safely use r1 for this purpose.
  141.     */
  142.    if (c->prog_data.include_primitive_id) {
  143.       this->primitive_id =
  144.          src_reg(retype(brw_vec8_grf(1, 0), BRW_REGISTER_TYPE_UD));
  145.       emit(GS_OPCODE_SET_PRIMITIVE_ID, dst_reg(this->primitive_id));
  146.    }
  147. }
  148.  
  149. void
  150. gen6_gs_visitor::visit(ir_emit_vertex *)
  151. {
  152.    this->current_annotation = "gen6 emit vertex";
  153.    /* Honor max_vertex layout indication in geometry shader by ignoring any
  154.     * vertices coming after c->gp->program.VerticesOut.
  155.     */
  156.    unsigned num_output_vertices = c->gp->program.VerticesOut;
  157.    emit(CMP(dst_null_d(), this->vertex_count, src_reg(num_output_vertices),
  158.             BRW_CONDITIONAL_L));
  159.    emit(IF(BRW_PREDICATE_NORMAL));
  160.    {
  161.       /* Buffer all output slots for this vertex in vertex_output */
  162.       for (int slot = 0; slot < prog_data->vue_map.num_slots; ++slot) {
  163.          int varying = prog_data->vue_map.slot_to_varying[slot];
  164.          if (varying != VARYING_SLOT_PSIZ) {
  165.             dst_reg dst(this->vertex_output);
  166.             dst.reladdr = ralloc(mem_ctx, src_reg);
  167.             memcpy(dst.reladdr, &this->vertex_output_offset, sizeof(src_reg));
  168.             emit_urb_slot(dst, varying);
  169.          } else {
  170.             /* The PSIZ slot can pack multiple varyings in different channels
  171.              * and emit_urb_slot() will produce a MOV instruction for each of
  172.              * them. Since we are writing to an array, that will translate to
  173.              * possibly multiple MOV instructions with an array destination and
  174.              * each will generate a scratch write with the same offset into
  175.              * scratch space (thus, each one overwriting the previous). This is
  176.              * not what we want. What we will do instead is emit PSIZ to a
  177.              * a regular temporary register, then move that resgister into the
  178.              * array. This way we only have one instruction with an array
  179.              * destination and we only produce a single scratch write.
  180.              */
  181.             dst_reg tmp = dst_reg(src_reg(this, glsl_type::uvec4_type));
  182.             emit_urb_slot(tmp, varying);
  183.             dst_reg dst(this->vertex_output);
  184.             dst.reladdr = ralloc(mem_ctx, src_reg);
  185.             memcpy(dst.reladdr, &this->vertex_output_offset, sizeof(src_reg));
  186.             vec4_instruction *inst = emit(MOV(dst, src_reg(tmp)));
  187.             inst->force_writemask_all = true;
  188.          }
  189.  
  190.          emit(ADD(dst_reg(this->vertex_output_offset),
  191.                   this->vertex_output_offset, 1u));
  192.       }
  193.  
  194.       /* Now buffer flags for this vertex */
  195.       dst_reg dst(this->vertex_output);
  196.       dst.reladdr = ralloc(mem_ctx, src_reg);
  197.       memcpy(dst.reladdr, &this->vertex_output_offset, sizeof(src_reg));
  198.       if (c->gp->program.OutputType == GL_POINTS) {
  199.          /* If we are outputting points, then every vertex has PrimStart and
  200.           * PrimEnd set.
  201.           */
  202.          emit(MOV(dst, (_3DPRIM_POINTLIST << URB_WRITE_PRIM_TYPE_SHIFT) |
  203.                   URB_WRITE_PRIM_START | URB_WRITE_PRIM_END));
  204.          emit(ADD(dst_reg(this->prim_count), this->prim_count, 1u));
  205.       } else {
  206.          /* Otherwise, we can only set the PrimStart flag, which we have stored
  207.           * in the first_vertex register. We will have to wait until we execute
  208.           * EndPrimitive() or we end the thread to set the PrimEnd flag on a
  209.           * vertex.
  210.           */
  211.          emit(OR(dst, this->first_vertex,
  212.                  (c->prog_data.output_topology << URB_WRITE_PRIM_TYPE_SHIFT)));
  213.          emit(MOV(dst_reg(this->first_vertex), 0u));
  214.       }
  215.       emit(ADD(dst_reg(this->vertex_output_offset),
  216.                this->vertex_output_offset, 1u));
  217.  
  218.       /* Update vertex count */
  219.       emit(ADD(dst_reg(this->vertex_count), this->vertex_count, 1u));
  220.    }
  221.    emit(BRW_OPCODE_ENDIF);
  222. }
  223.  
  224. void
  225. gen6_gs_visitor::visit(ir_end_primitive *)
  226. {
  227.    this->current_annotation = "gen6 end primitive";
  228.    /* Calling EndPrimitive() is optional for point output. In this case we set
  229.     * the PrimEnd flag when we process EmitVertex().
  230.     */
  231.    if (c->gp->program.OutputType == GL_POINTS)
  232.       return;
  233.  
  234.    /* Otherwise we know that the last vertex we have processed was the last
  235.     * vertex in the primitive and we need to set its PrimEnd flag, so do this
  236.     * unless we haven't emitted that vertex at all (vertex_count != 0).
  237.     *
  238.     * Notice that we have already incremented vertex_count when we processed
  239.     * the last emit_vertex, so we need to take that into account in the
  240.     * comparison below (hence the num_output_vertices + 1 in the comparison
  241.     * below).
  242.     */
  243.    unsigned num_output_vertices = c->gp->program.VerticesOut;
  244.    emit(CMP(dst_null_d(), this->vertex_count, src_reg(num_output_vertices + 1),
  245.             BRW_CONDITIONAL_L));
  246.    vec4_instruction *inst = emit(CMP(dst_null_d(),
  247.                                      this->vertex_count, 0u,
  248.                                      BRW_CONDITIONAL_NEQ));
  249.    inst->predicate = BRW_PREDICATE_NORMAL;
  250.    emit(IF(BRW_PREDICATE_NORMAL));
  251.    {
  252.       /* vertex_output_offset is already pointing at the first entry of the
  253.        * next vertex. So subtract 1 to modify the flags for the previous
  254.        * vertex.
  255.        */
  256.       src_reg offset(this, glsl_type::uint_type);
  257.       emit(ADD(dst_reg(offset), this->vertex_output_offset, src_reg(-1)));
  258.  
  259.       src_reg dst(this->vertex_output);
  260.       dst.reladdr = ralloc(mem_ctx, src_reg);
  261.       memcpy(dst.reladdr, &offset, sizeof(src_reg));
  262.  
  263.       emit(OR(dst_reg(dst), dst, URB_WRITE_PRIM_END));
  264.       emit(ADD(dst_reg(this->prim_count), this->prim_count, 1u));
  265.  
  266.       /* Set the first vertex flag to indicate that the next vertex will start
  267.        * a primitive.
  268.        */
  269.       emit(MOV(dst_reg(this->first_vertex), URB_WRITE_PRIM_START));
  270.    }
  271.    emit(BRW_OPCODE_ENDIF);
  272. }
  273.  
  274. void
  275. gen6_gs_visitor::emit_urb_write_header(int mrf)
  276. {
  277.    this->current_annotation = "gen6 urb header";
  278.    /* Compute offset of the flags for the current vertex in vertex_output and
  279.     * write them in dw2 of the message header.
  280.     *
  281.     * Notice that by the time that emit_thread_end() calls here
  282.     * vertex_output_offset should point to the first data item of the current
  283.     * vertex in vertex_output, thus we only need to add the number of output
  284.     * slots per vertex to that offset to obtain the flags data offset.
  285.     */
  286.    src_reg flags_offset(this, glsl_type::uint_type);
  287.    emit(ADD(dst_reg(flags_offset),
  288.             this->vertex_output_offset, src_reg(prog_data->vue_map.num_slots)));
  289.  
  290.    src_reg flags_data(this->vertex_output);
  291.    flags_data.reladdr = ralloc(mem_ctx, src_reg);
  292.    memcpy(flags_data.reladdr, &flags_offset, sizeof(src_reg));
  293.  
  294.    emit(GS_OPCODE_SET_DWORD_2, dst_reg(MRF, mrf), flags_data);
  295. }
  296.  
  297. void
  298. gen6_gs_visitor::emit_urb_write_opcode(bool complete, int base_mrf,
  299.                                        int last_mrf, int urb_offset)
  300. {
  301.    vec4_instruction *inst = NULL;
  302.  
  303.    if (!complete) {
  304.       /* If the vertex is not complete we don't have to do anything special */
  305.       inst = emit(GS_OPCODE_URB_WRITE);
  306.       inst->urb_write_flags = BRW_URB_WRITE_NO_FLAGS;
  307.    } else {
  308.       /* Otherwise we always request to allocate a new VUE handle. If this is
  309.        * the last write before the EOT message and the new handle never gets
  310.        * used it will be dereferenced when we send the EOT message. This is
  311.        * necessary to avoid different setups for the EOT message (one for the
  312.        * case when there is no output and another for the case when there is)
  313.        * which would require to end the program with an IF/ELSE/ENDIF block,
  314.        * something we do not want.
  315.        */
  316.       inst = emit(GS_OPCODE_URB_WRITE_ALLOCATE);
  317.       inst->urb_write_flags = BRW_URB_WRITE_COMPLETE;
  318.       inst->dst = dst_reg(MRF, base_mrf);
  319.       inst->src[0] = this->temp;
  320.    }
  321.  
  322.    inst->base_mrf = base_mrf;
  323.    /* URB data written (does not include the message header reg) must
  324.     * be a multiple of 256 bits, or 2 VS registers.  See vol5c.5,
  325.     * section 5.4.3.2.2: URB_INTERLEAVED.
  326.     */
  327.    int mlen = last_mrf - base_mrf;
  328.    if ((mlen % 2) != 1)
  329.       mlen++;
  330.    inst->mlen = mlen;
  331.    inst->offset = urb_offset;
  332. }
  333.  
  334. void
  335. gen6_gs_visitor::emit_thread_end()
  336. {
  337.    /* Make sure the current primitive is ended: we know it is not ended when
  338.     * first_vertex is not zero. This is only relevant for outputs other than
  339.     * points because in the point case we set PrimEnd on all vertices.
  340.     */
  341.    if (c->gp->program.OutputType != GL_POINTS) {
  342.       emit(CMP(dst_null_d(), this->first_vertex, 0u, BRW_CONDITIONAL_Z));
  343.       emit(IF(BRW_PREDICATE_NORMAL));
  344.       {
  345.          visit((ir_end_primitive *) NULL);
  346.       }
  347.       emit(BRW_OPCODE_ENDIF);
  348.    }
  349.  
  350.    /* Here we have to:
  351.     * 1) Emit an FF_SYNC messsage to obtain an initial VUE handle.
  352.     * 2) Loop over all buffered vertex data and write it to corresponding
  353.     *    URB entries.
  354.     * 3) Allocate new VUE handles for all vertices other than the first.
  355.     * 4) Send a final EOT message.
  356.     */
  357.  
  358.    /* MRF 0 is reserved for the debugger, so start with message header
  359.     * in MRF 1.
  360.     */
  361.    int base_mrf = 1;
  362.  
  363.    /* In the process of generating our URB write message contents, we
  364.     * may need to unspill a register or load from an array.  Those
  365.     * reads would use MRFs 14-15.
  366.     */
  367.    int max_usable_mrf = 13;
  368.  
  369.    /* Issue the FF_SYNC message and obtain the initial VUE handle. */
  370.    emit(CMP(dst_null_d(), this->vertex_count, 0u, BRW_CONDITIONAL_G));
  371.    emit(IF(BRW_PREDICATE_NORMAL));
  372.    {
  373.       this->current_annotation = "gen6 thread end: ff_sync";
  374.  
  375.       vec4_instruction *inst;
  376.       if (c->prog_data.gen6_xfb_enabled) {
  377.          src_reg sol_temp(this, glsl_type::uvec4_type);
  378.          emit(GS_OPCODE_FF_SYNC_SET_PRIMITIVES,
  379.               dst_reg(this->svbi),
  380.               this->vertex_count,
  381.               this->prim_count,
  382.               sol_temp);
  383.          inst = emit(GS_OPCODE_FF_SYNC,
  384.                      dst_reg(this->temp), this->prim_count, this->svbi);
  385.       } else {
  386.          inst = emit(GS_OPCODE_FF_SYNC,
  387.                      dst_reg(this->temp), this->prim_count, src_reg(0u));
  388.       }
  389.       inst->base_mrf = base_mrf;
  390.  
  391.       /* Loop over all buffered vertices and emit URB write messages */
  392.       this->current_annotation = "gen6 thread end: urb writes init";
  393.       src_reg vertex(this, glsl_type::uint_type);
  394.       emit(MOV(dst_reg(vertex), 0u));
  395.       emit(MOV(dst_reg(this->vertex_output_offset), 0u));
  396.  
  397.       this->current_annotation = "gen6 thread end: urb writes";
  398.       emit(BRW_OPCODE_DO);
  399.       {
  400.          emit(CMP(dst_null_d(), vertex, this->vertex_count, BRW_CONDITIONAL_GE));
  401.          inst = emit(BRW_OPCODE_BREAK);
  402.          inst->predicate = BRW_PREDICATE_NORMAL;
  403.  
  404.          /* First we prepare the message header */
  405.          emit_urb_write_header(base_mrf);
  406.  
  407.          /* Then add vertex data to the message in interleaved fashion */
  408.          int slot = 0;
  409.          bool complete = false;
  410.          do {
  411.             int mrf = base_mrf + 1;
  412.  
  413.             /* URB offset is in URB row increments, and each of our MRFs is half
  414.              * of one of those, since we're doing interleaved writes.
  415.              */
  416.             int urb_offset = slot / 2;
  417.  
  418.             for (; slot < prog_data->vue_map.num_slots; ++slot) {
  419.                int varying = prog_data->vue_map.slot_to_varying[slot];
  420.                current_annotation = output_reg_annotation[varying];
  421.  
  422.                /* Compute offset of this slot for the current vertex
  423.                 * in vertex_output
  424.                 */
  425.                src_reg data(this->vertex_output);
  426.                data.reladdr = ralloc(mem_ctx, src_reg);
  427.                memcpy(data.reladdr, &this->vertex_output_offset,
  428.                       sizeof(src_reg));
  429.  
  430.                /* Copy this slot to the appropriate message register */
  431.                dst_reg reg = dst_reg(MRF, mrf);
  432.                reg.type = output_reg[varying].type;
  433.                data.type = reg.type;
  434.                vec4_instruction *inst = emit(MOV(reg, data));
  435.                inst->force_writemask_all = true;
  436.  
  437.                mrf++;
  438.                emit(ADD(dst_reg(this->vertex_output_offset),
  439.                         this->vertex_output_offset, 1u));
  440.  
  441.                /* If this was max_usable_mrf, we can't fit anything more into
  442.                 * this URB WRITE.
  443.                 */
  444.                if (mrf > max_usable_mrf) {
  445.                   slot++;
  446.                   break;
  447.                }
  448.             }
  449.  
  450.             complete = slot >= prog_data->vue_map.num_slots;
  451.             emit_urb_write_opcode(complete, base_mrf, mrf, urb_offset);
  452.          } while (!complete);
  453.  
  454.          /* Skip over the flags data item so that vertex_output_offset points
  455.           * to the first data item of the next vertex, so that we can start
  456.           * writing the next vertex.
  457.           */
  458.          emit(ADD(dst_reg(this->vertex_output_offset),
  459.                   this->vertex_output_offset, 1u));
  460.  
  461.          emit(ADD(dst_reg(vertex), vertex, 1u));
  462.       }
  463.       emit(BRW_OPCODE_WHILE);
  464.  
  465.       if (c->prog_data.gen6_xfb_enabled)
  466.          xfb_write();
  467.    }
  468.    emit(BRW_OPCODE_ENDIF);
  469.  
  470.    /* Finally, emit EOT message.
  471.     *
  472.     * In gen6 we need to end the thread differently depending on whether we have
  473.     * emitted at least one vertex or not. In case we did, the EOT message must
  474.     * always include the COMPLETE flag or else the GPU hangs. If we have not
  475.     * produced any output we can't use the COMPLETE flag.
  476.     *
  477.     * However, this would lead us to end the program with an ENDIF opcode,
  478.     * which we want to avoid, so what we do is that we always request a new
  479.     * VUE handle every time we do a URB WRITE, even for the last vertex we emit.
  480.     * With this we make sure that whether we have emitted at least one vertex
  481.     * or none at all, we have to finish the thread without writing to the URB,
  482.     * which works for both cases by setting the COMPLETE and UNUSED flags in
  483.     * the EOT message.
  484.     */
  485.    this->current_annotation = "gen6 thread end: EOT";
  486.  
  487.    if (c->prog_data.gen6_xfb_enabled) {
  488.       /* When emitting EOT, set SONumPrimsWritten Increment Value. */
  489.       src_reg data(this, glsl_type::uint_type);
  490.       emit(AND(dst_reg(data), this->sol_prim_written, src_reg(0xffffu)));
  491.       emit(SHL(dst_reg(data), data, src_reg(16u)));
  492.       emit(GS_OPCODE_SET_DWORD_2, dst_reg(MRF, base_mrf), data);
  493.    }
  494.  
  495.    vec4_instruction *inst = emit(GS_OPCODE_THREAD_END);
  496.    inst->urb_write_flags = BRW_URB_WRITE_COMPLETE | BRW_URB_WRITE_UNUSED;
  497.    inst->base_mrf = base_mrf;
  498.    inst->mlen = 1;
  499. }
  500.  
  501. void
  502. gen6_gs_visitor::setup_payload()
  503. {
  504.    int attribute_map[BRW_VARYING_SLOT_COUNT * MAX_GS_INPUT_VERTICES];
  505.  
  506.    /* Attributes are going to be interleaved, so one register contains two
  507.     * attribute slots.
  508.     */
  509.    int attributes_per_reg = 2;
  510.  
  511.    /* If a geometry shader tries to read from an input that wasn't written by
  512.     * the vertex shader, that produces undefined results, but it shouldn't
  513.     * crash anything.  So initialize attribute_map to zeros--that ensures that
  514.     * these undefined results are read from r0.
  515.     */
  516.    memset(attribute_map, 0, sizeof(attribute_map));
  517.  
  518.    int reg = 0;
  519.  
  520.    /* The payload always contains important data in r0. */
  521.    reg++;
  522.  
  523.    /* r1 is always part of the payload and it holds information relevant
  524.     * for transform feedback when we set the GEN6_GS_SVBI_PAYLOAD_ENABLE bit in
  525.     * the 3DSTATE_GS packet. We will overwrite it with the PrimitiveID
  526.     * information (and move the original value to a virtual register if
  527.     * necessary).
  528.     */
  529.    if (c->prog_data.include_primitive_id)
  530.       attribute_map[VARYING_SLOT_PRIMITIVE_ID] = attributes_per_reg * reg;
  531.    reg++;
  532.  
  533.    reg = setup_uniforms(reg);
  534.  
  535.    reg = setup_varying_inputs(reg, attribute_map, attributes_per_reg);
  536.  
  537.    lower_attributes_to_hw_regs(attribute_map, true);
  538.  
  539.    this->first_non_payload_grf = reg;
  540. }
  541.  
  542. void
  543. gen6_gs_visitor::xfb_setup()
  544. {
  545.    static const unsigned swizzle_for_offset[4] = {
  546.       BRW_SWIZZLE4(0, 1, 2, 3),
  547.       BRW_SWIZZLE4(1, 2, 3, 3),
  548.       BRW_SWIZZLE4(2, 3, 3, 3),
  549.       BRW_SWIZZLE4(3, 3, 3, 3)
  550.    };
  551.  
  552.    struct brw_gs_prog_data *prog_data =
  553.       (struct brw_gs_prog_data *) &c->prog_data;
  554.  
  555.    const struct gl_transform_feedback_info *linked_xfb_info =
  556.       &this->shader_prog->LinkedTransformFeedback;
  557.    int i;
  558.  
  559.    /* Make sure that the VUE slots won't overflow the unsigned chars in
  560.     * prog_data->transform_feedback_bindings[].
  561.     */
  562.    STATIC_ASSERT(BRW_VARYING_SLOT_COUNT <= 256);
  563.  
  564.    /* Make sure that we don't need more binding table entries than we've
  565.     * set aside for use in transform feedback.  (We shouldn't, since we
  566.     * set aside enough binding table entries to have one per component).
  567.     */
  568.    assert(linked_xfb_info->NumOutputs <= BRW_MAX_SOL_BINDINGS);
  569.  
  570.    prog_data->num_transform_feedback_bindings = linked_xfb_info->NumOutputs;
  571.    for (i = 0; i < prog_data->num_transform_feedback_bindings; i++) {
  572.       prog_data->transform_feedback_bindings[i] =
  573.          linked_xfb_info->Outputs[i].OutputRegister;
  574.       prog_data->transform_feedback_swizzles[i] =
  575.          swizzle_for_offset[linked_xfb_info->Outputs[i].ComponentOffset];
  576.    }
  577. }
  578.  
  579. void
  580. gen6_gs_visitor::xfb_write()
  581. {
  582.    unsigned num_verts;
  583.    struct brw_gs_prog_data *prog_data =
  584.       (struct brw_gs_prog_data *) &c->prog_data;
  585.  
  586.    if (!prog_data->num_transform_feedback_bindings)
  587.       return;
  588.  
  589.    switch (c->prog_data.output_topology) {
  590.    case _3DPRIM_POINTLIST:
  591.       num_verts = 1;
  592.       break;
  593.    case _3DPRIM_LINELIST:
  594.    case _3DPRIM_LINESTRIP:
  595.    case _3DPRIM_LINELOOP:
  596.       num_verts = 2;
  597.       break;
  598.    case _3DPRIM_TRILIST:
  599.    case _3DPRIM_TRIFAN:
  600.    case _3DPRIM_TRISTRIP:
  601.    case _3DPRIM_RECTLIST:
  602.       num_verts = 3;
  603.       break;
  604.    case _3DPRIM_QUADLIST:
  605.    case _3DPRIM_QUADSTRIP:
  606.    case _3DPRIM_POLYGON:
  607.       num_verts = 3;
  608.       break;
  609.    default:
  610.       unreachable("Unexpected primitive type in Gen6 SOL program.");
  611.    }
  612.  
  613.    this->current_annotation = "gen6 thread end: svb writes init";
  614.  
  615.    emit(MOV(dst_reg(this->vertex_output_offset), 0u));
  616.    emit(MOV(dst_reg(this->sol_prim_written), 0u));
  617.  
  618.    /* Check that at least one primitive can be written
  619.     *
  620.     * Note: since we use the binding table to keep track of buffer offsets
  621.     * and stride, the GS doesn't need to keep track of a separate pointer
  622.     * into each buffer; it uses a single pointer which increments by 1 for
  623.     * each vertex.  So we use SVBI0 for this pointer, regardless of whether
  624.     * transform feedback is in interleaved or separate attribs mode.
  625.     */
  626.    src_reg sol_temp(this, glsl_type::uvec4_type);
  627.    emit(ADD(dst_reg(sol_temp), this->svbi, src_reg(num_verts)));
  628.  
  629.    /* Compare SVBI calculated number with the maximum value, which is
  630.     * in R1.4 (previously saved in this->max_svbi) for gen6.
  631.     */
  632.    emit(CMP(dst_null_d(), sol_temp, this->max_svbi, BRW_CONDITIONAL_LE));
  633.    emit(IF(BRW_PREDICATE_NORMAL));
  634.    {
  635.       src_reg destination_indices_uw =
  636.          retype(destination_indices, BRW_REGISTER_TYPE_UW);
  637.  
  638.       vec4_instruction *inst = emit(MOV(dst_reg(destination_indices_uw),
  639.                                         brw_imm_v(0x00020100))); /* (0, 1, 2) */
  640.       inst->force_writemask_all = true;
  641.  
  642.       emit(ADD(dst_reg(this->destination_indices),
  643.                this->destination_indices,
  644.                this->svbi));
  645.    }
  646.    emit(BRW_OPCODE_ENDIF);
  647.  
  648.    /* Write transform feedback data for all processed vertices. */
  649.    for (int i = 0; i < c->gp->program.VerticesOut; i++) {
  650.       emit(MOV(dst_reg(sol_temp), i));
  651.       emit(CMP(dst_null_d(), sol_temp, this->vertex_count,
  652.                BRW_CONDITIONAL_L));
  653.       emit(IF(BRW_PREDICATE_NORMAL));
  654.       {
  655.          xfb_program(i, num_verts);
  656.       }
  657.       emit(BRW_OPCODE_ENDIF);
  658.    }
  659. }
  660.  
  661. void
  662. gen6_gs_visitor::xfb_program(unsigned vertex, unsigned num_verts)
  663. {
  664.    struct brw_gs_prog_data *prog_data =
  665.       (struct brw_gs_prog_data *) &c->prog_data;
  666.    unsigned binding;
  667.    unsigned num_bindings = prog_data->num_transform_feedback_bindings;
  668.    src_reg sol_temp(this, glsl_type::uvec4_type);
  669.  
  670.    /* Check for buffer overflow: we need room to write the complete primitive
  671.     * (all vertices). Otherwise, avoid writing any vertices for it
  672.     */
  673.    emit(ADD(dst_reg(sol_temp), this->sol_prim_written, 1u));
  674.    emit(MUL(dst_reg(sol_temp), sol_temp, src_reg(num_verts)));
  675.    emit(ADD(dst_reg(sol_temp), sol_temp, this->svbi));
  676.    emit(CMP(dst_null_d(), sol_temp, this->max_svbi, BRW_CONDITIONAL_LE));
  677.    emit(IF(BRW_PREDICATE_NORMAL));
  678.    {
  679.       /* Avoid overwriting MRF 1 as it is used as URB write message header */
  680.       dst_reg mrf_reg(MRF, 2);
  681.  
  682.       this->current_annotation = "gen6: emit SOL vertex data";
  683.       /* For each vertex, generate code to output each varying using the
  684.        * appropriate binding table entry.
  685.        */
  686.       for (binding = 0; binding < num_bindings; ++binding) {
  687.          unsigned char varying =
  688.             prog_data->transform_feedback_bindings[binding];
  689.  
  690.          /* Set up the correct destination index for this vertex */
  691.          vec4_instruction *inst = emit(GS_OPCODE_SVB_SET_DST_INDEX,
  692.                                        mrf_reg,
  693.                                        this->destination_indices);
  694.          inst->sol_vertex = vertex % num_verts;
  695.  
  696.          /* From the Sandybridge PRM, Volume 2, Part 1, Section 4.5.1:
  697.           *
  698.           *   "Prior to End of Thread with a URB_WRITE, the kernel must
  699.           *   ensure that all writes are complete by sending the final
  700.           *   write as a committed write."
  701.           */
  702.          bool final_write = binding == (unsigned) num_bindings - 1 &&
  703.                             inst->sol_vertex == num_verts - 1;
  704.  
  705.          /* Compute offset of this varying for the current vertex
  706.           * in vertex_output
  707.           */
  708.          this->current_annotation = output_reg_annotation[varying];
  709.          src_reg data(this->vertex_output);
  710.          data.reladdr = ralloc(mem_ctx, src_reg);
  711.          int offset = get_vertex_output_offset_for_varying(vertex, varying);
  712.          emit(MOV(dst_reg(this->vertex_output_offset), offset));
  713.          memcpy(data.reladdr, &this->vertex_output_offset, sizeof(src_reg));
  714.          data.type = output_reg[varying].type;
  715.  
  716.          /* PSIZ, LAYER and VIEWPORT are packed in different channels of the
  717.           * same slot, so make sure we write the appropriate channel
  718.           */
  719.          if (varying == VARYING_SLOT_PSIZ)
  720.             data.swizzle = BRW_SWIZZLE_WWWW;
  721.          else if (varying == VARYING_SLOT_LAYER)
  722.             data.swizzle = BRW_SWIZZLE_YYYY;
  723.          else if (varying == VARYING_SLOT_VIEWPORT)
  724.             data.swizzle = BRW_SWIZZLE_ZZZZ;
  725.          else
  726.             data.swizzle = prog_data->transform_feedback_swizzles[binding];
  727.  
  728.          /* Write data */
  729.          inst = emit(GS_OPCODE_SVB_WRITE, mrf_reg, data, sol_temp);
  730.          inst->sol_binding = binding;
  731.          inst->sol_final_write = final_write;
  732.  
  733.          if (final_write) {
  734.             /* This is the last vertex of the primitive, then increment
  735.              * SO num primitive counter and destination indices.
  736.              */
  737.             emit(ADD(dst_reg(this->destination_indices),
  738.                      this->destination_indices,
  739.                      src_reg(num_verts)));
  740.             emit(ADD(dst_reg(this->sol_prim_written),
  741.                      this->sol_prim_written, 1u));
  742.          }
  743.  
  744.       }
  745.       this->current_annotation = NULL;
  746.    }
  747.    emit(BRW_OPCODE_ENDIF);
  748. }
  749.  
  750. int
  751. gen6_gs_visitor::get_vertex_output_offset_for_varying(int vertex, int varying)
  752. {
  753.    /* Find the output slot assigned to this varying.
  754.     *
  755.     * VARYING_SLOT_LAYER and VARYING_SLOT_VIEWPORT are packed in the same slot
  756.     * as VARYING_SLOT_PSIZ.
  757.     */
  758.    if (varying == VARYING_SLOT_LAYER || varying == VARYING_SLOT_VIEWPORT)
  759.       varying = VARYING_SLOT_PSIZ;
  760.    int slot = prog_data->vue_map.varying_to_slot[varying];
  761.  
  762.    if (slot < 0) {
  763.       /* This varying does not exist in the VUE so we are not writing to it
  764.        * and its value is undefined. We still want to return a valid offset
  765.        * into vertex_output though, to prevent any out-of-bound accesses into
  766.        * the vertex_output array. Since the value for this varying is undefined
  767.        * we don't really care for the value we assign to it, so any offset
  768.        * within the limits of vertex_output will do.
  769.        */
  770.       slot = 0;
  771.    }
  772.  
  773.    return vertex * (prog_data->vue_map.num_slots + 1) + slot;
  774. }
  775.  
  776. } /* namespace brw */
  777.