/*
 * Mesa 3-D graphics library
 *
 * Copyright (C) 2012-2013 LunarG, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Chia-I Wu <olv@lunarg.com>
 */

#include "tgsi/tgsi_dump.h"
#include "toy_compiler.h"
#include "toy_tgsi.h"
#include "toy_legalize.h"
#include "toy_optimize.h"
#include "toy_helpers.h"
#include "ilo_shader_internal.h"

/* XXX Below is proof-of-concept code.  Skip this file! */

/*
 * TODO
 * - primitive id is in r0.1.  FS receives PID as a flat attribute.
 * - set VUE header m0.1 for layered rendering
 */
struct gs_compile_context {
   struct ilo_shader *shader;
   const struct ilo_shader_variant *variant;
   const struct pipe_stream_output_info *so_info;

   struct toy_compiler tc;
   struct toy_tgsi tgsi;
   int output_map[PIPE_MAX_SHADER_OUTPUTS];

   bool write_so;
   bool write_vue;

   int in_vue_size;
   int in_vue_count;

   int out_vue_size;
   int out_vue_min_count;

   bool is_static;

   struct {
      struct toy_src header;
      struct toy_src svbi;
      struct toy_src vues[6];
   } payload;

   struct {
      struct toy_dst urb_write_header;
      bool prim_start;
      bool prim_end;
      int prim_type;

      struct toy_dst tmp;

      /* buffered tgsi_outs */
      struct toy_dst buffers[3];
      int buffer_needed, buffer_cur;

      struct toy_dst so_written;
      struct toy_dst so_index;

      struct toy_src tgsi_outs[PIPE_MAX_SHADER_OUTPUTS];
   } vars;

   struct {
      struct toy_dst total_vertices;
      struct toy_dst total_prims;

      struct toy_dst num_vertices;
      struct toy_dst num_vertices_in_prim;
   } dynamic_data;

   struct {
      int total_vertices;
      int total_prims;
      /* this limits the max vertex count to 256 */
      uint32_t last_vertex[8];

      int num_vertices;
      int num_vertices_in_prim;
   } static_data;

   int first_free_grf;
   int last_free_grf;
   int first_free_mrf;
   int last_free_mrf;
};

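/*
 * gs_COPY8/gs_COPY4/gs_COPY1 emit raw MOVs of 8, 4, or 1 channels with the
 * execution mask disabled; they are used to build message headers and to
 * patch individual dwords of a register.
 */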
static void
gs_COPY8(struct toy_compiler *tc, struct toy_dst dst, struct toy_src src)
{
   struct toy_inst *inst;

   inst = tc_MOV(tc, dst, src);
   inst->exec_size = BRW_EXECUTE_8;
   inst->mask_ctrl = BRW_MASK_DISABLE;
}

static void
gs_COPY4(struct toy_compiler *tc,
         struct toy_dst dst, int dst_ch,
         struct toy_src src, int src_ch)
{
   struct toy_inst *inst;

   inst = tc_MOV(tc,
         tdst_offset(dst, 0, dst_ch),
         tsrc_offset(src, 0, src_ch));
   inst->exec_size = BRW_EXECUTE_4;
   inst->mask_ctrl = BRW_MASK_DISABLE;
}

static void
gs_COPY1(struct toy_compiler *tc,
         struct toy_dst dst, int dst_ch,
         struct toy_src src, int src_ch)
{
   struct toy_inst *inst;

   inst = tc_MOV(tc,
         tdst_offset(dst, 0, dst_ch),
         tsrc_rect(tsrc_offset(src, 0, src_ch), TOY_RECT_010));
   inst->exec_size = BRW_EXECUTE_1;
   inst->mask_ctrl = BRW_MASK_DISABLE;
}

static void
gs_init_vars(struct gs_compile_context *gcc)
{
   struct toy_compiler *tc = &gcc->tc;
   struct toy_dst dst;

   /* init URB_WRITE header */
   dst = gcc->vars.urb_write_header;

   gs_COPY8(tc, dst, gcc->payload.header);

   gcc->vars.prim_start = true;
   gcc->vars.prim_end = false;
   switch (gcc->out_vue_min_count) {
   case 1:
      gcc->vars.prim_type = _3DPRIM_POINTLIST;
      break;
   case 2:
      gcc->vars.prim_type = _3DPRIM_LINESTRIP;
      break;
   case 3:
      gcc->vars.prim_type = _3DPRIM_TRISTRIP;
      break;
   }

   if (gcc->write_so)
      tc_MOV(tc, gcc->vars.so_written, tsrc_imm_d(0));
}

static void
gs_save_output(struct gs_compile_context *gcc, const struct toy_src *outs)
{
   struct toy_compiler *tc = &gcc->tc;
   const struct toy_dst buf = gcc->vars.buffers[gcc->vars.buffer_cur];
   int i;

   for (i = 0; i < gcc->shader->out.count; i++)
      tc_MOV(tc, tdst_offset(buf, i, 0), outs[i]);

   /* advance the cursor */
   gcc->vars.buffer_cur++;
   gcc->vars.buffer_cur %= gcc->vars.buffer_needed;
}

static void
gs_write_so(struct gs_compile_context *gcc,
            struct toy_dst dst,
            struct toy_src index, struct toy_src out,
            bool send_write_commit_message,
            int binding_table_index)
{
   struct toy_compiler *tc = &gcc->tc;
   struct toy_dst mrf_header;
   struct toy_src desc;

   mrf_header = tdst_d(tdst(TOY_FILE_MRF, gcc->first_free_mrf, 0));

   /* m0.5: destination index */
   gs_COPY1(tc, mrf_header, 5, index, 0);

   /* m0.0 - m0.3: RGBA */
   gs_COPY4(tc, mrf_header, 0, tsrc_type(out, mrf_header.type), 0);

   desc = tsrc_imm_mdesc_data_port(tc, false,
         1, send_write_commit_message,
         true, send_write_commit_message,
         GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE, 0,
         binding_table_index);

   tc_SEND(tc, dst, tsrc_from(mrf_header), desc,
         GEN6_SFID_DATAPORT_RENDER_CACHE);
}

static void
gs_write_vue(struct gs_compile_context *gcc,
             struct toy_dst dst, struct toy_src msg_header,
             const struct toy_src *outs, int num_outs,
             bool eot)
{
   struct toy_compiler *tc = &gcc->tc;
   struct toy_dst mrf_header;
   struct toy_src desc;
   int sent = 0;

   mrf_header = tdst_d(tdst(TOY_FILE_MRF, gcc->first_free_mrf, 0));
   gs_COPY8(tc, mrf_header, msg_header);

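   /*
    * Each MRF holds two 4-dword outputs (in its low and high halves).  When
    * the remaining outputs do not fit in the free MRFs, the URB write is
    * split into several messages, with only the last one marked complete.
    */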
   while (sent < num_outs) {
      int mrf = gcc->first_free_mrf + 1;
      const int mrf_avail = gcc->last_free_mrf - mrf + 1;
      int msg_len, num_entries, i;
      bool complete;

      num_entries = (num_outs - sent + 1) / 2;
      complete = true;
      if (num_entries > mrf_avail) {
         num_entries = mrf_avail;
         complete = false;
      }

      for (i = 0; i < num_entries; i++) {
         gs_COPY4(tc, tdst(TOY_FILE_MRF, mrf + i / 2, 0), 0,
               outs[sent + 2 * i], 0);
         if (sent + i * 2 + 1 < gcc->shader->out.count) {
            gs_COPY4(tc, tdst(TOY_FILE_MRF, mrf + i / 2, 0), 4,
                  outs[sent + 2 * i + 1], 0);
         }
         mrf++;
      }

      /* do not forget the header */
      msg_len = num_entries + 1;

      if (complete) {
         desc = tsrc_imm_mdesc_urb(tc,
               eot, msg_len, !eot, true, true, !eot,
               BRW_URB_SWIZZLE_NONE, sent, 0);
      }
      else {
         desc = tsrc_imm_mdesc_urb(tc,
               false, msg_len, 0, false, true, false,
               BRW_URB_SWIZZLE_NONE, sent, 0);
      }

      tc_add2(tc, TOY_OPCODE_URB_WRITE,
            (complete) ? dst : tdst_null(), tsrc_from(mrf_header), desc);

      sent += num_entries * 2;
   }
}

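/*
 * FF_SYNC helper used by the compile paths: the message header is copied
 * from the thread payload, the SO vertex/primitive counts and the GS
 * primitive count are patched in as needed, and the response written to dst
 * provides the handle that the callers copy into the URB_WRITE header.
 */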
static void
gs_ff_sync(struct gs_compile_context *gcc, struct toy_dst dst,
           struct toy_src num_prims)
{
   struct toy_compiler *tc = &gcc->tc;
   struct toy_dst mrf_header =
      tdst_d(tdst(TOY_FILE_MRF, gcc->first_free_mrf, 0));
   struct toy_src desc;
   bool allocate;

   gs_COPY8(tc, mrf_header, gcc->payload.header);

   /* set NumSOVertsToWrite and NumSOPrimsNeeded */
   if (gcc->write_so) {
      if (num_prims.file == TOY_FILE_IMM) {
         const uint32_t v =
            (num_prims.val32 * gcc->in_vue_count) << 16 | num_prims.val32;

         gs_COPY1(tc, mrf_header, 0, tsrc_imm_d(v), 0);
      }
      else {
         struct toy_dst m0_0 = tdst_d(gcc->vars.tmp);

         tc_MUL(tc, m0_0, num_prims, tsrc_imm_d(gcc->in_vue_count << 16));
         tc_OR(tc, m0_0, tsrc_from(m0_0), num_prims);

         gs_COPY1(tc, mrf_header, 0, tsrc_from(m0_0), 0);
      }
   }

   /* set NumGSPrimsGenerated */
   if (gcc->write_vue)
      gs_COPY1(tc, mrf_header, 1, num_prims, 0);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 173:
    *
    *     "Programming Note: If the GS stage is enabled, software must always
    *      allocate at least one GS URB Entry. This is true even if the GS
    *      thread never needs to output vertices to the pipeline, e.g., when
    *      only performing stream output. This is an artifact of the need to
    *      pass the GS thread an initial destination URB handle."
    */
   allocate = true;
   desc = tsrc_imm_mdesc_urb(tc, false, 1, 1,
         false, false, allocate,
         BRW_URB_SWIZZLE_NONE, 0, 1);

   tc_SEND(tc, dst, tsrc_from(mrf_header), desc, BRW_SFID_URB);
}

static void
gs_discard(struct gs_compile_context *gcc)
{
   struct toy_compiler *tc = &gcc->tc;
   struct toy_dst mrf_header;
   struct toy_src desc;

   mrf_header = tdst_d(tdst(TOY_FILE_MRF, gcc->first_free_mrf, 0));

   gs_COPY8(tc, mrf_header, tsrc_from(gcc->vars.urb_write_header));

   desc = tsrc_imm_mdesc_urb(tc,
         true, 1, 0, true, false, false,
         BRW_URB_SWIZZLE_NONE, 0, 0);

   tc_add2(tc, TOY_OPCODE_URB_WRITE,
         tdst_null(), tsrc_from(mrf_header), desc);
}

static void
gs_lower_opcode_endprim(struct gs_compile_context *gcc, struct toy_inst *inst)
{
   /* if there is control flow, set PrimEnd on the last vertex and URB_WRITE */
}

static void
gs_lower_opcode_emit_vue_dynamic(struct gs_compile_context *gcc)
{
   /* TODO similar to the static version */

   /*
    * When SO is enabled and the inputs are lines or triangles, vertices are
    * always buffered.  We can defer the emission of the current vertex until
    * the next EMIT or ENDPRIM.  Or, we can emit two URB_WRITEs with the latter
    * patching the former.
    */
}

static void
gs_lower_opcode_emit_so_dynamic(struct gs_compile_context *gcc)
{
   struct toy_compiler *tc = &gcc->tc;

   tc_IF(tc, tdst_null(),
         tsrc_from(gcc->dynamic_data.num_vertices_in_prim),
         tsrc_imm_d(gcc->out_vue_min_count),
         BRW_CONDITIONAL_GE);

   {
      tc_ADD(tc, gcc->vars.tmp, tsrc_from(gcc->vars.so_index), tsrc_imm_d(0x03020100));

      /* TODO same as static version */
   }

   tc_ENDIF(tc);

   tc_ADD(tc, gcc->vars.so_index,
         tsrc_from(gcc->vars.so_index), tsrc_imm_d(gcc->out_vue_min_count));
}

static void
gs_lower_opcode_emit_vue_static(struct gs_compile_context *gcc)
{
   struct toy_compiler *tc = &gcc->tc;
   struct toy_inst *inst2;
   bool eot;

   eot = (gcc->static_data.num_vertices == gcc->static_data.total_vertices);

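   /*
    * static_data.last_vertex is a bitmask over the emitted vertices; the
    * vertex just emitted (num_vertices - 1) ends the current primitive when
    * its bit is set.
    */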
   gcc->vars.prim_end =
      ((gcc->static_data.last_vertex[(gcc->static_data.num_vertices - 1) / 32] &
        1 << ((gcc->static_data.num_vertices - 1) % 32)) != 0);

   if (eot && gcc->write_so) {
      inst2 = tc_OR(tc, tdst_offset(gcc->vars.urb_write_header, 0, 2),
            tsrc_from(gcc->vars.so_written),
            tsrc_imm_d(gcc->vars.prim_type << 2 |
                       gcc->vars.prim_start << 1 |
                       gcc->vars.prim_end));
      inst2->exec_size = BRW_EXECUTE_1;
      inst2->src[0] = tsrc_rect(inst2->src[0], TOY_RECT_010);
      inst2->src[1] = tsrc_rect(inst2->src[1], TOY_RECT_010);
   }
   else {
      gs_COPY1(tc, gcc->vars.urb_write_header, 2,
            tsrc_imm_d(gcc->vars.prim_type << 2 |
                       gcc->vars.prim_start << 1 |
                       gcc->vars.prim_end), 0);
   }

   gs_write_vue(gcc, tdst_d(gcc->vars.tmp),
         tsrc_from(gcc->vars.urb_write_header),
         gcc->vars.tgsi_outs,
         gcc->shader->out.count, eot);

   if (!eot) {
      gs_COPY1(tc, gcc->vars.urb_write_header, 0,
            tsrc_from(tdst_d(gcc->vars.tmp)), 0);
   }

   gcc->vars.prim_start = gcc->vars.prim_end;
   gcc->vars.prim_end = false;
}

static void
gs_lower_opcode_emit_so_static(struct gs_compile_context *gcc)
{
   struct toy_compiler *tc = &gcc->tc;
   struct toy_inst *inst;
   int i, j;

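   /* nothing to stream out until a full primitive's worth of vertices exists */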
   if (gcc->static_data.num_vertices_in_prim < gcc->out_vue_min_count)
      return;

   inst = tc_MOV(tc, tdst_w(gcc->vars.tmp), tsrc_imm_v(0x03020100));
   inst->exec_size = BRW_EXECUTE_8;
   inst->mask_ctrl = BRW_MASK_DISABLE;

   tc_ADD(tc, tdst_d(gcc->vars.tmp), tsrc_from(tdst_d(gcc->vars.tmp)),
         tsrc_rect(tsrc_from(gcc->vars.so_index), TOY_RECT_010));

   tc_IF(tc, tdst_null(),
         tsrc_rect(tsrc_offset(tsrc_from(tdst_d(gcc->vars.tmp)), 0, gcc->out_vue_min_count - 1), TOY_RECT_010),
         tsrc_rect(tsrc_offset(gcc->payload.svbi, 0, 4), TOY_RECT_010),
         BRW_CONDITIONAL_LE);
   {
      for (i = 0; i < gcc->out_vue_min_count; i++) {
         for (j = 0; j < gcc->so_info->num_outputs; j++) {
            const int idx = gcc->so_info->output[j].register_index;
            struct toy_src index, out;
            int binding_table_index;
            bool write_commit;

            index = tsrc_d(tsrc_offset(tsrc_from(gcc->vars.tmp), 0, i));

            if (i == gcc->out_vue_min_count - 1) {
               out = gcc->vars.tgsi_outs[idx];
            }
            else {
               /* gcc->vars.buffer_cur also points to the first vertex */
               const int buf =
                  (gcc->vars.buffer_cur + i) % gcc->vars.buffer_needed;

               out = tsrc_offset(tsrc_from(gcc->vars.buffers[buf]), idx, 0);
            }

            out = tsrc_offset(out, 0, gcc->so_info->output[j].start_component);

            /*
             * From the Sandy Bridge PRM, volume 4 part 2, page 19:
             *
             *     "The Kernel must do a write commit on the last write to DAP
             *      prior to a URB_WRITE with End of Thread."
             */
            write_commit =
               (gcc->static_data.num_vertices == gcc->static_data.total_vertices &&
                i == gcc->out_vue_min_count - 1 &&
                j == gcc->so_info->num_outputs - 1);

            binding_table_index = ILO_GS_SO_SURFACE(j);

            gs_write_so(gcc, gcc->vars.tmp, index,
                  out, write_commit, binding_table_index);

            /*
             * From the Sandy Bridge PRM, volume 4 part 1, page 168:
             *
             *     "The write commit does not modify the destination register, but
             *      merely clears the dependency associated with the destination
             *      register. Thus, a simple "mov" instruction using the register as a
             *      source is sufficient to wait for the write commit to occur."
             */
            if (write_commit)
               tc_MOV(tc, gcc->vars.tmp, tsrc_from(gcc->vars.tmp));
         }
      }

      /* SONumPrimsWritten occupies the higher word of m0.2 of URB_WRITE */
      tc_ADD(tc, gcc->vars.so_written,
            tsrc_from(gcc->vars.so_written), tsrc_imm_d(1 << 16));
      tc_ADD(tc, gcc->vars.so_index,
            tsrc_from(gcc->vars.so_index), tsrc_imm_d(gcc->out_vue_min_count));
   }
   tc_ENDIF(tc);
}

static void
gs_lower_opcode_emit_static(struct gs_compile_context *gcc,
                            struct toy_inst *inst)
{
   gcc->static_data.num_vertices++;
   gcc->static_data.num_vertices_in_prim++;

   if (gcc->write_so) {
      gs_lower_opcode_emit_so_static(gcc);

      if (gcc->out_vue_min_count > 1 &&
          gcc->static_data.num_vertices != gcc->static_data.total_vertices)
         gs_save_output(gcc, gcc->vars.tgsi_outs);
   }

   if (gcc->write_vue)
      gs_lower_opcode_emit_vue_static(gcc);
}

static void
gs_lower_opcode_emit_dynamic(struct gs_compile_context *gcc,
                             struct toy_inst *inst)
{
   struct toy_compiler *tc = &gcc->tc;

   tc_ADD(tc, gcc->dynamic_data.num_vertices,
         tsrc_from(gcc->dynamic_data.num_vertices), tsrc_imm_d(1));
   tc_ADD(tc, gcc->dynamic_data.num_vertices_in_prim,
         tsrc_from(gcc->dynamic_data.num_vertices_in_prim), tsrc_imm_d(1));

   if (gcc->write_so) {
      gs_lower_opcode_emit_so_dynamic(gcc);

      if (gcc->out_vue_min_count > 1)
         gs_save_output(gcc, gcc->vars.tgsi_outs);
   }

   if (gcc->write_vue)
      gs_lower_opcode_emit_vue_dynamic(gcc);
}

static void
gs_lower_opcode_emit(struct gs_compile_context *gcc, struct toy_inst *inst)
{
   if (gcc->is_static)
      gs_lower_opcode_emit_static(gcc, inst);
   else
      gs_lower_opcode_emit_dynamic(gcc, inst);
}

static void
gs_lower_opcode_tgsi_in(struct gs_compile_context *gcc,
                        struct toy_dst dst, int dim, int idx)
{
   struct toy_compiler *tc = &gcc->tc;
   struct toy_src attr;
   int slot, reg = -1, subreg;

   slot = toy_tgsi_find_input(&gcc->tgsi, idx);
   if (slot >= 0) {
      int i;

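      /*
       * Inputs are packed two attributes per payload register: attribute i
       * of a vertex lives in register i / 2, at dword offset (i % 2) * 4.
       */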
      for (i = 0; i < gcc->variant->u.gs.num_inputs; i++) {
         if (gcc->variant->u.gs.semantic_names[i] ==
               gcc->tgsi.inputs[slot].semantic_name &&
               gcc->variant->u.gs.semantic_indices[i] ==
               gcc->tgsi.inputs[slot].semantic_index) {
            reg = i / 2;
            subreg = (i % 2) * 4;
            break;
         }
      }
   }

   if (reg < 0) {
      tc_MOV(tc, dst, tsrc_imm_f(0.0f));
      return;
   }

   /* fix vertex ordering for _3DPRIM_TRISTRIP_REVERSE */
   if (gcc->in_vue_count == 3 && dim < 2) {
      struct toy_inst *inst;

      /* get PrimType */
      inst = tc_AND(tc, tdst_d(gcc->vars.tmp),
            tsrc_offset(gcc->payload.header, 0, 2), tsrc_imm_d(0x1f));
      inst->exec_size = BRW_EXECUTE_1;
      inst->src[0] = tsrc_rect(inst->src[0], TOY_RECT_010);
      inst->src[1] = tsrc_rect(inst->src[1], TOY_RECT_010);

      inst = tc_CMP(tc, tdst_null(), tsrc_from(tdst_d(gcc->vars.tmp)),
            tsrc_imm_d(_3DPRIM_TRISTRIP_REVERSE), BRW_CONDITIONAL_NEQ);
      inst->src[0] = tsrc_rect(inst->src[0], TOY_RECT_010);

      attr = tsrc_offset(gcc->payload.vues[dim], reg, subreg);
      inst = tc_MOV(tc, dst, attr);
      inst->pred_ctrl = BRW_PREDICATE_NORMAL;

      /* swap IN[0] and IN[1] for _3DPRIM_TRISTRIP_REVERSE */
      dim = !dim;

      attr = tsrc_offset(gcc->payload.vues[dim], reg, subreg);
      inst = tc_MOV(tc, dst, attr);
      inst->pred_ctrl = BRW_PREDICATE_NORMAL;
      inst->pred_inv = true;
   }
   else {
      attr = tsrc_offset(gcc->payload.vues[dim], reg, subreg);
      tc_MOV(tc, dst, attr);
   }
}

static void
gs_lower_opcode_tgsi_imm(struct gs_compile_context *gcc,
                         struct toy_dst dst, int idx)
{
   const uint32_t *imm;
   int ch;

   imm = toy_tgsi_get_imm(&gcc->tgsi, idx, NULL);

   for (ch = 0; ch < 4; ch++) {
      struct toy_inst *inst;

      /* raw moves */
      inst = tc_MOV(&gcc->tc,
            tdst_writemask(tdst_ud(dst), 1 << ch),
            tsrc_imm_ud(imm[ch]));
      inst->access_mode = BRW_ALIGN_16;
   }
}

static void
gs_lower_opcode_tgsi_direct(struct gs_compile_context *gcc,
                            struct toy_inst *inst)
{
   struct toy_compiler *tc = &gcc->tc;
   int dim, idx;

   assert(inst->src[0].file == TOY_FILE_IMM);
   dim = inst->src[0].val32;

   assert(inst->src[1].file == TOY_FILE_IMM);
   idx = inst->src[1].val32;

   switch (inst->opcode) {
   case TOY_OPCODE_TGSI_IN:
      gs_lower_opcode_tgsi_in(gcc, inst->dst, dim, idx);
      /* fetch all dimensions */
      if (dim == 0) {
         int i;

         for (i = 1; i < gcc->in_vue_count; i++) {
            const int vrf = toy_tgsi_get_vrf(&gcc->tgsi, TGSI_FILE_INPUT, i, idx);
            struct toy_dst dst;

            if (vrf < 0)
               continue;

            dst = tdst(TOY_FILE_VRF, vrf, 0);
            gs_lower_opcode_tgsi_in(gcc, dst, i, idx);
         }
      }
      break;
   case TOY_OPCODE_TGSI_IMM:
      assert(!dim);
      gs_lower_opcode_tgsi_imm(gcc, inst->dst, idx);
      break;
   case TOY_OPCODE_TGSI_CONST:
   case TOY_OPCODE_TGSI_SV:
   default:
      tc_fail(tc, "unhandled TGSI fetch");
      break;
   }

   tc_discard_inst(tc, inst);
}

static void
gs_lower_virtual_opcodes(struct gs_compile_context *gcc)
{
   struct toy_compiler *tc = &gcc->tc;
   struct toy_inst *inst;

   tc_head(tc);
   while ((inst = tc_next(tc)) != NULL) {
      switch (inst->opcode) {
      case TOY_OPCODE_TGSI_IN:
      case TOY_OPCODE_TGSI_CONST:
      case TOY_OPCODE_TGSI_SV:
      case TOY_OPCODE_TGSI_IMM:
         gs_lower_opcode_tgsi_direct(gcc, inst);
         break;
      case TOY_OPCODE_TGSI_INDIRECT_FETCH:
      case TOY_OPCODE_TGSI_INDIRECT_STORE:
         /* TODO similar to VS */
         tc_fail(tc, "no indirection support");
         tc_discard_inst(tc, inst);
         break;
      case TOY_OPCODE_TGSI_TEX:
      case TOY_OPCODE_TGSI_TXB:
      case TOY_OPCODE_TGSI_TXD:
      case TOY_OPCODE_TGSI_TXL:
      case TOY_OPCODE_TGSI_TXP:
      case TOY_OPCODE_TGSI_TXF:
      case TOY_OPCODE_TGSI_TXQ:
      case TOY_OPCODE_TGSI_TXQ_LZ:
      case TOY_OPCODE_TGSI_TEX2:
      case TOY_OPCODE_TGSI_TXB2:
      case TOY_OPCODE_TGSI_TXL2:
      case TOY_OPCODE_TGSI_SAMPLE:
      case TOY_OPCODE_TGSI_SAMPLE_I:
      case TOY_OPCODE_TGSI_SAMPLE_I_MS:
      case TOY_OPCODE_TGSI_SAMPLE_B:
      case TOY_OPCODE_TGSI_SAMPLE_C:
      case TOY_OPCODE_TGSI_SAMPLE_C_LZ:
      case TOY_OPCODE_TGSI_SAMPLE_D:
      case TOY_OPCODE_TGSI_SAMPLE_L:
      case TOY_OPCODE_TGSI_GATHER4:
      case TOY_OPCODE_TGSI_SVIEWINFO:
      case TOY_OPCODE_TGSI_SAMPLE_POS:
      case TOY_OPCODE_TGSI_SAMPLE_INFO:
         /* TODO similar to VS */
         tc_fail(tc, "no sampling support");
         tc_discard_inst(tc, inst);
         break;
      case TOY_OPCODE_EMIT:
         gs_lower_opcode_emit(gcc, inst);
         tc_discard_inst(tc, inst);
         break;
      case TOY_OPCODE_ENDPRIM:
         gs_lower_opcode_endprim(gcc, inst);
         tc_discard_inst(tc, inst);
         break;
      default:
         break;
      }
   }

   tc_head(tc);
   while ((inst = tc_next(tc)) != NULL) {
      switch (inst->opcode) {
      case TOY_OPCODE_INV:
      case TOY_OPCODE_LOG:
      case TOY_OPCODE_EXP:
      case TOY_OPCODE_SQRT:
      case TOY_OPCODE_RSQ:
      case TOY_OPCODE_SIN:
      case TOY_OPCODE_COS:
      case TOY_OPCODE_FDIV:
      case TOY_OPCODE_POW:
      case TOY_OPCODE_INT_DIV_QUOTIENT:
      case TOY_OPCODE_INT_DIV_REMAINDER:
         toy_compiler_lower_math(tc, inst);
         break;
      case TOY_OPCODE_URB_WRITE:
         toy_compiler_lower_to_send(tc, inst, false, BRW_SFID_URB);
         break;
      default:
         if (inst->opcode > 127)
            tc_fail(tc, "unhandled virtual opcode");
         break;
      }
   }
}

/**
 * Determine the number of vertices and primitives generated by this shader.
 * Mark the shader as non-static when the counts are not known until runtime.
 */
static void
get_num_prims_static(struct gs_compile_context *gcc)
{
   struct toy_compiler *tc = &gcc->tc;
   const struct toy_inst *inst;
   int num_vertices_in_prim = 0, if_depth = 0, do_depth = 0;
   bool is_static = true;

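   /*
    * Scan the instruction list: any EMIT or ENDPRIM nested inside an IF or
    * DO block makes the counts depend on runtime control flow.
    */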
   tc_head(tc);
   while ((inst = tc_next_no_skip(tc)) != NULL) {
      switch (inst->opcode) {
      case BRW_OPCODE_IF:
         if_depth++;
         break;
      case BRW_OPCODE_ENDIF:
         if_depth--;
         break;
      case BRW_OPCODE_DO:
         do_depth++;
         break;
      case BRW_OPCODE_WHILE:
         do_depth--;
         break;
      case TOY_OPCODE_EMIT:
         if (if_depth || do_depth) {
            is_static = false;
         }
         else {
            gcc->static_data.total_vertices++;

            num_vertices_in_prim++;
            if (num_vertices_in_prim >= gcc->out_vue_min_count)
               gcc->static_data.total_prims++;
         }
         break;
      case TOY_OPCODE_ENDPRIM:
         if (if_depth || do_depth) {
            is_static = false;
         }
         else {
            const int vertidx = gcc->static_data.total_vertices - 1;
            const int idx = vertidx / 32;
            const int subidx = vertidx % 32;

            gcc->static_data.last_vertex[idx] |= 1 << subidx;
            num_vertices_in_prim = 0;
         }
         break;
      default:
         break;
      }

      if (!is_static)
         break;
   }

   gcc->is_static = is_static;
}

/**
 * Compile the shader.
 */
static bool
gs_compile(struct gs_compile_context *gcc)
{
   struct toy_compiler *tc = &gcc->tc;
   struct ilo_shader *sh = gcc->shader;

   get_num_prims_static(gcc);

   if (gcc->is_static) {
      tc_head(tc);

      gs_init_vars(gcc);
      gs_ff_sync(gcc, tdst_d(gcc->vars.tmp), tsrc_imm_d(gcc->static_data.total_prims));
      gs_COPY1(tc, gcc->vars.urb_write_header, 0, tsrc_from(tdst_d(gcc->vars.tmp)), 0);
      if (gcc->write_so)
         gs_COPY4(tc, gcc->vars.so_index, 0, tsrc_from(tdst_d(gcc->vars.tmp)), 1);

      tc_tail(tc);
   }
   else {
      tc_fail(tc, "no control flow support");
      return false;
   }

   if (!gcc->write_vue)
      gs_discard(gcc);

   gs_lower_virtual_opcodes(gcc);
   toy_compiler_legalize_for_ra(tc);
   toy_compiler_optimize(tc);
   toy_compiler_allocate_registers(tc,
         gcc->first_free_grf,
         gcc->last_free_grf,
         1);
   toy_compiler_legalize_for_asm(tc);

   if (tc->fail) {
      ilo_err("failed to legalize GS instructions: %s\n", tc->reason);
      return false;
   }

   if (ilo_debug & ILO_DEBUG_GS) {
      ilo_printf("legalized instructions:\n");
      toy_compiler_dump(tc);
      ilo_printf("\n");
   }

   sh->kernel = toy_compiler_assemble(tc, &sh->kernel_size);
   if (!sh->kernel)
      return false;

   if (ilo_debug & ILO_DEBUG_GS) {
      ilo_printf("disassembly:\n");
      toy_compiler_disassemble(tc, sh->kernel, sh->kernel_size);
      ilo_printf("\n");
   }

   return true;
}

static bool
gs_compile_passthrough(struct gs_compile_context *gcc)
{
   struct toy_compiler *tc = &gcc->tc;
   struct ilo_shader *sh = gcc->shader;

   gcc->is_static = true;
   gcc->static_data.total_vertices = gcc->in_vue_count;
   gcc->static_data.total_prims = 1;
   gcc->static_data.last_vertex[0] = 1 << (gcc->in_vue_count - 1);

   gs_init_vars(gcc);
   gs_ff_sync(gcc, tdst_d(gcc->vars.tmp), tsrc_imm_d(gcc->static_data.total_prims));
   gs_COPY1(tc, gcc->vars.urb_write_header, 0, tsrc_from(tdst_d(gcc->vars.tmp)), 0);
   if (gcc->write_so)
      gs_COPY4(tc, gcc->vars.so_index, 0, tsrc_from(tdst_d(gcc->vars.tmp)), 1);

   {
      int vert, attr;

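      /*
       * Pass-through body: copy each input vertex's attributes to the GS
       * outputs and emit it, reusing the static EMIT/ENDPRIM lowering.
       */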
      for (vert = 0; vert < gcc->out_vue_min_count; vert++) {
         for (attr = 0; attr < gcc->shader->out.count; attr++) {
            tc_MOV(tc, tdst_from(gcc->vars.tgsi_outs[attr]),
                  tsrc_offset(gcc->payload.vues[vert], attr / 2, (attr % 2) * 4));
         }

         gs_lower_opcode_emit(gcc, NULL);
      }

      gs_lower_opcode_endprim(gcc, NULL);
   }

   if (!gcc->write_vue)
      gs_discard(gcc);

   gs_lower_virtual_opcodes(gcc);

   toy_compiler_legalize_for_ra(tc);
   toy_compiler_optimize(tc);
   toy_compiler_allocate_registers(tc,
         gcc->first_free_grf,
         gcc->last_free_grf,
         1);

   toy_compiler_legalize_for_asm(tc);

   if (tc->fail) {
      ilo_err("failed to translate GS TGSI tokens: %s\n", tc->reason);
      return false;
   }

   if (ilo_debug & ILO_DEBUG_GS) {
      int i;

      ilo_printf("VUE count %d, VUE size %d\n",
            gcc->in_vue_count, gcc->in_vue_size);
      ilo_printf("%srasterizer discard\n",
            (gcc->variant->u.gs.rasterizer_discard) ? "" : "no ");

      for (i = 0; i < gcc->so_info->num_outputs; i++) {
         ilo_printf("SO[%d] = OUT[%d]\n", i,
               gcc->so_info->output[i].register_index);
      }

      ilo_printf("legalized instructions:\n");
      toy_compiler_dump(tc);
      ilo_printf("\n");
   }

   sh->kernel = toy_compiler_assemble(tc, &sh->kernel_size);
   if (!sh->kernel) {
      ilo_err("failed to compile GS: %s\n", tc->reason);
      return false;
   }

   if (ilo_debug & ILO_DEBUG_GS) {
      ilo_printf("disassembly:\n");
      toy_compiler_disassemble(tc, sh->kernel, sh->kernel_size);
      ilo_printf("\n");
   }

   return true;
}

/**
 * Translate the TGSI tokens.
 */
static bool
gs_setup_tgsi(struct toy_compiler *tc, const struct tgsi_token *tokens,
              struct toy_tgsi *tgsi)
{
   if (ilo_debug & ILO_DEBUG_GS) {
      ilo_printf("dumping geometry shader\n");
      ilo_printf("\n");

      tgsi_dump(tokens, 0);
      ilo_printf("\n");
   }

   toy_compiler_translate_tgsi(tc, tokens, true, tgsi);
   if (tc->fail)
      return false;

   if (ilo_debug & ILO_DEBUG_GS) {
      ilo_printf("TGSI translator:\n");
      toy_tgsi_dump(tgsi);
      ilo_printf("\n");
      toy_compiler_dump(tc);
      ilo_printf("\n");
   }

   return true;
}

/**
 * Set up shader inputs for fixed-function units.
 */
static void
gs_setup_shader_in(struct ilo_shader *sh,
                   const struct ilo_shader_variant *variant)
{
   int i;

   for (i = 0; i < variant->u.gs.num_inputs; i++) {
      sh->in.semantic_names[i] = variant->u.gs.semantic_names[i];
      sh->in.semantic_indices[i] = variant->u.gs.semantic_indices[i];
      sh->in.interp[i] = TGSI_INTERPOLATE_CONSTANT;
      sh->in.centroid[i] = false;
   }

   sh->in.count = variant->u.gs.num_inputs;

   sh->in.has_pos = false;
   sh->in.has_linear_interp = false;
   sh->in.barycentric_interpolation_mode = 0;
}

/**
 * Set up shader outputs for fixed-function units.
 *
 * XXX share the code with VS
 */
static void
gs_setup_shader_out(struct ilo_shader *sh, const struct toy_tgsi *tgsi,
                    bool output_clipdist, int *output_map)
{
   int psize_slot = -1, pos_slot = -1;
   int clipdist_slot[2] = { -1, -1 };
   int color_slot[4] = { -1, -1, -1, -1 };
   int num_outs, i;

   /* find out the slots of outputs that need special care */
   for (i = 0; i < tgsi->num_outputs; i++) {
      switch (tgsi->outputs[i].semantic_name) {
      case TGSI_SEMANTIC_PSIZE:
         psize_slot = i;
         break;
      case TGSI_SEMANTIC_POSITION:
         pos_slot = i;
         break;
      case TGSI_SEMANTIC_CLIPDIST:
         if (tgsi->outputs[i].semantic_index)
            clipdist_slot[1] = i;
         else
            clipdist_slot[0] = i;
         break;
      case TGSI_SEMANTIC_COLOR:
         if (tgsi->outputs[i].semantic_index)
            color_slot[2] = i;
         else
            color_slot[0] = i;
         break;
      case TGSI_SEMANTIC_BCOLOR:
         if (tgsi->outputs[i].semantic_index)
            color_slot[3] = i;
         else
            color_slot[1] = i;
         break;
      default:
         break;
      }
   }

   /* the first two VUEs are always PSIZE and POSITION */
   num_outs = 2;
   output_map[0] = psize_slot;
   output_map[1] = pos_slot;

   sh->out.register_indices[0] =
      (psize_slot >= 0) ? tgsi->outputs[psize_slot].index : -1;
   sh->out.semantic_names[0] = TGSI_SEMANTIC_PSIZE;
   sh->out.semantic_indices[0] = 0;

   sh->out.register_indices[1] =
      (pos_slot >= 0) ? tgsi->outputs[pos_slot].index : -1;
   sh->out.semantic_names[1] = TGSI_SEMANTIC_POSITION;
   sh->out.semantic_indices[1] = 0;

   sh->out.has_pos = true;

   /* followed by optional clip distances */
   if (output_clipdist) {
      sh->out.register_indices[num_outs] =
         (clipdist_slot[0] >= 0) ? tgsi->outputs[clipdist_slot[0]].index : -1;
      sh->out.semantic_names[num_outs] = TGSI_SEMANTIC_CLIPDIST;
      sh->out.semantic_indices[num_outs] = 0;
      output_map[num_outs++] = clipdist_slot[0];

      sh->out.register_indices[num_outs] =
         (clipdist_slot[1] >= 0) ? tgsi->outputs[clipdist_slot[1]].index : -1;
      sh->out.semantic_names[num_outs] = TGSI_SEMANTIC_CLIPDIST;
      sh->out.semantic_indices[num_outs] = 1;
      output_map[num_outs++] = clipdist_slot[1];
   }

   /*
    * make BCOLOR follow COLOR so that we can make use of
    * ATTRIBUTE_SWIZZLE_INPUTATTR_FACING in 3DSTATE_SF
    */
   for (i = 0; i < 4; i++) {
      const int slot = color_slot[i];

      if (slot < 0)
         continue;

      sh->out.register_indices[num_outs] = tgsi->outputs[slot].index;
      sh->out.semantic_names[num_outs] = tgsi->outputs[slot].semantic_name;
      sh->out.semantic_indices[num_outs] = tgsi->outputs[slot].semantic_index;

      output_map[num_outs++] = slot;
   }

   /* add the rest of the outputs */
   for (i = 0; i < tgsi->num_outputs; i++) {
      switch (tgsi->outputs[i].semantic_name) {
      case TGSI_SEMANTIC_PSIZE:
      case TGSI_SEMANTIC_POSITION:
      case TGSI_SEMANTIC_CLIPDIST:
      case TGSI_SEMANTIC_COLOR:
      case TGSI_SEMANTIC_BCOLOR:
         break;
      default:
         sh->out.register_indices[num_outs] = tgsi->outputs[i].index;
         sh->out.semantic_names[num_outs] = tgsi->outputs[i].semantic_name;
         sh->out.semantic_indices[num_outs] = tgsi->outputs[i].semantic_index;
         output_map[num_outs++] = i;
         break;
      }
   }

   sh->out.count = num_outs;
}

static void
gs_setup_vars(struct gs_compile_context *gcc)
{
   int grf = gcc->first_free_grf;
   int i;

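   /*
    * Carve GRFs out of the free range for the URB_WRITE header, a scratch
    * register, and, when streaming out, the vertex buffers plus the SO
    * bookkeeping registers; gcc->first_free_grf is advanced accordingly.
    */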
   gcc->vars.urb_write_header = tdst_d(tdst(TOY_FILE_GRF, grf, 0));
   grf++;

   gcc->vars.tmp = tdst(TOY_FILE_GRF, grf, 0);
   grf++;

   if (gcc->write_so) {
      gcc->vars.buffer_needed = gcc->out_vue_min_count - 1;
      for (i = 0; i < gcc->vars.buffer_needed; i++) {
         gcc->vars.buffers[i] = tdst(TOY_FILE_GRF, grf, 0);
         grf += gcc->shader->out.count;
      }

      gcc->vars.so_written = tdst_d(tdst(TOY_FILE_GRF, grf, 0));
      grf++;

      gcc->vars.so_index = tdst_d(tdst(TOY_FILE_GRF, grf, 0));
      grf++;
   }

   gcc->first_free_grf = grf;

   if (!gcc->tgsi.reg_mapping) {
      for (i = 0; i < gcc->shader->out.count; i++)
         gcc->vars.tgsi_outs[i] = tsrc(TOY_FILE_GRF, grf++, 0);

      gcc->first_free_grf = grf;
      return;
   }

   for (i = 0; i < gcc->shader->out.count; i++) {
      const int slot = gcc->output_map[i];
      const int vrf = (slot >= 0) ? toy_tgsi_get_vrf(&gcc->tgsi,
            TGSI_FILE_OUTPUT, 0, gcc->tgsi.outputs[slot].index) : -1;

      if (vrf >= 0)
         gcc->vars.tgsi_outs[i] = tsrc(TOY_FILE_VRF, vrf, 0);
      else
         gcc->vars.tgsi_outs[i] = (i == 0) ? tsrc_imm_d(0) : tsrc_imm_f(0.0f);
   }
}

static void
gs_setup_payload(struct gs_compile_context *gcc)
{
   int grf, i;

   grf = 0;

   /* r0: payload header */
   gcc->payload.header = tsrc_d(tsrc(TOY_FILE_GRF, grf, 0));
   grf++;

   /* r1: SVBI */
   if (gcc->write_so) {
      gcc->payload.svbi = tsrc_ud(tsrc(TOY_FILE_GRF, grf, 0));
      grf++;
   }

   /* URB data */
   gcc->shader->in.start_grf = grf;

   /* no pull constants */

   /* VUEs */
   for (i = 0; i < gcc->in_vue_count; i++) {
      gcc->payload.vues[i] = tsrc(TOY_FILE_GRF, grf, 0);
      grf += gcc->in_vue_size;
   }

   gcc->first_free_grf = grf;
   gcc->last_free_grf = 127;
}

/**
 * Set up GS compile context.  This includes translating the TGSI tokens.
 */
static bool
gs_setup(struct gs_compile_context *gcc,
         const struct ilo_shader_state *state,
         const struct ilo_shader_variant *variant,
         int num_verts)
{
   memset(gcc, 0, sizeof(*gcc));

   gcc->shader = CALLOC_STRUCT(ilo_shader);
   if (!gcc->shader)
      return false;

   gcc->variant = variant;
   gcc->so_info = &state->info.stream_output;

   toy_compiler_init(&gcc->tc, state->info.dev);

   gcc->write_so = (state->info.stream_output.num_outputs > 0);
   gcc->write_vue = !gcc->variant->u.gs.rasterizer_discard;

   gcc->tc.templ.access_mode = BRW_ALIGN_16;
   gcc->tc.templ.exec_size = BRW_EXECUTE_4;
   gcc->tc.rect_linear_width = 4;

   if (state->info.tokens) {
      if (!gs_setup_tgsi(&gcc->tc, state->info.tokens, &gcc->tgsi)) {
         toy_compiler_cleanup(&gcc->tc);
         FREE(gcc->shader);
         return false;
      }

      switch (gcc->tgsi.props.gs_input_prim) {
      case PIPE_PRIM_POINTS:
         gcc->in_vue_count = 1;
         break;
      case PIPE_PRIM_LINES:
         gcc->in_vue_count = 2;
         gcc->shader->in.discard_adj = true;
         break;
      case PIPE_PRIM_TRIANGLES:
         gcc->in_vue_count = 3;
         gcc->shader->in.discard_adj = true;
         break;
      case PIPE_PRIM_LINES_ADJACENCY:
         gcc->in_vue_count = 4;
         break;
      case PIPE_PRIM_TRIANGLES_ADJACENCY:
         gcc->in_vue_count = 6;
         break;
      default:
         tc_fail(&gcc->tc, "unsupported GS input type");
         gcc->in_vue_count = 0;
         break;
      }

      switch (gcc->tgsi.props.gs_output_prim) {
      case PIPE_PRIM_POINTS:
         gcc->out_vue_min_count = 1;
         break;
      case PIPE_PRIM_LINE_STRIP:
         gcc->out_vue_min_count = 2;
         break;
      case PIPE_PRIM_TRIANGLE_STRIP:
         gcc->out_vue_min_count = 3;
         break;
      default:
         tc_fail(&gcc->tc, "unsupported GS output type");
         gcc->out_vue_min_count = 0;
         break;
      }
   }
   else {
      int i;

      gcc->in_vue_count = num_verts;
      gcc->out_vue_min_count = num_verts;

      gcc->tgsi.num_outputs = gcc->variant->u.gs.num_inputs;
      for (i = 0; i < gcc->variant->u.gs.num_inputs; i++) {
         gcc->tgsi.outputs[i].semantic_name =
            gcc->variant->u.gs.semantic_names[i];
         gcc->tgsi.outputs[i].semantic_index =
            gcc->variant->u.gs.semantic_indices[i];
      }
   }

   gcc->tc.templ.access_mode = BRW_ALIGN_1;

   gs_setup_shader_in(gcc->shader, gcc->variant);
   gs_setup_shader_out(gcc->shader, &gcc->tgsi, false, gcc->output_map);

   gcc->in_vue_size = (gcc->shader->in.count + 1) / 2;

   gcc->out_vue_size = (gcc->shader->out.count + 1) / 2;

   gs_setup_payload(gcc);
   gs_setup_vars(gcc);

   /* m0 is reserved for system routines */
   gcc->first_free_mrf = 1;
   gcc->last_free_mrf = 15;

   return true;
}

/**
 * Compile the geometry shader.
 */
struct ilo_shader *
ilo_shader_compile_gs(const struct ilo_shader_state *state,
                      const struct ilo_shader_variant *variant)
{
   struct gs_compile_context gcc;

   if (!gs_setup(&gcc, state, variant, 0))
      return NULL;

   if (!gs_compile(&gcc)) {
      FREE(gcc.shader);
      gcc.shader = NULL;
   }

   toy_tgsi_cleanup(&gcc.tgsi);
   toy_compiler_cleanup(&gcc.tc);

   return gcc.shader;
}

static bool
append_gs_to_vs(struct ilo_shader *vs, struct ilo_shader *gs, int num_verts)
{
   void *combined;
   int gs_offset;

   if (!gs)
      return false;

   /* kernels must be 64-byte aligned */
   gs_offset = align(vs->kernel_size, 64);
   combined = REALLOC(vs->kernel, vs->kernel_size,
         gs_offset + gs->kernel_size);
   if (!combined)
      return false;

   memcpy(combined + gs_offset, gs->kernel, gs->kernel_size);

   vs->kernel = combined;
   vs->kernel_size = gs_offset + gs->kernel_size;

   vs->stream_output = true;
   vs->gs_offsets[num_verts - 1] = gs_offset;
   vs->gs_start_grf = gs->in.start_grf;

   ilo_shader_destroy_kernel(gs);

   return true;
}

bool
ilo_shader_compile_gs_passthrough(const struct ilo_shader_state *vs_state,
                                  const struct ilo_shader_variant *vs_variant,
                                  const int *so_mapping,
                                  struct ilo_shader *vs)
{
   struct gs_compile_context gcc;
   struct ilo_shader_state state;
   struct ilo_shader_variant variant;
   const int num_verts = 3;
   int i;

   /* init GS state and variant */
   state = *vs_state;
   state.info.tokens = NULL;
   for (i = 0; i < state.info.stream_output.num_outputs; i++) {
      const int reg = state.info.stream_output.output[i].register_index;

      state.info.stream_output.output[i].register_index = so_mapping[reg];
   }

   variant = *vs_variant;
   variant.u.gs.rasterizer_discard = vs_variant->u.vs.rasterizer_discard;
   variant.u.gs.num_inputs = vs->out.count;
   for (i = 0; i < vs->out.count; i++) {
      variant.u.gs.semantic_names[i] =
         vs->out.semantic_names[i];
      variant.u.gs.semantic_indices[i] =
         vs->out.semantic_indices[i];
   }

   if (!gs_setup(&gcc, &state, &variant, num_verts))
      return false;

   if (!gs_compile_passthrough(&gcc)) {
      FREE(gcc.shader);
      gcc.shader = NULL;
   }

   /* no need to call toy_tgsi_cleanup() */
   toy_compiler_cleanup(&gcc.tc);

   return append_gs_to_vs(vs, gcc.shader, num_verts);
}