Subversion Repositories Kolibri OS

Compare Revisions

Regard whitespace Rev 4357 → Rev 4358

/contrib/sdk/sources/Mesa/src/gallium/drivers/ilo/shader/ilo_shader_gs.c
0,0 → 1,1449
/*
* Mesa 3-D graphics library
*
* Copyright (C) 2012-2013 LunarG, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
* Authors:
* Chia-I Wu <olv@lunarg.com>
*/
 
#include "tgsi/tgsi_dump.h"
#include "toy_compiler.h"
#include "toy_tgsi.h"
#include "toy_legalize.h"
#include "toy_optimize.h"
#include "toy_helpers.h"
#include "ilo_shader_internal.h"
 
/* XXX Below is proof-of-concept code. Skip this file! */
 
/*
* TODO
* - primitive id is in r0.1. FS receives PID as a flat attribute.
* - set VUE header m0.1 for layered rendering
*/
/* Everything needed while compiling one geometry shader. */
struct gs_compile_context {
   struct ilo_shader *shader;                     /* the shader being produced */
   const struct ilo_shader_variant *variant;
   const struct pipe_stream_output_info *so_info; /* stream-output (SO) layout */

   struct toy_compiler tc;
   struct toy_tgsi tgsi;
   /* maps VUE output slot -> TGSI output slot (-1 when absent) */
   int output_map[PIPE_MAX_SHADER_OUTPUTS];

   bool write_so;    /* stream output is active */
   bool write_vue;   /* vertices go down the pipeline (no rasterizer discard) */

   int in_vue_size;  /* input VUE size in GRF pairs (see gs_setup) */
   int in_vue_count; /* vertices per input primitive */

   int out_vue_size;       /* output VUE size in GRF pairs */
   int out_vue_min_count;  /* vertices needed to complete an output primitive */

   /* true when EMIT/ENDPRIM counts are known at compile time */
   bool is_static;

   /* sources fixed by the thread payload layout (see gs_setup_payload) */
   struct {
      struct toy_src header;  /* r0: payload header */
      struct toy_src svbi;    /* r1: streamed vertex buffer indices (SO only) */
      struct toy_src vues[6]; /* one input VUE per vertex */
   } payload;

   /* registers used while emitting vertices */
   struct {
      struct toy_dst urb_write_header; /* m0 template for URB_WRITE */
      bool prim_start;
      bool prim_end;
      int prim_type;

      struct toy_dst tmp;

      /* buffered tgsi_outs */
      struct toy_dst buffers[3];
      int buffer_needed, buffer_cur;

      struct toy_dst so_written; /* primitives written so far (SO) */
      struct toy_dst so_index;   /* running SO buffer index */

      struct toy_src tgsi_outs[PIPE_MAX_SHADER_OUTPUTS];
   } vars;

   /* counters kept in registers when counts are dynamic */
   struct {
      struct toy_dst total_vertices;
      struct toy_dst total_prims;

      struct toy_dst num_vertices;
      struct toy_dst num_vertices_in_prim;
   } dynamic_data;

   /* counters computed at compile time when counts are static */
   struct {
      int total_vertices;
      int total_prims;
      /* this limits the max vertice count to be 256 */
      uint32_t last_vertex[8];

      int num_vertices;
      int num_vertices_in_prim;
   } static_data;

   int first_free_grf;
   int last_free_grf;
   int first_free_mrf;
   int last_free_mrf;
};
 
/* Emit an 8-wide raw MOV with the execution mask disabled. */
static void
gs_COPY8(struct toy_compiler *tc, struct toy_dst dst, struct toy_src src)
{
   struct toy_inst *mov = tc_MOV(tc, dst, src);

   mov->exec_size = BRW_EXECUTE_8;
   mov->mask_ctrl = BRW_MASK_DISABLE;
}
 
/*
 * Emit a 4-wide raw MOV, with the execution mask disabled, from channel
 * src_ch of src to channel dst_ch of dst.
 */
static void
gs_COPY4(struct toy_compiler *tc,
         struct toy_dst dst, int dst_ch,
         struct toy_src src, int src_ch)
{
   struct toy_inst *mov = tc_MOV(tc,
         tdst_offset(dst, 0, dst_ch),
         tsrc_offset(src, 0, src_ch));

   mov->exec_size = BRW_EXECUTE_4;
   mov->mask_ctrl = BRW_MASK_DISABLE;
}
 
static void
gs_COPY1(struct toy_compiler *tc,
struct toy_dst dst, int dst_ch,
struct toy_src src, int src_ch)
{
struct toy_inst *inst;
 
inst = tc_MOV(tc,
tdst_offset(dst, 0, dst_ch),
tsrc_rect(tsrc_offset(src, 0, src_ch), TOY_RECT_010));
inst->exec_size = BRW_EXECUTE_1;
inst->mask_ctrl = BRW_MASK_DISABLE;
}
 
/*
 * Initialize the per-thread variables: the URB_WRITE header template, the
 * primitive tracking state, and the SO primitive counter.
 */
static void
gs_init_vars(struct gs_compile_context *gcc)
{
   struct toy_compiler *tc = &gcc->tc;

   /* seed the URB_WRITE header from the thread payload header */
   gs_COPY8(tc, gcc->vars.urb_write_header, gcc->payload.header);

   gcc->vars.prim_start = true;
   gcc->vars.prim_end = false;

   /* the output topology follows directly from the output vertex count */
   if (gcc->out_vue_min_count == 1)
      gcc->vars.prim_type = _3DPRIM_POINTLIST;
   else if (gcc->out_vue_min_count == 2)
      gcc->vars.prim_type = _3DPRIM_LINESTRIP;
   else if (gcc->out_vue_min_count == 3)
      gcc->vars.prim_type = _3DPRIM_TRISTRIP;

   if (gcc->write_so)
      tc_MOV(tc, gcc->vars.so_written, tsrc_imm_d(0));
}
 
static void
gs_save_output(struct gs_compile_context *gcc, const struct toy_src *outs)
{
struct toy_compiler *tc = &gcc->tc;
const struct toy_dst buf = gcc->vars.buffers[gcc->vars.buffer_cur];
int i;
 
for (i = 0; i < gcc->shader->out.count; i++)
tc_MOV(tc, tdst_offset(buf, i, 0), outs[i]);
 
/* advance the cursor */
gcc->vars.buffer_cur++;
gcc->vars.buffer_cur %= gcc->vars.buffer_needed;
}
 
/*
 * Send a streamed-VB write: store one RGBA output ("out") at element
 * "index" of the SO buffer selected by "binding_table_index".  When
 * send_write_commit_message is set, the write commit lands in "dst".
 */
static void
gs_write_so(struct gs_compile_context *gcc,
            struct toy_dst dst,
            struct toy_src index, struct toy_src out,
            bool send_write_commit_message,
            int binding_table_index)
{
   struct toy_compiler *tc = &gcc->tc;
   const struct toy_dst m0 =
      tdst_d(tdst(TOY_FILE_MRF, gcc->first_free_mrf, 0));
   struct toy_src desc;

   /* m0.5: destination index */
   gs_COPY1(tc, m0, 5, index, 0);

   /* m0.0 - m0.3: RGBA */
   gs_COPY4(tc, m0, 0, tsrc_type(out, m0.type), 0);

   desc = tsrc_imm_mdesc_data_port(tc, false,
         1, send_write_commit_message,
         true, send_write_commit_message,
         GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VB_WRITE, 0,
         binding_table_index);

   tc_SEND(tc, dst, tsrc_from(m0), desc,
         GEN6_SFID_DATAPORT_RENDER_CACHE);
}
 
/*
 * Write the vertex attributes in "outs" to the URB entry addressed by
 * "msg_header", splitting into several URB_WRITE messages when the
 * attributes do not fit in the available MRFs.  Each MRF row carries two
 * 4-wide attributes.  "eot" marks the thread's final message; when not EOT,
 * the message response (the next URB handle) lands in "dst".
 */
static void
gs_write_vue(struct gs_compile_context *gcc,
             struct toy_dst dst, struct toy_src msg_header,
             const struct toy_src *outs, int num_outs,
             bool eot)
{
   struct toy_compiler *tc = &gcc->tc;
   struct toy_dst mrf_header;
   struct toy_src desc;
   int sent = 0;

   mrf_header = tdst_d(tdst(TOY_FILE_MRF, gcc->first_free_mrf, 0));
   gs_COPY8(tc, mrf_header, msg_header);

   while (sent < num_outs) {
      int mrf = gcc->first_free_mrf + 1;
      const int mrf_avail = gcc->last_free_mrf - mrf + 1;
      int msg_len, num_entries, i;
      bool complete;

      /* two attributes per MRF, rounded up for an odd remainder */
      num_entries = (num_outs - sent + 1) / 2;
      complete = true;
      if (num_entries > mrf_avail) {
         /* not enough MRFs; send what fits and continue in the next pass */
         num_entries = mrf_avail;
         complete = false;
      }

      for (i = 0; i < num_entries; i++) {
         /* NOTE(review): both the loop's mrf++ and the "mrf + i / 2" index
          * advance the MRF cursor, and the bound below uses
          * gcc->shader->out.count rather than the num_outs parameter; this
          * matches the code as imported but looks suspicious — verify
          * against the URB_WRITE payload layout before touching it. */
         gs_COPY4(tc, tdst(TOY_FILE_MRF, mrf + i / 2, 0), 0,
               outs[sent + 2 * i], 0);
         if (sent + i * 2 + 1 < gcc->shader->out.count) {
            gs_COPY4(tc, tdst(TOY_FILE_MRF, mrf + i / 2, 0), 4,
                  outs[sent + 2 * i + 1], 0);
         }
         mrf++;
      }

      /* do not forget the header */
      msg_len = num_entries + 1;

      if (complete) {
         desc = tsrc_imm_mdesc_urb(tc,
               eot, msg_len, !eot, true, true, !eot,
               BRW_URB_SWIZZLE_NONE, sent, 0);
      }
      else {
         desc = tsrc_imm_mdesc_urb(tc,
               false, msg_len, 0, false, true, false,
               BRW_URB_SWIZZLE_NONE, sent, 0);
      }

      /* only the final (complete) message returns a handle into dst */
      tc_add2(tc, TOY_OPCODE_URB_WRITE,
            (complete) ? dst : tdst_null(), tsrc_from(mrf_header), desc);

      sent += num_entries * 2;
   }
}
 
/*
 * Send an FF_SYNC message: report the primitive/vertex counts to the
 * fixed-function unit and allocate the initial URB handle.  The message
 * response is written to "dst".  "num_prims" may be an immediate (static
 * case) or a register (dynamic case).
 */
static void
gs_ff_sync(struct gs_compile_context *gcc, struct toy_dst dst,
           struct toy_src num_prims)
{
   struct toy_compiler *tc = &gcc->tc;
   struct toy_dst mrf_header =
      tdst_d(tdst(TOY_FILE_MRF, gcc->first_free_mrf, 0));
   struct toy_src desc;
   bool allocate;

   gs_COPY8(tc, mrf_header, gcc->payload.header);

   /* set NumSOVertsToWrite and NumSOPrimsNeeded */
   if (gcc->write_so) {
      if (num_prims.file == TOY_FILE_IMM) {
         /* verts in the high word, prims in the low word of m0.0 */
         const uint32_t v =
            (num_prims.val32 * gcc->in_vue_count) << 16 | num_prims.val32;

         gs_COPY1(tc, mrf_header, 0, tsrc_imm_d(v), 0);
      }
      else {
         /* compute the same packed value at runtime:
          * (num_prims * in_vue_count) << 16 | num_prims */
         struct toy_dst m0_0 = tdst_d(gcc->vars.tmp);

         tc_MUL(tc, m0_0, num_prims, tsrc_imm_d(gcc->in_vue_count << 16));
         tc_OR(tc, m0_0, tsrc_from(m0_0), num_prims);

         gs_COPY1(tc, mrf_header, 0, tsrc_from(m0_0), 0);
      }
   }

   /* set NumGSPrimsGenerated */
   if (gcc->write_vue)
      gs_COPY1(tc, mrf_header, 1, num_prims, 0);

   /*
    * From the Sandy Bridge PRM, volume 2 part 1, page 173:
    *
    *     "Programming Note: If the GS stage is enabled, software must always
    *      allocate at least one GS URB Entry. This is true even if the GS
    *      thread never needs to output vertices to the pipeline, e.g., when
    *      only performing stream output. This is an artifact of the need to
    *      pass the GS thread an initial destination URB handle."
    */
   allocate = true;
   desc = tsrc_imm_mdesc_urb(tc, false, 1, 1,
         false, false, allocate,
         BRW_URB_SWIZZLE_NONE, 0, 1);

   tc_SEND(tc, dst, tsrc_from(mrf_header), desc, BRW_SFID_URB);
}
 
static void
gs_discard(struct gs_compile_context *gcc)
{
struct toy_compiler *tc = &gcc->tc;
struct toy_dst mrf_header;
struct toy_src desc;
 
mrf_header = tdst_d(tdst(TOY_FILE_MRF, gcc->first_free_mrf, 0));
 
gs_COPY8(tc, mrf_header, tsrc_from(gcc->vars.urb_write_header));
 
desc = tsrc_imm_mdesc_urb(tc,
true, 1, 0, true, false, false,
BRW_URB_SWIZZLE_NONE, 0, 0);
 
tc_add2(tc, TOY_OPCODE_URB_WRITE,
tdst_null(), tsrc_from(mrf_header), desc);
}
 
/*
 * Lower TOY_OPCODE_ENDPRIM.
 *
 * Currently a no-op: on the static path the primitive boundaries are
 * precomputed by get_num_prims_static() into static_data.last_vertex, so
 * nothing is emitted here.  The dynamic (control-flow) path is not
 * implemented yet.
 */
static void
gs_lower_opcode_endprim(struct gs_compile_context *gcc, struct toy_inst *inst)
{
   /* if has control flow, set PrimEnd on the last vertex and URB_WRITE */
}
 
/*
 * Lower TOY_OPCODE_EMIT on the dynamic path: write the vertex's VUE.
 * Not implemented yet.
 */
static void
gs_lower_opcode_emit_vue_dynamic(struct gs_compile_context *gcc)
{
   /* TODO similar to the static version */

   /*
    * When SO is enabled and the inputs are lines or triangles, vertices are
    * always buffered.  We can defer the emission of the current vertex until
    * the next EMIT or ENDPRIM.  Or, we can emit two URB_WRITEs with the later
    * patching the former.
    */
}
 
/*
 * Lower TOY_OPCODE_EMIT for stream output on the dynamic path.  Only
 * partially implemented: the SO index bookkeeping is in place, but the
 * actual buffer writes are still TODO.
 */
static void
gs_lower_opcode_emit_so_dynamic(struct gs_compile_context *gcc)
{
   struct toy_compiler *tc = &gcc->tc;

   /* a primitive completes only once enough vertices were emitted */
   tc_IF(tc, tdst_null(),
         tsrc_from(gcc->dynamic_data.num_vertices_in_prim),
         tsrc_imm_d(gcc->out_vue_min_count),
         BRW_CONDITIONAL_GE);

   {
      /* tmp = so_index + { 0, 1, 2, 3 } (packed byte vector) */
      tc_ADD(tc, gcc->vars.tmp, tsrc_from(gcc->vars.so_index), tsrc_imm_d(0x03020100));

      /* TODO same as static version */
   }

   tc_ENDIF(tc);

   tc_ADD(tc, gcc->vars.so_index,
         tsrc_from(gcc->vars.so_index), tsrc_imm_d(gcc->out_vue_min_count));
}
 
/*
 * Lower TOY_OPCODE_EMIT on the static path: URB_WRITE the current vertex's
 * VUE with PrimType/PrimStart/PrimEnd set in the message header.
 */
static void
gs_lower_opcode_emit_vue_static(struct gs_compile_context *gcc)
{
   struct toy_compiler *tc = &gcc->tc;
   struct toy_inst *inst2;
   bool eot;

   /* the last EMIT of the shader also terminates the thread */
   eot = (gcc->static_data.num_vertices == gcc->static_data.total_vertices);

   /* last_vertex is a bitmask of the vertices that end a primitive */
   gcc->vars.prim_end =
      ((gcc->static_data.last_vertex[(gcc->static_data.num_vertices - 1) / 32] &
        1 << ((gcc->static_data.num_vertices - 1) % 32)) != 0);

   if (eot && gcc->write_so) {
      /* so_written holds SONumPrimsWritten in the high word of m0.2; OR the
       * primitive control bits into the low word in a single scalar op */
      inst2 = tc_OR(tc, tdst_offset(gcc->vars.urb_write_header, 0, 2),
            tsrc_from(gcc->vars.so_written),
            tsrc_imm_d(gcc->vars.prim_type << 2 |
                       gcc->vars.prim_start << 1 |
                       gcc->vars.prim_end));
      inst2->exec_size = BRW_EXECUTE_1;
      inst2->src[0] = tsrc_rect(inst2->src[0], TOY_RECT_010);
      inst2->src[1] = tsrc_rect(inst2->src[1], TOY_RECT_010);
   }
   else {
      gs_COPY1(tc, gcc->vars.urb_write_header, 2,
            tsrc_imm_d(gcc->vars.prim_type << 2 |
                       gcc->vars.prim_start << 1 |
                       gcc->vars.prim_end), 0);
   }

   gs_write_vue(gcc, tdst_d(gcc->vars.tmp),
         tsrc_from(gcc->vars.urb_write_header),
         gcc->vars.tgsi_outs,
         gcc->shader->out.count, eot);

   if (!eot) {
      /* the returned URB handle becomes the handle for the next write */
      gs_COPY1(tc, gcc->vars.urb_write_header, 0,
            tsrc_from(tdst_d(gcc->vars.tmp)), 0);
   }

   gcc->vars.prim_start = gcc->vars.prim_end;
   gcc->vars.prim_end = false;
}
 
/*
 * Lower TOY_OPCODE_EMIT for stream output on the static path: when the
 * current EMIT completes a primitive, write every SO component of each of
 * its vertices to the SO buffers and bump the SO counters.
 */
static void
gs_lower_opcode_emit_so_static(struct gs_compile_context *gcc)
{
   struct toy_compiler *tc = &gcc->tc;
   struct toy_inst *inst;
   int i, j;

   /* no primitive completes until enough vertices were emitted */
   if (gcc->static_data.num_vertices_in_prim < gcc->out_vue_min_count)
      return;

   /* tmp = { 0, 1, 2, 3, ... } as words (packed immediate vector) */
   inst = tc_MOV(tc, tdst_w(gcc->vars.tmp), tsrc_imm_v(0x03020100));
   inst->exec_size = BRW_EXECUTE_8;
   inst->mask_ctrl = BRW_MASK_DISABLE;

   /* add the running SO index: per-vertex destination indices */
   tc_ADD(tc, tdst_d(gcc->vars.tmp), tsrc_from(tdst_d(gcc->vars.tmp)),
         tsrc_rect(tsrc_from(gcc->vars.so_index), TOY_RECT_010));

   /* skip the writes when the last vertex's index exceeds the limit taken
    * from the SVBI payload (presumably the max index — TODO confirm) */
   tc_IF(tc, tdst_null(),
         tsrc_rect(tsrc_offset(tsrc_from(tdst_d(gcc->vars.tmp)), 0, gcc->out_vue_min_count - 1), TOY_RECT_010),
         tsrc_rect(tsrc_offset(gcc->payload.svbi, 0, 4), TOY_RECT_010),
         BRW_CONDITIONAL_LE);
   {
      for (i = 0; i < gcc->out_vue_min_count; i++) {
         for (j = 0; j < gcc->so_info->num_outputs; j++) {
            const int idx = gcc->so_info->output[j].register_index;
            struct toy_src index, out;
            int binding_table_index;
            bool write_commit;

            index = tsrc_d(tsrc_offset(tsrc_from(gcc->vars.tmp), 0, i));

            if (i == gcc->out_vue_min_count - 1) {
               /* the last vertex of the primitive is the current one */
               out = gcc->vars.tgsi_outs[idx];
            }
            else {
               /* gcc->vars.buffer_cur also points to the first vertex */
               const int buf =
                  (gcc->vars.buffer_cur + i) % gcc->vars.buffer_needed;

               out = tsrc_offset(tsrc_from(gcc->vars.buffers[buf]), idx, 0);
            }

            out = tsrc_offset(out, 0, gcc->so_info->output[j].start_component);

            /*
             * From the Sandy Bridge PRM, volume 4 part 2, page 19:
             *
             *     "The Kernel must do a write commit on the last write to DAP
             *      prior to a URB_WRITE with End of Thread."
             */
            write_commit =
               (gcc->static_data.num_vertices == gcc->static_data.total_vertices &&
                i == gcc->out_vue_min_count - 1 &&
                j == gcc->so_info->num_outputs - 1);

            binding_table_index = ILO_GS_SO_SURFACE(j);

            gs_write_so(gcc, gcc->vars.tmp, index,
                  out, write_commit, binding_table_index);

            /*
             * From the Sandy Bridge PRM, volume 4 part 1, page 168:
             *
             *     "The write commit does not modify the destination register,
             *      but merely clears the dependency associated with the
             *      destination register.  Thus, a simple "mov" instruction
             *      using the register as a source is sufficient to wait for
             *      the write commit to occur."
             */
            if (write_commit)
               tc_MOV(tc, gcc->vars.tmp, tsrc_from(gcc->vars.tmp));
         }
      }

      /* SONumPrimsWritten occupies the higher word of m0.2 of URB_WRITE */
      tc_ADD(tc, gcc->vars.so_written,
            tsrc_from(gcc->vars.so_written), tsrc_imm_d(1 << 16));
      tc_ADD(tc, gcc->vars.so_index,
            tsrc_from(gcc->vars.so_index), tsrc_imm_d(gcc->out_vue_min_count));
   }
   tc_ENDIF(tc);
}
 
static void
gs_lower_opcode_emit_static(struct gs_compile_context *gcc,
struct toy_inst *inst)
{
gcc->static_data.num_vertices++;
gcc->static_data.num_vertices_in_prim++;
 
if (gcc->write_so) {
gs_lower_opcode_emit_so_static(gcc);
 
if (gcc->out_vue_min_count > 1 &&
gcc->static_data.num_vertices != gcc->static_data.total_vertices)
gs_save_output(gcc, gcc->vars.tgsi_outs);
}
 
if (gcc->write_vue)
gs_lower_opcode_emit_vue_static(gcc);
}
 
static void
gs_lower_opcode_emit_dynamic(struct gs_compile_context *gcc,
struct toy_inst *inst)
{
struct toy_compiler *tc = &gcc->tc;
 
tc_ADD(tc, gcc->dynamic_data.num_vertices,
tsrc_from(gcc->dynamic_data.num_vertices), tsrc_imm_d(1));
tc_ADD(tc, gcc->dynamic_data.num_vertices_in_prim,
tsrc_from(gcc->dynamic_data.num_vertices_in_prim), tsrc_imm_d(1));
 
if (gcc->write_so) {
gs_lower_opcode_emit_so_dynamic(gcc);
 
if (gcc->out_vue_min_count > 1)
gs_save_output(gcc, gcc->vars.tgsi_outs);
}
 
if (gcc->write_vue)
gs_lower_opcode_emit_vue_dynamic(gcc);
}
 
/* Lower TOY_OPCODE_EMIT, dispatching to the static or dynamic path. */
static void
gs_lower_opcode_emit(struct gs_compile_context *gcc, struct toy_inst *inst)
{
   if (!gcc->is_static) {
      gs_lower_opcode_emit_dynamic(gcc, inst);
      return;
   }

   gs_lower_opcode_emit_static(gcc, inst);
}
 
/*
 * Lower TOY_OPCODE_TGSI_IN: load input attribute "idx" of input vertex
 * "dim" into "dst".  For triangle inputs, the first two vertices are
 * swapped at runtime when the incoming topology is
 * _3DPRIM_TRISTRIP_REVERSE.
 */
static void
gs_lower_opcode_tgsi_in(struct gs_compile_context *gcc,
                        struct toy_dst dst, int dim, int idx)
{
   struct toy_compiler *tc = &gcc->tc;
   struct toy_src attr;
   int slot, reg = -1, subreg;

   /* locate the attribute within the input VUE (two attributes per reg) */
   slot = toy_tgsi_find_input(&gcc->tgsi, idx);
   if (slot >= 0) {
      int i;

      for (i = 0; i < gcc->variant->u.gs.num_inputs; i++) {
         if (gcc->variant->u.gs.semantic_names[i] ==
               gcc->tgsi.inputs[slot].semantic_name &&
             gcc->variant->u.gs.semantic_indices[i] ==
               gcc->tgsi.inputs[slot].semantic_index) {
            reg = i / 2;
            subreg = (i % 2) * 4;
            break;
         }
      }
   }

   /* attribute not fed by the previous stage: read as zero */
   if (reg < 0) {
      tc_MOV(tc, dst, tsrc_imm_f(0.0f));
      return;
   }

   /* fix vertex ordering for _3DPRIM_TRISTRIP_REVERSE */
   if (gcc->in_vue_count == 3 && dim < 2) {
      struct toy_inst *inst;

      /* get PrimType from the payload header (r0.2 low bits) */
      inst = tc_AND(tc, tdst_d(gcc->vars.tmp),
            tsrc_offset(gcc->payload.header, 0, 2), tsrc_imm_d(0x1f));
      inst->exec_size = BRW_EXECUTE_1;
      inst->src[0] = tsrc_rect(inst->src[0], TOY_RECT_010);
      inst->src[1] = tsrc_rect(inst->src[1], TOY_RECT_010);

      /* set the flag when PrimType != TRISTRIP_REVERSE */
      inst = tc_CMP(tc, tdst_null(), tsrc_from(tdst_d(gcc->vars.tmp)),
            tsrc_imm_d(_3DPRIM_TRISTRIP_REVERSE), BRW_CONDITIONAL_NEQ);
      inst->src[0] = tsrc_rect(inst->src[0], TOY_RECT_010);

      /* predicated: normal order */
      attr = tsrc_offset(gcc->payload.vues[dim], reg, subreg);
      inst = tc_MOV(tc, dst, attr);
      inst->pred_ctrl = BRW_PREDICATE_NORMAL;

      /* swap IN[0] and IN[1] for _3DPRIM_TRISTRIP_REVERSE */
      dim = !dim;

      /* inverted predicate: swapped order */
      attr = tsrc_offset(gcc->payload.vues[dim], reg, subreg);
      inst = tc_MOV(tc, dst, attr);
      inst->pred_ctrl = BRW_PREDICATE_NORMAL;
      inst->pred_inv = true;
   }
   else {
      attr = tsrc_offset(gcc->payload.vues[dim], reg, subreg);
      tc_MOV(tc, dst, attr);
   }
}
 
/*
 * Lower TOY_OPCODE_TGSI_IMM: load the four channels of immediate "idx"
 * into "dst" with raw (bit-exact) moves.
 */
static void
gs_lower_opcode_tgsi_imm(struct gs_compile_context *gcc,
                         struct toy_dst dst, int idx)
{
   const uint32_t *imm = toy_tgsi_get_imm(&gcc->tgsi, idx, NULL);
   int ch;

   for (ch = 0; ch < 4; ch++) {
      /* raw moves */
      struct toy_inst *mov = tc_MOV(&gcc->tc,
            tdst_writemask(tdst_ud(dst), 1 << ch),
            tsrc_imm_ud(imm[ch]));

      mov->access_mode = BRW_ALIGN_16;
   }
}
 
/*
 * Lower the directly-addressed TGSI fetch opcodes (IN and IMM; CONST and
 * SV are unhandled).  The instruction's two immediate sources encode the
 * dimension (vertex) and the register index; the instruction itself is
 * discarded after lowering.
 */
static void
gs_lower_opcode_tgsi_direct(struct gs_compile_context *gcc,
                            struct toy_inst *inst)
{
   struct toy_compiler *tc = &gcc->tc;
   int dim, idx;

   assert(inst->src[0].file == TOY_FILE_IMM);
   dim = inst->src[0].val32;

   assert(inst->src[1].file == TOY_FILE_IMM);
   idx = inst->src[1].val32;

   switch (inst->opcode) {
   case TOY_OPCODE_TGSI_IN:
      gs_lower_opcode_tgsi_in(gcc, inst->dst, dim, idx);
      /* fetch all dimensions */
      if (dim == 0) {
         int i;

         /* load the same attribute for the remaining input vertices */
         for (i = 1; i < gcc->in_vue_count; i++) {
            const int vrf = toy_tgsi_get_vrf(&gcc->tgsi, TGSI_FILE_INPUT, i, idx);
            struct toy_dst dst;

            if (vrf < 0)
               continue;

            dst = tdst(TOY_FILE_VRF, vrf, 0);
            gs_lower_opcode_tgsi_in(gcc, dst, i, idx);
         }
      }
      break;
   case TOY_OPCODE_TGSI_IMM:
      assert(!dim);
      gs_lower_opcode_tgsi_imm(gcc, inst->dst, idx);
      break;
   case TOY_OPCODE_TGSI_CONST:
   case TOY_OPCODE_TGSI_SV:
   default:
      tc_fail(tc, "unhandled TGSI fetch");
      break;
   }

   tc_discard_inst(tc, inst);
}
 
/*
 * Lower all virtual opcodes to hardware instructions in two passes: first
 * the GS-specific and TGSI opcodes (which may themselves generate virtual
 * math/URB opcodes), then the math functions and URB writes.
 */
static void
gs_lower_virtual_opcodes(struct gs_compile_context *gcc)
{
   struct toy_compiler *tc = &gcc->tc;
   struct toy_inst *inst;

   /* pass 1: TGSI fetches and EMIT/ENDPRIM */
   tc_head(tc);
   while ((inst = tc_next(tc)) != NULL) {
      switch (inst->opcode) {
      case TOY_OPCODE_TGSI_IN:
      case TOY_OPCODE_TGSI_CONST:
      case TOY_OPCODE_TGSI_SV:
      case TOY_OPCODE_TGSI_IMM:
         gs_lower_opcode_tgsi_direct(gcc, inst);
         break;
      case TOY_OPCODE_TGSI_INDIRECT_FETCH:
      case TOY_OPCODE_TGSI_INDIRECT_STORE:
         /* TODO similar to VS */
         tc_fail(tc, "no indirection support");
         tc_discard_inst(tc, inst);
         break;
      case TOY_OPCODE_TGSI_TEX:
      case TOY_OPCODE_TGSI_TXB:
      case TOY_OPCODE_TGSI_TXD:
      case TOY_OPCODE_TGSI_TXL:
      case TOY_OPCODE_TGSI_TXP:
      case TOY_OPCODE_TGSI_TXF:
      case TOY_OPCODE_TGSI_TXQ:
      case TOY_OPCODE_TGSI_TXQ_LZ:
      case TOY_OPCODE_TGSI_TEX2:
      case TOY_OPCODE_TGSI_TXB2:
      case TOY_OPCODE_TGSI_TXL2:
      case TOY_OPCODE_TGSI_SAMPLE:
      case TOY_OPCODE_TGSI_SAMPLE_I:
      case TOY_OPCODE_TGSI_SAMPLE_I_MS:
      case TOY_OPCODE_TGSI_SAMPLE_B:
      case TOY_OPCODE_TGSI_SAMPLE_C:
      case TOY_OPCODE_TGSI_SAMPLE_C_LZ:
      case TOY_OPCODE_TGSI_SAMPLE_D:
      case TOY_OPCODE_TGSI_SAMPLE_L:
      case TOY_OPCODE_TGSI_GATHER4:
      case TOY_OPCODE_TGSI_SVIEWINFO:
      case TOY_OPCODE_TGSI_SAMPLE_POS:
      case TOY_OPCODE_TGSI_SAMPLE_INFO:
         /* TODO similar to VS */
         tc_fail(tc, "no sampling support");
         tc_discard_inst(tc, inst);
         break;
      case TOY_OPCODE_EMIT:
         gs_lower_opcode_emit(gcc, inst);
         tc_discard_inst(tc, inst);
         break;
      case TOY_OPCODE_ENDPRIM:
         gs_lower_opcode_endprim(gcc, inst);
         tc_discard_inst(tc, inst);
         break;
      default:
         break;
      }
   }

   /* pass 2: math opcodes and URB writes generated above */
   tc_head(tc);
   while ((inst = tc_next(tc)) != NULL) {
      switch (inst->opcode) {
      case TOY_OPCODE_INV:
      case TOY_OPCODE_LOG:
      case TOY_OPCODE_EXP:
      case TOY_OPCODE_SQRT:
      case TOY_OPCODE_RSQ:
      case TOY_OPCODE_SIN:
      case TOY_OPCODE_COS:
      case TOY_OPCODE_FDIV:
      case TOY_OPCODE_POW:
      case TOY_OPCODE_INT_DIV_QUOTIENT:
      case TOY_OPCODE_INT_DIV_REMAINDER:
         toy_compiler_lower_math(tc, inst);
         break;
      case TOY_OPCODE_URB_WRITE:
         toy_compiler_lower_to_send(tc, inst, false, BRW_SFID_URB);
         break;
      default:
         /* opcodes above 127 are virtual and must have been lowered */
         if (inst->opcode > 127)
            tc_fail(tc, "unhandled virtual opcode");
         break;
      }
   }
}
 
/**
 * Count the number of vertices and (tessellated) primitives generated by
 * this shader, when that is known at compile time.  Sets gcc->is_static to
 * false when the counts are unknown until runtime (EMIT/ENDPRIM inside
 * control flow).
 */
static void
get_num_prims_static(struct gs_compile_context *gcc)
{
   struct toy_compiler *tc = &gcc->tc;
   const struct toy_inst *inst;
   int num_vertices_in_prim = 0, if_depth = 0, do_depth = 0;
   bool is_static = true;

   /* walk all instructions, tracking control-flow nesting */
   tc_head(tc);
   while ((inst = tc_next_no_skip(tc)) != NULL) {
      switch (inst->opcode) {
      case BRW_OPCODE_IF:
         if_depth++;
         break;
      case BRW_OPCODE_ENDIF:
         if_depth--;
         break;
      case BRW_OPCODE_DO:
         do_depth++;
         break;
      case BRW_OPCODE_WHILE:
         do_depth--;
         break;
      case TOY_OPCODE_EMIT:
         /* an EMIT inside control flow makes the counts dynamic */
         if (if_depth || do_depth) {
            is_static = false;
         }
         else {
            gcc->static_data.total_vertices++;

            /* each vertex past the first (out_vue_min_count - 1) completes
             * a primitive of the output strip */
            num_vertices_in_prim++;
            if (num_vertices_in_prim >= gcc->out_vue_min_count)
               gcc->static_data.total_prims++;
         }
         break;
      case TOY_OPCODE_ENDPRIM:
         if (if_depth || do_depth) {
            is_static = false;
         }
         else {
            /* mark the vertex just emitted as the end of a primitive */
            const int vertidx = gcc->static_data.total_vertices - 1;
            const int idx = vertidx / 32;
            const int subidx = vertidx % 32;

            gcc->static_data.last_vertex[idx] |= 1 << subidx;
            num_vertices_in_prim = 0;
         }
         break;
      default:
         break;
      }

      /* counts already found to be dynamic; no point scanning further */
      if (!is_static)
         break;
   }

   gcc->is_static = is_static;
}
 
/**
* Compile the shader.
*/
static bool
gs_compile(struct gs_compile_context *gcc)
{
   struct toy_compiler *tc = &gcc->tc;
   struct ilo_shader *sh = gcc->shader;

   get_num_prims_static(gcc);

   if (gcc->is_static) {
      /* prepend the prolog: init the variables, FF_SYNC, save the returned
       * URB handle (and the initial SO index when streaming out) */
      tc_head(tc);

      gs_init_vars(gcc);
      gs_ff_sync(gcc, tdst_d(gcc->vars.tmp), tsrc_imm_d(gcc->static_data.total_prims));
      gs_COPY1(tc, gcc->vars.urb_write_header, 0, tsrc_from(tdst_d(gcc->vars.tmp)), 0);
      if (gcc->write_so)
         gs_COPY4(tc, gcc->vars.so_index, 0, tsrc_from(tdst_d(gcc->vars.tmp)), 1);

      tc_tail(tc);
   }
   else {
      /* the dynamic path is not implemented */
      tc_fail(tc, "no control flow support");
      return false;
   }

   /* rasterizer discard: release the URB handle without emitting */
   if (!gcc->write_vue)
      gs_discard(gcc);

   gs_lower_virtual_opcodes(gcc);
   toy_compiler_legalize_for_ra(tc);
   toy_compiler_optimize(tc);
   toy_compiler_allocate_registers(tc,
         gcc->first_free_grf,
         gcc->last_free_grf,
         1);
   toy_compiler_legalize_for_asm(tc);

   if (tc->fail) {
      ilo_err("failed to legalize GS instructions: %s\n", tc->reason);
      return false;
   }

   if (ilo_debug & ILO_DEBUG_GS) {
      ilo_printf("legalized instructions:\n");
      toy_compiler_dump(tc);
      ilo_printf("\n");
   }

   sh->kernel = toy_compiler_assemble(tc, &sh->kernel_size);
   if (!sh->kernel)
      return false;

   if (ilo_debug & ILO_DEBUG_GS) {
      ilo_printf("disassembly:\n");
      toy_compiler_disassemble(tc, sh->kernel, sh->kernel_size);
      ilo_printf("\n");
   }

   return true;
}
 
/*
 * Compile a pass-through GS: forward the input vertices unmodified (and
 * stream them out), without any TGSI program.  Used to implement VS stream
 * output.
 */
static bool
gs_compile_passthrough(struct gs_compile_context *gcc)
{
   struct toy_compiler *tc = &gcc->tc;
   struct ilo_shader *sh = gcc->shader;

   /* exactly one primitive made of all input vertices */
   gcc->is_static = true;
   gcc->static_data.total_vertices = gcc->in_vue_count;
   gcc->static_data.total_prims = 1;
   gcc->static_data.last_vertex[0] = 1 << (gcc->in_vue_count - 1);

   gs_init_vars(gcc);
   gs_ff_sync(gcc, tdst_d(gcc->vars.tmp), tsrc_imm_d(gcc->static_data.total_prims));
   gs_COPY1(tc, gcc->vars.urb_write_header, 0, tsrc_from(tdst_d(gcc->vars.tmp)), 0);
   if (gcc->write_so)
      gs_COPY4(tc, gcc->vars.so_index, 0, tsrc_from(tdst_d(gcc->vars.tmp)), 1);

   {
      int vert, attr;

      /* copy each input VUE to the outputs and emit it */
      for (vert = 0; vert < gcc->out_vue_min_count; vert++) {
         for (attr = 0; attr < gcc->shader->out.count; attr++) {
            tc_MOV(tc, tdst_from(gcc->vars.tgsi_outs[attr]),
                  tsrc_offset(gcc->payload.vues[vert], attr / 2, (attr % 2) * 4));
         }

         gs_lower_opcode_emit(gcc, NULL);
      }

      gs_lower_opcode_endprim(gcc, NULL);
   }

   /* rasterizer discard: release the URB handle without emitting */
   if (!gcc->write_vue)
      gs_discard(gcc);

   gs_lower_virtual_opcodes(gcc);

   toy_compiler_legalize_for_ra(tc);
   toy_compiler_optimize(tc);
   toy_compiler_allocate_registers(tc,
         gcc->first_free_grf,
         gcc->last_free_grf,
         1);

   toy_compiler_legalize_for_asm(tc);

   if (tc->fail) {
      ilo_err("failed to translate GS TGSI tokens: %s\n", tc->reason);
      return false;
   }

   if (ilo_debug & ILO_DEBUG_GS) {
      int i;

      ilo_printf("VUE count %d, VUE size %d\n",
            gcc->in_vue_count, gcc->in_vue_size);
      ilo_printf("%srasterizer discard\n",
            (gcc->variant->u.gs.rasterizer_discard) ? "" : "no ");

      for (i = 0; i < gcc->so_info->num_outputs; i++) {
         ilo_printf("SO[%d] = OUT[%d]\n", i,
               gcc->so_info->output[i].register_index);
      }

      ilo_printf("legalized instructions:\n");
      toy_compiler_dump(tc);
      ilo_printf("\n");
   }

   sh->kernel = toy_compiler_assemble(tc, &sh->kernel_size);
   if (!sh->kernel) {
      ilo_err("failed to compile GS: %s\n", tc->reason);
      return false;
   }

   if (ilo_debug & ILO_DEBUG_GS) {
      ilo_printf("disassembly:\n");
      toy_compiler_disassemble(tc, sh->kernel, sh->kernel_size);
      ilo_printf("\n");
   }

   return true;
}
 
/**
* Translate the TGSI tokens.
*/
/*
 * Translate the TGSI tokens into toy-compiler IR, dumping both when GS
 * debugging is enabled.  Return false on translation failure.
 */
static bool
gs_setup_tgsi(struct toy_compiler *tc, const struct tgsi_token *tokens,
              struct toy_tgsi *tgsi)
{
   const bool debug = (ilo_debug & ILO_DEBUG_GS);

   if (debug) {
      ilo_printf("dumping geometry shader\n");
      ilo_printf("\n");

      tgsi_dump(tokens, 0);
      ilo_printf("\n");
   }

   toy_compiler_translate_tgsi(tc, tokens, true, tgsi);
   if (tc->fail)
      return false;

   if (debug) {
      ilo_printf("TGSI translator:\n");
      toy_tgsi_dump(tgsi);
      ilo_printf("\n");
      toy_compiler_dump(tc);
      ilo_printf("\n");
   }

   return true;
}
 
/**
* Set up shader inputs for fixed-function units.
*/
/*
 * Set up shader inputs for the fixed-function units.  Every GS input is a
 * flat (constant-interpolated) attribute copied from the variant.
 */
static void
gs_setup_shader_in(struct ilo_shader *sh,
                   const struct ilo_shader_variant *variant)
{
   const int num_inputs = variant->u.gs.num_inputs;
   int slot;

   for (slot = 0; slot < num_inputs; slot++) {
      sh->in.semantic_names[slot] = variant->u.gs.semantic_names[slot];
      sh->in.semantic_indices[slot] = variant->u.gs.semantic_indices[slot];
      sh->in.interp[slot] = TGSI_INTERPOLATE_CONSTANT;
      sh->in.centroid[slot] = false;
   }

   sh->in.count = num_inputs;

   sh->in.has_pos = false;
   sh->in.has_linear_interp = false;
   sh->in.barycentric_interpolation_mode = 0;
}
 
/**
* Set up shader outputs for fixed-function units.
*
* XXX share the code with VS
*/
static void
gs_setup_shader_out(struct ilo_shader *sh, const struct toy_tgsi *tgsi,
                    bool output_clipdist, int *output_map)
{
   int psize_slot = -1, pos_slot = -1;
   int clipdist_slot[2] = { -1, -1 };
   int color_slot[4] = { -1, -1, -1, -1 };
   int num_outs, i;

   /* find out the slots of outputs that need special care */
   for (i = 0; i < tgsi->num_outputs; i++) {
      switch (tgsi->outputs[i].semantic_name) {
      case TGSI_SEMANTIC_PSIZE:
         psize_slot = i;
         break;
      case TGSI_SEMANTIC_POSITION:
         pos_slot = i;
         break;
      case TGSI_SEMANTIC_CLIPDIST:
         if (tgsi->outputs[i].semantic_index)
            clipdist_slot[1] = i;
         else
            clipdist_slot[0] = i;
         break;
      case TGSI_SEMANTIC_COLOR:
         /* COLOR goes to even entries, BCOLOR to odd (see below) */
         if (tgsi->outputs[i].semantic_index)
            color_slot[2] = i;
         else
            color_slot[0] = i;
         break;
      case TGSI_SEMANTIC_BCOLOR:
         if (tgsi->outputs[i].semantic_index)
            color_slot[3] = i;
         else
            color_slot[1] = i;
         break;
      default:
         break;
      }
   }

   /* the first two VUEs are always PSIZE and POSITION */
   num_outs = 2;
   output_map[0] = psize_slot;
   output_map[1] = pos_slot;

   sh->out.register_indices[0] =
      (psize_slot >= 0) ? tgsi->outputs[psize_slot].index : -1;
   sh->out.semantic_names[0] = TGSI_SEMANTIC_PSIZE;
   sh->out.semantic_indices[0] = 0;

   sh->out.register_indices[1] =
      (pos_slot >= 0) ? tgsi->outputs[pos_slot].index : -1;
   sh->out.semantic_names[1] = TGSI_SEMANTIC_POSITION;
   sh->out.semantic_indices[1] = 0;

   sh->out.has_pos = true;

   /* followed by optional clip distances */
   if (output_clipdist) {
      sh->out.register_indices[num_outs] =
         (clipdist_slot[0] >= 0) ? tgsi->outputs[clipdist_slot[0]].index : -1;
      sh->out.semantic_names[num_outs] = TGSI_SEMANTIC_CLIPDIST;
      sh->out.semantic_indices[num_outs] = 0;
      output_map[num_outs++] = clipdist_slot[0];

      sh->out.register_indices[num_outs] =
         (clipdist_slot[1] >= 0) ? tgsi->outputs[clipdist_slot[1]].index : -1;
      sh->out.semantic_names[num_outs] = TGSI_SEMANTIC_CLIPDIST;
      sh->out.semantic_indices[num_outs] = 1;
      output_map[num_outs++] = clipdist_slot[1];
   }

   /*
    * make BCOLOR follow COLOR so that we can make use of
    * ATTRIBUTE_SWIZZLE_INPUTATTR_FACING in 3DSTATE_SF
    */
   for (i = 0; i < 4; i++) {
      const int slot = color_slot[i];

      if (slot < 0)
         continue;

      sh->out.register_indices[num_outs] = tgsi->outputs[slot].index;
      sh->out.semantic_names[num_outs] = tgsi->outputs[slot].semantic_name;
      sh->out.semantic_indices[num_outs] = tgsi->outputs[slot].semantic_index;

      output_map[num_outs++] = slot;
   }

   /* add the rest of the outputs */
   for (i = 0; i < tgsi->num_outputs; i++) {
      switch (tgsi->outputs[i].semantic_name) {
      case TGSI_SEMANTIC_PSIZE:
      case TGSI_SEMANTIC_POSITION:
      case TGSI_SEMANTIC_CLIPDIST:
      case TGSI_SEMANTIC_COLOR:
      case TGSI_SEMANTIC_BCOLOR:
         /* already placed above */
         break;
      default:
         sh->out.register_indices[num_outs] = tgsi->outputs[i].index;
         sh->out.semantic_names[num_outs] = tgsi->outputs[i].semantic_name;
         sh->out.semantic_indices[num_outs] = tgsi->outputs[i].semantic_index;
         output_map[num_outs++] = i;
         break;
      }
   }

   sh->out.count = num_outs;
}
 
/*
 * Allocate GRFs for the per-thread variables and bind vars.tgsi_outs to
 * the translated TGSI outputs (or to fresh GRFs / zero immediates when no
 * mapping exists).
 */
static void
gs_setup_vars(struct gs_compile_context *gcc)
{
   int grf = gcc->first_free_grf;
   int i;

   gcc->vars.urb_write_header = tdst_d(tdst(TOY_FILE_GRF, grf, 0));
   grf++;

   gcc->vars.tmp = tdst(TOY_FILE_GRF, grf, 0);
   grf++;

   if (gcc->write_so) {
      /* ring buffer for the earlier vertices of an in-flight primitive */
      gcc->vars.buffer_needed = gcc->out_vue_min_count - 1;
      for (i = 0; i < gcc->vars.buffer_needed; i++) {
         gcc->vars.buffers[i] = tdst(TOY_FILE_GRF, grf, 0);
         grf += gcc->shader->out.count;
      }

      gcc->vars.so_written = tdst_d(tdst(TOY_FILE_GRF, grf, 0));
      grf++;

      gcc->vars.so_index = tdst_d(tdst(TOY_FILE_GRF, grf, 0));
      grf++;
   }

   gcc->first_free_grf = grf;

   /* no TGSI mapping (passthrough): outputs live in their own GRFs */
   if (!gcc->tgsi.reg_mapping) {
      for (i = 0; i < gcc->shader->out.count; i++)
         gcc->vars.tgsi_outs[i] = tsrc(TOY_FILE_GRF, grf++, 0);

      gcc->first_free_grf = grf;
      return;
   }

   /* map each VUE output to its TGSI VRF; unmapped slots read as zero
    * (slot 0 is PSIZE, hence the integer zero) */
   for (i = 0; i < gcc->shader->out.count; i++) {
      const int slot = gcc->output_map[i];
      const int vrf = (slot >= 0) ? toy_tgsi_get_vrf(&gcc->tgsi,
            TGSI_FILE_OUTPUT, 0, gcc->tgsi.outputs[slot].index) : -1;

      if (vrf >= 0)
         gcc->vars.tgsi_outs[i] = tsrc(TOY_FILE_VRF, vrf, 0);
      else
         gcc->vars.tgsi_outs[i] = (i == 0) ? tsrc_imm_d(0) : tsrc_imm_f(0.0f);
   }
}
 
/*
 * Describe the thread payload layout: r0 is the header, r1 the streamed
 * vertex buffer indices (SO only), followed by one input VUE per vertex.
 */
static void
gs_setup_payload(struct gs_compile_context *gcc)
{
   int reg = 0;
   int i;

   /* r0: payload header */
   gcc->payload.header = tsrc_d(tsrc(TOY_FILE_GRF, reg, 0));
   reg++;

   /* r1: SVBI */
   if (gcc->write_so) {
      gcc->payload.svbi = tsrc_ud(tsrc(TOY_FILE_GRF, reg, 0));
      reg++;
   }

   /* URB data */
   gcc->shader->in.start_grf = reg;

   /* no pull constants */

   /* VUEs */
   for (i = 0; i < gcc->in_vue_count; i++) {
      gcc->payload.vues[i] = tsrc(TOY_FILE_GRF, reg, 0);
      reg += gcc->in_vue_size;
   }

   gcc->first_free_grf = reg;
   gcc->last_free_grf = 127;
}
 
/**
* Set up GS compile context. This includes translating the TGSI tokens.
*/
static bool
gs_setup(struct gs_compile_context *gcc,
         const struct ilo_shader_state *state,
         const struct ilo_shader_variant *variant,
         int num_verts)
{
   memset(gcc, 0, sizeof(*gcc));

   gcc->shader = CALLOC_STRUCT(ilo_shader);
   if (!gcc->shader)
      return false;

   gcc->variant = variant;
   gcc->so_info = &state->info.stream_output;

   toy_compiler_init(&gcc->tc, state->info.dev);

   gcc->write_so = (state->info.stream_output.num_outputs > 0);
   gcc->write_vue = !gcc->variant->u.gs.rasterizer_discard;

   /* translate TGSI in ALIGN16 mode with 4-wide instructions */
   gcc->tc.templ.access_mode = BRW_ALIGN_16;
   gcc->tc.templ.exec_size = BRW_EXECUTE_4;
   gcc->tc.rect_linear_width = 4;

   if (state->info.tokens) {
      if (!gs_setup_tgsi(&gcc->tc, state->info.tokens, &gcc->tgsi)) {
         toy_compiler_cleanup(&gcc->tc);
         FREE(gcc->shader);
         return false;
      }

      /* input vertex count follows the declared input primitive; adjacency
       * vertices of LINES/TRIANGLES are discarded by the fixed function */
      switch (gcc->tgsi.props.gs_input_prim) {
      case PIPE_PRIM_POINTS:
         gcc->in_vue_count = 1;
         break;
      case PIPE_PRIM_LINES:
         gcc->in_vue_count = 2;
         gcc->shader->in.discard_adj = true;
         break;
      case PIPE_PRIM_TRIANGLES:
         gcc->in_vue_count = 3;
         gcc->shader->in.discard_adj = true;
         break;
      case PIPE_PRIM_LINES_ADJACENCY:
         gcc->in_vue_count = 4;
         break;
      case PIPE_PRIM_TRIANGLES_ADJACENCY:
         gcc->in_vue_count = 6;
         break;
      default:
         tc_fail(&gcc->tc, "unsupported GS input type");
         gcc->in_vue_count = 0;
         break;
      }

      switch (gcc->tgsi.props.gs_output_prim) {
      case PIPE_PRIM_POINTS:
         gcc->out_vue_min_count = 1;
         break;
      case PIPE_PRIM_LINE_STRIP:
         gcc->out_vue_min_count = 2;
         break;
      case PIPE_PRIM_TRIANGLE_STRIP:
         gcc->out_vue_min_count = 3;
         break;
      default:
         tc_fail(&gcc->tc, "unsupported GS output type");
         gcc->out_vue_min_count = 0;
         break;
      }
   }
   else {
      /* passthrough GS: outputs mirror the variant's inputs */
      int i;

      gcc->in_vue_count = num_verts;
      gcc->out_vue_min_count = num_verts;

      gcc->tgsi.num_outputs = gcc->variant->u.gs.num_inputs;
      for (i = 0; i < gcc->variant->u.gs.num_inputs; i++) {
         gcc->tgsi.outputs[i].semantic_name =
            gcc->variant->u.gs.semantic_names[i];
         gcc->tgsi.outputs[i].semantic_index =
            gcc->variant->u.gs.semantic_indices[i];
      }
   }

   /* the GS prolog/epilog emitted here uses ALIGN1 mode */
   gcc->tc.templ.access_mode = BRW_ALIGN_1;

   gs_setup_shader_in(gcc->shader, gcc->variant);
   gs_setup_shader_out(gcc->shader, &gcc->tgsi, false, gcc->output_map);

   /* two attributes per register */
   gcc->in_vue_size = (gcc->shader->in.count + 1) / 2;

   gcc->out_vue_size = (gcc->shader->out.count + 1) / 2;

   gs_setup_payload(gcc);
   gs_setup_vars(gcc);

   /* m0 is reserved for system routines */
   gcc->first_free_mrf = 1;
   gcc->last_free_mrf = 15;

   return true;
}
 
/**
* Compile the geometry shader.
*/
/**
 * Compile the geometry shader described by "state" for the given variant.
 * Returns the compiled shader, or NULL on failure.  The caller owns the
 * returned shader.
 */
struct ilo_shader *
ilo_shader_compile_gs(const struct ilo_shader_state *state,
                      const struct ilo_shader_variant *variant)
{
   struct gs_compile_context gcc;

   if (!gs_setup(&gcc, state, variant, 0))
      return NULL;

   if (!gs_compile(&gcc)) {
      FREE(gcc.shader);
      gcc.shader = NULL;
   }

   toy_tgsi_cleanup(&gcc.tgsi);
   toy_compiler_cleanup(&gcc.tc);

   /* was "return gcc.shader;;" — stray empty statement removed */
   return gcc.shader;
}
 
/**
 * Append the GS kernel to the VS kernel, 64-byte aligned, and record the
 * GS entry point in the VS.  On success the GS object is destroyed (the VS
 * takes ownership of the combined kernel); on failure both shaders are
 * left intact.  Returns false when gs is NULL or on allocation failure.
 */
static bool
append_gs_to_vs(struct ilo_shader *vs, struct ilo_shader *gs, int num_verts)
{
   void *combined;
   int gs_offset;

   if (!gs)
      return false;

   /* kernels must be aligned to 64-byte */
   gs_offset = align(vs->kernel_size, 64);
   combined = REALLOC(vs->kernel, vs->kernel_size,
         gs_offset + gs->kernel_size);
   if (!combined)
      return false;

   /* cast for the pointer arithmetic: arithmetic on void * is a GNU
    * extension, not standard C */
   memcpy((char *) combined + gs_offset, gs->kernel, gs->kernel_size);

   vs->kernel = combined;
   vs->kernel_size = gs_offset + gs->kernel_size;

   vs->stream_output = true;
   vs->gs_offsets[num_verts - 1] = gs_offset;
   vs->gs_start_grf = gs->in.start_grf;

   ilo_shader_destroy_kernel(gs);

   return true;
}
 
/**
 * Compile a pass-through GS that streams out the VS outputs, and append
 * its kernel to the VS kernel.  Returns false on failure.
 */
bool
ilo_shader_compile_gs_passthrough(const struct ilo_shader_state *vs_state,
                                  const struct ilo_shader_variant *vs_variant,
                                  const int *so_mapping,
                                  struct ilo_shader *vs)
{
   const int num_verts = 3;
   struct gs_compile_context gcc;
   struct ilo_shader_state state;
   struct ilo_shader_variant variant;
   int i;

   /* derive the GS state from the VS state: no tokens, SO registers
    * remapped through so_mapping */
   state = *vs_state;
   state.info.tokens = NULL;
   for (i = 0; i < state.info.stream_output.num_outputs; i++) {
      const int reg = state.info.stream_output.output[i].register_index;

      state.info.stream_output.output[i].register_index = so_mapping[reg];
   }

   /* derive the GS variant: its inputs mirror the VS outputs */
   variant = *vs_variant;
   variant.u.gs.rasterizer_discard = vs_variant->u.vs.rasterizer_discard;
   variant.u.gs.num_inputs = vs->out.count;
   for (i = 0; i < vs->out.count; i++) {
      variant.u.gs.semantic_names[i] = vs->out.semantic_names[i];
      variant.u.gs.semantic_indices[i] = vs->out.semantic_indices[i];
   }

   if (!gs_setup(&gcc, &state, &variant, num_verts))
      return false;

   if (!gs_compile_passthrough(&gcc)) {
      FREE(gcc.shader);
      gcc.shader = NULL;
   }

   /* no TGSI was translated, so toy_tgsi_cleanup() is not needed */
   toy_compiler_cleanup(&gcc.tc);

   return append_gs_to_vs(vs, gcc.shader, num_verts);
}