Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Mesa 3-D graphics library
  3.  *
  4.  * Copyright (C) 2012-2013 LunarG, Inc.
  5.  *
  6.  * Permission is hereby granted, free of charge, to any person obtaining a
  7.  * copy of this software and associated documentation files (the "Software"),
  8.  * to deal in the Software without restriction, including without limitation
  9.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10.  * and/or sell copies of the Software, and to permit persons to whom the
  11.  * Software is furnished to do so, subject to the following conditions:
  12.  *
  13.  * The above copyright notice and this permission notice shall be included
  14.  * in all copies or substantial portions of the Software.
  15.  *
  16.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  21.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  22.  * DEALINGS IN THE SOFTWARE.
  23.  *
  24.  * Authors:
  25.  *    Chia-I Wu <olv@lunarg.com>
  26.  */
  27.  
  28. #include "tgsi/tgsi_dump.h"
  29. #include "tgsi/tgsi_util.h"
  30. #include "toy_compiler.h"
  31. #include "toy_tgsi.h"
  32. #include "toy_legalize.h"
  33. #include "toy_optimize.h"
  34. #include "toy_helpers.h"
  35. #include "ilo_context.h"
  36. #include "ilo_shader_internal.h"
  37.  
  38. struct fs_compile_context {
      /* State shared by the fragment-shader lowering helpers in this file. */
  39.    struct ilo_shader *shader;
  40.    const struct ilo_shader_variant *variant; /* u.fs.fb_height and u.fs.flatshade are read when fetching inputs */
  41.  
  42.    struct toy_compiler tc; /* compiler instance every helper emits instructions into */
  43.    struct toy_tgsi tgsi; /* TGSI-derived info: inputs, system values, immediates, props */
  44.  
  45.    enum brw_message_target const_cache; /* message target of the SEND issued by the Gen6 constant load */
  46.    int dispatch_mode;
  47.  
      /* NOTE(review): only payloads[0] is read in the visible part of the file;
       * the meaning of the second entry should be confirmed elsewhere. */
  48.    struct {
  49.       int barycentric_interps[BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT]; /* GRF number per barycentric mode */
  50.       int source_depth; /* GRF number read for position Z */
  51.       int source_w; /* GRF number whose reciprocal becomes position W */
  52.       int pos_offset;
  53.    } payloads[2];
  54.  
  55.    int first_const_grf;
  56.    int first_attr_grf; /* base GRF of attribute data; input slot N starts at first_attr_grf + N * 2 */
  57.    int first_free_grf;
  58.    int last_free_grf;
  59.  
  60.    int num_grf_per_vrf;
  61.  
  62.    int first_free_mrf; /* MRF used as the start of message payloads built by the helpers */
  63.    int last_free_mrf;
  64. };
  65.  
  66. static void
  67. fetch_position(struct fs_compile_context *fcc, struct toy_dst dst)
  68. {
  69.    struct toy_compiler *tc = &fcc->tc;
  70.    const struct toy_src src_z =
  71.       tsrc(TOY_FILE_GRF, fcc->payloads[0].source_depth, 0);
  72.    const struct toy_src src_w =
  73.       tsrc(TOY_FILE_GRF, fcc->payloads[0].source_w, 0);
  74.    const int fb_height =
  75.       (fcc->variant->u.fs.fb_height) ? fcc->variant->u.fs.fb_height : 1;
  76.    const bool origin_upper_left =
  77.       (fcc->tgsi.props.fs_coord_origin == TGSI_FS_COORD_ORIGIN_UPPER_LEFT);
  78.    const bool pixel_center_integer =
  79.       (fcc->tgsi.props.fs_coord_pixel_center ==
  80.        TGSI_FS_COORD_PIXEL_CENTER_INTEGER);
  81.    struct toy_src subspan_x, subspan_y;
  82.    struct toy_dst tmp, tmp_uw;
  83.    struct toy_dst real_dst[4];
  84.  
  85.    tdst_transpose(dst, real_dst);
  86.  
  87.    subspan_x = tsrc_uw(tsrc(TOY_FILE_GRF, 1, 2 * 4));
  88.    subspan_x = tsrc_rect(subspan_x, TOY_RECT_240);
  89.  
  90.    subspan_y = tsrc_offset(subspan_x, 0, 1);
  91.  
  92.    tmp_uw = tdst_uw(tc_alloc_tmp(tc));
  93.    tmp = tc_alloc_tmp(tc);
  94.  
  95.    /* X */
  96.    tc_ADD(tc, tmp_uw, subspan_x, tsrc_imm_v(0x10101010));
  97.    tc_MOV(tc, tmp, tsrc_from(tmp_uw));
  98.    if (pixel_center_integer)
  99.       tc_MOV(tc, real_dst[0], tsrc_from(tmp));
  100.    else
  101.       tc_ADD(tc, real_dst[0], tsrc_from(tmp), tsrc_imm_f(0.5f));
  102.  
  103.    /* Y */
  104.    tc_ADD(tc, tmp_uw, subspan_y, tsrc_imm_v(0x11001100));
  105.    tc_MOV(tc, tmp, tsrc_from(tmp_uw));
  106.    if (origin_upper_left && pixel_center_integer) {
  107.       tc_MOV(tc, real_dst[1], tsrc_from(tmp));
  108.    }
  109.    else {
  110.       struct toy_src y = tsrc_from(tmp);
  111.       float offset = 0.0f;
  112.  
  113.       if (!pixel_center_integer)
  114.          offset += 0.5f;
  115.  
  116.       if (!origin_upper_left) {
  117.          offset += (float) (fb_height - 1);
  118.          y = tsrc_negate(y);
  119.       }
  120.  
  121.       tc_ADD(tc, real_dst[1], y, tsrc_imm_f(offset));
  122.    }
  123.  
  124.    /* Z and W */
  125.    tc_MOV(tc, real_dst[2], src_z);
  126.    tc_INV(tc, real_dst[3], src_w);
  127. }
  128.  
  129. static void
  130. fetch_face(struct fs_compile_context *fcc, struct toy_dst dst)
  131. {
  132.    struct toy_compiler *tc = &fcc->tc;
  133.    const struct toy_src r0 = tsrc_d(tsrc(TOY_FILE_GRF, 0, 0));
  134.    struct toy_dst tmp_f, tmp;
  135.    struct toy_dst real_dst[4];
  136.  
  137.    tdst_transpose(dst, real_dst);
  138.  
  139.    tmp_f = tc_alloc_tmp(tc);
  140.    tmp = tdst_d(tmp_f);
  141.    tc_SHR(tc, tmp, tsrc_rect(r0, TOY_RECT_010), tsrc_imm_d(15));
  142.    tc_AND(tc, tmp, tsrc_from(tmp), tsrc_imm_d(1));
  143.    tc_MOV(tc, tmp_f, tsrc_from(tmp));
  144.  
  145.    /* convert to 1.0 and -1.0 */
  146.    tc_MUL(tc, tmp_f, tsrc_from(tmp_f), tsrc_imm_f(-2.0f));
  147.    tc_ADD(tc, real_dst[0], tsrc_from(tmp_f), tsrc_imm_f(1.0f));
  148.  
  149.    tc_MOV(tc, real_dst[1], tsrc_imm_f(0.0f));
  150.    tc_MOV(tc, real_dst[2], tsrc_imm_f(0.0f));
  151.    tc_MOV(tc, real_dst[3], tsrc_imm_f(1.0f));
  152. }
  153.  
  154. static void
  155. fetch_attr(struct fs_compile_context *fcc, struct toy_dst dst, int slot)
  156. {
  157.    struct toy_compiler *tc = &fcc->tc;
  158.    struct toy_dst real_dst[4];
  159.    bool is_const = false;
  160.    int grf, mode, ch;
  161.  
  162.    tdst_transpose(dst, real_dst);
  163.  
  164.    grf = fcc->first_attr_grf + slot * 2;
  165.  
  166.    switch (fcc->tgsi.inputs[slot].interp) {
  167.    case TGSI_INTERPOLATE_CONSTANT:
  168.       is_const = true;
  169.       break;
  170.    case TGSI_INTERPOLATE_LINEAR:
  171.       if (fcc->tgsi.inputs[slot].centroid)
  172.          mode = BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC;
  173.       else
  174.          mode = BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC;
  175.       break;
  176.    case TGSI_INTERPOLATE_COLOR:
  177.       if (fcc->variant->u.fs.flatshade) {
  178.          is_const = true;
  179.          break;
  180.       }
  181.       /* fall through */
  182.    case TGSI_INTERPOLATE_PERSPECTIVE:
  183.       if (fcc->tgsi.inputs[slot].centroid)
  184.          mode = BRW_WM_PERSPECTIVE_CENTROID_BARYCENTRIC;
  185.       else
  186.          mode = BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC;
  187.       break;
  188.    default:
  189.       assert(!"unexpected FS interpolation");
  190.       mode = BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC;
  191.       break;
  192.    }
  193.  
  194.    if (is_const) {
  195.       struct toy_src a0[4];
  196.  
  197.       a0[0] = tsrc(TOY_FILE_GRF, grf + 0, 3 * 4);
  198.       a0[1] = tsrc(TOY_FILE_GRF, grf + 0, 7 * 4);
  199.       a0[2] = tsrc(TOY_FILE_GRF, grf + 1, 3 * 4);
  200.       a0[3] = tsrc(TOY_FILE_GRF, grf + 1, 7 * 4);
  201.  
  202.       for (ch = 0; ch < 4; ch++)
  203.          tc_MOV(tc, real_dst[ch], tsrc_rect(a0[ch], TOY_RECT_010));
  204.    }
  205.    else {
  206.       struct toy_src attr[4], uv;
  207.  
  208.       attr[0] = tsrc(TOY_FILE_GRF, grf + 0, 0);
  209.       attr[1] = tsrc(TOY_FILE_GRF, grf + 0, 4 * 4);
  210.       attr[2] = tsrc(TOY_FILE_GRF, grf + 1, 0);
  211.       attr[3] = tsrc(TOY_FILE_GRF, grf + 1, 4 * 4);
  212.  
  213.       uv = tsrc(TOY_FILE_GRF, fcc->payloads[0].barycentric_interps[mode], 0);
  214.  
  215.       for (ch = 0; ch < 4; ch++) {
  216.          tc_add2(tc, BRW_OPCODE_PLN, real_dst[ch],
  217.                tsrc_rect(attr[ch], TOY_RECT_010), uv);
  218.       }
  219.    }
  220.  
  221.    if (fcc->tgsi.inputs[slot].semantic_name == TGSI_SEMANTIC_FOG) {
  222.       tc_MOV(tc, real_dst[1], tsrc_imm_f(0.0f));
  223.       tc_MOV(tc, real_dst[2], tsrc_imm_f(0.0f));
  224.       tc_MOV(tc, real_dst[3], tsrc_imm_f(1.0f));
  225.    }
  226. }
  227.  
  228. static void
  229. fs_lower_opcode_tgsi_in(struct fs_compile_context *fcc,
  230.                         struct toy_dst dst, int dim, int idx)
  231. {
  232.    int slot;
  233.  
  234.    assert(!dim);
  235.  
  236.    slot = toy_tgsi_find_input(&fcc->tgsi, idx);
  237.    if (slot < 0)
  238.       return;
  239.  
  240.    switch (fcc->tgsi.inputs[slot].semantic_name) {
  241.    case TGSI_SEMANTIC_POSITION:
  242.       fetch_position(fcc, dst);
  243.       break;
  244.    case TGSI_SEMANTIC_FACE:
  245.       fetch_face(fcc, dst);
  246.       break;
  247.    default:
  248.       fetch_attr(fcc, dst, slot);
  249.       break;
  250.    }
  251. }
  252.  
  253. static void
  254. fs_lower_opcode_tgsi_indirect_const(struct fs_compile_context *fcc,
  255.                                     struct toy_dst dst, int dim,
  256.                                     struct toy_src idx)
  257. {
  258.    const struct toy_dst offset =
  259.       tdst_ud(tdst(TOY_FILE_MRF, fcc->first_free_mrf, 0));
  260.    struct toy_compiler *tc = &fcc->tc;
  261.    unsigned simd_mode, param_size;
  262.    struct toy_inst *inst;
  263.    struct toy_src desc, real_src[4];
  264.    struct toy_dst tmp, real_dst[4];
  265.    int i;
  266.  
  267.    tsrc_transpose(idx, real_src);
  268.  
  269.    /* set offset */
  270.    inst = tc_MOV(tc, offset, real_src[0]);
  271.    inst->mask_ctrl = BRW_MASK_DISABLE;
  272.  
  273.    switch (inst->exec_size) {
  274.    case BRW_EXECUTE_8:
  275.       simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
  276.       param_size = 1;
  277.       break;
  278.    case BRW_EXECUTE_16:
  279.       simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
  280.       param_size = 2;
  281.       break;
  282.    default:
  283.       assert(!"unsupported execution size");
  284.       tc_MOV(tc, dst, tsrc_imm_f(0.0f));
  285.       return;
  286.       break;
  287.    }
  288.  
  289.    desc = tsrc_imm_mdesc_sampler(tc, param_size, param_size * 4, false,
  290.          simd_mode,
  291.          GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
  292.          0,
  293.          ILO_WM_CONST_SURFACE(dim));
  294.  
  295.    tmp = tdst(TOY_FILE_VRF, tc_alloc_vrf(tc, param_size * 4), 0);
  296.    inst = tc_SEND(tc, tmp, tsrc_from(offset), desc, BRW_SFID_SAMPLER);
  297.    inst->mask_ctrl = BRW_MASK_DISABLE;
  298.  
  299.    tdst_transpose(dst, real_dst);
  300.    for (i = 0; i < 4; i++) {
  301.       const struct toy_src src =
  302.          tsrc_offset(tsrc_from(tmp), param_size * i, 0);
  303.  
  304.       /* cast to type D to make sure these are raw moves */
  305.       tc_MOV(tc, tdst_d(real_dst[i]), tsrc_d(src));
  306.    }
  307. }
  308.  
  309. static void
  310. fs_lower_opcode_tgsi_const_gen6(struct fs_compile_context *fcc,
  311.                                 struct toy_dst dst, int dim, struct toy_src idx)
  312. {
  313.    const struct toy_dst header =
  314.       tdst_ud(tdst(TOY_FILE_MRF, fcc->first_free_mrf, 0));
  315.    const struct toy_dst global_offset =
  316.       tdst_ud(tdst(TOY_FILE_MRF, fcc->first_free_mrf, 2 * 4));
  317.    const struct toy_src r0 = tsrc_ud(tsrc(TOY_FILE_GRF, 0, 0));
  318.    struct toy_compiler *tc = &fcc->tc;
  319.    unsigned msg_type, msg_ctrl, msg_len;
  320.    struct toy_inst *inst;
  321.    struct toy_src desc;
  322.    struct toy_dst tmp, real_dst[4];
  323.    int i;
  324.  
  325.    /* set message header */
  326.    inst = tc_MOV(tc, header, r0);
  327.    inst->mask_ctrl = BRW_MASK_DISABLE;
  328.  
  329.    /* set global offset */
  330.    inst = tc_MOV(tc, global_offset, idx);
  331.    inst->mask_ctrl = BRW_MASK_DISABLE;
  332.    inst->exec_size = BRW_EXECUTE_1;
  333.    inst->src[0].rect = TOY_RECT_010;
  334.  
  335.    msg_type = BRW_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ;
  336.    msg_ctrl = BRW_DATAPORT_OWORD_BLOCK_1_OWORDLOW << 8;
  337.    msg_len = 1;
  338.  
  339.    desc = tsrc_imm_mdesc_data_port(tc, false, msg_len, 1, true, false,
  340.          msg_type, msg_ctrl, ILO_WM_CONST_SURFACE(dim));
  341.  
  342.    tmp = tc_alloc_tmp(tc);
  343.  
  344.    tc_SEND(tc, tmp, tsrc_from(header), desc, fcc->const_cache);
  345.  
  346.    tdst_transpose(dst, real_dst);
  347.    for (i = 0; i < 4; i++) {
  348.       const struct toy_src src =
  349.          tsrc_offset(tsrc_rect(tsrc_from(tmp), TOY_RECT_010), 0, i);
  350.  
  351.       /* cast to type D to make sure these are raw moves */
  352.       tc_MOV(tc, tdst_d(real_dst[i]), tsrc_d(src));
  353.    }
  354. }
  355.  
  356. static void
  357. fs_lower_opcode_tgsi_const_gen7(struct fs_compile_context *fcc,
  358.                                 struct toy_dst dst, int dim, struct toy_src idx)
  359. {
  360.    struct toy_compiler *tc = &fcc->tc;
  361.    const struct toy_dst offset =
  362.       tdst_ud(tdst(TOY_FILE_MRF, fcc->first_free_mrf, 0));
  363.    struct toy_src desc;
  364.    struct toy_inst *inst;
  365.    struct toy_dst tmp, real_dst[4];
  366.    int i;
  367.  
  368.    /*
  369.     * In 4c1fdae0a01b3f92ec03b61aac1d3df500d51fc6, pull constant load was
  370.     * changed from OWord Block Read to ld to increase performance in the
  371.     * classic driver.  Since we use the constant cache instead of the data
  372.     * cache, I wonder if we still want to follow the classic driver.
  373.     */
  374.  
  375.    /* set offset */
  376.    inst = tc_MOV(tc, offset, tsrc_rect(idx, TOY_RECT_010));
  377.    inst->exec_size = BRW_EXECUTE_8;
  378.    inst->mask_ctrl = BRW_MASK_DISABLE;
  379.  
  380.    desc = tsrc_imm_mdesc_sampler(tc, 1, 1, false,
  381.          BRW_SAMPLER_SIMD_MODE_SIMD4X2,
  382.          GEN5_SAMPLER_MESSAGE_SAMPLE_LD,
  383.          0,
  384.          ILO_WM_CONST_SURFACE(dim));
  385.  
  386.    tmp = tc_alloc_tmp(tc);
  387.    inst = tc_SEND(tc, tmp, tsrc_from(offset), desc, BRW_SFID_SAMPLER);
  388.    inst->exec_size = BRW_EXECUTE_8;
  389.    inst->mask_ctrl = BRW_MASK_DISABLE;
  390.  
  391.    tdst_transpose(dst, real_dst);
  392.    for (i = 0; i < 4; i++) {
  393.       const struct toy_src src =
  394.          tsrc_offset(tsrc_rect(tsrc_from(tmp), TOY_RECT_010), 0, i);
  395.  
  396.       /* cast to type D to make sure these are raw moves */
  397.       tc_MOV(tc, tdst_d(real_dst[i]), tsrc_d(src));
  398.    }
  399. }
  400.  
  401. static void
  402. fs_lower_opcode_tgsi_imm(struct fs_compile_context *fcc,
  403.                          struct toy_dst dst, int idx)
  404. {
  405.    const uint32_t *imm;
  406.    struct toy_dst real_dst[4];
  407.    int ch;
  408.  
  409.    imm = toy_tgsi_get_imm(&fcc->tgsi, idx, NULL);
  410.  
  411.    tdst_transpose(dst, real_dst);
  412.    /* raw moves */
  413.    for (ch = 0; ch < 4; ch++)
  414.       tc_MOV(&fcc->tc, tdst_ud(real_dst[ch]), tsrc_imm_ud(imm[ch]));
  415. }
  416.  
  417. static void
  418. fs_lower_opcode_tgsi_sv(struct fs_compile_context *fcc,
  419.                         struct toy_dst dst, int dim, int idx)
  420. {
  421.    struct toy_compiler *tc = &fcc->tc;
  422.    const struct toy_tgsi *tgsi = &fcc->tgsi;
  423.    int slot;
  424.  
  425.    assert(!dim);
  426.  
  427.    slot = toy_tgsi_find_system_value(tgsi, idx);
  428.    if (slot < 0)
  429.       return;
  430.  
  431.    switch (tgsi->system_values[slot].semantic_name) {
  432.    case TGSI_SEMANTIC_PRIMID:
  433.    case TGSI_SEMANTIC_INSTANCEID:
  434.    case TGSI_SEMANTIC_VERTEXID:
  435.    default:
  436.       tc_fail(tc, "unhandled system value");
  437.       tc_MOV(tc, dst, tsrc_imm_d(0));
  438.       break;
  439.    }
  440. }
  441.  
  442. static void
  443. fs_lower_opcode_tgsi_direct(struct fs_compile_context *fcc,
  444.                             struct toy_inst *inst)
  445. {
  446.    struct toy_compiler *tc = &fcc->tc;
  447.    int dim, idx;
  448.  
  449.    assert(inst->src[0].file == TOY_FILE_IMM);
  450.    dim = inst->src[0].val32;
  451.  
  452.    assert(inst->src[1].file == TOY_FILE_IMM);
  453.    idx = inst->src[1].val32;
  454.  
  455.    switch (inst->opcode) {
  456.    case TOY_OPCODE_TGSI_IN:
  457.       fs_lower_opcode_tgsi_in(fcc, inst->dst, dim, idx);
  458.       break;
  459.    case TOY_OPCODE_TGSI_CONST:
  460.       if (tc->dev->gen >= ILO_GEN(7))
  461.          fs_lower_opcode_tgsi_const_gen7(fcc, inst->dst, dim, inst->src[1]);
  462.       else
  463.          fs_lower_opcode_tgsi_const_gen6(fcc, inst->dst, dim, inst->src[1]);
  464.       break;
  465.    case TOY_OPCODE_TGSI_SV:
  466.       fs_lower_opcode_tgsi_sv(fcc, inst->dst, dim, idx);
  467.       break;
  468.    case TOY_OPCODE_TGSI_IMM:
  469.       assert(!dim);
  470.       fs_lower_opcode_tgsi_imm(fcc, inst->dst, idx);
  471.       break;
  472.    default:
  473.       tc_fail(tc, "unhandled TGSI fetch");
  474.       break;
  475.    }
  476.  
  477.    tc_discard_inst(tc, inst);
  478. }
  479.  
  480. static void
  481. fs_lower_opcode_tgsi_indirect(struct fs_compile_context *fcc,
  482.                               struct toy_inst *inst)
  483. {
  484.    struct toy_compiler *tc = &fcc->tc;
  485.    enum tgsi_file_type file;
  486.    int dim, idx;
  487.    struct toy_src indirect_dim, indirect_idx;
  488.  
  489.    assert(inst->src[0].file == TOY_FILE_IMM);
  490.    file = inst->src[0].val32;
  491.  
  492.    assert(inst->src[1].file == TOY_FILE_IMM);
  493.    dim = inst->src[1].val32;
  494.    indirect_dim = inst->src[2];
  495.  
  496.    assert(inst->src[3].file == TOY_FILE_IMM);
  497.    idx = inst->src[3].val32;
  498.    indirect_idx = inst->src[4];
  499.  
  500.    /* no dimension indirection */
  501.    assert(indirect_dim.file == TOY_FILE_IMM);
  502.    dim += indirect_dim.val32;
  503.  
  504.    switch (inst->opcode) {
  505.    case TOY_OPCODE_TGSI_INDIRECT_FETCH:
  506.       if (file == TGSI_FILE_CONSTANT) {
  507.          if (idx) {
  508.             struct toy_dst tmp = tc_alloc_tmp(tc);
  509.  
  510.             tc_ADD(tc, tmp, indirect_idx, tsrc_imm_d(idx));
  511.             indirect_idx = tsrc_from(tmp);
  512.          }
  513.  
  514.          fs_lower_opcode_tgsi_indirect_const(fcc, inst->dst, dim, indirect_idx);
  515.          break;
  516.       }
  517.       /* fall through */
  518.    case TOY_OPCODE_TGSI_INDIRECT_STORE:
  519.    default:
  520.       tc_fail(tc, "unhandled TGSI indirection");
  521.       break;
  522.    }
  523.  
  524.    tc_discard_inst(tc, inst);
  525. }
  526.  
  527. /**
  528.  * Emit instructions to move sampling parameters to the message registers.
  529.  */
  530. static int
  531. fs_add_sampler_params_gen6(struct toy_compiler *tc, int msg_type,
  532.                            int base_mrf, int param_size,
  533.                            struct toy_src *coords, int num_coords,
  534.                            struct toy_src bias_or_lod, struct toy_src ref_or_si,
  535.                            struct toy_src *ddx, struct toy_src *ddy,
  536.                            int num_derivs)
  537. {
  538.    int num_params, i;
  539.  
  540.    assert(num_coords <= 4);
  541.    assert(num_derivs <= 3 && num_derivs <= num_coords);
  542.  
  543. #define SAMPLER_PARAM(p) (tdst(TOY_FILE_MRF, base_mrf + (p) * param_size, 0))
  544.    switch (msg_type) {
  545.    case GEN5_SAMPLER_MESSAGE_SAMPLE:
  546.       for (i = 0; i < num_coords; i++)
  547.          tc_MOV(tc, SAMPLER_PARAM(i), coords[i]);
  548.       num_params = num_coords;
  549.       break;
  550.    case GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS:
  551.    case GEN5_SAMPLER_MESSAGE_SAMPLE_LOD:
  552.       for (i = 0; i < num_coords; i++)
  553.          tc_MOV(tc, SAMPLER_PARAM(i), coords[i]);
  554.       tc_MOV(tc, SAMPLER_PARAM(4), bias_or_lod);
  555.       num_params = 5;
  556.       break;
  557.    case GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE:
  558.       for (i = 0; i < num_coords; i++)
  559.          tc_MOV(tc, SAMPLER_PARAM(i), coords[i]);
  560.       tc_MOV(tc, SAMPLER_PARAM(4), ref_or_si);
  561.       num_params = 5;
  562.       break;
  563.    case GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS:
  564.       for (i = 0; i < num_coords; i++)
  565.          tc_MOV(tc, SAMPLER_PARAM(i), coords[i]);
  566.       for (i = 0; i < num_derivs; i++) {
  567.          tc_MOV(tc, SAMPLER_PARAM(4 + i * 2), ddx[i]);
  568.          tc_MOV(tc, SAMPLER_PARAM(5 + i * 2), ddy[i]);
  569.       }
  570.       num_params = 4 + num_derivs * 2;
  571.       break;
  572.    case GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE:
  573.    case GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE:
  574.       for (i = 0; i < num_coords; i++)
  575.          tc_MOV(tc, SAMPLER_PARAM(i), coords[i]);
  576.       tc_MOV(tc, SAMPLER_PARAM(4), ref_or_si);
  577.       tc_MOV(tc, SAMPLER_PARAM(5), bias_or_lod);
  578.       num_params = 6;
  579.       break;
  580.    case GEN5_SAMPLER_MESSAGE_SAMPLE_LD:
  581.       assert(num_coords <= 3);
  582.  
  583.       for (i = 0; i < num_coords; i++)
  584.          tc_MOV(tc, tdst_d(SAMPLER_PARAM(i)), coords[i]);
  585.       tc_MOV(tc, tdst_d(SAMPLER_PARAM(3)), bias_or_lod);
  586.       tc_MOV(tc, tdst_d(SAMPLER_PARAM(4)), ref_or_si);
  587.       num_params = 5;
  588.       break;
  589.    case GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO:
  590.       tc_MOV(tc, tdst_d(SAMPLER_PARAM(0)), bias_or_lod);
  591.       num_params = 1;
  592.       break;
  593.    default:
  594.       tc_fail(tc, "unknown sampler opcode");
  595.       num_params = 0;
  596.       break;
  597.    }
  598. #undef SAMPLER_PARAM
  599.  
  600.    return num_params * param_size;
  601. }
  602.  
  603. static int
  604. fs_add_sampler_params_gen7(struct toy_compiler *tc, int msg_type,
  605.                            int base_mrf, int param_size,
  606.                            struct toy_src *coords, int num_coords,
  607.                            struct toy_src bias_or_lod, struct toy_src ref_or_si,
  608.                            struct toy_src *ddx, struct toy_src *ddy,
  609.                            int num_derivs)
  610. {
  611.    int num_params, i;
  612.  
  613.    assert(num_coords <= 4);
  614.    assert(num_derivs <= 3 && num_derivs <= num_coords);
  615.  
  616. #define SAMPLER_PARAM(p) (tdst(TOY_FILE_MRF, base_mrf + (p) * param_size, 0))
  617.    switch (msg_type) {
  618.    case GEN5_SAMPLER_MESSAGE_SAMPLE:
  619.       for (i = 0; i < num_coords; i++)
  620.          tc_MOV(tc, SAMPLER_PARAM(i), coords[i]);
  621.       num_params = num_coords;
  622.       break;
  623.    case GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS:
  624.    case GEN5_SAMPLER_MESSAGE_SAMPLE_LOD:
  625.       tc_MOV(tc, SAMPLER_PARAM(0), bias_or_lod);
  626.       for (i = 0; i < num_coords; i++)
  627.          tc_MOV(tc, SAMPLER_PARAM(1 + i), coords[i]);
  628.       num_params = 1 + num_coords;
  629.       break;
  630.    case GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE:
  631.       tc_MOV(tc, SAMPLER_PARAM(0), ref_or_si);
  632.       for (i = 0; i < num_coords; i++)
  633.          tc_MOV(tc, SAMPLER_PARAM(1 + i), coords[i]);
  634.       num_params = 1 + num_coords;
  635.       break;
  636.    case GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS:
  637.       for (i = 0; i < num_coords; i++) {
  638.          tc_MOV(tc, SAMPLER_PARAM(i * 3), coords[i]);
  639.          if (i < num_derivs) {
  640.             tc_MOV(tc, SAMPLER_PARAM(i * 3 + 1), ddx[i]);
  641.             tc_MOV(tc, SAMPLER_PARAM(i * 3 + 2), ddy[i]);
  642.          }
  643.       }
  644.       num_params = num_coords * 3 - ((num_coords > num_derivs) ? 2 : 0);
  645.       break;
  646.    case GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE:
  647.    case GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE:
  648.       tc_MOV(tc, SAMPLER_PARAM(0), ref_or_si);
  649.       tc_MOV(tc, SAMPLER_PARAM(1), bias_or_lod);
  650.       for (i = 0; i < num_coords; i++)
  651.          tc_MOV(tc, SAMPLER_PARAM(2 + i), coords[i]);
  652.       num_params = 2 + num_coords;
  653.       break;
  654.    case GEN5_SAMPLER_MESSAGE_SAMPLE_LD:
  655.       assert(num_coords >= 1 && num_coords <= 3);
  656.  
  657.       tc_MOV(tc, tdst_d(SAMPLER_PARAM(0)), coords[0]);
  658.       tc_MOV(tc, tdst_d(SAMPLER_PARAM(1)), bias_or_lod);
  659.       for (i = 1; i < num_coords; i++)
  660.          tc_MOV(tc, tdst_d(SAMPLER_PARAM(1 + i)), coords[i]);
  661.       num_params = 1 + num_coords;
  662.       break;
  663.    case GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO:
  664.       tc_MOV(tc, tdst_d(SAMPLER_PARAM(0)), bias_or_lod);
  665.       num_params = 1;
  666.       break;
  667.    default:
  668.       tc_fail(tc, "unknown sampler opcode");
  669.       num_params = 0;
  670.       break;
  671.    }
  672. #undef SAMPLER_PARAM
  673.  
  674.    return num_params * param_size;
  675. }
  676.  
  677. /**
  678.  * Set up message registers and return the message descriptor for sampling.
  679.  */
  680. static struct toy_src
  681. fs_prepare_tgsi_sampling(struct toy_compiler *tc, const struct toy_inst *inst,
  682.                          int base_mrf, const uint32_t *saturate_coords,
  683.                          unsigned *ret_sampler_index)
  684. {
  685.    unsigned simd_mode, msg_type, msg_len, sampler_index, binding_table_index;
  686.    struct toy_src coords[4], ddx[4], ddy[4], bias_or_lod, ref_or_si;
  687.    int num_coords, ref_pos, num_derivs;
  688.    int sampler_src, param_size, i;
  689.  
  690.    switch (inst->exec_size) {
  691.    case BRW_EXECUTE_8:
  692.       simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8;
  693.       param_size = 1;
  694.       break;
  695.    case BRW_EXECUTE_16:
  696.       simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16;
  697.       param_size = 2;
  698.       break;
  699.    default:
  700.       tc_fail(tc, "unsupported execute size for sampling");
  701.       return tsrc_null();
  702.       break;
  703.    }
  704.  
  705.    num_coords = tgsi_util_get_texture_coord_dim(inst->tex.target, &ref_pos);
  706.    tsrc_transpose(inst->src[0], coords);
  707.    bias_or_lod = tsrc_null();
  708.    ref_or_si = tsrc_null();
  709.    num_derivs = 0;
  710.    sampler_src = 1;
  711.  
  712.    /*
  713.     * For TXD,
  714.     *
  715.     *   src0 := (x, y, z, w)
  716.     *   src1 := ddx
  717.     *   src2 := ddy
  718.     *   src3 := sampler
  719.     *
  720.     * For TEX2, TXB2, and TXL2,
  721.     *
  722.     *   src0 := (x, y, z, w)
  723.     *   src1 := (v or bias or lod, ...)
  724.     *   src2 := sampler
  725.     *
  726.     * For TEX, TXB, TXL, and TXP,
  727.     *
  728.     *   src0 := (x, y, z, w or bias or lod or projection)
  729.     *   src1 := sampler
  730.     *
  731.     * For TXQ,
  732.     *
  733.     *   src0 := (lod, ...)
  734.     *   src1 := sampler
  735.     *
  736.     * For TXQ_LZ,
  737.     *
  738.     *   src0 := sampler
  739.     *
  740.     * And for TXF,
  741.     *
  742.     *   src0 := (x, y, z, w or lod)
  743.     *   src1 := sampler
  744.     *
  745.     * State trackers should not generate opcode+texture combinations with
  746.     * which the two definitions conflict (e.g., TXB with SHADOW2DARRAY).
  747.     */
  748.    switch (inst->opcode) {
  749.    case TOY_OPCODE_TGSI_TEX:
  750.       if (ref_pos >= 0) {
  751.          assert(ref_pos < 4);
  752.  
  753.          msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE;
  754.          ref_or_si = coords[ref_pos];
  755.       }
  756.       else {
  757.          msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE;
  758.       }
  759.       break;
  760.    case TOY_OPCODE_TGSI_TXD:
  761.       if (ref_pos >= 0)
  762.          tc_fail(tc, "TXD with shadow sampler not supported");
  763.  
  764.       msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS;
  765.       tsrc_transpose(inst->src[1], ddx);
  766.       tsrc_transpose(inst->src[2], ddy);
  767.       num_derivs = num_coords;
  768.       sampler_src = 3;
  769.       break;
  770.    case TOY_OPCODE_TGSI_TXP:
  771.       if (ref_pos >= 0) {
  772.          assert(ref_pos < 3);
  773.  
  774.          msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE;
  775.          ref_or_si = coords[ref_pos];
  776.       }
  777.       else {
  778.          msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE;
  779.       }
  780.  
  781.       /* project the coordinates */
  782.       {
  783.          struct toy_dst tmp[4];
  784.  
  785.          tc_alloc_tmp4(tc, tmp);
  786.  
  787.          tc_INV(tc, tmp[3], coords[3]);
  788.          for (i = 0; i < num_coords && i < 3; i++) {
  789.             tc_MUL(tc, tmp[i], coords[i], tsrc_from(tmp[3]));
  790.             coords[i] = tsrc_from(tmp[i]);
  791.          }
  792.  
  793.          if (ref_pos >= i) {
  794.             tc_MUL(tc, tmp[ref_pos], ref_or_si, tsrc_from(tmp[3]));
  795.             ref_or_si = tsrc_from(tmp[ref_pos]);
  796.          }
  797.       }
  798.       break;
  799.    case TOY_OPCODE_TGSI_TXB:
  800.       if (ref_pos >= 0) {
  801.          assert(ref_pos < 3);
  802.  
  803.          msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE;
  804.          ref_or_si = coords[ref_pos];
  805.       }
  806.       else {
  807.          msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS;
  808.       }
  809.  
  810.       bias_or_lod = coords[3];
  811.       break;
  812.    case TOY_OPCODE_TGSI_TXL:
  813.       if (ref_pos >= 0) {
  814.          assert(ref_pos < 3);
  815.  
  816.          msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE;
  817.          ref_or_si = coords[ref_pos];
  818.       }
  819.       else {
  820.          msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
  821.       }
  822.  
  823.       bias_or_lod = coords[3];
  824.       break;
  825.    case TOY_OPCODE_TGSI_TXF:
  826.       msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD;
  827.  
  828.       switch (inst->tex.target) {
  829.       case TGSI_TEXTURE_2D_MSAA:
  830.       case TGSI_TEXTURE_2D_ARRAY_MSAA:
  831.          assert(ref_pos >= 0 && ref_pos < 4);
  832.          /* lod is always 0 */
  833.          bias_or_lod = tsrc_imm_d(0);
  834.          ref_or_si = coords[ref_pos];
  835.          break;
  836.       default:
  837.          bias_or_lod = coords[3];
  838.          break;
  839.       }
  840.  
  841.       /* offset the coordinates */
  842.       if (!tsrc_is_null(inst->tex.offsets[0])) {
  843.          struct toy_dst tmp[4];
  844.          struct toy_src offsets[4];
  845.  
  846.          tc_alloc_tmp4(tc, tmp);
  847.          tsrc_transpose(inst->tex.offsets[0], offsets);
  848.  
  849.          for (i = 0; i < num_coords; i++) {
  850.             tc_ADD(tc, tmp[i], coords[i], offsets[i]);
  851.             coords[i] = tsrc_from(tmp[i]);
  852.          }
  853.       }
  854.  
  855.       sampler_src = 1;
  856.       break;
  857.    case TOY_OPCODE_TGSI_TXQ:
  858.       msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
  859.       num_coords = 0;
  860.       bias_or_lod = coords[0];
  861.       break;
  862.    case TOY_OPCODE_TGSI_TXQ_LZ:
  863.       msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO;
  864.       num_coords = 0;
  865.       sampler_src = 0;
  866.       break;
  867.    case TOY_OPCODE_TGSI_TEX2:
  868.       if (ref_pos >= 0) {
  869.          assert(ref_pos < 5);
  870.  
  871.          msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE;
  872.  
  873.          if (ref_pos >= 4) {
  874.             struct toy_src src1[4];
  875.             tsrc_transpose(inst->src[1], src1);
  876.             ref_or_si = src1[ref_pos - 4];
  877.          }
  878.          else {
  879.             ref_or_si = coords[ref_pos];
  880.          }
  881.       }
  882.       else {
  883.          msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE;
  884.       }
  885.  
  886.       sampler_src = 2;
  887.       break;
  888.    case TOY_OPCODE_TGSI_TXB2:
  889.       if (ref_pos >= 0) {
  890.          assert(ref_pos < 4);
  891.  
  892.          msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE;
  893.          ref_or_si = coords[ref_pos];
  894.       }
  895.       else {
  896.          msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS;
  897.       }
  898.  
  899.       {
  900.          struct toy_src src1[4];
  901.          tsrc_transpose(inst->src[1], src1);
  902.          bias_or_lod = src1[0];
  903.       }
  904.  
  905.       sampler_src = 2;
  906.       break;
  907.    case TOY_OPCODE_TGSI_TXL2:
  908.       if (ref_pos >= 0) {
  909.          assert(ref_pos < 4);
  910.  
  911.          msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE;
  912.          ref_or_si = coords[ref_pos];
  913.       }
  914.       else {
  915.          msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD;
  916.       }
  917.  
  918.       {
  919.          struct toy_src src1[4];
  920.          tsrc_transpose(inst->src[1], src1);
  921.          bias_or_lod = src1[0];
  922.       }
  923.  
  924.       sampler_src = 2;
  925.       break;
  926.    default:
  927.       assert(!"unhandled sampling opcode");
  928.       return tsrc_null();
  929.       break;
  930.    }
  931.  
  932.    assert(inst->src[sampler_src].file == TOY_FILE_IMM);
  933.    sampler_index = inst->src[sampler_src].val32;
  934.    binding_table_index = ILO_WM_TEXTURE_SURFACE(sampler_index);
  935.  
  936.    /*
  937.     * From the Sandy Bridge PRM, volume 4 part 1, page 18:
  938.     *
  939.     *     "Note that the (cube map) coordinates delivered to the sampling
  940.     *      engine must already have been divided by the component with the
  941.     *      largest absolute value."
  942.     */
  943.    switch (inst->tex.target) {
  944.    case TGSI_TEXTURE_CUBE:
  945.    case TGSI_TEXTURE_SHADOWCUBE:
  946.    case TGSI_TEXTURE_CUBE_ARRAY:
  947.    case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
  948.       /* TXQ does not need coordinates */
  949.       if (num_coords >= 3) {
  950.          struct toy_dst tmp[4];
  951.  
  952.          tc_alloc_tmp4(tc, tmp);
  953.  
  954.          tc_SEL(tc, tmp[3], tsrc_absolute(coords[0]),
  955.                tsrc_absolute(coords[1]), BRW_CONDITIONAL_GE);
  956.          tc_SEL(tc, tmp[3], tsrc_from(tmp[3]),
  957.                tsrc_absolute(coords[2]), BRW_CONDITIONAL_GE);
  958.          tc_INV(tc, tmp[3], tsrc_from(tmp[3]));
  959.  
  960.          for (i = 0; i < 3; i++) {
  961.             tc_MUL(tc, tmp[i], coords[i], tsrc_from(tmp[3]));
  962.             coords[i] = tsrc_from(tmp[i]);
  963.          }
  964.       }
  965.       break;
  966.    }
  967.  
  968.    /*
  969.     * Saturate (s, t, r).  saturate_coords is set for sampler and coordinate
  970.     * that uses linear filtering and PIPE_TEX_WRAP_CLAMP respectively.  It is
  971.     * so that sampling outside the border gets the correct colors.
  972.     */
  973.    for (i = 0; i < MIN2(num_coords, 3); i++) {
  974.       bool is_rect;
  975.  
  976.       if (!(saturate_coords[i] & (1 << sampler_index)))
  977.          continue;
  978.  
  979.       switch (inst->tex.target) {
  980.       case TGSI_TEXTURE_RECT:
  981.       case TGSI_TEXTURE_SHADOWRECT:
  982.          is_rect = true;
  983.          break;
  984.       default:
  985.          is_rect = false;
  986.          break;
  987.       }
  988.  
  989.       if (is_rect) {
  990.          struct toy_src min, max;
  991.          struct toy_dst tmp;
  992.  
  993.          tc_fail(tc, "GL_CLAMP with rectangle texture unsupported");
  994.          tmp = tc_alloc_tmp(tc);
  995.  
  996.          /* saturate to [0, width] or [0, height] */
  997.          /* TODO TXQ? */
  998.          min = tsrc_imm_f(0.0f);
  999.          max = tsrc_imm_f(2048.0f);
  1000.  
  1001.          tc_SEL(tc, tmp, coords[i], min, BRW_CONDITIONAL_G);
  1002.          tc_SEL(tc, tmp, tsrc_from(tmp), max, BRW_CONDITIONAL_L);
  1003.  
  1004.          coords[i] = tsrc_from(tmp);
  1005.       }
  1006.       else {
  1007.          struct toy_dst tmp;
  1008.          struct toy_inst *inst2;
  1009.  
  1010.          tmp = tc_alloc_tmp(tc);
  1011.  
  1012.          /* saturate to [0.0f, 1.0f] */
  1013.          inst2 = tc_MOV(tc, tmp, coords[i]);
  1014.          inst2->saturate = true;
  1015.  
  1016.          coords[i] = tsrc_from(tmp);
  1017.       }
  1018.    }
  1019.  
  1020.    /* set up sampler parameters */
  1021.    if (tc->dev->gen >= ILO_GEN(7)) {
  1022.       msg_len = fs_add_sampler_params_gen7(tc, msg_type, base_mrf, param_size,
  1023.             coords, num_coords, bias_or_lod, ref_or_si, ddx, ddy, num_derivs);
  1024.    }
  1025.    else {
  1026.       msg_len = fs_add_sampler_params_gen6(tc, msg_type, base_mrf, param_size,
  1027.             coords, num_coords, bias_or_lod, ref_or_si, ddx, ddy, num_derivs);
  1028.    }
  1029.  
  1030.    /*
  1031.     * From the Sandy Bridge PRM, volume 4 part 1, page 136:
  1032.     *
  1033.     *     "The maximum message length allowed to the sampler is 11. This would
  1034.     *      disallow sample_d, sample_b_c, and sample_l_c with a SIMD Mode of
  1035.     *      SIMD16."
  1036.     */
  1037.    if (msg_len > 11)
  1038.       tc_fail(tc, "maximum length for messages to the sampler is 11");
  1039.  
  1040.    if (ret_sampler_index)
  1041.       *ret_sampler_index = sampler_index;
  1042.  
  1043.    return tsrc_imm_mdesc_sampler(tc, msg_len, 4 * param_size,
  1044.          false, simd_mode, msg_type, sampler_index, binding_table_index);
  1045. }
  1046.  
  1047. static void
  1048. fs_lower_opcode_tgsi_sampling(struct fs_compile_context *fcc,
  1049.                               struct toy_inst *inst)
  1050. {
  1051.    struct toy_compiler *tc = &fcc->tc;
  1052.    struct toy_dst dst[4], tmp[4];
  1053.    struct toy_src desc;
  1054.    unsigned sampler_index;
  1055.    int swizzles[4], i;
  1056.    bool need_filter;
  1057.  
  1058.    desc = fs_prepare_tgsi_sampling(tc, inst,
  1059.          fcc->first_free_mrf,
  1060.          fcc->variant->saturate_tex_coords,
  1061.          &sampler_index);
  1062.  
  1063.    switch (inst->opcode) {
  1064.    case TOY_OPCODE_TGSI_TXF:
  1065.    case TOY_OPCODE_TGSI_TXQ:
  1066.    case TOY_OPCODE_TGSI_TXQ_LZ:
  1067.       need_filter = false;
  1068.       break;
  1069.    default:
  1070.       need_filter = true;
  1071.       break;
  1072.    }
  1073.  
  1074.    toy_compiler_lower_to_send(tc, inst, false, BRW_SFID_SAMPLER);
  1075.    inst->src[0] = tsrc(TOY_FILE_MRF, fcc->first_free_mrf, 0);
  1076.    inst->src[1] = desc;
  1077.    for (i = 2; i < Elements(inst->src); i++)
  1078.       inst->src[i] = tsrc_null();
  1079.  
  1080.    /* write to temps first */
  1081.    tc_alloc_tmp4(tc, tmp);
  1082.    for (i = 0; i < 4; i++)
  1083.       tmp[i].type = inst->dst.type;
  1084.    tdst_transpose(inst->dst, dst);
  1085.    inst->dst = tmp[0];
  1086.  
  1087.    tc_move_inst(tc, inst);
  1088.  
  1089.    if (need_filter) {
  1090.       assert(sampler_index < fcc->variant->num_sampler_views);
  1091.       swizzles[0] = fcc->variant->sampler_view_swizzles[sampler_index].r;
  1092.       swizzles[1] = fcc->variant->sampler_view_swizzles[sampler_index].g;
  1093.       swizzles[2] = fcc->variant->sampler_view_swizzles[sampler_index].b;
  1094.       swizzles[3] = fcc->variant->sampler_view_swizzles[sampler_index].a;
  1095.    }
  1096.    else {
  1097.       swizzles[0] = PIPE_SWIZZLE_RED;
  1098.       swizzles[1] = PIPE_SWIZZLE_GREEN;
  1099.       swizzles[2] = PIPE_SWIZZLE_BLUE;
  1100.       swizzles[3] = PIPE_SWIZZLE_ALPHA;
  1101.    }
  1102.  
  1103.    /* swizzle the results */
  1104.    for (i = 0; i < 4; i++) {
  1105.       switch (swizzles[i]) {
  1106.       case PIPE_SWIZZLE_ZERO:
  1107.          tc_MOV(tc, dst[i], tsrc_imm_f(0.0f));
  1108.          break;
  1109.       case PIPE_SWIZZLE_ONE:
  1110.          tc_MOV(tc, dst[i], tsrc_imm_f(1.0f));
  1111.          break;
  1112.       default:
  1113.          tc_MOV(tc, dst[i], tsrc_from(tmp[swizzles[i]]));
  1114.          break;
  1115.       }
  1116.    }
  1117. }
  1118.  
  1119. static void
  1120. fs_lower_opcode_derivative(struct toy_compiler *tc, struct toy_inst *inst)
  1121. {
  1122.    struct toy_dst dst[4];
  1123.    struct toy_src src[4];
  1124.    int i;
  1125.  
  1126.    tdst_transpose(inst->dst, dst);
  1127.    tsrc_transpose(inst->src[0], src);
  1128.  
  1129.    /*
  1130.     * Every four fragments are from a 2x2 subspan, with
  1131.     *
  1132.     *   fragment 1 on the top-left,
  1133.     *   fragment 2 on the top-right,
  1134.     *   fragment 3 on the bottom-left,
  1135.     *   fragment 4 on the bottom-right.
  1136.     *
  1137.     * DDX should thus produce
  1138.     *
  1139.     *   dst = src.yyww - src.xxzz
  1140.     *
  1141.     * and DDY should produce
  1142.     *
  1143.     *   dst = src.zzww - src.xxyy
  1144.     *
  1145.     * But since we are in BRW_ALIGN_1, swizzling does not work and we have to
  1146.     * play with the region parameters.
  1147.     */
  1148.    if (inst->opcode == TOY_OPCODE_DDX) {
  1149.       for (i = 0; i < 4; i++) {
  1150.          struct toy_src left, right;
  1151.  
  1152.          left = tsrc_rect(src[i], TOY_RECT_220);
  1153.          right = tsrc_offset(left, 0, 1);
  1154.  
  1155.          tc_ADD(tc, dst[i], right, tsrc_negate(left));
  1156.       }
  1157.    }
  1158.    else {
  1159.       for (i = 0; i < 4; i++) {
  1160.          struct toy_src top, bottom;
  1161.  
  1162.          /* approximate with dst = src.zzzz - src.xxxx */
  1163.          top = tsrc_rect(src[i], TOY_RECT_440);
  1164.          bottom = tsrc_offset(top, 0, 2);
  1165.  
  1166.          tc_ADD(tc, dst[i], bottom, tsrc_negate(top));
  1167.       }
  1168.    }
  1169.  
  1170.    tc_discard_inst(tc, inst);
  1171. }
  1172.  
  1173. static void
  1174. fs_lower_opcode_fb_write(struct toy_compiler *tc, struct toy_inst *inst)
  1175. {
  1176.    /* fs_write_fb() has set up the message registers */
  1177.    toy_compiler_lower_to_send(tc, inst, true,
  1178.          GEN6_SFID_DATAPORT_RENDER_CACHE);
  1179. }
  1180.  
  1181. static void
  1182. fs_lower_opcode_kil(struct toy_compiler *tc, struct toy_inst *inst)
  1183. {
  1184.    struct toy_dst pixel_mask_dst;
  1185.    struct toy_src f0, pixel_mask;
  1186.    struct toy_inst *tmp;
  1187.  
  1188.    /* lower half of r1.7:ud */
  1189.    pixel_mask_dst = tdst_uw(tdst(TOY_FILE_GRF, 1, 7 * 4));
  1190.    pixel_mask = tsrc_rect(tsrc_from(pixel_mask_dst), TOY_RECT_010);
  1191.  
  1192.    f0 = tsrc_rect(tsrc_uw(tsrc(TOY_FILE_ARF, BRW_ARF_FLAG, 0)), TOY_RECT_010);
  1193.  
  1194.    /* KILL or KILL_IF */
  1195.    if (tsrc_is_null(inst->src[0])) {
  1196.       struct toy_src dummy = tsrc_uw(tsrc(TOY_FILE_GRF, 0, 0));
  1197.       struct toy_dst f0_dst = tdst_uw(tdst(TOY_FILE_ARF, BRW_ARF_FLAG, 0));
  1198.  
  1199.       /* create a mask that masks out all pixels */
  1200.       tmp = tc_MOV(tc, f0_dst, tsrc_rect(tsrc_imm_uw(0xffff), TOY_RECT_010));
  1201.       tmp->exec_size = BRW_EXECUTE_1;
  1202.       tmp->mask_ctrl = BRW_MASK_DISABLE;
  1203.  
  1204.       tc_CMP(tc, tdst_null(), dummy, dummy, BRW_CONDITIONAL_NEQ);
  1205.  
  1206.       /* swapping the two src operands breaks glBitmap()!? */
  1207.       tmp = tc_AND(tc, pixel_mask_dst, f0, pixel_mask);
  1208.       tmp->exec_size = BRW_EXECUTE_1;
  1209.       tmp->mask_ctrl = BRW_MASK_DISABLE;
  1210.    }
  1211.    else {
  1212.       struct toy_src src[4];
  1213.       int i;
  1214.  
  1215.       tsrc_transpose(inst->src[0], src);
  1216.       /* mask out killed pixels */
  1217.       for (i = 0; i < 4; i++) {
  1218.          tc_CMP(tc, tdst_null(), src[i], tsrc_imm_f(0.0f),
  1219.                BRW_CONDITIONAL_GE);
  1220.  
  1221.          /* swapping the two src operands breaks glBitmap()!? */
  1222.          tmp = tc_AND(tc, pixel_mask_dst, f0, pixel_mask);
  1223.          tmp->exec_size = BRW_EXECUTE_1;
  1224.          tmp->mask_ctrl = BRW_MASK_DISABLE;
  1225.       }
  1226.    }
  1227.  
  1228.    tc_discard_inst(tc, inst);
  1229. }
  1230.  
  1231. static void
  1232. fs_lower_virtual_opcodes(struct fs_compile_context *fcc)
  1233. {
  1234.    struct toy_compiler *tc = &fcc->tc;
  1235.    struct toy_inst *inst;
  1236.  
  1237.    /* lower TGSI's first, as they might be lowered to other virtual opcodes */
  1238.    tc_head(tc);
  1239.    while ((inst = tc_next(tc)) != NULL) {
  1240.       switch (inst->opcode) {
  1241.       case TOY_OPCODE_TGSI_IN:
  1242.       case TOY_OPCODE_TGSI_CONST:
  1243.       case TOY_OPCODE_TGSI_SV:
  1244.       case TOY_OPCODE_TGSI_IMM:
  1245.          fs_lower_opcode_tgsi_direct(fcc, inst);
  1246.          break;
  1247.       case TOY_OPCODE_TGSI_INDIRECT_FETCH:
  1248.       case TOY_OPCODE_TGSI_INDIRECT_STORE:
  1249.          fs_lower_opcode_tgsi_indirect(fcc, inst);
  1250.          break;
  1251.       case TOY_OPCODE_TGSI_TEX:
  1252.       case TOY_OPCODE_TGSI_TXB:
  1253.       case TOY_OPCODE_TGSI_TXD:
  1254.       case TOY_OPCODE_TGSI_TXL:
  1255.       case TOY_OPCODE_TGSI_TXP:
  1256.       case TOY_OPCODE_TGSI_TXF:
  1257.       case TOY_OPCODE_TGSI_TXQ:
  1258.       case TOY_OPCODE_TGSI_TXQ_LZ:
  1259.       case TOY_OPCODE_TGSI_TEX2:
  1260.       case TOY_OPCODE_TGSI_TXB2:
  1261.       case TOY_OPCODE_TGSI_TXL2:
  1262.       case TOY_OPCODE_TGSI_SAMPLE:
  1263.       case TOY_OPCODE_TGSI_SAMPLE_I:
  1264.       case TOY_OPCODE_TGSI_SAMPLE_I_MS:
  1265.       case TOY_OPCODE_TGSI_SAMPLE_B:
  1266.       case TOY_OPCODE_TGSI_SAMPLE_C:
  1267.       case TOY_OPCODE_TGSI_SAMPLE_C_LZ:
  1268.       case TOY_OPCODE_TGSI_SAMPLE_D:
  1269.       case TOY_OPCODE_TGSI_SAMPLE_L:
  1270.       case TOY_OPCODE_TGSI_GATHER4:
  1271.       case TOY_OPCODE_TGSI_SVIEWINFO:
  1272.       case TOY_OPCODE_TGSI_SAMPLE_POS:
  1273.       case TOY_OPCODE_TGSI_SAMPLE_INFO:
  1274.          fs_lower_opcode_tgsi_sampling(fcc, inst);
  1275.          break;
  1276.       }
  1277.    }
  1278.  
  1279.    tc_head(tc);
  1280.    while ((inst = tc_next(tc)) != NULL) {
  1281.       switch (inst->opcode) {
  1282.       case TOY_OPCODE_INV:
  1283.       case TOY_OPCODE_LOG:
  1284.       case TOY_OPCODE_EXP:
  1285.       case TOY_OPCODE_SQRT:
  1286.       case TOY_OPCODE_RSQ:
  1287.       case TOY_OPCODE_SIN:
  1288.       case TOY_OPCODE_COS:
  1289.       case TOY_OPCODE_FDIV:
  1290.       case TOY_OPCODE_POW:
  1291.       case TOY_OPCODE_INT_DIV_QUOTIENT:
  1292.       case TOY_OPCODE_INT_DIV_REMAINDER:
  1293.          toy_compiler_lower_math(tc, inst);
  1294.          break;
  1295.       case TOY_OPCODE_DDX:
  1296.       case TOY_OPCODE_DDY:
  1297.          fs_lower_opcode_derivative(tc, inst);
  1298.          break;
  1299.       case TOY_OPCODE_FB_WRITE:
  1300.          fs_lower_opcode_fb_write(tc, inst);
  1301.          break;
  1302.       case TOY_OPCODE_KIL:
  1303.          fs_lower_opcode_kil(tc, inst);
  1304.          break;
  1305.       default:
  1306.          if (inst->opcode > 127)
  1307.             tc_fail(tc, "unhandled virtual opcode");
  1308.          break;
  1309.       }
  1310.    }
  1311. }
  1312.  
  1313. /**
  1314.  * Compile the shader.
  1315.  */
  1316. static bool
  1317. fs_compile(struct fs_compile_context *fcc)
  1318. {
  1319.    struct toy_compiler *tc = &fcc->tc;
  1320.    struct ilo_shader *sh = fcc->shader;
  1321.  
  1322.    fs_lower_virtual_opcodes(fcc);
  1323.    toy_compiler_legalize_for_ra(tc);
  1324.    toy_compiler_optimize(tc);
  1325.    toy_compiler_allocate_registers(tc,
  1326.          fcc->first_free_grf,
  1327.          fcc->last_free_grf,
  1328.          fcc->num_grf_per_vrf);
  1329.    toy_compiler_legalize_for_asm(tc);
  1330.  
  1331.    if (tc->fail) {
  1332.       ilo_err("failed to legalize FS instructions: %s\n", tc->reason);
  1333.       return false;
  1334.    }
  1335.  
  1336.    if (ilo_debug & ILO_DEBUG_FS) {
  1337.       ilo_printf("legalized instructions:\n");
  1338.       toy_compiler_dump(tc);
  1339.       ilo_printf("\n");
  1340.    }
  1341.  
  1342.    if (true) {
  1343.       sh->kernel = toy_compiler_assemble(tc, &sh->kernel_size);
  1344.    }
  1345.    else {
  1346.       static const uint32_t microcode[] = {
  1347.          /* fill in the microcode here */
  1348.          0x0, 0x0, 0x0, 0x0,
  1349.       };
  1350.       const bool swap = true;
  1351.  
  1352.       sh->kernel_size = sizeof(microcode);
  1353.       sh->kernel = MALLOC(sh->kernel_size);
  1354.  
  1355.       if (sh->kernel) {
  1356.          const int num_dwords = sizeof(microcode) / 4;
  1357.          const uint32_t *src = microcode;
  1358.          uint32_t *dst = (uint32_t *) sh->kernel;
  1359.          int i;
  1360.  
  1361.          for (i = 0; i < num_dwords; i += 4) {
  1362.             if (swap) {
  1363.                dst[i + 0] = src[i + 3];
  1364.                dst[i + 1] = src[i + 2];
  1365.                dst[i + 2] = src[i + 1];
  1366.                dst[i + 3] = src[i + 0];
  1367.             }
  1368.             else {
  1369.                memcpy(dst, src, 16);
  1370.             }
  1371.          }
  1372.       }
  1373.    }
  1374.  
  1375.    if (!sh->kernel) {
  1376.       ilo_err("failed to compile FS: %s\n", tc->reason);
  1377.       return false;
  1378.    }
  1379.  
  1380.    if (ilo_debug & ILO_DEBUG_FS) {
  1381.       ilo_printf("disassembly:\n");
  1382.       toy_compiler_disassemble(tc, sh->kernel, sh->kernel_size);
  1383.       ilo_printf("\n");
  1384.    }
  1385.  
  1386.    return true;
  1387. }
  1388.  
  1389. /**
  1390.  * Emit instructions to write the color buffers (and the depth buffer).
  1391.  */
  1392. static void
  1393. fs_write_fb(struct fs_compile_context *fcc)
  1394. {
  1395.    struct toy_compiler *tc = &fcc->tc;
  1396.    int base_mrf = fcc->first_free_mrf;
  1397.    const struct toy_dst header = tdst_ud(tdst(TOY_FILE_MRF, base_mrf, 0));
  1398.    bool header_present = false;
  1399.    struct toy_src desc;
  1400.    unsigned msg_type, ctrl;
  1401.    int color_slots[ILO_MAX_DRAW_BUFFERS], num_cbufs;
  1402.    int pos_slot = -1, cbuf, i;
  1403.  
  1404.    for (i = 0; i < Elements(color_slots); i++)
  1405.       color_slots[i] = -1;
  1406.  
  1407.    for (i = 0; i < fcc->tgsi.num_outputs; i++) {
  1408.       if (fcc->tgsi.outputs[i].semantic_name == TGSI_SEMANTIC_COLOR) {
  1409.          assert(fcc->tgsi.outputs[i].semantic_index < Elements(color_slots));
  1410.          color_slots[fcc->tgsi.outputs[i].semantic_index] = i;
  1411.       }
  1412.       else if (fcc->tgsi.outputs[i].semantic_name == TGSI_SEMANTIC_POSITION) {
  1413.          pos_slot = i;
  1414.       }
  1415.    }
  1416.  
  1417.    num_cbufs = fcc->variant->u.fs.num_cbufs;
  1418.    /* still need to send EOT (and probably depth) */
  1419.    if (!num_cbufs)
  1420.       num_cbufs = 1;
  1421.  
  1422.    /* we need the header to specify the pixel mask or render target */
  1423.    if (fcc->tgsi.uses_kill || num_cbufs > 1) {
  1424.       const struct toy_src r0 = tsrc_ud(tsrc(TOY_FILE_GRF, 0, 0));
  1425.       struct toy_inst *inst;
  1426.  
  1427.       inst = tc_MOV(tc, header, r0);
  1428.       inst->mask_ctrl = BRW_MASK_DISABLE;
  1429.       base_mrf += fcc->num_grf_per_vrf;
  1430.  
  1431.       /* this is a two-register header */
  1432.       if (fcc->dispatch_mode == GEN6_WM_8_DISPATCH_ENABLE) {
  1433.          inst = tc_MOV(tc, tdst_offset(header, 1, 0), tsrc_offset(r0, 1, 0));
  1434.          inst->mask_ctrl = BRW_MASK_DISABLE;
  1435.          base_mrf += fcc->num_grf_per_vrf;
  1436.       }
  1437.  
  1438.       header_present = true;
  1439.    }
  1440.  
  1441.    for (cbuf = 0; cbuf < num_cbufs; cbuf++) {
  1442.       const int slot =
  1443.          color_slots[(fcc->tgsi.props.fs_color0_writes_all_cbufs) ? 0 : cbuf];
  1444.       int mrf = base_mrf, vrf;
  1445.       struct toy_src src[4];
  1446.  
  1447.       if (slot >= 0) {
  1448.          const unsigned undefined_mask =
  1449.             fcc->tgsi.outputs[slot].undefined_mask;
  1450.          const int index = fcc->tgsi.outputs[slot].index;
  1451.  
  1452.          vrf = toy_tgsi_get_vrf(&fcc->tgsi, TGSI_FILE_OUTPUT, 0, index);
  1453.          if (vrf >= 0) {
  1454.             const struct toy_src tmp = tsrc(TOY_FILE_VRF, vrf, 0);
  1455.             tsrc_transpose(tmp, src);
  1456.          }
  1457.          else {
  1458.             /* use (0, 0, 0, 0) */
  1459.             tsrc_transpose(tsrc_imm_f(0.0f), src);
  1460.          }
  1461.  
  1462.          for (i = 0; i < 4; i++) {
  1463.             const struct toy_dst dst = tdst(TOY_FILE_MRF, mrf, 0);
  1464.  
  1465.             if (undefined_mask & (1 << i))
  1466.                src[i] = tsrc_imm_f(0.0f);
  1467.  
  1468.             tc_MOV(tc, dst, src[i]);
  1469.  
  1470.             mrf += fcc->num_grf_per_vrf;
  1471.          }
  1472.       }
  1473.       else {
  1474.          /* use (0, 0, 0, 0) */
  1475.          for (i = 0; i < 4; i++) {
  1476.             const struct toy_dst dst = tdst(TOY_FILE_MRF, mrf, 0);
  1477.  
  1478.             tc_MOV(tc, dst, tsrc_imm_f(0.0f));
  1479.             mrf += fcc->num_grf_per_vrf;
  1480.          }
  1481.       }
  1482.  
  1483.       /* select BLEND_STATE[rt] */
  1484.       if (cbuf > 0) {
  1485.          struct toy_inst *inst;
  1486.  
  1487.          inst = tc_MOV(tc, tdst_offset(header, 0, 2), tsrc_imm_ud(cbuf));
  1488.          inst->mask_ctrl = BRW_MASK_DISABLE;
  1489.          inst->exec_size = BRW_EXECUTE_1;
  1490.          inst->src[0].rect = TOY_RECT_010;
  1491.       }
  1492.  
  1493.       if (cbuf == 0 && pos_slot >= 0) {
  1494.          const int index = fcc->tgsi.outputs[pos_slot].index;
  1495.          const struct toy_dst dst = tdst(TOY_FILE_MRF, mrf, 0);
  1496.          struct toy_src src[4];
  1497.          int vrf;
  1498.  
  1499.          vrf = toy_tgsi_get_vrf(&fcc->tgsi, TGSI_FILE_OUTPUT, 0, index);
  1500.          if (vrf >= 0) {
  1501.             const struct toy_src tmp = tsrc(TOY_FILE_VRF, vrf, 0);
  1502.             tsrc_transpose(tmp, src);
  1503.          }
  1504.          else {
  1505.             /* use (0, 0, 0, 0) */
  1506.             tsrc_transpose(tsrc_imm_f(0.0f), src);
  1507.          }
  1508.  
  1509.          /* only Z */
  1510.          tc_MOV(tc, dst, src[2]);
  1511.  
  1512.          mrf += fcc->num_grf_per_vrf;
  1513.       }
  1514.  
  1515.       msg_type = (fcc->dispatch_mode == GEN6_WM_16_DISPATCH_ENABLE) ?
  1516.          BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE :
  1517.          BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01;
  1518.  
  1519.       ctrl = (cbuf == num_cbufs - 1) << 12 |
  1520.              msg_type << 8;
  1521.  
  1522.       desc = tsrc_imm_mdesc_data_port(tc, cbuf == num_cbufs - 1,
  1523.             mrf - fcc->first_free_mrf, 0,
  1524.             header_present, false,
  1525.             GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE,
  1526.             ctrl, ILO_WM_DRAW_SURFACE(cbuf));
  1527.  
  1528.       tc_add2(tc, TOY_OPCODE_FB_WRITE, tdst_null(),
  1529.             tsrc(TOY_FILE_MRF, fcc->first_free_mrf, 0), desc);
  1530.    }
  1531. }
  1532.  
  1533. /**
  1534.  * Set up shader outputs for fixed-function units.
  1535.  */
  1536. static void
  1537. fs_setup_shader_out(struct ilo_shader *sh, const struct toy_tgsi *tgsi)
  1538. {
  1539.    int i;
  1540.  
  1541.    sh->out.count = tgsi->num_outputs;
  1542.    for (i = 0; i < tgsi->num_outputs; i++) {
  1543.       sh->out.register_indices[i] = tgsi->outputs[i].index;
  1544.       sh->out.semantic_names[i] = tgsi->outputs[i].semantic_name;
  1545.       sh->out.semantic_indices[i] = tgsi->outputs[i].semantic_index;
  1546.  
  1547.       if (tgsi->outputs[i].semantic_name == TGSI_SEMANTIC_POSITION)
  1548.          sh->out.has_pos = true;
  1549.    }
  1550. }
  1551.  
  1552. /**
  1553.  * Set up shader inputs for fixed-function units.
  1554.  */
  1555. static void
  1556. fs_setup_shader_in(struct ilo_shader *sh, const struct toy_tgsi *tgsi,
  1557.                    bool flatshade)
  1558. {
  1559.    int i;
  1560.  
  1561.    sh->in.count = tgsi->num_inputs;
  1562.    for (i = 0; i < tgsi->num_inputs; i++) {
  1563.       sh->in.semantic_names[i] = tgsi->inputs[i].semantic_name;
  1564.       sh->in.semantic_indices[i] = tgsi->inputs[i].semantic_index;
  1565.       sh->in.interp[i] = tgsi->inputs[i].interp;
  1566.       sh->in.centroid[i] = tgsi->inputs[i].centroid;
  1567.  
  1568.       if (tgsi->inputs[i].semantic_name == TGSI_SEMANTIC_POSITION) {
  1569.          sh->in.has_pos = true;
  1570.          continue;
  1571.       }
  1572.       else if (tgsi->inputs[i].semantic_name == TGSI_SEMANTIC_FACE) {
  1573.          continue;
  1574.       }
  1575.  
  1576.       switch (tgsi->inputs[i].interp) {
  1577.       case TGSI_INTERPOLATE_CONSTANT:
  1578.          sh->in.const_interp_enable |= 1 << i;
  1579.          break;
  1580.       case TGSI_INTERPOLATE_LINEAR:
  1581.          sh->in.has_linear_interp = true;
  1582.  
  1583.          if (tgsi->inputs[i].centroid) {
  1584.             sh->in.barycentric_interpolation_mode |=
  1585.                1 << BRW_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC;
  1586.          }
  1587.          else {
  1588.             sh->in.barycentric_interpolation_mode |=
  1589.                1 << BRW_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC;
  1590.          }
  1591.          break;
  1592.       case TGSI_INTERPOLATE_COLOR:
  1593.          if (flatshade) {
  1594.             sh->in.const_interp_enable |= 1 << i;
  1595.             break;
  1596.          }
  1597.          /* fall through */
  1598.       case TGSI_INTERPOLATE_PERSPECTIVE:
  1599.          if (tgsi->inputs[i].centroid) {
  1600.             sh->in.barycentric_interpolation_mode |=
  1601.                1 << BRW_WM_PERSPECTIVE_CENTROID_BARYCENTRIC;
  1602.          }
  1603.          else {
  1604.             sh->in.barycentric_interpolation_mode |=
  1605.                1 << BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC;
  1606.          }
  1607.          break;
  1608.       default:
  1609.          break;
  1610.       }
  1611.    }
  1612. }
  1613.  
  1614. static int
  1615. fs_setup_payloads(struct fs_compile_context *fcc)
  1616. {
  1617.    const struct ilo_shader *sh = fcc->shader;
  1618.    int grf, i;
  1619.  
  1620.    grf = 0;
  1621.  
  1622.    /* r0: header */
  1623.    grf++;
  1624.  
  1625.    /* r1-r2: coordinates and etc. */
  1626.    grf += (fcc->dispatch_mode == GEN6_WM_32_DISPATCH_ENABLE) ? 2 : 1;
  1627.  
  1628.    for (i = 0; i < Elements(fcc->payloads); i++) {
  1629.       int interp;
  1630.  
  1631.       /* r3-r26 or r32-r55: barycentric interpolation parameters */
  1632.       for (interp = 0; interp < BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT; interp++) {
  1633.          if (!(sh->in.barycentric_interpolation_mode & (1 << interp)))
  1634.             continue;
  1635.  
  1636.          fcc->payloads[i].barycentric_interps[interp] = grf;
  1637.          grf += (fcc->dispatch_mode == GEN6_WM_8_DISPATCH_ENABLE) ? 2 : 4;
  1638.       }
  1639.  
  1640.       /* r27-r28 or r56-r57: interpoloated depth */
  1641.       if (sh->in.has_pos) {
  1642.          fcc->payloads[i].source_depth = grf;
  1643.          grf += (fcc->dispatch_mode == GEN6_WM_8_DISPATCH_ENABLE) ? 1 : 2;
  1644.       }
  1645.  
  1646.       /* r29-r30 or r58-r59: interpoloated w */
  1647.       if (sh->in.has_pos) {
  1648.          fcc->payloads[i].source_w = grf;
  1649.          grf += (fcc->dispatch_mode == GEN6_WM_8_DISPATCH_ENABLE) ? 1 : 2;
  1650.       }
  1651.  
  1652.       /* r31 or r60: position offset */
  1653.       if (false) {
  1654.          fcc->payloads[i].pos_offset = grf;
  1655.          grf++;
  1656.       }
  1657.  
  1658.       if (fcc->dispatch_mode != GEN6_WM_32_DISPATCH_ENABLE)
  1659.          break;
  1660.    }
  1661.  
  1662.    return grf;
  1663. }
  1664.  
  1665. /**
  1666.  * Translate the TGSI tokens.
  1667.  */
  1668. static bool
  1669. fs_setup_tgsi(struct toy_compiler *tc, const struct tgsi_token *tokens,
  1670.               struct toy_tgsi *tgsi)
  1671. {
  1672.    if (ilo_debug & ILO_DEBUG_FS) {
  1673.       ilo_printf("dumping fragment shader\n");
  1674.       ilo_printf("\n");
  1675.  
  1676.       tgsi_dump(tokens, 0);
  1677.       ilo_printf("\n");
  1678.    }
  1679.  
  1680.    toy_compiler_translate_tgsi(tc, tokens, false, tgsi);
  1681.    if (tc->fail) {
  1682.       ilo_err("failed to translate FS TGSI tokens: %s\n", tc->reason);
  1683.       return false;
  1684.    }
  1685.  
  1686.    if (ilo_debug & ILO_DEBUG_FS) {
  1687.       ilo_printf("TGSI translator:\n");
  1688.       toy_tgsi_dump(tgsi);
  1689.       ilo_printf("\n");
  1690.       toy_compiler_dump(tc);
  1691.       ilo_printf("\n");
  1692.    }
  1693.  
  1694.    return true;
  1695. }
  1696.  
  1697. /**
  1698.  * Set up FS compile context.  This includes translating the TGSI tokens.
  1699.  */
  1700. static bool
  1701. fs_setup(struct fs_compile_context *fcc,
  1702.          const struct ilo_shader_state *state,
  1703.          const struct ilo_shader_variant *variant)
  1704. {
  1705.    int num_consts;
  1706.  
  1707.    memset(fcc, 0, sizeof(*fcc));
  1708.  
  1709.    fcc->shader = CALLOC_STRUCT(ilo_shader);
  1710.    if (!fcc->shader)
  1711.       return false;
  1712.  
  1713.    fcc->variant = variant;
  1714.  
  1715.    toy_compiler_init(&fcc->tc, state->info.dev);
  1716.  
  1717.    fcc->dispatch_mode = GEN6_WM_8_DISPATCH_ENABLE;
  1718.  
  1719.    fcc->tc.templ.access_mode = BRW_ALIGN_1;
  1720.    if (fcc->dispatch_mode == GEN6_WM_16_DISPATCH_ENABLE) {
  1721.       fcc->tc.templ.qtr_ctrl = GEN6_COMPRESSION_1H;
  1722.       fcc->tc.templ.exec_size = BRW_EXECUTE_16;
  1723.    }
  1724.    else {
  1725.       fcc->tc.templ.qtr_ctrl = GEN6_COMPRESSION_1Q;
  1726.       fcc->tc.templ.exec_size = BRW_EXECUTE_8;
  1727.    }
  1728.  
  1729.    fcc->tc.rect_linear_width = 8;
  1730.  
  1731.    /*
  1732.     * The classic driver uses the sampler cache (gen6) or the data cache
  1733.     * (gen7).  Why?
  1734.     */
  1735.    fcc->const_cache = GEN6_SFID_DATAPORT_CONSTANT_CACHE;
  1736.  
  1737.    if (!fs_setup_tgsi(&fcc->tc, state->info.tokens, &fcc->tgsi)) {
  1738.       toy_compiler_cleanup(&fcc->tc);
  1739.       FREE(fcc->shader);
  1740.       return false;
  1741.    }
  1742.  
  1743.    fs_setup_shader_in(fcc->shader, &fcc->tgsi, fcc->variant->u.fs.flatshade);
  1744.    fs_setup_shader_out(fcc->shader, &fcc->tgsi);
  1745.  
  1746.    /* we do not make use of push constant buffers yet */
  1747.    num_consts = 0;
  1748.  
  1749.    fcc->first_const_grf = fs_setup_payloads(fcc);
  1750.    fcc->first_attr_grf = fcc->first_const_grf + num_consts;
  1751.    fcc->first_free_grf = fcc->first_attr_grf + fcc->shader->in.count * 2;
  1752.    fcc->last_free_grf = 127;
  1753.  
  1754.    /* m0 is reserved for system routines */
  1755.    fcc->first_free_mrf = 1;
  1756.    fcc->last_free_mrf = 15;
  1757.  
  1758.    /* instructions are compressed with BRW_EXECUTE_16 */
  1759.    fcc->num_grf_per_vrf =
  1760.       (fcc->dispatch_mode == GEN6_WM_16_DISPATCH_ENABLE) ? 2 : 1;
  1761.  
  1762.    if (fcc->tc.dev->gen >= ILO_GEN(7)) {
  1763.       fcc->last_free_grf -= 15;
  1764.       fcc->first_free_mrf = fcc->last_free_grf + 1;
  1765.       fcc->last_free_mrf = fcc->first_free_mrf + 14;
  1766.    }
  1767.  
  1768.    fcc->shader->in.start_grf = fcc->first_const_grf;
  1769.    fcc->shader->has_kill = fcc->tgsi.uses_kill;
  1770.    fcc->shader->dispatch_16 =
  1771.       (fcc->dispatch_mode == GEN6_WM_16_DISPATCH_ENABLE);
  1772.  
  1773.    return true;
  1774. }
  1775.  
  1776. /**
  1777.  * Compile the fragment shader.
  1778.  */
  1779. struct ilo_shader *
  1780. ilo_shader_compile_fs(const struct ilo_shader_state *state,
  1781.                       const struct ilo_shader_variant *variant)
  1782. {
  1783.    struct fs_compile_context fcc;
  1784.  
  1785.    if (!fs_setup(&fcc, state, variant))
  1786.       return NULL;
  1787.  
  1788.    fs_write_fb(&fcc);
  1789.  
  1790.    if (!fs_compile(&fcc)) {
  1791.       FREE(fcc.shader);
  1792.       fcc.shader = NULL;
  1793.    }
  1794.  
  1795.    toy_tgsi_cleanup(&fcc.tgsi);
  1796.    toy_compiler_cleanup(&fcc.tc);
  1797.  
  1798.    return fcc.shader;
  1799. }
  1800.