Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Mesa 3-D graphics library
  3.  *
  4.  * Copyright (C) 2012-2013 LunarG, Inc.
  5.  *
  6.  * Permission is hereby granted, free of charge, to any person obtaining a
  7.  * copy of this software and associated documentation files (the "Software"),
  8.  * to deal in the Software without restriction, including without limitation
  9.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10.  * and/or sell copies of the Software, and to permit persons to whom the
  11.  * Software is furnished to do so, subject to the following conditions:
  12.  *
  13.  * The above copyright notice and this permission notice shall be included
  14.  * in all copies or substantial portions of the Software.
  15.  *
  16.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  17.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  18.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  19.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  20.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  21.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
  22.  * DEALINGS IN THE SOFTWARE.
  23.  *
  24.  * Authors:
  25.  *    Chia-I Wu <olv@lunarg.com>
  26.  */
  27.  
  28. #include "toy_compiler.h"
  29.  
  30. #define CG_REG_SHIFT 5
  31. #define CG_REG_NUM(origin) ((origin) >> CG_REG_SHIFT)
  32.  
  33. struct codegen {
  34.    const struct toy_inst *inst;
  35.    int pc;
  36.  
  37.    unsigned flag_sub_reg_num;
  38.  
  39.    struct codegen_dst {
  40.       unsigned file;
  41.       unsigned type;
  42.       bool indirect;
  43.       unsigned indirect_subreg;
  44.       unsigned origin; /* (RegNum << 5 | SubRegNumInBytes) */
  45.  
  46.       unsigned horz_stride;
  47.  
  48.       unsigned writemask;
  49.    } dst;
  50.  
  51.    struct codegen_src {
  52.       unsigned file;
  53.       unsigned type;
  54.       bool indirect;
  55.       unsigned indirect_subreg;
  56.       unsigned origin; /* (RegNum << 5 | SubRegNumInBytes) */
  57.  
  58.       unsigned vert_stride;
  59.       unsigned width;
  60.       unsigned horz_stride;
  61.  
  62.       unsigned swizzle[4];
  63.       bool absolute;
  64.       bool negate;
  65.    } src[3];
  66. };
  67.  
  68. /**
  69.  * Return true if the source operand is null.
  70.  */
  71. static bool
  72. src_is_null(const struct codegen *cg, int idx)
  73. {
  74.    const struct codegen_src *src = &cg->src[idx];
  75.  
  76.    return (src->file == BRW_ARCHITECTURE_REGISTER_FILE &&
  77.            src->origin == BRW_ARF_NULL << CG_REG_SHIFT);
  78. }
  79.  
  80. /**
  81.  * Translate a source operand to DW2 or DW3 of the 1-src/2-src format.
  82.  */
  83. static uint32_t
  84. translate_src(const struct codegen *cg, int idx)
  85. {
  86.    const struct codegen_src *src = &cg->src[idx];
  87.    uint32_t dw;
  88.  
  89.    /* special treatment may be needed if any of the operand is immediate */
  90.    if (cg->src[0].file == BRW_IMMEDIATE_VALUE) {
  91.       assert(!cg->src[0].absolute && !cg->src[0].negate);
  92.       /* only the last src operand can be an immediate */
  93.       assert(src_is_null(cg, 1));
  94.  
  95.       if (idx == 0)
  96.          return cg->flag_sub_reg_num << 25;
  97.       else
  98.          return cg->src[0].origin;
  99.    }
  100.    else if (idx && cg->src[1].file == BRW_IMMEDIATE_VALUE) {
  101.       assert(!cg->src[1].absolute && !cg->src[1].negate);
  102.       return cg->src[1].origin;
  103.    }
  104.  
  105.    assert(src->file != BRW_IMMEDIATE_VALUE);
  106.  
  107.    if (src->indirect) {
  108.       const int offset = (int) src->origin;
  109.  
  110.       assert(src->file == BRW_GENERAL_REGISTER_FILE);
  111.       assert(offset < 512 && offset >= -512);
  112.  
  113.       if (cg->inst->access_mode == BRW_ALIGN_16) {
  114.          assert(src->width == BRW_WIDTH_4);
  115.          assert(src->horz_stride == BRW_HORIZONTAL_STRIDE_1);
  116.  
  117.          /* the lower 4 bits are reserved for the swizzle_[xy] */
  118.          assert(!(src->origin & 0xf));
  119.  
  120.          dw = src->vert_stride << 21 |
  121.               src->swizzle[3] << 18 |
  122.               src->swizzle[2] << 16 |
  123.               BRW_ADDRESS_REGISTER_INDIRECT_REGISTER << 15 |
  124.               src->negate << 14 |
  125.               src->absolute << 13 |
  126.               src->indirect_subreg << 10 |
  127.               (src->origin & 0x3f0) |
  128.               src->swizzle[1] << 2 |
  129.               src->swizzle[0];
  130.       }
  131.       else {
  132.          assert(src->swizzle[0] == TOY_SWIZZLE_X &&
  133.                 src->swizzle[1] == TOY_SWIZZLE_Y &&
  134.                 src->swizzle[2] == TOY_SWIZZLE_Z &&
  135.                 src->swizzle[3] == TOY_SWIZZLE_W);
  136.  
  137.          dw = src->vert_stride << 21 |
  138.               src->width << 18 |
  139.               src->horz_stride << 16 |
  140.               BRW_ADDRESS_REGISTER_INDIRECT_REGISTER << 15 |
  141.               src->negate << 14 |
  142.               src->absolute << 13 |
  143.               src->indirect_subreg << 10 |
  144.               (src->origin & 0x3ff);
  145.       }
  146.    }
  147.    else {
  148.       switch (src->file) {
  149.       case BRW_ARCHITECTURE_REGISTER_FILE:
  150.          break;
  151.       case BRW_GENERAL_REGISTER_FILE:
  152.          assert(CG_REG_NUM(src->origin) < 128);
  153.          break;
  154.       case BRW_MESSAGE_REGISTER_FILE:
  155.          assert(cg->inst->opcode == BRW_OPCODE_SEND ||
  156.                 cg->inst->opcode == BRW_OPCODE_SENDC);
  157.          assert(CG_REG_NUM(src->origin) < 16);
  158.          break;
  159.       case BRW_IMMEDIATE_VALUE:
  160.       default:
  161.          assert(!"invalid src file");
  162.          break;
  163.       }
  164.  
  165.       if (cg->inst->access_mode == BRW_ALIGN_16) {
  166.          assert(src->width == BRW_WIDTH_4);
  167.          assert(src->horz_stride == BRW_HORIZONTAL_STRIDE_1);
  168.  
  169.          /* the lower 4 bits are reserved for the swizzle_[xy] */
  170.          assert(!(src->origin & 0xf));
  171.  
  172.          dw = src->vert_stride << 21 |
  173.               src->swizzle[3] << 18 |
  174.               src->swizzle[2] << 16 |
  175.               BRW_ADDRESS_DIRECT << 15 |
  176.               src->negate << 14 |
  177.               src->absolute << 13 |
  178.               src->origin |
  179.               src->swizzle[1] << 2 |
  180.               src->swizzle[0];
  181.       }
  182.       else {
  183.          assert(src->swizzle[0] == TOY_SWIZZLE_X &&
  184.                 src->swizzle[1] == TOY_SWIZZLE_Y &&
  185.                 src->swizzle[2] == TOY_SWIZZLE_Z &&
  186.                 src->swizzle[3] == TOY_SWIZZLE_W);
  187.  
  188.          dw = src->vert_stride << 21 |
  189.               src->width << 18 |
  190.               src->horz_stride << 16 |
  191.               BRW_ADDRESS_DIRECT << 15 |
  192.               src->negate << 14 |
  193.               src->absolute << 13 |
  194.               src->origin;
  195.       }
  196.    }
  197.  
  198.    if (idx == 0)
  199.       dw |= cg->flag_sub_reg_num << 25;
  200.  
  201.    return dw;
  202. }
  203.  
  204. /**
  205.  * Translate the destination operand to the higher 16 bits of DW1 of the
  206.  * 1-src/2-src format.
  207.  */
  208. static uint16_t
  209. translate_dst_region(const struct codegen *cg)
  210. {
  211.    const struct codegen_dst *dst = &cg->dst;
  212.    uint16_t dw1_region;
  213.  
  214.    if (dst->file == BRW_IMMEDIATE_VALUE) {
  215.       /* dst is immediate (JIP) when the opcode is a conditional branch */
  216.       switch (cg->inst->opcode) {
  217.       case BRW_OPCODE_IF:
  218.       case BRW_OPCODE_ELSE:
  219.       case BRW_OPCODE_ENDIF:
  220.       case BRW_OPCODE_WHILE:
  221.          assert(dst->type == BRW_REGISTER_TYPE_W);
  222.          dw1_region = (dst->origin & 0xffff);
  223.          break;
  224.       default:
  225.          assert(!"dst cannot be immediate");
  226.          dw1_region = 0;
  227.          break;
  228.       }
  229.  
  230.       return dw1_region;
  231.    }
  232.  
  233.    if (dst->indirect) {
  234.       const int offset = (int) dst->origin;
  235.  
  236.       assert(dst->file == BRW_GENERAL_REGISTER_FILE);
  237.       assert(offset < 512 && offset >= -512);
  238.  
  239.       if (cg->inst->access_mode == BRW_ALIGN_16) {
  240.          /*
  241.           * From the Sandy Bridge PRM, volume 4 part 2, page 144:
  242.           *
  243.           *     "Allthough Dst.HorzStride is a don't care for Align16, HW
  244.           *      needs this to be programmed as 01."
  245.           */
  246.          assert(dst->horz_stride == BRW_HORIZONTAL_STRIDE_1);
  247.          /* the lower 4 bits are reserved for the writemask */
  248.          assert(!(dst->origin & 0xf));
  249.  
  250.          dw1_region = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER << 15 |
  251.                       dst->horz_stride << 13 |
  252.                       dst->indirect_subreg << 10 |
  253.                       (dst->origin & 0x3f0) |
  254.                       dst->writemask;
  255.       }
  256.       else {
  257.          assert(dst->writemask == TOY_WRITEMASK_XYZW);
  258.  
  259.          dw1_region = BRW_ADDRESS_REGISTER_INDIRECT_REGISTER << 15 |
  260.                       dst->horz_stride << 13 |
  261.                       dst->indirect_subreg << 10 |
  262.                       (dst->origin & 0x3ff);
  263.       }
  264.    }
  265.    else {
  266.       assert((dst->file == BRW_GENERAL_REGISTER_FILE &&
  267.               CG_REG_NUM(dst->origin) < 128) ||
  268.              (dst->file == BRW_MESSAGE_REGISTER_FILE &&
  269.               CG_REG_NUM(dst->origin) < 16) ||
  270.              (dst->file == BRW_ARCHITECTURE_REGISTER_FILE));
  271.  
  272.       if (cg->inst->access_mode == BRW_ALIGN_16) {
  273.          /* similar to the indirect case */
  274.          assert(dst->horz_stride == BRW_HORIZONTAL_STRIDE_1);
  275.          assert(!(dst->origin & 0xf));
  276.  
  277.          dw1_region = BRW_ADDRESS_DIRECT << 15 |
  278.                       dst->horz_stride << 13 |
  279.                       dst->origin |
  280.                       dst->writemask;
  281.       }
  282.       else {
  283.          assert(dst->writemask == TOY_WRITEMASK_XYZW);
  284.  
  285.          dw1_region = BRW_ADDRESS_DIRECT << 15 |
  286.                       dst->horz_stride << 13 |
  287.                       dst->origin;
  288.       }
  289.    }
  290.  
  291.    return dw1_region;
  292. }
  293.  
  294. /**
  295.  * Translate the destination operand to DW1 of the 1-src/2-src format.
  296.  */
  297. static uint32_t
  298. translate_dst(const struct codegen *cg)
  299. {
  300.    return translate_dst_region(cg) << 16 |
  301.           cg->src[1].type << 12 |
  302.           cg->src[1].file << 10 |
  303.           cg->src[0].type << 7 |
  304.           cg->src[0].file << 5 |
  305.           cg->dst.type << 2 |
  306.           cg->dst.file;
  307. }
  308.  
  309. /**
  310.  * Translate the instruction to DW0 of the 1-src/2-src format.
  311.  */
  312. static uint32_t
  313. translate_inst(const struct codegen *cg)
  314. {
  315.    const bool debug_ctrl = false;
  316.    const bool cmpt_ctrl = false;
  317.  
  318.    assert(cg->inst->opcode < 128);
  319.  
  320.    return cg->inst->saturate << 31 |
  321.           debug_ctrl << 30 |
  322.           cmpt_ctrl << 29 |
  323.           cg->inst->acc_wr_ctrl << 28 |
  324.           cg->inst->cond_modifier << 24 |
  325.           cg->inst->exec_size << 21 |
  326.           cg->inst->pred_inv << 20 |
  327.           cg->inst->pred_ctrl << 16 |
  328.           cg->inst->thread_ctrl << 14 |
  329.           cg->inst->qtr_ctrl << 12 |
  330.           cg->inst->dep_ctrl << 10 |
  331.           cg->inst->mask_ctrl << 9 |
  332.           cg->inst->access_mode << 8 |
  333.           cg->inst->opcode;
  334. }
  335.  
  336. /**
  337.  * Codegen an instruction in 1-src/2-src format.
  338.  */
  339. static void
  340. codegen_inst(const struct codegen *cg, uint32_t *code)
  341. {
  342.    code[0] = translate_inst(cg);
  343.    code[1] = translate_dst(cg);
  344.    code[2] = translate_src(cg, 0);
  345.    code[3] = translate_src(cg, 1);
  346.    assert(src_is_null(cg, 2));
  347. }
  348.  
  349. /**
  350.  * Codegen an instruction in 3-src format.
  351.  */
  352. static void
  353. codegen_inst_3src(const struct codegen *cg, uint32_t *code)
  354. {
  355.    const struct codegen_dst *dst = &cg->dst;
  356.    uint32_t dw0, dw1, dw_src[3];
  357.    int i;
  358.  
  359.    dw0 = translate_inst(cg);
  360.  
  361.    /*
  362.     * 3-src instruction restrictions
  363.     *
  364.     *  - align16 with direct addressing
  365.     *  - GRF or MRF dst
  366.     *  - GRF src
  367.     *  - sub_reg_num is DWORD aligned
  368.     *  - no regioning except replication control
  369.     *    (vert_stride == 0 && horz_stride == 0)
  370.     */
  371.    assert(cg->inst->access_mode == BRW_ALIGN_16);
  372.  
  373.    assert(!dst->indirect);
  374.    assert((dst->file == BRW_GENERAL_REGISTER_FILE &&
  375.            CG_REG_NUM(dst->origin) < 128) ||
  376.           (dst->file == BRW_MESSAGE_REGISTER_FILE &&
  377.            CG_REG_NUM(dst->origin) < 16));
  378.    assert(!(dst->origin & 0x3));
  379.    assert(dst->horz_stride == BRW_HORIZONTAL_STRIDE_1);
  380.  
  381.    dw1 = dst->origin << 19 |
  382.          dst->writemask << 17 |
  383.          cg->src[2].negate << 9 |
  384.          cg->src[2].absolute << 8 |
  385.          cg->src[1].negate << 7 |
  386.          cg->src[1].absolute << 6 |
  387.          cg->src[0].negate << 5 |
  388.          cg->src[0].absolute << 4 |
  389.          cg->flag_sub_reg_num << 1 |
  390.          (dst->file == BRW_MESSAGE_REGISTER_FILE);
  391.  
  392.    for (i = 0; i < 3; i++) {
  393.       const struct codegen_src *src = &cg->src[i];
  394.  
  395.       assert(!src->indirect);
  396.       assert(src->file == BRW_GENERAL_REGISTER_FILE &&
  397.              CG_REG_NUM(src->origin) < 128);
  398.       assert(!(src->origin & 0x3));
  399.  
  400.       assert((src->vert_stride == BRW_VERTICAL_STRIDE_4 &&
  401.               src->horz_stride == BRW_HORIZONTAL_STRIDE_1) ||
  402.              (src->vert_stride == BRW_VERTICAL_STRIDE_0 &&
  403.               src->horz_stride == BRW_HORIZONTAL_STRIDE_0));
  404.       assert(src->width == BRW_WIDTH_4);
  405.  
  406.       dw_src[i] = src->origin << 7 |
  407.                   src->swizzle[3] << 7 |
  408.                   src->swizzle[2] << 5 |
  409.                   src->swizzle[1] << 3 |
  410.                   src->swizzle[0] << 1 |
  411.                   (src->vert_stride == BRW_VERTICAL_STRIDE_0 &&
  412.                    src->horz_stride == BRW_HORIZONTAL_STRIDE_0);
  413.  
  414.       /* only the lower 20 bits are used */
  415.       assert((dw_src[i] & 0xfffff) == dw_src[i]);
  416.    }
  417.  
  418.    code[0] = dw0;
  419.    code[1] = dw1;
  420.    /* concatenate the bits of dw_src */
  421.    code[2] = (dw_src[1] & 0x7ff ) << 21 | dw_src[0];
  422.    code[3] = dw_src[2] << 10 | (dw_src[1] >> 11);
  423. }
  424.  
  425. /**
  426.  * Sanity check the region parameters of the operands.
  427.  */
  428. static void
  429. codegen_validate_region_restrictions(const struct codegen *cg)
  430. {
  431.    const int exec_size_map[] = {
  432.       [BRW_EXECUTE_1] = 1,
  433.       [BRW_EXECUTE_2] = 2,
  434.       [BRW_EXECUTE_4] = 4,
  435.       [BRW_EXECUTE_8] = 8,
  436.       [BRW_EXECUTE_16] = 16,
  437.       [BRW_EXECUTE_32] = 32,
  438.    };
  439.    const int width_map[] = {
  440.       [BRW_WIDTH_1] = 1,
  441.       [BRW_WIDTH_2] = 2,
  442.       [BRW_WIDTH_4] = 4,
  443.       [BRW_WIDTH_8] = 8,
  444.       [BRW_WIDTH_16] = 16,
  445.    };
  446.    const int horz_stride_map[] = {
  447.       [BRW_HORIZONTAL_STRIDE_0] = 0,
  448.       [BRW_HORIZONTAL_STRIDE_1] = 1,
  449.       [BRW_HORIZONTAL_STRIDE_2] = 2,
  450.       [BRW_HORIZONTAL_STRIDE_4] = 4,
  451.    };
  452.    const int vert_stride_map[] = {
  453.       [BRW_VERTICAL_STRIDE_0] = 0,
  454.       [BRW_VERTICAL_STRIDE_1] = 1,
  455.       [BRW_VERTICAL_STRIDE_2] = 2,
  456.       [BRW_VERTICAL_STRIDE_4] = 4,
  457.       [BRW_VERTICAL_STRIDE_8] = 8,
  458.       [BRW_VERTICAL_STRIDE_16] = 16,
  459.       [BRW_VERTICAL_STRIDE_32] = 32,
  460.       [BRW_VERTICAL_STRIDE_64] = 64,
  461.       [BRW_VERTICAL_STRIDE_128] = 128,
  462.       [BRW_VERTICAL_STRIDE_256] = 256,
  463.       [BRW_VERTICAL_STRIDE_ONE_DIMENSIONAL] = 0,
  464.    };
  465.    const int exec_size = exec_size_map[cg->inst->exec_size];
  466.    int i;
  467.  
  468.    /* Sandy Bridge PRM, volume 4 part 2, page 94 */
  469.  
  470.    /* 1. (we don't do 32 anyway) */
  471.    assert(exec_size <= 16);
  472.  
  473.    for (i = 0; i < Elements(cg->src); i++) {
  474.       const int width = width_map[cg->src[i].width];
  475.       const int horz_stride = horz_stride_map[cg->src[i].horz_stride];
  476.       const int vert_stride = vert_stride_map[cg->src[i].vert_stride];
  477.  
  478.       if (src_is_null(cg, i))
  479.          break;
  480.  
  481.       /* 3. */
  482.       assert(exec_size >= width);
  483.  
  484.       if (exec_size == width) {
  485.          /* 4. & 5. */
  486.          if (horz_stride)
  487.             assert(vert_stride == width * horz_stride);
  488.       }
  489.  
  490.       if (width == 1) {
  491.          /* 6. */
  492.          assert(horz_stride == 0);
  493.  
  494.          /* 7. */
  495.          if (exec_size == 1)
  496.             assert(vert_stride == 0);
  497.       }
  498.  
  499.       /* 8. */
  500.       if (!vert_stride && !horz_stride)
  501.          assert(width == 1);
  502.    }
  503.  
  504.    /* derived from 10.1.2. & 10.2. */
  505.    assert(cg->dst.horz_stride != BRW_HORIZONTAL_STRIDE_0);
  506. }
  507.  
  508. static unsigned
  509. translate_vfile(enum toy_file file)
  510. {
  511.    switch (file) {
  512.    case TOY_FILE_ARF:   return BRW_ARCHITECTURE_REGISTER_FILE;
  513.    case TOY_FILE_GRF:   return BRW_GENERAL_REGISTER_FILE;
  514.    case TOY_FILE_MRF:   return BRW_MESSAGE_REGISTER_FILE;
  515.    case TOY_FILE_IMM:   return BRW_IMMEDIATE_VALUE;
  516.    default:
  517.       assert(!"unhandled toy file");
  518.       return BRW_GENERAL_REGISTER_FILE;
  519.    }
  520. }
  521.  
  522. static unsigned
  523. translate_vtype(enum toy_type type)
  524. {
  525.    switch (type) {
  526.    case TOY_TYPE_F:     return BRW_REGISTER_TYPE_F;
  527.    case TOY_TYPE_D:     return BRW_REGISTER_TYPE_D;
  528.    case TOY_TYPE_UD:    return BRW_REGISTER_TYPE_UD;
  529.    case TOY_TYPE_W:     return BRW_REGISTER_TYPE_W;
  530.    case TOY_TYPE_UW:    return BRW_REGISTER_TYPE_UW;
  531.    case TOY_TYPE_V:     return BRW_REGISTER_TYPE_V;
  532.    default:
  533.       assert(!"unhandled toy type");
  534.       return BRW_REGISTER_TYPE_F;
  535.    }
  536. }
  537.  
  538. static unsigned
  539. translate_writemask(enum toy_writemask writemask)
  540. {
  541.    /* TOY_WRITEMASK_* are compatible with the hardware definitions */
  542.    assert(writemask <= 0xf);
  543.    return writemask;
  544. }
  545.  
  546. static unsigned
  547. translate_swizzle(enum toy_swizzle swizzle)
  548. {
  549.    /* TOY_SWIZZLE_* are compatible with the hardware definitions */
  550.    assert(swizzle <= 3);
  551.    return swizzle;
  552. }
  553.  
  554. /**
  555.  * Prepare for generating an instruction.
  556.  */
  557. static void
  558. codegen_prepare(struct codegen *cg, const struct toy_inst *inst,
  559.                 int pc, int rect_linear_width)
  560. {
  561.    int i;
  562.  
  563.    cg->inst = inst;
  564.    cg->pc = pc;
  565.  
  566.    cg->flag_sub_reg_num = 0;
  567.  
  568.    cg->dst.file = translate_vfile(inst->dst.file);
  569.    cg->dst.type = translate_vtype(inst->dst.type);
  570.    cg->dst.indirect = inst->dst.indirect;
  571.    cg->dst.indirect_subreg = inst->dst.indirect_subreg;
  572.    cg->dst.origin = inst->dst.val32;
  573.  
  574.    /*
  575.     * From the Sandy Bridge PRM, volume 4 part 2, page 81:
  576.     *
  577.     *     "For a word or an unsigned word immediate data, software must
  578.     *      replicate the same 16-bit immediate value to both the lower word
  579.     *      and the high word of the 32-bit immediate field in an instruction."
  580.     */
  581.    if (inst->dst.file == TOY_FILE_IMM) {
  582.       switch (inst->dst.type) {
  583.       case TOY_TYPE_W:
  584.       case TOY_TYPE_UW:
  585.          cg->dst.origin &= 0xffff;
  586.          cg->dst.origin |= cg->dst.origin << 16;
  587.          break;
  588.       default:
  589.          break;
  590.       }
  591.    }
  592.  
  593.    cg->dst.writemask = translate_writemask(inst->dst.writemask);
  594.  
  595.    switch (inst->dst.rect) {
  596.    case TOY_RECT_LINEAR:
  597.       cg->dst.horz_stride = BRW_HORIZONTAL_STRIDE_1;
  598.       break;
  599.    default:
  600.       assert(!"unsupported dst region");
  601.       cg->dst.horz_stride = BRW_HORIZONTAL_STRIDE_1;
  602.       break;
  603.    }
  604.  
  605.    for (i = 0; i < Elements(cg->src); i++) {
  606.       struct codegen_src *src = &cg->src[i];
  607.  
  608.       src->file = translate_vfile(inst->src[i].file);
  609.       src->type = translate_vtype(inst->src[i].type);
  610.       src->indirect = inst->src[i].indirect;
  611.       src->indirect_subreg = inst->src[i].indirect_subreg;
  612.       src->origin = inst->src[i].val32;
  613.  
  614.       /* do the same for src */
  615.       if (inst->dst.file == TOY_FILE_IMM) {
  616.          switch (inst->src[i].type) {
  617.          case TOY_TYPE_W:
  618.          case TOY_TYPE_UW:
  619.             src->origin &= 0xffff;
  620.             src->origin |= src->origin << 16;
  621.             break;
  622.          default:
  623.             break;
  624.          }
  625.       }
  626.  
  627.       src->swizzle[0] = translate_swizzle(inst->src[i].swizzle_x);
  628.       src->swizzle[1] = translate_swizzle(inst->src[i].swizzle_y);
  629.       src->swizzle[2] = translate_swizzle(inst->src[i].swizzle_z);
  630.       src->swizzle[3] = translate_swizzle(inst->src[i].swizzle_w);
  631.       src->absolute = inst->src[i].absolute;
  632.       src->negate = inst->src[i].negate;
  633.  
  634.       switch (inst->src[i].rect) {
  635.       case TOY_RECT_LINEAR:
  636.          switch (rect_linear_width) {
  637.          case 1:
  638.             src->vert_stride = BRW_VERTICAL_STRIDE_1;
  639.             src->width = BRW_WIDTH_1;
  640.             break;
  641.          case 2:
  642.             src->vert_stride = BRW_VERTICAL_STRIDE_2;
  643.             src->width = BRW_WIDTH_2;
  644.             break;
  645.          case 4:
  646.             src->vert_stride = BRW_VERTICAL_STRIDE_4;
  647.             src->width = BRW_WIDTH_4;
  648.             break;
  649.          case 8:
  650.             src->vert_stride = BRW_VERTICAL_STRIDE_8;
  651.             src->width = BRW_WIDTH_8;
  652.             break;
  653.          case 16:
  654.             src->vert_stride = BRW_VERTICAL_STRIDE_16;
  655.             src->width = BRW_WIDTH_16;
  656.             break;
  657.          default:
  658.             assert(!"unsupported TOY_RECT_LINEAR width");
  659.             src->vert_stride = BRW_VERTICAL_STRIDE_1;
  660.             src->width = BRW_WIDTH_1;
  661.             break;
  662.          }
  663.          src->horz_stride = BRW_HORIZONTAL_STRIDE_1;
  664.          break;
  665.       case TOY_RECT_041:
  666.          src->vert_stride = BRW_VERTICAL_STRIDE_0;
  667.          src->width = BRW_WIDTH_4;
  668.          src->horz_stride = BRW_HORIZONTAL_STRIDE_1;
  669.          break;
  670.       case TOY_RECT_010:
  671.          src->vert_stride = BRW_VERTICAL_STRIDE_0;
  672.          src->width = BRW_WIDTH_1;
  673.          src->horz_stride = BRW_HORIZONTAL_STRIDE_0;
  674.          break;
  675.       case TOY_RECT_220:
  676.          src->vert_stride = BRW_VERTICAL_STRIDE_2;
  677.          src->width = BRW_WIDTH_2;
  678.          src->horz_stride = BRW_HORIZONTAL_STRIDE_0;
  679.          break;
  680.       case TOY_RECT_440:
  681.          src->vert_stride = BRW_VERTICAL_STRIDE_4;
  682.          src->width = BRW_WIDTH_4;
  683.          src->horz_stride = BRW_HORIZONTAL_STRIDE_0;
  684.          break;
  685.       case TOY_RECT_240:
  686.          src->vert_stride = BRW_VERTICAL_STRIDE_2;
  687.          src->width = BRW_WIDTH_4;
  688.          src->horz_stride = BRW_HORIZONTAL_STRIDE_0;
  689.          break;
  690.       default:
  691.          assert(!"unsupported src region");
  692.          src->vert_stride = BRW_VERTICAL_STRIDE_1;
  693.          src->width = BRW_WIDTH_1;
  694.          src->horz_stride = BRW_HORIZONTAL_STRIDE_1;
  695.          break;
  696.       }
  697.    }
  698. }
  699.  
  700. /**
  701.  * Generate HW shader code.  The instructions should have been legalized.
  702.  */
  703. void *
  704. toy_compiler_assemble(struct toy_compiler *tc, int *size)
  705. {
  706.    const struct toy_inst *inst;
  707.    uint32_t *code;
  708.    int pc;
  709.  
  710.    code = MALLOC(tc->num_instructions * 4 * sizeof(uint32_t));
  711.    if (!code)
  712.       return NULL;
  713.  
  714.    pc = 0;
  715.    tc_head(tc);
  716.    while ((inst = tc_next(tc)) != NULL) {
  717.       uint32_t *dw = &code[pc * 4];
  718.       struct codegen cg;
  719.  
  720.       if (pc >= tc->num_instructions) {
  721.          tc_fail(tc, "wrong instructoun count");
  722.          break;
  723.       }
  724.  
  725.       codegen_prepare(&cg, inst, pc, tc->rect_linear_width);
  726.       codegen_validate_region_restrictions(&cg);
  727.  
  728.       switch (inst->opcode) {
  729.       case BRW_OPCODE_MAD:
  730.          codegen_inst_3src(&cg, dw);
  731.          break;
  732.       default:
  733.          codegen_inst(&cg, dw);
  734.          break;
  735.       }
  736.  
  737.       pc++;
  738.    }
  739.  
  740.    /* never return an invalid kernel */
  741.    if (tc->fail) {
  742.       FREE(code);
  743.       return NULL;
  744.    }
  745.  
  746.    if (size)
  747.       *size = pc * 4 * sizeof(uint32_t);
  748.  
  749.    return code;
  750. }
  751.