Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | RSS feed

/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */
  2.  
  3. /*
  4.  * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
  5.  *
  6.  * Permission is hereby granted, free of charge, to any person obtaining a
  7.  * copy of this software and associated documentation files (the "Software"),
  8.  * to deal in the Software without restriction, including without limitation
  9.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10.  * and/or sell copies of the Software, and to permit persons to whom the
  11.  * Software is furnished to do so, subject to the following conditions:
  12.  *
  13.  * The above copyright notice and this permission notice (including the next
  14.  * paragraph) shall be included in all copies or substantial portions of the
  15.  * Software.
  16.  *
  17.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22.  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23.  * SOFTWARE.
  24.  *
  25.  * Authors:
  26.  *    Rob Clark <robclark@freedesktop.org>
  27.  */
  28.  
  29. #include "freedreno_util.h"
  30.  
  31. #include "ir3.h"
  32.  
  33. /*
  34.  * Find/group instruction neighbors:
  35.  */
  36.  
  37. /* stop condition for iteration: */
  38. static bool check_stop(struct ir3_instruction *instr)
  39. {
  40.         if (ir3_instr_check_mark(instr))
  41.                 return true;
  42.  
  43.         /* stay within the block.. don't try to operate across
  44.          * basic block boundaries or we'll have problems when
  45.          * dealing with multiple basic blocks:
  46.          */
  47.         if (is_meta(instr) && (instr->opc == OPC_META_INPUT))
  48.                 return true;
  49.  
  50.         return false;
  51. }
  52.  
  53. static struct ir3_instruction * create_mov(struct ir3_instruction *instr)
  54. {
  55.         struct ir3_instruction *mov;
  56.  
  57.         mov = ir3_instr_create(instr->block, 1, 0);
  58.         mov->cat1.src_type = TYPE_F32;
  59.         mov->cat1.dst_type = TYPE_F32;
  60.         ir3_reg_create(mov, 0, 0);    /* dst */
  61.         ir3_reg_create(mov, 0, IR3_REG_SSA)->instr = instr;
  62.  
  63.         return mov;
  64. }
  65.  
/* bleh.. we need to do the same group_n() thing for both inputs/outputs
 * (where we have a simple instr[] array), and fanin nodes (where we have
 * an extra indirection via reg->instr).
 *
 * group_ops hides that difference from group_n(): 'arr' is an opaque
 * handle whose real type is known only to the callbacks.
 */
struct group_ops {
        /* return the instruction currently occupying slot 'idx' of 'arr' */
        struct ir3_instruction *(*get)(void *arr, int idx);
        /* resolve a conflict on slot 'idx' (currently holding 'instr') by
         * putting a mov between 'instr' and that slot
         */
        void (*insert_mov)(void *arr, int idx, struct ir3_instruction *instr);
};
  74.  
  75. static struct ir3_instruction *arr_get(void *arr, int idx)
  76. {
  77.         return ((struct ir3_instruction **)arr)[idx];
  78. }
  79. static void arr_insert_mov_out(void *arr, int idx, struct ir3_instruction *instr)
  80. {
  81.         ((struct ir3_instruction **)arr)[idx] = create_mov(instr);
  82. }
  83. static void arr_insert_mov_in(void *arr, int idx, struct ir3_instruction *instr)
  84. {
  85.         /* so, we can't insert a mov in front of a meta:in.. and the downstream
  86.          * instruction already has a pointer to 'instr'.  So we cheat a bit and
  87.          * morph the meta:in instruction into a mov and insert a new meta:in
  88.          * in front.
  89.          */
  90.         struct ir3_instruction *in;
  91.  
  92.         debug_assert(instr->regs_count == 1);
  93.  
  94.         in = ir3_instr_create(instr->block, -1, OPC_META_INPUT);
  95.         in->inout.block = instr->block;
  96.         ir3_reg_create(in, instr->regs[0]->num, 0);
  97.  
  98.         /* create src reg for meta:in and fixup to now be a mov: */
  99.         ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = in;
  100.         instr->category = 1;
  101.         instr->opc = 0;
  102.         instr->cat1.src_type = TYPE_F32;
  103.         instr->cat1.dst_type = TYPE_F32;
  104.  
  105.         ((struct ir3_instruction **)arr)[idx] = in;
  106. }
  107. static struct group_ops arr_ops_out = { arr_get, arr_insert_mov_out };
  108. static struct group_ops arr_ops_in = { arr_get, arr_insert_mov_in };
  109.  
  110. static struct ir3_instruction *instr_get(void *arr, int idx)
  111. {
  112.         return ssa(((struct ir3_instruction *)arr)->regs[idx+1]);
  113. }
  114. static void instr_insert_mov(void *arr, int idx, struct ir3_instruction *instr)
  115. {
  116.         ((struct ir3_instruction *)arr)->regs[idx+1]->instr = create_mov(instr);
  117. }
  118. static struct group_ops instr_ops = { instr_get, instr_insert_mov };
  119.  
  120.  
  121. static void group_n(struct group_ops *ops, void *arr, unsigned n)
  122. {
  123.         unsigned i, j;
  124.  
  125.         /* first pass, figure out what has conflicts and needs a mov
  126.          * inserted.  Do this up front, before starting to setup
  127.          * left/right neighbor pointers.  Trying to do it in a single
  128.          * pass could result in a situation where we can't even setup
  129.          * the mov's right neighbor ptr if the next instr also needs
  130.          * a mov.
  131.          */
  132. restart:
  133.         for (i = 0; i < n; i++) {
  134.                 struct ir3_instruction *instr = ops->get(arr, i);
  135.                 if (instr) {
  136.                         struct ir3_instruction *left = (i > 0) ? ops->get(arr, i - 1) : NULL;
  137.                         struct ir3_instruction *right = (i < (n-1)) ? ops->get(arr, i + 1) : NULL;
  138.                         bool conflict;
  139.  
  140.                         /* check for left/right neighbor conflicts: */
  141.                         conflict = conflicts(instr->cp.left, left) ||
  142.                                 conflicts(instr->cp.right, right);
  143.  
  144.                         /* we also can't have an instr twice in the group: */
  145.                         for (j = i + 1; (j < n) && !conflict; j++)
  146.                                 if (ops->get(arr, j) == instr)
  147.                                         conflict = true;
  148.  
  149.                         if (conflict) {
  150.                                 ops->insert_mov(arr, i, instr);
  151.                                 /* inserting the mov may have caused a conflict
  152.                                  * against the previous:
  153.                                  */
  154.                                 goto restart;
  155.                         }
  156.                 }
  157.         }
  158.  
  159.         /* second pass, now that we've inserted mov's, fixup left/right
  160.          * neighbors.  This is guaranteed to succeed, since by definition
  161.          * the newly inserted mov's cannot conflict with anything.
  162.          */
  163.         for (i = 0; i < n; i++) {
  164.                 struct ir3_instruction *instr = ops->get(arr, i);
  165.                 if (instr) {
  166.                         struct ir3_instruction *left = (i > 0) ? ops->get(arr, i - 1) : NULL;
  167.                         struct ir3_instruction *right = (i < (n-1)) ? ops->get(arr, i + 1) : NULL;
  168.  
  169.                         debug_assert(!conflicts(instr->cp.left, left));
  170.                         if (left) {
  171.                                 instr->cp.left_cnt++;
  172.                                 instr->cp.left = left;
  173.                         }
  174.  
  175.                         debug_assert(!conflicts(instr->cp.right, right));
  176.                         if (right) {
  177.                                 instr->cp.right_cnt++;
  178.                                 instr->cp.right = right;
  179.                         }
  180.                 }
  181.         }
  182. }
  183.  
  184. static void instr_find_neighbors(struct ir3_instruction *instr)
  185. {
  186.         struct ir3_instruction *src;
  187.  
  188.         if (check_stop(instr))
  189.                 return;
  190.  
  191.         if (is_meta(instr) && (instr->opc == OPC_META_FI))
  192.                 group_n(&instr_ops, instr, instr->regs_count - 1);
  193.  
  194.         foreach_ssa_src(src, instr)
  195.                 instr_find_neighbors(src);
  196. }
  197.  
  198. /* a bit of sadness.. we can't have "holes" in inputs from PoV of
  199.  * register assignment, they still need to be grouped together.  So
  200.  * we need to insert dummy/padding instruction for grouping, and
  201.  * then take it back out again before anyone notices.
  202.  */
  203. static void pad_and_group_input(struct ir3_instruction **input, unsigned n)
  204. {
  205.         int i, mask = 0;
  206.         struct ir3_block *block = NULL;
  207.  
  208.         for (i = n - 1; i >= 0; i--) {
  209.                 struct ir3_instruction *instr = input[i];
  210.                 if (instr) {
  211.                         block = instr->block;
  212.                 } else if (block) {
  213.                         instr = ir3_instr_create(block, 0, OPC_NOP);
  214.                         ir3_reg_create(instr, 0, IR3_REG_SSA);    /* dst */
  215.                         input[i] = instr;
  216.                         mask |= (1 << i);
  217.                 }
  218.         }
  219.  
  220.         group_n(&arr_ops_in, input, n);
  221.  
  222.         for (i = 0; i < n; i++) {
  223.                 if (mask & (1 << i))
  224.                         input[i] = NULL;
  225.         }
  226. }
  227.  
  228. static void block_find_neighbors(struct ir3_block *block)
  229. {
  230.         unsigned i;
  231.  
  232.         for (i = 0; i < block->noutputs; i++) {
  233.                 if (block->outputs[i]) {
  234.                         struct ir3_instruction *instr = block->outputs[i];
  235.                         instr_find_neighbors(instr);
  236.                 }
  237.         }
  238.  
  239.         /* shader inputs/outputs themselves must be contiguous as well:
  240.          */
  241.         if (!block->parent) {
  242.                 /* NOTE: group inputs first, since we only insert mov's
  243.                  * *before* the conflicted instr (and that would go badly
  244.                  * for inputs).  By doing inputs first, we should never
  245.                  * have a conflict on inputs.. pushing any conflict to
  246.                  * resolve to the outputs, for stuff like:
  247.                  *
  248.                  *     MOV OUT[n], IN[m].wzyx
  249.                  *
  250.                  * NOTE: we assume here inputs/outputs are grouped in vec4.
  251.                  * This logic won't quite cut it if we don't align smaller
  252.                  * on vec4 boundaries
  253.                  */
  254.                 for (i = 0; i < block->ninputs; i += 4)
  255.                         pad_and_group_input(&block->inputs[i], 4);
  256.                 for (i = 0; i < block->noutputs; i += 4)
  257.                         group_n(&arr_ops_out, &block->outputs[i], 4);
  258.  
  259.         }
  260. }
  261.  
/* Entry point of the grouping pass for 'block': sets up left/right
 * neighbor links (instr->cp.left / instr->cp.right) for instructions
 * that have to be grouped together.
 */
void ir3_block_group(struct ir3_block *block)
{
        /* NOTE(review): presumably resets the per-instruction marks that
         * check_stop() tests via ir3_instr_check_mark() -- confirm in ir3.h
         */
        ir3_clear_mark(block->shader);
        block_find_neighbors(block);
}
  267.