Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
  2.  
  3. /*
  4.  * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
  5.  *
  6.  * Permission is hereby granted, free of charge, to any person obtaining a
  7.  * copy of this software and associated documentation files (the "Software"),
  8.  * to deal in the Software without restriction, including without limitation
  9.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  10.  * and/or sell copies of the Software, and to permit persons to whom the
  11.  * Software is furnished to do so, subject to the following conditions:
  12.  *
  13.  * The above copyright notice and this permission notice (including the next
  14.  * paragraph) shall be included in all copies or substantial portions of the
  15.  * Software.
  16.  *
  17.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  18.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  21.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  22.  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  23.  * SOFTWARE.
  24.  *
  25.  * Authors:
  26.  *    Rob Clark <robclark@freedesktop.org>
  27.  */
  28.  
  29. #include "util/u_math.h"
  30.  
  31. #include "ir3.h"
  32.  
  33. /*
  34.  * Instruction Depth:
  35.  *
  36.  * Calculates weighted instruction depth, ie. the sum of # of needed
  37.  * instructions plus delay slots back to original input (ie INPUT or
  38.  * CONST).  That is to say, an instruction's depth is:
  39.  *
  40.  *   depth(instr) {
  41.  *     d = 0;
  42.  *     // for each src register:
  43.  *     foreach (src in instr->regs[1..n])
  44.  *       d = max(d, delayslots(src->instr, n) + depth(src->instr));
  45.  *     return d + 1;
  46.  *   }
  47.  *
  48.  * After an instruction's depth is calculated, it is inserted into the
  49.  * block's depth-sorted list, which is used by the scheduling pass.
  50.  */
  51.  
  52. /* calculate required # of delay slots between the instruction that
  53.  * assigns a value and the one that consumes
  54.  */
  55. int ir3_delayslots(struct ir3_instruction *assigner,
  56.                 struct ir3_instruction *consumer, unsigned n)
  57. {
  58.         /* worst case is cat1-3 (alu) -> cat4/5 needing 6 cycles, normal
  59.          * alu -> alu needs 3 cycles, cat4 -> alu and texture fetch
  60.          * handled with sync bits
  61.          */
  62.  
  63.         if (is_meta(assigner))
  64.                 return 0;
  65.  
  66.         if (writes_addr(assigner))
  67.                 return 6;
  68.  
  69.         /* handled via sync flags: */
  70.         if (is_sfu(assigner) || is_tex(assigner) || is_mem(assigner))
  71.                 return 0;
  72.  
  73.         /* assigner must be alu: */
  74.         if (is_flow(consumer) || is_sfu(consumer) || is_tex(consumer) ||
  75.                         is_mem(consumer)) {
  76.                 return 6;
  77.         } else if ((consumer->category == 3) &&
  78.                         (is_mad(consumer->opc) || is_madsh(consumer->opc)) &&
  79.                         (n == 2)) {
  80.                 /* special case, 3rd src to cat3 not required on first cycle */
  81.                 return 1;
  82.         } else {
  83.                 return 3;
  84.         }
  85. }
  86.  
  87. static void insert_by_depth(struct ir3_instruction *instr)
  88. {
  89.         struct ir3_block *block = instr->block;
  90.         struct ir3_instruction *n = block->head;
  91.         struct ir3_instruction *p = NULL;
  92.  
  93.         while (n && (n != instr) && (n->depth > instr->depth)) {
  94.                 p = n;
  95.                 n = n->next;
  96.         }
  97.  
  98.         instr->next = n;
  99.         if (p)
  100.                 p->next = instr;
  101.         else
  102.                 block->head = instr;
  103. }
  104.  
  105. static void ir3_instr_depth(struct ir3_instruction *instr)
  106. {
  107.         struct ir3_instruction *src;
  108.  
  109.         /* if we've already visited this instruction, bail now: */
  110.         if (ir3_instr_check_mark(instr))
  111.                 return;
  112.  
  113.         instr->depth = 0;
  114.  
  115.         foreach_ssa_src_n(src, i, instr) {
  116.                 unsigned sd;
  117.  
  118.                 /* visit child to compute it's depth: */
  119.                 ir3_instr_depth(src);
  120.  
  121.                 sd = ir3_delayslots(src, instr, i) + src->depth;
  122.  
  123.                 instr->depth = MAX2(instr->depth, sd);
  124.         }
  125.  
  126.         /* meta-instructions don't add cycles, other than PHI.. which
  127.          * might translate to a real instruction..
  128.          *
  129.          * well, not entirely true, fan-in/out, etc might need to need
  130.          * to generate some extra mov's in edge cases, etc.. probably
  131.          * we might want to do depth calculation considering the worst
  132.          * case for these??
  133.          */
  134.         if (!is_meta(instr))
  135.                 instr->depth++;
  136.  
  137.         insert_by_depth(instr);
  138. }
  139.  
  140. void ir3_block_depth(struct ir3_block *block)
  141. {
  142.         unsigned i;
  143.  
  144.         block->head = NULL;
  145.  
  146.         ir3_clear_mark(block->shader);
  147.         for (i = 0; i < block->noutputs; i++)
  148.                 if (block->outputs[i])
  149.                         ir3_instr_depth(block->outputs[i]);
  150.  
  151.         /* mark un-used instructions: */
  152.         for (i = 0; i < block->shader->instrs_count; i++) {
  153.                 struct ir3_instruction *instr = block->shader->instrs[i];
  154.  
  155.                 /* just consider instructions within this block: */
  156.                 if (instr->block != block)
  157.                         continue;
  158.  
  159.                 if (!ir3_instr_check_mark(instr))
  160.                         instr->depth = DEPTH_UNUSED;
  161.         }
  162.  
  163.         /* cleanup unused inputs: */
  164.         for (i = 0; i < block->ninputs; i++) {
  165.                 struct ir3_instruction *in = block->inputs[i];
  166.                 if (in && (in->depth == DEPTH_UNUSED))
  167.                         block->inputs[i] = NULL;
  168.         }
  169. }
  170.