Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright © 2014 Broadcom
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8.  * and/or sell copies of the Software, and to permit persons to whom the
  9.  * Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice (including the next
  12.  * paragraph) shall be included in all copies or substantial portions of the
  13.  * Software.
  14.  *
  15.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21.  * IN THE SOFTWARE.
  22.  */
  23.  
  24. #include "vc4_qpu.h"
  25.  
  26. static bool
  27. writes_reg(uint64_t inst, uint32_t w)
  28. {
  29.         return (QPU_GET_FIELD(inst, QPU_WADDR_ADD) == w ||
  30.                 QPU_GET_FIELD(inst, QPU_WADDR_MUL) == w);
  31. }
  32.  
  33. static bool
  34. _reads_reg(uint64_t inst, uint32_t r, bool ignore_a, bool ignore_b)
  35. {
  36.         struct {
  37.                 uint32_t mux, addr;
  38.         } src_regs[] = {
  39.                 { QPU_GET_FIELD(inst, QPU_ADD_A) },
  40.                 { QPU_GET_FIELD(inst, QPU_ADD_B) },
  41.                 { QPU_GET_FIELD(inst, QPU_MUL_A) },
  42.                 { QPU_GET_FIELD(inst, QPU_MUL_B) },
  43.         };
  44.  
  45.         for (int i = 0; i < ARRAY_SIZE(src_regs); i++) {
  46.                 if (!ignore_a &&
  47.                     src_regs[i].mux == QPU_MUX_A &&
  48.                     (QPU_GET_FIELD(inst, QPU_RADDR_A) == r))
  49.                         return true;
  50.  
  51.                 if (!ignore_b &&
  52.                     QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_SMALL_IMM &&
  53.                     src_regs[i].mux == QPU_MUX_B &&
  54.                     (QPU_GET_FIELD(inst, QPU_RADDR_B) == r))
  55.                         return true;
  56.         }
  57.  
  58.         return false;
  59. }
  60.  
  61. static bool
  62. reads_reg(uint64_t inst, uint32_t r)
  63. {
  64.         return _reads_reg(inst, r, false, false);
  65. }
  66.  
  67. static bool
  68. reads_a_reg(uint64_t inst, uint32_t r)
  69. {
  70.         return _reads_reg(inst, r, false, true);
  71. }
  72.  
  73. static bool
  74. reads_b_reg(uint64_t inst, uint32_t r)
  75. {
  76.         return _reads_reg(inst, r, true, false);
  77. }
  78.  
  79. static bool
  80. writes_sfu(uint64_t inst)
  81. {
  82.         return (writes_reg(inst, QPU_W_SFU_RECIP) ||
  83.                 writes_reg(inst, QPU_W_SFU_RECIPSQRT) ||
  84.                 writes_reg(inst, QPU_W_SFU_EXP) ||
  85.                 writes_reg(inst, QPU_W_SFU_LOG));
  86. }
  87.  
  88. /**
  89.  * Checks for the instruction restrictions from page 37 ("Summary of
  90.  * Instruction Restrictions").
  91.  */
  92. void
  93. vc4_qpu_validate(uint64_t *insts, uint32_t num_inst)
  94. {
  95.         bool scoreboard_locked = false;
  96.  
  97.         for (int i = 0; i < num_inst; i++) {
  98.                 uint64_t inst = insts[i];
  99.  
  100.                 if (QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_PROG_END) {
  101.                         if (qpu_inst_is_tlb(inst))
  102.                                 scoreboard_locked = true;
  103.  
  104.                         continue;
  105.                 }
  106.  
  107.                 /* "The Thread End instruction must not write to either physical
  108.                  *  regfile A or B."
  109.                  */
  110.                 assert(QPU_GET_FIELD(inst, QPU_WADDR_ADD) >= 32);
  111.                 assert(QPU_GET_FIELD(inst, QPU_WADDR_MUL) >= 32);
  112.  
  113.                 /* Can't trigger an implicit wait on scoreboard in the program
  114.                  * end instruction.
  115.                  */
  116.                 assert(!qpu_inst_is_tlb(inst) || scoreboard_locked);
  117.  
  118.                 /* Two delay slots will be executed. */
  119.                 assert(i + 2 <= num_inst);
  120.  
  121.                  for (int j = i; j < i + 2; j++) {
  122.                          /* "The last three instructions of any program
  123.                           *  (Thread End plus the following two delay-slot
  124.                           *  instructions) must not do varyings read, uniforms
  125.                           *  read or any kind of VPM, VDR, or VDW read or
  126.                           *  write."
  127.                           */
  128.                          assert(!writes_reg(insts[j], QPU_W_VPM));
  129.                          assert(!reads_reg(insts[j], QPU_R_VARY));
  130.                          assert(!reads_reg(insts[j], QPU_R_UNIF));
  131.                          assert(!reads_reg(insts[j], QPU_R_VPM));
  132.  
  133.                          /* "The Thread End instruction and the following two
  134.                           *  delay slot instructions must not write or read
  135.                           *  address 14 in either regfile A or B."
  136.                           */
  137.                          assert(!writes_reg(insts[j], 14));
  138.                          assert(!reads_reg(insts[j], 14));
  139.  
  140.                  }
  141.  
  142.                  /* "The final program instruction (the second delay slot
  143.                   *  instruction) must not do a TLB Z write."
  144.                   */
  145.                  assert(!writes_reg(insts[i + 2], QPU_W_TLB_Z));
  146.         }
  147.  
  148.         /* "A scoreboard wait must not occur in the first two instructions of
  149.          *  a fragment shader. This is either the explicit Wait for Scoreboard
  150.          *  signal or an implicit wait with the first tile-buffer read or
  151.          *  write instruction."
  152.          */
  153.         for (int i = 0; i < 2; i++) {
  154.                 uint64_t inst = insts[i];
  155.  
  156.                 assert(!qpu_inst_is_tlb(inst));
  157.         }
  158.  
  159.         /* "If TMU_NOSWAP is written, the write must be three instructions
  160.          *  before the first TMU write instruction.  For example, if
  161.          *  TMU_NOSWAP is written in the first shader instruction, the first
  162.          *  TMU write cannot occur before the 4th shader instruction."
  163.          */
  164.         int last_tmu_noswap = -10;
  165.         for (int i = 0; i < num_inst; i++) {
  166.                 uint64_t inst = insts[i];
  167.  
  168.                 assert((i - last_tmu_noswap) > 3 ||
  169.                        (!writes_reg(inst, QPU_W_TMU0_S) &&
  170.                         !writes_reg(inst, QPU_W_TMU1_S)));
  171.  
  172.                 if (writes_reg(inst, QPU_W_TMU_NOSWAP))
  173.                     last_tmu_noswap = i;
  174.         }
  175.  
  176.         /* "An instruction must not read from a location in physical regfile A
  177.          *  or B that was written to by the previous instruction."
  178.          */
  179.         for (int i = 0; i < num_inst - 1; i++) {
  180.                 uint64_t inst = insts[i];
  181.                 uint32_t add_waddr = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
  182.                 uint32_t mul_waddr = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
  183.                 uint32_t waddr_a, waddr_b;
  184.  
  185.                 if (inst & QPU_WS) {
  186.                         waddr_b = add_waddr;
  187.                         waddr_a = mul_waddr;
  188.                 } else {
  189.                         waddr_a = add_waddr;
  190.                         waddr_b = mul_waddr;
  191.                 }
  192.  
  193.                 assert(waddr_a >= 32 || !reads_a_reg(insts[i + 1], waddr_a));
  194.                 assert(waddr_b >= 32 || !reads_b_reg(insts[i + 1], waddr_b));
  195.         }
  196.  
  197.         /* "After an SFU lookup instruction, accumulator r4 must not be read
  198.          *  in the following two instructions. Any other instruction that
  199.          *  results in r4 being written (that is, TMU read, TLB read, SFU
  200.          *  lookup) cannot occur in the two instructions following an SFU
  201.          *  lookup."
  202.          */
  203.         int last_sfu_inst = -10;
  204.         for (int i = 0; i < num_inst - 1; i++) {
  205.                 uint64_t inst = insts[i];
  206.                 uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
  207.  
  208.                 assert(i - last_sfu_inst > 2 ||
  209.                        (!writes_sfu(inst) &&
  210.                         sig != QPU_SIG_LOAD_TMU0 &&
  211.                         sig != QPU_SIG_LOAD_TMU1 &&
  212.                         sig != QPU_SIG_COLOR_LOAD));
  213.  
  214.                 if (writes_sfu(inst))
  215.                         last_sfu_inst = i;
  216.         }
  217.  
  218.         int last_r5_write = -10;
  219.         for (int i = 0; i < num_inst - 1; i++) {
  220.                 uint64_t inst = insts[i];
  221.  
  222.                 /* "An instruction that does a vector rotate by r5 must not
  223.                  *  immediately follow an instruction that writes to r5."
  224.                  */
  225.                 assert(last_r5_write != i - 1 ||
  226.                        QPU_GET_FIELD(inst, QPU_SIG) != QPU_SIG_SMALL_IMM ||
  227.                        QPU_GET_FIELD(inst, QPU_SMALL_IMM) != 48);
  228.         }
  229.  
  230.         /* "An instruction that does a vector rotate must not immediately
  231.          *  follow an instruction that writes to the accumulator that is being
  232.          *  rotated.
  233.          *
  234.          * XXX: TODO.
  235.          */
  236.  
  237.         /* "After an instruction that does a TLB Z write, the multisample mask
  238.          *  must not be read as an instruction input argument in the following
  239.          *  two instruction. The TLB Z write instruction can, however, be
  240.          *  followed immediately by a TLB color write."
  241.          */
  242.         for (int i = 0; i < num_inst - 1; i++) {
  243.                 uint64_t inst = insts[i];
  244.                 if (writes_reg(inst, QPU_W_TLB_Z)) {
  245.                         assert(!reads_a_reg(insts[i + 1], QPU_R_MS_REV_FLAGS));
  246.                         assert(!reads_a_reg(insts[i + 2], QPU_R_MS_REV_FLAGS));
  247.                 }
  248.         }
  249.  
  250.         /*
  251.          * "A single instruction can only perform a maximum of one of the
  252.          *  following closely coupled peripheral accesses in a single
  253.          *  instruction: TMU write, TMU read, TLB write, TLB read, TLB
  254.          *  combined color read and write, SFU write, Mutex read or Semaphore
  255.          *  access."
  256.          */
  257.         for (int i = 0; i < num_inst - 1; i++) {
  258.                 uint64_t inst = insts[i];
  259.  
  260.                 assert(qpu_num_sf_accesses(inst) <= 1);
  261.         }
  262. }
  263.