Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright © 2014 Broadcom
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8.  * and/or sell copies of the Software, and to permit persons to whom the
  9.  * Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice (including the next
  12.  * paragraph) shall be included in all copies or substantial portions of the
  13.  * Software.
  14.  *
  15.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
  20.  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
  21.  * IN THE SOFTWARE.
  22.  */
  23.  
  24. #include <stdbool.h>
  25. #include <stdio.h>
  26.  
  27. #include "vc4_qpu.h"
  28. #include "vc4_qpu_defines.h"
  29.  
  30. static const char *qpu_add_opcodes[] = {
  31.         [QPU_A_NOP] = "nop",
  32.         [QPU_A_FADD] = "fadd",
  33.         [QPU_A_FSUB] = "fsub",
  34.         [QPU_A_FMIN] = "fmin",
  35.         [QPU_A_FMAX] = "fmax",
  36.         [QPU_A_FMINABS] = "fminabs",
  37.         [QPU_A_FMAXABS] = "fmaxabs",
  38.         [QPU_A_FTOI] = "ftoi",
  39.         [QPU_A_ITOF] = "itof",
  40.         [QPU_A_ADD] = "add",
  41.         [QPU_A_SUB] = "sub",
  42.         [QPU_A_SHR] = "shr",
  43.         [QPU_A_ASR] = "asr",
  44.         [QPU_A_ROR] = "ror",
  45.         [QPU_A_SHL] = "shl",
  46.         [QPU_A_MIN] = "min",
  47.         [QPU_A_MAX] = "max",
  48.         [QPU_A_AND] = "and",
  49.         [QPU_A_OR] = "or",
  50.         [QPU_A_XOR] = "xor",
  51.         [QPU_A_NOT] = "not",
  52.         [QPU_A_CLZ] = "clz",
  53.         [QPU_A_V8ADDS] = "v8adds",
  54.         [QPU_A_V8SUBS] = "v8subs",
  55. };
  56.  
  57. static const char *qpu_mul_opcodes[] = {
  58.         [QPU_M_NOP] = "nop",
  59.         [QPU_M_FMUL] = "fmul",
  60.         [QPU_M_MUL24] = "mul24",
  61.         [QPU_M_V8MULD] = "v8muld",
  62.         [QPU_M_V8MIN] = "v8min",
  63.         [QPU_M_V8MAX] = "v8max",
  64.         [QPU_M_V8ADDS] = "v8adds",
  65.         [QPU_M_V8SUBS] = "v8subs",
  66. };
  67.  
  68. static const char *qpu_sig[] = {
  69.         [QPU_SIG_SW_BREAKPOINT] = "sig_brk",
  70.         [QPU_SIG_NONE] = "",
  71.         [QPU_SIG_THREAD_SWITCH] = "sig_switch",
  72.         [QPU_SIG_PROG_END] = "sig_end",
  73.         [QPU_SIG_WAIT_FOR_SCOREBOARD] = "sig_wait_score",
  74.         [QPU_SIG_SCOREBOARD_UNLOCK] = "sig_unlock_score",
  75.         [QPU_SIG_LAST_THREAD_SWITCH] = "sig_thread_switch",
  76.         [QPU_SIG_COVERAGE_LOAD] = "sig_coverage_load",
  77.         [QPU_SIG_COLOR_LOAD] = "sig_color_load",
  78.         [QPU_SIG_COLOR_LOAD_END] = "sig_color_load_end",
  79.         [QPU_SIG_LOAD_TMU0] = "load_tmu0",
  80.         [QPU_SIG_LOAD_TMU1] = "load_tmu1",
  81.         [QPU_SIG_ALPHA_MASK_LOAD] = "sig_alpha_mask_load",
  82.         [QPU_SIG_SMALL_IMM] = "sig_small_imm",
  83.         [QPU_SIG_LOAD_IMM] = "sig_load_imm",
  84.         [QPU_SIG_BRANCH] = "sig_branch",
  85. };
  86.  
  87. static const char *qpu_pack_mul[] = {
  88.         [QPU_PACK_MUL_NOP] = "",
  89.         [QPU_PACK_MUL_8888] = "8888",
  90.         [QPU_PACK_MUL_8A] = "8a",
  91.         [QPU_PACK_MUL_8B] = "8b",
  92.         [QPU_PACK_MUL_8C] = "8c",
  93.         [QPU_PACK_MUL_8D] = "8d",
  94. };
  95.  
  96. /* The QPU unpack for A and R4 files can be described the same, it's just that
  97.  * the R4 variants are convert-to-float only, with no int support.
  98.  */
  99. static const char *qpu_unpack[] = {
  100.         [QPU_UNPACK_NOP] = "",
  101.         [QPU_UNPACK_16A_TO_F32] = "16a",
  102.         [QPU_UNPACK_16B_TO_F32] = "16b",
  103.         [QPU_UNPACK_8D_REP] = "8d_rep",
  104.         [QPU_UNPACK_8A] = "8a",
  105.         [QPU_UNPACK_8B] = "8b",
  106.         [QPU_UNPACK_8C] = "8c",
  107.         [QPU_UNPACK_8D] = "8d",
  108. };
  109.  
  110. static const char *special_read_a[] = {
  111.         "uni",
  112.         NULL,
  113.         NULL,
  114.         "vary",
  115.         NULL,
  116.         NULL,
  117.         "elem",
  118.         "nop",
  119.         NULL,
  120.         "x_pix",
  121.         "ms_flags",
  122.         NULL,
  123.         NULL,
  124.         NULL,
  125.         NULL,
  126.         NULL,
  127.         "vpm_read",
  128.         "vpm_ld_busy",
  129.         "vpm_ld_wait",
  130.         "mutex_acq"
  131. };
  132.  
  133. static const char *special_read_b[] = {
  134.         "uni",
  135.         NULL,
  136.         NULL,
  137.         "vary",
  138.         NULL,
  139.         NULL,
  140.         "qpu",
  141.         "nop",
  142.         NULL,
  143.         "y_pix",
  144.         "rev_flag",
  145.         NULL,
  146.         NULL,
  147.         NULL,
  148.         NULL,
  149.         NULL,
  150.         "vpm_read",
  151.         "vpm_st_busy",
  152.         "vpm_st_wait",
  153.         "mutex_acq"
  154. };
  155.  
  156. /**
  157.  * This has the B-file descriptions for register writes.
  158.  *
  159.  * Since only a couple of regs are different between A and B, the A overrides
  160.  * are in get_special_write_desc().
  161.  */
  162. static const char *special_write[] = {
  163.         [QPU_W_ACC0] = "r0",
  164.         [QPU_W_ACC1] = "r1",
  165.         [QPU_W_ACC2] = "r2",
  166.         [QPU_W_ACC3] = "r3",
  167.         [QPU_W_TMU_NOSWAP] = "tmu_noswap",
  168.         [QPU_W_ACC5] = "r5",
  169.         [QPU_W_HOST_INT] = "host_int",
  170.         [QPU_W_NOP] = "nop",
  171.         [QPU_W_UNIFORMS_ADDRESS] = "uniforms_addr",
  172.         [QPU_W_QUAD_XY] = "quad_y",
  173.         [QPU_W_MS_FLAGS] = "ms_flags",
  174.         [QPU_W_TLB_STENCIL_SETUP] = "tlb_stencil_setup",
  175.         [QPU_W_TLB_Z] = "tlb_z",
  176.         [QPU_W_TLB_COLOR_MS] = "tlb_color_ms",
  177.         [QPU_W_TLB_COLOR_ALL] = "tlb_color_all",
  178.         [QPU_W_VPM] = "vpm",
  179.         [QPU_W_VPMVCD_SETUP] = "vw_setup",
  180.         [QPU_W_VPM_ADDR] = "vw_addr",
  181.         [QPU_W_MUTEX_RELEASE] = "mutex_release",
  182.         [QPU_W_SFU_RECIP] = "sfu_recip",
  183.         [QPU_W_SFU_RECIPSQRT] = "sfu_recipsqrt",
  184.         [QPU_W_SFU_EXP] = "sfu_exp",
  185.         [QPU_W_SFU_LOG] = "sfu_log",
  186.         [QPU_W_TMU0_S] = "tmu0_s",
  187.         [QPU_W_TMU0_T] = "tmu0_t",
  188.         [QPU_W_TMU0_R] = "tmu0_r",
  189.         [QPU_W_TMU0_B] = "tmu0_b",
  190.         [QPU_W_TMU1_S] = "tmu1_s",
  191.         [QPU_W_TMU1_T] = "tmu1_t",
  192.         [QPU_W_TMU1_R] = "tmu1_r",
  193.         [QPU_W_TMU1_B] = "tmu1_b",
  194. };
  195.  
  196. static const char *qpu_pack_a[] = {
  197.         [QPU_PACK_A_NOP] = "",
  198.         [QPU_PACK_A_16A] = ".16a",
  199.         [QPU_PACK_A_16B] = ".16b",
  200.         [QPU_PACK_A_8888] = ".8888",
  201.         [QPU_PACK_A_8A] = ".8a",
  202.         [QPU_PACK_A_8B] = ".8b",
  203.         [QPU_PACK_A_8C] = ".8c",
  204.         [QPU_PACK_A_8D] = ".8d",
  205.  
  206.         [QPU_PACK_A_32_SAT] = ".sat",
  207.         [QPU_PACK_A_16A_SAT] = ".16a.sat",
  208.         [QPU_PACK_A_16B_SAT] = ".16b.sat",
  209.         [QPU_PACK_A_8888_SAT] = ".8888.sat",
  210.         [QPU_PACK_A_8A_SAT] = ".8a.sat",
  211.         [QPU_PACK_A_8B_SAT] = ".8b.sat",
  212.         [QPU_PACK_A_8C_SAT] = ".8c.sat",
  213.         [QPU_PACK_A_8D_SAT] = ".8d.sat",
  214. };
  215.  
  216. static const char *qpu_condflags[] = {
  217.         [QPU_COND_NEVER] = ".never",
  218.         [QPU_COND_ALWAYS] = "",
  219.         [QPU_COND_ZS] = ".zs",
  220.         [QPU_COND_ZC] = ".zc",
  221.         [QPU_COND_NS] = ".ns",
  222.         [QPU_COND_NC] = ".nc",
  223.         [QPU_COND_CS] = ".cs",
  224.         [QPU_COND_CC] = ".cc",
  225. };
  226.  
  227. #define DESC(array, index)                                        \
  228.         ((index > ARRAY_SIZE(array) || !(array)[index]) ?         \
  229.          "???" : (array)[index])
  230.  
  231. static const char *
  232. get_special_write_desc(int reg, bool is_a)
  233. {
  234.         if (is_a) {
  235.                 switch (reg) {
  236.                 case QPU_W_QUAD_XY:
  237.                         return "quad_x";
  238.                 case QPU_W_VPMVCD_SETUP:
  239.                         return "vr_setup";
  240.                 case QPU_W_VPM_ADDR:
  241.                         return "vr_addr";
  242.                 }
  243.         }
  244.  
  245.         return special_write[reg];
  246. }
  247.  
  248. static void
  249. print_alu_dst(uint64_t inst, bool is_mul)
  250. {
  251.         bool is_a = is_mul == ((inst & QPU_WS) != 0);
  252.         uint32_t waddr = (is_mul ?
  253.                           QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
  254.                           QPU_GET_FIELD(inst, QPU_WADDR_ADD));
  255.         const char *file = is_a ? "a" : "b";
  256.         uint32_t pack = QPU_GET_FIELD(inst, QPU_PACK);
  257.  
  258.         if (waddr <= 31)
  259.                 fprintf(stderr, "r%s%d", file, waddr);
  260.         else if (get_special_write_desc(waddr, is_a))
  261.                 fprintf(stderr, "%s", get_special_write_desc(waddr, is_a));
  262.         else
  263.                 fprintf(stderr, "%s%d?", file, waddr);
  264.  
  265.         if (is_mul && (inst & QPU_PM)) {
  266.                 fprintf(stderr, ".%s", DESC(qpu_pack_mul, pack));
  267.         } else if (is_a && !(inst & QPU_PM)) {
  268.                 fprintf(stderr, "%s", DESC(qpu_pack_a, pack));
  269.         }
  270. }
  271.  
  272. static void
  273. print_alu_src(uint64_t inst, uint32_t mux)
  274. {
  275.         bool is_a = mux != QPU_MUX_B;
  276.         const char *file = is_a ? "a" : "b";
  277.         uint32_t raddr = (is_a ?
  278.                           QPU_GET_FIELD(inst, QPU_RADDR_A) :
  279.                           QPU_GET_FIELD(inst, QPU_RADDR_B));
  280.         uint32_t unpack = QPU_GET_FIELD(inst, QPU_UNPACK);
  281.  
  282.         if (mux <= QPU_MUX_R5)
  283.                 fprintf(stderr, "r%d", mux);
  284.         else if (!is_a &&
  285.                  QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_SMALL_IMM) {
  286.                 uint32_t si = QPU_GET_FIELD(inst, QPU_SMALL_IMM);
  287.                 if (si <= 15)
  288.                         fprintf(stderr, "%d", si);
  289.                 else if (si <= 31)
  290.                         fprintf(stderr, "%d", -16 + (si - 16));
  291.                 else if (si <= 39)
  292.                         fprintf(stderr, "%.1f", (float)(1 << (si - 32)));
  293.                 else if (si <= 47)
  294.                         fprintf(stderr, "%f", 1.0f / (1 << (48 - si)));
  295.                 else
  296.                         fprintf(stderr, "<bad imm %d>", si);
  297.         } else if (raddr <= 31)
  298.                 fprintf(stderr, "r%s%d", file, raddr);
  299.         else {
  300.                 if (is_a)
  301.                         fprintf(stderr, "%s", DESC(special_read_a, raddr - 32));
  302.                 else
  303.                         fprintf(stderr, "%s", DESC(special_read_b, raddr - 32));
  304.         }
  305.  
  306.         if (unpack != QPU_UNPACK_NOP &&
  307.             ((mux == QPU_MUX_A && !(inst & QPU_PM)) ||
  308.              (mux == QPU_MUX_R4 && (inst & QPU_PM)))) {
  309.                 fprintf(stderr, ".%s", DESC(qpu_unpack, unpack));
  310.         }
  311. }
  312.  
  313. static void
  314. print_add_op(uint64_t inst)
  315. {
  316.         uint32_t op_add = QPU_GET_FIELD(inst, QPU_OP_ADD);
  317.         uint32_t cond = QPU_GET_FIELD(inst, QPU_COND_ADD);
  318.         bool is_mov = (op_add == QPU_A_OR &&
  319.                        QPU_GET_FIELD(inst, QPU_ADD_A) ==
  320.                        QPU_GET_FIELD(inst, QPU_ADD_B));
  321.  
  322.         fprintf(stderr, "%s%s%s ",
  323.                 is_mov ? "mov" : DESC(qpu_add_opcodes, op_add),
  324.                 ((inst & QPU_SF) && op_add != QPU_A_NOP) ? ".sf" : "",
  325.                 op_add != QPU_A_NOP ? DESC(qpu_condflags, cond) : "");
  326.  
  327.         print_alu_dst(inst, false);
  328.         fprintf(stderr, ", ");
  329.  
  330.         print_alu_src(inst, QPU_GET_FIELD(inst, QPU_ADD_A));
  331.  
  332.         if (!is_mov) {
  333.                 fprintf(stderr, ", ");
  334.  
  335.                 print_alu_src(inst, QPU_GET_FIELD(inst, QPU_ADD_B));
  336.         }
  337. }
  338.  
  339. static void
  340. print_mul_op(uint64_t inst)
  341. {
  342.         uint32_t op_add = QPU_GET_FIELD(inst, QPU_OP_ADD);
  343.         uint32_t op_mul = QPU_GET_FIELD(inst, QPU_OP_MUL);
  344.         uint32_t cond = QPU_GET_FIELD(inst, QPU_COND_MUL);
  345.         bool is_mov = (op_mul == QPU_M_V8MIN &&
  346.                        QPU_GET_FIELD(inst, QPU_MUL_A) ==
  347.                        QPU_GET_FIELD(inst, QPU_MUL_B));
  348.  
  349.         fprintf(stderr, "%s%s%s ",
  350.                 is_mov ? "mov" : DESC(qpu_mul_opcodes, op_mul),
  351.                 ((inst & QPU_SF) && op_add == QPU_A_NOP) ? ".sf" : "",
  352.                 op_mul != QPU_M_NOP ? DESC(qpu_condflags, cond) : "");
  353.  
  354.         print_alu_dst(inst, true);
  355.         fprintf(stderr, ", ");
  356.  
  357.         print_alu_src(inst, QPU_GET_FIELD(inst, QPU_MUL_A));
  358.  
  359.         if (!is_mov) {
  360.                 fprintf(stderr, ", ");
  361.                 print_alu_src(inst, QPU_GET_FIELD(inst, QPU_MUL_B));
  362.         }
  363. }
  364.  
  365. static void
  366. print_load_imm(uint64_t inst)
  367. {
  368.         uint32_t imm = inst;
  369.         uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
  370.         uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
  371.         uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD);
  372.         uint32_t cond_mul = QPU_GET_FIELD(inst, QPU_COND_MUL);
  373.  
  374.         fprintf(stderr, "load_imm ");
  375.         print_alu_dst(inst, false);
  376.         fprintf(stderr, "%s, ", (waddr_add != QPU_W_NOP ?
  377.                                  DESC(qpu_condflags, cond_add) : ""));
  378.         print_alu_dst(inst, true);
  379.         fprintf(stderr, "%s, ", (waddr_mul != QPU_W_NOP ?
  380.                                  DESC(qpu_condflags, cond_mul) : ""));
  381.         fprintf(stderr, "0x%08x (%f)", imm, uif(imm));
  382. }
  383.  
  384. void
  385. vc4_qpu_disasm(const uint64_t *instructions, int num_instructions)
  386. {
  387.         for (int i = 0; i < num_instructions; i++) {
  388.                 uint64_t inst = instructions[i];
  389.                 uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
  390.  
  391.                 switch (sig) {
  392.                 case QPU_SIG_BRANCH:
  393.                         fprintf(stderr, "branch");
  394.                         break;
  395.                 case QPU_SIG_LOAD_IMM:
  396.                         print_load_imm(inst);
  397.                         break;
  398.                 default:
  399.                         if (sig != QPU_SIG_NONE)
  400.                                 fprintf(stderr, "%s ", DESC(qpu_sig, sig));
  401.                         print_add_op(inst);
  402.                         fprintf(stderr, " ; ");
  403.                         print_mul_op(inst);
  404.                         break;
  405.                 }
  406.  
  407.                 if (num_instructions != 1)
  408.                         fprintf(stderr, "\n");
  409.         }
  410. }
  411.