Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright (c) 2013 Rob Clark <robdclark@gmail.com>
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8.  * and/or sell copies of the Software, and to permit persons to whom the
  9.  * Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice (including the next
  12.  * paragraph) shall be included in all copies or substantial portions of the
  13.  * Software.
  14.  *
  15.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20.  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21.  * SOFTWARE.
  22.  */
  23.  
  24. #ifndef IR3_H_
  25. #define IR3_H_
  26.  
  27. #include <stdint.h>
  28. #include <stdbool.h>
  29.  
  30. #include "util/u_debug.h"
  31.  
  32. #include "instr-a3xx.h"
  33. #include "disasm.h"  /* TODO move 'enum shader_t' somewhere else.. */
  34.  
  35. /* low level intermediate representation of an adreno shader program */
  36.  
  37. struct ir3;
  38. struct ir3_instruction;
  39. struct ir3_block;
  40.  
  41. struct ir3_info {
  42.         uint16_t sizedwords;
  43.         uint16_t instrs_count;   /* expanded to account for rpt's */
  44.         /* NOTE: max_reg, etc, does not include registers not touched
  45.          * by the shader (ie. vertex fetched via VFD_DECODE but not
  46.          * touched by shader)
  47.          */
  48.         int8_t   max_reg;   /* highest GPR # used by shader */
  49.         int8_t   max_half_reg;
  50.         int16_t  max_const;
  51. };
  52.  
  53. struct ir3_register {
  54.         enum {
  55.                 IR3_REG_CONST  = 0x001,
  56.                 IR3_REG_IMMED  = 0x002,
  57.                 IR3_REG_HALF   = 0x004,
  58.                 IR3_REG_RELATIV= 0x008,
  59.                 IR3_REG_R      = 0x010,
  60.                 /* Most instructions, it seems, can do float abs/neg but not
  61.                  * integer.  The CP pass needs to know what is intended (int or
  62.                  * float) in order to do the right thing.  For this reason the
  63.                  * abs/neg flags are split out into float and int variants.  In
  64.                  * addition, .b (bitwise) operations, the negate is actually a
  65.                  * bitwise not, so split that out into a new flag to make it
  66.                  * more clear.
  67.                  */
  68.                 IR3_REG_FNEG   = 0x020,
  69.                 IR3_REG_FABS   = 0x040,
  70.                 IR3_REG_SNEG   = 0x080,
  71.                 IR3_REG_SABS   = 0x100,
  72.                 IR3_REG_BNOT   = 0x200,
  73.                 IR3_REG_EVEN   = 0x400,
  74.                 IR3_REG_POS_INF= 0x800,
  75.                 /* (ei) flag, end-input?  Set on last bary, presumably to signal
  76.                  * that the shader needs no more input:
  77.                  */
  78.                 IR3_REG_EI     = 0x1000,
  79.                 /* meta-flags, for intermediate stages of IR, ie.
  80.                  * before register assignment is done:
  81.                  */
  82.                 IR3_REG_SSA    = 0x2000,   /* 'instr' is ptr to assigning instr */
  83.                 IR3_REG_IA     = 0x4000,   /* meta-input dst is "assigned" */
  84.                 IR3_REG_ADDR   = 0x8000,   /* register is a0.x */
  85.         } flags;
  86.         union {
  87.                 /* normal registers:
  88.                  * the component is in the low two bits of the reg #, so
  89.                  * rN.x becomes: (N << 2) | x
  90.                  */
  91.                 int   num;
  92.                 /* immediate: */
  93.                 int32_t  iim_val;
  94.                 uint32_t uim_val;
  95.                 float    fim_val;
  96.                 /* relative: */
  97.                 int   offset;
  98.         };
  99.  
  100.         /* for IR3_REG_SSA, src registers contain ptr back to
  101.          * assigning instruction.
  102.          */
  103.         struct ir3_instruction *instr;
  104.  
  105.         union {
  106.                 /* used for cat5 instructions, but also for internal/IR level
  107.                  * tracking of what registers are read/written by an instruction.
  108.                  * wrmask may be a bad name since it is used to represent both
  109.                  * src and dst that touch multiple adjacent registers.
  110.                  */
  111.                 unsigned wrmask;
  112.                 /* for relative addressing, 32bits for array size is too small,
  113.                  * but otoh we don't need to deal with disjoint sets, so instead
  114.                  * use a simple size field (number of scalar components).
  115.                  */
  116.                 unsigned size;
  117.         };
  118. };
  119.  
  120. struct ir3_instruction {
  121.         struct ir3_block *block;
  122.         int category;
  123.         opc_t opc;
  124.         enum {
  125.                 /* (sy) flag is set on first instruction, and after sample
  126.                  * instructions (probably just on RAW hazard).
  127.                  */
  128.                 IR3_INSTR_SY    = 0x001,
  129.                 /* (ss) flag is set on first instruction, and first instruction
  130.                  * to depend on the result of "long" instructions (RAW hazard):
  131.                  *
  132.                  *   rcp, rsq, log2, exp2, sin, cos, sqrt
  133.                  *
  134.                  * It seems to synchronize until all in-flight instructions are
  135.                  * completed, for example:
  136.                  *
  137.                  *   rsq hr1.w, hr1.w
  138.                  *   add.f hr2.z, (neg)hr2.z, hc0.y
  139.                  *   mul.f hr2.w, (neg)hr2.y, (neg)hr2.y
  140.                  *   rsq hr2.x, hr2.x
  141.                  *   (rpt1)nop
  142.                  *   mad.f16 hr2.w, hr2.z, hr2.z, hr2.w
  143.                  *   nop
  144.                  *   mad.f16 hr2.w, (neg)hr0.w, (neg)hr0.w, hr2.w
  145.                  *   (ss)(rpt2)mul.f hr1.x, (r)hr1.x, hr1.w
  146.                  *   (rpt2)mul.f hr0.x, (neg)(r)hr0.x, hr2.x
  147.                  *
  148.                  * The last mul.f does not have (ss) set, presumably because the
  149.                  * (ss) on the previous instruction does the job.
  150.                  *
  151.                  * The blob driver also seems to set it on WAR hazards, although
  152.                  * not really clear if this is needed or just blob compiler being
  153.                  * sloppy.  So far I haven't found a case where removing the (ss)
  154.                  * causes problems for WAR hazard, but I could just be getting
  155.                  * lucky:
  156.                  *
  157.                  *   rcp r1.y, r3.y
  158.                  *   (ss)(rpt2)mad.f32 r3.y, (r)c9.x, r1.x, (r)r3.z
  159.                  *
  160.                  */
  161.                 IR3_INSTR_SS    = 0x002,
  162.                 /* (jp) flag is set on jump targets:
  163.                  */
  164.                 IR3_INSTR_JP    = 0x004,
  165.                 IR3_INSTR_UL    = 0x008,
  166.                 IR3_INSTR_3D    = 0x010,
  167.                 IR3_INSTR_A     = 0x020,
  168.                 IR3_INSTR_O     = 0x040,
  169.                 IR3_INSTR_P     = 0x080,
  170.                 IR3_INSTR_S     = 0x100,
  171.                 IR3_INSTR_S2EN  = 0x200,
  172.                 /* meta-flags, for intermediate stages of IR, ie.
  173.                  * before register assignment is done:
  174.                  */
  175.                 IR3_INSTR_MARK  = 0x1000,
  176.         } flags;
  177.         int repeat;
  178. #ifdef DEBUG
  179.         unsigned regs_max;
  180. #endif
  181.         unsigned regs_count;
  182.         struct ir3_register **regs;
  183.         union {
  184.                 struct {
  185.                         char inv;
  186.                         char comp;
  187.                         int  immed;
  188.                 } cat0;
  189.                 struct {
  190.                         type_t src_type, dst_type;
  191.                 } cat1;
  192.                 struct {
  193.                         enum {
  194.                                 IR3_COND_LT = 0,
  195.                                 IR3_COND_LE = 1,
  196.                                 IR3_COND_GT = 2,
  197.                                 IR3_COND_GE = 3,
  198.                                 IR3_COND_EQ = 4,
  199.                                 IR3_COND_NE = 5,
  200.                         } condition;
  201.                 } cat2;
  202.                 struct {
  203.                         unsigned samp, tex;
  204.                         type_t type;
  205.                 } cat5;
  206.                 struct {
  207.                         type_t type;
  208.                         int offset;
  209.                         int iim_val;
  210.                 } cat6;
  211.                 /* for meta-instructions, just used to hold extra data
  212.                  * before instruction scheduling, etc
  213.                  */
  214.                 struct {
  215.                         int off;              /* component/offset */
  216.                 } fo;
  217.                 struct {
  218.                         int aid;
  219.                 } fi;
  220.                 struct {
  221.                         struct ir3_block *if_block, *else_block;
  222.                 } flow;
  223.                 struct {
  224.                         struct ir3_block *block;
  225.                 } inout;
  226.  
  227.                 /* XXX keep this as big as all other union members! */
  228.                 uint32_t info[3];
  229.         };
  230.  
  231.         /* transient values used during various algorithms: */
  232.         union {
  233.                 /* The instruction depth is the max dependency distance to output.
  234.                  *
  235.                  * You can also think of it as the "cost", if we did any sort of
  236.                  * optimization for register footprint.  Ie. a value that is  just
  237.                  * result of moving a const to a reg would have a low cost,  so to
  238.                  * it could make sense to duplicate the instruction at various
  239.                  * points where the result is needed to reduce register footprint.
  240.                  *
  241.                  * DEPTH_UNUSED used to mark unused instructions after depth
  242.                  * calculation pass.
  243.                  */
  244. #define DEPTH_UNUSED  ~0
  245.                 unsigned depth;
  246.         };
  247.  
  248.         /* Used during CP and RA stages.  For fanin and shader inputs/
  249.          * outputs where we need a sequence of consecutive registers,
  250.          * keep track of each src instructions left (ie 'n-1') and right
  251.          * (ie 'n+1') neighbor.  The front-end must insert enough mov's
  252.          * to ensure that each instruction has at most one left and at
  253.          * most one right neighbor.  During the copy-propagation pass,
  254.          * we only remove mov's when we can preserve this constraint.
  255.          * And during the RA stage, we use the neighbor information to
  256.          * allocate a block of registers in one shot.
  257.          *
  258.          * TODO: maybe just add something like:
  259.          *   struct ir3_instruction_ref {
  260.          *       struct ir3_instruction *instr;
  261.          *       unsigned cnt;
  262.          *   }
  263.          *
  264.          * Or can we get away without the refcnt stuff?  It seems like
  265.          * it should be overkill..  the problem is if, potentially after
  266.          * already eliminating some mov's, if you have a single mov that
  267.          * needs to be grouped with it's neighbors in two different
  268.          * places (ex. shader output and a fanin).
  269.          */
  270.         struct {
  271.                 struct ir3_instruction *left, *right;
  272.                 uint16_t left_cnt, right_cnt;
  273.         } cp;
  274.  
  275.         /* an instruction can reference at most one address register amongst
  276.          * it's src/dst registers.  Beyond that, you need to insert mov's.
  277.          */
  278.         struct ir3_instruction *address;
  279.  
  280.         /* in case of a instruction with relative dst instruction, we need to
  281.          * capture the dependency on the fanin for the previous values of
  282.          * the array elements.  Since we don't know at compile time actually
  283.          * which array elements are written, this serves to preserve the
  284.          * unconditional write to array elements prior to the conditional
  285.          * write.
  286.          *
  287.          * TODO only cat1 can do indirect write.. we could maybe move this
  288.          * into instr->cat1.fanin (but would require the frontend to insert
  289.          * the extra mov)
  290.          */
  291.         struct ir3_instruction *fanin;
  292.  
  293.         struct ir3_instruction *next;
  294. #ifdef DEBUG
  295.         uint32_t serialno;
  296. #endif
  297. };
  298.  
  299. static inline struct ir3_instruction *
  300. ir3_neighbor_first(struct ir3_instruction *instr)
  301. {
  302.         while (instr->cp.left)
  303.                 instr = instr->cp.left;
  304.         return instr;
  305. }
  306.  
  307. static inline int ir3_neighbor_count(struct ir3_instruction *instr)
  308. {
  309.         int num = 1;
  310.  
  311.         debug_assert(!instr->cp.left);
  312.  
  313.         while (instr->cp.right) {
  314.                 num++;
  315.                 instr = instr->cp.right;
  316.         }
  317.  
  318.         return num;
  319. }
  320.  
  321. struct ir3_heap_chunk;
  322.  
  323. struct ir3 {
  324.         unsigned instrs_count, instrs_sz;
  325.         struct ir3_instruction **instrs;
  326.  
  327.         /* Track bary.f (and ldlv) instructions.. this is needed in
  328.          * scheduling to ensure that all varying fetches happen before
  329.          * any potential kill instructions.  The hw gets grumpy if all
  330.          * threads in a group are killed before the last bary.f gets
  331.          * a chance to signal end of input (ei).
  332.          */
  333.         unsigned baryfs_count, baryfs_sz;
  334.         struct ir3_instruction **baryfs;
  335.  
  336.         /* Track all indirect instructions (read and write).  To avoid
  337.          * deadlock scenario where an address register gets scheduled,
  338.          * but other dependent src instructions cannot be scheduled due
  339.          * to dependency on a *different* address register value, the
  340.          * scheduler needs to ensure that all dependencies other than
  341.          * the instruction other than the address register are scheduled
  342.          * before the one that writes the address register.  Having a
  343.          * convenient list of instructions that reference some address
  344.          * register simplifies this.
  345.          */
  346.         unsigned indirects_count, indirects_sz;
  347.         struct ir3_instruction **indirects;
  348.  
  349.         struct ir3_block *block;
  350.         unsigned heap_idx;
  351.         struct ir3_heap_chunk *chunk;
  352. };
  353.  
  354. struct ir3_block {
  355.         struct ir3 *shader;
  356.         unsigned ntemporaries, ninputs, noutputs;
  357.         /* maps TGSI_FILE_TEMPORARY index back to the assigning instruction: */
  358.         struct ir3_instruction **temporaries;
  359.         struct ir3_instruction **inputs;
  360.         struct ir3_instruction **outputs;
  361.         /* only a single address register: */
  362.         struct ir3_instruction *address;
  363.         struct ir3_block *parent;
  364.         struct ir3_instruction *head;
  365. };
  366.  
  367. struct ir3 * ir3_create(void);
  368. void ir3_destroy(struct ir3 *shader);
  369. void * ir3_assemble(struct ir3 *shader,
  370.                 struct ir3_info *info, uint32_t gpu_id);
  371. void * ir3_alloc(struct ir3 *shader, int sz);
  372.  
  373. struct ir3_block * ir3_block_create(struct ir3 *shader,
  374.                 unsigned ntmp, unsigned nin, unsigned nout);
  375.  
  376. struct ir3_instruction * ir3_instr_create(struct ir3_block *block,
  377.                 int category, opc_t opc);
  378. struct ir3_instruction * ir3_instr_create2(struct ir3_block *block,
  379.                 int category, opc_t opc, int nreg);
  380. struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr);
  381. const char *ir3_instr_name(struct ir3_instruction *instr);
  382.  
  383. struct ir3_register * ir3_reg_create(struct ir3_instruction *instr,
  384.                 int num, int flags);
  385.  
  386.  
  387. static inline bool ir3_instr_check_mark(struct ir3_instruction *instr)
  388. {
  389.         if (instr->flags & IR3_INSTR_MARK)
  390.                 return true;  /* already visited */
  391.         instr->flags |= IR3_INSTR_MARK;
  392.         return false;
  393. }
  394.  
  395. static inline void ir3_clear_mark(struct ir3 *shader)
  396. {
  397.         /* TODO would be nice to drop the instruction array.. for
  398.          * new compiler, _clear_mark() is all we use it for, and
  399.          * we could probably manage a linked list instead..
  400.          *
  401.          * Also, we'll probably want to mark instructions within
  402.          * a block, so tracking the list of instrs globally is
  403.          * unlikely to be what we want.
  404.          */
  405.         unsigned i;
  406.         for (i = 0; i < shader->instrs_count; i++) {
  407.                 struct ir3_instruction *instr = shader->instrs[i];
  408.                 instr->flags &= ~IR3_INSTR_MARK;
  409.         }
  410. }
  411.  
  412. static inline int ir3_instr_regno(struct ir3_instruction *instr,
  413.                 struct ir3_register *reg)
  414. {
  415.         unsigned i;
  416.         for (i = 0; i < instr->regs_count; i++)
  417.                 if (reg == instr->regs[i])
  418.                         return i;
  419.         return -1;
  420. }
  421.  
  422.  
/* Not referenced in the visible part of this header; presumably an upper
 * bound on the number of arrays a shader may declare -- TODO confirm
 * against the users of this constant.
 */
#define MAX_ARRAYS 16
  424.  
  425. /* comp:
  426.  *   0 - x
  427.  *   1 - y
  428.  *   2 - z
  429.  *   3 - w
  430.  */
  431. static inline uint32_t regid(int num, int comp)
  432. {
  433.         return (num << 2) | (comp & 0x3);
  434. }
  435.  
  436. static inline uint32_t reg_num(struct ir3_register *reg)
  437. {
  438.         return reg->num >> 2;
  439. }
  440.  
  441. static inline uint32_t reg_comp(struct ir3_register *reg)
  442. {
  443.         return reg->num & 0x3;
  444. }
  445.  
  446. static inline bool is_flow(struct ir3_instruction *instr)
  447. {
  448.         return (instr->category == 0);
  449. }
  450.  
  451. static inline bool is_kill(struct ir3_instruction *instr)
  452. {
  453.         return is_flow(instr) && (instr->opc == OPC_KILL);
  454. }
  455.  
  456. static inline bool is_nop(struct ir3_instruction *instr)
  457. {
  458.         return is_flow(instr) && (instr->opc == OPC_NOP);
  459. }
  460.  
  461. /* Is it a non-transformative (ie. not type changing) mov?  This can
  462.  * also include absneg.s/absneg.f, which for the most part can be
  463.  * treated as a mov (single src argument).
  464.  */
  465. static inline bool is_same_type_mov(struct ir3_instruction *instr)
  466. {
  467.         struct ir3_register *dst = instr->regs[0];
  468.  
  469.         /* mov's that write to a0.x or p0.x are special: */
  470.         if (dst->num == regid(REG_P0, 0))
  471.                 return false;
  472.         if (dst->num == regid(REG_A0, 0))
  473.                 return false;
  474.  
  475.         if ((instr->category == 1) &&
  476.                         (instr->cat1.src_type == instr->cat1.dst_type))
  477.                 return true;
  478.         if ((instr->category == 2) && ((instr->opc == OPC_ABSNEG_F) ||
  479.                         (instr->opc == OPC_ABSNEG_S)))
  480.                 return true;
  481.         return false;
  482. }
  483.  
  484. static inline bool is_alu(struct ir3_instruction *instr)
  485. {
  486.         return (1 <= instr->category) && (instr->category <= 3);
  487. }
  488.  
  489. static inline bool is_sfu(struct ir3_instruction *instr)
  490. {
  491.         return (instr->category == 4);
  492. }
  493.  
  494. static inline bool is_tex(struct ir3_instruction *instr)
  495. {
  496.         return (instr->category == 5);
  497. }
  498.  
  499. static inline bool is_mem(struct ir3_instruction *instr)
  500. {
  501.         return (instr->category == 6);
  502. }
  503.  
  504. static inline bool is_input(struct ir3_instruction *instr)
  505. {
  506.         /* in some cases, ldlv is used to fetch varying without
  507.          * interpolation.. fortunately inloc is the first src
  508.          * register in either case
  509.          */
  510.         if (is_mem(instr) && (instr->opc == OPC_LDLV))
  511.                 return true;
  512.         return (instr->category == 2) && (instr->opc == OPC_BARY_F);
  513. }
  514.  
  515. static inline bool is_meta(struct ir3_instruction *instr)
  516. {
  517.         /* TODO how should we count PHI (and maybe fan-in/out) which
  518.          * might actually contribute some instructions to the final
  519.          * result?
  520.          */
  521.         return (instr->category == -1);
  522. }
  523.  
  524. static inline bool writes_addr(struct ir3_instruction *instr)
  525. {
  526.         if (instr->regs_count > 0) {
  527.                 struct ir3_register *dst = instr->regs[0];
  528.                 return !!(dst->flags & IR3_REG_ADDR);
  529.         }
  530.         return false;
  531. }
  532.  
  533. static inline bool writes_pred(struct ir3_instruction *instr)
  534. {
  535.         if (instr->regs_count > 0) {
  536.                 struct ir3_register *dst = instr->regs[0];
  537.                 return reg_num(dst) == REG_P0;
  538.         }
  539.         return false;
  540. }
  541.  
  542. /* returns defining instruction for reg */
  543. /* TODO better name */
  544. static inline struct ir3_instruction *ssa(struct ir3_register *reg)
  545. {
  546.         if (reg->flags & IR3_REG_SSA)
  547.                 return reg->instr;
  548.         return NULL;
  549. }
  550.  
  551. static inline bool conflicts(struct ir3_instruction *a,
  552.                 struct ir3_instruction *b)
  553. {
  554.         return (a && b) && (a != b);
  555. }
  556.  
  557. static inline bool reg_gpr(struct ir3_register *r)
  558. {
  559.         if (r->flags & (IR3_REG_CONST | IR3_REG_IMMED | IR3_REG_ADDR))
  560.                 return false;
  561.         if ((reg_num(r) == REG_A0) || (reg_num(r) == REG_P0))
  562.                 return false;
  563.         return true;
  564. }
  565.  
  566. /* some cat2 instructions (ie. those which are not float) can embed an
  567.  * immediate:
  568.  */
  569. static inline bool ir3_cat2_int(opc_t opc)
  570. {
  571.         switch (opc) {
  572.         case OPC_ADD_U:
  573.         case OPC_ADD_S:
  574.         case OPC_SUB_U:
  575.         case OPC_SUB_S:
  576.         case OPC_CMPS_U:
  577.         case OPC_CMPS_S:
  578.         case OPC_MIN_U:
  579.         case OPC_MIN_S:
  580.         case OPC_MAX_U:
  581.         case OPC_MAX_S:
  582.         case OPC_CMPV_U:
  583.         case OPC_CMPV_S:
  584.         case OPC_MUL_U:
  585.         case OPC_MUL_S:
  586.         case OPC_MULL_U:
  587.         case OPC_CLZ_S:
  588.         case OPC_ABSNEG_S:
  589.         case OPC_AND_B:
  590.         case OPC_OR_B:
  591.         case OPC_NOT_B:
  592.         case OPC_XOR_B:
  593.         case OPC_BFREV_B:
  594.         case OPC_CLZ_B:
  595.         case OPC_SHL_B:
  596.         case OPC_SHR_B:
  597.         case OPC_ASHR_B:
  598.         case OPC_MGEN_B:
  599.         case OPC_GETBIT_B:
  600.         case OPC_CBITS_B:
  601.         case OPC_BARY_F:
  602.                 return true;
  603.  
  604.         default:
  605.                 return false;
  606.         }
  607. }
  608.  
  609.  
  610. /* map cat2 instruction to valid abs/neg flags: */
  611. static inline unsigned ir3_cat2_absneg(opc_t opc)
  612. {
  613.         switch (opc) {
  614.         case OPC_ADD_F:
  615.         case OPC_MIN_F:
  616.         case OPC_MAX_F:
  617.         case OPC_MUL_F:
  618.         case OPC_SIGN_F:
  619.         case OPC_CMPS_F:
  620.         case OPC_ABSNEG_F:
  621.         case OPC_CMPV_F:
  622.         case OPC_FLOOR_F:
  623.         case OPC_CEIL_F:
  624.         case OPC_RNDNE_F:
  625.         case OPC_RNDAZ_F:
  626.         case OPC_TRUNC_F:
  627.         case OPC_BARY_F:
  628.                 return IR3_REG_FABS | IR3_REG_FNEG;
  629.  
  630.         case OPC_ADD_U:
  631.         case OPC_ADD_S:
  632.         case OPC_SUB_U:
  633.         case OPC_SUB_S:
  634.         case OPC_CMPS_U:
  635.         case OPC_CMPS_S:
  636.         case OPC_MIN_U:
  637.         case OPC_MIN_S:
  638.         case OPC_MAX_U:
  639.         case OPC_MAX_S:
  640.         case OPC_CMPV_U:
  641.         case OPC_CMPV_S:
  642.         case OPC_MUL_U:
  643.         case OPC_MUL_S:
  644.         case OPC_MULL_U:
  645.         case OPC_CLZ_S:
  646.                 return 0;
  647.  
  648.         case OPC_ABSNEG_S:
  649.                 return IR3_REG_SABS | IR3_REG_SNEG;
  650.  
  651.         case OPC_AND_B:
  652.         case OPC_OR_B:
  653.         case OPC_NOT_B:
  654.         case OPC_XOR_B:
  655.         case OPC_BFREV_B:
  656.         case OPC_CLZ_B:
  657.         case OPC_SHL_B:
  658.         case OPC_SHR_B:
  659.         case OPC_ASHR_B:
  660.         case OPC_MGEN_B:
  661.         case OPC_GETBIT_B:
  662.         case OPC_CBITS_B:
  663.                 return IR3_REG_BNOT;
  664.  
  665.         default:
  666.                 return 0;
  667.         }
  668. }
  669.  
  670. /* map cat3 instructions to valid abs/neg flags: */
  671. static inline unsigned ir3_cat3_absneg(opc_t opc)
  672. {
  673.         switch (opc) {
  674.         case OPC_MAD_F16:
  675.         case OPC_MAD_F32:
  676.         case OPC_SEL_F16:
  677.         case OPC_SEL_F32:
  678.                 return IR3_REG_FNEG;
  679.  
  680.         case OPC_MAD_U16:
  681.         case OPC_MADSH_U16:
  682.         case OPC_MAD_S16:
  683.         case OPC_MADSH_M16:
  684.         case OPC_MAD_U24:
  685.         case OPC_MAD_S24:
  686.         case OPC_SEL_S16:
  687.         case OPC_SEL_S32:
  688.         case OPC_SAD_S16:
  689.         case OPC_SAD_S32:
  690.                 /* neg *may* work on 3rd src.. */
  691.  
  692.         case OPC_SEL_B16:
  693.         case OPC_SEL_B32:
  694.  
  695.         default:
  696.                 return 0;
  697.         }
  698. }
  699.  
  700. #define array_insert(arr, val) do { \
  701.                 if (arr ## _count == arr ## _sz) { \
  702.                         arr ## _sz = MAX2(2 * arr ## _sz, 16); \
  703.                         arr = realloc(arr, arr ## _sz * sizeof(arr[0])); \
  704.                 } \
  705.                 arr[arr ##_count++] = val; \
  706.         } while (0)
  707.  
/* iterator for an instruction's sources (reg), also returns src #:
 *
 * Visits regs[1] .. regs[regs_count - 1] of __instr (regs[0] is never
 * visited), assigning each non-NULL entry to __srcreg; __n is the
 * zero-based source index, ie. the regs[] index minus one.
 *
 * __srcreg must be declared by the caller.  __n (and the hidden __cnt)
 * are declared by the macro itself, as unsigned, inside the for
 * statement, so they are only visible in the loop body.  __instr is
 * expanded several times and should be free of side effects.
 *
 * The expansion is an unbraced if/for/if chain: an `else` written
 * directly after the loop body would bind to the innermost `if`.
 */
#define foreach_src_n(__srcreg, __n, __instr) \
        if ((__instr)->regs_count) \
                for (unsigned __cnt = (__instr)->regs_count - 1, __n = 0; __n < __cnt; __n++) \
                        if ((__srcreg = (__instr)->regs[__n + 1]))

/* iterator for an instruction's sources (reg); same as foreach_src_n()
 * with the source index kept in a hidden variable named __i:
 */
#define foreach_src(__srcreg, __instr) \
        foreach_src_n(__srcreg, __i, __instr)
  717.  
  718. static inline unsigned __ssa_src_cnt(struct ir3_instruction *instr)
  719. {
  720.         if (instr->fanin)
  721.                 return instr->regs_count + 2;
  722.         if (instr->address)
  723.                 return instr->regs_count + 1;
  724.         return instr->regs_count;
  725. }
  726.  
  727. static inline struct ir3_instruction * __ssa_src_n(struct ir3_instruction *instr, unsigned n)
  728. {
  729.         if (n == (instr->regs_count + 1))
  730.                 return instr->fanin;
  731.         if (n == (instr->regs_count + 0))
  732.                 return instr->address;
  733.         return ssa(instr->regs[n]);
  734. }
  735.  
/* Number of sources of __instr: regs_count - 1, plus one when 'address'
 * is set.  Unlike __ssa_src_cnt() this does not account for 'fanin'.
 * With regs_count == 0 and no address the unsigned subtraction wraps.
 * Not referenced in the visible part of this header.
 */
#define __src_cnt(__instr) ((__instr)->address ? (__instr)->regs_count : (__instr)->regs_count - 1)
  737.  
/* iterator for an instruction's SSA sources (instr), also returns src #:
 *
 * Walks indices 1 .. __ssa_src_cnt(__instr) - 1 through __ssa_src_n(),
 * ie. the SSA instruction behind each of regs[1..], followed by the
 * 'address' and 'fanin' instructions when those slots are counted.
 * Each non-NULL result is assigned to __srcinst; __n is the zero-based
 * source index (the __ssa_src_n() index minus one).
 *
 * Nothing is visited when regs_count is 0, even if 'address' or 'fanin'
 * is set.  __srcinst must be declared by the caller; __n and the hidden
 * __cnt are declared by the macro as unsigned.  The expansion is an
 * unbraced if/for/if chain, so an `else` placed directly after the loop
 * body would bind to the innermost `if`.
 */
#define foreach_ssa_src_n(__srcinst, __n, __instr) \
        if ((__instr)->regs_count) \
                for (unsigned __cnt = __ssa_src_cnt(__instr) - 1, __n = 0; __n < __cnt; __n++) \
                        if ((__srcinst = __ssa_src_n(__instr, __n + 1)))

/* iterator for an instruction's SSA sources (instr); same as
 * foreach_ssa_src_n() with the source index kept in a hidden variable
 * named __i:
 */
#define foreach_ssa_src(__srcinst, __instr) \
        foreach_ssa_src_n(__srcinst, __i, __instr)
  747.  
  748.  
  749. /* dump: */
  750. #include <stdio.h>
  751. void ir3_dump(struct ir3 *shader, const char *name,
  752.                 struct ir3_block *block /* XXX maybe 'block' ptr should move to ir3? */,
  753.                 FILE *f);
  754. void ir3_dump_instr_single(struct ir3_instruction *instr);
  755. void ir3_dump_instr_list(struct ir3_instruction *instr);
  756.  
  757. /* flatten if/else: */
  758. int ir3_block_flatten(struct ir3_block *block);
  759.  
  760. /* depth calculation: */
  761. int ir3_delayslots(struct ir3_instruction *assigner,
  762.                 struct ir3_instruction *consumer, unsigned n);
  763. void ir3_block_depth(struct ir3_block *block);
  764.  
  765. /* copy-propagate: */
  766. void ir3_block_cp(struct ir3_block *block);
  767.  
  768. /* group neightbors and insert mov's to resolve conflicts: */
  769. void ir3_block_group(struct ir3_block *block);
  770.  
  771. /* scheduling: */
  772. int ir3_block_sched(struct ir3_block *block);
  773.  
  774. /* register assignment: */
  775. int ir3_block_ra(struct ir3_block *block, enum shader_t type,
  776.                 bool frag_coord, bool frag_face);
  777.  
  778. /* legalize: */
  779. void ir3_block_legalize(struct ir3_block *block,
  780.                 bool *has_samp, int *max_bary);
  781.  
  782. /* ************************************************************************* */
  783. /* instruction helpers */
  784.  
  785. static inline struct ir3_instruction *
  786. ir3_MOV(struct ir3_block *block, struct ir3_instruction *src, type_t type)
  787. {
  788.         struct ir3_instruction *instr =
  789.                 ir3_instr_create(block, 1, 0);
  790.         ir3_reg_create(instr, 0, 0);   /* dst */
  791.         ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src;
  792.         instr->cat1.src_type = type;
  793.         instr->cat1.dst_type = type;
  794.         return instr;
  795. }
  796.  
  797. static inline struct ir3_instruction *
  798. ir3_COV(struct ir3_block *block, struct ir3_instruction *src,
  799.                 type_t src_type, type_t dst_type)
  800. {
  801.         struct ir3_instruction *instr =
  802.                 ir3_instr_create(block, 1, 0);
  803.         ir3_reg_create(instr, 0, 0);   /* dst */
  804.         ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src;
  805.         instr->cat1.src_type = src_type;
  806.         instr->cat1.dst_type = dst_type;
  807.         return instr;
  808. }
  809.  
  810. #define INSTR1(CAT, name)                                                \
  811. static inline struct ir3_instruction *                                   \
  812. ir3_##name(struct ir3_block *block,                                      \
  813.                 struct ir3_instruction *a, unsigned aflags)                      \
  814. {                                                                        \
  815.         struct ir3_instruction *instr =                                      \
  816.                 ir3_instr_create(block, CAT, OPC_##name);                        \
  817.         ir3_reg_create(instr, 0, 0);   /* dst */                             \
  818.         ir3_reg_create(instr, 0, IR3_REG_SSA | aflags)->instr = a;           \
  819.         return instr;                                                        \
  820. }
  821.  
  822. #define INSTR2(CAT, name)                                                \
  823. static inline struct ir3_instruction *                                   \
  824. ir3_##name(struct ir3_block *block,                                      \
  825.                 struct ir3_instruction *a, unsigned aflags,                      \
  826.                 struct ir3_instruction *b, unsigned bflags)                      \
  827. {                                                                        \
  828.         struct ir3_instruction *instr =                                      \
  829.                 ir3_instr_create(block, CAT, OPC_##name);                        \
  830.         ir3_reg_create(instr, 0, 0);   /* dst */                             \
  831.         ir3_reg_create(instr, 0, IR3_REG_SSA | aflags)->instr = a;           \
  832.         ir3_reg_create(instr, 0, IR3_REG_SSA | bflags)->instr = b;           \
  833.         return instr;                                                        \
  834. }
  835.  
  836. #define INSTR3(CAT, name)                                                \
  837. static inline struct ir3_instruction *                                   \
  838. ir3_##name(struct ir3_block *block,                                      \
  839.                 struct ir3_instruction *a, unsigned aflags,                      \
  840.                 struct ir3_instruction *b, unsigned bflags,                      \
  841.                 struct ir3_instruction *c, unsigned cflags)                      \
  842. {                                                                        \
  843.         struct ir3_instruction *instr =                                      \
  844.                 ir3_instr_create(block, CAT, OPC_##name);                        \
  845.         ir3_reg_create(instr, 0, 0);   /* dst */                             \
  846.         ir3_reg_create(instr, 0, IR3_REG_SSA | aflags)->instr = a;           \
  847.         ir3_reg_create(instr, 0, IR3_REG_SSA | bflags)->instr = b;           \
  848.         ir3_reg_create(instr, 0, IR3_REG_SSA | cflags)->instr = c;           \
  849.         return instr;                                                        \
  850. }
  851.  
  852. /* cat0 instructions: */
  853. INSTR1(0, KILL);
  854.  
  855. /* cat2 instructions, most 2 src (INSTR2) but some 1 src (INSTR1); each line defines an ir3_<NAME>() builder: */
  856. INSTR2(2, ADD_F)
  857. INSTR2(2, MIN_F)
  858. INSTR2(2, MAX_F)
  859. INSTR2(2, MUL_F)
  860. INSTR1(2, SIGN_F)
  861. INSTR2(2, CMPS_F)
  862. INSTR1(2, ABSNEG_F)
  863. INSTR2(2, CMPV_F)
  864. INSTR1(2, FLOOR_F)
  865. INSTR1(2, CEIL_F)
  866. INSTR1(2, RNDNE_F)
  867. INSTR1(2, RNDAZ_F)
  868. INSTR1(2, TRUNC_F)
  869. INSTR2(2, ADD_U)
  870. INSTR2(2, ADD_S)
  871. INSTR2(2, SUB_U)
  872. INSTR2(2, SUB_S)
  873. INSTR2(2, CMPS_U)
  874. INSTR2(2, CMPS_S)
  875. INSTR2(2, MIN_U)
  876. INSTR2(2, MIN_S)
  877. INSTR2(2, MAX_U)
  878. INSTR2(2, MAX_S)
  879. INSTR1(2, ABSNEG_S)
  880. INSTR2(2, AND_B)
  881. INSTR2(2, OR_B)
  882. INSTR1(2, NOT_B)
  883. INSTR2(2, XOR_B)
  884. INSTR2(2, CMPV_U)
  885. INSTR2(2, CMPV_S)
  886. INSTR2(2, MUL_U)
  887. INSTR2(2, MUL_S)
  888. INSTR2(2, MULL_U)
  889. INSTR1(2, BFREV_B)
  890. INSTR1(2, CLZ_S)
  891. INSTR1(2, CLZ_B)
  892. INSTR2(2, SHL_B)
  893. INSTR2(2, SHR_B)
  894. INSTR2(2, ASHR_B)
  895. INSTR2(2, BARY_F)
  896. INSTR2(2, MGEN_B)
  897. INSTR2(2, GETBIT_B)
  898. INSTR1(2, SETRM)
  899. INSTR1(2, CBITS_B)
  900. INSTR2(2, SHB)
  901. INSTR2(2, MSAD)
  902.  
  903. /* cat3 instructions (all 3 src, INSTR3); each line defines an ir3_<NAME>() builder: */
  904. INSTR3(3, MAD_U16)
  905. INSTR3(3, MADSH_U16)
  906. INSTR3(3, MAD_S16)
  907. INSTR3(3, MADSH_M16)
  908. INSTR3(3, MAD_U24)
  909. INSTR3(3, MAD_S24)
  910. INSTR3(3, MAD_F16)
  911. INSTR3(3, MAD_F32)
  912. INSTR3(3, SEL_B16)
  913. INSTR3(3, SEL_B32)
  914. INSTR3(3, SEL_S16)
  915. INSTR3(3, SEL_S32)
  916. INSTR3(3, SEL_F16)
  917. INSTR3(3, SEL_F32)
  918. INSTR3(3, SAD_S16)
  919. INSTR3(3, SAD_S32)
  920.  
  921. /* cat4 instructions (all 1 src, INSTR1); each line defines an ir3_<NAME>() builder: */
  922. INSTR1(4, RCP)
  923. INSTR1(4, RSQ)
  924. INSTR1(4, LOG2)
  925. INSTR1(4, EXP2)
  926. INSTR1(4, SIN)
  927. INSTR1(4, COS)
  928. INSTR1(4, SQRT)
  929.  
  930. /* cat5 instructions (DSX/DSY are 1-src INSTR1 builders; ir3_SAM() is written out by hand): */
  931. INSTR1(5, DSX)
  932. INSTR1(5, DSY)
  933.  
  934. static inline struct ir3_instruction *
  935. ir3_SAM(struct ir3_block *block, opc_t opc, type_t type,
  936.                 unsigned wrmask, unsigned flags, unsigned samp, unsigned tex,
  937.                 struct ir3_instruction *src0, struct ir3_instruction *src1)
  938. {
  939.         struct ir3_instruction *sam;
  940.         struct ir3_register *reg;
  941.  
  942.         sam = ir3_instr_create(block, 5, opc);
  943.         sam->flags |= flags;
  944.         ir3_reg_create(sam, 0, 0)->wrmask = wrmask;
  945.         if (src0) {
  946.                 reg = ir3_reg_create(sam, 0, IR3_REG_SSA);
  947.                 reg->wrmask = (1 << (src0->regs_count - 1)) - 1;
  948.                 reg->instr = src0;
  949.         }
  950.         if (src1) {
  951.                 reg = ir3_reg_create(sam, 0, IR3_REG_SSA);
  952.                 reg->instr = src1;
  953.                 reg->wrmask = (1 << (src1->regs_count - 1)) - 1;
  954.         }
  955.         sam->cat5.samp = samp;
  956.         sam->cat5.tex  = tex;
  957.         sam->cat5.type  = type;
  958.  
  959.         return sam;
  960. }
  961.  
  962. /* cat6 instructions (both 2 src, INSTR2); each line defines an ir3_<NAME>() builder: */
  963. INSTR2(6, LDLV)
  964. INSTR2(6, LDG)
  965.  
  966. /* ************************************************************************* */
  967. /* TODO: split these regmask helpers out, or replace them with an existing bitset helper such as main/bitset.h. */
  968.  
  969. #include <string.h>
  970.  
  971. #define MAX_REG 256 /* register numbers handled by regmask_idx() must be below this */
  972. 
  973. typedef uint8_t regmask_t[2 * MAX_REG / 8]; /* bitmask with 2 * MAX_REG bits: regmask_idx() maps full regs to [0, MAX_REG) and IR3_REG_HALF regs to [MAX_REG, 2 * MAX_REG) */
  974.  
  975. static inline unsigned regmask_idx(struct ir3_register *reg)
  976. {
  977.         unsigned num = reg->num;
  978.         debug_assert(num < MAX_REG);
  979.         if (reg->flags & IR3_REG_HALF)
  980.                 num += MAX_REG;
  981.         return num;
  982. }
  983.  
  984. static inline void regmask_init(regmask_t *regmask)
  985. {
  986.         memset(regmask, 0, sizeof(*regmask));
  987. }
  988.  
  989. static inline void regmask_set(regmask_t *regmask, struct ir3_register *reg)
  990. {
  991.         unsigned idx = regmask_idx(reg);
  992.         if (reg->flags & IR3_REG_RELATIV) {
  993.                 unsigned i;
  994.                 for (i = 0; i < reg->size; i++, idx++)
  995.                         (*regmask)[idx / 8] |= 1 << (idx % 8);
  996.         } else {
  997.                 unsigned mask;
  998.                 for (mask = reg->wrmask; mask; mask >>= 1, idx++)
  999.                         if (mask & 1)
  1000.                                 (*regmask)[idx / 8] |= 1 << (idx % 8);
  1001.         }
  1002. }
  1003.  
  1004. static inline void regmask_or(regmask_t *dst, regmask_t *a, regmask_t *b)
  1005. {
  1006.         unsigned i;
  1007.         for (i = 0; i < ARRAY_SIZE(*dst); i++)
  1008.                 (*dst)[i] = (*a)[i] | (*b)[i];
  1009. }
  1010.  
  1011. /* set bits in a if not set in b, conceptually:
  1012.  *   a |= (reg & ~b)
  1013.  */
  1014. static inline void regmask_set_if_not(regmask_t *a,
  1015.                 struct ir3_register *reg, regmask_t *b)
  1016. {
  1017.         unsigned idx = regmask_idx(reg);
  1018.         if (reg->flags & IR3_REG_RELATIV) {
  1019.                 unsigned i;
  1020.                 for (i = 0; i < reg->size; i++, idx++)
  1021.                         if (!((*b)[idx / 8] & (1 << (idx % 8))))
  1022.                                 (*a)[idx / 8] |= 1 << (idx % 8);
  1023.         } else {
  1024.                 unsigned mask;
  1025.                 for (mask = reg->wrmask; mask; mask >>= 1, idx++)
  1026.                         if (mask & 1)
  1027.                                 if (!((*b)[idx / 8] & (1 << (idx % 8))))
  1028.                                         (*a)[idx / 8] |= 1 << (idx % 8);
  1029.         }
  1030. }
  1031.  
  1032. static inline bool regmask_get(regmask_t *regmask,
  1033.                 struct ir3_register *reg)
  1034. {
  1035.         unsigned idx = regmask_idx(reg);
  1036.         if (reg->flags & IR3_REG_RELATIV) {
  1037.                 unsigned i;
  1038.                 for (i = 0; i < reg->size; i++, idx++)
  1039.                         if ((*regmask)[idx / 8] & (1 << (idx % 8)))
  1040.                                 return true;
  1041.         } else {
  1042.                 unsigned mask;
  1043.                 for (mask = reg->wrmask; mask; mask >>= 1, idx++)
  1044.                         if (mask & 1)
  1045.                                 if ((*regmask)[idx / 8] & (1 << (idx % 8)))
  1046.                                         return true;
  1047.         }
  1048.         return false;
  1049. }
  1050.  
  1051. /* ************************************************************************* */
  1052.  
  1053. #endif /* IR3_H_ */
  1054.