Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright (c) 2013 Rob Clark <robdclark@gmail.com>
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the "Software"),
  6.  * to deal in the Software without restriction, including without limitation
  7.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  8.  * and/or sell copies of the Software, and to permit persons to whom the
  9.  * Software is furnished to do so, subject to the following conditions:
  10.  *
  11.  * The above copyright notice and this permission notice (including the next
  12.  * paragraph) shall be included in all copies or substantial portions of the
  13.  * Software.
  14.  *
  15.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16.  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  18.  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  19.  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  20.  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  21.  * SOFTWARE.
  22.  */
  23.  
  24. #ifndef IR3_H_
  25. #define IR3_H_
  26.  
  27. #include <stdint.h>
  28. #include <stdbool.h>
  29.  
  30. #include "instr-a3xx.h"
  31.  
  32. /* low level intermediate representation of an adreno shader program */
  33.  
  34. struct ir3_shader;   /* forward declaration; the full definition appears later in this header */
  35.  
      /* Going by the name and signature, parses shader assembly text in
       * `src` and returns the resulting ir3_shader.  The implementation is
       * not in this header: confirm there what is returned on a parse
       * error and who owns (and frees) the result.
       */
  36. struct ir3_shader * fd_asm_parse(const char *src);
  37.  
  38. struct ir3_shader_info {
              /* Size and register-usage summary for a shader.  A pointer to
               * this struct is passed to ir3_shader_assemble(); presumably
               * the assembler fills it in -- confirm in the implementation.
               */
  39.         uint16_t sizedwords;   /* presumably the assembled size in dwords, going by the name -- confirm */
  40.         /* NOTE: max_reg, etc, does not include registers not touched
  41.          * by the shader (ie. vertex fetched via VFD_DECODE but not
  42.          * touched by shader)
  43.          */
  44.         int8_t   max_reg;   /* highest GPR # used by shader */
  45.         int8_t   max_half_reg;   /* presumably the highest half-precision register # used -- confirm */
  46.         int8_t   max_const;   /* presumably the highest const register # used -- confirm */
              /* NOTE(review): the three max_* fields are int8_t, so none of
               * them can hold a value above 127.  Being signed, a negative
               * value may be meant as "none used" -- confirm, and confirm
               * that no register file indexed here can exceed 127.
               */
  47. };
  48.  
  49. struct ir3_register {
              /* One register operand: a set of modifier flags plus an
               * anonymous union holding either a register number and write
               * mask, an immediate value, or a relative offset.
               *
               * Each flag below is a distinct single bit (0x001 .. 0x200);
               * presumably they are OR'd together (ir3_reg_create() takes
               * its flags as a plain int) -- confirm.  Only IR3_REG_EI is
               * described in this header.  Going by the names and by the
               * asm examples in the ir3_instruction comments, presumably:
               * CONST = const-file register ("c"), IMMED = immediate
               * operand, HALF = half-precision register ("h" prefix),
               * RELATIV = relative addressing, R = "(r)", NEGATE = "(neg)",
               * ABS = absolute value.  EVEN and POS_INF are not explained
               * here.  TODO confirm all of these against the assembler.
               */
  50.         enum {
  51.                 IR3_REG_CONST  = 0x001,
  52.                 IR3_REG_IMMED  = 0x002,
  53.                 IR3_REG_HALF   = 0x004,
  54.                 IR3_REG_RELATIV= 0x008,
  55.                 IR3_REG_R      = 0x010,
  56.                 IR3_REG_NEGATE = 0x020,
  57.                 IR3_REG_ABS    = 0x040,
  58.                 IR3_REG_EVEN   = 0x080,
  59.                 IR3_REG_POS_INF= 0x100,
  60.                 /* (ei) flag, end-input?  Set on last bary, presumably to signal
  61.                  * that the shader needs no more input:
  62.                  */
  63.                 IR3_REG_EI     = 0x200,
  64.         } flags;
              /* The members of this anonymous union share storage: `num`,
               * `iim_val`, `fim_val` and `offset` all start at the same
               * address, so only one interpretation is valid at a time.
               * Presumably `flags` (IMMED / RELATIV) tells which -- confirm.
               */
  65.         union {
  66.                 /* normal registers: */
  67.                 struct {
  68.                         /* the component is in the low two bits of the reg #, so
  69.                          * rN.x becomes: (n << 2) | x
  70.                          */
  71.                         int num;
  72.                         int wrmask;   /* write mask, going by the name; bit layout not shown here */
  73.                 };
  74.                 /* immediate: */
  75.                 int     iim_val;   /* immediate viewed as an int */
  76.                 float   fim_val;   /* same storage viewed as a float */
  77.                 /* relative: */
  78.                 int offset;
  79.         };
  80. };
  81.  
  82. struct ir3_instruction {
              /* One shader instruction: its category and opcode, modifier
               * flags, a repeat count, up to four register operands, and a
               * union of category-specific fields.
               */
  83.         struct ir3_shader *shader;   /* presumably the shader this instruction was created in (see ir3_instr_create) -- confirm */
  84.         int category;   /* presumably selects which catN member of the union below applies -- confirm */
  85.         opc_t opc;   /* opcode; opc_t is not declared in this header (presumably from instr-a3xx.h) */
              /* Modifier flags; each constant is a distinct single bit
               * (0x001 .. 0x200).  SY, SS and JP are described below; UL,
               * 3D, A, O, P, S and S2EN are not documented in this header.
               */
  86.         enum {
  87.                 /* (sy) flag is set on first instruction, and after sample
  88.                  * instructions (probably just on RAW hazard).
  89.                  */
  90.                 IR3_INSTR_SY    = 0x001,
  91.                 /* (ss) flag is set on first instruction, and first instruction
  92.                  * to depend on the result of "long" instructions (RAW hazard):
  93.                  *
  94.                  *   rcp, rsq, log2, exp2, sin, cos, sqrt
  95.                  *
  96.                  * It seems to synchronize until all in-flight instructions are
  97.                  * completed, for example:
  98.                  *
  99.                  *   rsq hr1.w, hr1.w
  100.                  *   add.f hr2.z, (neg)hr2.z, hc0.y
  101.                  *   mul.f hr2.w, (neg)hr2.y, (neg)hr2.y
  102.                  *   rsq hr2.x, hr2.x
  103.                  *   (rpt1)nop
  104.                  *   mad.f16 hr2.w, hr2.z, hr2.z, hr2.w
  105.                  *   nop
  106.                  *   mad.f16 hr2.w, (neg)hr0.w, (neg)hr0.w, hr2.w
  107.                  *   (ss)(rpt2)mul.f hr1.x, (r)hr1.x, hr1.w
  108.                  *   (rpt2)mul.f hr0.x, (neg)(r)hr0.x, hr2.x
  109.                  *
  110.                  * The last mul.f does not have (ss) set, presumably because the
  111.                  * (ss) on the previous instruction does the job.
  112.                  *
  113.                  * The blob driver also seems to set it on WAR hazards, although
  114.                  * not really clear if this is needed or just blob compiler being
  115.                  * sloppy.  So far I haven't found a case where removing the (ss)
  116.                  * causes problems for WAR hazard, but I could just be getting
  117.                  * lucky:
  118.                  *
  119.                  *   rcp r1.y, r3.y
  120.                  *   (ss)(rpt2)mad.f32 r3.y, (r)c9.x, r1.x, (r)r3.z
  121.                  *
  122.                  */
  123.                 IR3_INSTR_SS    = 0x002,
  124.                 /* (jp) flag is set on jump targets:
  125.                  */
  126.                 IR3_INSTR_JP    = 0x004,
  127.                 IR3_INSTR_UL    = 0x008,
  128.                 IR3_INSTR_3D    = 0x010,
  129.                 IR3_INSTR_A     = 0x020,
  130.                 IR3_INSTR_O     = 0x040,
  131.                 IR3_INSTR_P     = 0x080,
  132.                 IR3_INSTR_S     = 0x100,
  133.                 IR3_INSTR_S2EN  = 0x200,
  134.         } flags;
  135.         int repeat;   /* presumably the N of the (rptN) prefix seen in the asm examples above -- confirm */
  136.         unsigned regs_count;   /* presumably the number of valid entries in regs[] -- confirm */
  137.         struct ir3_register *regs[4];   /* room for at most four operand pointers; dst/src ordering is not stated here */
              /* Category-specific fields.  The members of this anonymous
               * union share storage, so only one of cat0 / cat1 / cat2 /
               * cat5 / cat6 is meaningful for a given instruction.  No
               * cat3 or cat4 member is declared.
               */
  138.         union {
  139.                 struct {
  140.                         char inv;
  141.                         char comp;
  142.                         int  immed;
  143.                 } cat0;
  144.                 struct {
  145.                         type_t src_type, dst_type;   /* type_t is not declared in this header (presumably from instr-a3xx.h) */
  146.                 } cat1;
  147.                 struct {
                              /* comparison condition, numbered 0..5 */
  148.                         enum {
  149.                                 IR3_COND_LT = 0,
  150.                                 IR3_COND_LE = 1,
  151.                                 IR3_COND_GT = 2,
  152.                                 IR3_COND_GE = 3,
  153.                                 IR3_COND_EQ = 4,
  154.                                 IR3_COND_NE = 5,
  155.                         } condition;
  156.                 } cat2;
  157.                 struct {
  158.                         unsigned samp, tex;   /* presumably sampler and texture indices, going by the names -- confirm */
  159.                         type_t type;
  160.                 } cat5;
  161.                 struct {
  162.                         type_t type;
  163.                         int offset;
  164.                         int iim_val;
  165.                 } cat6;
  166.         };
  167. };
  168.  
  169. /* this is just large to cope w/ the large test *.asm: */
  170. #define MAX_INSTRS 10240
  171.  
  172. struct ir3_shader {
               /* A shader: a fixed-capacity table of instruction pointers
                * plus a fixed-size array of 32-bit words embedded in the
                * struct itself.
                */
  173.         unsigned instrs_count;   /* presumably the number of entries of instrs[] in use -- confirm */
  174.         struct ir3_instruction *instrs[MAX_INSTRS];   /* capacity is fixed at MAX_INSTRS pointers */
               /* 128 * MAX_INSTRS = 1,310,720 uint32_t words, i.e. 5 MiB,
                * stored inline.  NOTE(review): this makes the struct far too
                * large for a typical stack frame; presumably
                * ir3_shader_create() heap-allocates it, and heap/heap_idx
                * act as a simple arena for instructions and registers --
                * confirm in the implementation.
                */
  175.         uint32_t heap[128 * MAX_INSTRS];
  176.         unsigned heap_idx;   /* presumably the next free index into heap[] -- confirm */
  177. };
  178.  
  179. struct ir3_shader * ir3_shader_create(void);   /* returns a new shader; presumably released with ir3_shader_destroy() -- confirm */
  180. void ir3_shader_destroy(struct ir3_shader *shader);   /* going by the name, the counterpart of ir3_shader_create() */
       /* Takes the shader and a caller-supplied ir3_shader_info (non-const,
        * so presumably an output).  Returns void *: presumably the
        * assembled binary, sized by info->sizedwords.  Ownership of the
        * returned buffer is not visible here -- confirm.
        */
  181. void * ir3_shader_assemble(struct ir3_shader *shader,
  182.                 struct ir3_shader_info *info);
  183.  
  184. struct ir3_instruction * ir3_instr_create(struct ir3_shader *shader, int category, opc_t opc);   /* presumably adds a new instruction with this category/opcode to `shader` -- confirm */
  185. struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr);   /* going by the name, returns a copy of `instr`; copy depth (regs[]) not visible here */
  186.  
  187. struct ir3_register * ir3_reg_create(struct ir3_instruction *instr,   /* presumably attaches a new operand to `instr` -- confirm */
  188.                 int num, int flags);   /* `flags` is a plain int, presumably OR'd IR3_REG_* bits; `num` presumably uses the (n << 2) | component encoding of ir3_register.num */
  189.  
  190. #endif /* IR3_H_ */
  191.