Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | Download | RSS feed

  1. /*
  2.  * RadeonHD R6xx, R7xx DRI driver
  3.  *
  4.  * Copyright (C) 2008-2009  Alexander Deucher
  5.  * Copyright (C) 2008-2009  Matthias Hopf
  6.  *
  7.  * Permission is hereby granted, free of charge, to any person obtaining a
  8.  * copy of this software and associated documentation files (the "Software"),
  9.  * to deal in the Software without restriction, including without limitation
  10.  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
  11.  * and/or sell copies of the Software, and to permit persons to whom the
  12.  * Software is furnished to do so, subject to the following conditions:
  13.  *
  14.  * The above copyright notice and this permission notice shall be included
  15.  * in all copies or substantial portions of the Software.
  16.  *
  17.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  18.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  19.  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
  20.  * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
  21.  * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
  22.  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23.  */
  24.  
  25. /*
  26.  * Shader macros
  27.  */
  28.  
  29. #ifndef __SHADER_H__
  30. #define __SHADER_H__
  31.  
  32. //#include "radeon.h"
  33.  
  34. /* Restrictions of ALU instructions
  35.  * order of scalar ops is always x,y,z,w,t(rans), last to be indicated by last==1.
  36.  * max of 3 different src GPRs per instr.
  37.  * max of 4 different cfile constant components per instr.
  38.  * max of 2 (different) constants (any type) for t.
  39.  * bank swizzle (see below).
  40.  * GPR write stalls read of same register. Auto-replaced by PV/PS, NOP needed if registers are relative to
  41.  * different indices (gpr,loop,nothing).
  42.  * may use constant registers or constant cache, but not both.
  43.  */
  44.  
  45. /* Bank_swizzle: (pp. 297ff)
  46.  * Only one of each x,y,z,w GPR component can be loaded per cycle (3 cycles per instr, called 0-2).
  47.  * per scalar instruction bank_swizzle can select which cycle each operand comes from. e.g.:
  48.  *   SRC0 SRC1 SRC2  SWIZZLE  cycle0 cycle1 cycle2
  49.  *   1.x  2.x          012     1.x    2.x     -
  50.  *   3.x  1.y          201     1.y     -     3.x
  51.  *   2.x  1.y          102    (1.y)  (2.x)    -
  52.  * If data is read in a cycle, multiple scalar instructions can reference it.
  53.  * Special case: square() - i.e. same component in src0+src1 doesn't need read port -> ignores swizzle for src1.
  54.  * No restrictions for constants or PV/PS.
  55.  * t can load multiple components in a single cycle slot, but has to share cycles with xyzw.
  56.  * t with single constant may not load GPRs or PV/PS in cycle 0 (careful with ALU_TRANS_210).
  57.  * t with two constants may only load GPRs or PV/PS in cycle 2.
  58.  */
  59.  
  60.  
  61. /* Order of instructions: All CF, All ALU, All Tex/Vtx fetches */
  62.  
  63.  
  /*
   * CF instruction fields.
   *
   * These macros label the individual arguments handed to CF_DWORD0,
   * CF_DWORD1 and CF_ALU_DWORD0/1.  Apart from I_COUNT they expand to their
   * argument unchanged; the trailing comment gives the short field name
   * and, where one exists, the constant family the value comes from.
   */
  #define ADDR(x)                 (x)     /* addr */
  #define POP_COUNT(x)            (x)     /* pc */
  #define CF_CONST(x)             (x)     /* const */
  #define COND(x)                 (x)     /* cond: SQ_COND_* */
  /* count: a non-zero value is encoded as value - 1, zero stays zero */
  #define I_COUNT(x)              ((x) ? ((x) - 1) : 0)
  #define COUNT_3(x)              (x)     /* r7xx */
  #define CALL_COUNT(x)           (x)     /* call count */
  #define END_OF_PROGRAM(x)       (x)     /* eop */
  #define VALID_PIXEL_MODE(x)     (x)     /* vpm */
  #define CF_INST(x)              (x)     /* cf inst: SQ_CF_INST_* */

  #define WHOLE_QUAD_MODE(x)      (x)     /* wqm */
  #define BARRIER(x)              (x)     /* barrier */
  #define KCACHE_BANK0(x)         (x)     /* kb0 */
  #define KCACHE_BANK1(x)         (x)     /* kb1 */
  #define KCACHE_MODE0(x)         (x)     /* km0: SQ_CF_KCACHE_* */
  #define KCACHE_MODE1(x)         (x)     /* km1: SQ_CF_KCACHE_* */
  #define KCACHE_ADDR0(x)         (x)
  #define KCACHE_ADDR1(x)         (x)
  #define USES_WATERFALL(x)       (x)     /* uw */
  102.  
  /*
   * Export targets.
   *
   * ARRAY_BASE() labels the array_base argument of CF_ALLOC_IMP_EXP_DWORD0;
   * the constants below are the values passed through it.
   */
  #define ARRAY_BASE(x)           (x)

  /* export pixel */
  #define CF_PIXEL_MRT0           0
  #define CF_PIXEL_MRT1           1
  #define CF_PIXEL_MRT2           2
  #define CF_PIXEL_MRT3           3
  #define CF_PIXEL_MRT4           4
  #define CF_PIXEL_MRT5           5
  #define CF_PIXEL_MRT6           6
  #define CF_PIXEL_MRT7           7
  /* the *_FOG targets exist on r6xx only */
  #define CF_PIXEL_MRT0_FOG       16
  #define CF_PIXEL_MRT1_FOG       17
  #define CF_PIXEL_MRT2_FOG       18
  #define CF_PIXEL_MRT3_FOG       19
  #define CF_PIXEL_MRT4_FOG       20
  #define CF_PIXEL_MRT5_FOG       21
  #define CF_PIXEL_MRT6_FOG       22
  #define CF_PIXEL_MRT7_FOG       23
  #define CF_PIXEL_Z              61

  /* export pos */
  #define CF_POS0                 60
  #define CF_POS1                 61
  #define CF_POS2                 62
  #define CF_POS3                 63

  /* export param: plain indices 0...31, no named constants */

  #define TYPE(x)                 (x)     /* SQ_EXPORT_* */
  #if 0
  /* type export */
  #define SQ_EXPORT_PIXEL         0
  #define SQ_EXPORT_POS           1
  #define SQ_EXPORT_PARAM         2
  /* 3 is reserved */
  /* type mem */
  #define SQ_EXPORT_WRITE         0
  #define SQ_EXPORT_WRITE_IND     1
  #define SQ_EXPORT_WRITE_ACK     2
  #define SQ_EXPORT_WRITE_IND_ACK 3
  #endif
  143.  
  /*
   * Fields of the alloc/import/export CF words (CF_ALLOC_IMP_EXP_DWORD0/1).
   *
   * Most macros only label an argument.  ELEM_SIZE and BURST_COUNT encode a
   * non-zero value as value - 1 and leave zero as zero; their argument is
   * fully parenthesized so that expressions such as (a | b) expand
   * correctly, matching I_COUNT and MEGA_FETCH_COUNT.
   */
  #define RW_GPR(x)               (x)
  #define RW_REL(x)               (x)     /* ABSOLUTE or RELATIVE */
  #define ABSOLUTE                0
  #define RELATIVE                1
  #define INDEX_GPR(x)            (x)
  #define ELEM_SIZE(x)            ((x) ? ((x) - 1) : 0)
  #define COMP_MASK(x)            (x)
  #define R6xx_ELEM_LOOP(x)       (x)
  #define BURST_COUNT(x)          ((x) ? ((x) - 1) : 0)
  153.  
  /*
   * Swizzle selectors: one SQ_SEL_* value per component, passed through
   * unchanged.
   */
  #define SRC_SEL_X(x)            (x)
  #define SRC_SEL_Y(x)            (x)
  #define SRC_SEL_Z(x)            (x)
  #define SRC_SEL_W(x)            (x)
  159.  
  /*
   * CF instruction word packers.
   *
   * Each macro ORs its arguments together at fixed bit offsets and passes the
   * result to cpu_to_le32().  Arguments are not masked (except "count" in
   * CF_DWORD1), so callers must supply values that fit their field.
   *
   * A field whose shift can reach bit 31 is converted to unsigned int before
   * shifting: moving a set bit into the sign bit of a signed int is undefined
   * behavior in C.  The resulting bit pattern is unchanged.
   */
  #define CF_DWORD0(addr) cpu_to_le32((addr))

  /*
   * R7xx has another entry (COUNT3), but that is only used for adding a bit
   * to count.  We allow one more bit for count in the argument of the macro
   * on R7xx instead: the low three bits go to offset 10, the rest to offset 19.
   * R6xx: [0,7]  R7xx: [1,16]
   */
  #define CF_DWORD1(pc, cf_const, cond, count, call_count, eop, vpm, cf_inst, wqm, b) \
      cpu_to_le32((((pc)            <<  0) | \
                   ((cf_const)      <<  3) | \
                   ((cond)          <<  8) | \
                   (((count) & 7)   << 10) | \
                   (((count) >> 3)  << 19) | \
                   ((call_count)    << 13) | \
                   ((eop)           << 21) | \
                   ((vpm)           << 22) | \
                   ((cf_inst)       << 23) | \
                   ((wqm)           << 30) | \
                   ((unsigned int)(b) << 31)))

  #define CF_ALU_DWORD0(addr, kb0, kb1, km0) \
      cpu_to_le32((((addr)          <<  0) | \
                   ((kb0)           << 22) | \
                   ((kb1)           << 26) | \
                   ((unsigned int)(km0) << 30)))

  #define CF_ALU_DWORD1(km1, kcache_addr0, kcache_addr1, count, uw, cf_inst, wqm, b) \
      cpu_to_le32((((km1)           <<  0) | \
                   ((kcache_addr0)  <<  2) | \
                   ((kcache_addr1)  << 10) | \
                   ((count)         << 18) | \
                   ((uw)            << 25) | \
                   ((cf_inst)       << 26) | \
                   ((wqm)           << 30) | \
                   ((unsigned int)(b) << 31)))

  #define CF_ALLOC_IMP_EXP_DWORD0(array_base, type, rw_gpr, rr, index_gpr, es) \
      cpu_to_le32((((array_base)    <<  0) | \
                   ((type)          << 13) | \
                   ((rw_gpr)        << 15) | \
                   ((rr)            << 22) | \
                   ((index_gpr)     << 23) | \
                   ((unsigned int)(es) << 30)))

  /*
   * R7xx apparently doesn't have the ELEM_LOOP entry any more.
   * We still expose it, but ELEM_LOOP is explicitly R6xx now.
   * TODO: is this just forgotten in the docs, or really not available any more?
   */
  #define CF_ALLOC_IMP_EXP_DWORD1_BUF(array_size, comp_mask, el, bc, eop, vpm, cf_inst, wqm, b) \
      cpu_to_le32((((array_size)    <<  0) | \
                   ((comp_mask)     << 12) | \
                   ((el)            << 16) | \
                   ((bc)            << 17) | \
                   ((eop)           << 21) | \
                   ((vpm)           << 22) | \
                   ((cf_inst)       << 23) | \
                   ((wqm)           << 30) | \
                   ((unsigned int)(b) << 31)))

  #define CF_ALLOC_IMP_EXP_DWORD1_SWIZ(sel_x, sel_y, sel_z, sel_w, el, bc, eop, vpm, cf_inst, wqm, b) \
      cpu_to_le32((((sel_x)         <<  0) | \
                   ((sel_y)         <<  3) | \
                   ((sel_z)         <<  6) | \
                   ((sel_w)         <<  9) | \
                   ((el)            << 16) | \
                   ((bc)            << 17) | \
                   ((eop)           << 21) | \
                   ((vpm)           << 22) | \
                   ((cf_inst)       << 23) | \
                   ((wqm)           << 30) | \
                   ((unsigned int)(b) << 31)))
  186.  
  /*
   * ALU clause instruction fields.
   *
   * Every function-like macro here expands to its argument unchanged and
   * only names the matching argument of ALU_DWORD0 / ALU_DWORD1_OP2 /
   * ALU_DWORD1_OP3.
   */
  #define SRC0_SEL(x)             (x)
  #define SRC1_SEL(x)             (x)
  #define SRC2_SEL(x)             (x)
  /*
   * Value ranges of src[0-2]_sel:
   *     0-127  GPR
   *   128-159  kcache constants bank 0
   *   160-191  kcache constants bank 1
   *   248-255  special SQ_ALU_SRC_* (0, 1, etc.)
   */
  #define ALU_SRC_GPR_BASE        0
  #define ALU_SRC_KCACHE0_BASE    128
  #define ALU_SRC_KCACHE1_BASE    160
  #define ALU_SRC_CFILE_BASE      256

  #define SRC0_REL(x)             (x)
  #define SRC1_REL(x)             (x)
  #define SRC2_REL(x)             (x)

  /* elem: component selected from the source */
  #define SRC0_ELEM(x)            (x)
  #define SRC1_ELEM(x)            (x)
  #define SRC2_ELEM(x)            (x)
  #define ELEM_X                  0
  #define ELEM_Y                  1
  #define ELEM_Z                  2
  #define ELEM_W                  3

  /* neg */
  #define SRC0_NEG(x)             (x)
  #define SRC1_NEG(x)             (x)
  #define SRC2_NEG(x)             (x)

  #define INDEX_MODE(x)           (x)     /* im: SQ_INDEX_* */
  #define PRED_SEL(x)             (x)     /* ps: SQ_PRED_SEL_* */
  #define LAST(x)                 (x)     /* last */

  /* abs */
  #define SRC0_ABS(x)             (x)
  #define SRC1_ABS(x)             (x)

  #define UPDATE_EXECUTE_MASK(x)  (x)     /* uem */
  #define UPDATE_PRED(x)          (x)     /* up */
  #define WRITE_MASK(x)           (x)     /* wm */
  #define FOG_MERGE(x)            (x)     /* fm */
  #define OMOD(x)                 (x)     /* omod: SQ_ALU_OMOD_* */
  #define ALU_INST(x)             (x)     /* alu inst: SQ_ALU_INST_* */
  #define BANK_SWIZZLE(x)         (x)     /* bs: SQ_ALU_VEC_* */
  #define DST_GPR(x)              (x)
  #define DST_REL(x)              (x)
  #define DST_ELEM(x)             (x)
  #define CLAMP(x)                (x)
  243.  
  /*
   * ALU instruction word packers.
   *
   * Each macro ORs its arguments together at fixed bit offsets and passes the
   * result to cpu_to_le32().  Arguments are not masked, so callers must
   * supply values that fit their field.
   *
   * The field placed at bit 31 (last / clamp) is converted to unsigned int
   * before shifting: moving a set bit into the sign bit of a signed int is
   * undefined behavior in C.  The resulting bit pattern is unchanged.
   */
  #define ALU_DWORD0(src0_sel, s0r, s0e, s0n, src1_sel, s1r, s1e, s1n, im, ps, last) \
      cpu_to_le32((((src0_sel)      <<  0) | \
                   ((s0r)           <<  9) | \
                   ((s0e)           << 10) | \
                   ((s0n)           << 12) | \
                   ((src1_sel)      << 13) | \
                   ((s1r)           << 22) | \
                   ((s1e)           << 23) | \
                   ((s1n)           << 25) | \
                   ((im)            << 26) | \
                   ((ps)            << 29) | \
                   ((unsigned int)(last) << 31)))

  /*
   * R7xx has alu_inst at a different slot, and no fog merge any more
   * (no fixed-function fog any more).
   */
  #define R6xx_ALU_DWORD1_OP2(s0a, s1a, uem, up, wm, fm, omod, alu_inst, bs, dst_gpr, dr, de, clamp) \
      cpu_to_le32((((s0a)           <<  0) | \
                   ((s1a)           <<  1) | \
                   ((uem)           <<  2) | \
                   ((up)            <<  3) | \
                   ((wm)            <<  4) | \
                   ((fm)            <<  5) | \
                   ((omod)          <<  6) | \
                   ((alu_inst)      <<  8) | \
                   ((bs)            << 18) | \
                   ((dst_gpr)       << 21) | \
                   ((dr)            << 28) | \
                   ((de)            << 29) | \
                   ((unsigned int)(clamp) << 31)))

  #define R7xx_ALU_DWORD1_OP2(s0a, s1a, uem, up, wm, omod, alu_inst, bs, dst_gpr, dr, de, clamp) \
      cpu_to_le32((((s0a)           <<  0) | \
                   ((s1a)           <<  1) | \
                   ((uem)           <<  2) | \
                   ((up)            <<  3) | \
                   ((wm)            <<  4) | \
                   ((omod)          <<  5) | \
                   ((alu_inst)      <<  7) | \
                   ((bs)            << 18) | \
                   ((dst_gpr)       << 21) | \
                   ((dr)            << 28) | \
                   ((de)            << 29) | \
                   ((unsigned int)(clamp) << 31)))

  /*
   * This is a general chipset macro, but due to selection by chipid it is
   * typically not usable in static arrays.
   * Fog is NOT USED on R7xx, even if specified: families below
   * CHIP_FAMILY_RV770 get the R6xx layout, all others the R7xx layout.
   */
  #define ALU_DWORD1_OP2(chipfamily, s0a, s1a, uem, up, wm, fm, omod, alu_inst, bs, dst_gpr, dr, de, clamp) \
      ((chipfamily) < CHIP_FAMILY_RV770 ? \
       R6xx_ALU_DWORD1_OP2(s0a, s1a, uem, up, wm, fm, omod, alu_inst, bs, dst_gpr, dr, de, clamp) : \
       R7xx_ALU_DWORD1_OP2(s0a, s1a, uem, up, wm, omod, alu_inst, bs, dst_gpr, dr, de, clamp))

  #define ALU_DWORD1_OP3(src2_sel, s2r, s2e, s2n, alu_inst, bs, dst_gpr, dr, de, clamp) \
      cpu_to_le32((((src2_sel)      <<  0) | \
                   ((s2r)           <<  9) | \
                   ((s2e)           << 10) | \
                   ((s2n)           << 12) | \
                   ((alu_inst)      << 13) | \
                   ((bs)            << 18) | \
                   ((dst_gpr)       << 21) | \
                   ((dr)            << 28) | \
                   ((de)            << 29) | \
                   ((unsigned int)(clamp) << 31)))
  267.  
  /*
   * VTX clause instruction fields.
   *
   * Apart from MEGA_FETCH_COUNT the function-like macros expand to their
   * argument unchanged and only name the matching argument of VTX_DWORD0,
   * VTX_DWORD1_SEM / VTX_DWORD1_GPR and VTX_DWORD2.
   */
  #define VTX_INST(x)             (x)     /* vtx inst: SQ_VTX_INST_* */
  #define FETCH_TYPE(x)           (x)     /* fetch type: SQ_VTX_FETCH_* */

  /* Defined a second time, with the same text, among the TEX fields. */
  #define FETCH_WHOLE_QUAD(x)     (x)
  #define BUFFER_ID(x)            (x)
  #define SRC_GPR(x)              (x)
  #define SRC_REL(x)              (x)
  /* a non-zero value is encoded as value - 1, zero stays zero */
  #define MEGA_FETCH_COUNT(x)     ((x) ? ((x) - 1) : 0)

  #define SEMANTIC_ID(x)          (x)
  #define DST_SEL_X(x)            (x)
  #define DST_SEL_Y(x)            (x)
  #define DST_SEL_Z(x)            (x)
  #define DST_SEL_W(x)            (x)
  #define USE_CONST_FIELDS(x)     (x)
  #define DATA_FORMAT(x)          (x)
  #define NUM_FORMAT_ALL(x)       (x)     /* num format: SQ_NUM_FORMAT_* */
  #define FORMAT_COMP_ALL(x)      (x)     /* format comp: SQ_FORMAT_COMP_* */

  /* sma */
  #define SRF_MODE_ALL(x)         (x)
  #define SRF_MODE_ZERO_CLAMP_MINUS_ONE   0
  #define SRF_MODE_NO_ZERO                1

  #define OFFSET(x)               (x)
  #define ENDIAN_SWAP(x)          (x)     /* endian swap: SQ_ENDIAN_* */
  #define CONST_BUF_NO_STRIDE(x)  (x)
  #define MEGA_FETCH(x)           (x)     /* mf */
  302.  
  /*
   * VTX instruction word packers.
   *
   * Each macro ORs its arguments together at fixed bit offsets and passes the
   * result to cpu_to_le32().  Arguments are not masked, so callers must
   * supply values that fit their field.
   *
   * A field whose shift can reach bit 31 (mfc / sma) is converted to unsigned
   * int before shifting: moving a set bit into the sign bit of a signed int
   * is undefined behavior in C.  The resulting bit pattern is unchanged.
   */
  #define VTX_DWORD0(vtx_inst, ft, fwq, buffer_id, src_gpr, sr, ssx, mfc) \
      cpu_to_le32((((vtx_inst)      <<  0) | \
                   ((ft)            <<  5) | \
                   ((fwq)           <<  7) | \
                   ((buffer_id)     <<  8) | \
                   ((src_gpr)       << 16) | \
                   ((sr)            << 23) | \
                   ((ssx)           << 24) | \
                   ((unsigned int)(mfc) << 26)))

  /* Second word, variant whose low field is a semantic id. */
  #define VTX_DWORD1_SEM(semantic_id, dsx, dsy, dsz, dsw, ucf, data_format, nfa, fca, sma) \
      cpu_to_le32((((semantic_id)   <<  0) | \
                   ((dsx)           <<  9) | \
                   ((dsy)           << 12) | \
                   ((dsz)           << 15) | \
                   ((dsw)           << 18) | \
                   ((ucf)           << 21) | \
                   ((data_format)   << 22) | \
                   ((nfa)           << 28) | \
                   ((fca)           << 30) | \
                   ((unsigned int)(sma) << 31)))

  /* Second word, variant whose low fields are a destination GPR and its rel bit. */
  #define VTX_DWORD1_GPR(dst_gpr, dr, dsx, dsy, dsz, dsw, ucf, data_format, nfa, fca, sma) \
      cpu_to_le32((((dst_gpr)       <<  0) | \
                   ((dr)            <<  7) | \
                   ((dsx)           <<  9) | \
                   ((dsy)           << 12) | \
                   ((dsz)           << 15) | \
                   ((dsw)           << 18) | \
                   ((ucf)           << 21) | \
                   ((data_format)   << 22) | \
                   ((nfa)           << 28) | \
                   ((fca)           << 30) | \
                   ((unsigned int)(sma) << 31)))

  #define VTX_DWORD2(offset, es, cbns, mf) \
      cpu_to_le32((((offset)        <<  0) | \
                   ((es)            << 16) | \
                   ((cbns)          << 18) | \
                   ((mf)            << 19)))

  /* All-zero padding word. */
  #define VTX_DWORD_PAD cpu_to_le32(0x00000000)
  315.  
  /*
   * TEX clause instruction fields.
   *
   * Apart from OFFSET_X/Y/Z the function-like macros expand to their
   * argument unchanged and only name the matching argument of TEX_DWORD0,
   * TEX_DWORD1 and TEX_DWORD2.
   */
  #define TEX_INST(x)             (x)     /* tex inst: SQ_TEX_INST_* */

  #define BC_FRAC_MODE(x)         (x)
  /* Also defined, with the same text, among the VTX fields. */
  #define FETCH_WHOLE_QUAD(x)     (x)
  #define RESOURCE_ID(x)          (x)
  #define R7xx_ALT_CONST(x)       (x)

  #define LOD_BIAS(x)             (x)

  /* ct: coordinate type per component */
  #define COORD_TYPE_X(x)         (x)
  #define COORD_TYPE_Y(x)         (x)
  #define COORD_TYPE_Z(x)         (x)
  #define COORD_TYPE_W(x)         (x)
  #define TEX_UNNORMALIZED        0
  #define TEX_NORMALIZED          1

  /*
   * Texel offsets: 4:1-bit 2's-complement fixed-point, range [-8.0..7.5].
   * The argument is doubled BEFORE the conversion to int so that half steps
   * keep their fractional bit (7.5 -> 15, -0.5 -> 0x1f); the result is then
   * masked to the low five bits.  Integer arguments encode as before.
   */
  #define OFFSET_X(x)             (((int)((x) * 2)) & 0x1f)
  #define OFFSET_Y(x)             (((int)((x) * 2)) & 0x1f)
  #define OFFSET_Z(x)             (((int)((x) * 2)) & 0x1f)
  #define SAMPLER_ID(x)           (x)
  337.  
  /*
   * TEX instruction word packers.
   *
   * Each macro ORs its arguments together at fixed bit offsets and passes the
   * result to cpu_to_le32().  Arguments are not masked, so callers must
   * supply values that fit their field.
   *
   * A field whose shift can reach bit 31 (ctw / ssw) is converted to unsigned
   * int before shifting: moving a set bit into the sign bit of a signed int
   * is undefined behavior in C.  The resulting bit pattern is unchanged.
   */

  /*
   * R7xx has an additional parameter ALT_CONST. We always expose it, but
   * ALT_CONST (the "ac" argument) is R7xx only.
   */
  #define TEX_DWORD0(tex_inst, bfm, fwq, resource_id, src_gpr, sr, ac) \
      cpu_to_le32((((tex_inst)      <<  0) | \
                   ((bfm)           <<  5) | \
                   ((fwq)           <<  7) | \
                   ((resource_id)   <<  8) | \
                   ((src_gpr)       << 16) | \
                   ((sr)            << 23) | \
                   ((ac)            << 24)))

  #define TEX_DWORD1(dst_gpr, dr, dsx, dsy, dsz, dsw, lod_bias, ctx, cty, ctz, ctw) \
      cpu_to_le32((((dst_gpr)       <<  0) | \
                   ((dr)            <<  7) | \
                   ((dsx)           <<  9) | \
                   ((dsy)           << 12) | \
                   ((dsz)           << 15) | \
                   ((dsw)           << 18) | \
                   ((lod_bias)      << 21) | \
                   ((ctx)           << 28) | \
                   ((cty)           << 29) | \
                   ((ctz)           << 30) | \
                   ((unsigned int)(ctw) << 31)))

  #define TEX_DWORD2(offset_x, offset_y, offset_z, sampler_id, ssx, ssy, ssz, ssw) \
      cpu_to_le32((((offset_x)      <<  0) | \
                   ((offset_y)      <<  5) | \
                   ((offset_z)      << 10) | \
                   ((sampler_id)    << 15) | \
                   ((ssx)           << 20) | \
                   ((ssy)           << 23) | \
                   ((ssz)           << 26) | \
                   ((unsigned int)(ssw) << 29)))

  /* All-zero padding word. */
  #define TEX_DWORD_PAD cpu_to_le32(0x00000000)
  349.  
  350. #endif
  351.