Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
2176 serge 1
/*
2
 * RadeonHD R6xx, R7xx DRI driver
3
 *
4
 * Copyright (C) 2008-2009  Alexander Deucher
5
 * Copyright (C) 2008-2009  Matthias Hopf
6
 *
7
 * Permission is hereby granted, free of charge, to any person obtaining a
8
 * copy of this software and associated documentation files (the "Software"),
9
 * to deal in the Software without restriction, including without limitation
10
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
11
 * and/or sell copies of the Software, and to permit persons to whom the
12
 * Software is furnished to do so, subject to the following conditions:
13
 *
14
 * The above copyright notice and this permission notice shall be included
15
 * in all copies or substantial portions of the Software.
16
 *
17
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
18
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20
 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
21
 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
22
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
 */
24
 
25
/*
26
 * Shader macros
27
 */
28
 
29
#ifndef __SHADER_H__
30
#define __SHADER_H__
31
 
32
//#include "radeon.h"
33
 
34
/* Restrictions of ALU instructions
35
 * order of scalar ops is always x,y,z,w,t(rans), last to be indicated by last==1.
36
 * max of 3 different src GPRs per instr.
37
 * max of 4 different cfile constant components per instr.
38
 * max of 2 (different) constants (any type) for t.
39
 * bank swizzle (see below).
40
 * GPR write stalls read of same register. Auto-replaced by PV/PS, NOP needed if registers are relative to
41
 * different indices (gpr,loop,nothing).
42
 * may use constant registers or constant cache, but not both.
43
 */
44
 
45
/* Bank_swizzle: (pp. 297ff)
46
 * Only one of each x,y,z,w GPR component can be loaded per cycle (3 cycles per instr, called 0-2).
47
 * per scalar instruction bank_swizzle can select which cycle each operand comes from. e.g.:
48
 *   SRC0 SRC1 SRC2  SWIZZLE  cycle0 cycle1 cycle2
49
 *   1.x  2.x          012     1.x    2.x     -
50
 *   3.x  1.y          201     1.y     -     3.x
51
 *   2.x  1.y          102    (1.y)  (2.x)    -
52
 * If data is read in a cycle, multiple scalar instructions can reference it.
53
 * Special case: square() - i.e. same component in src0+src1 doesn't need read port -> ignores swizzle for src1.
54
 * No restrictions for constants or PV/PS.
55
 * t can load multiple components in a single cycle slot, but has to share cycles with xyzw.
56
 * t with single constant may not load GPRs or PV/PS in cycle 0 (careful with ALU_TRANS_210).
57
 * t with two constants may only load GPRs or PV/PS in cycle 2.
58
 */
59
 
60
 
61
/* Order of instructions: All CF, All ALU, All Tex/Vtx fetches */
62
 
63
 
64
/*
 * CF insts -- field helpers.
 *
 * Every macro in this group except I_COUNT expands to its argument
 * unchanged; they presumably exist to label the positional arguments of
 * the CF_*DWORD* packing macros at the call site.
 */
// addr
#define ADDR(x)                 (x)
// pc
#define POP_COUNT(x)            (x)
// const
#define CF_CONST(x)             (x)
// cond
#define COND(x)                 (x)     // SQ_COND_*
// count: yields (x - 1), except that 0 stays 0. Evaluates x twice.
#define I_COUNT(x)              ((x) ? ((x) - 1) : 0)
// r7xx
#define COUNT_3(x)              (x)
// call count
#define CALL_COUNT(x)           (x)
// eop
#define END_OF_PROGRAM(x)       (x)
// vpm
#define VALID_PIXEL_MODE(x)     (x)
// cf inst
#define CF_INST(x)              (x)     // SQ_CF_INST_*

// wqm
#define WHOLE_QUAD_MODE(x)      (x)
// barrier
#define BARRIER(x)              (x)
// kb0
#define KCACHE_BANK0(x)         (x)
// kb1
#define KCACHE_BANK1(x)         (x)
// km0/1
#define KCACHE_MODE0(x)         (x)
#define KCACHE_MODE1(x)         (x)     // SQ_CF_KCACHE_*
// kcache addresses
#define KCACHE_ADDR0(x)         (x)
#define KCACHE_ADDR1(x)         (x)
// uw
#define USES_WATERFALL(x)       (x)
102
 
103
/*
 * Export array base. ARRAY_BASE is an identity wrapper; the CF_PIXEL_* and
 * CF_POS* constants below are the values handed to it.
 */
#define ARRAY_BASE(x)           (x)
// export pixel
#define CF_PIXEL_MRT0           0
#define CF_PIXEL_MRT1           1
#define CF_PIXEL_MRT2           2
#define CF_PIXEL_MRT3           3
#define CF_PIXEL_MRT4           4
#define CF_PIXEL_MRT5           5
#define CF_PIXEL_MRT6           6
#define CF_PIXEL_MRT7           7
// *_FOG: r6xx only
#define CF_PIXEL_MRT0_FOG       16
#define CF_PIXEL_MRT1_FOG       17
#define CF_PIXEL_MRT2_FOG       18
#define CF_PIXEL_MRT3_FOG       19
#define CF_PIXEL_MRT4_FOG       20
#define CF_PIXEL_MRT5_FOG       21
#define CF_PIXEL_MRT6_FOG       22
#define CF_PIXEL_MRT7_FOG       23
// Note: same numeric value as CF_POS1 below.
#define CF_PIXEL_Z              61
// export pos
#define CF_POS0                 60
#define CF_POS1                 61
#define CF_POS2                 62
#define CF_POS3                 63
// export param
// 0...31
#define TYPE(x)                 (x)     // SQ_EXPORT_*
/*
 * Reference values for TYPE(). Compiled out; the SQ_EXPORT_* names are
 * presumably defined by another header.
 */
#if 0
// type export
#define SQ_EXPORT_PIXEL              0
#define SQ_EXPORT_POS                1
#define SQ_EXPORT_PARAM              2
// reserved 3
// type mem
#define SQ_EXPORT_WRITE              0
#define SQ_EXPORT_WRITE_IND          1
#define SQ_EXPORT_WRITE_ACK          2
#define SQ_EXPORT_WRITE_IND_ACK      3
#endif
143
 
144
/*
 * Field helpers for the alloc/import/export CF words.
 *
 * ELEM_SIZE and BURST_COUNT yield (x - 1), with 0 staying 0. Their
 * argument is fully parenthesized so that an expression argument such as
 * `a | b` or `n >> 1` is decremented as a whole. Both evaluate x twice.
 * The remaining macros expand to their argument unchanged.
 */
#define RW_GPR(x)               (x)
#define RW_REL(x)               (x)
#define ABSOLUTE                0
#define RELATIVE                1
#define INDEX_GPR(x)            (x)
#define ELEM_SIZE(x)            ((x) ? ((x) - 1) : 0)
#define COMP_MASK(x)            (x)
#define R6xx_ELEM_LOOP(x)       (x)
#define BURST_COUNT(x)          ((x) ? ((x) - 1) : 0)
153
 
154
// swiz: per-component source selectors (identity wrappers), SQ_SEL_* each
#define SRC_SEL_X(x)            (x)
#define SRC_SEL_Y(x)            (x)
#define SRC_SEL_Z(x)            (x)
#define SRC_SEL_W(x)            (x)
159
 
160
/*
 * Generic CF instruction words. cpu_to_le32 is not defined in this header
 * and is expected to be provided by the including file.
 */
/* CF word 0: the address, unshifted. */
#define CF_DWORD0(addr) cpu_to_le32((addr))
// R7xx has another entry (COUNT3), but that is only used for adding a bit to count.
// We allow one more bit for count in the argument of the macro on R7xx instead.
// R6xx: [0,7]  R7xx: [1,16]
/*
 * CF word 1. Bit offsets: pc 0, cf_const 3, cond 8, count[2:0] 10,
 * call_count 13, count[3] 19, eop 21, vpm 22, cf_inst 23, wqm 30, b 31.
 * `count` is evaluated twice. `b` is converted to unsigned before the
 * shift because shifting a signed 1 into bit 31 is undefined behaviour.
 */
#define CF_DWORD1(pc, cf_const, cond, count, call_count, eop, vpm, cf_inst, wqm, b) \
    cpu_to_le32((((pc) << 0) | ((cf_const) << 3) | ((cond) << 8) | \
                 (((count) & 7) << 10) | (((count) >> 3) << 19) | \
                 ((call_count) << 13) | ((eop) << 21) | ((vpm) << 22) | \
                 ((cf_inst) << 23) | ((wqm) << 30) | ((unsigned int)(b) << 31)))
167
 
168
/*
 * CF ALU clause words. cpu_to_le32 is not defined in this header and is
 * expected to be provided by the including file.
 *
 * Word 0 bit offsets: addr 0, kb0 22, kb1 26, km0 30.
 * Word 1 bit offsets: km1 0, kcache_addr0 2, kcache_addr1 10, count 18,
 *                     uw 25, cf_inst 26, wqm 30, b 31.
 * Fields that reach bit 31 (km0, b) are converted to unsigned before the
 * shift so that the shift cannot overflow a signed int.
 */
#define CF_ALU_DWORD0(addr, kb0, kb1, km0) \
    cpu_to_le32((((addr) << 0) | ((kb0) << 22) | ((kb1) << 26) | \
                 ((unsigned int)(km0) << 30)))
#define CF_ALU_DWORD1(km1, kcache_addr0, kcache_addr1, count, uw, cf_inst, wqm, b) \
    cpu_to_le32((((km1) << 0) | ((kcache_addr0) << 2) | ((kcache_addr1) << 10) | \
                 ((count) << 18) | ((uw) << 25) | ((cf_inst) << 26) | ((wqm) << 30) | \
                 ((unsigned int)(b) << 31)))
172
 
173
/*
 * CF alloc/import/export words. cpu_to_le32 is not defined in this header
 * and is expected to be provided by the including file.
 *
 * Word 0 bit offsets: array_base 0, type 13, rw_gpr 15, rr 22,
 *                     index_gpr 23, es 30.
 * Fields that reach bit 31 (es, b) are converted to unsigned before the
 * shift so that the shift cannot overflow a signed int.
 */
#define CF_ALLOC_IMP_EXP_DWORD0(array_base, type, rw_gpr, rr, index_gpr, es) \
    cpu_to_le32((((array_base) << 0) | ((type) << 13) | ((rw_gpr) << 15) | \
                 ((rr) << 22) | ((index_gpr) << 23) | ((unsigned int)(es) << 30)))
// R7xx apparently doesn't have the ELEM_LOOP entry any more
// We still expose it, but ELEM_LOOP is explicitly R6xx now.
// TODO: is this just forgotten in the docs, or really not available any more?
/*
 * Word 1, buffer form. Bit offsets: array_size 0, comp_mask 12, el 16,
 * bc 17, eop 21, vpm 22, cf_inst 23, wqm 30, b 31.
 */
#define CF_ALLOC_IMP_EXP_DWORD1_BUF(array_size, comp_mask, el, bc, eop, vpm, cf_inst, wqm, b) \
    cpu_to_le32((((array_size) << 0) | ((comp_mask) << 12) | ((el) << 16) | \
                 ((bc) << 17) | ((eop) << 21) | ((vpm) << 22) | ((cf_inst) << 23) | \
                 ((wqm) << 30) | ((unsigned int)(b) << 31)))
/*
 * Word 1, swizzle form. Bit offsets: sel_x 0, sel_y 3, sel_z 6, sel_w 9,
 * el 16, bc 17, eop 21, vpm 22, cf_inst 23, wqm 30, b 31.
 */
#define CF_ALLOC_IMP_EXP_DWORD1_SWIZ(sel_x, sel_y, sel_z, sel_w, el, bc, eop, vpm, cf_inst, wqm, b) \
    cpu_to_le32((((sel_x) << 0) | ((sel_y) << 3) | ((sel_z) << 6) | ((sel_w) << 9) | \
                 ((el) << 16) | ((bc) << 17) | ((eop) << 21) | ((vpm) << 22) | \
                 ((cf_inst) << 23) | ((wqm) << 30) | ((unsigned int)(b) << 31)))
186
 
187
/*
 * ALU clause insts -- field helpers.
 *
 * All function-like macros in this group expand to their argument
 * unchanged; they presumably label the positional arguments of the
 * ALU_DWORD* packing macros at the call site.
 */
#define SRC0_SEL(x)             (x)
#define SRC1_SEL(x)             (x)
#define SRC2_SEL(x)             (x)
// src[0-2]_sel
//   0-127 GPR
// 128-159 kcache constants bank 0
// 160-191 kcache constants bank 1
// 248-255 special SQ_ALU_SRC_* (0, 1, etc.)
#define ALU_SRC_GPR_BASE        0
#define ALU_SRC_KCACHE0_BASE    128
#define ALU_SRC_KCACHE1_BASE    160
#define ALU_SRC_CFILE_BASE      256

#define SRC0_REL(x)             (x)
#define SRC1_REL(x)             (x)
#define SRC2_REL(x)             (x)
// elem
#define SRC0_ELEM(x)            (x)
#define SRC1_ELEM(x)            (x)
#define SRC2_ELEM(x)            (x)
#define ELEM_X                  0
#define ELEM_Y                  1
#define ELEM_Z                  2
#define ELEM_W                  3
// neg
#define SRC0_NEG(x)             (x)
#define SRC1_NEG(x)             (x)
#define SRC2_NEG(x)             (x)
// im
#define INDEX_MODE(x)           (x)     // SQ_INDEX_*
// ps
#define PRED_SEL(x)             (x)     // SQ_PRED_SEL_*
// last
#define LAST(x)                 (x)
// abs
#define SRC0_ABS(x)             (x)
#define SRC1_ABS(x)             (x)
// uem
#define UPDATE_EXECUTE_MASK(x)  (x)
// up
#define UPDATE_PRED(x)          (x)
// wm
#define WRITE_MASK(x)           (x)
// fm
#define FOG_MERGE(x)            (x)
// omod
#define OMOD(x)                 (x)     // SQ_ALU_OMOD_*
// alu inst
#define ALU_INST(x)             (x)     // SQ_ALU_INST_*
// bs
#define BANK_SWIZZLE(x)         (x)     // SQ_ALU_VEC_*
#define DST_GPR(x)              (x)
#define DST_REL(x)              (x)
#define DST_ELEM(x)             (x)
#define CLAMP(x)                (x)
243
 
244
/*
 * ALU instruction words. cpu_to_le32 is not defined in this header and is
 * expected to be provided by the including file. The 1-bit fields at bit 31
 * (last, clamp) are converted to unsigned before the shift so that the
 * shift cannot overflow a signed int.
 */
/*
 * ALU word 0. Bit offsets: src0_sel 0, s0r 9, s0e 10, s0n 12,
 * src1_sel 13, s1r 22, s1e 23, s1n 25, im 26, ps 29, last 31.
 */
#define ALU_DWORD0(src0_sel, s0r, s0e, s0n, src1_sel, s1r, s1e, s1n, im, ps, last) \
    cpu_to_le32((((src0_sel) << 0) | ((s0r) << 9) | ((s0e) << 10) | ((s0n) << 12) | \
                 ((src1_sel) << 13) | ((s1r) << 22) | ((s1e) << 23) | ((s1n) << 25) | \
                 ((im) << 26) | ((ps) << 29) | ((unsigned int)(last) << 31)))
// R7xx has alu_inst at a different slot, and no fog merge any more (no fixed-function fog any more)
/*
 * ALU word 1, two-operand form, R6xx layout. Bit offsets: s0a 0, s1a 1,
 * uem 2, up 3, wm 4, fm 5, omod 6, alu_inst 8, bs 18, dst_gpr 21, dr 28,
 * de 29, clamp 31.
 */
#define R6xx_ALU_DWORD1_OP2(s0a, s1a, uem, up, wm, fm, omod, alu_inst, bs, dst_gpr, dr, de, clamp) \
    cpu_to_le32((((s0a) << 0) | ((s1a) << 1) | ((uem) << 2) | ((up) << 3) | ((wm) << 4) | \
                 ((fm) << 5) | ((omod) << 6) | ((alu_inst) << 8) | ((bs) << 18) | \
                 ((dst_gpr) << 21) | ((dr) << 28) | ((de) << 29) | \
                 ((unsigned int)(clamp) << 31)))
/*
 * ALU word 1, two-operand form, R7xx layout: no fm field; omod at 5 and
 * alu_inst at 7; other offsets as in the R6xx variant.
 */
#define R7xx_ALU_DWORD1_OP2(s0a, s1a, uem, up, wm, omod, alu_inst, bs, dst_gpr, dr, de, clamp) \
    cpu_to_le32((((s0a) << 0) | ((s1a) << 1) | ((uem) << 2) | ((up) << 3) | ((wm) << 4) | \
                 ((omod) << 5) | ((alu_inst) << 7) | ((bs) << 18) | \
                 ((dst_gpr) << 21) | ((dr) << 28) | ((de) << 29) | \
                 ((unsigned int)(clamp) << 31)))
// This is a general chipset macro, but due to selection by chipid typically not usable in static arrays
// Fog is NOT USED on R7xx, even if specified.
/*
 * Picks the R6xx layout when chipfamily < CHIP_FAMILY_RV770 and the R7xx
 * layout otherwise. CHIP_FAMILY_RV770 is not defined in this header.
 */
#define ALU_DWORD1_OP2(chipfamily, s0a, s1a, uem, up, wm, fm, omod, alu_inst, bs, dst_gpr, dr, de, clamp) \
    ((chipfamily) < CHIP_FAMILY_RV770 ? \
     R6xx_ALU_DWORD1_OP2(s0a, s1a, uem, up, wm, fm, omod, alu_inst, bs, dst_gpr, dr, de, clamp) : \
     R7xx_ALU_DWORD1_OP2(s0a, s1a, uem, up, wm, omod, alu_inst, bs, dst_gpr, dr, de, clamp))
/*
 * ALU word 1, three-operand form. Bit offsets: src2_sel 0, s2r 9, s2e 10,
 * s2n 12, alu_inst 13, bs 18, dst_gpr 21, dr 28, de 29, clamp 31.
 */
#define ALU_DWORD1_OP3(src2_sel, s2r, s2e, s2n, alu_inst, bs, dst_gpr, dr, de, clamp) \
    cpu_to_le32((((src2_sel) << 0) | ((s2r) << 9) | ((s2e) << 10) | ((s2n) << 12) | \
                 ((alu_inst) << 13) | ((bs) << 18) | ((dst_gpr) << 21) | ((dr) << 28) | \
                 ((de) << 29) | ((unsigned int)(clamp) << 31)))
267
 
268
/*
 * VTX clause insts -- field helpers.
 *
 * Apart from MEGA_FETCH_COUNT, every function-like macro in this group
 * expands to its argument unchanged; they presumably label the positional
 * arguments of the VTX_DWORD* packing macros at the call site.
 */
// vtx insts
#define VTX_INST(x)             (x)     // SQ_VTX_INST_*

// fetch type
#define FETCH_TYPE(x)           (x)     // SQ_VTX_FETCH_*

#define FETCH_WHOLE_QUAD(x)     (x)
#define BUFFER_ID(x)            (x)
#define SRC_GPR(x)              (x)
#define SRC_REL(x)              (x)
// yields (x - 1), except that 0 stays 0. Evaluates x twice.
#define MEGA_FETCH_COUNT(x)     ((x) ? ((x) - 1) : 0)

#define SEMANTIC_ID(x)          (x)
#define DST_SEL_X(x)            (x)
#define DST_SEL_Y(x)            (x)
#define DST_SEL_Z(x)            (x)
#define DST_SEL_W(x)            (x)
#define USE_CONST_FIELDS(x)     (x)
#define DATA_FORMAT(x)          (x)
// num format
#define NUM_FORMAT_ALL(x)       (x)     // SQ_NUM_FORMAT_*
// format comp
#define FORMAT_COMP_ALL(x)      (x)     // SQ_FORMAT_COMP_*
// sma
#define SRF_MODE_ALL(x)         (x)
#define SRF_MODE_ZERO_CLAMP_MINUS_ONE   0
#define SRF_MODE_NO_ZERO                1
#define OFFSET(x)               (x)
// endian swap
#define ENDIAN_SWAP(x)          (x)     // SQ_ENDIAN_*
#define CONST_BUF_NO_STRIDE(x)  (x)
// mf
#define MEGA_FETCH(x)           (x)
302
 
303
/*
 * Vertex fetch instruction words. cpu_to_le32 is not defined in this
 * header and is expected to be provided by the including file. Fields that
 * reach bit 31 (mfc, sma) are converted to unsigned before the shift so
 * that the shift cannot overflow a signed int.
 */
/*
 * VTX word 0. Bit offsets: vtx_inst 0, ft 5, fwq 7, buffer_id 8,
 * src_gpr 16, sr 23, ssx 24, mfc 26.
 */
#define VTX_DWORD0(vtx_inst, ft, fwq, buffer_id, src_gpr, sr, ssx, mfc) \
    cpu_to_le32((((vtx_inst) << 0) | ((ft) << 5) | ((fwq) << 7) | ((buffer_id) << 8) | \
                 ((src_gpr) << 16) | ((sr) << 23) | ((ssx) << 24) | \
                 ((unsigned int)(mfc) << 26)))
/*
 * VTX word 1, semantic form. Bit offsets: semantic_id 0, dsx 9, dsy 12,
 * dsz 15, dsw 18, ucf 21, data_format 22, nfa 28, fca 30, sma 31.
 */
#define VTX_DWORD1_SEM(semantic_id, dsx, dsy, dsz, dsw, ucf, data_format, nfa, fca, sma) \
    cpu_to_le32((((semantic_id) << 0) | ((dsx) << 9) | ((dsy) << 12) | ((dsz) << 15) | \
                 ((dsw) << 18) | ((ucf) << 21) | ((data_format) << 22) | ((nfa) << 28) | \
                 ((fca) << 30) | ((unsigned int)(sma) << 31)))
/*
 * VTX word 1, GPR form: dst_gpr at 0 and dr at 7 take the place of
 * semantic_id; the remaining offsets match the semantic form.
 */
#define VTX_DWORD1_GPR(dst_gpr, dr, dsx, dsy, dsz, dsw, ucf, data_format, nfa, fca, sma) \
    cpu_to_le32((((dst_gpr) << 0) | ((dr) << 7) | ((dsx) << 9) | ((dsy) << 12) | \
                 ((dsz) << 15) | ((dsw) << 18) | ((ucf) << 21) | ((data_format) << 22) | \
                 ((nfa) << 28) | ((fca) << 30) | ((unsigned int)(sma) << 31)))
/* VTX word 2. Bit offsets: offset 0, es 16, cbns 18, mf 19. */
#define VTX_DWORD2(offset, es, cbns, mf) \
    cpu_to_le32((((offset) << 0) | ((es) << 16) | ((cbns) << 18) | ((mf) << 19)))
/* All-zero padding word. */
#define VTX_DWORD_PAD cpu_to_le32(0x00000000)
315
 
316
/*
 * TEX clause insts -- field helpers.
 *
 * Apart from OFFSET_X/Y/Z, every function-like macro in this group expands
 * to its argument unchanged.
 */
// tex insts
#define TEX_INST(x)             (x)     // SQ_TEX_INST_*

#define BC_FRAC_MODE(x)         (x)
// Token-identical to the FETCH_WHOLE_QUAD definition in the VTX section.
#define FETCH_WHOLE_QUAD(x)     (x)
#define RESOURCE_ID(x)          (x)
#define R7xx_ALT_CONST(x)       (x)

#define LOD_BIAS(x)             (x)
// ct
#define COORD_TYPE_X(x)         (x)
#define COORD_TYPE_Y(x)         (x)
#define COORD_TYPE_Z(x)         (x)
#define COORD_TYPE_W(x)         (x)
#define TEX_UNNORMALIZED        0
#define TEX_NORMALIZED          1
/*
 * Texel offsets: 4:1-bits 2's-complement fixed-point, [-8.0..7.5].
 * The argument is doubled BEFORE the conversion to int so that half-unit
 * values keep their fractional bit (0.5 -> 1, 7.5 -> 15, -0.5 -> 0x1f);
 * integer arguments give the same result either way. The value is then
 * masked to 5 bits.
 */
#define OFFSET_X(x)             ((int)((x) * 2) & 0x1f)
#define OFFSET_Y(x)             ((int)((x) * 2) & 0x1f)
#define OFFSET_Z(x)             ((int)((x) * 2) & 0x1f)
#define SAMPLER_ID(x)           (x)
337
 
338
/*
 * Texture fetch instruction words. cpu_to_le32 is not defined in this
 * header and is expected to be provided by the including file. Fields that
 * reach bit 31 (ctw, ssw) are converted to unsigned before the shift so
 * that the shift cannot overflow a signed int.
 */
// R7xx has an additional parameter ALT_CONST. We always expose it, but ALT_CONST is R7xx only
/*
 * TEX word 0. Bit offsets: tex_inst 0, bfm 5, fwq 7, resource_id 8,
 * src_gpr 16, sr 23, ac 24.
 */
#define TEX_DWORD0(tex_inst, bfm, fwq, resource_id, src_gpr, sr, ac) \
    cpu_to_le32((((tex_inst) << 0) | ((bfm) << 5) | ((fwq) << 7) | ((resource_id) << 8) | \
                 ((src_gpr) << 16) | ((sr) << 23) | ((ac) << 24)))
/*
 * TEX word 1. Bit offsets: dst_gpr 0, dr 7, dsx 9, dsy 12, dsz 15, dsw 18,
 * lod_bias 21, ctx 28, cty 29, ctz 30, ctw 31.
 */
#define TEX_DWORD1(dst_gpr, dr, dsx, dsy, dsz, dsw, lod_bias, ctx, cty, ctz, ctw) \
    cpu_to_le32((((dst_gpr) << 0) | ((dr) << 7) | ((dsx) << 9) | ((dsy) << 12) | \
                 ((dsz) << 15) | ((dsw) << 18) | ((lod_bias) << 21) | ((ctx) << 28) | \
                 ((cty) << 29) | ((ctz) << 30) | ((unsigned int)(ctw) << 31)))
/*
 * TEX word 2. Bit offsets: offset_x 0, offset_y 5, offset_z 10,
 * sampler_id 15, ssx 20, ssy 23, ssz 26, ssw 29.
 */
#define TEX_DWORD2(offset_x, offset_y, offset_z, sampler_id, ssx, ssy, ssz, ssw) \
    cpu_to_le32((((offset_x) << 0) | ((offset_y) << 5) | ((offset_z) << 10) | \
                 ((sampler_id) << 15) | ((ssx) << 20) | ((ssy) << 23) | ((ssz) << 26) | \
                 ((unsigned int)(ssw) << 29)))
/* All-zero padding word. */
#define TEX_DWORD_PAD cpu_to_le32(0x00000000)
349
 
350
#endif