Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
5564 serge 1
/*
2
 * Copyright (c) 2013 Rob Clark 
3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining a
5
 * copy of this software and associated documentation files (the "Software"),
6
 * to deal in the Software without restriction, including without limitation
7
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
 * and/or sell copies of the Software, and to permit persons to whom the
9
 * Software is furnished to do so, subject to the following conditions:
10
 *
11
 * The above copyright notice and this permission notice (including the next
12
 * paragraph) shall be included in all copies or substantial portions of the
13
 * Software.
14
 *
15
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
 * SOFTWARE.
22
 */
23
 
24
#ifndef IR3_H_
25
#define IR3_H_
26
 
27
#include <stdint.h>
#include <stdbool.h>

#include "util/u_debug.h"

#include "instr-a3xx.h"
#include "disasm.h"  /* TODO move 'enum shader_t' somewhere else.. */
34
 
35
/* low level intermediate representation of an adreno shader program */

/* Forward declarations: the structures defined further down hold
 * pointers to one another, so the tags must be known up front.
 */
struct ir3;
struct ir3_instruction;
struct ir3_block;
40
 
41
/* Size and register-usage summary of a shader.  ir3_assemble() takes a
 * pointer to one of these (presumably as an out-parameter -- confirm
 * against the implementation).
 */
struct ir3_info {
	uint16_t sizedwords;
	uint16_t instrs_count;   /* expanded to account for rpt's */
	/* NOTE: max_reg, etc, does not include registers not touched
	 * by the shader (ie. vertex fetched via VFD_DECODE but not
	 * touched by shader)
	 */
	int8_t   max_reg;   /* highest GPR # used by shader */
	int8_t   max_half_reg;
	int16_t  max_const;
};
52
 
53
/* One register operand (dst or src) of an instruction.  'flags' selects
 * how the first anonymous union is to be read (register number,
 * immediate value, or relative offset); the second anonymous union
 * holds either a write mask or, for relative addressing, a size.
 */
struct ir3_register {
	enum {
		IR3_REG_CONST  = 0x001,
		IR3_REG_IMMED  = 0x002,
		IR3_REG_HALF   = 0x004,
		IR3_REG_RELATIV= 0x008,
		IR3_REG_R      = 0x010,
		/* Most instructions, it seems, can do float abs/neg but not
		 * integer.  The CP pass needs to know what is intended (int or
		 * float) in order to do the right thing.  For this reason the
		 * abs/neg flags are split out into float and int variants.  In
		 * addition, .b (bitwise) operations, the negate is actually a
		 * bitwise not, so split that out into a new flag to make it
		 * more clear.
		 */
		IR3_REG_FNEG   = 0x020,
		IR3_REG_FABS   = 0x040,
		IR3_REG_SNEG   = 0x080,
		IR3_REG_SABS   = 0x100,
		IR3_REG_BNOT   = 0x200,
		IR3_REG_EVEN   = 0x400,
		IR3_REG_POS_INF= 0x800,
		/* (ei) flag, end-input?  Set on last bary, presumably to signal
		 * that the shader needs no more input:
		 */
		IR3_REG_EI     = 0x1000,
		/* meta-flags, for intermediate stages of IR, ie.
		 * before register assignment is done:
		 */
		IR3_REG_SSA    = 0x2000,   /* 'instr' is ptr to assigning instr */
		IR3_REG_IA     = 0x4000,   /* meta-input dst is "assigned" */
		IR3_REG_ADDR   = 0x8000,   /* register is a0.x */
	} flags;
	union {
		/* normal registers:
		 * the component is in the low two bits of the reg #, so
		 * rN.x becomes: (N << 2) | x
		 */
		int   num;
		/* immediate: */
		int32_t  iim_val;
		uint32_t uim_val;
		float    fim_val;
		/* relative: */
		int   offset;
	};

	/* for IR3_REG_SSA, src registers contain ptr back to
	 * assigning instruction.
	 */
	struct ir3_instruction *instr;

	union {
		/* used for cat5 instructions, but also for internal/IR level
		 * tracking of what registers are read/written by an instruction.
		 * wrmask may be a bad name since it is used to represent both
		 * src and dst that touch multiple adjacent registers.
		 */
		unsigned wrmask;
		/* for relative addressing, 32bits for array size is too small,
		 * but otoh we don't need to deal with disjoint sets, so instead
		 * use a simple size field (number of scalar components).
		 */
		unsigned size;
	};
};
119
 
120
struct ir3_instruction {
121
	struct ir3_block *block;
122
	int category;
123
	opc_t opc;
124
	enum {
125
		/* (sy) flag is set on first instruction, and after sample
126
		 * instructions (probably just on RAW hazard).
127
		 */
128
		IR3_INSTR_SY    = 0x001,
129
		/* (ss) flag is set on first instruction, and first instruction
130
		 * to depend on the result of "long" instructions (RAW hazard):
131
		 *
132
		 *   rcp, rsq, log2, exp2, sin, cos, sqrt
133
		 *
134
		 * It seems to synchronize until all in-flight instructions are
135
		 * completed, for example:
136
		 *
137
		 *   rsq hr1.w, hr1.w
138
		 *   add.f hr2.z, (neg)hr2.z, hc0.y
139
		 *   mul.f hr2.w, (neg)hr2.y, (neg)hr2.y
140
		 *   rsq hr2.x, hr2.x
141
		 *   (rpt1)nop
142
		 *   mad.f16 hr2.w, hr2.z, hr2.z, hr2.w
143
		 *   nop
144
		 *   mad.f16 hr2.w, (neg)hr0.w, (neg)hr0.w, hr2.w
145
		 *   (ss)(rpt2)mul.f hr1.x, (r)hr1.x, hr1.w
146
		 *   (rpt2)mul.f hr0.x, (neg)(r)hr0.x, hr2.x
147
		 *
148
		 * The last mul.f does not have (ss) set, presumably because the
149
		 * (ss) on the previous instruction does the job.
150
		 *
151
		 * The blob driver also seems to set it on WAR hazards, although
152
		 * not really clear if this is needed or just blob compiler being
153
		 * sloppy.  So far I haven't found a case where removing the (ss)
154
		 * causes problems for WAR hazard, but I could just be getting
155
		 * lucky:
156
		 *
157
		 *   rcp r1.y, r3.y
158
		 *   (ss)(rpt2)mad.f32 r3.y, (r)c9.x, r1.x, (r)r3.z
159
		 *
160
		 */
161
		IR3_INSTR_SS    = 0x002,
162
		/* (jp) flag is set on jump targets:
163
		 */
164
		IR3_INSTR_JP    = 0x004,
165
		IR3_INSTR_UL    = 0x008,
166
		IR3_INSTR_3D    = 0x010,
167
		IR3_INSTR_A     = 0x020,
168
		IR3_INSTR_O     = 0x040,
169
		IR3_INSTR_P     = 0x080,
170
		IR3_INSTR_S     = 0x100,
171
		IR3_INSTR_S2EN  = 0x200,
172
		/* meta-flags, for intermediate stages of IR, ie.
173
		 * before register assignment is done:
174
		 */
175
		IR3_INSTR_MARK  = 0x1000,
176
	} flags;
177
	int repeat;
178
#ifdef DEBUG
179
	unsigned regs_max;
180
#endif
181
	unsigned regs_count;
182
	struct ir3_register **regs;
183
	union {
184
		struct {
185
			char inv;
186
			char comp;
187
			int  immed;
188
		} cat0;
189
		struct {
190
			type_t src_type, dst_type;
191
		} cat1;
192
		struct {
193
			enum {
194
				IR3_COND_LT = 0,
195
				IR3_COND_LE = 1,
196
				IR3_COND_GT = 2,
197
				IR3_COND_GE = 3,
198
				IR3_COND_EQ = 4,
199
				IR3_COND_NE = 5,
200
			} condition;
201
		} cat2;
202
		struct {
203
			unsigned samp, tex;
204
			type_t type;
205
		} cat5;
206
		struct {
207
			type_t type;
208
			int offset;
209
			int iim_val;
210
		} cat6;
211
		/* for meta-instructions, just used to hold extra data
212
		 * before instruction scheduling, etc
213
		 */
214
		struct {
215
			int off;              /* component/offset */
216
		} fo;
217
		struct {
218
			int aid;
219
		} fi;
220
		struct {
221
			struct ir3_block *if_block, *else_block;
222
		} flow;
223
		struct {
224
			struct ir3_block *block;
225
		} inout;
226
 
227
		/* XXX keep this as big as all other union members! */
228
		uint32_t info[3];
229
	};
230
 
231
	/* transient values used during various algorithms: */
232
	union {
233
		/* The instruction depth is the max dependency distance to output.
234
		 *
235
		 * You can also think of it as the "cost", if we did any sort of
236
		 * optimization for register footprint.  Ie. a value that is  just
237
		 * result of moving a const to a reg would have a low cost,  so to
238
		 * it could make sense to duplicate the instruction at various
239
		 * points where the result is needed to reduce register footprint.
240
		 *
241
		 * DEPTH_UNUSED used to mark unused instructions after depth
242
		 * calculation pass.
243
		 */
244
#define DEPTH_UNUSED  ~0
245
		unsigned depth;
246
	};
247
 
248
	/* Used during CP and RA stages.  For fanin and shader inputs/
249
	 * outputs where we need a sequence of consecutive registers,
250
	 * keep track of each src instructions left (ie 'n-1') and right
251
	 * (ie 'n+1') neighbor.  The front-end must insert enough mov's
252
	 * to ensure that each instruction has at most one left and at
253
	 * most one right neighbor.  During the copy-propagation pass,
254
	 * we only remove mov's when we can preserve this constraint.
255
	 * And during the RA stage, we use the neighbor information to
256
	 * allocate a block of registers in one shot.
257
	 *
258
	 * TODO: maybe just add something like:
259
	 *   struct ir3_instruction_ref {
260
	 *       struct ir3_instruction *instr;
261
	 *       unsigned cnt;
262
	 *   }
263
	 *
264
	 * Or can we get away without the refcnt stuff?  It seems like
265
	 * it should be overkill..  the problem is if, potentially after
266
	 * already eliminating some mov's, if you have a single mov that
267
	 * needs to be grouped with it's neighbors in two different
268
	 * places (ex. shader output and a fanin).
269
	 */
270
	struct {
271
		struct ir3_instruction *left, *right;
272
		uint16_t left_cnt, right_cnt;
273
	} cp;
274
 
275
	/* an instruction can reference at most one address register amongst
276
	 * it's src/dst registers.  Beyond that, you need to insert mov's.
277
	 */
278
	struct ir3_instruction *address;
279
 
280
	/* in case of a instruction with relative dst instruction, we need to
281
	 * capture the dependency on the fanin for the previous values of
282
	 * the array elements.  Since we don't know at compile time actually
283
	 * which array elements are written, this serves to preserve the
284
	 * unconditional write to array elements prior to the conditional
285
	 * write.
286
	 *
287
	 * TODO only cat1 can do indirect write.. we could maybe move this
288
	 * into instr->cat1.fanin (but would require the frontend to insert
289
	 * the extra mov)
290
	 */
291
	struct ir3_instruction *fanin;
292
 
293
	struct ir3_instruction *next;
294
#ifdef DEBUG
295
	uint32_t serialno;
296
#endif
297
};
298
 
299
static inline struct ir3_instruction *
300
ir3_neighbor_first(struct ir3_instruction *instr)
301
{
302
	while (instr->cp.left)
303
		instr = instr->cp.left;
304
	return instr;
305
}
306
 
307
static inline int ir3_neighbor_count(struct ir3_instruction *instr)
308
{
309
	int num = 1;
310
 
311
	debug_assert(!instr->cp.left);
312
 
313
	while (instr->cp.right) {
314
		num++;
315
		instr = instr->cp.right;
316
	}
317
 
318
	return num;
319
}
320
 
321
/* defined outside this header; only referenced through a pointer here */
struct ir3_heap_chunk;

/* Top-level shader object.  Each <name>/<name>_count/<name>_sz triple
 * below is a growable pointer array in the layout expected by the
 * array_insert() macro.
 */
struct ir3 {
	unsigned instrs_count, instrs_sz;
	struct ir3_instruction **instrs;

	/* Track bary.f (and ldlv) instructions.. this is needed in
	 * scheduling to ensure that all varying fetches happen before
	 * any potential kill instructions.  The hw gets grumpy if all
	 * threads in a group are killed before the last bary.f gets
	 * a chance to signal end of input (ei).
	 */
	unsigned baryfs_count, baryfs_sz;
	struct ir3_instruction **baryfs;

	/* Track all indirect instructions (read and write).  To avoid
	 * deadlock scenario where an address register gets scheduled,
	 * but other dependent src instructions cannot be scheduled due
	 * to dependency on a *different* address register value, the
	 * scheduler needs to ensure that all dependencies other than
	 * the instruction other than the address register are scheduled
	 * before the one that writes the address register.  Having a
	 * convenient list of instructions that reference some address
	 * register simplifies this.
	 */
	unsigned indirects_count, indirects_sz;
	struct ir3_instruction **indirects;

	struct ir3_block *block;
	unsigned heap_idx;
	struct ir3_heap_chunk *chunk;
};
353
 
354
/* A block of instructions belonging to a shader.  The three pointer
 * arrays are sized by ntemporaries / ninputs / noutputs respectively
 * (presumably allocated by ir3_block_create() from its ntmp/nin/nout
 * arguments -- confirm against the implementation).
 */
struct ir3_block {
	struct ir3 *shader;
	unsigned ntemporaries, ninputs, noutputs;
	/* maps TGSI_FILE_TEMPORARY index back to the assigning instruction: */
	struct ir3_instruction **temporaries;
	struct ir3_instruction **inputs;
	struct ir3_instruction **outputs;
	/* only a single address register: */
	struct ir3_instruction *address;
	struct ir3_block *parent;
	struct ir3_instruction *head;
};
366
 
367
struct ir3 * ir3_create(void);
368
void ir3_destroy(struct ir3 *shader);
369
void * ir3_assemble(struct ir3 *shader,
370
		struct ir3_info *info, uint32_t gpu_id);
371
void * ir3_alloc(struct ir3 *shader, int sz);
372
 
373
struct ir3_block * ir3_block_create(struct ir3 *shader,
374
		unsigned ntmp, unsigned nin, unsigned nout);
375
 
376
struct ir3_instruction * ir3_instr_create(struct ir3_block *block,
377
		int category, opc_t opc);
378
struct ir3_instruction * ir3_instr_create2(struct ir3_block *block,
379
		int category, opc_t opc, int nreg);
380
struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr);
381
const char *ir3_instr_name(struct ir3_instruction *instr);
382
 
383
struct ir3_register * ir3_reg_create(struct ir3_instruction *instr,
384
		int num, int flags);
385
 
386
 
387
static inline bool ir3_instr_check_mark(struct ir3_instruction *instr)
388
{
389
	if (instr->flags & IR3_INSTR_MARK)
390
		return true;  /* already visited */
391
	instr->flags |= IR3_INSTR_MARK;
392
	return false;
393
}
394
 
395
static inline void ir3_clear_mark(struct ir3 *shader)
396
{
397
	/* TODO would be nice to drop the instruction array.. for
398
	 * new compiler, _clear_mark() is all we use it for, and
399
	 * we could probably manage a linked list instead..
400
	 *
401
	 * Also, we'll probably want to mark instructions within
402
	 * a block, so tracking the list of instrs globally is
403
	 * unlikely to be what we want.
404
	 */
405
	unsigned i;
406
	for (i = 0; i < shader->instrs_count; i++) {
407
		struct ir3_instruction *instr = shader->instrs[i];
408
		instr->flags &= ~IR3_INSTR_MARK;
409
	}
410
}
411
 
412
static inline int ir3_instr_regno(struct ir3_instruction *instr,
413
		struct ir3_register *reg)
414
{
415
	unsigned i;
416
	for (i = 0; i < instr->regs_count; i++)
417
		if (reg == instr->regs[i])
418
			return i;
419
	return -1;
420
}
421
 
422
 
423
/* NOTE(review): not referenced anywhere in this header; presumably an
 * upper bound on relatively-addressed arrays used elsewhere -- confirm.
 */
#define MAX_ARRAYS 16
424
 
425
/* Pack a register number and component into a single register id:
 * (num << 2) | comp, ie. the component lives in the low two bits.
 *
 * comp:
 *   0 - x
 *   1 - y
 *   2 - z
 *   3 - w
 *
 * Only the low two bits of 'comp' are used.  The arithmetic is done on
 * unsigned values so that a negative 'num' does not trigger undefined
 * behaviour in the shift; results for non-negative 'num' are unchanged.
 */
static inline uint32_t regid(int num, int comp)
{
	return ((uint32_t)num << 2) | ((uint32_t)comp & 0x3);
}
435
 
436
static inline uint32_t reg_num(struct ir3_register *reg)
437
{
438
	return reg->num >> 2;
439
}
440
 
441
static inline uint32_t reg_comp(struct ir3_register *reg)
442
{
443
	return reg->num & 0x3;
444
}
445
 
446
static inline bool is_flow(struct ir3_instruction *instr)
447
{
448
	return (instr->category == 0);
449
}
450
 
451
static inline bool is_kill(struct ir3_instruction *instr)
452
{
453
	return is_flow(instr) && (instr->opc == OPC_KILL);
454
}
455
 
456
static inline bool is_nop(struct ir3_instruction *instr)
457
{
458
	return is_flow(instr) && (instr->opc == OPC_NOP);
459
}
460
 
461
/* Is it a non-transformative (ie. not type changing) mov?  This can
462
 * also include absneg.s/absneg.f, which for the most part can be
463
 * treated as a mov (single src argument).
464
 */
465
static inline bool is_same_type_mov(struct ir3_instruction *instr)
466
{
467
	struct ir3_register *dst = instr->regs[0];
468
 
469
	/* mov's that write to a0.x or p0.x are special: */
470
	if (dst->num == regid(REG_P0, 0))
471
		return false;
472
	if (dst->num == regid(REG_A0, 0))
473
		return false;
474
 
475
	if ((instr->category == 1) &&
476
			(instr->cat1.src_type == instr->cat1.dst_type))
477
		return true;
478
	if ((instr->category == 2) && ((instr->opc == OPC_ABSNEG_F) ||
479
			(instr->opc == OPC_ABSNEG_S)))
480
		return true;
481
	return false;
482
}
483
 
484
static inline bool is_alu(struct ir3_instruction *instr)
485
{
486
	return (1 <= instr->category) && (instr->category <= 3);
487
}
488
 
489
static inline bool is_sfu(struct ir3_instruction *instr)
490
{
491
	return (instr->category == 4);
492
}
493
 
494
static inline bool is_tex(struct ir3_instruction *instr)
495
{
496
	return (instr->category == 5);
497
}
498
 
499
static inline bool is_mem(struct ir3_instruction *instr)
500
{
501
	return (instr->category == 6);
502
}
503
 
504
static inline bool is_input(struct ir3_instruction *instr)
505
{
506
	/* in some cases, ldlv is used to fetch varying without
507
	 * interpolation.. fortunately inloc is the first src
508
	 * register in either case
509
	 */
510
	if (is_mem(instr) && (instr->opc == OPC_LDLV))
511
		return true;
512
	return (instr->category == 2) && (instr->opc == OPC_BARY_F);
513
}
514
 
515
static inline bool is_meta(struct ir3_instruction *instr)
516
{
517
	/* TODO how should we count PHI (and maybe fan-in/out) which
518
	 * might actually contribute some instructions to the final
519
	 * result?
520
	 */
521
	return (instr->category == -1);
522
}
523
 
524
static inline bool writes_addr(struct ir3_instruction *instr)
525
{
526
	if (instr->regs_count > 0) {
527
		struct ir3_register *dst = instr->regs[0];
528
		return !!(dst->flags & IR3_REG_ADDR);
529
	}
530
	return false;
531
}
532
 
533
static inline bool writes_pred(struct ir3_instruction *instr)
534
{
535
	if (instr->regs_count > 0) {
536
		struct ir3_register *dst = instr->regs[0];
537
		return reg_num(dst) == REG_P0;
538
	}
539
	return false;
540
}
541
 
542
/* returns defining instruction for reg */
543
/* TODO better name */
544
static inline struct ir3_instruction *ssa(struct ir3_register *reg)
545
{
546
	if (reg->flags & IR3_REG_SSA)
547
		return reg->instr;
548
	return NULL;
549
}
550
 
551
/* Two instruction pointers conflict when both are non-NULL and they
 * refer to different instructions.  Neither pointer is dereferenced.
 */
static inline bool conflicts(struct ir3_instruction *a,
		struct ir3_instruction *b)
{
	return (a && b) && (a != b);
}
556
 
557
static inline bool reg_gpr(struct ir3_register *r)
558
{
559
	if (r->flags & (IR3_REG_CONST | IR3_REG_IMMED | IR3_REG_ADDR))
560
		return false;
561
	if ((reg_num(r) == REG_A0) || (reg_num(r) == REG_P0))
562
		return false;
563
	return true;
564
}
565
 
566
/* some cat2 instructions (ie. those which are not float) can embed an
567
 * immediate:
568
 */
569
static inline bool ir3_cat2_int(opc_t opc)
570
{
571
	switch (opc) {
572
	case OPC_ADD_U:
573
	case OPC_ADD_S:
574
	case OPC_SUB_U:
575
	case OPC_SUB_S:
576
	case OPC_CMPS_U:
577
	case OPC_CMPS_S:
578
	case OPC_MIN_U:
579
	case OPC_MIN_S:
580
	case OPC_MAX_U:
581
	case OPC_MAX_S:
582
	case OPC_CMPV_U:
583
	case OPC_CMPV_S:
584
	case OPC_MUL_U:
585
	case OPC_MUL_S:
586
	case OPC_MULL_U:
587
	case OPC_CLZ_S:
588
	case OPC_ABSNEG_S:
589
	case OPC_AND_B:
590
	case OPC_OR_B:
591
	case OPC_NOT_B:
592
	case OPC_XOR_B:
593
	case OPC_BFREV_B:
594
	case OPC_CLZ_B:
595
	case OPC_SHL_B:
596
	case OPC_SHR_B:
597
	case OPC_ASHR_B:
598
	case OPC_MGEN_B:
599
	case OPC_GETBIT_B:
600
	case OPC_CBITS_B:
601
	case OPC_BARY_F:
602
		return true;
603
 
604
	default:
605
		return false;
606
	}
607
}
608
 
609
 
610
/* map cat2 instruction to valid abs/neg flags: */
611
static inline unsigned ir3_cat2_absneg(opc_t opc)
612
{
613
	switch (opc) {
614
	case OPC_ADD_F:
615
	case OPC_MIN_F:
616
	case OPC_MAX_F:
617
	case OPC_MUL_F:
618
	case OPC_SIGN_F:
619
	case OPC_CMPS_F:
620
	case OPC_ABSNEG_F:
621
	case OPC_CMPV_F:
622
	case OPC_FLOOR_F:
623
	case OPC_CEIL_F:
624
	case OPC_RNDNE_F:
625
	case OPC_RNDAZ_F:
626
	case OPC_TRUNC_F:
627
	case OPC_BARY_F:
628
		return IR3_REG_FABS | IR3_REG_FNEG;
629
 
630
	case OPC_ADD_U:
631
	case OPC_ADD_S:
632
	case OPC_SUB_U:
633
	case OPC_SUB_S:
634
	case OPC_CMPS_U:
635
	case OPC_CMPS_S:
636
	case OPC_MIN_U:
637
	case OPC_MIN_S:
638
	case OPC_MAX_U:
639
	case OPC_MAX_S:
640
	case OPC_CMPV_U:
641
	case OPC_CMPV_S:
642
	case OPC_MUL_U:
643
	case OPC_MUL_S:
644
	case OPC_MULL_U:
645
	case OPC_CLZ_S:
646
		return 0;
647
 
648
	case OPC_ABSNEG_S:
649
		return IR3_REG_SABS | IR3_REG_SNEG;
650
 
651
	case OPC_AND_B:
652
	case OPC_OR_B:
653
	case OPC_NOT_B:
654
	case OPC_XOR_B:
655
	case OPC_BFREV_B:
656
	case OPC_CLZ_B:
657
	case OPC_SHL_B:
658
	case OPC_SHR_B:
659
	case OPC_ASHR_B:
660
	case OPC_MGEN_B:
661
	case OPC_GETBIT_B:
662
	case OPC_CBITS_B:
663
		return IR3_REG_BNOT;
664
 
665
	default:
666
		return 0;
667
	}
668
}
669
 
670
/* map cat3 instructions to valid abs/neg flags: */
671
static inline unsigned ir3_cat3_absneg(opc_t opc)
672
{
673
	switch (opc) {
674
	case OPC_MAD_F16:
675
	case OPC_MAD_F32:
676
	case OPC_SEL_F16:
677
	case OPC_SEL_F32:
678
		return IR3_REG_FNEG;
679
 
680
	case OPC_MAD_U16:
681
	case OPC_MADSH_U16:
682
	case OPC_MAD_S16:
683
	case OPC_MADSH_M16:
684
	case OPC_MAD_U24:
685
	case OPC_MAD_S24:
686
	case OPC_SEL_S16:
687
	case OPC_SEL_S32:
688
	case OPC_SAD_S16:
689
	case OPC_SAD_S32:
690
		/* neg *may* work on 3rd src.. */
691
 
692
	case OPC_SEL_B16:
693
	case OPC_SEL_B32:
694
 
695
	default:
696
		return 0;
697
	}
698
}
699
 
700
/* Append 'val' to the growable array 'arr'.
 *
 * The macro relies on two companion lvalues whose names are formed by
 * token pasting onto the last token of 'arr': <arr>_count (elements in
 * use) and <arr>_sz (allocated capacity).  When the array is full the
 * capacity becomes MAX2(2 * <arr>_sz, 16) and the storage is
 * realloc()ed.  'arr' is expanded several times, so it must be free of
 * side effects.
 *
 * NOTE(review): the realloc() result is assigned back unchecked, so an
 * allocation failure loses the old buffer and the following store
 * writes through NULL.
 */
#define array_insert(arr, val) do { \
		if (arr ## _count == arr ## _sz) { \
			arr ## _sz = MAX2(2 * arr ## _sz, 16); \
			arr = realloc(arr, arr ## _sz * sizeof(arr[0])); \
		} \
		arr[arr ## _count++] = val; \
	} while (0)
707
 
708
/* iterator for an instruction's sources (reg), also returns src #:
 *
 * Visits regs[1] .. regs[regs_count - 1] (regs[0], the dst, is skipped)
 * and assigns each non-NULL entry to __srcreg; NULL entries are skipped.
 * __n is declared by the macro as an unsigned loop variable holding the
 * zero-based source index.  The loop body follows the macro invocation.
 * __instr is expanded several times and must be free of side effects.
 */
#define foreach_src_n(__srcreg, __n, __instr) \
	if ((__instr)->regs_count) \
		for (unsigned __cnt = (__instr)->regs_count - 1, __n = 0; __n < __cnt; __n++) \
			if ((__srcreg = (__instr)->regs[__n + 1]))
713
 
714
/* iterator for an instruction's sources (reg):
 *
 * Same as foreach_src_n() with the index variable hidden under the
 * name __i.
 */
#define foreach_src(__srcreg, __instr) \
	foreach_src_n(__srcreg, __i, __instr)
717
 
718
static inline unsigned __ssa_src_cnt(struct ir3_instruction *instr)
719
{
720
	if (instr->fanin)
721
		return instr->regs_count + 2;
722
	if (instr->address)
723
		return instr->regs_count + 1;
724
	return instr->regs_count;
725
}
726
 
727
static inline struct ir3_instruction * __ssa_src_n(struct ir3_instruction *instr, unsigned n)
728
{
729
	if (n == (instr->regs_count + 1))
730
		return instr->fanin;
731
	if (n == (instr->regs_count + 0))
732
		return instr->address;
733
	return ssa(instr->regs[n]);
734
}
735
 
736
/* Source count of an instruction: regs_count - 1 (the dst is excluded),
 * plus one when the instruction has an address dependency.
 * NOTE(review): nothing else in this header uses this macro.
 */
#define __src_cnt(__instr) ((__instr)->address ? (__instr)->regs_count : (__instr)->regs_count - 1)
737
 
738
/* iterator for an instruction's SSA sources (instr), also returns src #:
 *
 * Walks slots 1 .. __ssa_src_cnt(__instr) - 1 through __ssa_src_n(), so
 * the regular srcs are visited first, followed by the address and fanin
 * dependencies when present.  Slots that yield NULL are skipped.  __n is
 * declared by the macro as an unsigned loop variable.  Nothing is
 * visited when regs_count is 0.
 */
#define foreach_ssa_src_n(__srcinst, __n, __instr) \
	if ((__instr)->regs_count) \
		for (unsigned __cnt = __ssa_src_cnt(__instr) - 1, __n = 0; __n < __cnt; __n++) \
			if ((__srcinst = __ssa_src_n(__instr, __n + 1)))
743
 
744
/* iterator for an instruction's SSA sources (instr):
 *
 * Same as foreach_ssa_src_n() with the index variable hidden under the
 * name __i.
 */
#define foreach_ssa_src(__srcinst, __instr) \
	foreach_ssa_src_n(__srcinst, __i, __instr)
747
 
748
 
749
/* Entry points of the individual compiler passes.  The implementations
 * live outside this header; the section comments are all that can be
 * said about them from here.
 */

/* dump: */
#include <stdio.h>   /* FILE */
void ir3_dump(struct ir3 *shader, const char *name,
		struct ir3_block *block /* XXX maybe 'block' ptr should move to ir3? */,
		FILE *f);
void ir3_dump_instr_single(struct ir3_instruction *instr);
void ir3_dump_instr_list(struct ir3_instruction *instr);

/* flatten if/else: */
int ir3_block_flatten(struct ir3_block *block);

/* depth calculation: */
int ir3_delayslots(struct ir3_instruction *assigner,
		struct ir3_instruction *consumer, unsigned n);
void ir3_block_depth(struct ir3_block *block);

/* copy-propagate: */
void ir3_block_cp(struct ir3_block *block);

/* group neighbors and insert mov's to resolve conflicts: */
void ir3_block_group(struct ir3_block *block);

/* scheduling: */
int ir3_block_sched(struct ir3_block *block);

/* register assignment: */
int ir3_block_ra(struct ir3_block *block, enum shader_t type,
		bool frag_coord, bool frag_face);

/* legalize: */
void ir3_block_legalize(struct ir3_block *block,
		bool *has_samp, int *max_bary);
781
 
782
/* ************************************************************************* */
783
/* instruction helpers */
784
 
785
static inline struct ir3_instruction *
786
ir3_MOV(struct ir3_block *block, struct ir3_instruction *src, type_t type)
787
{
788
	struct ir3_instruction *instr =
789
		ir3_instr_create(block, 1, 0);
790
	ir3_reg_create(instr, 0, 0);   /* dst */
791
	ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src;
792
	instr->cat1.src_type = type;
793
	instr->cat1.dst_type = type;
794
	return instr;
795
}
796
 
797
static inline struct ir3_instruction *
798
ir3_COV(struct ir3_block *block, struct ir3_instruction *src,
799
		type_t src_type, type_t dst_type)
800
{
801
	struct ir3_instruction *instr =
802
		ir3_instr_create(block, 1, 0);
803
	ir3_reg_create(instr, 0, 0);   /* dst */
804
	ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src;
805
	instr->cat1.src_type = src_type;
806
	instr->cat1.dst_type = dst_type;
807
	return instr;
808
}
809
 
810
/* Define ir3_<name>(): a builder for a one-source instruction of
 * category CAT with opcode OPC_<name>.  The generated function creates
 * the instruction in 'block', adds a dst register and one SSA src
 * register pointing at 'a' (with 'aflags' or'd into its flags), and
 * returns the new instruction.
 */
#define INSTR1(CAT, name)                                                \
static inline struct ir3_instruction *                                   \
ir3_##name(struct ir3_block *block,                                      \
		struct ir3_instruction *a, unsigned aflags)                      \
{                                                                        \
	struct ir3_instruction *instr =                                      \
		ir3_instr_create(block, CAT, OPC_##name);                        \
	ir3_reg_create(instr, 0, 0);   /* dst */                             \
	ir3_reg_create(instr, 0, IR3_REG_SSA | aflags)->instr = a;           \
	return instr;                                                        \
}
821
 
822
/* Define ir3_<name>(): a builder for a two-source instruction of
 * category CAT with opcode OPC_<name>.  The generated function creates
 * the instruction in 'block', adds a dst register followed by SSA src
 * registers for 'a' and 'b' (in that order, each with its own extra
 * flags), and returns the new instruction.
 */
#define INSTR2(CAT, name)                                                \
static inline struct ir3_instruction *                                   \
ir3_##name(struct ir3_block *block,                                      \
		struct ir3_instruction *a, unsigned aflags,                      \
		struct ir3_instruction *b, unsigned bflags)                      \
{                                                                        \
	struct ir3_instruction *instr =                                      \
		ir3_instr_create(block, CAT, OPC_##name);                        \
	ir3_reg_create(instr, 0, 0);   /* dst */                             \
	ir3_reg_create(instr, 0, IR3_REG_SSA | aflags)->instr = a;           \
	ir3_reg_create(instr, 0, IR3_REG_SSA | bflags)->instr = b;           \
	return instr;                                                        \
}
835
 
836
/* Define ir3_<name>(): a builder for a three-source instruction of
 * category CAT with opcode OPC_<name>.  The generated function creates
 * the instruction in 'block', adds a dst register followed by SSA src
 * registers for 'a', 'b' and 'c' (in that order, each with its own
 * extra flags), and returns the new instruction.
 */
#define INSTR3(CAT, name)                                                \
static inline struct ir3_instruction *                                   \
ir3_##name(struct ir3_block *block,                                      \
		struct ir3_instruction *a, unsigned aflags,                      \
		struct ir3_instruction *b, unsigned bflags,                      \
		struct ir3_instruction *c, unsigned cflags)                      \
{                                                                        \
	struct ir3_instruction *instr =                                      \
		ir3_instr_create(block, CAT, OPC_##name);                        \
	ir3_reg_create(instr, 0, 0);   /* dst */                             \
	ir3_reg_create(instr, 0, IR3_REG_SSA | aflags)->instr = a;           \
	ir3_reg_create(instr, 0, IR3_REG_SSA | bflags)->instr = b;           \
	ir3_reg_create(instr, 0, IR3_REG_SSA | cflags)->instr = c;           \
	return instr;                                                        \
}
851
 
852
/* cat0 instructions: */
853
INSTR1(0, KILL);
854
 
855
/* cat2 instructions, most 2 src but some 1 src: */
856
INSTR2(2, ADD_F)
857
INSTR2(2, MIN_F)
858
INSTR2(2, MAX_F)
859
INSTR2(2, MUL_F)
860
INSTR1(2, SIGN_F)
861
INSTR2(2, CMPS_F)
862
INSTR1(2, ABSNEG_F)
863
INSTR2(2, CMPV_F)
864
INSTR1(2, FLOOR_F)
865
INSTR1(2, CEIL_F)
866
INSTR1(2, RNDNE_F)
867
INSTR1(2, RNDAZ_F)
868
INSTR1(2, TRUNC_F)
869
INSTR2(2, ADD_U)
870
INSTR2(2, ADD_S)
871
INSTR2(2, SUB_U)
872
INSTR2(2, SUB_S)
873
INSTR2(2, CMPS_U)
874
INSTR2(2, CMPS_S)
875
INSTR2(2, MIN_U)
876
INSTR2(2, MIN_S)
877
INSTR2(2, MAX_U)
878
INSTR2(2, MAX_S)
879
INSTR1(2, ABSNEG_S)
880
INSTR2(2, AND_B)
881
INSTR2(2, OR_B)
882
INSTR1(2, NOT_B)
883
INSTR2(2, XOR_B)
884
INSTR2(2, CMPV_U)
885
INSTR2(2, CMPV_S)
886
INSTR2(2, MUL_U)
887
INSTR2(2, MUL_S)
888
INSTR2(2, MULL_U)
889
INSTR1(2, BFREV_B)
890
INSTR1(2, CLZ_S)
891
INSTR1(2, CLZ_B)
892
INSTR2(2, SHL_B)
893
INSTR2(2, SHR_B)
894
INSTR2(2, ASHR_B)
895
INSTR2(2, BARY_F)
896
INSTR2(2, MGEN_B)
897
INSTR2(2, GETBIT_B)
898
INSTR1(2, SETRM)
899
INSTR1(2, CBITS_B)
900
INSTR2(2, SHB)
901
INSTR2(2, MSAD)
902
 
903
/* cat3 instructions: */
904
INSTR3(3, MAD_U16)
905
INSTR3(3, MADSH_U16)
906
INSTR3(3, MAD_S16)
907
INSTR3(3, MADSH_M16)
908
INSTR3(3, MAD_U24)
909
INSTR3(3, MAD_S24)
910
INSTR3(3, MAD_F16)
911
INSTR3(3, MAD_F32)
912
INSTR3(3, SEL_B16)
913
INSTR3(3, SEL_B32)
914
INSTR3(3, SEL_S16)
915
INSTR3(3, SEL_S32)
916
INSTR3(3, SEL_F16)
917
INSTR3(3, SEL_F32)
918
INSTR3(3, SAD_S16)
919
INSTR3(3, SAD_S32)
920
 
921
/* cat4 instructions: */
922
INSTR1(4, RCP)
923
INSTR1(4, RSQ)
924
INSTR1(4, LOG2)
925
INSTR1(4, EXP2)
926
INSTR1(4, SIN)
927
INSTR1(4, COS)
928
INSTR1(4, SQRT)
929
 
930
/* cat5 instructions: */
931
INSTR1(5, DSX)
932
INSTR1(5, DSY)
933
 
934
static inline struct ir3_instruction *
935
ir3_SAM(struct ir3_block *block, opc_t opc, type_t type,
936
		unsigned wrmask, unsigned flags, unsigned samp, unsigned tex,
937
		struct ir3_instruction *src0, struct ir3_instruction *src1)
938
{
939
	struct ir3_instruction *sam;
940
	struct ir3_register *reg;
941
 
942
	sam = ir3_instr_create(block, 5, opc);
943
	sam->flags |= flags;
944
	ir3_reg_create(sam, 0, 0)->wrmask = wrmask;
945
	if (src0) {
946
		reg = ir3_reg_create(sam, 0, IR3_REG_SSA);
947
		reg->wrmask = (1 << (src0->regs_count - 1)) - 1;
948
		reg->instr = src0;
949
	}
950
	if (src1) {
951
		reg = ir3_reg_create(sam, 0, IR3_REG_SSA);
952
		reg->instr = src1;
953
		reg->wrmask = (1 << (src1->regs_count - 1)) - 1;
954
	}
955
	sam->cat5.samp = samp;
956
	sam->cat5.tex  = tex;
957
	sam->cat5.type  = type;
958
 
959
	return sam;
960
}
961
 
962
/* cat6 instructions: */
963
INSTR2(6, LDLV)
964
INSTR2(6, LDG)
965
 
966
/* ************************************************************************* */
/* split this out or find some helper to use.. like main/bitset.h.. */

#include <string.h>   /* memset(), used by regmask_init() */

/* number of register ids tracked per register file half (see below) */
#define MAX_REG 256

/* Bitmask with one bit per register id.  It holds 2 * MAX_REG bits:
 * regmask_idx() maps full registers to bits [0, MAX_REG) and half
 * registers to bits [MAX_REG, 2 * MAX_REG).
 */
typedef uint8_t regmask_t[2 * MAX_REG / 8];
974
 
975
static inline unsigned regmask_idx(struct ir3_register *reg)
976
{
977
	unsigned num = reg->num;
978
	debug_assert(num < MAX_REG);
979
	if (reg->flags & IR3_REG_HALF)
980
		num += MAX_REG;
981
	return num;
982
}
983
 
984
/* Clear every bit of '*regmask'. */
static inline void regmask_init(regmask_t *regmask)
{
	memset(regmask, 0, sizeof(*regmask));
}
988
 
989
/* Set the bits of '*regmask' covered by 'reg', starting at
 * regmask_idx(reg):
 *  - relative registers (IR3_REG_RELATIV) cover reg->size consecutive
 *    bits;
 *  - all others cover the positions selected by reg->wrmask, where
 *    wrmask bit k stands for index + k.
 * No bounds check is made against the size of the mask.
 */
static inline void regmask_set(regmask_t *regmask, struct ir3_register *reg)
{
	unsigned idx = regmask_idx(reg);
	if (reg->flags & IR3_REG_RELATIV) {
		unsigned i;
		for (i = 0; i < reg->size; i++, idx++)
			(*regmask)[idx / 8] |= 1 << (idx % 8);
	} else {
		unsigned mask;
		for (mask = reg->wrmask; mask; mask >>= 1, idx++)
			if (mask & 1)
				(*regmask)[idx / 8] |= 1 << (idx % 8);
	}
}
1003
 
1004
/* Store the bitwise OR of '*a' and '*b' into '*dst'.  The masks are
 * combined byte by byte, so 'dst' may alias 'a' or 'b'.
 */
static inline void regmask_or(regmask_t *dst, regmask_t *a, regmask_t *b)
{
	unsigned i;
	for (i = 0; i < ARRAY_SIZE(*dst); i++)
		(*dst)[i] = (*a)[i] | (*b)[i];
}
1010
 
1011
/* set bits in a if not set in b, conceptually:
1012
 *   a |= (reg & ~b)
1013
 */
1014
static inline void regmask_set_if_not(regmask_t *a,
1015
		struct ir3_register *reg, regmask_t *b)
1016
{
1017
	unsigned idx = regmask_idx(reg);
1018
	if (reg->flags & IR3_REG_RELATIV) {
1019
		unsigned i;
1020
		for (i = 0; i < reg->size; i++, idx++)
1021
			if (!((*b)[idx / 8] & (1 << (idx % 8))))
1022
				(*a)[idx / 8] |= 1 << (idx % 8);
1023
	} else {
1024
		unsigned mask;
1025
		for (mask = reg->wrmask; mask; mask >>= 1, idx++)
1026
			if (mask & 1)
1027
				if (!((*b)[idx / 8] & (1 << (idx % 8))))
1028
					(*a)[idx / 8] |= 1 << (idx % 8);
1029
	}
1030
}
1031
 
1032
/* Return true if any bit covered by 'reg' is set in '*regmask'.  The
 * covered bits are determined exactly as in regmask_set(): reg->size
 * consecutive bits for relative registers, otherwise the positions
 * selected by reg->wrmask.
 */
static inline bool regmask_get(regmask_t *regmask,
		struct ir3_register *reg)
{
	unsigned idx = regmask_idx(reg);
	if (reg->flags & IR3_REG_RELATIV) {
		unsigned i;
		for (i = 0; i < reg->size; i++, idx++)
			if ((*regmask)[idx / 8] & (1 << (idx % 8)))
				return true;
	} else {
		unsigned mask;
		for (mask = reg->wrmask; mask; mask >>= 1, idx++)
			if (mask & 1)
				if ((*regmask)[idx / 8] & (1 << (idx % 8)))
					return true;
	}
	return false;
}
1050
 
1051
/* ************************************************************************* */
1052
 
1053
#endif /* IR3_H_ */