Subversion Repositories Kolibri OS

Rev

Rev 4358 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
4358 Serge 1
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
2
 
3
/*
4
 * Copyright (C) 2013 Rob Clark 
5
 *
6
 * Permission is hereby granted, free of charge, to any person obtaining a
7
 * copy of this software and associated documentation files (the "Software"),
8
 * to deal in the Software without restriction, including without limitation
9
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10
 * and/or sell copies of the Software, and to permit persons to whom the
11
 * Software is furnished to do so, subject to the following conditions:
12
 *
13
 * The above copyright notice and this permission notice (including the next
14
 * paragraph) shall be included in all copies or substantial portions of the
15
 * Software.
16
 *
17
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23
 * SOFTWARE.
24
 *
25
 * Authors:
26
 *    Rob Clark 
27
 */
28
 
29
#include <stdarg.h>
30
 
31
#include "pipe/p_state.h"
32
#include "util/u_string.h"
33
#include "util/u_memory.h"
34
#include "util/u_inlines.h"
35
#include "tgsi/tgsi_parse.h"
36
#include "tgsi/tgsi_ureg.h"
37
#include "tgsi/tgsi_info.h"
38
#include "tgsi/tgsi_strings.h"
39
#include "tgsi/tgsi_dump.h"
40
#include "tgsi/tgsi_scan.h"
41
 
42
#include "fd3_compiler.h"
43
#include "fd3_program.h"
44
#include "fd3_util.h"
45
 
46
#include "instr-a3xx.h"
47
#include "ir-a3xx.h"
48
 
49
/* ************************************************************************* */
50
/* TODO: split this out, or find an existing helper to use (e.g. main/bitset.h) */
51
 
52
/* Number of scalar register slots tracked per precision. */
#define MAX_REG 256

/* Bitmask with one bit per scalar register.  Bits [0, MAX_REG) describe
 * full-precision registers and bits [MAX_REG, 2 * MAX_REG) describe
 * half-precision registers (see regmask_idx()).  The byte count is
 * rounded up so the mask stays large enough even if MAX_REG is changed
 * to a value that is not a multiple of four.
 */
typedef uint8_t regmask_t[(2 * MAX_REG + 7) / 8];
55
 
56
static unsigned regmask_idx(struct ir3_register *reg)
57
{
58
	unsigned num = reg->num;
59
	assert(num < MAX_REG);
60
	if (reg->flags & IR3_REG_HALF)
61
		num += MAX_REG;
62
	return num;
63
}
64
 
4401 Serge 65
/* Mark the components of 'reg' selected by 'wrmask' (bit n selects the
 * n-th component counting from reg) as set in 'regmask'.
 */
static void regmask_set(regmask_t regmask, struct ir3_register *reg,
		unsigned wrmask)
{
	unsigned chan;

	for (chan = 0; chan < 4; chan++) {
		unsigned bit;

		if (!(wrmask & (1 << chan)))
			continue;

		bit = regmask_idx(reg) + chan;
		regmask[bit / 8] |= 1 << (bit % 8);
	}
}
76
 
77
/* Return non-zero if the bit for 'reg' is set in 'regmask'. */
static unsigned regmask_get(regmask_t regmask, struct ir3_register *reg)
{
	unsigned bit = regmask_idx(reg);
	unsigned byte = regmask[bit / 8];

	return byte & (1 << (bit % 8));
}
82
 
83
/* ************************************************************************* */
84
 
85
struct fd3_compile_context {
86
	const struct tgsi_token *tokens;
87
	struct ir3_shader *ir;
88
	struct fd3_shader_stateobj *so;
89
 
90
	struct tgsi_parse_context parser;
91
	unsigned type;
92
 
93
	struct tgsi_shader_info info;
94
 
95
	/* last input dst (for setting (ei) flag): */
96
	struct ir3_register *last_input;
97
 
98
	unsigned next_inloc;
99
	unsigned num_internal_temps;
4401 Serge 100
	struct tgsi_src_register internal_temps[6];
4358 Serge 101
 
102
	/* track registers which need to synchronize w/ "complex alu" cat3
103
	 * instruction pipeline:
104
	 */
105
	regmask_t needs_ss;
106
 
107
	/* track registers which need to synchronize with texture fetch
108
	 * pipeline:
109
	 */
110
	regmask_t needs_sy;
111
 
112
	/* inputs start at r0, temporaries start after last input, and
113
	 * outputs start after last temporary.
114
	 *
115
	 * We could be more clever, because this is not a hw restriction,
116
	 * but probably best just to implement an optimizing pass to
117
	 * reduce the # of registers used and get rid of redundant mov's
118
	 * (to output register).
119
	 */
120
	unsigned base_reg[TGSI_FILE_COUNT];
121
 
122
	/* idx/slot for last compiler generated immediate */
123
	unsigned immediate_idx;
124
 
125
	/* stack of branch instructions that start (potentially nested)
126
	 * branch instructions, so that we can fix up the branch targets
127
	 * so that we can fix up the branch target on the corresponding
128
	 * END instruction
129
	 */
130
	struct ir3_instruction *branch[16];
131
	unsigned int branch_count;
132
 
133
	/* used when dst is same as one of the src, to avoid overwriting a
134
	 * src element before the remaining scalar instructions that make
135
	 * up the vector operation
136
	 */
137
	struct tgsi_dst_register tmp_dst;
4401 Serge 138
	struct tgsi_src_register *tmp_src;
4358 Serge 139
};
140
 
4401 Serge 141
 
142
/* Forward declarations for helpers that are used before their
 * definitions further down in this file.
 */
static void vectorize(struct fd3_compile_context *ctx,
		struct ir3_instruction *instr,
		struct tgsi_dst_register *dst, int nsrcs, ...);
static void create_mov(struct fd3_compile_context *ctx,
		struct tgsi_dst_register *dst,
		struct tgsi_src_register *src);
147
 
4358 Serge 148
static unsigned
149
compile_init(struct fd3_compile_context *ctx, struct fd3_shader_stateobj *so,
150
		const struct tgsi_token *tokens)
151
{
152
	unsigned ret;
153
 
154
	ctx->tokens = tokens;
155
	ctx->ir = so->ir;
156
	ctx->so = so;
157
	ctx->last_input = NULL;
158
	ctx->next_inloc = 8;
159
	ctx->num_internal_temps = 0;
160
	ctx->branch_count = 0;
161
 
162
	memset(ctx->needs_ss, 0, sizeof(ctx->needs_ss));
163
	memset(ctx->needs_sy, 0, sizeof(ctx->needs_sy));
164
	memset(ctx->base_reg, 0, sizeof(ctx->base_reg));
165
 
166
	tgsi_scan_shader(tokens, &ctx->info);
167
 
168
	/* Immediates go after constants: */
169
	ctx->base_reg[TGSI_FILE_CONSTANT]  = 0;
170
	ctx->base_reg[TGSI_FILE_IMMEDIATE] =
4401 Serge 171
			ctx->info.file_max[TGSI_FILE_CONSTANT] + 1;
4358 Serge 172
 
173
	/* Temporaries after outputs after inputs: */
174
	ctx->base_reg[TGSI_FILE_INPUT]     = 0;
175
	ctx->base_reg[TGSI_FILE_OUTPUT]    =
4401 Serge 176
			ctx->info.file_max[TGSI_FILE_INPUT] + 1;
4358 Serge 177
	ctx->base_reg[TGSI_FILE_TEMPORARY] =
4401 Serge 178
			ctx->info.file_max[TGSI_FILE_INPUT] + 1 +
179
			ctx->info.file_max[TGSI_FILE_OUTPUT] + 1;
4358 Serge 180
 
181
	so->first_immediate = ctx->base_reg[TGSI_FILE_IMMEDIATE];
4401 Serge 182
	ctx->immediate_idx = 4 * (ctx->info.file_max[TGSI_FILE_CONSTANT] + 1 +
183
			ctx->info.file_max[TGSI_FILE_IMMEDIATE] + 1);
4358 Serge 184
 
185
	ret = tgsi_parse_init(&ctx->parser, tokens);
186
	if (ret != TGSI_PARSE_OK)
187
		return ret;
188
 
189
	ctx->type = ctx->parser.FullHeader.Processor.Processor;
190
 
191
	return ret;
192
}
193
 
194
static void
4401 Serge 195
compile_error(struct fd3_compile_context *ctx, const char *format, ...)
196
{
197
	va_list ap;
198
	va_start(ap, format);
199
	_debug_vprintf(format, ap);
200
	va_end(ap);
201
	tgsi_dump(ctx->tokens, 0);
202
	assert(0);
203
}
204
 
205
/* Like assert(), but reports the failure through compile_error() so the
 * offending shader gets dumped.  The stringified condition is passed as
 * a "%s" argument rather than spliced into the format string, so a '%'
 * inside the condition (e.g. a modulo) is never parsed as a conversion.
 */
#define compile_assert(ctx, cond) do { \
		if (!(cond)) \
			compile_error((ctx), "failed assert: %s\n", #cond); \
	} while (0)
208
 
209
static void
4358 Serge 210
compile_free(struct fd3_compile_context *ctx)
211
{
212
	tgsi_parse_free(&ctx->parser);
213
}
214
 
215
struct instr_translater {
216
	void (*fxn)(const struct instr_translater *t,
217
			struct fd3_compile_context *ctx,
218
			struct tgsi_full_instruction *inst);
219
	unsigned tgsi_opc;
220
	opc_t opc;
221
	opc_t hopc;    /* opc to use for half_precision mode, if different */
222
	unsigned arg;
223
};
224
 
4401 Serge 225
static unsigned
226
src_flags(struct fd3_compile_context *ctx, struct ir3_register *reg)
227
{
228
	unsigned flags = 0;
229
 
230
	if (regmask_get(ctx->needs_ss, reg)) {
231
		flags |= IR3_INSTR_SS;
232
		memset(ctx->needs_ss, 0, sizeof(ctx->needs_ss));
233
	}
234
 
235
	if (regmask_get(ctx->needs_sy, reg)) {
236
		flags |= IR3_INSTR_SY;
237
		memset(ctx->needs_sy, 0, sizeof(ctx->needs_sy));
238
	}
239
 
240
	return flags;
241
}
242
 
4358 Serge 243
static struct ir3_register *
244
add_dst_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
245
		const struct tgsi_dst_register *dst, unsigned chan)
246
{
247
	unsigned flags = 0, num = 0;
248
 
249
	switch (dst->File) {
250
	case TGSI_FILE_OUTPUT:
251
	case TGSI_FILE_TEMPORARY:
252
		num = dst->Index + ctx->base_reg[dst->File];
253
		break;
254
	default:
4401 Serge 255
		compile_error(ctx, "unsupported dst register file: %s\n",
4358 Serge 256
			tgsi_file_name(dst->File));
257
		break;
258
	}
259
 
260
	if (ctx->so->half_precision)
261
		flags |= IR3_REG_HALF;
262
 
263
	return ir3_reg_create(instr, regid(num, chan), flags);
264
}
265
 
266
static struct ir3_register *
267
add_src_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
268
		const struct tgsi_src_register *src, unsigned chan)
269
{
270
	unsigned flags = 0, num = 0;
271
	struct ir3_register *reg;
272
 
273
	switch (src->File) {
274
	case TGSI_FILE_IMMEDIATE:
275
		/* TODO if possible, use actual immediate instead of const.. but
276
		 * TGSI has vec4 immediates, we can only embed scalar (of limited
277
		 * size, depending on instruction..)
278
		 */
279
	case TGSI_FILE_CONSTANT:
280
		flags |= IR3_REG_CONST;
281
		num = src->Index + ctx->base_reg[src->File];
282
		break;
4401 Serge 283
	case TGSI_FILE_OUTPUT:
284
		/* NOTE: we should only end up w/ OUTPUT file for things like
285
		 * clamp()'ing saturated dst instructions
286
		 */
4358 Serge 287
	case TGSI_FILE_INPUT:
288
	case TGSI_FILE_TEMPORARY:
289
		num = src->Index + ctx->base_reg[src->File];
290
		break;
291
	default:
4401 Serge 292
		compile_error(ctx, "unsupported src register file: %s\n",
4358 Serge 293
			tgsi_file_name(src->File));
294
		break;
295
	}
296
 
297
	if (src->Absolute)
298
		flags |= IR3_REG_ABS;
299
	if (src->Negate)
300
		flags |= IR3_REG_NEGATE;
301
	if (ctx->so->half_precision)
302
		flags |= IR3_REG_HALF;
303
 
304
	reg = ir3_reg_create(instr, regid(num, chan), flags);
305
 
4401 Serge 306
	instr->flags |= src_flags(ctx, reg);
4358 Serge 307
 
308
	return reg;
309
}
310
 
311
static void
312
src_from_dst(struct tgsi_src_register *src, struct tgsi_dst_register *dst)
313
{
314
	src->File      = dst->File;
315
	src->Indirect  = dst->Indirect;
316
	src->Dimension = dst->Dimension;
317
	src->Index     = dst->Index;
318
	src->Absolute  = 0;
319
	src->Negate    = 0;
320
	src->SwizzleX  = TGSI_SWIZZLE_X;
321
	src->SwizzleY  = TGSI_SWIZZLE_Y;
322
	src->SwizzleZ  = TGSI_SWIZZLE_Z;
323
	src->SwizzleW  = TGSI_SWIZZLE_W;
324
}
325
 
326
/* Get internal-temp src/dst to use for a sequence of instructions
327
 * generated by a single TGSI op.
328
 */
4401 Serge 329
static struct tgsi_src_register *
4358 Serge 330
get_internal_temp(struct fd3_compile_context *ctx,
4401 Serge 331
		struct tgsi_dst_register *tmp_dst)
4358 Serge 332
{
4401 Serge 333
	struct tgsi_src_register *tmp_src;
4358 Serge 334
	int n;
335
 
336
	tmp_dst->File      = TGSI_FILE_TEMPORARY;
337
	tmp_dst->WriteMask = TGSI_WRITEMASK_XYZW;
338
	tmp_dst->Indirect  = 0;
339
	tmp_dst->Dimension = 0;
340
 
341
	/* assign next temporary: */
342
	n = ctx->num_internal_temps++;
4401 Serge 343
	compile_assert(ctx, n < ARRAY_SIZE(ctx->internal_temps));
344
	tmp_src = &ctx->internal_temps[n];
4358 Serge 345
 
4401 Serge 346
	tmp_dst->Index = ctx->info.file_max[TGSI_FILE_TEMPORARY] + n + 1;
4358 Serge 347
 
348
	src_from_dst(tmp_src, tmp_dst);
4401 Serge 349
 
350
	return tmp_src;
4358 Serge 351
}
352
 
353
/* same as get_internal_temp, but w/ src.xxxx (for instructions that
354
 * replicate their results)
355
 */
4401 Serge 356
static struct tgsi_src_register *
4358 Serge 357
get_internal_temp_repl(struct fd3_compile_context *ctx,
4401 Serge 358
		struct tgsi_dst_register *tmp_dst)
4358 Serge 359
{
4401 Serge 360
	struct tgsi_src_register *tmp_src =
361
			get_internal_temp(ctx, tmp_dst);
4358 Serge 362
	tmp_src->SwizzleX = tmp_src->SwizzleY =
363
		tmp_src->SwizzleZ = tmp_src->SwizzleW = TGSI_SWIZZLE_X;
4401 Serge 364
	return tmp_src;
4358 Serge 365
}
366
 
4401 Serge 367
static inline bool
368
is_const(struct tgsi_src_register *src)
369
{
370
	return (src->File == TGSI_FILE_CONSTANT) ||
371
			(src->File == TGSI_FILE_IMMEDIATE);
372
}
373
 
374
static type_t
375
get_ftype(struct fd3_compile_context *ctx)
376
{
377
	return ctx->so->half_precision ? TYPE_F16 : TYPE_F32;
378
}
379
 
380
static type_t
381
get_utype(struct fd3_compile_context *ctx)
382
{
383
	return ctx->so->half_precision ? TYPE_U16 : TYPE_U32;
384
}
385
 
386
static unsigned
387
src_swiz(struct tgsi_src_register *src, int chan)
388
{
389
	switch (chan) {
390
	case 0: return src->SwizzleX;
391
	case 1: return src->SwizzleY;
392
	case 2: return src->SwizzleZ;
393
	case 3: return src->SwizzleW;
394
	}
395
	assert(0);
396
	return 0;
397
}
398
 
399
/* for instructions that cannot take a const register as src, if needed
400
 * generate a move to temporary gpr:
401
 */
402
static struct tgsi_src_register *
403
get_unconst(struct fd3_compile_context *ctx, struct tgsi_src_register *src)
404
{
405
	struct tgsi_dst_register tmp_dst;
406
	struct tgsi_src_register *tmp_src;
407
 
408
	compile_assert(ctx, is_const(src));
409
 
410
	tmp_src = get_internal_temp(ctx, &tmp_dst);
411
 
412
	create_mov(ctx, &tmp_dst, src);
413
 
414
	return tmp_src;
415
}
416
 
4358 Serge 417
static void
418
get_immediate(struct fd3_compile_context *ctx,
419
		struct tgsi_src_register *reg, uint32_t val)
420
{
421
	unsigned neg, swiz, idx, i;
422
	/* actually maps 1:1 currently.. not sure if that is safe to rely on: */
423
	static const unsigned swiz2tgsi[] = {
424
			TGSI_SWIZZLE_X, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W,
425
	};
426
 
427
	for (i = 0; i < ctx->immediate_idx; i++) {
428
		swiz = i % 4;
429
		idx  = i / 4;
430
 
431
		if (ctx->so->immediates[idx].val[swiz] == val) {
432
			neg = 0;
433
			break;
434
		}
435
 
436
		if (ctx->so->immediates[idx].val[swiz] == -val) {
437
			neg = 1;
438
			break;
439
		}
440
	}
441
 
442
	if (i == ctx->immediate_idx) {
443
		/* need to generate a new immediate: */
444
		swiz = i % 4;
445
		idx  = i / 4;
446
		neg  = 0;
447
		ctx->so->immediates[idx].val[swiz] = val;
448
		ctx->so->immediates_count = idx + 1;
449
		ctx->immediate_idx++;
450
	}
451
 
452
	reg->File      = TGSI_FILE_IMMEDIATE;
453
	reg->Indirect  = 0;
454
	reg->Dimension = 0;
455
	reg->Index     = idx;
456
	reg->Absolute  = 0;
457
	reg->Negate    = neg;
458
	reg->SwizzleX  = swiz2tgsi[swiz];
459
	reg->SwizzleY  = swiz2tgsi[swiz];
460
	reg->SwizzleZ  = swiz2tgsi[swiz];
461
	reg->SwizzleW  = swiz2tgsi[swiz];
462
}
463
 
464
static void
465
create_mov(struct fd3_compile_context *ctx, struct tgsi_dst_register *dst,
466
		struct tgsi_src_register *src)
467
{
4401 Serge 468
	type_t type_mov = get_ftype(ctx);
4358 Serge 469
	unsigned i;
470
 
471
	for (i = 0; i < 4; i++) {
472
		/* move to destination: */
473
		if (dst->WriteMask & (1 << i)) {
474
			struct ir3_instruction *instr =
475
					ir3_instr_create(ctx->ir, 1, 0);
476
			instr->cat1.src_type = type_mov;
477
			instr->cat1.dst_type = type_mov;
478
			add_dst_reg(ctx, instr, dst, i);
479
			add_src_reg(ctx, instr, src, src_swiz(src, i));
480
		} else {
481
			ir3_instr_create(ctx->ir, 0, OPC_NOP);
482
		}
483
	}
4401 Serge 484
}
4358 Serge 485
 
4401 Serge 486
static void
487
create_clamp(struct fd3_compile_context *ctx, struct tgsi_dst_register *dst,
488
		struct tgsi_src_register *minval, struct tgsi_src_register *maxval)
489
{
490
	struct ir3_instruction *instr;
491
	struct tgsi_src_register src;
492
 
493
	src_from_dst(&src, dst);
494
 
495
	instr = ir3_instr_create(ctx->ir, 2, OPC_MAX_F);
496
	vectorize(ctx, instr, dst, 2, &src, 0, minval, 0);
497
 
498
	instr = ir3_instr_create(ctx->ir, 2, OPC_MIN_F);
499
	vectorize(ctx, instr, dst, 2, &src, 0, maxval, 0);
4358 Serge 500
}
501
 
4401 Serge 502
static void
503
create_clamp_imm(struct fd3_compile_context *ctx,
504
		struct tgsi_dst_register *dst,
505
		uint32_t minval, uint32_t maxval)
506
{
507
	struct tgsi_src_register minconst, maxconst;
508
 
509
	get_immediate(ctx, &minconst, minval);
510
	get_immediate(ctx, &maxconst, maxval);
511
 
512
	create_clamp(ctx, dst, &minconst, &maxconst);
513
}
514
 
4358 Serge 515
static struct tgsi_dst_register *
516
get_dst(struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst)
517
{
518
	struct tgsi_dst_register *dst = &inst->Dst[0].Register;
519
	unsigned i;
520
	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
521
		struct tgsi_src_register *src = &inst->Src[i].Register;
522
		if ((src->File == dst->File) && (src->Index == dst->Index)) {
4401 Serge 523
			ctx->tmp_src = get_internal_temp(ctx, &ctx->tmp_dst);
4358 Serge 524
			ctx->tmp_dst.WriteMask = dst->WriteMask;
525
			dst = &ctx->tmp_dst;
526
			break;
527
		}
528
	}
529
	return dst;
530
}
531
 
532
static void
533
put_dst(struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst,
534
		struct tgsi_dst_register *dst)
535
{
536
	/* if necessary, add mov back into original dst: */
537
	if (dst != &inst->Dst[0].Register) {
4401 Serge 538
		create_mov(ctx, &inst->Dst[0].Register, ctx->tmp_src);
4358 Serge 539
	}
540
}
541
 
542
/* helper to generate the necessary repeat and/or additional instructions
543
 * to turn a scalar instruction into a vector operation:
544
 */
545
static void
546
vectorize(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
547
		struct tgsi_dst_register *dst, int nsrcs, ...)
548
{
549
	va_list ap;
550
	int i, j, n = 0;
551
 
552
	add_dst_reg(ctx, instr, dst, 0);
553
 
554
	va_start(ap, nsrcs);
555
	for (j = 0; j < nsrcs; j++) {
556
		struct tgsi_src_register *src =
557
				va_arg(ap, struct tgsi_src_register *);
558
		unsigned flags = va_arg(ap, unsigned);
559
		add_src_reg(ctx, instr, src, 0)->flags |= flags;
560
	}
561
	va_end(ap);
562
 
563
	for (i = 0; i < 4; i++) {
564
		if (dst->WriteMask & (1 << i)) {
565
			struct ir3_instruction *cur;
566
 
567
			if (n++ == 0) {
568
				cur = instr;
569
			} else {
570
				cur = ir3_instr_clone(instr);
571
				cur->flags &= ~(IR3_INSTR_SY | IR3_INSTR_SS | IR3_INSTR_JP);
572
			}
573
 
574
			/* fix-up dst register component: */
575
			cur->regs[0]->num = regid(cur->regs[0]->num >> 2, i);
576
 
577
			/* fix-up src register component: */
578
			va_start(ap, nsrcs);
579
			for (j = 0; j < nsrcs; j++) {
580
				struct tgsi_src_register *src =
581
						va_arg(ap, struct tgsi_src_register *);
582
				(void)va_arg(ap, unsigned);
583
				cur->regs[j+1]->num =
584
					regid(cur->regs[j+1]->num >> 2,
585
						src_swiz(src, i));
4401 Serge 586
				cur->flags |= src_flags(ctx, cur->regs[j+1]);
4358 Serge 587
			}
588
			va_end(ap);
589
		}
590
	}
591
 
592
	/* pad w/ nop's.. at least until we are clever enough to
593
	 * figure out if we really need to..
594
	 */
595
	for (; n < 4; n++) {
596
		ir3_instr_create(instr->shader, 0, OPC_NOP);
597
	}
598
}
599
 
600
/*
601
 * Handlers for TGSI instructions which do not have a 1:1 mapping to
602
 * native instructions:
603
 */
604
 
4401 Serge 605
static inline void
606
get_swiz(unsigned *swiz, struct tgsi_src_register *src)
607
{
608
	swiz[0] = src->SwizzleX;
609
	swiz[1] = src->SwizzleY;
610
	swiz[2] = src->SwizzleZ;
611
	swiz[3] = src->SwizzleW;
612
}
613
 
4358 Serge 614
static void
615
trans_dotp(const struct instr_translater *t,
616
		struct fd3_compile_context *ctx,
617
		struct tgsi_full_instruction *inst)
618
{
619
	struct ir3_instruction *instr;
620
	struct tgsi_dst_register tmp_dst;
4401 Serge 621
	struct tgsi_src_register *tmp_src;
4358 Serge 622
	struct tgsi_dst_register *dst  = &inst->Dst[0].Register;
623
	struct tgsi_src_register *src0 = &inst->Src[0].Register;
624
	struct tgsi_src_register *src1 = &inst->Src[1].Register;
4401 Serge 625
	unsigned swiz0[4];
626
	unsigned swiz1[4];
4358 Serge 627
	opc_t opc_mad    = ctx->so->half_precision ? OPC_MAD_F16 : OPC_MAD_F32;
628
	unsigned n = t->arg;     /* number of components */
4401 Serge 629
	unsigned i, swapped = 0;
4358 Serge 630
 
4401 Serge 631
	tmp_src = get_internal_temp_repl(ctx, &tmp_dst);
4358 Serge 632
 
4401 Serge 633
	/* in particular, can't handle const for src1 for cat3/mad:
4358 Serge 634
	 */
4401 Serge 635
	if (is_const(src1)) {
636
		if (!is_const(src0)) {
637
			struct tgsi_src_register *tmp;
638
			tmp = src0;
639
			src0 = src1;
640
			src1 = tmp;
641
			swapped = 1;
642
		} else {
643
			src0 = get_unconst(ctx, src0);
644
		}
645
	}
4358 Serge 646
 
4401 Serge 647
	get_swiz(swiz0, src0);
648
	get_swiz(swiz1, src1);
4358 Serge 649
 
650
	instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F);
651
	add_dst_reg(ctx, instr, &tmp_dst, 0);
652
	add_src_reg(ctx, instr, src0, swiz0[0]);
653
	add_src_reg(ctx, instr, src1, swiz1[0]);
654
 
655
	for (i = 1; i < n; i++) {
656
		ir3_instr_create(ctx->ir, 0, OPC_NOP);
657
 
658
		instr = ir3_instr_create(ctx->ir, 3, opc_mad);
659
		add_dst_reg(ctx, instr, &tmp_dst, 0);
660
		add_src_reg(ctx, instr, src0, swiz0[i]);
661
		add_src_reg(ctx, instr, src1, swiz1[i]);
4401 Serge 662
		add_src_reg(ctx, instr, tmp_src, 0);
4358 Serge 663
	}
664
 
665
	/* DPH(a,b) = (a.x * b.x) + (a.y * b.y) + (a.z * b.z) + b.w */
666
	if (t->tgsi_opc == TGSI_OPCODE_DPH) {
4401 Serge 667
		ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 1;
4358 Serge 668
 
669
		instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F);
670
		add_dst_reg(ctx, instr, &tmp_dst, 0);
4401 Serge 671
		if (swapped)
672
			add_src_reg(ctx, instr, src0, swiz0[i]);
673
		else
674
			add_src_reg(ctx, instr, src1, swiz1[i]);
675
		add_src_reg(ctx, instr, tmp_src, 0);
4358 Serge 676
 
677
		n++;
678
	}
679
 
4401 Serge 680
	ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 2;
4358 Serge 681
 
4401 Serge 682
	create_mov(ctx, dst, tmp_src);
4358 Serge 683
}
684
 
685
/* LRP(a,b,c) = (a * b) + ((1 - a) * c) */
686
static void
687
trans_lrp(const struct instr_translater *t,
688
		struct fd3_compile_context *ctx,
689
		struct tgsi_full_instruction *inst)
690
{
691
	struct ir3_instruction *instr;
692
	struct tgsi_dst_register tmp_dst1, tmp_dst2;
4401 Serge 693
	struct tgsi_src_register *tmp_src1, *tmp_src2;
4358 Serge 694
	struct tgsi_src_register tmp_const;
4401 Serge 695
	struct tgsi_src_register *src0 = &inst->Src[0].Register;
696
	struct tgsi_src_register *src1 = &inst->Src[1].Register;
4358 Serge 697
 
4401 Serge 698
	if (is_const(src0) && is_const(src1))
699
		src0 = get_unconst(ctx, src0);
4358 Serge 700
 
4401 Serge 701
	tmp_src1 = get_internal_temp(ctx, &tmp_dst1);
702
	tmp_src2 = get_internal_temp(ctx, &tmp_dst2);
703
 
4358 Serge 704
	get_immediate(ctx, &tmp_const, fui(1.0));
705
 
706
	/* tmp1 = (a * b) */
707
	instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F);
4401 Serge 708
	vectorize(ctx, instr, &tmp_dst1, 2, src0, 0, src1, 0);
4358 Serge 709
 
710
	/* tmp2 = (1 - a) */
711
	instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F);
4401 Serge 712
	vectorize(ctx, instr, &tmp_dst2, 2, &tmp_const, 0,
713
			src0, IR3_REG_NEGATE);
4358 Serge 714
 
715
	/* tmp2 = tmp2 * c */
716
	instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F);
717
	vectorize(ctx, instr, &tmp_dst2, 2,
4401 Serge 718
			tmp_src2, 0,
4358 Serge 719
			&inst->Src[2].Register, 0);
720
 
721
	/* dst = tmp1 + tmp2 */
722
	instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F);
723
	vectorize(ctx, instr, &inst->Dst[0].Register, 2,
4401 Serge 724
			tmp_src1, 0,
725
			tmp_src2, 0);
4358 Serge 726
}
727
 
728
/* FRC(x) = x - FLOOR(x) */
729
static void
730
trans_frac(const struct instr_translater *t,
731
		struct fd3_compile_context *ctx,
732
		struct tgsi_full_instruction *inst)
733
{
734
	struct ir3_instruction *instr;
735
	struct tgsi_dst_register tmp_dst;
4401 Serge 736
	struct tgsi_src_register *tmp_src;
4358 Serge 737
 
4401 Serge 738
	tmp_src = get_internal_temp(ctx, &tmp_dst);
4358 Serge 739
 
740
	/* tmp = FLOOR(x) */
741
	instr = ir3_instr_create(ctx->ir, 2, OPC_FLOOR_F);
742
	vectorize(ctx, instr, &tmp_dst, 1,
743
			&inst->Src[0].Register, 0);
744
 
745
	/* dst = x - tmp */
746
	instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F);
747
	vectorize(ctx, instr, &inst->Dst[0].Register, 2,
748
			&inst->Src[0].Register, 0,
4401 Serge 749
			tmp_src, IR3_REG_NEGATE);
4358 Serge 750
}
751
 
752
/* POW(a,b) = EXP2(b * LOG2(a)) */
753
static void
754
trans_pow(const struct instr_translater *t,
755
		struct fd3_compile_context *ctx,
756
		struct tgsi_full_instruction *inst)
757
{
758
	struct ir3_instruction *instr;
759
	struct ir3_register *r;
760
	struct tgsi_dst_register tmp_dst;
4401 Serge 761
	struct tgsi_src_register *tmp_src;
4358 Serge 762
	struct tgsi_dst_register *dst  = &inst->Dst[0].Register;
763
	struct tgsi_src_register *src0 = &inst->Src[0].Register;
764
	struct tgsi_src_register *src1 = &inst->Src[1].Register;
765
 
4401 Serge 766
	tmp_src = get_internal_temp_repl(ctx, &tmp_dst);
4358 Serge 767
 
768
	/* log2 Rtmp, Rsrc0 */
769
	ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 5;
770
	instr = ir3_instr_create(ctx->ir, 4, OPC_LOG2);
771
	r = add_dst_reg(ctx, instr, &tmp_dst, 0);
772
	add_src_reg(ctx, instr, src0, src0->SwizzleX);
4401 Serge 773
	regmask_set(ctx->needs_ss, r, TGSI_WRITEMASK_X);
4358 Serge 774
 
775
	/* mul.f Rtmp, Rtmp, Rsrc1 */
776
	instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F);
777
	add_dst_reg(ctx, instr, &tmp_dst, 0);
4401 Serge 778
	add_src_reg(ctx, instr, tmp_src, 0);
4358 Serge 779
	add_src_reg(ctx, instr, src1, src1->SwizzleX);
780
 
781
	/* blob compiler seems to ensure there are at least 6 instructions
782
	 * between a "simple" (non-cat4) instruction and a dependent cat4..
783
	 * probably we need to handle this in some other places too.
784
	 */
785
	ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 5;
786
 
787
	/* exp2 Rdst, Rtmp */
788
	instr = ir3_instr_create(ctx->ir, 4, OPC_EXP2);
789
	r = add_dst_reg(ctx, instr, &tmp_dst, 0);
4401 Serge 790
	add_src_reg(ctx, instr, tmp_src, 0);
791
	regmask_set(ctx->needs_ss, r, TGSI_WRITEMASK_X);
4358 Serge 792
 
4401 Serge 793
	create_mov(ctx, dst, tmp_src);
4358 Serge 794
}
795
 
796
/* texture fetch/sample instructions: */
797
static void
798
trans_samp(const struct instr_translater *t,
799
		struct fd3_compile_context *ctx,
800
		struct tgsi_full_instruction *inst)
801
{
802
	struct ir3_register *r;
803
	struct ir3_instruction *instr;
804
	struct tgsi_src_register *coord = &inst->Src[0].Register;
805
	struct tgsi_src_register *samp  = &inst->Src[1].Register;
806
	unsigned tex = inst->Texture.Texture;
807
	int8_t *order;
808
	unsigned i, j, flags = 0;
809
 
810
	switch (t->arg) {
811
	case TGSI_OPCODE_TEX:
812
		order = (tex == TGSI_TEXTURE_2D) ?
813
				(int8_t[4]){ 0,  1, -1, -1 } :  /* 2D */
814
				(int8_t[4]){ 0,  1,  2, -1 };   /* 3D */
815
		break;
816
	case TGSI_OPCODE_TXP:
817
		order = (tex == TGSI_TEXTURE_2D) ?
818
				(int8_t[4]){ 0,  1,  3, -1 } :  /* 2D */
819
				(int8_t[4]){ 0,  1,  2,  3 };   /* 3D */
820
		flags |= IR3_INSTR_P;
821
		break;
822
	default:
4401 Serge 823
		compile_assert(ctx, 0);
4358 Serge 824
		break;
825
	}
826
 
827
	if (tex == TGSI_TEXTURE_3D)
828
		flags |= IR3_INSTR_3D;
829
 
830
	/* The texture sample instructions need to coord in successive
831
	 * registers/components (ie. src.xy but not src.yx).  And TXP
832
	 * needs the .w component in .z for 2D..  so in some cases we
833
	 * might need to emit some mov instructions to shuffle things
834
	 * around:
835
	 */
836
	for (i = 1; (i < 4) && (order[i] >= 0); i++) {
837
		if (src_swiz(coord, i) != (src_swiz(coord, 0) + order[i])) {
4401 Serge 838
			struct tgsi_dst_register tmp_dst;
839
			struct tgsi_src_register *tmp_src;
4358 Serge 840
 
4401 Serge 841
			type_t type_mov = get_ftype(ctx);
842
 
4358 Serge 843
			/* need to move things around: */
4401 Serge 844
			tmp_src = get_internal_temp(ctx, &tmp_dst);
4358 Serge 845
 
846
			for (j = 0; (j < 4) && (order[j] >= 0); j++) {
847
				instr = ir3_instr_create(ctx->ir, 1, 0);
848
				instr->cat1.src_type = type_mov;
849
				instr->cat1.dst_type = type_mov;
850
				add_dst_reg(ctx, instr, &tmp_dst, j);
851
				add_src_reg(ctx, instr, coord,
852
						src_swiz(coord, order[j]));
853
			}
854
 
4401 Serge 855
			coord = tmp_src;
4358 Serge 856
 
857
			if (j < 4)
858
				ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 4 - j - 1;
859
 
860
			break;
861
		}
862
	}
863
 
864
	instr = ir3_instr_create(ctx->ir, 5, t->opc);
4401 Serge 865
	instr->cat5.type = get_ftype(ctx);
4358 Serge 866
	instr->cat5.samp = samp->Index;
867
	instr->cat5.tex  = samp->Index;
868
	instr->flags |= flags;
869
 
870
	r = add_dst_reg(ctx, instr, &inst->Dst[0].Register, 0);
871
	r->wrmask = inst->Dst[0].Register.WriteMask;
872
 
873
	add_src_reg(ctx, instr, coord, coord->SwizzleX);
874
 
4401 Serge 875
	regmask_set(ctx->needs_sy, r, r->wrmask);
4358 Serge 876
}
877
 
4401 Serge 878
/*
879
 * SEQ(a,b) = (a == b) ? 1.0 : 0.0
880
 *   cmps.f.eq tmp0, b, a
881
 *   cov.u16f16 dst, tmp0
882
 *
883
 * SNE(a,b) = (a != b) ? 1.0 : 0.0
884
 *   cmps.f.eq tmp0, b, a
885
 *   add.s tmp0, tmp0, -1
886
 *   sel.f16 dst, {0.0}, tmp0, {1.0}
887
 *
888
 * SGE(a,b) = (a >= b) ? 1.0 : 0.0
889
 *   cmps.f.ge tmp0, a, b
890
 *   cov.u16f16 dst, tmp0
891
 *
892
 * SLE(a,b) = (a <= b) ? 1.0 : 0.0
893
 *   cmps.f.ge tmp0, b, a
894
 *   cov.u16f16 dst, tmp0
895
 *
896
 * SGT(a,b) = (a > b)  ? 1.0 : 0.0
897
 *   cmps.f.ge tmp0, b, a
898
 *   add.s tmp0, tmp0, -1
899
 *   sel.f16 dst, {0.0}, tmp0, {1.0}
900
 *
901
 * SLT(a,b) = (a < b)  ? 1.0 : 0.0
902
 *   cmps.f.ge tmp0, a, b
903
 *   add.s tmp0, tmp0, -1
904
 *   sel.f16 dst, {0.0}, tmp0, {1.0}
905
 *
906
 * CMP(a,b,c) = (a < 0.0) ? b : c
907
 *   cmps.f.ge tmp0, a, {0.0}
908
 *   add.s tmp0, tmp0, -1
909
 *   sel.f16 dst, c, tmp0, b
910
 */
4358 Serge 911
static void
912
trans_cmp(const struct instr_translater *t,
913
		struct fd3_compile_context *ctx,
914
		struct tgsi_full_instruction *inst)
915
{
916
	struct ir3_instruction *instr;
917
	struct tgsi_dst_register tmp_dst;
4401 Serge 918
	struct tgsi_src_register *tmp_src;
919
	struct tgsi_src_register constval0, constval1;
920
	/* final instruction for CMP() uses orig src1 and src2: */
4358 Serge 921
	struct tgsi_dst_register *dst = get_dst(ctx, inst);
4401 Serge 922
	struct tgsi_src_register *a0, *a1;
923
	unsigned condition;
4358 Serge 924
 
4401 Serge 925
	tmp_src = get_internal_temp(ctx, &tmp_dst);
4358 Serge 926
 
4401 Serge 927
	switch (t->tgsi_opc) {
928
	case TGSI_OPCODE_SEQ:
929
	case TGSI_OPCODE_SNE:
930
		a0 = &inst->Src[1].Register;  /* b */
931
		a1 = &inst->Src[0].Register;  /* a */
932
		condition = IR3_COND_EQ;
933
		break;
934
	case TGSI_OPCODE_SGE:
935
	case TGSI_OPCODE_SLT:
936
		a0 = &inst->Src[0].Register;  /* a */
937
		a1 = &inst->Src[1].Register;  /* b */
938
		condition = IR3_COND_GE;
939
		break;
940
	case TGSI_OPCODE_SLE:
941
	case TGSI_OPCODE_SGT:
942
		a0 = &inst->Src[1].Register;  /* b */
943
		a1 = &inst->Src[0].Register;  /* a */
944
		condition = IR3_COND_GE;
945
		break;
946
	case TGSI_OPCODE_CMP:
947
		get_immediate(ctx, &constval0, fui(0.0));
948
		a0 = &inst->Src[0].Register;  /* a */
949
		a1 = &constval0;              /* {0.0} */
950
		condition = IR3_COND_GE;
951
		break;
952
	default:
953
		compile_assert(ctx, 0);
954
		return;
955
	}
956
 
957
	if (is_const(a0) && is_const(a1))
958
		a0 = get_unconst(ctx, a0);
959
 
960
	/* cmps.f.ge tmp, a0, a1 */
4358 Serge 961
	instr = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F);
4401 Serge 962
	instr->cat2.condition = condition;
963
	vectorize(ctx, instr, &tmp_dst, 2, a0, 0, a1, 0);
4358 Serge 964
 
4401 Serge 965
	switch (t->tgsi_opc) {
966
	case TGSI_OPCODE_SEQ:
967
	case TGSI_OPCODE_SGE:
968
	case TGSI_OPCODE_SLE:
969
		/* cov.u16f16 dst, tmp0 */
970
		instr = ir3_instr_create(ctx->ir, 1, 0);
971
		instr->cat1.src_type = get_utype(ctx);
972
		instr->cat1.dst_type = get_ftype(ctx);
973
		vectorize(ctx, instr, dst, 1, tmp_src, 0);
974
		break;
975
	case TGSI_OPCODE_SNE:
976
	case TGSI_OPCODE_SGT:
977
	case TGSI_OPCODE_SLT:
978
	case TGSI_OPCODE_CMP:
979
		/* add.s tmp, tmp, -1 */
980
		instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_S);
981
		instr->repeat = 3;
982
		add_dst_reg(ctx, instr, &tmp_dst, 0);
983
		add_src_reg(ctx, instr, tmp_src, 0)->flags |= IR3_REG_R;
984
		ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = -1;
4358 Serge 985
 
4401 Serge 986
		if (t->tgsi_opc == TGSI_OPCODE_CMP) {
987
			/* sel.{f32,f16} dst, src2, tmp, src1 */
988
			instr = ir3_instr_create(ctx->ir, 3,
989
					ctx->so->half_precision ? OPC_SEL_F16 : OPC_SEL_F32);
990
			vectorize(ctx, instr, dst, 3,
991
					&inst->Src[2].Register, 0,
992
					tmp_src, 0,
993
					&inst->Src[1].Register, 0);
994
		} else {
995
			get_immediate(ctx, &constval0, fui(0.0));
996
			get_immediate(ctx, &constval1, fui(1.0));
997
			/* sel.{f32,f16} dst, {0.0}, tmp0, {1.0} */
998
			instr = ir3_instr_create(ctx->ir, 3,
999
					ctx->so->half_precision ? OPC_SEL_F16 : OPC_SEL_F32);
1000
			vectorize(ctx, instr, dst, 3,
1001
					&constval0, 0, tmp_src, 0, &constval1, 0);
1002
		}
4358 Serge 1003
 
4401 Serge 1004
		break;
1005
	}
1006
 
4358 Serge 1007
	put_dst(ctx, inst, dst);
1008
}
1009
 
1010
/*
1011
 * Conditional / Flow control
1012
 */
1013
 
1014
static unsigned
1015
find_instruction(struct fd3_compile_context *ctx, struct ir3_instruction *instr)
1016
{
1017
	unsigned i;
1018
	for (i = 0; i < ctx->ir->instrs_count; i++)
1019
		if (ctx->ir->instrs[i] == instr)
1020
			return i;
1021
	return ~0;
1022
}
1023
 
1024
static void
1025
push_branch(struct fd3_compile_context *ctx, struct ir3_instruction *instr)
1026
{
1027
	ctx->branch[ctx->branch_count++] = instr;
1028
}
1029
 
1030
static void
1031
pop_branch(struct fd3_compile_context *ctx)
1032
{
1033
	struct ir3_instruction *instr;
1034
 
1035
	/* if we were clever enough, we'd patch this up after the fact,
1036
	 * and set (jp) flag on whatever the next instruction was, rather
1037
	 * than inserting an extra nop..
1038
	 */
1039
	instr = ir3_instr_create(ctx->ir, 0, OPC_NOP);
1040
	instr->flags |= IR3_INSTR_JP;
1041
 
1042
	/* pop the branch instruction from the stack and fix up branch target: */
1043
	instr = ctx->branch[--ctx->branch_count];
1044
	instr->cat0.immed = ctx->ir->instrs_count - find_instruction(ctx, instr) - 1;
1045
}
1046
 
1047
/* We probably don't really want to translate if/else/endif into branches..
1048
 * the blob driver evaluates both legs of the if and then uses the sel
1049
 * instruction to pick which sides of the branch to "keep".. but figuring
1050
 * that out will take somewhat more compiler smarts.  So hopefully branches
1051
 * don't kill performance too badly.
1052
 */
1053
static void
1054
trans_if(const struct instr_translater *t,
1055
		struct fd3_compile_context *ctx,
1056
		struct tgsi_full_instruction *inst)
1057
{
1058
	struct ir3_instruction *instr;
1059
	struct tgsi_src_register *src = &inst->Src[0].Register;
1060
	struct tgsi_src_register constval;
1061
 
1062
	get_immediate(ctx, &constval, fui(0.0));
1063
 
4401 Serge 1064
	if (is_const(src))
1065
		src = get_unconst(ctx, src);
1066
 
4358 Serge 1067
	instr = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F);
1068
	ir3_reg_create(instr, regid(REG_P0, 0), 0);
4401 Serge 1069
	add_src_reg(ctx, instr, src, src->SwizzleX);
4358 Serge 1070
	add_src_reg(ctx, instr, &constval, constval.SwizzleX);
1071
	instr->cat2.condition = IR3_COND_EQ;
1072
 
1073
	instr = ir3_instr_create(ctx->ir, 0, OPC_BR);
1074
	push_branch(ctx, instr);
1075
}
1076
 
1077
static void
1078
trans_else(const struct instr_translater *t,
1079
		struct fd3_compile_context *ctx,
1080
		struct tgsi_full_instruction *inst)
1081
{
1082
	struct ir3_instruction *instr;
1083
 
1084
	/* for first half of if/else/endif, generate a jump past the else: */
1085
	instr = ir3_instr_create(ctx->ir, 0, OPC_JUMP);
1086
 
1087
	pop_branch(ctx);
1088
	push_branch(ctx, instr);
1089
}
1090
 
1091
/* TGSI ENDIF: resolve the innermost pending branch/jump.  The translater
 * entry and the TGSI instruction carry no information needed here.
 */
static void
trans_endif(const struct instr_translater *t,
		struct fd3_compile_context *ctx,
		struct tgsi_full_instruction *inst)
{
	(void)t;
	(void)inst;

	pop_branch(ctx);
}
1098
 
1099
/*
1100
 * Handlers for TGSI instructions which do have 1:1 mapping to native
1101
 * instructions:
1102
 */
1103
 
1104
static void
1105
instr_cat0(const struct instr_translater *t,
1106
		struct fd3_compile_context *ctx,
1107
		struct tgsi_full_instruction *inst)
1108
{
1109
	ir3_instr_create(ctx->ir, 0, t->opc);
1110
}
1111
 
1112
static void
1113
instr_cat1(const struct instr_translater *t,
1114
		struct fd3_compile_context *ctx,
1115
		struct tgsi_full_instruction *inst)
1116
{
1117
	struct tgsi_dst_register *dst = get_dst(ctx, inst);
1118
	struct tgsi_src_register *src = &inst->Src[0].Register;
1119
 
1120
	/* mov instructions can't handle a negate on src: */
1121
	if (src->Negate) {
1122
		struct tgsi_src_register constval;
1123
		struct ir3_instruction *instr;
1124
 
1125
		/* since right now, we are using uniformly either TYPE_F16 or
1126
		 * TYPE_F32, and we don't utilize the conversion possibilities
1127
		 * of mov instructions, we can get away with substituting an
1128
		 * add.f which can handle negate.  Might need to revisit this
1129
		 * in the future if we start supporting widening/narrowing or
1130
		 * conversion to/from integer..
1131
		 */
1132
		instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F);
1133
		get_immediate(ctx, &constval, fui(0.0));
1134
		vectorize(ctx, instr, dst, 2, src, 0, &constval, 0);
1135
	} else {
1136
		create_mov(ctx, dst, src);
1137
		/* create_mov() generates vector sequence, so no vectorize() */
1138
	}
1139
	put_dst(ctx, inst, dst);
1140
}
1141
 
1142
static void
1143
instr_cat2(const struct instr_translater *t,
1144
		struct fd3_compile_context *ctx,
1145
		struct tgsi_full_instruction *inst)
1146
{
1147
	struct tgsi_dst_register *dst = get_dst(ctx, inst);
4401 Serge 1148
	struct tgsi_src_register *src0 = &inst->Src[0].Register;
1149
	struct tgsi_src_register *src1 = &inst->Src[1].Register;
4358 Serge 1150
	struct ir3_instruction *instr;
1151
	unsigned src0_flags = 0;
1152
 
1153
	switch (t->tgsi_opc) {
1154
	case TGSI_OPCODE_ABS:
1155
		src0_flags = IR3_REG_ABS;
1156
		break;
1157
	}
1158
 
1159
	switch (t->opc) {
1160
	case OPC_ABSNEG_F:
1161
	case OPC_ABSNEG_S:
1162
	case OPC_CLZ_B:
1163
	case OPC_CLZ_S:
1164
	case OPC_SIGN_F:
1165
	case OPC_FLOOR_F:
1166
	case OPC_CEIL_F:
1167
	case OPC_RNDNE_F:
1168
	case OPC_RNDAZ_F:
1169
	case OPC_TRUNC_F:
1170
	case OPC_NOT_B:
1171
	case OPC_BFREV_B:
1172
	case OPC_SETRM:
1173
	case OPC_CBITS_B:
1174
		/* these only have one src reg */
4401 Serge 1175
		instr = ir3_instr_create(ctx->ir, 2, t->opc);
1176
		vectorize(ctx, instr, dst, 1, src0, src0_flags);
4358 Serge 1177
		break;
1178
	default:
4401 Serge 1179
		if (is_const(src0) && is_const(src1))
1180
			src0 = get_unconst(ctx, src0);
1181
 
1182
		instr = ir3_instr_create(ctx->ir, 2, t->opc);
1183
		vectorize(ctx, instr, dst, 2, src0, src0_flags, src1, 0);
4358 Serge 1184
		break;
1185
	}
1186
 
1187
	put_dst(ctx, inst, dst);
1188
}
1189
 
4401 Serge 1190
/* Returns true if 'opc' is one of the multiply-add (mad/madsh) opcodes,
 * false for anything else.
 */
static bool is_mad(opc_t opc)
{
	return (opc == OPC_MAD_U16) ||
			(opc == OPC_MADSH_U16) ||
			(opc == OPC_MAD_S16) ||
			(opc == OPC_MADSH_M16) ||
			(opc == OPC_MAD_U24) ||
			(opc == OPC_MAD_S24) ||
			(opc == OPC_MAD_F16) ||
			(opc == OPC_MAD_F32);
}
1206
 
4358 Serge 1207
static void
1208
instr_cat3(const struct instr_translater *t,
1209
		struct fd3_compile_context *ctx,
1210
		struct tgsi_full_instruction *inst)
1211
{
1212
	struct tgsi_dst_register *dst = get_dst(ctx, inst);
4401 Serge 1213
	struct tgsi_src_register *src0 = &inst->Src[0].Register;
4358 Serge 1214
	struct tgsi_src_register *src1 = &inst->Src[1].Register;
1215
	struct ir3_instruction *instr;
1216
 
4401 Serge 1217
	/* in particular, can't handle const for src1 for cat3..
1218
	 * for mad, we can swap first two src's if needed:
4358 Serge 1219
	 */
4401 Serge 1220
	if (is_const(src1)) {
1221
		if (is_mad(t->opc) && !is_const(src0)) {
1222
			struct tgsi_src_register *tmp;
1223
			tmp = src0;
1224
			src0 = src1;
1225
			src1 = tmp;
1226
		} else {
1227
			src0 = get_unconst(ctx, src0);
1228
		}
4358 Serge 1229
	}
1230
 
1231
	instr = ir3_instr_create(ctx->ir, 3,
1232
			ctx->so->half_precision ? t->hopc : t->opc);
4401 Serge 1233
	vectorize(ctx, instr, dst, 3, src0, 0, src1, 0,
4358 Serge 1234
			&inst->Src[2].Register, 0);
1235
	put_dst(ctx, inst, dst);
1236
}
1237
 
1238
static void
1239
instr_cat4(const struct instr_translater *t,
1240
		struct fd3_compile_context *ctx,
1241
		struct tgsi_full_instruction *inst)
1242
{
1243
	struct tgsi_dst_register *dst = get_dst(ctx, inst);
4401 Serge 1244
	struct tgsi_src_register *src = &inst->Src[0].Register;
4358 Serge 1245
	struct ir3_instruction *instr;
1246
 
4401 Serge 1247
	/* seems like blob compiler avoids const as src.. */
1248
	if (is_const(src))
1249
		src = get_unconst(ctx, src);
1250
 
4358 Serge 1251
	ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 5;
1252
	instr = ir3_instr_create(ctx->ir, 4, t->opc);
1253
 
4401 Serge 1254
	vectorize(ctx, instr, dst, 1, src, 0);
4358 Serge 1255
 
4401 Serge 1256
	regmask_set(ctx->needs_ss, instr->regs[0],
1257
			inst->Dst[0].Register.WriteMask);
4358 Serge 1258
 
1259
	put_dst(ctx, inst, dst);
1260
}
1261
 
1262
static const struct instr_translater translaters[TGSI_OPCODE_LAST] = {
1263
#define INSTR(n, f, ...) \
1264
	[TGSI_OPCODE_ ## n] = { .fxn = (f), .tgsi_opc = TGSI_OPCODE_ ## n, ##__VA_ARGS__ }
1265
 
1266
	INSTR(MOV,          instr_cat1),
1267
	INSTR(RCP,          instr_cat4, .opc = OPC_RCP),
1268
	INSTR(RSQ,          instr_cat4, .opc = OPC_RSQ),
1269
	INSTR(SQRT,         instr_cat4, .opc = OPC_SQRT),
1270
	INSTR(MUL,          instr_cat2, .opc = OPC_MUL_F),
1271
	INSTR(ADD,          instr_cat2, .opc = OPC_ADD_F),
1272
	INSTR(DP2,          trans_dotp, .arg = 2),
1273
	INSTR(DP3,          trans_dotp, .arg = 3),
1274
	INSTR(DP4,          trans_dotp, .arg = 4),
1275
	INSTR(DPH,          trans_dotp, .arg = 3),   /* almost like DP3 */
1276
	INSTR(MIN,          instr_cat2, .opc = OPC_MIN_F),
1277
	INSTR(MAX,          instr_cat2, .opc = OPC_MAX_F),
1278
	INSTR(MAD,          instr_cat3, .opc = OPC_MAD_F32, .hopc = OPC_MAD_F16),
1279
	INSTR(LRP,          trans_lrp),
1280
	INSTR(FRC,          trans_frac),
1281
	INSTR(FLR,          instr_cat2, .opc = OPC_FLOOR_F),
4401 Serge 1282
	INSTR(ARL,          instr_cat2, .opc = OPC_FLOOR_F),
4358 Serge 1283
	INSTR(EX2,          instr_cat4, .opc = OPC_EXP2),
1284
	INSTR(LG2,          instr_cat4, .opc = OPC_LOG2),
1285
	INSTR(POW,          trans_pow),
1286
	INSTR(ABS,          instr_cat2, .opc = OPC_ABSNEG_F),
1287
	INSTR(COS,          instr_cat4, .opc = OPC_SIN),
1288
	INSTR(SIN,          instr_cat4, .opc = OPC_COS),
1289
	INSTR(TEX,          trans_samp, .opc = OPC_SAM, .arg = TGSI_OPCODE_TEX),
1290
	INSTR(TXP,          trans_samp, .opc = OPC_SAM, .arg = TGSI_OPCODE_TXP),
4401 Serge 1291
	INSTR(SGT,          trans_cmp),
1292
	INSTR(SLT,          trans_cmp),
1293
	INSTR(SGE,          trans_cmp),
1294
	INSTR(SLE,          trans_cmp),
1295
	INSTR(SNE,          trans_cmp),
1296
	INSTR(SEQ,          trans_cmp),
4358 Serge 1297
	INSTR(CMP,          trans_cmp),
1298
	INSTR(IF,           trans_if),
1299
	INSTR(ELSE,         trans_else),
1300
	INSTR(ENDIF,        trans_endif),
1301
	INSTR(END,          instr_cat0, .opc = OPC_END),
1302
};
1303
 
1304
static int
1305
decl_in(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
1306
{
1307
	struct fd3_shader_stateobj *so = ctx->so;
1308
	unsigned base = ctx->base_reg[TGSI_FILE_INPUT];
1309
	unsigned i, flags = 0;
1310
	int nop = 0;
1311
 
1312
	if (ctx->so->half_precision)
1313
		flags |= IR3_REG_HALF;
1314
 
1315
	for (i = decl->Range.First; i <= decl->Range.Last; i++) {
1316
		unsigned n = so->inputs_count++;
1317
		unsigned r = regid(i + base, 0);
1318
		unsigned ncomp;
1319
 
1320
		/* TODO use ctx->info.input_usage_mask[decl->Range.n] to figure out ncomp: */
1321
		ncomp = 4;
1322
 
1323
		DBG("decl in -> r%d", i + base);   // XXX
1324
 
1325
		so->inputs[n].compmask = (1 << ncomp) - 1;
1326
		so->inputs[n].regid = r;
1327
		so->inputs[n].inloc = ctx->next_inloc;
1328
		ctx->next_inloc += ncomp;
1329
 
1330
		so->total_in += ncomp;
1331
 
1332
		/* for frag shaders, we need to generate the corresponding bary instr: */
1333
		if (ctx->type == TGSI_PROCESSOR_FRAGMENT) {
1334
			struct ir3_instruction *instr;
1335
 
1336
			instr = ir3_instr_create(ctx->ir, 2, OPC_BARY_F);
1337
			instr->repeat = ncomp - 1;
1338
 
1339
			/* dst register: */
1340
			ctx->last_input = ir3_reg_create(instr, r, flags);
1341
 
1342
			/* input position: */
1343
			ir3_reg_create(instr, 0, IR3_REG_IMMED | IR3_REG_R)->iim_val =
1344
					so->inputs[n].inloc - 8;
1345
 
1346
			/* input base (always r0.x): */
1347
			ir3_reg_create(instr, regid(0,0), 0);
1348
 
1349
			nop = 6;
1350
		}
1351
	}
1352
 
1353
	return nop;
1354
}
1355
 
1356
static void
1357
decl_out(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
1358
{
1359
	struct fd3_shader_stateobj *so = ctx->so;
1360
	unsigned base = ctx->base_reg[TGSI_FILE_OUTPUT];
1361
	unsigned name = decl->Semantic.Name;
1362
	unsigned i;
1363
 
4401 Serge 1364
	compile_assert(ctx, decl->Declaration.Semantic);  // TODO is this ever not true?
4358 Serge 1365
 
1366
	DBG("decl out[%d] -> r%d", name, decl->Range.First + base);   // XXX
1367
 
1368
	if (ctx->type == TGSI_PROCESSOR_VERTEX) {
1369
		switch (name) {
1370
		case TGSI_SEMANTIC_POSITION:
1371
			so->pos_regid = regid(decl->Range.First + base, 0);
1372
			break;
1373
		case TGSI_SEMANTIC_PSIZE:
1374
			so->psize_regid = regid(decl->Range.First + base, 0);
1375
			break;
1376
		case TGSI_SEMANTIC_COLOR:
1377
		case TGSI_SEMANTIC_GENERIC:
1378
		case TGSI_SEMANTIC_FOG:
1379
		case TGSI_SEMANTIC_TEXCOORD:
1380
			for (i = decl->Range.First; i <= decl->Range.Last; i++)
1381
				so->outputs[so->outputs_count++].regid = regid(i + base, 0);
1382
			break;
1383
		default:
4401 Serge 1384
			compile_error(ctx, "unknown VS semantic name: %s\n",
4358 Serge 1385
					tgsi_semantic_names[name]);
1386
		}
1387
	} else {
1388
		switch (name) {
1389
		case TGSI_SEMANTIC_COLOR:
1390
			so->color_regid = regid(decl->Range.First + base, 0);
1391
			break;
1392
		default:
4401 Serge 1393
			compile_error(ctx, "unknown VS semantic name: %s\n",
4358 Serge 1394
					tgsi_semantic_names[name]);
1395
		}
1396
	}
1397
}
1398
 
1399
static void
1400
decl_samp(struct fd3_compile_context *ctx, struct tgsi_full_declaration *decl)
1401
{
1402
	ctx->so->samplers_count++;
1403
}
1404
 
1405
static void
1406
compile_instructions(struct fd3_compile_context *ctx)
1407
{
1408
	struct ir3_shader *ir = ctx->ir;
1409
	int nop = 0;
1410
 
1411
	while (!tgsi_parse_end_of_tokens(&ctx->parser)) {
1412
		tgsi_parse_token(&ctx->parser);
1413
 
1414
		switch (ctx->parser.FullToken.Token.Type) {
1415
		case TGSI_TOKEN_TYPE_DECLARATION: {
1416
			struct tgsi_full_declaration *decl =
1417
					&ctx->parser.FullToken.FullDeclaration;
1418
			if (decl->Declaration.File == TGSI_FILE_OUTPUT) {
1419
				decl_out(ctx, decl);
1420
			} else if (decl->Declaration.File == TGSI_FILE_INPUT) {
1421
				nop = decl_in(ctx, decl);
1422
			} else if (decl->Declaration.File == TGSI_FILE_SAMPLER) {
1423
				decl_samp(ctx, decl);
1424
			}
1425
			break;
1426
		}
1427
		case TGSI_TOKEN_TYPE_IMMEDIATE: {
1428
			/* TODO: if we know the immediate is small enough, and only
1429
			 * used with instructions that can embed an immediate, we
1430
			 * can skip this:
1431
			 */
1432
			struct tgsi_full_immediate *imm =
1433
					&ctx->parser.FullToken.FullImmediate;
1434
			unsigned n = ctx->so->immediates_count++;
1435
			memcpy(ctx->so->immediates[n].val, imm->u, 16);
1436
			break;
1437
		}
1438
		case TGSI_TOKEN_TYPE_INSTRUCTION: {
1439
			struct tgsi_full_instruction *inst =
1440
					&ctx->parser.FullToken.FullInstruction;
1441
			unsigned opc = inst->Instruction.Opcode;
1442
			const struct instr_translater *t = &translaters[opc];
1443
 
1444
			if (nop) {
1445
				ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = nop - 1;
1446
				nop = 0;
1447
			}
1448
 
1449
			if (t->fxn) {
1450
				t->fxn(t, ctx, inst);
1451
				ctx->num_internal_temps = 0;
1452
			} else {
4401 Serge 1453
				compile_error(ctx, "unknown TGSI opc: %s\n",
4358 Serge 1454
						tgsi_get_opcode_name(opc));
1455
			}
1456
 
4401 Serge 1457
			switch (inst->Instruction.Saturate) {
1458
			case TGSI_SAT_ZERO_ONE:
1459
				create_clamp_imm(ctx, &inst->Dst[0].Register,
1460
						fui(0.0), fui(1.0));
1461
				break;
1462
			case TGSI_SAT_MINUS_PLUS_ONE:
1463
				create_clamp_imm(ctx, &inst->Dst[0].Register,
1464
						fui(-1.0), fui(1.0));
1465
				break;
1466
			}
1467
 
4358 Serge 1468
			break;
1469
		}
1470
		default:
1471
			break;
1472
		}
1473
	}
1474
 
1475
	if (ir->instrs_count > 0)
1476
		ir->instrs[0]->flags |= IR3_INSTR_SS | IR3_INSTR_SY;
1477
 
1478
	if (ctx->last_input)
1479
		ctx->last_input->flags |= IR3_REG_EI;
1480
}
1481
 
1482
int
1483
fd3_compile_shader(struct fd3_shader_stateobj *so,
1484
		const struct tgsi_token *tokens)
1485
{
1486
	struct fd3_compile_context ctx;
1487
 
1488
	assert(!so->ir);
1489
 
1490
	so->ir = ir3_shader_create();
1491
 
4401 Serge 1492
	assert(so->ir);
1493
 
4358 Serge 1494
	so->color_regid = regid(63,0);
1495
	so->pos_regid   = regid(63,0);
1496
	so->psize_regid = regid(63,0);
1497
 
1498
	if (compile_init(&ctx, so, tokens) != TGSI_PARSE_OK)
1499
		return -1;
1500
 
1501
	compile_instructions(&ctx);
1502
 
1503
	compile_free(&ctx);
1504
 
1505
	return 0;
1506
}