Subversion Repositories Kolibri OS

Rev

Rev 4358 | Go to most recent revision | Show entire file | Regard whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 4358 Rev 4401
Line 60... Line 60...
60
	if (reg->flags & IR3_REG_HALF)
60
	if (reg->flags & IR3_REG_HALF)
61
		num += MAX_REG;
61
		num += MAX_REG;
62
	return num;
62
	return num;
63
}
63
}
Line 64... Line 64...
64
 
64
 
-
 
65
static void regmask_set(regmask_t regmask, struct ir3_register *reg,
65
static void regmask_set(regmask_t regmask, struct ir3_register *reg)
66
		unsigned wrmask)
-
 
67
{
-
 
68
	unsigned i;
-
 
69
	for (i = 0; i < 4; i++) {
66
{
70
		if (wrmask & (1 << i)) {
67
	unsigned idx = regmask_idx(reg);
71
			unsigned idx = regmask_idx(reg) + i;
68
	regmask[idx / 8] |= 1 << (idx % 8);
72
			regmask[idx / 8] |= 1 << (idx % 8);
-
 
73
		}
-
 
74
	}
Line 69... Line 75...
69
}
75
}
70
 
76
 
71
static unsigned regmask_get(regmask_t regmask, struct ir3_register *reg)
77
static unsigned regmask_get(regmask_t regmask, struct ir3_register *reg)
72
{
78
{
Line 89... Line 95...
89
	/* last input dst (for setting (ei) flag): */
95
	/* last input dst (for setting (ei) flag): */
90
	struct ir3_register *last_input;
96
	struct ir3_register *last_input;
Line 91... Line 97...
91
 
97
 
92
	unsigned next_inloc;
98
	unsigned next_inloc;
-
 
99
	unsigned num_internal_temps;
Line 93... Line 100...
93
	unsigned num_internal_temps;
100
	struct tgsi_src_register internal_temps[6];
94
 
101
 
95
	/* track registers which need to synchronize w/ "complex alu" cat3
102
	/* track registers which need to synchronize w/ "complex alu" cat3
96
	 * instruction pipeline:
103
	 * instruction pipeline:
Line 126... Line 133...
126
	/* used when dst is same as one of the src, to avoid overwriting a
133
	/* used when dst is same as one of the src, to avoid overwriting a
127
	 * src element before the remaining scalar instructions that make
134
	 * src element before the remaining scalar instructions that make
128
	 * up the vector operation
135
	 * up the vector operation
129
	 */
136
	 */
130
	struct tgsi_dst_register tmp_dst;
137
	struct tgsi_dst_register tmp_dst;
131
	struct tgsi_src_register tmp_src;
138
	struct tgsi_src_register *tmp_src;
132
};
139
};
Line -... Line 140...
-
 
140
 
-
 
141
 
-
 
142
static void vectorize(struct fd3_compile_context *ctx,
-
 
143
		struct ir3_instruction *instr, struct tgsi_dst_register *dst,
-
 
144
		int nsrcs, ...);
-
 
145
static void create_mov(struct fd3_compile_context *ctx,
-
 
146
		struct tgsi_dst_register *dst, struct tgsi_src_register *src);
133
 
147
 
134
static unsigned
148
static unsigned
135
compile_init(struct fd3_compile_context *ctx, struct fd3_shader_stateobj *so,
149
compile_init(struct fd3_compile_context *ctx, struct fd3_shader_stateobj *so,
136
		const struct tgsi_token *tokens)
150
		const struct tgsi_token *tokens)
137
{
151
{
Line 152... Line 166...
152
	tgsi_scan_shader(tokens, &ctx->info);
166
	tgsi_scan_shader(tokens, &ctx->info);
Line 153... Line 167...
153
 
167
 
154
	/* Immediates go after constants: */
168
	/* Immediates go after constants: */
155
	ctx->base_reg[TGSI_FILE_CONSTANT]  = 0;
169
	ctx->base_reg[TGSI_FILE_CONSTANT]  = 0;
156
	ctx->base_reg[TGSI_FILE_IMMEDIATE] =
170
	ctx->base_reg[TGSI_FILE_IMMEDIATE] =
Line 157... Line 171...
157
			ctx->info.file_count[TGSI_FILE_CONSTANT];
171
			ctx->info.file_max[TGSI_FILE_CONSTANT] + 1;
158
 
172
 
159
	/* Temporaries after outputs after inputs: */
173
	/* Temporaries after outputs after inputs: */
160
	ctx->base_reg[TGSI_FILE_INPUT]     = 0;
174
	ctx->base_reg[TGSI_FILE_INPUT]     = 0;
161
	ctx->base_reg[TGSI_FILE_OUTPUT]    =
175
	ctx->base_reg[TGSI_FILE_OUTPUT]    =
162
			ctx->info.file_count[TGSI_FILE_INPUT];
176
			ctx->info.file_max[TGSI_FILE_INPUT] + 1;
163
	ctx->base_reg[TGSI_FILE_TEMPORARY] =
177
	ctx->base_reg[TGSI_FILE_TEMPORARY] =
Line 164... Line 178...
164
			ctx->info.file_count[TGSI_FILE_INPUT] +
178
			ctx->info.file_max[TGSI_FILE_INPUT] + 1 +
165
			ctx->info.file_count[TGSI_FILE_OUTPUT];
179
			ctx->info.file_max[TGSI_FILE_OUTPUT] + 1;
166
 
180
 
Line 167... Line 181...
167
	so->first_immediate = ctx->base_reg[TGSI_FILE_IMMEDIATE];
181
	so->first_immediate = ctx->base_reg[TGSI_FILE_IMMEDIATE];
168
	ctx->immediate_idx = 4 * (ctx->info.file_count[TGSI_FILE_CONSTANT] +
182
	ctx->immediate_idx = 4 * (ctx->info.file_max[TGSI_FILE_CONSTANT] + 1 +
169
			ctx->info.file_count[TGSI_FILE_IMMEDIATE]);
183
			ctx->info.file_max[TGSI_FILE_IMMEDIATE] + 1);
Line 176... Line 190...
176
 
190
 
177
	return ret;
191
	return ret;
Line 178... Line 192...
178
}
192
}
-
 
193
 
-
 
194
static void
-
 
195
compile_error(struct fd3_compile_context *ctx, const char *format, ...)
-
 
196
{
-
 
197
	va_list ap;
-
 
198
	va_start(ap, format);
-
 
199
	_debug_vprintf(format, ap);
-
 
200
	va_end(ap);
-
 
201
	tgsi_dump(ctx->tokens, 0);
-
 
202
	assert(0);
-
 
203
}
-
 
204
 
-
 
205
#define compile_assert(ctx, cond) do { \
-
 
206
		if (!(cond)) compile_error((ctx), "failed assert: "#cond"\n"); \
-
 
207
	} while (0)
179
 
208
 
180
static void
209
static void
181
compile_free(struct fd3_compile_context *ctx)
210
compile_free(struct fd3_compile_context *ctx)
182
{
211
{
Line 191... Line 220...
191
	opc_t opc;
220
	opc_t opc;
192
	opc_t hopc;    /* opc to use for half_precision mode, if different */
221
	opc_t hopc;    /* opc to use for half_precision mode, if different */
193
	unsigned arg;
222
	unsigned arg;
194
};
223
};
Line -... Line 224...
-
 
224
 
-
 
225
static unsigned
-
 
226
src_flags(struct fd3_compile_context *ctx, struct ir3_register *reg)
-
 
227
{
-
 
228
	unsigned flags = 0;
-
 
229
 
-
 
230
	if (regmask_get(ctx->needs_ss, reg)) {
-
 
231
		flags |= IR3_INSTR_SS;
-
 
232
		memset(ctx->needs_ss, 0, sizeof(ctx->needs_ss));
-
 
233
	}
-
 
234
 
-
 
235
	if (regmask_get(ctx->needs_sy, reg)) {
-
 
236
		flags |= IR3_INSTR_SY;
-
 
237
		memset(ctx->needs_sy, 0, sizeof(ctx->needs_sy));
-
 
238
	}
-
 
239
 
-
 
240
	return flags;
-
 
241
}
195
 
242
 
196
static struct ir3_register *
243
static struct ir3_register *
197
add_dst_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
244
add_dst_reg(struct fd3_compile_context *ctx, struct ir3_instruction *instr,
198
		const struct tgsi_dst_register *dst, unsigned chan)
245
		const struct tgsi_dst_register *dst, unsigned chan)
199
{
246
{
Line 203... Line 250...
203
	case TGSI_FILE_OUTPUT:
250
	case TGSI_FILE_OUTPUT:
204
	case TGSI_FILE_TEMPORARY:
251
	case TGSI_FILE_TEMPORARY:
205
		num = dst->Index + ctx->base_reg[dst->File];
252
		num = dst->Index + ctx->base_reg[dst->File];
206
		break;
253
		break;
207
	default:
254
	default:
208
		DBG("unsupported dst register file: %s",
255
		compile_error(ctx, "unsupported dst register file: %s\n",
209
			tgsi_file_name(dst->File));
256
			tgsi_file_name(dst->File));
210
		assert(0);
-
 
211
		break;
257
		break;
212
	}
258
	}
Line 213... Line 259...
213
 
259
 
214
	if (ctx->so->half_precision)
260
	if (ctx->so->half_precision)
Line 232... Line 278...
232
		 */
278
		 */
233
	case TGSI_FILE_CONSTANT:
279
	case TGSI_FILE_CONSTANT:
234
		flags |= IR3_REG_CONST;
280
		flags |= IR3_REG_CONST;
235
		num = src->Index + ctx->base_reg[src->File];
281
		num = src->Index + ctx->base_reg[src->File];
236
		break;
282
		break;
-
 
283
	case TGSI_FILE_OUTPUT:
-
 
284
		/* NOTE: we should only end up w/ OUTPUT file for things like
-
 
285
		 * clamp()'ing saturated dst instructions
-
 
286
		 */
237
	case TGSI_FILE_INPUT:
287
	case TGSI_FILE_INPUT:
238
	case TGSI_FILE_TEMPORARY:
288
	case TGSI_FILE_TEMPORARY:
239
		num = src->Index + ctx->base_reg[src->File];
289
		num = src->Index + ctx->base_reg[src->File];
240
		break;
290
		break;
241
	default:
291
	default:
242
		DBG("unsupported src register file: %s",
292
		compile_error(ctx, "unsupported src register file: %s\n",
243
			tgsi_file_name(src->File));
293
			tgsi_file_name(src->File));
244
		assert(0);
-
 
245
		break;
294
		break;
246
	}
295
	}
Line 247... Line 296...
247
 
296
 
248
	if (src->Absolute)
297
	if (src->Absolute)
Line 252... Line 301...
252
	if (ctx->so->half_precision)
301
	if (ctx->so->half_precision)
253
		flags |= IR3_REG_HALF;
302
		flags |= IR3_REG_HALF;
Line 254... Line 303...
254
 
303
 
Line 255... Line -...
255
	reg = ir3_reg_create(instr, regid(num, chan), flags);
-
 
256
 
304
	reg = ir3_reg_create(instr, regid(num, chan), flags);
257
	if (regmask_get(ctx->needs_ss, reg)) {
-
 
258
		instr->flags |= IR3_INSTR_SS;
-
 
259
		memset(ctx->needs_ss, 0, sizeof(ctx->needs_ss));
-
 
260
	}
-
 
261
 
-
 
262
	if (regmask_get(ctx->needs_sy, reg)) {
-
 
263
		instr->flags |= IR3_INSTR_SY;
-
 
Line 264... Line 305...
264
		memset(ctx->needs_sy, 0, sizeof(ctx->needs_sy));
305
 
265
	}
306
	instr->flags |= src_flags(ctx, reg);
Line 266... Line 307...
266
 
307
 
Line 283... Line 324...
283
}
324
}
Line 284... Line 325...
284
 
325
 
285
/* Get internal-temp src/dst to use for a sequence of instructions
326
/* Get internal-temp src/dst to use for a sequence of instructions
286
 * generated by a single TGSI op.
327
 * generated by a single TGSI op.
287
 */
328
 */
288
static void
329
static struct tgsi_src_register *
289
get_internal_temp(struct fd3_compile_context *ctx,
330
get_internal_temp(struct fd3_compile_context *ctx,
290
		struct tgsi_dst_register *tmp_dst,
-
 
291
		struct tgsi_src_register *tmp_src)
331
		struct tgsi_dst_register *tmp_dst)
-
 
332
{
292
{
333
	struct tgsi_src_register *tmp_src;
Line 293... Line 334...
293
	int n;
334
	int n;
294
 
335
 
295
	tmp_dst->File      = TGSI_FILE_TEMPORARY;
336
	tmp_dst->File      = TGSI_FILE_TEMPORARY;
296
	tmp_dst->WriteMask = TGSI_WRITEMASK_XYZW;
337
	tmp_dst->WriteMask = TGSI_WRITEMASK_XYZW;
Line 297... Line 338...
297
	tmp_dst->Indirect  = 0;
338
	tmp_dst->Indirect  = 0;
298
	tmp_dst->Dimension = 0;
339
	tmp_dst->Dimension = 0;
-
 
340
 
-
 
341
	/* assign next temporary: */
Line 299... Line 342...
299
 
342
	n = ctx->num_internal_temps++;
Line 300... Line 343...
300
	/* assign next temporary: */
343
	compile_assert(ctx, n < ARRAY_SIZE(ctx->internal_temps));
-
 
344
	tmp_src = &ctx->internal_temps[n];
-
 
345
 
301
	n = ctx->num_internal_temps++;
346
	tmp_dst->Index = ctx->info.file_max[TGSI_FILE_TEMPORARY] + n + 1;
Line 302... Line 347...
302
 
347
 
303
	tmp_dst->Index = ctx->info.file_count[TGSI_FILE_TEMPORARY] + n;
348
	src_from_dst(tmp_src, tmp_dst);
304
 
349
 
305
	src_from_dst(tmp_src, tmp_dst);
350
	return tmp_src;
306
}
351
}
307
 
352
 
308
/* same as get_internal_temp, but w/ src.xxxx (for instructions that
-
 
309
 * replicate their results)
353
/* same as get_internal_temp, but w/ src.xxxx (for instructions that
-
 
354
 * replicate their results)
310
 */
355
 */
311
static void
356
static struct tgsi_src_register *
312
get_internal_temp_repl(struct fd3_compile_context *ctx,
357
get_internal_temp_repl(struct fd3_compile_context *ctx,
-
 
358
		struct tgsi_dst_register *tmp_dst)
-
 
359
{
-
 
360
	struct tgsi_src_register *tmp_src =
-
 
361
			get_internal_temp(ctx, tmp_dst);
-
 
362
	tmp_src->SwizzleX = tmp_src->SwizzleY =
-
 
363
		tmp_src->SwizzleZ = tmp_src->SwizzleW = TGSI_SWIZZLE_X;
-
 
364
	return tmp_src;
-
 
365
}
-
 
366
 
-
 
367
static inline bool
-
 
368
is_const(struct tgsi_src_register *src)
-
 
369
{
-
 
370
	return (src->File == TGSI_FILE_CONSTANT) ||
-
 
371
			(src->File == TGSI_FILE_IMMEDIATE);
-
 
372
}
-
 
373
 
-
 
374
static type_t
-
 
375
get_ftype(struct fd3_compile_context *ctx)
-
 
376
{
-
 
377
	return ctx->so->half_precision ? TYPE_F16 : TYPE_F32;
-
 
378
}
-
 
379
 
-
 
380
static type_t
-
 
381
get_utype(struct fd3_compile_context *ctx)
-
 
382
{
-
 
383
	return ctx->so->half_precision ? TYPE_U16 : TYPE_U32;
-
 
384
}
-
 
385
 
-
 
386
static unsigned
-
 
387
src_swiz(struct tgsi_src_register *src, int chan)
-
 
388
{
-
 
389
	switch (chan) {
-
 
390
	case 0: return src->SwizzleX;
-
 
391
	case 1: return src->SwizzleY;
-
 
392
	case 2: return src->SwizzleZ;
-
 
393
	case 3: return src->SwizzleW;
-
 
394
	}
-
 
395
	assert(0);
-
 
396
	return 0;
-
 
397
}
-
 
398
 
-
 
399
/* for instructions that cannot take a const register as src, if needed
-
 
400
 * generate a move to temporary gpr:
-
 
401
 */
-
 
402
static struct tgsi_src_register *
-
 
403
get_unconst(struct fd3_compile_context *ctx, struct tgsi_src_register *src)
-
 
404
{
-
 
405
	struct tgsi_dst_register tmp_dst;
-
 
406
	struct tgsi_src_register *tmp_src;
-
 
407
 
-
 
408
	compile_assert(ctx, is_const(src));
313
		struct tgsi_dst_register *tmp_dst,
409
 
Line 314... Line 410...
314
		struct tgsi_src_register *tmp_src)
410
	tmp_src = get_internal_temp(ctx, &tmp_dst);
315
{
411
 
316
	get_internal_temp(ctx, tmp_dst, tmp_src);
412
	create_mov(ctx, &tmp_dst, src);
Line 363... Line 459...
363
	reg->SwizzleY  = swiz2tgsi[swiz];
459
	reg->SwizzleY  = swiz2tgsi[swiz];
364
	reg->SwizzleZ  = swiz2tgsi[swiz];
460
	reg->SwizzleZ  = swiz2tgsi[swiz];
365
	reg->SwizzleW  = swiz2tgsi[swiz];
461
	reg->SwizzleW  = swiz2tgsi[swiz];
366
}
462
}
Line 367... Line -...
367
 
-
 
368
static type_t
-
 
369
get_type(struct fd3_compile_context *ctx)
-
 
370
{
-
 
371
	return ctx->so->half_precision ? TYPE_F16 : TYPE_F32;
-
 
372
}
-
 
373
 
-
 
374
static unsigned
-
 
375
src_swiz(struct tgsi_src_register *src, int chan)
-
 
376
{
-
 
377
	switch (chan) {
-
 
378
	case 0: return src->SwizzleX;
-
 
379
	case 1: return src->SwizzleY;
-
 
380
	case 2: return src->SwizzleZ;
-
 
381
	case 3: return src->SwizzleW;
-
 
382
	}
-
 
383
	assert(0);
-
 
384
	return 0;
-
 
385
}
-
 
386
 
463
 
387
static void
464
static void
388
create_mov(struct fd3_compile_context *ctx, struct tgsi_dst_register *dst,
465
create_mov(struct fd3_compile_context *ctx, struct tgsi_dst_register *dst,
389
		struct tgsi_src_register *src)
466
		struct tgsi_src_register *src)
390
{
467
{
391
	type_t type_mov = get_type(ctx);
468
	type_t type_mov = get_ftype(ctx);
Line 392... Line 469...
392
	unsigned i;
469
	unsigned i;
393
 
470
 
394
	for (i = 0; i < 4; i++) {
471
	for (i = 0; i < 4; i++) {
Line 402... Line 479...
402
			add_src_reg(ctx, instr, src, src_swiz(src, i));
479
			add_src_reg(ctx, instr, src, src_swiz(src, i));
403
		} else {
480
		} else {
404
			ir3_instr_create(ctx->ir, 0, OPC_NOP);
481
			ir3_instr_create(ctx->ir, 0, OPC_NOP);
405
		}
482
		}
406
	}
483
	}
-
 
484
}
-
 
485
 
-
 
486
static void
-
 
487
create_clamp(struct fd3_compile_context *ctx, struct tgsi_dst_register *dst,
-
 
488
		struct tgsi_src_register *minval, struct tgsi_src_register *maxval)
-
 
489
{
-
 
490
	struct ir3_instruction *instr;
-
 
491
	struct tgsi_src_register src;
-
 
492
 
-
 
493
	src_from_dst(&src, dst);
-
 
494
 
-
 
495
	instr = ir3_instr_create(ctx->ir, 2, OPC_MAX_F);
-
 
496
	vectorize(ctx, instr, dst, 2, &src, 0, minval, 0);
-
 
497
 
-
 
498
	instr = ir3_instr_create(ctx->ir, 2, OPC_MIN_F);
-
 
499
	vectorize(ctx, instr, dst, 2, &src, 0, maxval, 0);
-
 
500
}
-
 
501
 
-
 
502
static void
-
 
503
create_clamp_imm(struct fd3_compile_context *ctx,
-
 
504
		struct tgsi_dst_register *dst,
-
 
505
		uint32_t minval, uint32_t maxval)
-
 
506
{
-
 
507
	struct tgsi_src_register minconst, maxconst;
Line -... Line 508...
-
 
508
 
-
 
509
	get_immediate(ctx, &minconst, minval);
-
 
510
	get_immediate(ctx, &maxconst, maxval);
-
 
511
 
407
 
512
	create_clamp(ctx, dst, &minconst, &maxconst);
Line 408... Line 513...
408
}
513
}
409
 
514
 
410
static struct tgsi_dst_register *
515
static struct tgsi_dst_register *
411
get_dst(struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst)
516
get_dst(struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst)
412
{
517
{
413
	struct tgsi_dst_register *dst = &inst->Dst[0].Register;
518
	struct tgsi_dst_register *dst = &inst->Dst[0].Register;
414
	unsigned i;
519
	unsigned i;
415
	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
520
	for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
416
		struct tgsi_src_register *src = &inst->Src[i].Register;
521
		struct tgsi_src_register *src = &inst->Src[i].Register;
417
		if ((src->File == dst->File) && (src->Index == dst->Index)) {
522
		if ((src->File == dst->File) && (src->Index == dst->Index)) {
418
			get_internal_temp(ctx, &ctx->tmp_dst, &ctx->tmp_src);
523
			ctx->tmp_src = get_internal_temp(ctx, &ctx->tmp_dst);
419
			ctx->tmp_dst.WriteMask = dst->WriteMask;
524
			ctx->tmp_dst.WriteMask = dst->WriteMask;
420
			dst = &ctx->tmp_dst;
525
			dst = &ctx->tmp_dst;
421
			break;
526
			break;
Line 428... Line 533...
428
put_dst(struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst,
533
put_dst(struct fd3_compile_context *ctx, struct tgsi_full_instruction *inst,
429
		struct tgsi_dst_register *dst)
534
		struct tgsi_dst_register *dst)
430
{
535
{
431
	/* if necessary, add mov back into original dst: */
536
	/* if necessary, add mov back into original dst: */
432
	if (dst != &inst->Dst[0].Register) {
537
	if (dst != &inst->Dst[0].Register) {
433
		create_mov(ctx, &inst->Dst[0].Register, &ctx->tmp_src);
538
		create_mov(ctx, &inst->Dst[0].Register, ctx->tmp_src);
434
	}
539
	}
435
}
540
}
Line 436... Line 541...
436
 
541
 
437
/* helper to generate the necessary repeat and/or additional instructions
542
/* helper to generate the necessary repeat and/or additional instructions
Line 476... Line 581...
476
						va_arg(ap, struct tgsi_src_register *);
581
						va_arg(ap, struct tgsi_src_register *);
477
				(void)va_arg(ap, unsigned);
582
				(void)va_arg(ap, unsigned);
478
				cur->regs[j+1]->num =
583
				cur->regs[j+1]->num =
479
					regid(cur->regs[j+1]->num >> 2,
584
					regid(cur->regs[j+1]->num >> 2,
480
						src_swiz(src, i));
585
						src_swiz(src, i));
-
 
586
				cur->flags |= src_flags(ctx, cur->regs[j+1]);
481
			}
587
			}
482
			va_end(ap);
588
			va_end(ap);
483
		}
589
		}
484
	}
590
	}
Line 494... Line 600...
494
/*
600
/*
495
 * Handlers for TGSI instructions which do not have a 1:1 mapping to
601
 * Handlers for TGSI instructions which do not have a 1:1 mapping to
496
 * native instructions:
602
 * native instructions:
497
 */
603
 */
Line -... Line 604...
-
 
604
 
-
 
605
static inline void
-
 
606
get_swiz(unsigned *swiz, struct tgsi_src_register *src)
-
 
607
{
-
 
608
	swiz[0] = src->SwizzleX;
-
 
609
	swiz[1] = src->SwizzleY;
-
 
610
	swiz[2] = src->SwizzleZ;
-
 
611
	swiz[3] = src->SwizzleW;
-
 
612
}
498
 
613
 
499
static void
614
static void
500
trans_dotp(const struct instr_translater *t,
615
trans_dotp(const struct instr_translater *t,
501
		struct fd3_compile_context *ctx,
616
		struct fd3_compile_context *ctx,
502
		struct tgsi_full_instruction *inst)
617
		struct tgsi_full_instruction *inst)
503
{
618
{
504
	struct ir3_instruction *instr;
619
	struct ir3_instruction *instr;
505
	struct tgsi_dst_register tmp_dst;
620
	struct tgsi_dst_register tmp_dst;
506
	struct tgsi_src_register tmp_src;
621
	struct tgsi_src_register *tmp_src;
507
	struct tgsi_dst_register *dst  = &inst->Dst[0].Register;
622
	struct tgsi_dst_register *dst  = &inst->Dst[0].Register;
508
	struct tgsi_src_register *src0 = &inst->Src[0].Register;
623
	struct tgsi_src_register *src0 = &inst->Src[0].Register;
509
	struct tgsi_src_register *src1 = &inst->Src[1].Register;
624
	struct tgsi_src_register *src1 = &inst->Src[1].Register;
510
	unsigned swiz0[] = { src0->SwizzleX, src0->SwizzleY, src0->SwizzleZ, src0->SwizzleW };
625
	unsigned swiz0[4];
511
	unsigned swiz1[] = { src1->SwizzleX, src1->SwizzleY, src1->SwizzleZ, src1->SwizzleW };
626
	unsigned swiz1[4];
512
	opc_t opc_mad    = ctx->so->half_precision ? OPC_MAD_F16 : OPC_MAD_F32;
627
	opc_t opc_mad    = ctx->so->half_precision ? OPC_MAD_F16 : OPC_MAD_F32;
513
	unsigned n = t->arg;     /* number of components */
628
	unsigned n = t->arg;     /* number of components */
514
	unsigned i;
-
 
515
 
-
 
Line 516... Line -...
516
	get_internal_temp_repl(ctx, &tmp_dst, &tmp_src);
-
 
517
 
-
 
518
	/* Blob compiler never seems to use a const in src1 position for
-
 
519
	 * mad.*, although there does seem (according to disassembler
-
 
520
	 * hidden in libllvm-a3xx.so) to be a bit to indicate that src1
-
 
521
	 * is a const.  Not sure if this is a hw bug, or simply that the
-
 
522
	 * disassembler lies.
-
 
523
	 */
-
 
524
	if ((src1->File == TGSI_FILE_IMMEDIATE) ||
-
 
525
			(src1->File == TGSI_FILE_CONSTANT)) {
629
	unsigned i, swapped = 0;
526
 
-
 
527
		/* the mov to tmp unswizzles src1, so now we have tmp.xyzw:
-
 
528
		 */
-
 
Line 529... Line -...
529
		for (i = 0; i < 4; i++)
-
 
530
			swiz1[i] = i;
630
 
531
 
631
	tmp_src = get_internal_temp_repl(ctx, &tmp_dst);
-
 
632
 
-
 
633
	/* in particular, can't handle const for src1 for cat3/mad:
532
		/* the first mul.f will clobber tmp.x, but that is ok
634
	 */
-
 
635
	if (is_const(src1)) {
-
 
636
		if (!is_const(src0)) {
533
		 * because after that point we no longer need tmp.x:
637
			struct tgsi_src_register *tmp;
-
 
638
			tmp = src0;
-
 
639
			src0 = src1;
-
 
640
			src1 = tmp;
-
 
641
			swapped = 1;
534
		 */
642
		} else {
Line -... Line 643...
-
 
643
			src0 = get_unconst(ctx, src0);
-
 
644
		}
-
 
645
	}
535
		create_mov(ctx, &tmp_dst, src1);
646
 
536
		src1 = &tmp_src;
647
	get_swiz(swiz0, src0);
537
	}
648
	get_swiz(swiz1, src1);
538
 
649
 
Line 546... Line 657...
546
 
657
 
547
		instr = ir3_instr_create(ctx->ir, 3, opc_mad);
658
		instr = ir3_instr_create(ctx->ir, 3, opc_mad);
548
		add_dst_reg(ctx, instr, &tmp_dst, 0);
659
		add_dst_reg(ctx, instr, &tmp_dst, 0);
549
		add_src_reg(ctx, instr, src0, swiz0[i]);
660
		add_src_reg(ctx, instr, src0, swiz0[i]);
550
		add_src_reg(ctx, instr, src1, swiz1[i]);
661
		add_src_reg(ctx, instr, src1, swiz1[i]);
551
		add_src_reg(ctx, instr, &tmp_src, 0);
662
		add_src_reg(ctx, instr, tmp_src, 0);
Line 552... Line 663...
552
	}
663
	}
553
 
664
 
554
	/* DPH(a,b) = (a.x * b.x) + (a.y * b.y) + (a.z * b.z) + b.w */
665
	/* DPH(a,b) = (a.x * b.x) + (a.y * b.y) + (a.z * b.z) + b.w */
Line 555... Line 666...
555
	if (t->tgsi_opc == TGSI_OPCODE_DPH) {
666
	if (t->tgsi_opc == TGSI_OPCODE_DPH) {
556
		ir3_instr_create(ctx->ir, 0, OPC_NOP);
667
		ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 1;
-
 
668
 
-
 
669
		instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F);
-
 
670
		add_dst_reg(ctx, instr, &tmp_dst, 0);
557
 
671
		if (swapped)
558
		instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F);
672
			add_src_reg(ctx, instr, src0, swiz0[i]);
Line 559... Line 673...
559
		add_dst_reg(ctx, instr, &tmp_dst, 0);
673
		else
560
		add_src_reg(ctx, instr, src1, swiz1[i]);
674
			add_src_reg(ctx, instr, src1, swiz1[i]);
Line 561... Line 675...
561
		add_src_reg(ctx, instr, &tmp_src, 0);
675
		add_src_reg(ctx, instr, tmp_src, 0);
Line 562... Line -...
562
 
-
 
563
		n++;
-
 
564
	}
-
 
565
 
-
 
566
	ir3_instr_create(ctx->ir, 0, OPC_NOP);
-
 
567
 
676
 
568
	/* pad out to multiple of 4 scalar instructions: */
677
		n++;
Line 569... Line 678...
569
	for (i = 2 * n; i % 4; i++) {
678
	}
570
		ir3_instr_create(ctx->ir, 0, OPC_NOP);
679
 
571
	}
680
	ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 2;
572
 
681
 
573
	create_mov(ctx, dst, &tmp_src);
682
	create_mov(ctx, dst, tmp_src);
574
}
683
}
575
 
684
 
576
/* LRP(a,b,c) = (a * b) + ((1 - a) * c) */
685
/* LRP(a,b,c) = (a * b) + ((1 - a) * c) */
577
static void
686
static void
578
trans_lrp(const struct instr_translater *t,
687
trans_lrp(const struct instr_translater *t,
-
 
688
		struct fd3_compile_context *ctx,
-
 
689
		struct tgsi_full_instruction *inst)
-
 
690
{
-
 
691
	struct ir3_instruction *instr;
-
 
692
	struct tgsi_dst_register tmp_dst1, tmp_dst2;
Line 579... Line 693...
579
		struct fd3_compile_context *ctx,
693
	struct tgsi_src_register *tmp_src1, *tmp_src2;
580
		struct tgsi_full_instruction *inst)
694
	struct tgsi_src_register tmp_const;
Line 581... Line 695...
581
{
695
	struct tgsi_src_register *src0 = &inst->Src[0].Register;
Line 582... Line 696...
582
	struct ir3_instruction *instr;
696
	struct tgsi_src_register *src1 = &inst->Src[1].Register;
583
	struct tgsi_dst_register tmp_dst1, tmp_dst2;
697
 
584
	struct tgsi_src_register tmp_src1, tmp_src2;
698
	if (is_const(src0) && is_const(src1))
585
	struct tgsi_src_register tmp_const;
-
 
586
 
-
 
Line 587... Line 699...
587
	get_internal_temp(ctx, &tmp_dst1, &tmp_src1);
699
		src0 = get_unconst(ctx, src0);
588
	get_internal_temp(ctx, &tmp_dst2, &tmp_src2);
700
 
589
 
701
	tmp_src1 = get_internal_temp(ctx, &tmp_dst1);
590
	get_immediate(ctx, &tmp_const, fui(1.0));
-
 
591
 
702
	tmp_src2 = get_internal_temp(ctx, &tmp_dst2);
Line 592... Line 703...
592
	/* tmp1 = (a * b) */
703
 
593
	instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F);
704
	get_immediate(ctx, &tmp_const, fui(1.0));
594
	vectorize(ctx, instr, &tmp_dst1, 2,
705
 
595
			&inst->Src[0].Register, 0,
706
	/* tmp1 = (a * b) */
596
			&inst->Src[1].Register, 0);
707
	instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F);
Line 597... Line 708...
597
 
708
	vectorize(ctx, instr, &tmp_dst1, 2, src0, 0, src1, 0);
598
	/* tmp2 = (1 - a) */
709
 
599
	instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F);
710
	/* tmp2 = (1 - a) */
600
	vectorize(ctx, instr, &tmp_dst2, 2,
711
	instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F);
601
			&tmp_const, 0,
712
	vectorize(ctx, instr, &tmp_dst2, 2, &tmp_const, 0,
602
			&inst->Src[0].Register, IR3_REG_NEGATE);
713
			src0, IR3_REG_NEGATE);
Line 603... Line 714...
603
 
714
 
604
	/* tmp2 = tmp2 * c */
715
	/* tmp2 = tmp2 * c */
605
	instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F);
716
	instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F);
606
	vectorize(ctx, instr, &tmp_dst2, 2,
717
	vectorize(ctx, instr, &tmp_dst2, 2,
607
			&tmp_src2, 0,
718
			tmp_src2, 0,
608
			&inst->Src[2].Register, 0);
719
			&inst->Src[2].Register, 0);
609
 
720
 
610
	/* dst = tmp1 + tmp2 */
721
	/* dst = tmp1 + tmp2 */
611
	instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F);
722
	instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_F);
Line 612... Line 723...
612
	vectorize(ctx, instr, &inst->Dst[0].Register, 2,
723
	vectorize(ctx, instr, &inst->Dst[0].Register, 2,
Line 613... Line 724...
613
			&tmp_src1, 0,
724
			tmp_src1, 0,
614
			&tmp_src2, 0);
725
			tmp_src2, 0);
615
}
726
}
616
 
727
 
Line 617... Line 728...
617
/* FRC(x) = x - FLOOR(x) */
728
/* FRC(x) = x - FLOOR(x) */
618
static void
729
static void
619
trans_frac(const struct instr_translater *t,
730
trans_frac(const struct instr_translater *t,
620
		struct fd3_compile_context *ctx,
731
		struct fd3_compile_context *ctx,
621
		struct tgsi_full_instruction *inst)
732
		struct tgsi_full_instruction *inst)
622
{
733
{
Line 623... Line 734...
623
	struct ir3_instruction *instr;
734
	struct ir3_instruction *instr;
624
	struct tgsi_dst_register tmp_dst;
735
	struct tgsi_dst_register tmp_dst;
625
	struct tgsi_src_register tmp_src;
736
	struct tgsi_src_register *tmp_src;
Line 645... Line 756...
645
		struct tgsi_full_instruction *inst)
756
		struct tgsi_full_instruction *inst)
646
{
757
{
647
	struct ir3_instruction *instr;
758
	struct ir3_instruction *instr;
648
	struct ir3_register *r;
759
	struct ir3_register *r;
649
	struct tgsi_dst_register tmp_dst;
760
	struct tgsi_dst_register tmp_dst;
650
	struct tgsi_src_register tmp_src;
761
	struct tgsi_src_register *tmp_src;
651
	struct tgsi_dst_register *dst  = &inst->Dst[0].Register;
762
	struct tgsi_dst_register *dst  = &inst->Dst[0].Register;
652
	struct tgsi_src_register *src0 = &inst->Src[0].Register;
763
	struct tgsi_src_register *src0 = &inst->Src[0].Register;
653
	struct tgsi_src_register *src1 = &inst->Src[1].Register;
764
	struct tgsi_src_register *src1 = &inst->Src[1].Register;
Line 654... Line 765...
654
 
765
 
Line 655... Line 766...
655
	get_internal_temp_repl(ctx, &tmp_dst, &tmp_src);
766
	tmp_src = get_internal_temp_repl(ctx, &tmp_dst);
656
 
767
 
657
	/* log2 Rtmp, Rsrc0 */
768
	/* log2 Rtmp, Rsrc0 */
658
	ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 5;
769
	ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 5;
659
	instr = ir3_instr_create(ctx->ir, 4, OPC_LOG2);
770
	instr = ir3_instr_create(ctx->ir, 4, OPC_LOG2);
660
	r = add_dst_reg(ctx, instr, &tmp_dst, 0);
771
	r = add_dst_reg(ctx, instr, &tmp_dst, 0);
Line 661... Line 772...
661
	add_src_reg(ctx, instr, src0, src0->SwizzleX);
772
	add_src_reg(ctx, instr, src0, src0->SwizzleX);
662
	regmask_set(ctx->needs_ss, r);
773
	regmask_set(ctx->needs_ss, r, TGSI_WRITEMASK_X);
663
 
774
 
664
	/* mul.f Rtmp, Rtmp, Rsrc1 */
775
	/* mul.f Rtmp, Rtmp, Rsrc1 */
665
	instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F);
776
	instr = ir3_instr_create(ctx->ir, 2, OPC_MUL_F);
Line 666... Line 777...
666
	add_dst_reg(ctx, instr, &tmp_dst, 0);
777
	add_dst_reg(ctx, instr, &tmp_dst, 0);
667
	add_src_reg(ctx, instr, &tmp_src, 0);
778
	add_src_reg(ctx, instr, tmp_src, 0);
668
	add_src_reg(ctx, instr, src1, src1->SwizzleX);
779
	add_src_reg(ctx, instr, src1, src1->SwizzleX);
Line 674... Line 785...
674
	ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 5;
785
	ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 5;
Line 675... Line 786...
675
 
786
 
676
	/* exp2 Rdst, Rtmp */
787
	/* exp2 Rdst, Rtmp */
677
	instr = ir3_instr_create(ctx->ir, 4, OPC_EXP2);
788
	instr = ir3_instr_create(ctx->ir, 4, OPC_EXP2);
678
	r = add_dst_reg(ctx, instr, &tmp_dst, 0);
789
	r = add_dst_reg(ctx, instr, &tmp_dst, 0);
679
	add_src_reg(ctx, instr, &tmp_src, 0);
790
	add_src_reg(ctx, instr, tmp_src, 0);
Line 680... Line 791...
680
	regmask_set(ctx->needs_ss, r);
791
	regmask_set(ctx->needs_ss, r, TGSI_WRITEMASK_X);
681
 
792
 
Line 682... Line 793...
682
	create_mov(ctx, dst, &tmp_src);
793
	create_mov(ctx, dst, tmp_src);
683
}
794
}
684
 
795
 
685
/* texture fetch/sample instructions: */
796
/* texture fetch/sample instructions: */
686
static void
797
static void
687
trans_samp(const struct instr_translater *t,
798
trans_samp(const struct instr_translater *t,
688
		struct fd3_compile_context *ctx,
799
		struct fd3_compile_context *ctx,
689
		struct tgsi_full_instruction *inst)
800
		struct tgsi_full_instruction *inst)
690
{
-
 
691
	struct ir3_register *r;
-
 
692
	struct ir3_instruction *instr;
801
{
693
	struct tgsi_dst_register tmp_dst;
802
	struct ir3_register *r;
694
	struct tgsi_src_register tmp_src;
803
	struct ir3_instruction *instr;
695
	struct tgsi_src_register *coord = &inst->Src[0].Register;
804
	struct tgsi_src_register *coord = &inst->Src[0].Register;
696
	struct tgsi_src_register *samp  = &inst->Src[1].Register;
805
	struct tgsi_src_register *samp  = &inst->Src[1].Register;
Line 709... Line 818...
709
				(int8_t[4]){ 0,  1,  3, -1 } :  /* 2D */
818
				(int8_t[4]){ 0,  1,  3, -1 } :  /* 2D */
710
				(int8_t[4]){ 0,  1,  2,  3 };   /* 3D */
819
				(int8_t[4]){ 0,  1,  2,  3 };   /* 3D */
711
		flags |= IR3_INSTR_P;
820
		flags |= IR3_INSTR_P;
712
		break;
821
		break;
713
	default:
822
	default:
714
		assert(0);
823
		compile_assert(ctx, 0);
715
		break;
824
		break;
716
	}
825
	}
Line 717... Line 826...
717
 
826
 
718
	if (tex == TGSI_TEXTURE_3D)
827
	if (tex == TGSI_TEXTURE_3D)
Line 724... Line 833...
724
	 * might need to emit some mov instructions to shuffle things
833
	 * might need to emit some mov instructions to shuffle things
725
	 * around:
834
	 * around:
726
	 */
835
	 */
727
	for (i = 1; (i < 4) && (order[i] >= 0); i++) {
836
	for (i = 1; (i < 4) && (order[i] >= 0); i++) {
728
		if (src_swiz(coord, i) != (src_swiz(coord, 0) + order[i])) {
837
		if (src_swiz(coord, i) != (src_swiz(coord, 0) + order[i])) {
-
 
838
			struct tgsi_dst_register tmp_dst;
-
 
839
			struct tgsi_src_register *tmp_src;
-
 
840
 
729
			type_t type_mov = get_type(ctx);
841
			type_t type_mov = get_ftype(ctx);
Line 730... Line 842...
730
 
842
 
731
			/* need to move things around: */
843
			/* need to move things around: */
Line 732... Line 844...
732
			get_internal_temp(ctx, &tmp_dst, &tmp_src);
844
			tmp_src = get_internal_temp(ctx, &tmp_dst);
733
 
845
 
734
			for (j = 0; (j < 4) && (order[j] >= 0); j++) {
846
			for (j = 0; (j < 4) && (order[j] >= 0); j++) {
735
				instr = ir3_instr_create(ctx->ir, 1, 0);
847
				instr = ir3_instr_create(ctx->ir, 1, 0);
736
				instr->cat1.src_type = type_mov;
848
				instr->cat1.src_type = type_mov;
737
				instr->cat1.dst_type = type_mov;
849
				instr->cat1.dst_type = type_mov;
738
				add_dst_reg(ctx, instr, &tmp_dst, j);
850
				add_dst_reg(ctx, instr, &tmp_dst, j);
739
				add_src_reg(ctx, instr, coord,
851
				add_src_reg(ctx, instr, coord,
Line 740... Line 852...
740
						src_swiz(coord, order[j]));
852
						src_swiz(coord, order[j]));
Line 741... Line 853...
741
			}
853
			}
742
 
854
 
Line 743... Line 855...
743
			coord = &tmp_src;
855
			coord = tmp_src;
744
 
856
 
745
			if (j < 4)
857
			if (j < 4)
Line 746... Line 858...
746
				ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 4 - j - 1;
858
				ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 4 - j - 1;
747
 
859
 
748
			break;
860
			break;
749
		}
861
		}
750
	}
862
	}
Line 751... Line 863...
751
 
863
 
752
	instr = ir3_instr_create(ctx->ir, 5, t->opc);
864
	instr = ir3_instr_create(ctx->ir, 5, t->opc);
Line 753... Line 865...
753
	instr->cat5.type = get_type(ctx);
865
	instr->cat5.type = get_ftype(ctx);
Line 754... Line 866...
754
	instr->cat5.samp = samp->Index;
866
	instr->cat5.samp = samp->Index;
755
	instr->cat5.tex  = samp->Index;
867
	instr->cat5.tex  = samp->Index;
Line -... Line 868...
-
 
868
	instr->flags |= flags;
-
 
869
 
-
 
870
	r = add_dst_reg(ctx, instr, &inst->Dst[0].Register, 0);
-
 
871
	r->wrmask = inst->Dst[0].Register.WriteMask;
-
 
872
 
-
 
873
	add_src_reg(ctx, instr, coord, coord->SwizzleX);
-
 
874
 
-
 
875
	regmask_set(ctx->needs_sy, r, r->wrmask);
-
 
876
}
-
 
877
 
-
 
878
/*
-
 
879
 * SEQ(a,b) = (a == b) ? 1.0 : 0.0
-
 
880
 *   cmps.f.eq tmp0, b, a
-
 
881
 *   cov.u16f16 dst, tmp0
-
 
882
 *
-
 
883
 * SNE(a,b) = (a != b) ? 1.0 : 0.0
-
 
884
 *   cmps.f.eq tmp0, b, a
-
 
885
 *   add.s tmp0, tmp0, -1
-
 
886
 *   sel.f16 dst, {0.0}, tmp0, {1.0}
-
 
887
 *
-
 
888
 * SGE(a,b) = (a >= b) ? 1.0 : 0.0
-
 
889
 *   cmps.f.ge tmp0, a, b
-
 
890
 *   cov.u16f16 dst, tmp0
-
 
891
 *
-
 
892
 * SLE(a,b) = (a <= b) ? 1.0 : 0.0
-
 
893
 *   cmps.f.ge tmp0, b, a
-
 
894
 *   cov.u16f16 dst, tmp0
-
 
895
 *
756
	instr->flags |= flags;
896
 * SGT(a,b) = (a > b)  ? 1.0 : 0.0
-
 
897
 *   cmps.f.ge tmp0, b, a
-
 
898
 *   add.s tmp0, tmp0, -1
-
 
899
 *   sel.f16 dst, {0.0}, tmp0, {1.0}
-
 
900
 *
757
 
901
 * SLT(a,b) = (a < b)  ? 1.0 : 0.0
758
	r = add_dst_reg(ctx, instr, &inst->Dst[0].Register, 0);
902
 *   cmps.f.ge tmp0, a, b
759
	r->wrmask = inst->Dst[0].Register.WriteMask;
903
 *   add.s tmp0, tmp0, -1
760
 
904
 *   sel.f16 dst, {0.0}, tmp0, {1.0}
761
	add_src_reg(ctx, instr, coord, coord->SwizzleX);
905
 *
762
 
906
 * CMP(a,b,c) = (a < 0.0) ? b : c
763
	regmask_set(ctx->needs_sy, r);
907
 *   cmps.f.ge tmp0, a, {0.0}
764
}
908
 *   add.s tmp0, tmp0, -1
765
 
909
 *   sel.f16 dst, c, tmp0, b
766
/* CMP(a,b,c) = (a < 0) ? b : c */
910
 */
767
static void
911
static void
-
 
912
trans_cmp(const struct instr_translater *t,
-
 
913
		struct fd3_compile_context *ctx,
Line 768... Line 914...
768
trans_cmp(const struct instr_translater *t,
914
		struct tgsi_full_instruction *inst)
Line -... Line 915...
-
 
915
{
-
 
916
	struct ir3_instruction *instr;
-
 
917
	struct tgsi_dst_register tmp_dst;
-
 
918
	struct tgsi_src_register *tmp_src;
-
 
919
	struct tgsi_src_register constval0, constval1;
-
 
920
	/* final instruction for CMP() uses orig src1 and src2: */
-
 
921
	struct tgsi_dst_register *dst = get_dst(ctx, inst);
-
 
922
	struct tgsi_src_register *a0, *a1;
-
 
923
	unsigned condition;
-
 
924
 
-
 
925
	tmp_src = get_internal_temp(ctx, &tmp_dst);
-
 
926
 
-
 
927
	switch (t->tgsi_opc) {
-
 
928
	case TGSI_OPCODE_SEQ:
-
 
929
	case TGSI_OPCODE_SNE:
-
 
930
		a0 = &inst->Src[1].Register;  /* b */
-
 
931
		a1 = &inst->Src[0].Register;  /* a */
-
 
932
		condition = IR3_COND_EQ;
-
 
933
		break;
-
 
934
	case TGSI_OPCODE_SGE:
-
 
935
	case TGSI_OPCODE_SLT:
-
 
936
		a0 = &inst->Src[0].Register;  /* a */
-
 
937
		a1 = &inst->Src[1].Register;  /* b */
-
 
938
		condition = IR3_COND_GE;
-
 
939
		break;
-
 
940
	case TGSI_OPCODE_SLE:
-
 
941
	case TGSI_OPCODE_SGT:
-
 
942
		a0 = &inst->Src[1].Register;  /* b */
-
 
943
		a1 = &inst->Src[0].Register;  /* a */
-
 
944
		condition = IR3_COND_GE;
-
 
945
		break;
-
 
946
	case TGSI_OPCODE_CMP:
-
 
947
		get_immediate(ctx, &constval0, fui(0.0));
769
		struct fd3_compile_context *ctx,
948
		a0 = &inst->Src[0].Register;  /* a */
770
		struct tgsi_full_instruction *inst)
949
		a1 = &constval0;              /* {0.0} */
771
{
950
		condition = IR3_COND_GE;
772
	struct ir3_instruction *instr;
-
 
773
	struct tgsi_dst_register tmp_dst;
951
		break;
774
	struct tgsi_src_register tmp_src;
-
 
775
	struct tgsi_src_register constval;
-
 
Line -... Line 952...
-
 
952
	default:
-
 
953
		compile_assert(ctx, 0);
-
 
954
		return;
-
 
955
	}
-
 
956
 
-
 
957
	if (is_const(a0) && is_const(a1))
-
 
958
		a0 = get_unconst(ctx, a0);
-
 
959
 
-
 
960
	/* cmps.f.ge tmp, a0, a1 */
-
 
961
	instr = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F);
-
 
962
	instr->cat2.condition = condition;
-
 
963
	vectorize(ctx, instr, &tmp_dst, 2, a0, 0, a1, 0);
-
 
964
 
-
 
965
	switch (t->tgsi_opc) {
776
	/* final instruction uses original src1 and src2, so we need get_dst() */
966
	case TGSI_OPCODE_SEQ:
777
	struct tgsi_dst_register *dst = get_dst(ctx, inst);
967
	case TGSI_OPCODE_SGE:
778
 
968
	case TGSI_OPCODE_SLE:
779
	get_internal_temp(ctx, &tmp_dst, &tmp_src);
969
		/* cov.u16f16 dst, tmp0 */
780
 
970
		instr = ir3_instr_create(ctx->ir, 1, 0);
781
	/* cmps.f.ge tmp, src0, 0.0 */
971
		instr->cat1.src_type = get_utype(ctx);
Line -... Line 972...
-
 
972
		instr->cat1.dst_type = get_ftype(ctx);
782
	instr = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F);
973
		vectorize(ctx, instr, dst, 1, tmp_src, 0);
783
	instr->cat2.condition = IR3_COND_GE;
974
		break;
784
	get_immediate(ctx, &constval, fui(0.0));
975
	case TGSI_OPCODE_SNE:
785
	vectorize(ctx, instr, &tmp_dst, 2,
976
	case TGSI_OPCODE_SGT:
786
			&inst->Src[0].Register, 0,
977
	case TGSI_OPCODE_SLT:
787
			&constval, 0);
978
	case TGSI_OPCODE_CMP:
788
 
979
		/* add.s tmp, tmp, -1 */
-
 
980
		instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_S);
-
 
981
		instr->repeat = 3;
-
 
982
		add_dst_reg(ctx, instr, &tmp_dst, 0);
-
 
983
		add_src_reg(ctx, instr, tmp_src, 0)->flags |= IR3_REG_R;
-
 
984
		ir3_reg_create(instr, 0, IR3_REG_IMMED)->iim_val = -1;
-
 
985
 
-
 
986
		if (t->tgsi_opc == TGSI_OPCODE_CMP) {
-
 
987
			/* sel.{f32,f16} dst, src2, tmp, src1 */
-
 
988
			instr = ir3_instr_create(ctx->ir, 3,
-
 
989
					ctx->so->half_precision ? OPC_SEL_F16 : OPC_SEL_F32);
-
 
990
			vectorize(ctx, instr, dst, 3,
-
 
991
					&inst->Src[2].Register, 0,
Line 789... Line 992...
789
	/* add.s tmp, tmp, -1 */
992
					tmp_src, 0,
790
	instr = ir3_instr_create(ctx->ir, 2, OPC_ADD_S);
993
					&inst->Src[1].Register, 0);
Line 791... Line 994...
791
	instr->repeat = 3;
994
		} else {
Line 856... Line 1059...
856
	struct tgsi_src_register *src = &inst->Src[0].Register;
1059
	struct tgsi_src_register *src = &inst->Src[0].Register;
857
	struct tgsi_src_register constval;
1060
	struct tgsi_src_register constval;
Line 858... Line 1061...
858
 
1061
 
Line -... Line 1062...
-
 
1062
	get_immediate(ctx, &constval, fui(0.0));
-
 
1063
 
-
 
1064
	if (is_const(src))
859
	get_immediate(ctx, &constval, fui(0.0));
1065
		src = get_unconst(ctx, src);
860
 
1066
 
861
	instr = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F);
-
 
862
	ir3_reg_create(instr, regid(REG_P0, 0), 0);
1067
	instr = ir3_instr_create(ctx->ir, 2, OPC_CMPS_F);
-
 
1068
	ir3_reg_create(instr, regid(REG_P0, 0), 0);
863
	add_src_reg(ctx, instr, &constval, constval.SwizzleX);
1069
	add_src_reg(ctx, instr, src, src->SwizzleX);
Line 864... Line 1070...
864
	add_src_reg(ctx, instr, src, src->SwizzleX);
1070
	add_src_reg(ctx, instr, &constval, constval.SwizzleX);
865
	instr->cat2.condition = IR3_COND_EQ;
1071
	instr->cat2.condition = IR3_COND_EQ;
866
 
1072
 
Line 937... Line 1143...
937
instr_cat2(const struct instr_translater *t,
1143
instr_cat2(const struct instr_translater *t,
938
		struct fd3_compile_context *ctx,
1144
		struct fd3_compile_context *ctx,
939
		struct tgsi_full_instruction *inst)
1145
		struct tgsi_full_instruction *inst)
940
{
1146
{
941
	struct tgsi_dst_register *dst = get_dst(ctx, inst);
1147
	struct tgsi_dst_register *dst = get_dst(ctx, inst);
-
 
1148
	struct tgsi_src_register *src0 = &inst->Src[0].Register;
-
 
1149
	struct tgsi_src_register *src1 = &inst->Src[1].Register;
942
	struct ir3_instruction *instr;
1150
	struct ir3_instruction *instr;
943
	unsigned src0_flags = 0;
1151
	unsigned src0_flags = 0;
Line 944... Line -...
944
 
-
 
945
	instr = ir3_instr_create(ctx->ir, 2, t->opc);
-
 
946
 
1152
 
947
	switch (t->tgsi_opc) {
-
 
948
	case TGSI_OPCODE_SLT:
-
 
949
	case TGSI_OPCODE_SGE:
-
 
950
		instr->cat2.condition = t->arg;
-
 
951
		break;
1153
	switch (t->tgsi_opc) {
952
	case TGSI_OPCODE_ABS:
1154
	case TGSI_OPCODE_ABS:
953
		src0_flags = IR3_REG_ABS;
1155
		src0_flags = IR3_REG_ABS;
954
		break;
1156
		break;
Line 968... Line 1170...
968
	case OPC_NOT_B:
1170
	case OPC_NOT_B:
969
	case OPC_BFREV_B:
1171
	case OPC_BFREV_B:
970
	case OPC_SETRM:
1172
	case OPC_SETRM:
971
	case OPC_CBITS_B:
1173
	case OPC_CBITS_B:
972
		/* these only have one src reg */
1174
		/* these only have one src reg */
973
		vectorize(ctx, instr, dst, 1,
1175
		instr = ir3_instr_create(ctx->ir, 2, t->opc);
974
				&inst->Src[0].Register, src0_flags);
1176
		vectorize(ctx, instr, dst, 1, src0, src0_flags);
975
		break;
1177
		break;
976
	default:
1178
	default:
977
		vectorize(ctx, instr, dst, 2,
1179
		if (is_const(src0) && is_const(src1))
978
				&inst->Src[0].Register, src0_flags,
1180
			src0 = get_unconst(ctx, src0);
-
 
1181
 
979
				&inst->Src[1].Register, 0);
1182
		instr = ir3_instr_create(ctx->ir, 2, t->opc);
-
 
1183
		vectorize(ctx, instr, dst, 2, src0, src0_flags, src1, 0);
980
		break;
1184
		break;
981
	}
1185
	}
Line 982... Line 1186...
982
 
1186
 
983
	put_dst(ctx, inst, dst);
1187
	put_dst(ctx, inst, dst);
Line -... Line 1188...
-
 
1188
}
-
 
1189
 
-
 
1190
static bool is_mad(opc_t opc)
-
 
1191
{
-
 
1192
	switch (opc) {
-
 
1193
	case OPC_MAD_U16:
-
 
1194
	case OPC_MADSH_U16:
-
 
1195
	case OPC_MAD_S16:
-
 
1196
	case OPC_MADSH_M16:
-
 
1197
	case OPC_MAD_U24:
-
 
1198
	case OPC_MAD_S24:
-
 
1199
	case OPC_MAD_F16:
-
 
1200
	case OPC_MAD_F32:
-
 
1201
		return true;
-
 
1202
	default:
-
 
1203
		return false;
-
 
1204
	}
984
}
1205
}
985
 
1206
 
986
static void
1207
static void
987
instr_cat3(const struct instr_translater *t,
1208
instr_cat3(const struct instr_translater *t,
988
		struct fd3_compile_context *ctx,
1209
		struct fd3_compile_context *ctx,
989
		struct tgsi_full_instruction *inst)
1210
		struct tgsi_full_instruction *inst)
-
 
1211
{
990
{
1212
	struct tgsi_dst_register *dst = get_dst(ctx, inst);
991
	struct tgsi_dst_register *dst = get_dst(ctx, inst);
-
 
992
	struct tgsi_src_register *src1 = &inst->Src[1].Register;
-
 
993
	struct tgsi_dst_register tmp_dst;
1213
	struct tgsi_src_register *src0 = &inst->Src[0].Register;
Line 994... Line 1214...
994
	struct tgsi_src_register tmp_src;
1214
	struct tgsi_src_register *src1 = &inst->Src[1].Register;
995
	struct ir3_instruction *instr;
1215
	struct ir3_instruction *instr;
996
 
-
 
997
	/* Blob compiler never seems to use a const in src1 position..
-
 
998
	 * although there does seem (according to disassembler hidden
-
 
999
	 * in libllvm-a3xx.so) to be a bit to indicate that src1 is a
1216
 
1000
	 * const.  Not sure if this is a hw bug, or simply that the
1217
	/* in particular, can't handle const for src1 for cat3..
1001
	 * disassembler lies.
1218
	 * for mad, we can swap first two src's if needed:
1002
	 */
1219
	 */
-
 
1220
	if (is_const(src1)) {
1003
	if ((src1->File == TGSI_FILE_CONSTANT) ||
1221
		if (is_mad(t->opc) && !is_const(src0)) {
1004
			(src1->File == TGSI_FILE_IMMEDIATE)) {
1222
			struct tgsi_src_register *tmp;
-
 
1223
			tmp = src0;
-
 
1224
			src0 = src1;
-
 
1225
			src1 = tmp;
1005
		get_internal_temp(ctx, &tmp_dst, &tmp_src);
1226
		} else {
Line 1006... Line 1227...
1006
		create_mov(ctx, &tmp_dst, src1);
1227
			src0 = get_unconst(ctx, src0);
1007
		src1 = &tmp_src;
1228
		}
1008
	}
1229
	}
1009
 
-
 
1010
	instr = ir3_instr_create(ctx->ir, 3,
-
 
1011
			ctx->so->half_precision ? t->hopc : t->opc);
1230
 
1012
	vectorize(ctx, instr, dst, 3,
1231
	instr = ir3_instr_create(ctx->ir, 3,
1013
			&inst->Src[0].Register, 0,
1232
			ctx->so->half_precision ? t->hopc : t->opc);
Line 1014... Line 1233...
1014
			src1, 0,
1233
	vectorize(ctx, instr, dst, 3, src0, 0, src1, 0,
1015
			&inst->Src[2].Register, 0);
1234
			&inst->Src[2].Register, 0);
1016
	put_dst(ctx, inst, dst);
1235
	put_dst(ctx, inst, dst);
1017
}
1236
}
1018
 
1237
 
1019
static void
1238
static void
-
 
1239
instr_cat4(const struct instr_translater *t,
1020
instr_cat4(const struct instr_translater *t,
1240
		struct fd3_compile_context *ctx,
Line -... Line 1241...
-
 
1241
		struct tgsi_full_instruction *inst)
-
 
1242
{
-
 
1243
	struct tgsi_dst_register *dst = get_dst(ctx, inst);
-
 
1244
	struct tgsi_src_register *src = &inst->Src[0].Register;
1021
		struct fd3_compile_context *ctx,
1245
	struct ir3_instruction *instr;
1022
		struct tgsi_full_instruction *inst)
1246
 
Line 1023... Line 1247...
1023
{
1247
	/* seems like blob compiler avoids const as src.. */
1024
	struct tgsi_dst_register *dst = get_dst(ctx, inst);
-
 
Line 1025... Line 1248...
1025
	struct ir3_instruction *instr;
1248
	if (is_const(src))
-
 
1249
		src = get_unconst(ctx, src);
Line 1026... Line 1250...
1026
 
1250
 
1027
	ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 5;
1251
	ir3_instr_create(ctx->ir, 0, OPC_NOP)->repeat = 5;
Line 1028... Line 1252...
1028
	instr = ir3_instr_create(ctx->ir, 4, t->opc);
1252
	instr = ir3_instr_create(ctx->ir, 4, t->opc);
Line 1049... Line 1273...
1049
	INSTR(DP3,          trans_dotp, .arg = 3),
1273
	INSTR(DP3,          trans_dotp, .arg = 3),
1050
	INSTR(DP4,          trans_dotp, .arg = 4),
1274
	INSTR(DP4,          trans_dotp, .arg = 4),
1051
	INSTR(DPH,          trans_dotp, .arg = 3),   /* almost like DP3 */
1275
	INSTR(DPH,          trans_dotp, .arg = 3),   /* almost like DP3 */
1052
	INSTR(MIN,          instr_cat2, .opc = OPC_MIN_F),
1276
	INSTR(MIN,          instr_cat2, .opc = OPC_MIN_F),
1053
	INSTR(MAX,          instr_cat2, .opc = OPC_MAX_F),
1277
	INSTR(MAX,          instr_cat2, .opc = OPC_MAX_F),
1054
	INSTR(SLT,          instr_cat2, .opc = OPC_CMPS_F, .arg = IR3_COND_LT),
-
 
1055
	INSTR(SGE,          instr_cat2, .opc = OPC_CMPS_F, .arg = IR3_COND_GE),
-
 
1056
	INSTR(MAD,          instr_cat3, .opc = OPC_MAD_F32, .hopc = OPC_MAD_F16),
1278
	INSTR(MAD,          instr_cat3, .opc = OPC_MAD_F32, .hopc = OPC_MAD_F16),
1057
	INSTR(LRP,          trans_lrp),
1279
	INSTR(LRP,          trans_lrp),
1058
	INSTR(FRC,          trans_frac),
1280
	INSTR(FRC,          trans_frac),
1059
	INSTR(FLR,          instr_cat2, .opc = OPC_FLOOR_F),
1281
	INSTR(FLR,          instr_cat2, .opc = OPC_FLOOR_F),
-
 
1282
	INSTR(ARL,          instr_cat2, .opc = OPC_FLOOR_F),
1060
	INSTR(EX2,          instr_cat4, .opc = OPC_EXP2),
1283
	INSTR(EX2,          instr_cat4, .opc = OPC_EXP2),
1061
	INSTR(LG2,          instr_cat4, .opc = OPC_LOG2),
1284
	INSTR(LG2,          instr_cat4, .opc = OPC_LOG2),
1062
	INSTR(POW,          trans_pow),
1285
	INSTR(POW,          trans_pow),
1063
	INSTR(ABS,          instr_cat2, .opc = OPC_ABSNEG_F),
1286
	INSTR(ABS,          instr_cat2, .opc = OPC_ABSNEG_F),
1064
	INSTR(COS,          instr_cat4, .opc = OPC_SIN),
1287
	INSTR(COS,          instr_cat4, .opc = OPC_SIN),
1065
	INSTR(SIN,          instr_cat4, .opc = OPC_COS),
1288
	INSTR(SIN,          instr_cat4, .opc = OPC_COS),
1066
	INSTR(TEX,          trans_samp, .opc = OPC_SAM, .arg = TGSI_OPCODE_TEX),
1289
	INSTR(TEX,          trans_samp, .opc = OPC_SAM, .arg = TGSI_OPCODE_TEX),
1067
	INSTR(TXP,          trans_samp, .opc = OPC_SAM, .arg = TGSI_OPCODE_TXP),
1290
	INSTR(TXP,          trans_samp, .opc = OPC_SAM, .arg = TGSI_OPCODE_TXP),
-
 
1291
	INSTR(SGT,          trans_cmp),
-
 
1292
	INSTR(SLT,          trans_cmp),
-
 
1293
	INSTR(SGE,          trans_cmp),
-
 
1294
	INSTR(SLE,          trans_cmp),
-
 
1295
	INSTR(SNE,          trans_cmp),
-
 
1296
	INSTR(SEQ,          trans_cmp),
1068
	INSTR(CMP,          trans_cmp),
1297
	INSTR(CMP,          trans_cmp),
1069
	INSTR(IF,           trans_if),
1298
	INSTR(IF,           trans_if),
1070
	INSTR(ELSE,         trans_else),
1299
	INSTR(ELSE,         trans_else),
1071
	INSTR(ENDIF,        trans_endif),
1300
	INSTR(ENDIF,        trans_endif),
1072
	INSTR(END,          instr_cat0, .opc = OPC_END),
1301
	INSTR(END,          instr_cat0, .opc = OPC_END),
Line 1130... Line 1359...
1130
	struct fd3_shader_stateobj *so = ctx->so;
1359
	struct fd3_shader_stateobj *so = ctx->so;
1131
	unsigned base = ctx->base_reg[TGSI_FILE_OUTPUT];
1360
	unsigned base = ctx->base_reg[TGSI_FILE_OUTPUT];
1132
	unsigned name = decl->Semantic.Name;
1361
	unsigned name = decl->Semantic.Name;
1133
	unsigned i;
1362
	unsigned i;
Line 1134... Line 1363...
1134
 
1363
 
Line 1135... Line 1364...
1135
	assert(decl->Declaration.Semantic);  // TODO is this ever not true?
1364
	compile_assert(ctx, decl->Declaration.Semantic);  // TODO is this ever not true?
Line 1136... Line 1365...
1136
 
1365
 
1137
	DBG("decl out[%d] -> r%d", name, decl->Range.First + base);   // XXX
1366
	DBG("decl out[%d] -> r%d", name, decl->Range.First + base);   // XXX
Line 1150... Line 1379...
1150
		case TGSI_SEMANTIC_TEXCOORD:
1379
		case TGSI_SEMANTIC_TEXCOORD:
1151
			for (i = decl->Range.First; i <= decl->Range.Last; i++)
1380
			for (i = decl->Range.First; i <= decl->Range.Last; i++)
1152
				so->outputs[so->outputs_count++].regid = regid(i + base, 0);
1381
				so->outputs[so->outputs_count++].regid = regid(i + base, 0);
1153
			break;
1382
			break;
1154
		default:
1383
		default:
1155
			DBG("unknown VS semantic name: %s",
1384
			compile_error(ctx, "unknown VS semantic name: %s\n",
1156
					tgsi_semantic_names[name]);
1385
					tgsi_semantic_names[name]);
1157
			assert(0);
-
 
1158
		}
1386
		}
1159
	} else {
1387
	} else {
1160
		switch (name) {
1388
		switch (name) {
1161
		case TGSI_SEMANTIC_COLOR:
1389
		case TGSI_SEMANTIC_COLOR:
1162
			so->color_regid = regid(decl->Range.First + base, 0);
1390
			so->color_regid = regid(decl->Range.First + base, 0);
1163
			break;
1391
			break;
1164
		default:
1392
		default:
1165
			DBG("unknown VS semantic name: %s",
1393
			compile_error(ctx, "unknown VS semantic name: %s\n",
1166
					tgsi_semantic_names[name]);
1394
					tgsi_semantic_names[name]);
1167
			assert(0);
-
 
1168
		}
1395
		}
1169
	}
1396
	}
1170
}
1397
}
Line 1171... Line 1398...
1171
 
1398
 
Line 1221... Line 1448...
1221
 
1448
 
1222
			if (t->fxn) {
1449
			if (t->fxn) {
1223
				t->fxn(t, ctx, inst);
1450
				t->fxn(t, ctx, inst);
1224
				ctx->num_internal_temps = 0;
1451
				ctx->num_internal_temps = 0;
1225
			} else {
1452
			} else {
1226
				debug_printf("unknown TGSI opc: %s\n",
1453
				compile_error(ctx, "unknown TGSI opc: %s\n",
-
 
1454
						tgsi_get_opcode_name(opc));
-
 
1455
			}
-
 
1456
 
-
 
1457
			switch (inst->Instruction.Saturate) {
-
 
1458
			case TGSI_SAT_ZERO_ONE:
-
 
1459
				create_clamp_imm(ctx, &inst->Dst[0].Register,
-
 
1460
						fui(0.0), fui(1.0));
-
 
1461
				break;
-
 
1462
			case TGSI_SAT_MINUS_PLUS_ONE:
1227
						tgsi_get_opcode_name(opc));
1463
				create_clamp_imm(ctx, &inst->Dst[0].Register,
1228
				tgsi_dump(ctx->tokens, 0);
1464
						fui(-1.0), fui(1.0));
1229
				assert(0);
1465
				break;
Line 1230... Line 1466...
1230
			}
1466
			}
1231
 
1467
 
1232
			break;
1468
			break;
Line 1251... Line 1487...
1251
 
1487
 
Line 1252... Line 1488...
1252
	assert(!so->ir);
1488
	assert(!so->ir);
Line -... Line 1489...
-
 
1489
 
-
 
1490
	so->ir = ir3_shader_create();
1253
 
1491
 
1254
	so->ir = ir3_shader_create();
1492
	assert(so->ir);
1255
 
1493
 
Line 1256... Line 1494...
1256
	so->color_regid = regid(63,0);
1494
	so->color_regid = regid(63,0);