Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
4358 Serge 1
/*
2
 * Copyright (c) 2012 Rob Clark 
3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining a
5
 * copy of this software and associated documentation files (the "Software"),
6
 * to deal in the Software without restriction, including without limitation
7
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
 * and/or sell copies of the Software, and to permit persons to whom the
9
 * Software is furnished to do so, subject to the following conditions:
10
 *
11
 * The above copyright notice and this permission notice (including the next
12
 * paragraph) shall be included in all copies or substantial portions of the
13
 * Software.
14
 *
15
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
 * SOFTWARE.
22
 */
23
 
24
#include "ir-a2xx.h"
25
 
26
#include <stdlib.h>
27
#include <stdio.h>
28
#include <string.h>
29
#include <assert.h>
30
 
31
#include "freedreno_util.h"
32
#include "instr-a2xx.h"
33
 
34
/* Debug trace: the "if (0)" keeps the DBG() call from ever running while
 * still letting the compiler check the format string and arguments.
 */
#define DEBUG_MSG(f, ...)  do { if (0) DBG(f, ##__VA_ARGS__); } while (0)
35
/* Warning: forwards to DBG() with a "WARN:  " prefix glued onto the format. */
#define WARN_MSG(f, ...)   DBG("WARN:  "f, ##__VA_ARGS__)
36
/* Error: forwards to DBG() with an "ERROR: " prefix glued onto the format. */
#define ERROR_MSG(f, ...)  DBG("ERROR: "f, ##__VA_ARGS__)
37
 
38
/* Largest register number accepted by ir2_reg_create(). */
#define REG_MASK 0x3f
39
 
40
/* encodes one control-flow entry into *instr */
static int cf_emit(struct ir2_cf *cf, instr_cf_t *instr);
41
 
42
/* dispatches an instruction to the FETCH or ALU encoder */
static int instr_emit(struct ir2_instruction *instr, uint32_t *dwords,
43
		uint32_t idx, struct ir2_shader_info *info);
44
 
45
/* register usage bookkeeping for the shader info */
static void reg_update_stats(struct ir2_register *reg,
46
		struct ir2_shader_info *info, bool dest);
47
/* swizzle / write-mask encoders for FETCH and ALU operands */
static uint32_t reg_fetch_src_swiz(struct ir2_register *reg, uint32_t n);
48
static uint32_t reg_fetch_dst_swiz(struct ir2_register *reg);
49
static uint32_t reg_alu_dst_swiz(struct ir2_register *reg);
50
static uint32_t reg_alu_src_swiz(struct ir2_register *reg);
51
 
52
/* simple allocator to carve allocations out of an up-front allocated heap,
53
 * so that we can free everything easily in one shot.
54
 */
55
static void * ir2_alloc(struct ir2_shader *shader, int sz)
56
{
57
	void *ptr = &shader->heap[shader->heap_idx];
58
	shader->heap_idx += align(sz, 4);
59
	return ptr;
60
}
61
 
62
/* Duplicate a NUL-terminated string into storage obtained from
 * ir2_alloc().  A NULL input is passed through as NULL.
 */
static char * ir2_strdup(struct ir2_shader *shader, const char *str)
{
	char *copy;
	int n;

	if (!str)
		return NULL;

	n = strlen(str);
	copy = ir2_alloc(shader, n + 1);
	memcpy(copy, str, n);
	copy[n] = '\0';

	return copy;
}
73
 
74
struct ir2_shader * ir2_shader_create(void)
75
{
76
	DEBUG_MSG("");
77
	return calloc(1, sizeof(struct ir2_shader));
78
}
79
 
80
void ir2_shader_destroy(struct ir2_shader *shader)
81
{
82
	DEBUG_MSG("");
83
	free(shader);
84
}
85
 
86
/* resolve addr/cnt/sequence fields in the individual CF's */
87
static int shader_resolve(struct ir2_shader *shader, struct ir2_shader_info *info)
88
{
89
	uint32_t addr;
90
	unsigned i;
91
	int j;
92
 
93
	addr = shader->cfs_count / 2;
94
	for (i = 0; i < shader->cfs_count; i++) {
95
		struct ir2_cf *cf = shader->cfs[i];
96
		if ((cf->cf_type == EXEC) || (cf->cf_type == EXEC_END)) {
97
			uint32_t sequence = 0;
98
 
99
			if (cf->exec.addr && (cf->exec.addr != addr))
100
				WARN_MSG("invalid addr '%d' at CF %d", cf->exec.addr, i);
101
			if (cf->exec.cnt && (cf->exec.cnt != cf->exec.instrs_count))
102
				WARN_MSG("invalid cnt '%d' at CF %d", cf->exec.cnt, i);
103
 
104
			for (j = cf->exec.instrs_count - 1; j >= 0; j--) {
105
				struct ir2_instruction *instr = cf->exec.instrs[j];
106
				sequence <<= 2;
107
				if (instr->instr_type == IR2_FETCH)
108
					sequence |= 0x1;
109
				if (instr->sync)
110
					sequence |= 0x2;
111
			}
112
 
113
			cf->exec.addr = addr;
114
			cf->exec.cnt  = cf->exec.instrs_count;
115
			cf->exec.sequence = sequence;
116
 
117
			addr += cf->exec.instrs_count;
118
		}
119
	}
120
 
121
	info->sizedwords = 3 * addr;
122
 
123
	return 0;
124
}
125
 
126
void * ir2_shader_assemble(struct ir2_shader *shader, struct ir2_shader_info *info)
127
{
128
	uint32_t i, j;
129
	uint32_t *ptr, *dwords = NULL;
130
	uint32_t idx = 0;
131
	int ret;
132
 
133
	info->sizedwords    = 0;
134
	info->max_reg       = -1;
135
	info->max_input_reg = 0;
136
	info->regs_written  = 0;
137
 
138
	/* we need an even # of CF's.. insert a NOP if needed */
139
	if (shader->cfs_count != align(shader->cfs_count, 2))
140
		ir2_cf_create(shader, NOP);
141
 
142
	/* first pass, resolve sizes and addresses: */
143
	ret = shader_resolve(shader, info);
144
	if (ret) {
145
		ERROR_MSG("resolve failed: %d", ret);
146
		goto fail;
147
	}
148
 
149
	ptr = dwords = calloc(1, 4 * info->sizedwords);
150
 
151
	/* second pass, emit CF program in pairs: */
152
	for (i = 0; i < shader->cfs_count; i += 2) {
153
		instr_cf_t *cfs = (instr_cf_t *)ptr;
154
		ret = cf_emit(shader->cfs[i], &cfs[0]);
155
		if (ret) {
156
			ERROR_MSG("CF emit failed: %d\n", ret);
157
			goto fail;
158
		}
159
		ret = cf_emit(shader->cfs[i+1], &cfs[1]);
160
		if (ret) {
161
			ERROR_MSG("CF emit failed: %d\n", ret);
162
			goto fail;
163
		}
164
		ptr += 3;
165
		assert((ptr - dwords) <= info->sizedwords);
166
	}
167
 
168
	/* third pass, emit ALU/FETCH: */
169
	for (i = 0; i < shader->cfs_count; i++) {
170
		struct ir2_cf *cf = shader->cfs[i];
171
		if ((cf->cf_type == EXEC) || (cf->cf_type == EXEC_END)) {
172
			for (j = 0; j < cf->exec.instrs_count; j++) {
173
				ret = instr_emit(cf->exec.instrs[j], ptr, idx++, info);
174
				if (ret) {
175
					ERROR_MSG("instruction emit failed: %d", ret);
176
					goto fail;
177
				}
178
				ptr += 3;
179
				assert((ptr - dwords) <= info->sizedwords);
180
			}
181
		}
182
	}
183
 
184
	return dwords;
185
 
186
fail:
187
	free(dwords);
188
	return NULL;
189
}
190
 
191
 
192
/* Create a control-flow entry of the given type and append it to the
 * shader's CF list.  The entry's storage comes from ir2_alloc().
 */
struct ir2_cf * ir2_cf_create(struct ir2_shader *shader, instr_cf_opc_t cf_type)
{
	struct ir2_cf *node = ir2_alloc(shader, sizeof(*node));

	DEBUG_MSG("%d", cf_type);

	node->cf_type = cf_type;
	node->shader = shader;

	/* the CF table is a fixed-size array */
	assert(shader->cfs_count < ARRAY_SIZE(shader->cfs));
	shader->cfs[shader->cfs_count] = node;
	shader->cfs_count++;

	return node;
}
202
 
203
 
204
/*
205
 * CF instructions:
206
 */
207
 
208
/* Encode one control-flow entry into *instr.
 *
 * NOP, EXEC, EXEC_END and ALLOC are supported; every other listed opcode
 * logs "TODO" and fails.  Returns 0 on success and -1 on failure.
 */
static int cf_emit(struct ir2_cf *cf, instr_cf_t *instr)
{
	memset(instr, 0, sizeof(*instr));

	instr->opc = cf->cf_type;

	switch (cf->cf_type) {
	case NOP:
		/* nothing beyond the opcode */
		return 0;

	case EXEC:
	case EXEC_END:
		assert(cf->exec.addr <= 0x1ff);
		assert(cf->exec.cnt <= 0x6);
		assert(cf->exec.sequence <= 0xfff);
		instr->exec.address = cf->exec.addr;
		instr->exec.count = cf->exec.cnt;
		instr->exec.serialize = cf->exec.sequence;
		return 0;

	case ALLOC:
		assert(cf->alloc.size <= 0xf);
		instr->alloc.size = cf->alloc.size;
		if ((cf->alloc.type != SQ_POSITION) &&
				(cf->alloc.type != SQ_PARAMETER_PIXEL)) {
			ERROR_MSG("invalid alloc type: %d", cf->alloc.type);
			return -1;
		}
		instr->alloc.buffer_select = cf->alloc.type;
		return 0;

	case COND_EXEC:
	case COND_EXEC_END:
	case COND_PRED_EXEC:
	case COND_PRED_EXEC_END:
	case LOOP_START:
	case LOOP_END:
	case COND_CALL:
	case RETURN:
	case COND_JMP:
	case COND_EXEC_PRED_CLEAN:
	case COND_EXEC_PRED_CLEAN_END:
	case MARK_VS_FETCH_DONE:
		ERROR_MSG("TODO");
		return -1;
	}

	return 0;
}
257
 
258
 
259
struct ir2_instruction * ir2_instr_create(struct ir2_cf *cf, int instr_type)
260
{
261
	struct ir2_instruction *instr =
262
			ir2_alloc(cf->shader, sizeof(struct ir2_instruction));
263
	DEBUG_MSG("%d", instr_type);
264
	instr->shader = cf->shader;
265
	instr->pred = cf->shader->pred;
266
	instr->instr_type = instr_type;
267
	assert(cf->exec.instrs_count < ARRAY_SIZE(cf->exec.instrs));
268
	cf->exec.instrs[cf->exec.instrs_count++] = instr;
269
	return instr;
270
}
271
 
272
 
273
/*
274
 * FETCH instructions:
275
 */
276
 
277
static int instr_emit_fetch(struct ir2_instruction *instr,
278
		uint32_t *dwords, uint32_t idx,
279
		struct ir2_shader_info *info)
280
{
281
	instr_fetch_t *fetch = (instr_fetch_t *)dwords;
282
	int reg = 0;
283
	struct ir2_register *dst_reg = instr->regs[reg++];
284
	struct ir2_register *src_reg = instr->regs[reg++];
285
 
286
	memset(fetch, 0, sizeof(*fetch));
287
 
288
	reg_update_stats(dst_reg, info, true);
289
	reg_update_stats(src_reg, info, false);
290
 
291
	fetch->opc = instr->fetch.opc;
292
 
293
	if (instr->fetch.opc == VTX_FETCH) {
294
		instr_fetch_vtx_t *vtx = &fetch->vtx;
295
 
296
		assert(instr->fetch.stride <= 0xff);
297
		assert(instr->fetch.fmt <= 0x3f);
298
		assert(instr->fetch.const_idx <= 0x1f);
299
		assert(instr->fetch.const_idx_sel <= 0x3);
300
 
301
		vtx->src_reg = src_reg->num;
302
		vtx->src_swiz = reg_fetch_src_swiz(src_reg, 1);
303
		vtx->dst_reg = dst_reg->num;
304
		vtx->dst_swiz = reg_fetch_dst_swiz(dst_reg);
305
		vtx->must_be_one = 1;
306
		vtx->const_index = instr->fetch.const_idx;
307
		vtx->const_index_sel = instr->fetch.const_idx_sel;
308
		vtx->format_comp_all = !!instr->fetch.is_signed;
309
		vtx->num_format_all = !instr->fetch.is_normalized;
310
		vtx->format = instr->fetch.fmt;
311
		vtx->stride = instr->fetch.stride;
312
		vtx->offset = instr->fetch.offset;
313
 
314
		if (instr->pred != IR2_PRED_NONE) {
315
			vtx->pred_select = 1;
316
			vtx->pred_condition = (instr->pred == IR2_PRED_EQ) ? 1 : 0;
317
		}
318
 
319
		/* XXX seems like every FETCH but the first has
320
		 * this bit set:
321
		 */
322
		vtx->reserved3 = (idx > 0) ? 0x1 : 0x0;
323
		vtx->reserved0 = (idx > 0) ? 0x2 : 0x3;
324
	} else if (instr->fetch.opc == TEX_FETCH) {
325
		instr_fetch_tex_t *tex = &fetch->tex;
326
 
327
		assert(instr->fetch.const_idx <= 0x1f);
328
 
329
		tex->src_reg = src_reg->num;
330
		tex->src_swiz = reg_fetch_src_swiz(src_reg, 3);
331
		tex->dst_reg = dst_reg->num;
332
		tex->dst_swiz = reg_fetch_dst_swiz(dst_reg);
333
		tex->const_idx = instr->fetch.const_idx;
334
		tex->mag_filter = TEX_FILTER_USE_FETCH_CONST;
335
		tex->min_filter = TEX_FILTER_USE_FETCH_CONST;
336
		tex->mip_filter = TEX_FILTER_USE_FETCH_CONST;
337
		tex->aniso_filter = ANISO_FILTER_USE_FETCH_CONST;
338
		tex->arbitrary_filter = ARBITRARY_FILTER_USE_FETCH_CONST;
339
		tex->vol_mag_filter = TEX_FILTER_USE_FETCH_CONST;
340
		tex->vol_min_filter = TEX_FILTER_USE_FETCH_CONST;
341
		tex->use_comp_lod = 1;
342
		tex->use_reg_lod = !instr->fetch.is_cube;
343
		tex->sample_location = SAMPLE_CENTER;
344
 
345
		if (instr->pred != IR2_PRED_NONE) {
346
			tex->pred_select = 1;
347
			tex->pred_condition = (instr->pred == IR2_PRED_EQ) ? 1 : 0;
348
		}
349
 
350
	} else {
351
		ERROR_MSG("invalid fetch opc: %d\n", instr->fetch.opc);
352
		return -1;
353
	}
354
 
355
	return 0;
356
}
357
 
358
/*
359
 * ALU instructions:
360
 */
361
 
362
static int instr_emit_alu(struct ir2_instruction *instr, uint32_t *dwords,
363
		struct ir2_shader_info *info)
364
{
365
	int reg = 0;
366
	instr_alu_t *alu = (instr_alu_t *)dwords;
367
	struct ir2_register *dst_reg  = instr->regs[reg++];
368
	struct ir2_register *src1_reg;
369
	struct ir2_register *src2_reg;
370
	struct ir2_register *src3_reg;
371
 
372
	memset(alu, 0, sizeof(*alu));
373
 
374
	/* handle instructions w/ 3 src operands: */
375
	switch (instr->alu.vector_opc) {
376
	case MULADDv:
377
	case CNDEv:
378
	case CNDGTEv:
379
	case CNDGTv:
380
	case DOT2ADDv:
381
		/* note: disassembler lists 3rd src first, ie:
382
		 *   MULADDv Rdst = Rsrc3 + (Rsrc1 * Rsrc2)
383
		 * which is the reason for this strange ordering.
384
		 */
385
		src3_reg = instr->regs[reg++];
386
		break;
387
	default:
388
		src3_reg = NULL;
389
		break;
390
	}
391
 
392
	src1_reg = instr->regs[reg++];
393
	src2_reg = instr->regs[reg++];
394
 
395
	reg_update_stats(dst_reg, info, true);
396
	reg_update_stats(src1_reg, info, false);
397
	reg_update_stats(src2_reg, info, false);
398
 
399
	assert((dst_reg->flags & ~IR2_REG_EXPORT) == 0);
400
	assert(!dst_reg->swizzle || (strlen(dst_reg->swizzle) == 4));
401
	assert((src1_reg->flags & IR2_REG_EXPORT) == 0);
402
	assert(!src1_reg->swizzle || (strlen(src1_reg->swizzle) == 4));
403
	assert((src2_reg->flags & IR2_REG_EXPORT) == 0);
404
	assert(!src2_reg->swizzle || (strlen(src2_reg->swizzle) == 4));
405
 
406
	if (instr->alu.vector_opc == ~0) {
407
		alu->vector_opc          = MAXv;
408
		alu->vector_write_mask   = 0;
409
	} else {
410
		alu->vector_opc          = instr->alu.vector_opc;
411
		alu->vector_write_mask   = reg_alu_dst_swiz(dst_reg);
412
	}
413
 
414
	alu->vector_dest         = dst_reg->num;
415
	alu->export_data         = !!(dst_reg->flags & IR2_REG_EXPORT);
416
 
417
	// TODO predicate case/condition.. need to add to parser
418
 
419
	alu->src2_reg            = src2_reg->num;
420
	alu->src2_swiz           = reg_alu_src_swiz(src2_reg);
421
	alu->src2_reg_negate     = !!(src2_reg->flags & IR2_REG_NEGATE);
422
	alu->src2_reg_abs        = !!(src2_reg->flags & IR2_REG_ABS);
423
	alu->src2_sel            = !(src2_reg->flags & IR2_REG_CONST);
424
 
425
	alu->src1_reg            = src1_reg->num;
426
	alu->src1_swiz           = reg_alu_src_swiz(src1_reg);
427
	alu->src1_reg_negate     = !!(src1_reg->flags & IR2_REG_NEGATE);
428
	alu->src1_reg_abs        = !!(src1_reg->flags & IR2_REG_ABS);
429
	alu->src1_sel            = !(src1_reg->flags & IR2_REG_CONST);
430
 
431
	alu->vector_clamp        = instr->alu.vector_clamp;
432
	alu->scalar_clamp        = instr->alu.scalar_clamp;
433
 
434
	if (instr->alu.scalar_opc != ~0) {
435
		struct ir2_register *sdst_reg = instr->regs[reg++];
436
 
437
		reg_update_stats(sdst_reg, info, true);
438
 
439
		assert(sdst_reg->flags == dst_reg->flags);
440
 
441
		if (src3_reg) {
442
			assert(src3_reg == instr->regs[reg++]);
443
		} else {
444
			src3_reg = instr->regs[reg++];
445
		}
446
 
447
		alu->scalar_dest         = sdst_reg->num;
448
		alu->scalar_write_mask   = reg_alu_dst_swiz(sdst_reg);
449
		alu->scalar_opc          = instr->alu.scalar_opc;
450
	} else {
451
		/* not sure if this is required, but adreno compiler seems
452
		 * to always set scalar opc to MAXs if it is not used:
453
		 */
454
		alu->scalar_opc = MAXs;
455
	}
456
 
457
	if (src3_reg) {
458
		reg_update_stats(src3_reg, info, false);
459
 
460
		alu->src3_reg            = src3_reg->num;
461
		alu->src3_swiz           = reg_alu_src_swiz(src3_reg);
462
		alu->src3_reg_negate     = !!(src3_reg->flags & IR2_REG_NEGATE);
463
		alu->src3_reg_abs        = !!(src3_reg->flags & IR2_REG_ABS);
464
		alu->src3_sel            = !(src3_reg->flags & IR2_REG_CONST);
465
	} else {
466
		/* not sure if this is required, but adreno compiler seems
467
		 * to always set register bank for 3rd src if unused:
468
		 */
469
		alu->src3_sel = 1;
470
	}
471
 
472
	if (instr->pred != IR2_PRED_NONE) {
473
		alu->pred_select = (instr->pred == IR2_PRED_EQ) ? 3 : 2;
474
	}
475
 
476
	return 0;
477
}
478
 
479
static int instr_emit(struct ir2_instruction *instr, uint32_t *dwords,
480
		uint32_t idx, struct ir2_shader_info *info)
481
{
482
	switch (instr->instr_type) {
483
	case IR2_FETCH: return instr_emit_fetch(instr, dwords, idx, info);
484
	case IR2_ALU:   return instr_emit_alu(instr, dwords, info);
485
	}
486
	return -1;
487
}
488
 
489
 
490
struct ir2_register * ir2_reg_create(struct ir2_instruction *instr,
491
		int num, const char *swizzle, int flags)
492
{
493
	struct ir2_register *reg =
494
			ir2_alloc(instr->shader, sizeof(struct ir2_register));
495
	DEBUG_MSG("%x, %d, %s", flags, num, swizzle);
496
	assert(num <= REG_MASK);
497
	reg->flags = flags;
498
	reg->num = num;
499
	reg->swizzle = ir2_strdup(instr->shader, swizzle);
500
	assert(instr->regs_count < ARRAY_SIZE(instr->regs));
501
	instr->regs[instr->regs_count++] = reg;
502
	return reg;
503
}
504
 
505
static void reg_update_stats(struct ir2_register *reg,
506
		struct ir2_shader_info *info, bool dest)
507
{
508
	if (!(reg->flags & (IR2_REG_CONST|IR2_REG_EXPORT))) {
509
		info->max_reg = MAX2(info->max_reg, reg->num);
510
 
511
		if (dest) {
512
			info->regs_written |= (1 << reg->num);
513
		} else if (!(info->regs_written & (1 << reg->num))) {
514
			/* for registers that haven't been written, they must be an
515
			 * input register that the thread scheduler (presumably?)
516
			 * needs to know about:
517
			 */
518
			info->max_input_reg = MAX2(info->max_input_reg, reg->num);
519
		}
520
	}
521
}
522
 
523
static uint32_t reg_fetch_src_swiz(struct ir2_register *reg, uint32_t n)
524
{
525
	uint32_t swiz = 0;
526
	int i;
527
 
528
	assert(reg->flags == 0);
529
	assert(reg->swizzle);
530
 
531
	DEBUG_MSG("fetch src R%d.%s", reg->num, reg->swizzle);
532
 
533
	for (i = n-1; i >= 0; i--) {
534
		swiz <<= 2;
535
		switch (reg->swizzle[i]) {
536
		default:
537
			ERROR_MSG("invalid fetch src swizzle: %s", reg->swizzle);
538
		case 'x': swiz |= 0x0; break;
539
		case 'y': swiz |= 0x1; break;
540
		case 'z': swiz |= 0x2; break;
541
		case 'w': swiz |= 0x3; break;
542
		}
543
	}
544
 
545
	return swiz;
546
}
547
 
548
static uint32_t reg_fetch_dst_swiz(struct ir2_register *reg)
549
{
550
	uint32_t swiz = 0;
551
	int i;
552
 
553
	assert(reg->flags == 0);
554
	assert(!reg->swizzle || (strlen(reg->swizzle) == 4));
555
 
556
	DEBUG_MSG("fetch dst R%d.%s", reg->num, reg->swizzle);
557
 
558
	if (reg->swizzle) {
559
		for (i = 3; i >= 0; i--) {
560
			swiz <<= 3;
561
			switch (reg->swizzle[i]) {
562
			default:
563
				ERROR_MSG("invalid dst swizzle: %s", reg->swizzle);
564
			case 'x': swiz |= 0x0; break;
565
			case 'y': swiz |= 0x1; break;
566
			case 'z': swiz |= 0x2; break;
567
			case 'w': swiz |= 0x3; break;
568
			case '0': swiz |= 0x4; break;
569
			case '1': swiz |= 0x5; break;
570
			case '_': swiz |= 0x7; break;
571
			}
572
		}
573
	} else {
574
		swiz = 0x688;
575
	}
576
 
577
	return swiz;
578
}
579
 
580
/* actually, a write-mask */
581
static uint32_t reg_alu_dst_swiz(struct ir2_register *reg)
582
{
583
	uint32_t swiz = 0;
584
	int i;
585
 
586
	assert((reg->flags & ~IR2_REG_EXPORT) == 0);
587
	assert(!reg->swizzle || (strlen(reg->swizzle) == 4));
588
 
589
	DEBUG_MSG("alu dst R%d.%s", reg->num, reg->swizzle);
590
 
591
	if (reg->swizzle) {
592
		for (i = 3; i >= 0; i--) {
593
			swiz <<= 1;
594
			if (reg->swizzle[i] == "xyzw"[i]) {
595
				swiz |= 0x1;
596
			} else if (reg->swizzle[i] != '_') {
597
				ERROR_MSG("invalid dst swizzle: %s", reg->swizzle);
598
				break;
599
			}
600
		}
601
	} else {
602
		swiz = 0xf;
603
	}
604
 
605
	return swiz;
606
}
607
 
608
static uint32_t reg_alu_src_swiz(struct ir2_register *reg)
609
{
610
	uint32_t swiz = 0;
611
	int i;
612
 
613
	assert((reg->flags & IR2_REG_EXPORT) == 0);
614
	assert(!reg->swizzle || (strlen(reg->swizzle) == 4));
615
 
616
	DEBUG_MSG("vector src R%d.%s", reg->num, reg->swizzle);
617
 
618
	if (reg->swizzle) {
619
		for (i = 3; i >= 0; i--) {
620
			swiz <<= 2;
621
			switch (reg->swizzle[i]) {
622
			default:
623
				ERROR_MSG("invalid vector src swizzle: %s", reg->swizzle);
624
			case 'x': swiz |= (0x0 - i) & 0x3; break;
625
			case 'y': swiz |= (0x1 - i) & 0x3; break;
626
			case 'z': swiz |= (0x2 - i) & 0x3; break;
627
			case 'w': swiz |= (0x3 - i) & 0x3; break;
628
			}
629
		}
630
	} else {
631
		swiz = 0x0;
632
	}
633
 
634
	return swiz;
635
}