Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
5564 serge 1
/*
2
 * Copyright (C) 2009 Nicolai Haehnle.
3
 * Copyright 2011 Tom Stellard 
4
 *
5
 * All Rights Reserved.
6
 *
7
 * Permission is hereby granted, free of charge, to any person obtaining
8
 * a copy of this software and associated documentation files (the
9
 * "Software"), to deal in the Software without restriction, including
10
 * without limitation the rights to use, copy, modify, merge, publish,
11
 * distribute, sublicense, and/or sell copies of the Software, and to
12
 * permit persons to whom the Software is furnished to do so, subject to
13
 * the following conditions:
14
 *
15
 * The above copyright notice and this permission notice (including the
16
 * next paragraph) shall be included in all copies or substantial
17
 * portions of the Software.
18
 *
19
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
20
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
22
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
23
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
24
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
25
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26
 *
27
 */
28
 
29
#include "radeon_program_pair.h"
30
 
31
#include <stdio.h>
32
 
33
#include "main/glheader.h"
34
#include "util/register_allocate.h"
35
#include "util/u_memory.h"
36
#include "util/ralloc.h"
37
 
38
#include "r300_fragprog_swizzle.h"
39
#include "radeon_compiler.h"
40
#include "radeon_compiler_util.h"
41
#include "radeon_dataflow.h"
42
#include "radeon_list.h"
43
#include "radeon_regalloc.h"
44
#include "radeon_variable.h"
45
 
46
/* Set to 1 to get a trace of the allocator's decisions on stderr. */
#define VERBOSE 0

/* printf-style trace helper; the fprintf is only reached when VERBOSE is
 * non-zero. */
#define DBG(...) \
	do { \
		if (VERBOSE) \
			fprintf(stderr, __VA_ARGS__); \
	} while (0)
49
 
50
 
51
 
52
struct register_info {
53
	struct live_intervals Live[4];
54
 
55
	unsigned int Used:1;
56
	unsigned int Allocated:1;
57
	unsigned int File:3;
58
	unsigned int Index:RC_REGISTER_INDEX_BITS;
59
	unsigned int Writemask;
60
};
61
 
62
/** Working state shared by the allocation passes in this file. */
struct regalloc_state {
	/** Compiler whose program is being allocated. */
	struct radeon_compiler * C;

	/** Per-input bookkeeping, NumInputs entries. */
	struct register_info * Input;
	unsigned NumInputs;

	/** Per-temporary bookkeeping, NumTemporaries entries. */
	struct register_info * Temporary;
	unsigned NumTemporaries;

	/** Non-zero when only the inputs-only allocation is performed; in that
	 * mode remap_register also rewrites temporaries. */
	unsigned Simple;
	/** Largest IP of any loop end found so far while scanning. */
	int LoopEnd;
};
74
 
75
struct rc_class {
76
	enum rc_reg_class ID;
77
 
78
	unsigned int WritemaskCount;
79
 
80
	/** List of writemasks that belong to this class */
81
	unsigned int Writemasks[3];
82
 
83
 
84
};
85
 
86
static const struct rc_class rc_class_list [] = {
87
	{RC_REG_CLASS_SINGLE, 3,
88
		{RC_MASK_X,
89
		 RC_MASK_Y,
90
		 RC_MASK_Z}},
91
	{RC_REG_CLASS_DOUBLE, 3,
92
		{RC_MASK_X | RC_MASK_Y,
93
		 RC_MASK_X | RC_MASK_Z,
94
		 RC_MASK_Y | RC_MASK_Z}},
95
	{RC_REG_CLASS_TRIPLE, 1,
96
		{RC_MASK_X | RC_MASK_Y | RC_MASK_Z,
97
		 RC_MASK_NONE,
98
		 RC_MASK_NONE}},
99
	{RC_REG_CLASS_ALPHA, 1,
100
		{RC_MASK_W,
101
		 RC_MASK_NONE,
102
		 RC_MASK_NONE}},
103
	{RC_REG_CLASS_SINGLE_PLUS_ALPHA, 3,
104
		{RC_MASK_X | RC_MASK_W,
105
		 RC_MASK_Y | RC_MASK_W,
106
		 RC_MASK_Z | RC_MASK_W}},
107
	{RC_REG_CLASS_DOUBLE_PLUS_ALPHA, 3,
108
		{RC_MASK_X | RC_MASK_Y | RC_MASK_W,
109
		 RC_MASK_X | RC_MASK_Z | RC_MASK_W,
110
		 RC_MASK_Y | RC_MASK_Z | RC_MASK_W}},
111
	{RC_REG_CLASS_TRIPLE_PLUS_ALPHA, 1,
112
		{RC_MASK_X | RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
113
		RC_MASK_NONE,
114
		RC_MASK_NONE}},
115
	{RC_REG_CLASS_X, 1,
116
		{RC_MASK_X,
117
		RC_MASK_NONE,
118
		RC_MASK_NONE}},
119
	{RC_REG_CLASS_Y, 1,
120
		{RC_MASK_Y,
121
		RC_MASK_NONE,
122
		RC_MASK_NONE}},
123
	{RC_REG_CLASS_Z, 1,
124
		{RC_MASK_Z,
125
		RC_MASK_NONE,
126
		RC_MASK_NONE}},
127
	{RC_REG_CLASS_XY, 1,
128
		{RC_MASK_X | RC_MASK_Y,
129
		RC_MASK_NONE,
130
		RC_MASK_NONE}},
131
	{RC_REG_CLASS_YZ, 1,
132
		{RC_MASK_Y | RC_MASK_Z,
133
		RC_MASK_NONE,
134
		RC_MASK_NONE}},
135
	{RC_REG_CLASS_XZ, 1,
136
		{RC_MASK_X | RC_MASK_Z,
137
		RC_MASK_NONE,
138
		RC_MASK_NONE}},
139
	{RC_REG_CLASS_XW, 1,
140
		{RC_MASK_X | RC_MASK_W,
141
		RC_MASK_NONE,
142
		RC_MASK_NONE}},
143
	{RC_REG_CLASS_YW, 1,
144
		{RC_MASK_Y | RC_MASK_W,
145
		RC_MASK_NONE,
146
		RC_MASK_NONE}},
147
	{RC_REG_CLASS_ZW, 1,
148
		{RC_MASK_Z | RC_MASK_W,
149
		RC_MASK_NONE,
150
		RC_MASK_NONE}},
151
	{RC_REG_CLASS_XYW, 1,
152
		{RC_MASK_X | RC_MASK_Y | RC_MASK_W,
153
		RC_MASK_NONE,
154
		RC_MASK_NONE}},
155
	{RC_REG_CLASS_YZW, 1,
156
		{RC_MASK_Y | RC_MASK_Z | RC_MASK_W,
157
		RC_MASK_NONE,
158
		RC_MASK_NONE}},
159
	{RC_REG_CLASS_XZW, 1,
160
		{RC_MASK_X | RC_MASK_Z | RC_MASK_W,
161
		RC_MASK_NONE,
162
		RC_MASK_NONE}}
163
};
164
 
165
static void print_live_intervals(struct live_intervals * src)
166
{
167
	if (!src || !src->Used) {
168
		DBG("(null)");
169
		return;
170
	}
171
 
172
	DBG("(%i,%i)", src->Start, src->End);
173
}
174
 
175
static int overlap_live_intervals(struct live_intervals * a, struct live_intervals * b)
176
{
177
	if (VERBOSE) {
178
		DBG("overlap_live_intervals: ");
179
		print_live_intervals(a);
180
		DBG(" to ");
181
		print_live_intervals(b);
182
		DBG("\n");
183
	}
184
 
185
	if (!a->Used || !b->Used) {
186
		DBG("    unused interval\n");
187
		return 0;
188
	}
189
 
190
	if (a->Start > b->Start) {
191
		if (a->Start < b->End) {
192
			DBG("    overlap\n");
193
			return 1;
194
		}
195
	} else if (b->Start > a->Start) {
196
		if (b->Start < a->End) {
197
			DBG("    overlap\n");
198
			return 1;
199
		}
200
	} else { /* a->Start == b->Start */
201
		if (a->Start != a->End && b->Start != b->End) {
202
			DBG("    overlap\n");
203
			return 1;
204
		}
205
	}
206
 
207
	DBG("    no overlap\n");
208
 
209
	return 0;
210
}
211
 
212
static void scan_read_callback(void * data, struct rc_instruction * inst,
213
		rc_register_file file, unsigned int index, unsigned int mask)
214
{
215
	struct regalloc_state * s = data;
216
	struct register_info * reg;
217
	unsigned int i;
218
 
219
	if (file != RC_FILE_INPUT)
220
		return;
221
 
222
	s->Input[index].Used = 1;
223
	reg = &s->Input[index];
224
 
225
	for (i = 0; i < 4; i++) {
226
		if (!((mask >> i) & 0x1)) {
227
			continue;
228
		}
229
		reg->Live[i].Used = 1;
230
		reg->Live[i].Start = 0;
231
		reg->Live[i].End =
232
			s->LoopEnd > inst->IP ? s->LoopEnd : inst->IP;
233
	}
234
}
235
 
236
static void remap_register(void * data, struct rc_instruction * inst,
237
		rc_register_file * file, unsigned int * index)
238
{
239
	struct regalloc_state * s = data;
240
	const struct register_info * reg;
241
 
242
	if (*file == RC_FILE_TEMPORARY && s->Simple)
243
		reg = &s->Temporary[*index];
244
	else if (*file == RC_FILE_INPUT)
245
		reg = &s->Input[*index];
246
	else
247
		return;
248
 
249
	if (reg->Allocated) {
250
		*index = reg->Index;
251
	}
252
}
253
 
254
static void alloc_input_simple(void * data, unsigned int input,
255
							unsigned int hwreg)
256
{
257
	struct regalloc_state * s = data;
258
 
259
	if (input >= s->NumInputs)
260
		return;
261
 
262
	s->Input[input].Allocated = 1;
263
	s->Input[input].File = RC_FILE_TEMPORARY;
264
	s->Input[input].Index = hwreg;
265
}
266
 
267
/* This functions offsets the temporary register indices by the number
268
 * of input registers, because input registers are actually temporaries and
269
 * should not occupy the same space.
270
 *
271
 * This pass is supposed to be used to maintain correct allocation of inputs
272
 * if the standard register allocation is disabled. */
273
static void do_regalloc_inputs_only(struct regalloc_state * s)
274
{
275
	for (unsigned i = 0; i < s->NumTemporaries; i++) {
276
		s->Temporary[i].Allocated = 1;
277
		s->Temporary[i].File = RC_FILE_TEMPORARY;
278
		s->Temporary[i].Index = i + s->NumInputs;
279
	}
280
}
281
 
282
/* Returns 1 when op is RC_OPCODE_DDX or RC_OPCODE_DDY, 0 otherwise. */
static unsigned int is_derivative(rc_opcode op)
{
	switch (op) {
	case RC_OPCODE_DDX:
	case RC_OPCODE_DDY:
		return 1;
	default:
		return 0;
	}
}
286
 
287
static int find_class(
288
	const struct rc_class * classes,
289
	unsigned int writemask,
290
	unsigned int max_writemask_count)
291
{
292
	unsigned int i;
293
	for (i = 0; i < RC_REG_CLASS_COUNT; i++) {
294
		unsigned int j;
295
		if (classes[i].WritemaskCount > max_writemask_count) {
296
			continue;
297
		}
298
		for (j = 0; j < 3; j++) {
299
			if (classes[i].Writemasks[j] == writemask) {
300
				return i;
301
			}
302
		}
303
	}
304
	return -1;
305
}
306
 
307
/** User data handed to variable_get_class_read_cb. */
struct variable_get_class_cb_data {
	/** Flag the callback clears when the swizzle cannot be converted. */
	unsigned * can_change_writemask;
	/** Conversion swizzle applied to each source argument's swizzle. */
	unsigned conversion_swizzle;
};
311
 
312
static void variable_get_class_read_cb(
313
	void * userdata,
314
	struct rc_instruction * inst,
315
	struct rc_pair_instruction_arg * arg,
316
	struct rc_pair_instruction_source * src)
317
{
318
	struct variable_get_class_cb_data * d = userdata;
319
	unsigned int new_swizzle = rc_adjust_channels(arg->Swizzle,
320
							d->conversion_swizzle);
321
	if (!r300_swizzle_is_native_basic(new_swizzle)) {
322
		*d->can_change_writemask = 0;
323
	}
324
}
325
 
326
static enum rc_reg_class variable_get_class(
327
	struct rc_variable * variable,
328
	const struct rc_class * classes)
329
{
330
	unsigned int i;
331
	unsigned int can_change_writemask= 1;
332
	unsigned int writemask = rc_variable_writemask_sum(variable);
333
	struct rc_list * readers = rc_variable_readers_union(variable);
334
	int class_index;
335
 
336
	if (!variable->C->is_r500) {
337
		struct rc_class c;
338
		struct rc_variable * var_ptr;
339
		/* The assumption here is that if an instruction has type
340
		 * RC_INSTRUCTION_NORMAL then it is a TEX instruction.
341
		 * r300 and r400 can't swizzle the result of a TEX lookup. */
342
		for (var_ptr = variable; var_ptr; var_ptr = var_ptr->Friend) {
343
			if (var_ptr->Inst->Type == RC_INSTRUCTION_NORMAL) {
344
				writemask = RC_MASK_XYZW;
345
			}
346
		}
347
 
348
		/* Check if it is possible to do swizzle packing for r300/r400
349
		 * without creating non-native swizzles. */
350
		class_index = find_class(classes, writemask, 3);
351
		if (class_index < 0) {
352
			goto error;
353
		}
354
		c = classes[class_index];
355
		if (c.WritemaskCount == 1) {
356
			goto done;
357
		}
358
		for (i = 0; i < c.WritemaskCount; i++) {
359
			struct rc_variable * var_ptr;
360
			for (var_ptr = variable; var_ptr;
361
						var_ptr = var_ptr->Friend) {
362
				int j;
363
				unsigned int conversion_swizzle =
364
						rc_make_conversion_swizzle(
365
						writemask, c.Writemasks[i]);
366
				struct variable_get_class_cb_data d;
367
				d.can_change_writemask = &can_change_writemask;
368
				d.conversion_swizzle = conversion_swizzle;
369
				/* If we get this far var_ptr->Inst has to
370
				 * be a pair instruction.  If variable or any
371
				 * of its friends are normal instructions,
372
				 * then the writemask will be set to RC_MASK_XYZW
373
				 * and the function will return before it gets
374
				 * here. */
375
				rc_pair_for_all_reads_arg(var_ptr->Inst,
376
					variable_get_class_read_cb, &d);
377
 
378
				for (j = 0; j < var_ptr->ReaderCount; j++) {
379
					unsigned int old_swizzle;
380
					unsigned int new_swizzle;
381
					struct rc_reader r = var_ptr->Readers[j];
382
					if (r.Inst->Type ==
383
							RC_INSTRUCTION_PAIR ) {
384
						old_swizzle = r.U.P.Arg->Swizzle;
385
					} else {
386
						/* Source operands of TEX
387
						 * instructions can't be
388
						 * swizzle on r300/r400 GPUs.
389
						 */
390
						can_change_writemask = 0;
391
						break;
392
					}
393
					new_swizzle = rc_adjust_channels(
394
						old_swizzle, conversion_swizzle);
395
					if (!r300_swizzle_is_native_basic(
396
								new_swizzle)) {
397
						can_change_writemask = 0;
398
						break;
399
					}
400
				}
401
				if (!can_change_writemask) {
402
					break;
403
				}
404
			}
405
			if (!can_change_writemask) {
406
				break;
407
			}
408
		}
409
	}
410
 
411
	if (variable->Inst->Type == RC_INSTRUCTION_PAIR) {
412
		/* DDX/DDY seem to always fail when their writemasks are
413
		 * changed.*/
414
		if (is_derivative(variable->Inst->U.P.RGB.Opcode)
415
		    || is_derivative(variable->Inst->U.P.Alpha.Opcode)) {
416
			can_change_writemask = 0;
417
		}
418
	}
419
	for ( ; readers; readers = readers->Next) {
420
		struct rc_reader * r = readers->Item;
421
		if (r->Inst->Type == RC_INSTRUCTION_PAIR) {
422
			if (r->U.P.Arg->Source == RC_PAIR_PRESUB_SRC) {
423
				can_change_writemask = 0;
424
				break;
425
			}
426
			/* DDX/DDY also fail when their swizzles are changed. */
427
			if (is_derivative(r->Inst->U.P.RGB.Opcode)
428
			    || is_derivative(r->Inst->U.P.Alpha.Opcode)) {
429
				can_change_writemask = 0;
430
				break;
431
			}
432
		}
433
	}
434
 
435
	class_index = find_class(classes, writemask,
436
						can_change_writemask ? 3 : 1);
437
done:
438
	if (class_index > -1) {
439
		return classes[class_index].ID;
440
	} else {
441
error:
442
		rc_error(variable->C,
443
				"Could not find class for index=%u mask=%u\n",
444
				variable->Dst.Index, writemask);
445
		return 0;
446
	}
447
}
448
 
449
static unsigned int overlap_live_intervals_array(
450
	struct live_intervals * a,
451
	struct live_intervals * b)
452
{
453
	unsigned int a_chan, b_chan;
454
	for (a_chan = 0; a_chan < 4; a_chan++) {
455
		for (b_chan = 0; b_chan < 4; b_chan++) {
456
			if (overlap_live_intervals(&a[a_chan], &b[b_chan])) {
457
					return 1;
458
			}
459
		}
460
	}
461
	return 0;
462
}
463
 
464
static unsigned int reg_get_index(int reg)
465
{
466
	return reg / RC_MASK_XYZW;
467
}
468
 
469
static unsigned int reg_get_writemask(int reg)
470
{
471
	return (reg % RC_MASK_XYZW) + 1;
472
}
473
 
474
static int get_reg_id(unsigned int index, unsigned int writemask)
475
{
476
	assert(writemask);
477
	if (writemask == 0) {
478
		return 0;
479
	}
480
	return (index * RC_MASK_XYZW) + (writemask - 1);
481
}
482
 
483
#if VERBOSE
/* Debug helper: print a register id as "Temp[index].xyzw", with '_' in place
 * of every channel missing from its writemask. */
static void print_reg(int reg)
{
	const unsigned int index = reg_get_index(reg);
	const unsigned int mask = reg_get_writemask(reg);
	const char x = (mask & RC_MASK_X) ? 'x' : '_';
	const char y = (mask & RC_MASK_Y) ? 'y' : '_';
	const char z = (mask & RC_MASK_Z) ? 'z' : '_';
	const char w = (mask & RC_MASK_W) ? 'w' : '_';

	fprintf(stderr, "Temp[%u].%c%c%c%c", index, x, y, z, w);
}
#endif
495
 
496
static void add_register_conflicts(
497
	struct ra_regs * regs,
498
	unsigned int max_temp_regs)
499
{
500
	unsigned int index, a_mask, b_mask;
501
	for (index = 0; index < max_temp_regs; index++) {
502
		for(a_mask = 1; a_mask <= RC_MASK_XYZW; a_mask++) {
503
			for (b_mask = a_mask + 1; b_mask <= RC_MASK_XYZW;
504
								b_mask++) {
505
				if (a_mask & b_mask) {
506
					ra_add_reg_conflict(regs,
507
						get_reg_id(index, a_mask),
508
						get_reg_id(index, b_mask));
509
				}
510
			}
511
		}
512
	}
513
}
514
 
515
static void do_advanced_regalloc(struct regalloc_state * s)
516
{
517
 
518
	unsigned int i, input_node, node_count, node_index;
519
	unsigned int * node_classes;
520
	struct rc_instruction * inst;
521
	struct rc_list * var_ptr;
522
	struct rc_list * variables;
523
	struct ra_graph * graph;
524
	const struct rc_regalloc_state *ra_state = s->C->regalloc_state;
525
 
526
	/* Get list of program variables */
527
	variables = rc_get_variables(s->C);
528
	node_count = rc_list_count(variables);
529
	node_classes = memory_pool_malloc(&s->C->Pool,
530
			node_count * sizeof(unsigned int));
531
 
532
	for (var_ptr = variables, node_index = 0; var_ptr;
533
					var_ptr = var_ptr->Next, node_index++) {
534
		unsigned int class_index;
535
		/* Compute the live intervals */
536
		rc_variable_compute_live_intervals(var_ptr->Item);
537
 
538
		class_index = variable_get_class(var_ptr->Item,	rc_class_list);
539
		node_classes[node_index] = ra_state->class_ids[class_index];
540
	}
541
 
542
 
543
	/* Calculate live intervals for input registers */
544
	for (inst = s->C->Program.Instructions.Next;
545
					inst != &s->C->Program.Instructions;
546
					inst = inst->Next) {
547
		rc_opcode op = rc_get_flow_control_inst(inst);
548
		if (op == RC_OPCODE_BGNLOOP) {
549
			struct rc_instruction * endloop =
550
							rc_match_bgnloop(inst);
551
			if (endloop->IP > s->LoopEnd) {
552
				s->LoopEnd = endloop->IP;
553
			}
554
		}
555
		rc_for_all_reads_mask(inst, scan_read_callback, s);
556
	}
557
 
558
	/* Compute the writemask for inputs. */
559
	for (i = 0; i < s->NumInputs; i++) {
560
		unsigned int chan, writemask = 0;
561
		for (chan = 0; chan < 4; chan++) {
562
			if (s->Input[i].Live[chan].Used) {
563
				writemask |= (1 << chan);
564
			}
565
		}
566
		s->Input[i].Writemask = writemask;
567
	}
568
 
569
	graph = ra_alloc_interference_graph(ra_state->regs,
570
						node_count + s->NumInputs);
571
 
572
	for (node_index = 0; node_index < node_count; node_index++) {
573
		ra_set_node_class(graph, node_index, node_classes[node_index]);
574
	}
575
 
576
	/* Build the interference graph */
577
	for (var_ptr = variables, node_index = 0; var_ptr;
578
					var_ptr = var_ptr->Next,node_index++) {
579
		struct rc_list * a, * b;
580
		unsigned int b_index;
581
 
582
		for (a = var_ptr, b = var_ptr->Next, b_index = node_index + 1;
583
						b; b = b->Next, b_index++) {
584
			struct rc_variable * var_a = a->Item;
585
			while (var_a) {
586
				struct rc_variable * var_b = b->Item;
587
				while (var_b) {
588
					if (overlap_live_intervals_array(var_a->Live, var_b->Live)) {
589
						ra_add_node_interference(graph,
590
							node_index, b_index);
591
					}
592
					var_b = var_b->Friend;
593
				}
594
				var_a = var_a->Friend;
595
			}
596
		}
597
	}
598
 
599
	/* Add input registers to the interference graph */
600
	for (i = 0, input_node = 0; i< s->NumInputs; i++) {
601
		if (!s->Input[i].Writemask) {
602
			continue;
603
		}
604
		for (var_ptr = variables, node_index = 0;
605
				var_ptr; var_ptr = var_ptr->Next, node_index++) {
606
			struct rc_variable * var = var_ptr->Item;
607
			if (overlap_live_intervals_array(s->Input[i].Live,
608
								var->Live)) {
609
				ra_add_node_interference(graph, node_index,
610
						node_count + input_node);
611
			}
612
		}
613
		/* Manually allocate a register for this input */
614
		ra_set_node_reg(graph, node_count + input_node, get_reg_id(
615
				s->Input[i].Index, s->Input[i].Writemask));
616
		input_node++;
617
	}
618
 
619
	if (!ra_allocate(graph)) {
620
		rc_error(s->C, "Ran out of hardware temporaries\n");
621
		return;
622
	}
623
 
624
	/* Rewrite the registers */
625
	for (var_ptr = variables, node_index = 0; var_ptr;
626
				var_ptr = var_ptr->Next, node_index++) {
627
		int reg = ra_get_node_reg(graph, node_index);
628
		unsigned int writemask = reg_get_writemask(reg);
629
		unsigned int index = reg_get_index(reg);
630
		struct rc_variable * var = var_ptr->Item;
631
 
632
		if (!s->C->is_r500 && var->Inst->Type == RC_INSTRUCTION_NORMAL) {
633
			writemask = rc_variable_writemask_sum(var);
634
		}
635
 
636
		if (var->Dst.File == RC_FILE_INPUT) {
637
			continue;
638
		}
639
		rc_variable_change_dst(var, index, writemask);
640
	}
641
 
642
	ralloc_free(graph);
643
}
644
 
645
void rc_init_regalloc_state(struct rc_regalloc_state *s)
646
{
647
	unsigned i, j, index;
648
	unsigned **ra_q_values;
649
 
650
	/* Pre-computed q values.  This array describes the maximum number of
651
	 * a class's [row] registers that are in conflict with a single
652
	 * register from another class [column].
653
	 *
654
	 * For example:
655
	 * q_values[0][2] is 3, because a register from class 2
656
	 * (RC_REG_CLASS_TRIPLE) may conflict with at most 3 registers from
657
	 * class 0 (RC_REG_CLASS_SINGLE) e.g. T0.xyz conflicts with T0.x, T0.y,
658
	 * and T0.z.
659
	 *
660
	 * q_values[2][0] is 1, because a register from class 0
661
	 * (RC_REG_CLASS_SINGLE) may conflict with at most 1 register from
662
	 * class 2 (RC_REG_CLASS_TRIPLE) e.g. T0.x conflicts with T0.xyz
663
	 *
664
	 * The q values for each register class [row] will never be greater
665
	 * than the maximum number of writemask combinations for that class.
666
	 *
667
	 * For example:
668
	 *
669
	 * Class 2 (RC_REG_CLASS_TRIPLE) only has 1 writemask combination,
670
	 * so no value in q_values[2][0..RC_REG_CLASS_COUNT] will be greater
671
	 * than 1.
672
	 */
673
	const unsigned q_values[RC_REG_CLASS_COUNT][RC_REG_CLASS_COUNT] = {
674
	{1, 2, 3, 0, 1, 2, 3, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2},
675
	{2, 3, 3, 0, 2, 3, 3, 2, 2, 2, 3, 3, 3, 2, 2, 2, 3, 3, 3},
676
	{1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
677
	{0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1},
678
	{1, 2, 3, 3, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3},
679
	{2, 3, 3, 3, 3, 3, 3, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3},
680
	{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
681
	{1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1},
682
	{1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0},
683
	{1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1},
684
	{1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1},
685
	{1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1},
686
	{1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1},
687
	{1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1},
688
	{1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1},
689
	{1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1},
690
	{1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1},
691
	{1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
692
	{1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}
693
	};
694
 
695
	/* Allocate the main ra data structure */
696
	s->regs = ra_alloc_reg_set(NULL, R500_PFS_NUM_TEMP_REGS * RC_MASK_XYZW);
697
 
698
	/* Create the register classes */
699
	for (i = 0; i < RC_REG_CLASS_COUNT; i++) {
700
		const struct rc_class *class = &rc_class_list[i];
701
		s->class_ids[class->ID] = ra_alloc_reg_class(s->regs);
702
 
703
		/* Assign registers to the classes */
704
		for (index = 0; index < R500_PFS_NUM_TEMP_REGS; index++) {
705
			for (j = 0; j < class->WritemaskCount; j++) {
706
				int reg_id = get_reg_id(index,
707
						class->Writemasks[j]);
708
				ra_class_add_reg(s->regs,
709
					s->class_ids[class->ID], reg_id);
710
			}
711
		}
712
	}
713
 
714
	/* Set the q values.  The q_values array is indexed based on
715
	 * the rc_reg_class ID (RC_REG_CLASS_*) which might be
716
	 * different than the ID assigned to that class by ra.
717
	 * This why we need to manually construct this list.
718
	 */
719
	ra_q_values = MALLOC(RC_REG_CLASS_COUNT * sizeof(unsigned *));
720
 
721
	for (i = 0; i < RC_REG_CLASS_COUNT; i++) {
722
		ra_q_values[i] = MALLOC(RC_REG_CLASS_COUNT * sizeof(unsigned));
723
		for (j = 0; j < RC_REG_CLASS_COUNT; j++) {
724
			ra_q_values[s->class_ids[i]][s->class_ids[j]] =
725
							q_values[i][j];
726
		}
727
	}
728
 
729
	/* Add register conflicts */
730
	add_register_conflicts(s->regs, R500_PFS_NUM_TEMP_REGS);
731
 
732
	ra_set_finalize(s->regs, ra_q_values);
733
 
734
	for (i = 0; i < RC_REG_CLASS_COUNT; i++) {
735
		FREE(ra_q_values[i]);
736
	}
737
	FREE(ra_q_values);
738
}
739
 
740
void rc_destroy_regalloc_state(struct rc_regalloc_state *s)
741
{
742
	ralloc_free(s->regs);
743
}
744
 
745
/**
746
 * @param user This parameter should be a pointer to an integer value.  If this
747
 * integer value is zero, then a simple register allocator will be used that
748
 * only allocates space for input registers (\sa do_regalloc_inputs_only).  If
749
 * user is non-zero, then the regular register allocator will be used
750
 * (\sa do_regalloc).
751
  */
752
void rc_pair_regalloc(struct radeon_compiler *cc, void *user)
753
{
754
	struct r300_fragment_program_compiler *c =
755
				(struct r300_fragment_program_compiler*)cc;
756
	struct regalloc_state s;
757
	int * do_full_regalloc = (int*)user;
758
 
759
	memset(&s, 0, sizeof(s));
760
	s.C = cc;
761
	s.NumInputs = rc_get_max_index(cc, RC_FILE_INPUT) + 1;
762
	s.Input = memory_pool_malloc(&cc->Pool,
763
			s.NumInputs * sizeof(struct register_info));
764
	memset(s.Input, 0, s.NumInputs * sizeof(struct register_info));
765
 
766
	s.NumTemporaries = rc_get_max_index(cc, RC_FILE_TEMPORARY) + 1;
767
	s.Temporary = memory_pool_malloc(&cc->Pool,
768
			s.NumTemporaries * sizeof(struct register_info));
769
	memset(s.Temporary, 0, s.NumTemporaries * sizeof(struct register_info));
770
 
771
	rc_recompute_ips(s.C);
772
 
773
	c->AllocateHwInputs(c, &alloc_input_simple, &s);
774
	if (*do_full_regalloc) {
775
		do_advanced_regalloc(&s);
776
	} else {
777
		s.Simple = 1;
778
		do_regalloc_inputs_only(&s);
779
	}
780
 
781
	/* Rewrite inputs and if we are doing the simple allocation, rewrite
782
	 * temporaries too. */
783
	for (struct rc_instruction *inst = s.C->Program.Instructions.Next;
784
					inst != &s.C->Program.Instructions;
785
					inst = inst->Next) {
786
		rc_remap_registers(inst, &remap_register, &s);
787
	}
788
}