Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
4358 Serge 1
/*
2
 * Copyright (C) 2009 Nicolai Haehnle.
3
 *
4
 * All Rights Reserved.
5
 *
6
 * Permission is hereby granted, free of charge, to any person obtaining
7
 * a copy of this software and associated documentation files (the
8
 * "Software"), to deal in the Software without restriction, including
9
 * without limitation the rights to use, copy, modify, merge, publish,
10
 * distribute, sublicense, and/or sell copies of the Software, and to
11
 * permit persons to whom the Software is furnished to do so, subject to
12
 * the following conditions:
13
 *
14
 * The above copyright notice and this permission notice (including the
15
 * next paragraph) shall be included in all copies or substantial
16
 * portions of the Software.
17
 *
18
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
 *
26
 */
27
 
28
#include "radeon_dataflow.h"
29
 
30
#include "radeon_compiler.h"
31
 
32
 
33
struct updatemask_state {
34
	unsigned char Output[RC_REGISTER_MAX_INDEX];
35
	unsigned char Temporary[RC_REGISTER_MAX_INDEX];
36
	unsigned char Address;
37
	unsigned char Special[RC_NUM_SPECIAL_REGISTERS];
38
};
39
 
40
/**
 * Per-instruction results gathered by update_instruction().
 * The array of these records is indexed by the instruction's IP.
 */
struct instruction_state {
	/* Destination components found to be used; written back to
	 * DstReg.WriteMask by rc_dataflow_deadcode(). */
	unsigned char WriteMask:4;

	/* Set to 1 once the ALU result written by the instruction was
	 * found to be used. */
	unsigned char WriteALUResult:1;

	/* For each of the up to three sources: the source channels that
	 * have already been accounted for. */
	unsigned char SrcReg[3];
};
45
 
46
/**
 * Bookkeeping for one loop on the loop stack.
 */
struct loopinfo {
	/* Mask snapshots copied by push_break(), one per BRK seen. */
	struct updatemask_state *Breaks;

	/* Number of valid entries in Breaks. */
	unsigned int BreakCount;

	/* Capacity counter handed to memory_pool_array_reserve() for Breaks. */
	unsigned int BreaksReserved;
};
51
 
52
struct branchinfo {
53
	unsigned int HaveElse:1;
54
 
55
	struct updatemask_state StoreEndif;
56
	struct updatemask_state StoreElse;
57
};
58
 
59
struct deadcode_state {
60
	struct radeon_compiler * C;
61
	struct instruction_state * Instructions;
62
 
63
	struct updatemask_state R;
64
 
65
	struct branchinfo * BranchStack;
66
	unsigned int BranchStackSize;
67
	unsigned int BranchStackReserved;
68
 
69
	struct loopinfo * LoopStack;
70
	unsigned int LoopStackSize;
71
	unsigned int LoopStackReserved;
72
};
73
 
74
 
75
static void or_updatemasks(
76
	struct updatemask_state * dst,
77
	struct updatemask_state * a,
78
	struct updatemask_state * b)
79
{
80
	for(unsigned int i = 0; i < RC_REGISTER_MAX_INDEX; ++i) {
81
		dst->Output[i] = a->Output[i] | b->Output[i];
82
		dst->Temporary[i] = a->Temporary[i] | b->Temporary[i];
83
	}
84
 
85
	for(unsigned int i = 0; i < RC_NUM_SPECIAL_REGISTERS; ++i)
86
		dst->Special[i] = a->Special[i] | b->Special[i];
87
 
88
	dst->Address = a->Address | b->Address;
89
}
90
 
91
static void push_break(struct deadcode_state *s)
92
{
93
	struct loopinfo * loop = &s->LoopStack[s->LoopStackSize - 1];
94
	memory_pool_array_reserve(&s->C->Pool, struct updatemask_state,
95
		loop->Breaks, loop->BreakCount, loop->BreaksReserved, 1);
96
 
97
	memcpy(&loop->Breaks[loop->BreakCount++], &s->R, sizeof(s->R));
98
}
99
 
100
static void push_loop(struct deadcode_state * s)
101
{
102
	memory_pool_array_reserve(&s->C->Pool, struct loopinfo, s->LoopStack,
103
			s->LoopStackSize, s->LoopStackReserved, 1);
104
	memset(&s->LoopStack[s->LoopStackSize++], 0, sizeof(struct loopinfo));
105
}
106
 
107
static void push_branch(struct deadcode_state * s)
108
{
109
	struct branchinfo * branch;
110
 
111
	memory_pool_array_reserve(&s->C->Pool, struct branchinfo, s->BranchStack,
112
			s->BranchStackSize, s->BranchStackReserved, 1);
113
 
114
	branch = &s->BranchStack[s->BranchStackSize++];
115
	branch->HaveElse = 0;
116
	memcpy(&branch->StoreEndif, &s->R, sizeof(s->R));
117
}
118
 
119
/**
 * Look up the mask byte that tracks the given register in the current masks.
 *
 * \return pointer to the mask byte, or a null pointer when the register file
 * is not tracked or \p index is out of range (the latter also reports an
 * error through rc_error()).
 */
static unsigned char * get_used_ptr(struct deadcode_state *s, rc_register_file file, unsigned int index)
{
	switch (file) {
	case RC_FILE_OUTPUT:
	case RC_FILE_TEMPORARY:
		if (index >= RC_REGISTER_MAX_INDEX) {
			rc_error(s->C, "%s: index %i is out of bounds for file %i\n", __FUNCTION__, index, file);
			return NULL;
		}
		return (file == RC_FILE_OUTPUT) ? &s->R.Output[index]
						: &s->R.Temporary[index];

	case RC_FILE_ADDRESS:
		/* The index is ignored: there is only one address mask. */
		return &s->R.Address;

	case RC_FILE_SPECIAL:
		if (index >= RC_NUM_SPECIAL_REGISTERS) {
			rc_error(s->C, "%s: special file index %i out of bounds\n", __FUNCTION__, index);
			return NULL;
		}
		return &s->R.Special[index];

	default:
		/* Other register files are not tracked. */
		return NULL;
	}
}
144
 
145
/**
 * OR \p mask into the mask byte of the given register.
 * Registers for which get_used_ptr() returns no byte are skipped.
 */
static void mark_used(struct deadcode_state * s, rc_register_file file, unsigned int index, unsigned int mask)
{
	unsigned char * slot = get_used_ptr(s, file, index);

	if (!slot)
		return;

	*slot |= mask;
}
151
 
152
static void update_instruction(struct deadcode_state * s, struct rc_instruction * inst)
153
{
154
	const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
155
	struct instruction_state * insts = &s->Instructions[inst->IP];
156
	unsigned int usedmask = 0;
157
	unsigned int srcmasks[3];
158
 
159
	if (opcode->HasDstReg) {
160
		unsigned char * pused = get_used_ptr(s, inst->U.I.DstReg.File, inst->U.I.DstReg.Index);
161
		if (pused) {
162
			usedmask = *pused & inst->U.I.DstReg.WriteMask;
163
			*pused &= ~usedmask;
164
		}
165
	}
166
 
167
	insts->WriteMask |= usedmask;
168
 
169
	if (inst->U.I.WriteALUResult) {
170
		unsigned char * pused = get_used_ptr(s, RC_FILE_SPECIAL, RC_SPECIAL_ALU_RESULT);
171
		if (pused && *pused) {
172
			if (inst->U.I.WriteALUResult == RC_ALURESULT_X)
173
				usedmask |= RC_MASK_X;
174
			else if (inst->U.I.WriteALUResult == RC_ALURESULT_W)
175
				usedmask |= RC_MASK_W;
176
 
177
			*pused = 0;
178
			insts->WriteALUResult = 1;
179
		}
180
	}
181
 
182
	rc_compute_sources_for_writemask(inst, usedmask, srcmasks);
183
 
184
	for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) {
185
		unsigned int refmask = 0;
186
		unsigned int newsrcmask = srcmasks[src] & ~insts->SrcReg[src];
187
		insts->SrcReg[src] |= newsrcmask;
188
 
189
		for(unsigned int chan = 0; chan < 4; ++chan) {
190
			if (GET_BIT(newsrcmask, chan))
191
				refmask |= 1 << GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan);
192
		}
193
 
194
		/* get rid of spurious bits from ZERO, ONE, etc. swizzles */
195
		refmask &= RC_MASK_XYZW;
196
 
197
		if (!refmask)
198
			continue;
199
 
200
		mark_used(s, inst->U.I.SrcReg[src].File, inst->U.I.SrcReg[src].Index, refmask);
201
 
202
		if (inst->U.I.SrcReg[src].RelAddr)
203
			mark_used(s, RC_FILE_ADDRESS, 0, RC_MASK_X);
204
	}
205
}
206
 
207
static void mark_output_use(void * data, unsigned int index, unsigned int mask)
208
{
209
	struct deadcode_state * s = data;
210
 
211
	mark_used(s, RC_FILE_OUTPUT, index, mask);
212
}
213
 
214
void rc_dataflow_deadcode(struct radeon_compiler * c, void *user)
215
{
216
	struct deadcode_state s;
217
	unsigned int nr_instructions;
218
	rc_dataflow_mark_outputs_fn dce = (rc_dataflow_mark_outputs_fn)user;
219
	unsigned int ip;
220
 
221
	memset(&s, 0, sizeof(s));
222
	s.C = c;
223
 
224
	nr_instructions = rc_recompute_ips(c);
225
	s.Instructions = memory_pool_malloc(&c->Pool, sizeof(struct instruction_state)*nr_instructions);
226
	memset(s.Instructions, 0, sizeof(struct instruction_state)*nr_instructions);
227
 
228
	dce(c, &s, &mark_output_use);
229
 
230
	for(struct rc_instruction * inst = c->Program.Instructions.Prev;
231
	    inst != &c->Program.Instructions;
232
	    inst = inst->Prev) {
233
		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
234
 
235
		switch(opcode->Opcode){
236
		/* Mark all sources in the loop body as used before doing
237
		 * normal deadcode analysis.  This is probably not optimal.
238
		 */
239
		case RC_OPCODE_ENDLOOP:
240
		{
241
			int endloops = 1;
242
			struct rc_instruction *ptr;
243
			for(ptr = inst->Prev; endloops > 0; ptr = ptr->Prev){
244
				opcode = rc_get_opcode_info(ptr->U.I.Opcode);
245
				if(ptr->U.I.Opcode == RC_OPCODE_BGNLOOP){
246
					endloops--;
247
					continue;
248
				}
249
				if(ptr->U.I.Opcode == RC_OPCODE_ENDLOOP){
250
					endloops++;
251
					continue;
252
				}
253
				if(opcode->HasDstReg){
254
					int src = 0;
255
					unsigned int srcmasks[3];
256
					rc_compute_sources_for_writemask(ptr,
257
						ptr->U.I.DstReg.WriteMask, srcmasks);
258
					for(src=0; src < opcode->NumSrcRegs; src++){
259
						mark_used(&s,
260
							ptr->U.I.SrcReg[src].File,
261
							ptr->U.I.SrcReg[src].Index,
262
							srcmasks[src]);
263
					}
264
				}
265
			}
266
			push_loop(&s);
267
			break;
268
		}
269
		case RC_OPCODE_BRK:
270
			push_break(&s);
271
			break;
272
		case RC_OPCODE_BGNLOOP:
273
		{
274
			unsigned int i;
275
			struct loopinfo * loop = &s.LoopStack[s.LoopStackSize-1];
276
			for(i = 0; i < loop->BreakCount; i++) {
277
				or_updatemasks(&s.R, &s.R, &loop->Breaks[i]);
278
			}
279
			break;
280
		}
281
		case RC_OPCODE_CONT:
282
			break;
283
		case RC_OPCODE_ENDIF:
284
			push_branch(&s);
285
			break;
286
		default:
287
			if (opcode->IsFlowControl && s.BranchStackSize) {
288
				struct branchinfo * branch = &s.BranchStack[s.BranchStackSize-1];
289
				if (opcode->Opcode == RC_OPCODE_IF) {
290
					or_updatemasks(&s.R,
291
							&s.R,
292
							branch->HaveElse ? &branch->StoreElse : &branch->StoreEndif);
293
 
294
					s.BranchStackSize--;
295
				} else if (opcode->Opcode == RC_OPCODE_ELSE) {
296
					if (branch->HaveElse) {
297
						rc_error(c, "%s: Multiple ELSE for one IF/ENDIF\n", __FUNCTION__);
298
					} else {
299
						memcpy(&branch->StoreElse, &s.R, sizeof(s.R));
300
						memcpy(&s.R, &branch->StoreEndif, sizeof(s.R));
301
						branch->HaveElse = 1;
302
					}
303
				} else {
304
					rc_error(c, "%s: Unhandled control flow instruction %s\n", __FUNCTION__, opcode->Name);
305
				}
306
			}
307
		}
308
 
309
		update_instruction(&s, inst);
310
	}
311
 
312
	ip = 0;
313
	for(struct rc_instruction * inst = c->Program.Instructions.Next;
314
	    inst != &c->Program.Instructions;
315
	    inst = inst->Next, ++ip) {
316
		const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode);
317
		int dead = 1;
318
		unsigned int srcmasks[3];
319
		unsigned int usemask;
320
 
321
		if (!opcode->HasDstReg) {
322
			dead = 0;
323
		} else {
324
			inst->U.I.DstReg.WriteMask = s.Instructions[ip].WriteMask;
325
			if (s.Instructions[ip].WriteMask)
326
				dead = 0;
327
 
328
			if (s.Instructions[ip].WriteALUResult)
329
				dead = 0;
330
			else
331
				inst->U.I.WriteALUResult = RC_ALURESULT_NONE;
332
		}
333
 
334
		if (dead) {
335
			struct rc_instruction * todelete = inst;
336
			inst = inst->Prev;
337
			rc_remove_instruction(todelete);
338
			continue;
339
		}
340
 
341
		usemask = s.Instructions[ip].WriteMask;
342
 
343
		if (inst->U.I.WriteALUResult == RC_ALURESULT_X)
344
			usemask |= RC_MASK_X;
345
		else if (inst->U.I.WriteALUResult == RC_ALURESULT_W)
346
			usemask |= RC_MASK_W;
347
 
348
		rc_compute_sources_for_writemask(inst, usemask, srcmasks);
349
 
350
		for(unsigned int src = 0; src < 3; ++src) {
351
			for(unsigned int chan = 0; chan < 4; ++chan) {
352
				if (!GET_BIT(srcmasks[src], chan))
353
					SET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan, RC_SWIZZLE_UNUSED);
354
			}
355
		}
356
	}
357
 
358
	rc_calculate_inputs_outputs(c);
359
}