Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
5564 serge 1
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
2
 
3
/*
4
 * Copyright (C) 2014 Rob Clark 
5
 *
6
 * Permission is hereby granted, free of charge, to any person obtaining a
7
 * copy of this software and associated documentation files (the "Software"),
8
 * to deal in the Software without restriction, including without limitation
9
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10
 * and/or sell copies of the Software, and to permit persons to whom the
11
 * Software is furnished to do so, subject to the following conditions:
12
 *
13
 * The above copyright notice and this permission notice (including the next
14
 * paragraph) shall be included in all copies or substantial portions of the
15
 * Software.
16
 *
17
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23
 * SOFTWARE.
24
 *
25
 * Authors:
26
 *    Rob Clark 
27
 */
28
 
29
#include "pipe/p_shader_tokens.h"
30
#include "util/u_math.h"
31
 
32
#include "freedreno_util.h"
33
 
34
#include "ir3.h"
35
 
36
/*
 * Legalize:
 *
 * We currently require that scheduling ensures that we have enough nops
 * in all the right places.  The legalize step mostly handles fixing up
 * instruction flags ((ss)/(sy)/(ei)), and collapses sequences of nops
 * into fewer nops with the rpt flag.
 */
44
 
45
/*
 * Per-block state shared between ir3_block_legalize() and legalize().
 */
struct ir3_legalize_ctx {
	/* block whose instruction list is walked by legalize() */
	struct ir3_block *block;
	/* set to true once a texture instruction has been emitted */
	bool has_samp;
	/* running maximum of the immediate input location (regs[1]) over
	 * all input instructions; the public entry point seeds it with -1
	 */
	int max_bary;
};
50
 
51
static void legalize(struct ir3_legalize_ctx *ctx)
52
{
53
	struct ir3_block *block = ctx->block;
54
	struct ir3_instruction *n;
55
	struct ir3 *shader = block->shader;
56
	struct ir3_instruction *end =
57
			ir3_instr_create(block, 0, OPC_END);
58
	struct ir3_instruction *last_input = NULL;
59
	struct ir3_instruction *last_rel = NULL;
60
	regmask_t needs_ss_war;       /* write after read */
61
	regmask_t needs_ss;
62
	regmask_t needs_sy;
63
 
64
	regmask_init(&needs_ss_war);
65
	regmask_init(&needs_ss);
66
	regmask_init(&needs_sy);
67
 
68
	shader->instrs_count = 0;
69
 
70
	for (n = block->head; n; n = n->next) {
71
		struct ir3_register *reg;
72
		unsigned i;
73
 
74
		if (is_meta(n))
75
			continue;
76
 
77
		if (is_input(n)) {
78
			struct ir3_register *inloc = n->regs[1];
79
			assert(inloc->flags & IR3_REG_IMMED);
80
			ctx->max_bary = MAX2(ctx->max_bary, inloc->iim_val);
81
		}
82
 
83
		/* NOTE: consider dst register too.. it could happen that
84
		 * texture sample instruction (for example) writes some
85
		 * components which are unused.  A subsequent instruction
86
		 * that writes the same register can race w/ the sam instr
87
		 * resulting in undefined results:
88
		 */
89
		for (i = 0; i < n->regs_count; i++) {
90
			reg = n->regs[i];
91
 
92
			if (reg_gpr(reg)) {
93
 
94
				/* TODO: we probably only need (ss) for alu
95
				 * instr consuming sfu result.. need to make
96
				 * some tests for both this and (sy)..
97
				 */
98
				if (regmask_get(&needs_ss, reg)) {
99
					n->flags |= IR3_INSTR_SS;
100
					regmask_init(&needs_ss);
101
				}
102
 
103
				if (regmask_get(&needs_sy, reg)) {
104
					n->flags |= IR3_INSTR_SY;
105
					regmask_init(&needs_sy);
106
				}
107
			}
108
 
109
			/* TODO: is it valid to have address reg loaded from a
110
			 * relative src (ie. mova a0, c)?  If so, the
111
			 * last_rel check below should be moved ahead of this:
112
			 */
113
			if (reg->flags & IR3_REG_RELATIV)
114
				last_rel = n;
115
		}
116
 
117
		if (n->regs_count > 0) {
118
			reg = n->regs[0];
119
			if (regmask_get(&needs_ss_war, reg)) {
120
				n->flags |= IR3_INSTR_SS;
121
				regmask_init(&needs_ss_war); // ??? I assume?
122
			}
123
 
124
			if (last_rel && (reg->num == regid(REG_A0, 0))) {
125
				last_rel->flags |= IR3_INSTR_UL;
126
				last_rel = NULL;
127
			}
128
		}
129
 
130
		/* cat5+ does not have an (ss) bit, if needed we need to
131
		 * insert a nop to carry the sync flag.  Would be kinda
132
		 * clever if we were aware of this during scheduling, but
133
		 * this should be a pretty rare case:
134
		 */
135
		if ((n->flags & IR3_INSTR_SS) && (n->category >= 5)) {
136
			struct ir3_instruction *nop;
137
			nop = ir3_instr_create(block, 0, OPC_NOP);
138
			nop->flags |= IR3_INSTR_SS;
139
			n->flags &= ~IR3_INSTR_SS;
140
		}
141
 
142
		/* need to be able to set (ss) on first instruction: */
143
		if ((shader->instrs_count == 0) && (n->category >= 5))
144
			ir3_instr_create(block, 0, OPC_NOP);
145
 
146
		if (is_nop(n) && shader->instrs_count) {
147
			struct ir3_instruction *last =
148
					shader->instrs[shader->instrs_count-1];
149
			if (is_nop(last) && (last->repeat < 5)) {
150
				last->repeat++;
151
				last->flags |= n->flags;
152
				continue;
153
			}
154
		}
155
 
156
		shader->instrs[shader->instrs_count++] = n;
157
 
158
		if (is_sfu(n))
159
			regmask_set(&needs_ss, n->regs[0]);
160
 
161
		if (is_tex(n)) {
162
			/* this ends up being the # of samp instructions.. but that
163
			 * is ok, everything else only cares whether it is zero or
164
			 * not.  We do this here, rather than when we encounter a
165
			 * SAMP decl, because (especially in binning pass shader)
166
			 * the samp instruction(s) could get eliminated if the
167
			 * result is not used.
168
			 */
169
			ctx->has_samp = true;
170
			regmask_set(&needs_sy, n->regs[0]);
171
		} else if (is_mem(n)) {
172
			regmask_set(&needs_sy, n->regs[0]);
173
		}
174
 
175
		/* both tex/sfu appear to not always immediately consume
176
		 * their src register(s):
177
		 */
178
		if (is_tex(n) || is_sfu(n) || is_mem(n)) {
179
			foreach_src(reg, n) {
180
				if (reg_gpr(reg))
181
					regmask_set(&needs_ss_war, reg);
182
			}
183
		}
184
 
185
		if (is_input(n))
186
			last_input = n;
187
	}
188
 
189
	if (last_input) {
190
		/* special hack.. if using ldlv to bypass interpolation,
191
		 * we need to insert a dummy bary.f on which we can set
192
		 * the (ei) flag:
193
		 */
194
		if (is_mem(last_input) && (last_input->opc == OPC_LDLV)) {
195
			int i, cnt;
196
 
197
			/* note that ir3_instr_create() inserts into
198
			 * shader->instrs[] and increments the count..
199
			 * so we need to bump up the cnt initially (to
200
			 * avoid it clobbering the last real instr) and
201
			 * restore it after.
202
			 */
203
			cnt = ++shader->instrs_count;
204
 
205
			/* inserting instructions would be a bit nicer if list.. */
206
			for (i = cnt - 2; i >= 0; i--) {
207
				if (shader->instrs[i] == last_input) {
208
 
209
					/* (ss)bary.f (ei)r63.x, 0, r0.x */
210
					last_input = ir3_instr_create(block, 2, OPC_BARY_F);
211
					last_input->flags |= IR3_INSTR_SS;
212
					ir3_reg_create(last_input, regid(63, 0), 0);
213
					ir3_reg_create(last_input, 0, IR3_REG_IMMED)->iim_val = 0;
214
					ir3_reg_create(last_input, regid(0, 0), 0);
215
 
216
					shader->instrs[i + 1] = last_input;
217
 
218
					break;
219
				}
220
				shader->instrs[i + 1] = shader->instrs[i];
221
			}
222
 
223
			shader->instrs_count = cnt;
224
		}
225
		last_input->regs[0]->flags |= IR3_REG_EI;
226
	}
227
 
228
	if (last_rel)
229
		last_rel->flags |= IR3_INSTR_UL;
230
 
231
	shader->instrs[shader->instrs_count++] = end;
232
 
233
	shader->instrs[0]->flags |= IR3_INSTR_SS | IR3_INSTR_SY;
234
}
235
 
236
void ir3_block_legalize(struct ir3_block *block,
237
		bool *has_samp, int *max_bary)
238
{
239
	struct ir3_legalize_ctx ctx = {
240
			.block = block,
241
			.max_bary = -1,
242
	};
243
 
244
	legalize(&ctx);
245
 
246
	*has_samp = ctx.has_samp;
247
	*max_bary = ctx.max_bary;
248
}