Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
4358 | Serge | 1 | /* |
2 | * Copyright (c) 2012 Rob Clark |
||
3 | * |
||
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
5 | * copy of this software and associated documentation files (the "Software"), |
||
6 | * to deal in the Software without restriction, including without limitation |
||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
||
8 | * and/or sell copies of the Software, and to permit persons to whom the |
||
9 | * Software is furnished to do so, subject to the following conditions: |
||
10 | * |
||
11 | * The above copyright notice and this permission notice (including the next |
||
12 | * paragraph) shall be included in all copies or substantial portions of the |
||
13 | * Software. |
||
14 | * |
||
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
||
18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||
20 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
||
21 | * SOFTWARE. |
||
22 | */ |
||
23 | |||
24 | #include "ir-a2xx.h" |
||
25 | |||
26 | #include |
||
27 | #include |
||
28 | #include |
||
29 | #include |
||
30 | |||
31 | #include "freedreno_util.h" |
||
32 | #include "instr-a2xx.h" |
||
33 | |||
34 | #define DEBUG_MSG(f, ...) do { if (0) DBG(f, ##__VA_ARGS__); } while (0) |
||
35 | #define WARN_MSG(f, ...) DBG("WARN: "f, ##__VA_ARGS__) |
||
36 | #define ERROR_MSG(f, ...) DBG("ERROR: "f, ##__VA_ARGS__) |
||
37 | |||
38 | #define REG_MASK 0x3f |
||
39 | |||
40 | static int cf_emit(struct ir2_cf *cf, instr_cf_t *instr); |
||
41 | |||
42 | static int instr_emit(struct ir2_instruction *instr, uint32_t *dwords, |
||
43 | uint32_t idx, struct ir2_shader_info *info); |
||
44 | |||
45 | static void reg_update_stats(struct ir2_register *reg, |
||
46 | struct ir2_shader_info *info, bool dest); |
||
47 | static uint32_t reg_fetch_src_swiz(struct ir2_register *reg, uint32_t n); |
||
48 | static uint32_t reg_fetch_dst_swiz(struct ir2_register *reg); |
||
49 | static uint32_t reg_alu_dst_swiz(struct ir2_register *reg); |
||
50 | static uint32_t reg_alu_src_swiz(struct ir2_register *reg); |
||
51 | |||
52 | /* simple allocator to carve allocations out of an up-front allocated heap, |
||
53 | * so that we can free everything easily in one shot. |
||
54 | */ |
||
55 | static void * ir2_alloc(struct ir2_shader *shader, int sz) |
||
56 | { |
||
57 | void *ptr = &shader->heap[shader->heap_idx]; |
||
58 | shader->heap_idx += align(sz, 4); |
||
59 | return ptr; |
||
60 | } |
||
61 | |||
/* Copy a NUL-terminated string into the shader's heap (see ir2_alloc()).
 * A NULL input yields NULL.  The copy lives as long as the shader does and
 * must not be passed to free().
 */
static char * ir2_strdup(struct ir2_shader *shader, const char *str)
{
	char *copy;
	int n;

	if (!str)
		return NULL;

	n = strlen(str);
	copy = ir2_alloc(shader, n + 1);
	memcpy(copy, str, n);
	copy[n] = '\0';

	return copy;
}
||
73 | |||
74 | struct ir2_shader * ir2_shader_create(void) |
||
75 | { |
||
76 | DEBUG_MSG(""); |
||
77 | return calloc(1, sizeof(struct ir2_shader)); |
||
78 | } |
||
79 | |||
/* Release a shader obtained from ir2_shader_create().
 *
 * CF, instruction and register nodes are carved out of the shader's own
 * heap by ir2_alloc(), so this single free() releases them as well.
 * Passing NULL is harmless, as with free().
 */
void ir2_shader_destroy(struct ir2_shader *shader)
{
	DEBUG_MSG("");

	free(shader);
}
||
85 | |||
86 | /* resolve addr/cnt/sequence fields in the individual CF's */ |
||
87 | static int shader_resolve(struct ir2_shader *shader, struct ir2_shader_info *info) |
||
88 | { |
||
89 | uint32_t addr; |
||
90 | unsigned i; |
||
91 | int j; |
||
92 | |||
93 | addr = shader->cfs_count / 2; |
||
94 | for (i = 0; i < shader->cfs_count; i++) { |
||
95 | struct ir2_cf *cf = shader->cfs[i]; |
||
96 | if ((cf->cf_type == EXEC) || (cf->cf_type == EXEC_END)) { |
||
97 | uint32_t sequence = 0; |
||
98 | |||
99 | if (cf->exec.addr && (cf->exec.addr != addr)) |
||
100 | WARN_MSG("invalid addr '%d' at CF %d", cf->exec.addr, i); |
||
101 | if (cf->exec.cnt && (cf->exec.cnt != cf->exec.instrs_count)) |
||
102 | WARN_MSG("invalid cnt '%d' at CF %d", cf->exec.cnt, i); |
||
103 | |||
104 | for (j = cf->exec.instrs_count - 1; j >= 0; j--) { |
||
105 | struct ir2_instruction *instr = cf->exec.instrs[j]; |
||
106 | sequence <<= 2; |
||
107 | if (instr->instr_type == IR2_FETCH) |
||
108 | sequence |= 0x1; |
||
109 | if (instr->sync) |
||
110 | sequence |= 0x2; |
||
111 | } |
||
112 | |||
113 | cf->exec.addr = addr; |
||
114 | cf->exec.cnt = cf->exec.instrs_count; |
||
115 | cf->exec.sequence = sequence; |
||
116 | |||
117 | addr += cf->exec.instrs_count; |
||
118 | } |
||
119 | } |
||
120 | |||
121 | info->sizedwords = 3 * addr; |
||
122 | |||
123 | return 0; |
||
124 | } |
||
125 | |||
126 | void * ir2_shader_assemble(struct ir2_shader *shader, struct ir2_shader_info *info) |
||
127 | { |
||
128 | uint32_t i, j; |
||
129 | uint32_t *ptr, *dwords = NULL; |
||
130 | uint32_t idx = 0; |
||
131 | int ret; |
||
132 | |||
133 | info->sizedwords = 0; |
||
134 | info->max_reg = -1; |
||
135 | info->max_input_reg = 0; |
||
136 | info->regs_written = 0; |
||
137 | |||
138 | /* we need an even # of CF's.. insert a NOP if needed */ |
||
139 | if (shader->cfs_count != align(shader->cfs_count, 2)) |
||
140 | ir2_cf_create(shader, NOP); |
||
141 | |||
142 | /* first pass, resolve sizes and addresses: */ |
||
143 | ret = shader_resolve(shader, info); |
||
144 | if (ret) { |
||
145 | ERROR_MSG("resolve failed: %d", ret); |
||
146 | goto fail; |
||
147 | } |
||
148 | |||
149 | ptr = dwords = calloc(1, 4 * info->sizedwords); |
||
150 | |||
151 | /* second pass, emit CF program in pairs: */ |
||
152 | for (i = 0; i < shader->cfs_count; i += 2) { |
||
153 | instr_cf_t *cfs = (instr_cf_t *)ptr; |
||
154 | ret = cf_emit(shader->cfs[i], &cfs[0]); |
||
155 | if (ret) { |
||
156 | ERROR_MSG("CF emit failed: %d\n", ret); |
||
157 | goto fail; |
||
158 | } |
||
159 | ret = cf_emit(shader->cfs[i+1], &cfs[1]); |
||
160 | if (ret) { |
||
161 | ERROR_MSG("CF emit failed: %d\n", ret); |
||
162 | goto fail; |
||
163 | } |
||
164 | ptr += 3; |
||
165 | assert((ptr - dwords) <= info->sizedwords); |
||
166 | } |
||
167 | |||
168 | /* third pass, emit ALU/FETCH: */ |
||
169 | for (i = 0; i < shader->cfs_count; i++) { |
||
170 | struct ir2_cf *cf = shader->cfs[i]; |
||
171 | if ((cf->cf_type == EXEC) || (cf->cf_type == EXEC_END)) { |
||
172 | for (j = 0; j < cf->exec.instrs_count; j++) { |
||
173 | ret = instr_emit(cf->exec.instrs[j], ptr, idx++, info); |
||
174 | if (ret) { |
||
175 | ERROR_MSG("instruction emit failed: %d", ret); |
||
176 | goto fail; |
||
177 | } |
||
178 | ptr += 3; |
||
179 | assert((ptr - dwords) <= info->sizedwords); |
||
180 | } |
||
181 | } |
||
182 | } |
||
183 | |||
184 | return dwords; |
||
185 | |||
186 | fail: |
||
187 | free(dwords); |
||
188 | return NULL; |
||
189 | } |
||
190 | |||
191 | |||
192 | struct ir2_cf * ir2_cf_create(struct ir2_shader *shader, instr_cf_opc_t cf_type) |
||
193 | { |
||
194 | struct ir2_cf *cf = ir2_alloc(shader, sizeof(struct ir2_cf)); |
||
195 | DEBUG_MSG("%d", cf_type); |
||
196 | cf->shader = shader; |
||
197 | cf->cf_type = cf_type; |
||
198 | assert(shader->cfs_count < ARRAY_SIZE(shader->cfs)); |
||
199 | shader->cfs[shader->cfs_count++] = cf; |
||
200 | return cf; |
||
201 | } |
||
202 | |||
203 | |||
204 | /* |
||
205 | * CF instructions: |
||
206 | */ |
||
207 | |||
208 | static int cf_emit(struct ir2_cf *cf, instr_cf_t *instr) |
||
209 | { |
||
210 | memset(instr, 0, sizeof(*instr)); |
||
211 | |||
212 | instr->opc = cf->cf_type; |
||
213 | |||
214 | switch (cf->cf_type) { |
||
215 | case NOP: |
||
216 | break; |
||
217 | case EXEC: |
||
218 | case EXEC_END: |
||
219 | assert(cf->exec.addr <= 0x1ff); |
||
220 | assert(cf->exec.cnt <= 0x6); |
||
221 | assert(cf->exec.sequence <= 0xfff); |
||
222 | instr->exec.address = cf->exec.addr; |
||
223 | instr->exec.count = cf->exec.cnt; |
||
224 | instr->exec.serialize = cf->exec.sequence; |
||
225 | break; |
||
226 | case ALLOC: |
||
227 | assert(cf->alloc.size <= 0xf); |
||
228 | instr->alloc.size = cf->alloc.size; |
||
229 | switch (cf->alloc.type) { |
||
230 | case SQ_POSITION: |
||
231 | case SQ_PARAMETER_PIXEL: |
||
232 | instr->alloc.buffer_select = cf->alloc.type; |
||
233 | break; |
||
234 | default: |
||
235 | ERROR_MSG("invalid alloc type: %d", cf->alloc.type); |
||
236 | return -1; |
||
237 | } |
||
238 | break; |
||
239 | case COND_EXEC: |
||
240 | case COND_EXEC_END: |
||
241 | case COND_PRED_EXEC: |
||
242 | case COND_PRED_EXEC_END: |
||
243 | case LOOP_START: |
||
244 | case LOOP_END: |
||
245 | case COND_CALL: |
||
246 | case RETURN: |
||
247 | case COND_JMP: |
||
248 | case COND_EXEC_PRED_CLEAN: |
||
249 | case COND_EXEC_PRED_CLEAN_END: |
||
250 | case MARK_VS_FETCH_DONE: |
||
251 | ERROR_MSG("TODO"); |
||
252 | return -1; |
||
253 | } |
||
254 | |||
255 | return 0; |
||
256 | } |
||
257 | |||
258 | |||
259 | struct ir2_instruction * ir2_instr_create(struct ir2_cf *cf, int instr_type) |
||
260 | { |
||
261 | struct ir2_instruction *instr = |
||
262 | ir2_alloc(cf->shader, sizeof(struct ir2_instruction)); |
||
263 | DEBUG_MSG("%d", instr_type); |
||
264 | instr->shader = cf->shader; |
||
265 | instr->pred = cf->shader->pred; |
||
266 | instr->instr_type = instr_type; |
||
267 | assert(cf->exec.instrs_count < ARRAY_SIZE(cf->exec.instrs)); |
||
268 | cf->exec.instrs[cf->exec.instrs_count++] = instr; |
||
269 | return instr; |
||
270 | } |
||
271 | |||
272 | |||
273 | /* |
||
274 | * FETCH instructions: |
||
275 | */ |
||
276 | |||
277 | static int instr_emit_fetch(struct ir2_instruction *instr, |
||
278 | uint32_t *dwords, uint32_t idx, |
||
279 | struct ir2_shader_info *info) |
||
280 | { |
||
281 | instr_fetch_t *fetch = (instr_fetch_t *)dwords; |
||
282 | int reg = 0; |
||
283 | struct ir2_register *dst_reg = instr->regs[reg++]; |
||
284 | struct ir2_register *src_reg = instr->regs[reg++]; |
||
285 | |||
286 | memset(fetch, 0, sizeof(*fetch)); |
||
287 | |||
288 | reg_update_stats(dst_reg, info, true); |
||
289 | reg_update_stats(src_reg, info, false); |
||
290 | |||
291 | fetch->opc = instr->fetch.opc; |
||
292 | |||
293 | if (instr->fetch.opc == VTX_FETCH) { |
||
294 | instr_fetch_vtx_t *vtx = &fetch->vtx; |
||
295 | |||
296 | assert(instr->fetch.stride <= 0xff); |
||
297 | assert(instr->fetch.fmt <= 0x3f); |
||
298 | assert(instr->fetch.const_idx <= 0x1f); |
||
299 | assert(instr->fetch.const_idx_sel <= 0x3); |
||
300 | |||
301 | vtx->src_reg = src_reg->num; |
||
302 | vtx->src_swiz = reg_fetch_src_swiz(src_reg, 1); |
||
303 | vtx->dst_reg = dst_reg->num; |
||
304 | vtx->dst_swiz = reg_fetch_dst_swiz(dst_reg); |
||
305 | vtx->must_be_one = 1; |
||
306 | vtx->const_index = instr->fetch.const_idx; |
||
307 | vtx->const_index_sel = instr->fetch.const_idx_sel; |
||
308 | vtx->format_comp_all = !!instr->fetch.is_signed; |
||
309 | vtx->num_format_all = !instr->fetch.is_normalized; |
||
310 | vtx->format = instr->fetch.fmt; |
||
311 | vtx->stride = instr->fetch.stride; |
||
312 | vtx->offset = instr->fetch.offset; |
||
313 | |||
314 | if (instr->pred != IR2_PRED_NONE) { |
||
315 | vtx->pred_select = 1; |
||
316 | vtx->pred_condition = (instr->pred == IR2_PRED_EQ) ? 1 : 0; |
||
317 | } |
||
318 | |||
319 | /* XXX seems like every FETCH but the first has |
||
320 | * this bit set: |
||
321 | */ |
||
322 | vtx->reserved3 = (idx > 0) ? 0x1 : 0x0; |
||
323 | vtx->reserved0 = (idx > 0) ? 0x2 : 0x3; |
||
324 | } else if (instr->fetch.opc == TEX_FETCH) { |
||
325 | instr_fetch_tex_t *tex = &fetch->tex; |
||
326 | |||
327 | assert(instr->fetch.const_idx <= 0x1f); |
||
328 | |||
329 | tex->src_reg = src_reg->num; |
||
330 | tex->src_swiz = reg_fetch_src_swiz(src_reg, 3); |
||
331 | tex->dst_reg = dst_reg->num; |
||
332 | tex->dst_swiz = reg_fetch_dst_swiz(dst_reg); |
||
333 | tex->const_idx = instr->fetch.const_idx; |
||
334 | tex->mag_filter = TEX_FILTER_USE_FETCH_CONST; |
||
335 | tex->min_filter = TEX_FILTER_USE_FETCH_CONST; |
||
336 | tex->mip_filter = TEX_FILTER_USE_FETCH_CONST; |
||
337 | tex->aniso_filter = ANISO_FILTER_USE_FETCH_CONST; |
||
338 | tex->arbitrary_filter = ARBITRARY_FILTER_USE_FETCH_CONST; |
||
339 | tex->vol_mag_filter = TEX_FILTER_USE_FETCH_CONST; |
||
340 | tex->vol_min_filter = TEX_FILTER_USE_FETCH_CONST; |
||
341 | tex->use_comp_lod = 1; |
||
342 | tex->use_reg_lod = !instr->fetch.is_cube; |
||
343 | tex->sample_location = SAMPLE_CENTER; |
||
344 | |||
345 | if (instr->pred != IR2_PRED_NONE) { |
||
346 | tex->pred_select = 1; |
||
347 | tex->pred_condition = (instr->pred == IR2_PRED_EQ) ? 1 : 0; |
||
348 | } |
||
349 | |||
350 | } else { |
||
351 | ERROR_MSG("invalid fetch opc: %d\n", instr->fetch.opc); |
||
352 | return -1; |
||
353 | } |
||
354 | |||
355 | return 0; |
||
356 | } |
||
357 | |||
358 | /* |
||
359 | * ALU instructions: |
||
360 | */ |
||
361 | |||
362 | static int instr_emit_alu(struct ir2_instruction *instr, uint32_t *dwords, |
||
363 | struct ir2_shader_info *info) |
||
364 | { |
||
365 | int reg = 0; |
||
366 | instr_alu_t *alu = (instr_alu_t *)dwords; |
||
367 | struct ir2_register *dst_reg = instr->regs[reg++]; |
||
368 | struct ir2_register *src1_reg; |
||
369 | struct ir2_register *src2_reg; |
||
370 | struct ir2_register *src3_reg; |
||
371 | |||
372 | memset(alu, 0, sizeof(*alu)); |
||
373 | |||
374 | /* handle instructions w/ 3 src operands: */ |
||
375 | switch (instr->alu.vector_opc) { |
||
376 | case MULADDv: |
||
377 | case CNDEv: |
||
378 | case CNDGTEv: |
||
379 | case CNDGTv: |
||
380 | case DOT2ADDv: |
||
381 | /* note: disassembler lists 3rd src first, ie: |
||
382 | * MULADDv Rdst = Rsrc3 + (Rsrc1 * Rsrc2) |
||
383 | * which is the reason for this strange ordering. |
||
384 | */ |
||
385 | src3_reg = instr->regs[reg++]; |
||
386 | break; |
||
387 | default: |
||
388 | src3_reg = NULL; |
||
389 | break; |
||
390 | } |
||
391 | |||
392 | src1_reg = instr->regs[reg++]; |
||
393 | src2_reg = instr->regs[reg++]; |
||
394 | |||
395 | reg_update_stats(dst_reg, info, true); |
||
396 | reg_update_stats(src1_reg, info, false); |
||
397 | reg_update_stats(src2_reg, info, false); |
||
398 | |||
399 | assert((dst_reg->flags & ~IR2_REG_EXPORT) == 0); |
||
400 | assert(!dst_reg->swizzle || (strlen(dst_reg->swizzle) == 4)); |
||
401 | assert((src1_reg->flags & IR2_REG_EXPORT) == 0); |
||
402 | assert(!src1_reg->swizzle || (strlen(src1_reg->swizzle) == 4)); |
||
403 | assert((src2_reg->flags & IR2_REG_EXPORT) == 0); |
||
404 | assert(!src2_reg->swizzle || (strlen(src2_reg->swizzle) == 4)); |
||
405 | |||
406 | if (instr->alu.vector_opc == ~0) { |
||
407 | alu->vector_opc = MAXv; |
||
408 | alu->vector_write_mask = 0; |
||
409 | } else { |
||
410 | alu->vector_opc = instr->alu.vector_opc; |
||
411 | alu->vector_write_mask = reg_alu_dst_swiz(dst_reg); |
||
412 | } |
||
413 | |||
414 | alu->vector_dest = dst_reg->num; |
||
415 | alu->export_data = !!(dst_reg->flags & IR2_REG_EXPORT); |
||
416 | |||
417 | // TODO predicate case/condition.. need to add to parser |
||
418 | |||
419 | alu->src2_reg = src2_reg->num; |
||
420 | alu->src2_swiz = reg_alu_src_swiz(src2_reg); |
||
421 | alu->src2_reg_negate = !!(src2_reg->flags & IR2_REG_NEGATE); |
||
422 | alu->src2_reg_abs = !!(src2_reg->flags & IR2_REG_ABS); |
||
423 | alu->src2_sel = !(src2_reg->flags & IR2_REG_CONST); |
||
424 | |||
425 | alu->src1_reg = src1_reg->num; |
||
426 | alu->src1_swiz = reg_alu_src_swiz(src1_reg); |
||
427 | alu->src1_reg_negate = !!(src1_reg->flags & IR2_REG_NEGATE); |
||
428 | alu->src1_reg_abs = !!(src1_reg->flags & IR2_REG_ABS); |
||
429 | alu->src1_sel = !(src1_reg->flags & IR2_REG_CONST); |
||
430 | |||
431 | alu->vector_clamp = instr->alu.vector_clamp; |
||
432 | alu->scalar_clamp = instr->alu.scalar_clamp; |
||
433 | |||
434 | if (instr->alu.scalar_opc != ~0) { |
||
435 | struct ir2_register *sdst_reg = instr->regs[reg++]; |
||
436 | |||
437 | reg_update_stats(sdst_reg, info, true); |
||
438 | |||
439 | assert(sdst_reg->flags == dst_reg->flags); |
||
440 | |||
441 | if (src3_reg) { |
||
442 | assert(src3_reg == instr->regs[reg++]); |
||
443 | } else { |
||
444 | src3_reg = instr->regs[reg++]; |
||
445 | } |
||
446 | |||
447 | alu->scalar_dest = sdst_reg->num; |
||
448 | alu->scalar_write_mask = reg_alu_dst_swiz(sdst_reg); |
||
449 | alu->scalar_opc = instr->alu.scalar_opc; |
||
450 | } else { |
||
451 | /* not sure if this is required, but adreno compiler seems |
||
452 | * to always set scalar opc to MAXs if it is not used: |
||
453 | */ |
||
454 | alu->scalar_opc = MAXs; |
||
455 | } |
||
456 | |||
457 | if (src3_reg) { |
||
458 | reg_update_stats(src3_reg, info, false); |
||
459 | |||
460 | alu->src3_reg = src3_reg->num; |
||
461 | alu->src3_swiz = reg_alu_src_swiz(src3_reg); |
||
462 | alu->src3_reg_negate = !!(src3_reg->flags & IR2_REG_NEGATE); |
||
463 | alu->src3_reg_abs = !!(src3_reg->flags & IR2_REG_ABS); |
||
464 | alu->src3_sel = !(src3_reg->flags & IR2_REG_CONST); |
||
465 | } else { |
||
466 | /* not sure if this is required, but adreno compiler seems |
||
467 | * to always set register bank for 3rd src if unused: |
||
468 | */ |
||
469 | alu->src3_sel = 1; |
||
470 | } |
||
471 | |||
472 | if (instr->pred != IR2_PRED_NONE) { |
||
473 | alu->pred_select = (instr->pred == IR2_PRED_EQ) ? 3 : 2; |
||
474 | } |
||
475 | |||
476 | return 0; |
||
477 | } |
||
478 | |||
479 | static int instr_emit(struct ir2_instruction *instr, uint32_t *dwords, |
||
480 | uint32_t idx, struct ir2_shader_info *info) |
||
481 | { |
||
482 | switch (instr->instr_type) { |
||
483 | case IR2_FETCH: return instr_emit_fetch(instr, dwords, idx, info); |
||
484 | case IR2_ALU: return instr_emit_alu(instr, dwords, info); |
||
485 | } |
||
486 | return -1; |
||
487 | } |
||
488 | |||
489 | |||
490 | struct ir2_register * ir2_reg_create(struct ir2_instruction *instr, |
||
491 | int num, const char *swizzle, int flags) |
||
492 | { |
||
493 | struct ir2_register *reg = |
||
494 | ir2_alloc(instr->shader, sizeof(struct ir2_register)); |
||
495 | DEBUG_MSG("%x, %d, %s", flags, num, swizzle); |
||
496 | assert(num <= REG_MASK); |
||
497 | reg->flags = flags; |
||
498 | reg->num = num; |
||
499 | reg->swizzle = ir2_strdup(instr->shader, swizzle); |
||
500 | assert(instr->regs_count < ARRAY_SIZE(instr->regs)); |
||
501 | instr->regs[instr->regs_count++] = reg; |
||
502 | return reg; |
||
503 | } |
||
504 | |||
505 | static void reg_update_stats(struct ir2_register *reg, |
||
506 | struct ir2_shader_info *info, bool dest) |
||
507 | { |
||
508 | if (!(reg->flags & (IR2_REG_CONST|IR2_REG_EXPORT))) { |
||
509 | info->max_reg = MAX2(info->max_reg, reg->num); |
||
510 | |||
511 | if (dest) { |
||
512 | info->regs_written |= (1 << reg->num); |
||
513 | } else if (!(info->regs_written & (1 << reg->num))) { |
||
514 | /* for registers that haven't been written, they must be an |
||
515 | * input register that the thread scheduler (presumably?) |
||
516 | * needs to know about: |
||
517 | */ |
||
518 | info->max_input_reg = MAX2(info->max_input_reg, reg->num); |
||
519 | } |
||
520 | } |
||
521 | } |
||
522 | |||
523 | static uint32_t reg_fetch_src_swiz(struct ir2_register *reg, uint32_t n) |
||
524 | { |
||
525 | uint32_t swiz = 0; |
||
526 | int i; |
||
527 | |||
528 | assert(reg->flags == 0); |
||
529 | assert(reg->swizzle); |
||
530 | |||
531 | DEBUG_MSG("fetch src R%d.%s", reg->num, reg->swizzle); |
||
532 | |||
533 | for (i = n-1; i >= 0; i--) { |
||
534 | swiz <<= 2; |
||
535 | switch (reg->swizzle[i]) { |
||
536 | default: |
||
537 | ERROR_MSG("invalid fetch src swizzle: %s", reg->swizzle); |
||
538 | case 'x': swiz |= 0x0; break; |
||
539 | case 'y': swiz |= 0x1; break; |
||
540 | case 'z': swiz |= 0x2; break; |
||
541 | case 'w': swiz |= 0x3; break; |
||
542 | } |
||
543 | } |
||
544 | |||
545 | return swiz; |
||
546 | } |
||
547 | |||
548 | static uint32_t reg_fetch_dst_swiz(struct ir2_register *reg) |
||
549 | { |
||
550 | uint32_t swiz = 0; |
||
551 | int i; |
||
552 | |||
553 | assert(reg->flags == 0); |
||
554 | assert(!reg->swizzle || (strlen(reg->swizzle) == 4)); |
||
555 | |||
556 | DEBUG_MSG("fetch dst R%d.%s", reg->num, reg->swizzle); |
||
557 | |||
558 | if (reg->swizzle) { |
||
559 | for (i = 3; i >= 0; i--) { |
||
560 | swiz <<= 3; |
||
561 | switch (reg->swizzle[i]) { |
||
562 | default: |
||
563 | ERROR_MSG("invalid dst swizzle: %s", reg->swizzle); |
||
564 | case 'x': swiz |= 0x0; break; |
||
565 | case 'y': swiz |= 0x1; break; |
||
566 | case 'z': swiz |= 0x2; break; |
||
567 | case 'w': swiz |= 0x3; break; |
||
568 | case '0': swiz |= 0x4; break; |
||
569 | case '1': swiz |= 0x5; break; |
||
570 | case '_': swiz |= 0x7; break; |
||
571 | } |
||
572 | } |
||
573 | } else { |
||
574 | swiz = 0x688; |
||
575 | } |
||
576 | |||
577 | return swiz; |
||
578 | } |
||
579 | |||
580 | /* actually, a write-mask */ |
||
581 | static uint32_t reg_alu_dst_swiz(struct ir2_register *reg) |
||
582 | { |
||
583 | uint32_t swiz = 0; |
||
584 | int i; |
||
585 | |||
586 | assert((reg->flags & ~IR2_REG_EXPORT) == 0); |
||
587 | assert(!reg->swizzle || (strlen(reg->swizzle) == 4)); |
||
588 | |||
589 | DEBUG_MSG("alu dst R%d.%s", reg->num, reg->swizzle); |
||
590 | |||
591 | if (reg->swizzle) { |
||
592 | for (i = 3; i >= 0; i--) { |
||
593 | swiz <<= 1; |
||
594 | if (reg->swizzle[i] == "xyzw"[i]) { |
||
595 | swiz |= 0x1; |
||
596 | } else if (reg->swizzle[i] != '_') { |
||
597 | ERROR_MSG("invalid dst swizzle: %s", reg->swizzle); |
||
598 | break; |
||
599 | } |
||
600 | } |
||
601 | } else { |
||
602 | swiz = 0xf; |
||
603 | } |
||
604 | |||
605 | return swiz; |
||
606 | } |
||
607 | |||
608 | static uint32_t reg_alu_src_swiz(struct ir2_register *reg) |
||
609 | { |
||
610 | uint32_t swiz = 0; |
||
611 | int i; |
||
612 | |||
613 | assert((reg->flags & IR2_REG_EXPORT) == 0); |
||
614 | assert(!reg->swizzle || (strlen(reg->swizzle) == 4)); |
||
615 | |||
616 | DEBUG_MSG("vector src R%d.%s", reg->num, reg->swizzle); |
||
617 | |||
618 | if (reg->swizzle) { |
||
619 | for (i = 3; i >= 0; i--) { |
||
620 | swiz <<= 2; |
||
621 | switch (reg->swizzle[i]) { |
||
622 | default: |
||
623 | ERROR_MSG("invalid vector src swizzle: %s", reg->swizzle); |
||
624 | case 'x': swiz |= (0x0 - i) & 0x3; break; |
||
625 | case 'y': swiz |= (0x1 - i) & 0x3; break; |
||
626 | case 'z': swiz |= (0x2 - i) & 0x3; break; |
||
627 | case 'w': swiz |= (0x3 - i) & 0x3; break; |
||
628 | } |
||
629 | } |
||
630 | } else { |
||
631 | swiz = 0x0; |
||
632 | } |
||
633 | |||
634 | return swiz; |
||
635 | }=><=>=><=>=><=>=><=>><>><>>=>=>=>=>=>=>>=>=>=>=>>=>>>=>>=><=>> |