Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
5564 | serge | 1 | /* |
2 | * Mesa 3-D graphics library |
||
3 | * |
||
4 | * Copyright (C) 2012-2013 LunarG, Inc. |
||
5 | * |
||
6 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
7 | * copy of this software and associated documentation files (the "Software"), |
||
8 | * to deal in the Software without restriction, including without limitation |
||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
||
10 | * and/or sell copies of the Software, and to permit persons to whom the |
||
11 | * Software is furnished to do so, subject to the following conditions: |
||
12 | * |
||
13 | * The above copyright notice and this permission notice shall be included |
||
14 | * in all copies or substantial portions of the Software. |
||
15 | * |
||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
||
22 | * DEALINGS IN THE SOFTWARE. |
||
23 | * |
||
24 | * Authors: |
||
25 | * Chia-I Wu |
||
26 | */ |
||
27 | |||
28 | #include "tgsi/tgsi_dump.h" |
||
29 | #include "tgsi/tgsi_util.h" |
||
30 | #include "toy_compiler.h" |
||
31 | #include "toy_tgsi.h" |
||
32 | #include "toy_legalize.h" |
||
33 | #include "toy_optimize.h" |
||
34 | #include "toy_helpers.h" |
||
35 | #include "ilo_shader_internal.h" |
||
36 | |||
37 | struct vs_compile_context { |
||
38 | struct ilo_shader *shader; |
||
39 | const struct ilo_shader_variant *variant; |
||
40 | |||
41 | struct toy_compiler tc; |
||
42 | struct toy_tgsi tgsi; |
||
43 | int const_cache; |
||
44 | |||
45 | int output_map[PIPE_MAX_SHADER_OUTPUTS]; |
||
46 | |||
47 | int num_grf_per_vrf; |
||
48 | int first_const_grf; |
||
49 | int first_ucp_grf; |
||
50 | int first_vue_grf; |
||
51 | int first_free_grf; |
||
52 | int last_free_grf; |
||
53 | |||
54 | int first_free_mrf; |
||
55 | int last_free_mrf; |
||
56 | }; |
||
57 | |||
58 | static void |
||
59 | vs_lower_opcode_tgsi_in(struct vs_compile_context *vcc, |
||
60 | struct toy_dst dst, int dim, int idx) |
||
61 | { |
||
62 | struct toy_compiler *tc = &vcc->tc; |
||
63 | int slot; |
||
64 | |||
65 | assert(!dim); |
||
66 | |||
67 | slot = toy_tgsi_find_input(&vcc->tgsi, idx); |
||
68 | if (slot >= 0) { |
||
69 | const int first_in_grf = vcc->first_vue_grf + |
||
70 | (vcc->shader->in.count - vcc->tgsi.num_inputs); |
||
71 | const int grf = first_in_grf + vcc->tgsi.inputs[slot].semantic_index; |
||
72 | const struct toy_src src = tsrc(TOY_FILE_GRF, grf, 0); |
||
73 | |||
74 | tc_MOV(tc, dst, src); |
||
75 | } |
||
76 | else { |
||
77 | /* undeclared input */ |
||
78 | tc_MOV(tc, dst, tsrc_imm_f(0.0f)); |
||
79 | } |
||
80 | } |
||
81 | |||
82 | static bool |
||
83 | vs_lower_opcode_tgsi_const_pcb(struct vs_compile_context *vcc, |
||
84 | struct toy_dst dst, int dim, |
||
85 | struct toy_src idx) |
||
86 | { |
||
87 | const int i = idx.val32; |
||
88 | const int grf = vcc->first_const_grf + i / 2; |
||
89 | const int grf_subreg = (i & 1) * 16; |
||
90 | struct toy_src src; |
||
91 | |||
92 | if (!vcc->variant->use_pcb || dim != 0 || idx.file != TOY_FILE_IMM || |
||
93 | grf >= vcc->first_ucp_grf) |
||
94 | return false; |
||
95 | |||
96 | |||
97 | src = tsrc_rect(tsrc(TOY_FILE_GRF, grf, grf_subreg), TOY_RECT_041); |
||
98 | tc_MOV(&vcc->tc, dst, src); |
||
99 | |||
100 | return true; |
||
101 | } |
||
102 | |||
103 | static void |
||
104 | vs_lower_opcode_tgsi_const_gen6(struct vs_compile_context *vcc, |
||
105 | struct toy_dst dst, int dim, |
||
106 | struct toy_src idx) |
||
107 | { |
||
108 | const struct toy_dst header = |
||
109 | tdst_ud(tdst(TOY_FILE_MRF, vcc->first_free_mrf, 0)); |
||
110 | const struct toy_dst block_offsets = |
||
111 | tdst_ud(tdst(TOY_FILE_MRF, vcc->first_free_mrf + 1, 0)); |
||
112 | const struct toy_src r0 = tsrc_ud(tsrc(TOY_FILE_GRF, 0, 0)); |
||
113 | struct toy_compiler *tc = &vcc->tc; |
||
114 | unsigned msg_type, msg_ctrl, msg_len; |
||
115 | struct toy_inst *inst; |
||
116 | struct toy_src desc; |
||
117 | |||
118 | if (vs_lower_opcode_tgsi_const_pcb(vcc, dst, dim, idx)) |
||
119 | return; |
||
120 | |||
121 | /* set message header */ |
||
122 | inst = tc_MOV(tc, header, r0); |
||
123 | inst->mask_ctrl = GEN6_MASKCTRL_NOMASK; |
||
124 | |||
125 | /* set block offsets */ |
||
126 | tc_MOV(tc, block_offsets, idx); |
||
127 | |||
128 | msg_type = GEN6_MSG_DP_OWORD_DUAL_BLOCK_READ; |
||
129 | msg_ctrl = GEN6_MSG_DP_OWORD_DUAL_BLOCK_SIZE_1;; |
||
130 | msg_len = 2; |
||
131 | |||
132 | desc = tsrc_imm_mdesc_data_port(tc, false, msg_len, 1, true, false, |
||
133 | msg_type, msg_ctrl, vcc->shader->bt.const_base + dim); |
||
134 | |||
135 | tc_SEND(tc, dst, tsrc_from(header), desc, vcc->const_cache); |
||
136 | } |
||
137 | |||
138 | static void |
||
139 | vs_lower_opcode_tgsi_const_gen7(struct vs_compile_context *vcc, |
||
140 | struct toy_dst dst, int dim, |
||
141 | struct toy_src idx) |
||
142 | { |
||
143 | struct toy_compiler *tc = &vcc->tc; |
||
144 | const struct toy_dst offset = |
||
145 | tdst_ud(tdst(TOY_FILE_MRF, vcc->first_free_mrf, 0)); |
||
146 | struct toy_src desc; |
||
147 | |||
148 | if (vs_lower_opcode_tgsi_const_pcb(vcc, dst, dim, idx)) |
||
149 | return; |
||
150 | |||
151 | /* |
||
152 | * In 259b65e2e7938de4aab323033cfe2b33369ddb07, pull constant load was |
||
153 | * changed from OWord Dual Block Read to ld to increase performance in the |
||
154 | * classic driver. Since we use the constant cache instead of the data |
||
155 | * cache, I wonder if we still want to follow the classic driver. |
||
156 | */ |
||
157 | |||
158 | /* set offset */ |
||
159 | tc_MOV(tc, offset, idx); |
||
160 | |||
161 | desc = tsrc_imm_mdesc_sampler(tc, 1, 1, false, |
||
162 | GEN6_MSG_SAMPLER_SIMD4X2, |
||
163 | GEN6_MSG_SAMPLER_LD, |
||
164 | 0, |
||
165 | vcc->shader->bt.const_base + dim); |
||
166 | |||
167 | tc_SEND(tc, dst, tsrc_from(offset), desc, GEN6_SFID_SAMPLER); |
||
168 | } |
||
169 | |||
170 | static void |
||
171 | vs_lower_opcode_tgsi_imm(struct vs_compile_context *vcc, |
||
172 | struct toy_dst dst, int idx) |
||
173 | { |
||
174 | const uint32_t *imm; |
||
175 | int ch; |
||
176 | |||
177 | imm = toy_tgsi_get_imm(&vcc->tgsi, idx, NULL); |
||
178 | |||
179 | for (ch = 0; ch < 4; ch++) { |
||
180 | /* raw moves */ |
||
181 | tc_MOV(&vcc->tc, |
||
182 | tdst_writemask(tdst_ud(dst), 1 << ch), |
||
183 | tsrc_imm_ud(imm[ch])); |
||
184 | } |
||
185 | } |
||
186 | |||
187 | |||
188 | static void |
||
189 | vs_lower_opcode_tgsi_sv(struct vs_compile_context *vcc, |
||
190 | struct toy_dst dst, int dim, int idx) |
||
191 | { |
||
192 | struct toy_compiler *tc = &vcc->tc; |
||
193 | const struct toy_tgsi *tgsi = &vcc->tgsi; |
||
194 | int slot; |
||
195 | |||
196 | assert(!dim); |
||
197 | |||
198 | slot = toy_tgsi_find_system_value(tgsi, idx); |
||
199 | if (slot < 0) |
||
200 | return; |
||
201 | |||
202 | switch (tgsi->system_values[slot].semantic_name) { |
||
203 | case TGSI_SEMANTIC_INSTANCEID: |
||
204 | case TGSI_SEMANTIC_VERTEXID: |
||
205 | /* |
||
206 | * In 3DSTATE_VERTEX_ELEMENTS, we prepend an extra vertex element for |
||
207 | * the generated IDs, with VID in the X channel and IID in the Y |
||
208 | * channel. |
||
209 | */ |
||
210 | { |
||
211 | const int grf = vcc->first_vue_grf; |
||
212 | const struct toy_src src = tsrc(TOY_FILE_GRF, grf, 0); |
||
213 | const enum toy_swizzle swizzle = |
||
214 | (tgsi->system_values[slot].semantic_name == |
||
215 | TGSI_SEMANTIC_INSTANCEID) ? TOY_SWIZZLE_Y : TOY_SWIZZLE_X; |
||
216 | |||
217 | tc_MOV(tc, tdst_d(dst), tsrc_d(tsrc_swizzle1(src, swizzle))); |
||
218 | } |
||
219 | break; |
||
220 | case TGSI_SEMANTIC_PRIMID: |
||
221 | default: |
||
222 | tc_fail(tc, "unhandled system value"); |
||
223 | tc_MOV(tc, dst, tsrc_imm_d(0)); |
||
224 | break; |
||
225 | } |
||
226 | } |
||
227 | |||
228 | static void |
||
229 | vs_lower_opcode_tgsi_direct(struct vs_compile_context *vcc, |
||
230 | struct toy_inst *inst) |
||
231 | { |
||
232 | struct toy_compiler *tc = &vcc->tc; |
||
233 | int dim, idx; |
||
234 | |||
235 | assert(inst->src[0].file == TOY_FILE_IMM); |
||
236 | dim = inst->src[0].val32; |
||
237 | |||
238 | assert(inst->src[1].file == TOY_FILE_IMM); |
||
239 | idx = inst->src[1].val32; |
||
240 | |||
241 | switch (inst->opcode) { |
||
242 | case TOY_OPCODE_TGSI_IN: |
||
243 | vs_lower_opcode_tgsi_in(vcc, inst->dst, dim, idx); |
||
244 | break; |
||
245 | case TOY_OPCODE_TGSI_CONST: |
||
246 | if (ilo_dev_gen(tc->dev) >= ILO_GEN(7)) |
||
247 | vs_lower_opcode_tgsi_const_gen7(vcc, inst->dst, dim, inst->src[1]); |
||
248 | else |
||
249 | vs_lower_opcode_tgsi_const_gen6(vcc, inst->dst, dim, inst->src[1]); |
||
250 | break; |
||
251 | case TOY_OPCODE_TGSI_SV: |
||
252 | vs_lower_opcode_tgsi_sv(vcc, inst->dst, dim, idx); |
||
253 | break; |
||
254 | case TOY_OPCODE_TGSI_IMM: |
||
255 | assert(!dim); |
||
256 | vs_lower_opcode_tgsi_imm(vcc, inst->dst, idx); |
||
257 | break; |
||
258 | default: |
||
259 | tc_fail(tc, "unhandled TGSI fetch"); |
||
260 | break; |
||
261 | } |
||
262 | |||
263 | tc_discard_inst(tc, inst); |
||
264 | } |
||
265 | |||
266 | static void |
||
267 | vs_lower_opcode_tgsi_indirect(struct vs_compile_context *vcc, |
||
268 | struct toy_inst *inst) |
||
269 | { |
||
270 | struct toy_compiler *tc = &vcc->tc; |
||
271 | enum tgsi_file_type file; |
||
272 | int dim, idx; |
||
273 | struct toy_src indirect_dim, indirect_idx; |
||
274 | |||
275 | assert(inst->src[0].file == TOY_FILE_IMM); |
||
276 | file = inst->src[0].val32; |
||
277 | |||
278 | assert(inst->src[1].file == TOY_FILE_IMM); |
||
279 | dim = inst->src[1].val32; |
||
280 | indirect_dim = inst->src[2]; |
||
281 | |||
282 | assert(inst->src[3].file == TOY_FILE_IMM); |
||
283 | idx = inst->src[3].val32; |
||
284 | indirect_idx = inst->src[4]; |
||
285 | |||
286 | /* no dimension indirection */ |
||
287 | assert(indirect_dim.file == TOY_FILE_IMM); |
||
288 | dim += indirect_dim.val32; |
||
289 | |||
290 | switch (inst->opcode) { |
||
291 | case TOY_OPCODE_TGSI_INDIRECT_FETCH: |
||
292 | if (file == TGSI_FILE_CONSTANT) { |
||
293 | if (idx) { |
||
294 | struct toy_dst tmp = tc_alloc_tmp(tc); |
||
295 | |||
296 | tc_ADD(tc, tmp, indirect_idx, tsrc_imm_d(idx)); |
||
297 | indirect_idx = tsrc_from(tmp); |
||
298 | } |
||
299 | |||
300 | if (ilo_dev_gen(tc->dev) >= ILO_GEN(7)) |
||
301 | vs_lower_opcode_tgsi_const_gen7(vcc, inst->dst, dim, indirect_idx); |
||
302 | else |
||
303 | vs_lower_opcode_tgsi_const_gen6(vcc, inst->dst, dim, indirect_idx); |
||
304 | break; |
||
305 | } |
||
306 | /* fall through */ |
||
307 | case TOY_OPCODE_TGSI_INDIRECT_STORE: |
||
308 | default: |
||
309 | tc_fail(tc, "unhandled TGSI indirection"); |
||
310 | break; |
||
311 | } |
||
312 | |||
313 | tc_discard_inst(tc, inst); |
||
314 | } |
||
315 | |||
316 | /** |
||
317 | * Emit instructions to move sampling parameters to the message registers. |
||
318 | */ |
||
319 | static int |
||
320 | vs_add_sampler_params(struct toy_compiler *tc, int msg_type, int base_mrf, |
||
321 | struct toy_src coords, int num_coords, |
||
322 | struct toy_src bias_or_lod, struct toy_src ref_or_si, |
||
323 | struct toy_src ddx, struct toy_src ddy, int num_derivs) |
||
324 | { |
||
325 | const unsigned coords_writemask = (1 << num_coords) - 1; |
||
326 | struct toy_dst m[3]; |
||
327 | int num_params, i; |
||
328 | |||
329 | assert(num_coords <= 4); |
||
330 | assert(num_derivs <= 3 && num_derivs <= num_coords); |
||
331 | |||
332 | for (i = 0; i < Elements(m); i++) |
||
333 | m[i] = tdst(TOY_FILE_MRF, base_mrf + i, 0); |
||
334 | |||
335 | switch (msg_type) { |
||
336 | case GEN6_MSG_SAMPLER_SAMPLE_L: |
||
337 | tc_MOV(tc, tdst_writemask(m[0], coords_writemask), coords); |
||
338 | tc_MOV(tc, tdst_writemask(m[1], TOY_WRITEMASK_X), bias_or_lod); |
||
339 | num_params = 5; |
||
340 | break; |
||
341 | case GEN6_MSG_SAMPLER_SAMPLE_D: |
||
342 | tc_MOV(tc, tdst_writemask(m[0], coords_writemask), coords); |
||
343 | tc_MOV(tc, tdst_writemask(m[1], TOY_WRITEMASK_XZ), |
||
344 | tsrc_swizzle(ddx, 0, 0, 1, 1)); |
||
345 | tc_MOV(tc, tdst_writemask(m[1], TOY_WRITEMASK_YW), |
||
346 | tsrc_swizzle(ddy, 0, 0, 1, 1)); |
||
347 | if (num_derivs > 2) { |
||
348 | tc_MOV(tc, tdst_writemask(m[2], TOY_WRITEMASK_X), |
||
349 | tsrc_swizzle1(ddx, 2)); |
||
350 | tc_MOV(tc, tdst_writemask(m[2], TOY_WRITEMASK_Y), |
||
351 | tsrc_swizzle1(ddy, 2)); |
||
352 | } |
||
353 | num_params = 4 + num_derivs * 2; |
||
354 | break; |
||
355 | case GEN6_MSG_SAMPLER_SAMPLE_L_C: |
||
356 | tc_MOV(tc, tdst_writemask(m[0], coords_writemask), coords); |
||
357 | tc_MOV(tc, tdst_writemask(m[1], TOY_WRITEMASK_X), ref_or_si); |
||
358 | tc_MOV(tc, tdst_writemask(m[1], TOY_WRITEMASK_Y), bias_or_lod); |
||
359 | num_params = 6; |
||
360 | break; |
||
361 | case GEN6_MSG_SAMPLER_LD: |
||
362 | assert(num_coords <= 3); |
||
363 | tc_MOV(tc, tdst_writemask(tdst_d(m[0]), coords_writemask), coords); |
||
364 | tc_MOV(tc, tdst_writemask(tdst_d(m[0]), TOY_WRITEMASK_W), bias_or_lod); |
||
365 | if (ilo_dev_gen(tc->dev) >= ILO_GEN(7)) { |
||
366 | num_params = 4; |
||
367 | } |
||
368 | else { |
||
369 | tc_MOV(tc, tdst_writemask(tdst_d(m[1]), TOY_WRITEMASK_X), ref_or_si); |
||
370 | num_params = 5; |
||
371 | } |
||
372 | break; |
||
373 | case GEN6_MSG_SAMPLER_RESINFO: |
||
374 | tc_MOV(tc, tdst_writemask(tdst_d(m[0]), TOY_WRITEMASK_X), bias_or_lod); |
||
375 | num_params = 1; |
||
376 | break; |
||
377 | default: |
||
378 | tc_fail(tc, "unknown sampler opcode"); |
||
379 | num_params = 0; |
||
380 | break; |
||
381 | } |
||
382 | |||
383 | return (num_params + 3) / 4; |
||
384 | } |
||
385 | |||
386 | /** |
||
387 | * Set up message registers and return the message descriptor for sampling. |
||
388 | */ |
||
389 | static struct toy_src |
||
390 | vs_prepare_tgsi_sampling(struct vs_compile_context *vcc, |
||
391 | const struct toy_inst *inst, |
||
392 | int base_mrf, unsigned *ret_sampler_index) |
||
393 | { |
||
394 | struct toy_compiler *tc = &vcc->tc; |
||
395 | unsigned simd_mode, msg_type, msg_len, sampler_index, binding_table_index; |
||
396 | struct toy_src coords, ddx, ddy, bias_or_lod, ref_or_si; |
||
397 | int num_coords, ref_pos, num_derivs; |
||
398 | int sampler_src; |
||
399 | |||
400 | simd_mode = GEN6_MSG_SAMPLER_SIMD4X2; |
||
401 | |||
402 | coords = inst->src[0]; |
||
403 | ddx = tsrc_null(); |
||
404 | ddy = tsrc_null(); |
||
405 | bias_or_lod = tsrc_null(); |
||
406 | ref_or_si = tsrc_null(); |
||
407 | num_derivs = 0; |
||
408 | sampler_src = 1; |
||
409 | |||
410 | num_coords = tgsi_util_get_texture_coord_dim(inst->tex.target, &ref_pos); |
||
411 | |||
412 | /* extract the parameters */ |
||
413 | switch (inst->opcode) { |
||
414 | case TOY_OPCODE_TGSI_TXD: |
||
415 | if (ref_pos >= 0) { |
||
416 | assert(ref_pos < 4); |
||
417 | |||
418 | msg_type = GEN7_MSG_SAMPLER_SAMPLE_D_C; |
||
419 | ref_or_si = tsrc_swizzle1(coords, ref_pos); |
||
420 | |||
421 | if (ilo_dev_gen(tc->dev) < ILO_GEN(7.5)) |
||
422 | tc_fail(tc, "TXD with shadow sampler not supported"); |
||
423 | } |
||
424 | else { |
||
425 | msg_type = GEN6_MSG_SAMPLER_SAMPLE_D; |
||
426 | } |
||
427 | |||
428 | ddx = inst->src[1]; |
||
429 | ddy = inst->src[2]; |
||
430 | num_derivs = num_coords; |
||
431 | sampler_src = 3; |
||
432 | break; |
||
433 | case TOY_OPCODE_TGSI_TXL: |
||
434 | if (ref_pos >= 0) { |
||
435 | assert(ref_pos < 3); |
||
436 | |||
437 | msg_type = GEN6_MSG_SAMPLER_SAMPLE_L_C; |
||
438 | ref_or_si = tsrc_swizzle1(coords, ref_pos); |
||
439 | } |
||
440 | else { |
||
441 | msg_type = GEN6_MSG_SAMPLER_SAMPLE_L; |
||
442 | } |
||
443 | |||
444 | bias_or_lod = tsrc_swizzle1(coords, TOY_SWIZZLE_W); |
||
445 | break; |
||
446 | case TOY_OPCODE_TGSI_TXF: |
||
447 | msg_type = GEN6_MSG_SAMPLER_LD; |
||
448 | |||
449 | switch (inst->tex.target) { |
||
450 | case TGSI_TEXTURE_2D_MSAA: |
||
451 | case TGSI_TEXTURE_2D_ARRAY_MSAA: |
||
452 | assert(ref_pos >= 0 && ref_pos < 4); |
||
453 | /* lod is always 0 */ |
||
454 | bias_or_lod = tsrc_imm_d(0); |
||
455 | ref_or_si = tsrc_swizzle1(coords, ref_pos); |
||
456 | break; |
||
457 | default: |
||
458 | bias_or_lod = tsrc_swizzle1(coords, TOY_SWIZZLE_W); |
||
459 | break; |
||
460 | } |
||
461 | |||
462 | /* offset the coordinates */ |
||
463 | if (!tsrc_is_null(inst->tex.offsets[0])) { |
||
464 | struct toy_dst tmp; |
||
465 | |||
466 | tmp = tc_alloc_tmp(tc); |
||
467 | tc_ADD(tc, tmp, coords, inst->tex.offsets[0]); |
||
468 | coords = tsrc_from(tmp); |
||
469 | } |
||
470 | |||
471 | sampler_src = 1; |
||
472 | break; |
||
473 | case TOY_OPCODE_TGSI_TXQ: |
||
474 | msg_type = GEN6_MSG_SAMPLER_RESINFO; |
||
475 | num_coords = 0; |
||
476 | bias_or_lod = tsrc_swizzle1(coords, TOY_SWIZZLE_X); |
||
477 | break; |
||
478 | case TOY_OPCODE_TGSI_TXQ_LZ: |
||
479 | msg_type = GEN6_MSG_SAMPLER_RESINFO; |
||
480 | num_coords = 0; |
||
481 | sampler_src = 0; |
||
482 | break; |
||
483 | case TOY_OPCODE_TGSI_TXL2: |
||
484 | if (ref_pos >= 0) { |
||
485 | assert(ref_pos < 4); |
||
486 | |||
487 | msg_type = GEN6_MSG_SAMPLER_SAMPLE_L_C; |
||
488 | ref_or_si = tsrc_swizzle1(coords, ref_pos); |
||
489 | } |
||
490 | else { |
||
491 | msg_type = GEN6_MSG_SAMPLER_SAMPLE_L; |
||
492 | } |
||
493 | |||
494 | bias_or_lod = tsrc_swizzle1(inst->src[1], TOY_SWIZZLE_X); |
||
495 | sampler_src = 2; |
||
496 | break; |
||
497 | default: |
||
498 | assert(!"unhandled sampling opcode"); |
||
499 | if (ret_sampler_index) |
||
500 | *ret_sampler_index = 0; |
||
501 | return tsrc_null(); |
||
502 | break; |
||
503 | } |
||
504 | |||
505 | assert(inst->src[sampler_src].file == TOY_FILE_IMM); |
||
506 | sampler_index = inst->src[sampler_src].val32; |
||
507 | binding_table_index = vcc->shader->bt.tex_base + sampler_index; |
||
508 | |||
509 | /* |
||
510 | * From the Sandy Bridge PRM, volume 4 part 1, page 18: |
||
511 | * |
||
512 | * "Note that the (cube map) coordinates delivered to the sampling |
||
513 | * engine must already have been divided by the component with the |
||
514 | * largest absolute value." |
||
515 | */ |
||
516 | switch (inst->tex.target) { |
||
517 | case TGSI_TEXTURE_CUBE: |
||
518 | case TGSI_TEXTURE_SHADOWCUBE: |
||
519 | case TGSI_TEXTURE_CUBE_ARRAY: |
||
520 | case TGSI_TEXTURE_SHADOWCUBE_ARRAY: |
||
521 | /* TXQ does not need coordinates */ |
||
522 | if (num_coords >= 3) { |
||
523 | struct toy_dst tmp, max; |
||
524 | struct toy_src abs_coords[3]; |
||
525 | int i; |
||
526 | |||
527 | tmp = tc_alloc_tmp(tc); |
||
528 | max = tdst_writemask(tmp, TOY_WRITEMASK_W); |
||
529 | |||
530 | for (i = 0; i < 3; i++) |
||
531 | abs_coords[i] = tsrc_absolute(tsrc_swizzle1(coords, i)); |
||
532 | |||
533 | tc_SEL(tc, max, abs_coords[0], abs_coords[0], GEN6_COND_GE); |
||
534 | tc_SEL(tc, max, tsrc_from(max), abs_coords[0], GEN6_COND_GE); |
||
535 | tc_INV(tc, max, tsrc_from(max)); |
||
536 | |||
537 | for (i = 0; i < 3; i++) |
||
538 | tc_MUL(tc, tdst_writemask(tmp, 1 << i), coords, tsrc_from(max)); |
||
539 | |||
540 | coords = tsrc_from(tmp); |
||
541 | } |
||
542 | break; |
||
543 | } |
||
544 | |||
545 | /* set up sampler parameters */ |
||
546 | msg_len = vs_add_sampler_params(tc, msg_type, base_mrf, |
||
547 | coords, num_coords, bias_or_lod, ref_or_si, ddx, ddy, num_derivs); |
||
548 | |||
549 | /* |
||
550 | * From the Sandy Bridge PRM, volume 4 part 1, page 136: |
||
551 | * |
||
552 | * "The maximum message length allowed to the sampler is 11. This would |
||
553 | * disallow sample_d, sample_b_c, and sample_l_c with a SIMD Mode of |
||
554 | * SIMD16." |
||
555 | */ |
||
556 | if (msg_len > 11) |
||
557 | tc_fail(tc, "maximum length for messages to the sampler is 11"); |
||
558 | |||
559 | if (ret_sampler_index) |
||
560 | *ret_sampler_index = sampler_index; |
||
561 | |||
562 | return tsrc_imm_mdesc_sampler(tc, msg_len, 1, |
||
563 | false, simd_mode, msg_type, sampler_index, binding_table_index); |
||
564 | } |
||
565 | |||
566 | static void |
||
567 | vs_lower_opcode_tgsi_sampling(struct vs_compile_context *vcc, |
||
568 | struct toy_inst *inst) |
||
569 | { |
||
570 | struct toy_compiler *tc = &vcc->tc; |
||
571 | struct toy_src desc; |
||
572 | struct toy_dst dst, tmp; |
||
573 | unsigned sampler_index; |
||
574 | int swizzles[4], i; |
||
575 | unsigned swizzle_zero_mask, swizzle_one_mask, swizzle_normal_mask; |
||
576 | bool need_filter; |
||
577 | |||
578 | desc = vs_prepare_tgsi_sampling(vcc, inst, |
||
579 | vcc->first_free_mrf, &sampler_index); |
||
580 | |||
581 | switch (inst->opcode) { |
||
582 | case TOY_OPCODE_TGSI_TXF: |
||
583 | case TOY_OPCODE_TGSI_TXQ: |
||
584 | case TOY_OPCODE_TGSI_TXQ_LZ: |
||
585 | need_filter = false; |
||
586 | break; |
||
587 | default: |
||
588 | need_filter = true; |
||
589 | break; |
||
590 | } |
||
591 | |||
592 | toy_compiler_lower_to_send(tc, inst, false, GEN6_SFID_SAMPLER); |
||
593 | inst->src[0] = tsrc(TOY_FILE_MRF, vcc->first_free_mrf, 0); |
||
594 | inst->src[1] = desc; |
||
595 | |||
596 | /* write to a temp first */ |
||
597 | tmp = tc_alloc_tmp(tc); |
||
598 | tmp.type = inst->dst.type; |
||
599 | dst = inst->dst; |
||
600 | inst->dst = tmp; |
||
601 | |||
602 | tc_move_inst(tc, inst); |
||
603 | |||
604 | if (need_filter) { |
||
605 | assert(sampler_index < vcc->variant->num_sampler_views); |
||
606 | swizzles[0] = vcc->variant->sampler_view_swizzles[sampler_index].r; |
||
607 | swizzles[1] = vcc->variant->sampler_view_swizzles[sampler_index].g; |
||
608 | swizzles[2] = vcc->variant->sampler_view_swizzles[sampler_index].b; |
||
609 | swizzles[3] = vcc->variant->sampler_view_swizzles[sampler_index].a; |
||
610 | } |
||
611 | else { |
||
612 | swizzles[0] = PIPE_SWIZZLE_RED; |
||
613 | swizzles[1] = PIPE_SWIZZLE_GREEN; |
||
614 | swizzles[2] = PIPE_SWIZZLE_BLUE; |
||
615 | swizzles[3] = PIPE_SWIZZLE_ALPHA; |
||
616 | } |
||
617 | |||
618 | swizzle_zero_mask = 0; |
||
619 | swizzle_one_mask = 0; |
||
620 | swizzle_normal_mask = 0; |
||
621 | for (i = 0; i < 4; i++) { |
||
622 | switch (swizzles[i]) { |
||
623 | case PIPE_SWIZZLE_ZERO: |
||
624 | swizzle_zero_mask |= 1 << i; |
||
625 | swizzles[i] = i; |
||
626 | break; |
||
627 | case PIPE_SWIZZLE_ONE: |
||
628 | swizzle_one_mask |= 1 << i; |
||
629 | swizzles[i] = i; |
||
630 | break; |
||
631 | default: |
||
632 | swizzle_normal_mask |= 1 << i; |
||
633 | break; |
||
634 | } |
||
635 | } |
||
636 | |||
637 | /* swizzle the results */ |
||
638 | if (swizzle_normal_mask) { |
||
639 | tc_MOV(tc, tdst_writemask(dst, swizzle_normal_mask), |
||
640 | tsrc_swizzle(tsrc_from(tmp), swizzles[0], |
||
641 | swizzles[1], swizzles[2], swizzles[3])); |
||
642 | } |
||
643 | if (swizzle_zero_mask) |
||
644 | tc_MOV(tc, tdst_writemask(dst, swizzle_zero_mask), tsrc_imm_f(0.0f)); |
||
645 | if (swizzle_one_mask) |
||
646 | tc_MOV(tc, tdst_writemask(dst, swizzle_one_mask), tsrc_imm_f(1.0f)); |
||
647 | } |
||
648 | |||
649 | static void |
||
650 | vs_lower_opcode_urb_write(struct toy_compiler *tc, struct toy_inst *inst) |
||
651 | { |
||
652 | /* vs_write_vue() has set up the message registers */ |
||
653 | toy_compiler_lower_to_send(tc, inst, false, GEN6_SFID_URB); |
||
654 | } |
||
655 | |||
656 | static void |
||
657 | vs_lower_virtual_opcodes(struct vs_compile_context *vcc) |
||
658 | { |
||
659 | struct toy_compiler *tc = &vcc->tc; |
||
660 | struct toy_inst *inst; |
||
661 | |||
662 | tc_head(tc); |
||
663 | while ((inst = tc_next(tc)) != NULL) { |
||
664 | switch (inst->opcode) { |
||
665 | case TOY_OPCODE_TGSI_IN: |
||
666 | case TOY_OPCODE_TGSI_CONST: |
||
667 | case TOY_OPCODE_TGSI_SV: |
||
668 | case TOY_OPCODE_TGSI_IMM: |
||
669 | vs_lower_opcode_tgsi_direct(vcc, inst); |
||
670 | break; |
||
671 | case TOY_OPCODE_TGSI_INDIRECT_FETCH: |
||
672 | case TOY_OPCODE_TGSI_INDIRECT_STORE: |
||
673 | vs_lower_opcode_tgsi_indirect(vcc, inst); |
||
674 | break; |
||
675 | case TOY_OPCODE_TGSI_TEX: |
||
676 | case TOY_OPCODE_TGSI_TXB: |
||
677 | case TOY_OPCODE_TGSI_TXD: |
||
678 | case TOY_OPCODE_TGSI_TXL: |
||
679 | case TOY_OPCODE_TGSI_TXP: |
||
680 | case TOY_OPCODE_TGSI_TXF: |
||
681 | case TOY_OPCODE_TGSI_TXQ: |
||
682 | case TOY_OPCODE_TGSI_TXQ_LZ: |
||
683 | case TOY_OPCODE_TGSI_TEX2: |
||
684 | case TOY_OPCODE_TGSI_TXB2: |
||
685 | case TOY_OPCODE_TGSI_TXL2: |
||
686 | case TOY_OPCODE_TGSI_SAMPLE: |
||
687 | case TOY_OPCODE_TGSI_SAMPLE_I: |
||
688 | case TOY_OPCODE_TGSI_SAMPLE_I_MS: |
||
689 | case TOY_OPCODE_TGSI_SAMPLE_B: |
||
690 | case TOY_OPCODE_TGSI_SAMPLE_C: |
||
691 | case TOY_OPCODE_TGSI_SAMPLE_C_LZ: |
||
692 | case TOY_OPCODE_TGSI_SAMPLE_D: |
||
693 | case TOY_OPCODE_TGSI_SAMPLE_L: |
||
694 | case TOY_OPCODE_TGSI_GATHER4: |
||
695 | case TOY_OPCODE_TGSI_SVIEWINFO: |
||
696 | case TOY_OPCODE_TGSI_SAMPLE_POS: |
||
697 | case TOY_OPCODE_TGSI_SAMPLE_INFO: |
||
698 | vs_lower_opcode_tgsi_sampling(vcc, inst); |
||
699 | break; |
||
700 | case TOY_OPCODE_INV: |
||
701 | case TOY_OPCODE_LOG: |
||
702 | case TOY_OPCODE_EXP: |
||
703 | case TOY_OPCODE_SQRT: |
||
704 | case TOY_OPCODE_RSQ: |
||
705 | case TOY_OPCODE_SIN: |
||
706 | case TOY_OPCODE_COS: |
||
707 | case TOY_OPCODE_FDIV: |
||
708 | case TOY_OPCODE_POW: |
||
709 | case TOY_OPCODE_INT_DIV_QUOTIENT: |
||
710 | case TOY_OPCODE_INT_DIV_REMAINDER: |
||
711 | toy_compiler_lower_math(tc, inst); |
||
712 | break; |
||
713 | case TOY_OPCODE_URB_WRITE: |
||
714 | vs_lower_opcode_urb_write(tc, inst); |
||
715 | break; |
||
716 | default: |
||
717 | if (inst->opcode > 127) |
||
718 | tc_fail(tc, "unhandled virtual opcode"); |
||
719 | break; |
||
720 | } |
||
721 | } |
||
722 | } |
||
723 | |||
724 | /** |
||
725 | * Compile the shader. |
||
726 | */ |
||
727 | static bool |
||
728 | vs_compile(struct vs_compile_context *vcc) |
||
729 | { |
||
730 | struct toy_compiler *tc = &vcc->tc; |
||
731 | struct ilo_shader *sh = vcc->shader; |
||
732 | |||
733 | vs_lower_virtual_opcodes(vcc); |
||
734 | toy_compiler_legalize_for_ra(tc); |
||
735 | toy_compiler_optimize(tc); |
||
736 | toy_compiler_allocate_registers(tc, |
||
737 | vcc->first_free_grf, |
||
738 | vcc->last_free_grf, |
||
739 | vcc->num_grf_per_vrf); |
||
740 | toy_compiler_legalize_for_asm(tc); |
||
741 | |||
742 | if (tc->fail) { |
||
743 | ilo_err("failed to legalize VS instructions: %s\n", tc->reason); |
||
744 | return false; |
||
745 | } |
||
746 | |||
747 | if (ilo_debug & ILO_DEBUG_VS) { |
||
748 | ilo_printf("legalized instructions:\n"); |
||
749 | toy_compiler_dump(tc); |
||
750 | ilo_printf("\n"); |
||
751 | } |
||
752 | |||
753 | if (true) { |
||
754 | sh->kernel = toy_compiler_assemble(tc, &sh->kernel_size); |
||
755 | } |
||
756 | else { |
||
757 | static const uint32_t microcode[] = { |
||
758 | /* fill in the microcode here */ |
||
759 | 0x0, 0x0, 0x0, 0x0, |
||
760 | }; |
||
761 | const bool swap = true; |
||
762 | |||
763 | sh->kernel_size = sizeof(microcode); |
||
764 | sh->kernel = MALLOC(sh->kernel_size); |
||
765 | |||
766 | if (sh->kernel) { |
||
767 | const int num_dwords = sizeof(microcode) / 4; |
||
768 | const uint32_t *src = microcode; |
||
769 | uint32_t *dst = (uint32_t *) sh->kernel; |
||
770 | int i; |
||
771 | |||
772 | for (i = 0; i < num_dwords; i += 4) { |
||
773 | if (swap) { |
||
774 | dst[i + 0] = src[i + 3]; |
||
775 | dst[i + 1] = src[i + 2]; |
||
776 | dst[i + 2] = src[i + 1]; |
||
777 | dst[i + 3] = src[i + 0]; |
||
778 | } |
||
779 | else { |
||
780 | memcpy(dst, src, 16); |
||
781 | } |
||
782 | } |
||
783 | } |
||
784 | } |
||
785 | |||
786 | if (!sh->kernel) { |
||
787 | ilo_err("failed to compile VS: %s\n", tc->reason); |
||
788 | return false; |
||
789 | } |
||
790 | |||
791 | if (ilo_debug & ILO_DEBUG_VS) { |
||
792 | ilo_printf("disassembly:\n"); |
||
793 | toy_compiler_disassemble(tc->dev, sh->kernel, sh->kernel_size, false); |
||
794 | ilo_printf("\n"); |
||
795 | } |
||
796 | |||
797 | return true; |
||
798 | } |
||
799 | |||
800 | /** |
||
801 | * Collect the toy registers to be written to the VUE. |
||
802 | */ |
||
803 | static int |
||
804 | vs_collect_outputs(struct vs_compile_context *vcc, struct toy_src *outs) |
||
805 | { |
||
806 | const struct toy_tgsi *tgsi = &vcc->tgsi; |
||
807 | int i; |
||
808 | |||
809 | for (i = 0; i < vcc->shader->out.count; i++) { |
||
810 | const int slot = vcc->output_map[i]; |
||
811 | const int vrf = (slot >= 0) ? toy_tgsi_get_vrf(tgsi, |
||
812 | TGSI_FILE_OUTPUT, 0, tgsi->outputs[slot].index) : -1; |
||
813 | struct toy_src src; |
||
814 | |||
815 | if (vrf >= 0) { |
||
816 | struct toy_dst dst; |
||
817 | |||
818 | dst = tdst(TOY_FILE_VRF, vrf, 0); |
||
819 | src = tsrc_from(dst); |
||
820 | |||
821 | if (i == 0) { |
||
822 | /* PSIZE is at channel W */ |
||
823 | tc_MOV(&vcc->tc, tdst_writemask(dst, TOY_WRITEMASK_W), |
||
824 | tsrc_swizzle1(src, TOY_SWIZZLE_X)); |
||
825 | |||
826 | /* the other channels are for the header */ |
||
827 | dst = tdst_d(dst); |
||
828 | tc_MOV(&vcc->tc, tdst_writemask(dst, TOY_WRITEMASK_XYZ), |
||
829 | tsrc_imm_d(0)); |
||
830 | } |
||
831 | else { |
||
832 | /* initialize unused channels to 0.0f */ |
||
833 | if (tgsi->outputs[slot].undefined_mask) { |
||
834 | dst = tdst_writemask(dst, tgsi->outputs[slot].undefined_mask); |
||
835 | tc_MOV(&vcc->tc, dst, tsrc_imm_f(0.0f)); |
||
836 | } |
||
837 | } |
||
838 | } |
||
839 | else { |
||
840 | /* XXX this is too ugly */ |
||
841 | if (vcc->shader->out.semantic_names[i] == TGSI_SEMANTIC_CLIPDIST && |
||
842 | slot < 0) { |
||
843 | /* ok, we need to compute clip distance */ |
||
844 | int clipvert_slot = -1, clipvert_vrf, j; |
||
845 | |||
846 | for (j = 0; j < tgsi->num_outputs; j++) { |
||
847 | if (tgsi->outputs[j].semantic_name == |
||
848 | TGSI_SEMANTIC_CLIPVERTEX) { |
||
849 | clipvert_slot = j; |
||
850 | break; |
||
851 | } |
||
852 | else if (tgsi->outputs[j].semantic_name == |
||
853 | TGSI_SEMANTIC_POSITION) { |
||
854 | /* remember pos, but keep looking */ |
||
855 | clipvert_slot = j; |
||
856 | } |
||
857 | } |
||
858 | |||
859 | clipvert_vrf = (clipvert_slot >= 0) ? toy_tgsi_get_vrf(tgsi, |
||
860 | TGSI_FILE_OUTPUT, 0, tgsi->outputs[clipvert_slot].index) : -1; |
||
861 | if (clipvert_vrf >= 0) { |
||
862 | struct toy_dst tmp = tc_alloc_tmp(&vcc->tc); |
||
863 | struct toy_src clipvert = tsrc(TOY_FILE_VRF, clipvert_vrf, 0); |
||
864 | int first_ucp, last_ucp; |
||
865 | |||
866 | if (vcc->shader->out.semantic_indices[i]) { |
||
867 | first_ucp = 4; |
||
868 | last_ucp = MIN2(7, vcc->variant->u.vs.num_ucps - 1); |
||
869 | } |
||
870 | else { |
||
871 | first_ucp = 0; |
||
872 | last_ucp = MIN2(3, vcc->variant->u.vs.num_ucps - 1); |
||
873 | } |
||
874 | |||
875 | for (j = first_ucp; j <= last_ucp; j++) { |
||
876 | const int plane_grf = vcc->first_ucp_grf + j / 2; |
||
877 | const int plane_subreg = (j & 1) * 16; |
||
878 | const struct toy_src plane = tsrc_rect(tsrc(TOY_FILE_GRF, |
||
879 | plane_grf, plane_subreg), TOY_RECT_041); |
||
880 | const unsigned writemask = 1 << ((j >= 4) ? j - 4 : j); |
||
881 | |||
882 | tc_DP4(&vcc->tc, tdst_writemask(tmp, writemask), |
||
883 | clipvert, plane); |
||
884 | } |
||
885 | |||
886 | src = tsrc_from(tmp); |
||
887 | } |
||
888 | else { |
||
889 | src = tsrc_imm_f(0.0f); |
||
890 | } |
||
891 | } |
||
892 | else { |
||
893 | src = (i == 0) ? tsrc_imm_d(0) : tsrc_imm_f(0.0f); |
||
894 | } |
||
895 | } |
||
896 | |||
897 | outs[i] = src; |
||
898 | } |
||
899 | |||
900 | return i; |
||
901 | } |
||
902 | |||
903 | /** |
||
904 | * Emit instructions to write the VUE. |
||
905 | */ |
||
906 | static void |
||
907 | vs_write_vue(struct vs_compile_context *vcc) |
||
908 | { |
||
909 | struct toy_compiler *tc = &vcc->tc; |
||
910 | struct toy_src outs[PIPE_MAX_SHADER_OUTPUTS]; |
||
911 | struct toy_dst header; |
||
912 | struct toy_src r0; |
||
913 | struct toy_inst *inst; |
||
914 | int sent_attrs, total_attrs; |
||
915 | |||
916 | header = tdst_ud(tdst(TOY_FILE_MRF, vcc->first_free_mrf, 0)); |
||
917 | r0 = tsrc_ud(tsrc(TOY_FILE_GRF, 0, 0)); |
||
918 | inst = tc_MOV(tc, header, r0); |
||
919 | inst->mask_ctrl = GEN6_MASKCTRL_NOMASK; |
||
920 | |||
921 | if (ilo_dev_gen(tc->dev) >= ILO_GEN(7)) { |
||
922 | inst = tc_OR(tc, tdst_offset(header, 0, 5), |
||
923 | tsrc_rect(tsrc_offset(r0, 0, 5), TOY_RECT_010), |
||
924 | tsrc_rect(tsrc_imm_ud(0xff00), TOY_RECT_010)); |
||
925 | inst->exec_size = GEN6_EXECSIZE_1; |
||
926 | inst->access_mode = GEN6_ALIGN_1; |
||
927 | inst->mask_ctrl = GEN6_MASKCTRL_NOMASK; |
||
928 | } |
||
929 | |||
930 | total_attrs = vs_collect_outputs(vcc, outs); |
||
931 | sent_attrs = 0; |
||
932 | while (sent_attrs < total_attrs) { |
||
933 | struct toy_src desc; |
||
934 | int mrf = vcc->first_free_mrf + 1, avail_mrf_for_attrs; |
||
935 | int num_attrs, msg_len, i; |
||
936 | bool eot; |
||
937 | |||
938 | num_attrs = total_attrs - sent_attrs; |
||
939 | eot = true; |
||
940 | |||
941 | /* see if we need another message */ |
||
942 | avail_mrf_for_attrs = vcc->last_free_mrf - mrf + 1; |
||
943 | if (num_attrs > avail_mrf_for_attrs) { |
||
944 | /* |
||
945 | * From the Sandy Bridge PRM, volume 4 part 2, page 22: |
||
946 | * |
||
947 | * "Offset. This field specifies a destination offset (in 256-bit |
||
948 | * units) from the start of the URB entry(s), as referenced by |
||
949 | * URB Return Handle n, at which the data (if any) will be |
||
950 | * written." |
||
951 | * |
||
952 | * As we need to offset the following messages, we must make sure |
||
953 | * this one writes an even number of attributes. |
||
954 | */ |
||
955 | num_attrs = avail_mrf_for_attrs & ~1; |
||
956 | eot = false; |
||
957 | } |
||
958 | |||
959 | if (ilo_dev_gen(tc->dev) >= ILO_GEN(7)) { |
||
960 | /* do not forget about the header */ |
||
961 | msg_len = 1 + num_attrs; |
||
962 | } |
||
963 | else { |
||
964 | /* |
||
965 | * From the Sandy Bridge PRM, volume 4 part 2, page 26: |
||
966 | * |
||
967 | * "At least 256 bits per vertex (512 bits total, M1 & M2) must |
||
968 | * be written. Writing only 128 bits per vertex (256 bits |
||
969 | * total, M1 only) results in UNDEFINED operation." |
||
970 | * |
||
971 | * "[DevSNB] Interleave writes must be in multiples of 256 per |
||
972 | * vertex." |
||
973 | * |
||
974 | * That is, we must write or appear to write an even number of |
||
975 | * attributes, starting from two. |
||
976 | */ |
||
977 | if (num_attrs % 2 && num_attrs == avail_mrf_for_attrs) { |
||
978 | num_attrs--; |
||
979 | eot = false; |
||
980 | } |
||
981 | |||
982 | msg_len = 1 + align(num_attrs, 2); |
||
983 | } |
||
984 | |||
985 | for (i = 0; i < num_attrs; i++) |
||
986 | tc_MOV(tc, tdst(TOY_FILE_MRF, mrf++, 0), outs[sent_attrs + i]); |
||
987 | |||
988 | assert(sent_attrs % 2 == 0); |
||
989 | desc = tsrc_imm_mdesc_urb(tc, eot, msg_len, 0, |
||
990 | eot, true, false, true, sent_attrs / 2, 0); |
||
991 | |||
992 | tc_add2(tc, TOY_OPCODE_URB_WRITE, tdst_null(), tsrc_from(header), desc); |
||
993 | |||
994 | sent_attrs += num_attrs; |
||
995 | } |
||
996 | } |
||
997 | |||
998 | /** |
||
999 | * Set up shader inputs for fixed-function units. |
||
1000 | */ |
||
1001 | static void |
||
1002 | vs_setup_shader_in(struct ilo_shader *sh, const struct toy_tgsi *tgsi) |
||
1003 | { |
||
1004 | int num_attrs, i; |
||
1005 | |||
1006 | /* vertex/instance id is the first VE if exists */ |
||
1007 | for (i = 0; i < tgsi->num_system_values; i++) { |
||
1008 | bool found = false; |
||
1009 | |||
1010 | switch (tgsi->system_values[i].semantic_name) { |
||
1011 | case TGSI_SEMANTIC_INSTANCEID: |
||
1012 | case TGSI_SEMANTIC_VERTEXID: |
||
1013 | found = true; |
||
1014 | break; |
||
1015 | default: |
||
1016 | break; |
||
1017 | } |
||
1018 | |||
1019 | if (found) { |
||
1020 | sh->in.semantic_names[sh->in.count] = |
||
1021 | tgsi->system_values[i].semantic_name; |
||
1022 | sh->in.semantic_indices[sh->in.count] = |
||
1023 | tgsi->system_values[i].semantic_index; |
||
1024 | sh->in.interp[sh->in.count] = TGSI_INTERPOLATE_CONSTANT; |
||
1025 | sh->in.centroid[sh->in.count] = false; |
||
1026 | |||
1027 | sh->in.count++; |
||
1028 | break; |
||
1029 | } |
||
1030 | } |
||
1031 | |||
1032 | num_attrs = 0; |
||
1033 | for (i = 0; i < tgsi->num_inputs; i++) { |
||
1034 | assert(tgsi->inputs[i].semantic_name == TGSI_SEMANTIC_GENERIC); |
||
1035 | if (tgsi->inputs[i].semantic_index >= num_attrs) |
||
1036 | num_attrs = tgsi->inputs[i].semantic_index + 1; |
||
1037 | } |
||
1038 | assert(num_attrs <= PIPE_MAX_ATTRIBS); |
||
1039 | |||
1040 | /* VF cannot remap VEs. VE[i] must be used as GENERIC[i]. */ |
||
1041 | for (i = 0; i < num_attrs; i++) { |
||
1042 | sh->in.semantic_names[sh->in.count + i] = TGSI_SEMANTIC_GENERIC; |
||
1043 | sh->in.semantic_indices[sh->in.count + i] = i; |
||
1044 | sh->in.interp[sh->in.count + i] = TGSI_INTERPOLATE_CONSTANT; |
||
1045 | sh->in.centroid[sh->in.count + i] = false; |
||
1046 | } |
||
1047 | |||
1048 | sh->in.count += num_attrs; |
||
1049 | |||
1050 | sh->in.has_pos = false; |
||
1051 | sh->in.has_linear_interp = false; |
||
1052 | sh->in.barycentric_interpolation_mode = 0; |
||
1053 | } |
||
1054 | |||
1055 | /** |
||
1056 | * Set up shader outputs for fixed-function units. |
||
1057 | */ |
||
1058 | static void |
||
1059 | vs_setup_shader_out(struct ilo_shader *sh, const struct toy_tgsi *tgsi, |
||
1060 | bool output_clipdist, int *output_map) |
||
1061 | { |
||
1062 | int psize_slot = -1, pos_slot = -1; |
||
1063 | int clipdist_slot[2] = { -1, -1 }; |
||
1064 | int color_slot[4] = { -1, -1, -1, -1 }; |
||
1065 | int num_outs, i; |
||
1066 | |||
1067 | /* find out the slots of outputs that need special care */ |
||
1068 | for (i = 0; i < tgsi->num_outputs; i++) { |
||
1069 | switch (tgsi->outputs[i].semantic_name) { |
||
1070 | case TGSI_SEMANTIC_PSIZE: |
||
1071 | psize_slot = i; |
||
1072 | break; |
||
1073 | case TGSI_SEMANTIC_POSITION: |
||
1074 | pos_slot = i; |
||
1075 | break; |
||
1076 | case TGSI_SEMANTIC_CLIPDIST: |
||
1077 | if (tgsi->outputs[i].semantic_index) |
||
1078 | clipdist_slot[1] = i; |
||
1079 | else |
||
1080 | clipdist_slot[0] = i; |
||
1081 | break; |
||
1082 | case TGSI_SEMANTIC_COLOR: |
||
1083 | if (tgsi->outputs[i].semantic_index) |
||
1084 | color_slot[2] = i; |
||
1085 | else |
||
1086 | color_slot[0] = i; |
||
1087 | break; |
||
1088 | case TGSI_SEMANTIC_BCOLOR: |
||
1089 | if (tgsi->outputs[i].semantic_index) |
||
1090 | color_slot[3] = i; |
||
1091 | else |
||
1092 | color_slot[1] = i; |
||
1093 | break; |
||
1094 | default: |
||
1095 | break; |
||
1096 | } |
||
1097 | } |
||
1098 | |||
1099 | /* the first two VUEs are always PSIZE and POSITION */ |
||
1100 | num_outs = 2; |
||
1101 | output_map[0] = psize_slot; |
||
1102 | output_map[1] = pos_slot; |
||
1103 | |||
1104 | sh->out.register_indices[0] = |
||
1105 | (psize_slot >= 0) ? tgsi->outputs[psize_slot].index : -1; |
||
1106 | sh->out.semantic_names[0] = TGSI_SEMANTIC_PSIZE; |
||
1107 | sh->out.semantic_indices[0] = 0; |
||
1108 | |||
1109 | sh->out.register_indices[1] = |
||
1110 | (pos_slot >= 0) ? tgsi->outputs[pos_slot].index : -1; |
||
1111 | sh->out.semantic_names[1] = TGSI_SEMANTIC_POSITION; |
||
1112 | sh->out.semantic_indices[1] = 0; |
||
1113 | |||
1114 | sh->out.has_pos = true; |
||
1115 | |||
1116 | /* followed by optional clip distances */ |
||
1117 | if (output_clipdist) { |
||
1118 | sh->out.register_indices[num_outs] = |
||
1119 | (clipdist_slot[0] >= 0) ? tgsi->outputs[clipdist_slot[0]].index : -1; |
||
1120 | sh->out.semantic_names[num_outs] = TGSI_SEMANTIC_CLIPDIST; |
||
1121 | sh->out.semantic_indices[num_outs] = 0; |
||
1122 | output_map[num_outs++] = clipdist_slot[0]; |
||
1123 | |||
1124 | sh->out.register_indices[num_outs] = |
||
1125 | (clipdist_slot[1] >= 0) ? tgsi->outputs[clipdist_slot[1]].index : -1; |
||
1126 | sh->out.semantic_names[num_outs] = TGSI_SEMANTIC_CLIPDIST; |
||
1127 | sh->out.semantic_indices[num_outs] = 1; |
||
1128 | output_map[num_outs++] = clipdist_slot[1]; |
||
1129 | } |
||
1130 | |||
1131 | /* |
||
1132 | * make BCOLOR follow COLOR so that we can make use of |
||
1133 | * ATTRIBUTE_SWIZZLE_INPUTATTR_FACING in 3DSTATE_SF |
||
1134 | */ |
||
1135 | for (i = 0; i < 4; i++) { |
||
1136 | const int slot = color_slot[i]; |
||
1137 | |||
1138 | if (slot < 0) |
||
1139 | continue; |
||
1140 | |||
1141 | sh->out.register_indices[num_outs] = tgsi->outputs[slot].index; |
||
1142 | sh->out.semantic_names[num_outs] = tgsi->outputs[slot].semantic_name; |
||
1143 | sh->out.semantic_indices[num_outs] = tgsi->outputs[slot].semantic_index; |
||
1144 | |||
1145 | output_map[num_outs++] = slot; |
||
1146 | } |
||
1147 | |||
1148 | /* add the rest of the outputs */ |
||
1149 | for (i = 0; i < tgsi->num_outputs; i++) { |
||
1150 | switch (tgsi->outputs[i].semantic_name) { |
||
1151 | case TGSI_SEMANTIC_PSIZE: |
||
1152 | case TGSI_SEMANTIC_POSITION: |
||
1153 | case TGSI_SEMANTIC_CLIPDIST: |
||
1154 | case TGSI_SEMANTIC_COLOR: |
||
1155 | case TGSI_SEMANTIC_BCOLOR: |
||
1156 | break; |
||
1157 | default: |
||
1158 | sh->out.register_indices[num_outs] = tgsi->outputs[i].index; |
||
1159 | sh->out.semantic_names[num_outs] = tgsi->outputs[i].semantic_name; |
||
1160 | sh->out.semantic_indices[num_outs] = tgsi->outputs[i].semantic_index; |
||
1161 | output_map[num_outs++] = i; |
||
1162 | break; |
||
1163 | } |
||
1164 | } |
||
1165 | |||
1166 | sh->out.count = num_outs; |
||
1167 | } |
||
1168 | |||
1169 | /** |
||
1170 | * Translate the TGSI tokens. |
||
1171 | */ |
||
1172 | static bool |
||
1173 | vs_setup_tgsi(struct toy_compiler *tc, const struct tgsi_token *tokens, |
||
1174 | struct toy_tgsi *tgsi) |
||
1175 | { |
||
1176 | if (ilo_debug & ILO_DEBUG_VS) { |
||
1177 | ilo_printf("dumping vertex shader\n"); |
||
1178 | ilo_printf("\n"); |
||
1179 | |||
1180 | tgsi_dump(tokens, 0); |
||
1181 | ilo_printf("\n"); |
||
1182 | } |
||
1183 | |||
1184 | toy_compiler_translate_tgsi(tc, tokens, true, tgsi); |
||
1185 | if (tc->fail) { |
||
1186 | ilo_err("failed to translate VS TGSI tokens: %s\n", tc->reason); |
||
1187 | return false; |
||
1188 | } |
||
1189 | |||
1190 | if (ilo_debug & ILO_DEBUG_VS) { |
||
1191 | ilo_printf("TGSI translator:\n"); |
||
1192 | toy_tgsi_dump(tgsi); |
||
1193 | ilo_printf("\n"); |
||
1194 | toy_compiler_dump(tc); |
||
1195 | ilo_printf("\n"); |
||
1196 | } |
||
1197 | |||
1198 | return true; |
||
1199 | } |
||
1200 | |||
1201 | /** |
||
1202 | * Set up VS compile context. This includes translating the TGSI tokens. |
||
1203 | */ |
||
1204 | static bool |
||
1205 | vs_setup(struct vs_compile_context *vcc, |
||
1206 | const struct ilo_shader_state *state, |
||
1207 | const struct ilo_shader_variant *variant) |
||
1208 | { |
||
1209 | int num_consts; |
||
1210 | |||
1211 | memset(vcc, 0, sizeof(*vcc)); |
||
1212 | |||
1213 | vcc->shader = CALLOC_STRUCT(ilo_shader); |
||
1214 | if (!vcc->shader) |
||
1215 | return false; |
||
1216 | |||
1217 | vcc->variant = variant; |
||
1218 | |||
1219 | toy_compiler_init(&vcc->tc, state->info.dev); |
||
1220 | vcc->tc.templ.access_mode = GEN6_ALIGN_16; |
||
1221 | vcc->tc.templ.exec_size = GEN6_EXECSIZE_8; |
||
1222 | vcc->tc.rect_linear_width = 4; |
||
1223 | |||
1224 | /* |
||
1225 | * The classic driver uses the sampler cache (gen6) or the data cache |
||
1226 | * (gen7). Why? |
||
1227 | */ |
||
1228 | vcc->const_cache = GEN6_SFID_DP_CC; |
||
1229 | |||
1230 | if (!vs_setup_tgsi(&vcc->tc, state->info.tokens, &vcc->tgsi)) { |
||
1231 | toy_compiler_cleanup(&vcc->tc); |
||
1232 | FREE(vcc->shader); |
||
1233 | return false; |
||
1234 | } |
||
1235 | |||
1236 | vs_setup_shader_in(vcc->shader, &vcc->tgsi); |
||
1237 | vs_setup_shader_out(vcc->shader, &vcc->tgsi, |
||
1238 | (vcc->variant->u.vs.num_ucps > 0), vcc->output_map); |
||
1239 | |||
1240 | if (vcc->variant->use_pcb && !vcc->tgsi.const_indirect) { |
||
1241 | num_consts = (vcc->tgsi.const_count + 1) / 2; |
||
1242 | |||
1243 | /* |
||
1244 | * From the Sandy Bridge PRM, volume 2 part 1, page 138: |
||
1245 | * |
||
1246 | * "The sum of all four read length fields (each incremented to |
||
1247 | * represent the actual read length) must be less than or equal to |
||
1248 | * 32" |
||
1249 | */ |
||
1250 | if (num_consts > 32) |
||
1251 | num_consts = 0; |
||
1252 | } |
||
1253 | else { |
||
1254 | num_consts = 0; |
||
1255 | } |
||
1256 | |||
1257 | vcc->shader->skip_cbuf0_upload = (!vcc->tgsi.const_count || num_consts); |
||
1258 | vcc->shader->pcb.cbuf0_size = num_consts * (sizeof(float) * 8); |
||
1259 | |||
1260 | /* r0 is reserved for payload header */ |
||
1261 | vcc->first_const_grf = 1; |
||
1262 | vcc->first_ucp_grf = vcc->first_const_grf + num_consts; |
||
1263 | |||
1264 | /* fit each pair of user clip planes into a register */ |
||
1265 | vcc->first_vue_grf = vcc->first_ucp_grf + |
||
1266 | (vcc->variant->u.vs.num_ucps + 1) / 2; |
||
1267 | |||
1268 | vcc->first_free_grf = vcc->first_vue_grf + vcc->shader->in.count; |
||
1269 | vcc->last_free_grf = 127; |
||
1270 | |||
1271 | /* m0 is reserved for system routines */ |
||
1272 | vcc->first_free_mrf = 1; |
||
1273 | vcc->last_free_mrf = 15; |
||
1274 | |||
1275 | vcc->num_grf_per_vrf = 1; |
||
1276 | |||
1277 | if (ilo_dev_gen(vcc->tc.dev) >= ILO_GEN(7)) { |
||
1278 | vcc->last_free_grf -= 15; |
||
1279 | vcc->first_free_mrf = vcc->last_free_grf + 1; |
||
1280 | vcc->last_free_mrf = vcc->first_free_mrf + 14; |
||
1281 | } |
||
1282 | |||
1283 | vcc->shader->in.start_grf = vcc->first_const_grf; |
||
1284 | vcc->shader->pcb.clip_state_size = |
||
1285 | vcc->variant->u.vs.num_ucps * (sizeof(float) * 4); |
||
1286 | |||
1287 | vcc->shader->bt.tex_base = 0; |
||
1288 | vcc->shader->bt.tex_count = vcc->variant->num_sampler_views; |
||
1289 | |||
1290 | vcc->shader->bt.const_base = vcc->shader->bt.tex_base + |
||
1291 | vcc->shader->bt.tex_count; |
||
1292 | vcc->shader->bt.const_count = state->info.constant_buffer_count; |
||
1293 | |||
1294 | vcc->shader->bt.total_count = vcc->shader->bt.const_base + |
||
1295 | vcc->shader->bt.const_count; |
||
1296 | |||
1297 | return true; |
||
1298 | } |
||
1299 | |||
1300 | /** |
||
1301 | * Compile the vertex shader. |
||
1302 | */ |
||
1303 | struct ilo_shader * |
||
1304 | ilo_shader_compile_vs(const struct ilo_shader_state *state, |
||
1305 | const struct ilo_shader_variant *variant) |
||
1306 | { |
||
1307 | struct vs_compile_context vcc; |
||
1308 | bool need_gs; |
||
1309 | |||
1310 | if (!vs_setup(&vcc, state, variant)) |
||
1311 | return NULL; |
||
1312 | |||
1313 | if (ilo_dev_gen(vcc.tc.dev) >= ILO_GEN(7)) { |
||
1314 | need_gs = false; |
||
1315 | } |
||
1316 | else { |
||
1317 | need_gs = variant->u.vs.rasterizer_discard || |
||
1318 | state->info.stream_output.num_outputs; |
||
1319 | } |
||
1320 | |||
1321 | vs_write_vue(&vcc); |
||
1322 | |||
1323 | if (!vs_compile(&vcc)) { |
||
1324 | FREE(vcc.shader); |
||
1325 | vcc.shader = NULL; |
||
1326 | } |
||
1327 | |||
1328 | toy_tgsi_cleanup(&vcc.tgsi); |
||
1329 | toy_compiler_cleanup(&vcc.tc); |
||
1330 | |||
1331 | if (need_gs) { |
||
1332 | int so_mapping[PIPE_MAX_SHADER_OUTPUTS]; |
||
1333 | int i, j; |
||
1334 | |||
1335 | for (i = 0; i < vcc.tgsi.num_outputs; i++) { |
||
1336 | int attr = 0; |
||
1337 | |||
1338 | for (j = 0; j < vcc.shader->out.count; j++) { |
||
1339 | if (vcc.tgsi.outputs[i].semantic_name == |
||
1340 | vcc.shader->out.semantic_names[j] && |
||
1341 | vcc.tgsi.outputs[i].semantic_index == |
||
1342 | vcc.shader->out.semantic_indices[j]) { |
||
1343 | attr = j; |
||
1344 | break; |
||
1345 | } |
||
1346 | } |
||
1347 | |||
1348 | so_mapping[i] = attr; |
||
1349 | } |
||
1350 | |||
1351 | if (!ilo_shader_compile_gs_passthrough(state, variant, |
||
1352 | so_mapping, vcc.shader)) { |
||
1353 | ilo_shader_destroy_kernel(vcc.shader); |
||
1354 | vcc.shader = NULL; |
||
1355 | } |
||
1356 | } |
||
1357 | |||
1358 | return vcc.shader; |
||
1359 | }>>>>>>>=>>>>>><>=>>>>>><>><>><>>>><>>>>>>>>=>>=>=>=>><>>><>> |