Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
5564 | serge | 1 | /* |
2 | * Mesa 3-D graphics library |
||
3 | * |
||
4 | * Copyright (C) 2012-2013 LunarG, Inc. |
||
5 | * |
||
6 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
7 | * copy of this software and associated documentation files (the "Software"), |
||
8 | * to deal in the Software without restriction, including without limitation |
||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
||
10 | * and/or sell copies of the Software, and to permit persons to whom the |
||
11 | * Software is furnished to do so, subject to the following conditions: |
||
12 | * |
||
13 | * The above copyright notice and this permission notice shall be included |
||
14 | * in all copies or substantial portions of the Software. |
||
15 | * |
||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
||
22 | * DEALINGS IN THE SOFTWARE. |
||
23 | * |
||
24 | * Authors: |
||
25 | * Chia-I Wu |
||
26 | */ |
||
27 | |||
28 | #include "tgsi/tgsi_dump.h" |
||
29 | #include "tgsi/tgsi_util.h" |
||
30 | #include "toy_compiler.h" |
||
31 | #include "toy_tgsi.h" |
||
32 | #include "toy_legalize.h" |
||
33 | #include "toy_optimize.h" |
||
34 | #include "toy_helpers.h" |
||
35 | #include "ilo_shader_internal.h" |
||
36 | |||
37 | struct fs_compile_context { |
||
38 | struct ilo_shader *shader; |
||
39 | const struct ilo_shader_variant *variant; |
||
40 | |||
41 | struct toy_compiler tc; |
||
42 | struct toy_tgsi tgsi; |
||
43 | |||
44 | int const_cache; |
||
45 | int dispatch_mode; |
||
46 | |||
47 | struct { |
||
48 | int interp_perspective_pixel; |
||
49 | int interp_perspective_centroid; |
||
50 | int interp_perspective_sample; |
||
51 | int interp_nonperspective_pixel; |
||
52 | int interp_nonperspective_centroid; |
||
53 | int interp_nonperspective_sample; |
||
54 | int source_depth; |
||
55 | int source_w; |
||
56 | int pos_offset; |
||
57 | } payloads[2]; |
||
58 | |||
59 | int first_const_grf; |
||
60 | int first_attr_grf; |
||
61 | int first_free_grf; |
||
62 | int last_free_grf; |
||
63 | |||
64 | int num_grf_per_vrf; |
||
65 | |||
66 | int first_free_mrf; |
||
67 | int last_free_mrf; |
||
68 | }; |
||
69 | |||
70 | static void |
||
71 | fetch_position(struct fs_compile_context *fcc, struct toy_dst dst) |
||
72 | { |
||
73 | struct toy_compiler *tc = &fcc->tc; |
||
74 | const struct toy_src src_z = |
||
75 | tsrc(TOY_FILE_GRF, fcc->payloads[0].source_depth, 0); |
||
76 | const struct toy_src src_w = |
||
77 | tsrc(TOY_FILE_GRF, fcc->payloads[0].source_w, 0); |
||
78 | const int fb_height = |
||
79 | (fcc->variant->u.fs.fb_height) ? fcc->variant->u.fs.fb_height : 1; |
||
80 | const bool origin_upper_left = |
||
81 | (fcc->tgsi.props.fs_coord_origin == TGSI_FS_COORD_ORIGIN_UPPER_LEFT); |
||
82 | const bool pixel_center_integer = |
||
83 | (fcc->tgsi.props.fs_coord_pixel_center == |
||
84 | TGSI_FS_COORD_PIXEL_CENTER_INTEGER); |
||
85 | struct toy_src subspan_x, subspan_y; |
||
86 | struct toy_dst tmp, tmp_uw; |
||
87 | struct toy_dst real_dst[4]; |
||
88 | |||
89 | tdst_transpose(dst, real_dst); |
||
90 | |||
91 | subspan_x = tsrc_uw(tsrc(TOY_FILE_GRF, 1, 2 * 4)); |
||
92 | subspan_x = tsrc_rect(subspan_x, TOY_RECT_240); |
||
93 | |||
94 | subspan_y = tsrc_offset(subspan_x, 0, 1); |
||
95 | |||
96 | tmp_uw = tdst_uw(tc_alloc_tmp(tc)); |
||
97 | tmp = tc_alloc_tmp(tc); |
||
98 | |||
99 | /* X */ |
||
100 | tc_ADD(tc, tmp_uw, subspan_x, tsrc_imm_v(0x10101010)); |
||
101 | tc_MOV(tc, tmp, tsrc_from(tmp_uw)); |
||
102 | if (pixel_center_integer) |
||
103 | tc_MOV(tc, real_dst[0], tsrc_from(tmp)); |
||
104 | else |
||
105 | tc_ADD(tc, real_dst[0], tsrc_from(tmp), tsrc_imm_f(0.5f)); |
||
106 | |||
107 | /* Y */ |
||
108 | tc_ADD(tc, tmp_uw, subspan_y, tsrc_imm_v(0x11001100)); |
||
109 | tc_MOV(tc, tmp, tsrc_from(tmp_uw)); |
||
110 | if (origin_upper_left && pixel_center_integer) { |
||
111 | tc_MOV(tc, real_dst[1], tsrc_from(tmp)); |
||
112 | } |
||
113 | else { |
||
114 | struct toy_src y = tsrc_from(tmp); |
||
115 | float offset = 0.0f; |
||
116 | |||
117 | if (!pixel_center_integer) |
||
118 | offset += 0.5f; |
||
119 | |||
120 | if (!origin_upper_left) { |
||
121 | offset += (float) (fb_height - 1); |
||
122 | y = tsrc_negate(y); |
||
123 | } |
||
124 | |||
125 | tc_ADD(tc, real_dst[1], y, tsrc_imm_f(offset)); |
||
126 | } |
||
127 | |||
128 | /* Z and W */ |
||
129 | tc_MOV(tc, real_dst[2], src_z); |
||
130 | tc_INV(tc, real_dst[3], src_w); |
||
131 | } |
||
132 | |||
133 | static void |
||
134 | fetch_face(struct fs_compile_context *fcc, struct toy_dst dst) |
||
135 | { |
||
136 | struct toy_compiler *tc = &fcc->tc; |
||
137 | const struct toy_src r0 = tsrc_d(tsrc(TOY_FILE_GRF, 0, 0)); |
||
138 | struct toy_dst tmp_f, tmp; |
||
139 | struct toy_dst real_dst[4]; |
||
140 | |||
141 | tdst_transpose(dst, real_dst); |
||
142 | |||
143 | tmp_f = tc_alloc_tmp(tc); |
||
144 | tmp = tdst_d(tmp_f); |
||
145 | tc_SHR(tc, tmp, tsrc_rect(r0, TOY_RECT_010), tsrc_imm_d(15)); |
||
146 | tc_AND(tc, tmp, tsrc_from(tmp), tsrc_imm_d(1)); |
||
147 | tc_MOV(tc, tmp_f, tsrc_from(tmp)); |
||
148 | |||
149 | /* convert to 1.0 and -1.0 */ |
||
150 | tc_MUL(tc, tmp_f, tsrc_from(tmp_f), tsrc_imm_f(-2.0f)); |
||
151 | tc_ADD(tc, real_dst[0], tsrc_from(tmp_f), tsrc_imm_f(1.0f)); |
||
152 | |||
153 | tc_MOV(tc, real_dst[1], tsrc_imm_f(0.0f)); |
||
154 | tc_MOV(tc, real_dst[2], tsrc_imm_f(0.0f)); |
||
155 | tc_MOV(tc, real_dst[3], tsrc_imm_f(1.0f)); |
||
156 | } |
||
157 | |||
158 | static void |
||
159 | fetch_attr(struct fs_compile_context *fcc, struct toy_dst dst, int slot) |
||
160 | { |
||
161 | struct toy_compiler *tc = &fcc->tc; |
||
162 | struct toy_dst real_dst[4]; |
||
163 | bool is_const = false; |
||
164 | int grf, interp, ch; |
||
165 | |||
166 | tdst_transpose(dst, real_dst); |
||
167 | |||
168 | grf = fcc->first_attr_grf + slot * 2; |
||
169 | |||
170 | switch (fcc->tgsi.inputs[slot].interp) { |
||
171 | case TGSI_INTERPOLATE_CONSTANT: |
||
172 | is_const = true; |
||
173 | break; |
||
174 | case TGSI_INTERPOLATE_LINEAR: |
||
175 | if (fcc->tgsi.inputs[slot].centroid) |
||
176 | interp = fcc->payloads[0].interp_nonperspective_centroid; |
||
177 | else |
||
178 | interp = fcc->payloads[0].interp_nonperspective_pixel; |
||
179 | break; |
||
180 | case TGSI_INTERPOLATE_COLOR: |
||
181 | if (fcc->variant->u.fs.flatshade) { |
||
182 | is_const = true; |
||
183 | break; |
||
184 | } |
||
185 | /* fall through */ |
||
186 | case TGSI_INTERPOLATE_PERSPECTIVE: |
||
187 | if (fcc->tgsi.inputs[slot].centroid) |
||
188 | interp = fcc->payloads[0].interp_perspective_centroid; |
||
189 | else |
||
190 | interp = fcc->payloads[0].interp_perspective_pixel; |
||
191 | break; |
||
192 | default: |
||
193 | assert(!"unexpected FS interpolation"); |
||
194 | interp = fcc->payloads[0].interp_perspective_pixel; |
||
195 | break; |
||
196 | } |
||
197 | |||
198 | if (is_const) { |
||
199 | struct toy_src a0[4]; |
||
200 | |||
201 | a0[0] = tsrc(TOY_FILE_GRF, grf + 0, 3 * 4); |
||
202 | a0[1] = tsrc(TOY_FILE_GRF, grf + 0, 7 * 4); |
||
203 | a0[2] = tsrc(TOY_FILE_GRF, grf + 1, 3 * 4); |
||
204 | a0[3] = tsrc(TOY_FILE_GRF, grf + 1, 7 * 4); |
||
205 | |||
206 | for (ch = 0; ch < 4; ch++) |
||
207 | tc_MOV(tc, real_dst[ch], tsrc_rect(a0[ch], TOY_RECT_010)); |
||
208 | } |
||
209 | else { |
||
210 | struct toy_src attr[4], uv; |
||
211 | |||
212 | attr[0] = tsrc(TOY_FILE_GRF, grf + 0, 0); |
||
213 | attr[1] = tsrc(TOY_FILE_GRF, grf + 0, 4 * 4); |
||
214 | attr[2] = tsrc(TOY_FILE_GRF, grf + 1, 0); |
||
215 | attr[3] = tsrc(TOY_FILE_GRF, grf + 1, 4 * 4); |
||
216 | |||
217 | uv = tsrc(TOY_FILE_GRF, interp, 0); |
||
218 | |||
219 | for (ch = 0; ch < 4; ch++) { |
||
220 | tc_add2(tc, GEN6_OPCODE_PLN, real_dst[ch], |
||
221 | tsrc_rect(attr[ch], TOY_RECT_010), uv); |
||
222 | } |
||
223 | } |
||
224 | |||
225 | if (fcc->tgsi.inputs[slot].semantic_name == TGSI_SEMANTIC_FOG) { |
||
226 | tc_MOV(tc, real_dst[1], tsrc_imm_f(0.0f)); |
||
227 | tc_MOV(tc, real_dst[2], tsrc_imm_f(0.0f)); |
||
228 | tc_MOV(tc, real_dst[3], tsrc_imm_f(1.0f)); |
||
229 | } |
||
230 | } |
||
231 | |||
232 | static void |
||
233 | fs_lower_opcode_tgsi_in(struct fs_compile_context *fcc, |
||
234 | struct toy_dst dst, int dim, int idx) |
||
235 | { |
||
236 | int slot; |
||
237 | |||
238 | assert(!dim); |
||
239 | |||
240 | slot = toy_tgsi_find_input(&fcc->tgsi, idx); |
||
241 | if (slot < 0) |
||
242 | return; |
||
243 | |||
244 | switch (fcc->tgsi.inputs[slot].semantic_name) { |
||
245 | case TGSI_SEMANTIC_POSITION: |
||
246 | fetch_position(fcc, dst); |
||
247 | break; |
||
248 | case TGSI_SEMANTIC_FACE: |
||
249 | fetch_face(fcc, dst); |
||
250 | break; |
||
251 | default: |
||
252 | fetch_attr(fcc, dst, slot); |
||
253 | break; |
||
254 | } |
||
255 | } |
||
256 | |||
257 | static void |
||
258 | fs_lower_opcode_tgsi_indirect_const(struct fs_compile_context *fcc, |
||
259 | struct toy_dst dst, int dim, |
||
260 | struct toy_src idx) |
||
261 | { |
||
262 | const struct toy_dst offset = |
||
263 | tdst_ud(tdst(TOY_FILE_MRF, fcc->first_free_mrf, 0)); |
||
264 | struct toy_compiler *tc = &fcc->tc; |
||
265 | unsigned simd_mode, param_size; |
||
266 | struct toy_inst *inst; |
||
267 | struct toy_src desc, real_src[4]; |
||
268 | struct toy_dst tmp, real_dst[4]; |
||
269 | int i; |
||
270 | |||
271 | tsrc_transpose(idx, real_src); |
||
272 | |||
273 | /* set offset */ |
||
274 | inst = tc_MOV(tc, offset, real_src[0]); |
||
275 | inst->mask_ctrl = GEN6_MASKCTRL_NOMASK; |
||
276 | |||
277 | switch (inst->exec_size) { |
||
278 | case GEN6_EXECSIZE_8: |
||
279 | simd_mode = GEN6_MSG_SAMPLER_SIMD8; |
||
280 | param_size = 1; |
||
281 | break; |
||
282 | case GEN6_EXECSIZE_16: |
||
283 | simd_mode = GEN6_MSG_SAMPLER_SIMD16; |
||
284 | param_size = 2; |
||
285 | break; |
||
286 | default: |
||
287 | assert(!"unsupported execution size"); |
||
288 | tc_MOV(tc, dst, tsrc_imm_f(0.0f)); |
||
289 | return; |
||
290 | break; |
||
291 | } |
||
292 | |||
293 | desc = tsrc_imm_mdesc_sampler(tc, param_size, param_size * 4, false, |
||
294 | simd_mode, |
||
295 | GEN6_MSG_SAMPLER_LD, |
||
296 | 0, |
||
297 | fcc->shader->bt.const_base + dim); |
||
298 | |||
299 | tmp = tdst(TOY_FILE_VRF, tc_alloc_vrf(tc, param_size * 4), 0); |
||
300 | inst = tc_SEND(tc, tmp, tsrc_from(offset), desc, GEN6_SFID_SAMPLER); |
||
301 | inst->mask_ctrl = GEN6_MASKCTRL_NOMASK; |
||
302 | |||
303 | tdst_transpose(dst, real_dst); |
||
304 | for (i = 0; i < 4; i++) { |
||
305 | const struct toy_src src = |
||
306 | tsrc_offset(tsrc_from(tmp), param_size * i, 0); |
||
307 | |||
308 | /* cast to type D to make sure these are raw moves */ |
||
309 | tc_MOV(tc, tdst_d(real_dst[i]), tsrc_d(src)); |
||
310 | } |
||
311 | } |
||
312 | |||
313 | static bool |
||
314 | fs_lower_opcode_tgsi_const_pcb(struct fs_compile_context *fcc, |
||
315 | struct toy_dst dst, int dim, |
||
316 | struct toy_src idx) |
||
317 | { |
||
318 | const int grf = fcc->first_const_grf + idx.val32 / 2; |
||
319 | const int grf_subreg = (idx.val32 & 1) * 16; |
||
320 | struct toy_src src; |
||
321 | struct toy_dst real_dst[4]; |
||
322 | int i; |
||
323 | |||
324 | if (!fcc->variant->use_pcb || dim != 0 || idx.file != TOY_FILE_IMM || |
||
325 | grf >= fcc->first_attr_grf) |
||
326 | return false; |
||
327 | |||
328 | src = tsrc_rect(tsrc(TOY_FILE_GRF, grf, grf_subreg), TOY_RECT_010); |
||
329 | |||
330 | tdst_transpose(dst, real_dst); |
||
331 | for (i = 0; i < 4; i++) { |
||
332 | /* cast to type D to make sure these are raw moves */ |
||
333 | tc_MOV(&fcc->tc, tdst_d(real_dst[i]), tsrc_d(tsrc_offset(src, 0, i))); |
||
334 | } |
||
335 | |||
336 | return true; |
||
337 | } |
||
338 | |||
339 | static void |
||
340 | fs_lower_opcode_tgsi_const_gen6(struct fs_compile_context *fcc, |
||
341 | struct toy_dst dst, int dim, struct toy_src idx) |
||
342 | { |
||
343 | const struct toy_dst header = |
||
344 | tdst_ud(tdst(TOY_FILE_MRF, fcc->first_free_mrf, 0)); |
||
345 | const struct toy_dst global_offset = |
||
346 | tdst_ud(tdst(TOY_FILE_MRF, fcc->first_free_mrf, 2 * 4)); |
||
347 | const struct toy_src r0 = tsrc_ud(tsrc(TOY_FILE_GRF, 0, 0)); |
||
348 | struct toy_compiler *tc = &fcc->tc; |
||
349 | unsigned msg_type, msg_ctrl, msg_len; |
||
350 | struct toy_inst *inst; |
||
351 | struct toy_src desc; |
||
352 | struct toy_dst tmp, real_dst[4]; |
||
353 | int i; |
||
354 | |||
355 | if (fs_lower_opcode_tgsi_const_pcb(fcc, dst, dim, idx)) |
||
356 | return; |
||
357 | |||
358 | /* set message header */ |
||
359 | inst = tc_MOV(tc, header, r0); |
||
360 | inst->mask_ctrl = GEN6_MASKCTRL_NOMASK; |
||
361 | |||
362 | /* set global offset */ |
||
363 | inst = tc_MOV(tc, global_offset, idx); |
||
364 | inst->mask_ctrl = GEN6_MASKCTRL_NOMASK; |
||
365 | inst->exec_size = GEN6_EXECSIZE_1; |
||
366 | inst->src[0].rect = TOY_RECT_010; |
||
367 | |||
368 | msg_type = GEN6_MSG_DP_OWORD_BLOCK_READ; |
||
369 | msg_ctrl = GEN6_MSG_DP_OWORD_BLOCK_SIZE_1_LO; |
||
370 | msg_len = 1; |
||
371 | |||
372 | desc = tsrc_imm_mdesc_data_port(tc, false, msg_len, 1, true, false, |
||
373 | msg_type, msg_ctrl, fcc->shader->bt.const_base + dim); |
||
374 | |||
375 | tmp = tc_alloc_tmp(tc); |
||
376 | |||
377 | tc_SEND(tc, tmp, tsrc_from(header), desc, fcc->const_cache); |
||
378 | |||
379 | tdst_transpose(dst, real_dst); |
||
380 | for (i = 0; i < 4; i++) { |
||
381 | const struct toy_src src = |
||
382 | tsrc_offset(tsrc_rect(tsrc_from(tmp), TOY_RECT_010), 0, i); |
||
383 | |||
384 | /* cast to type D to make sure these are raw moves */ |
||
385 | tc_MOV(tc, tdst_d(real_dst[i]), tsrc_d(src)); |
||
386 | } |
||
387 | } |
||
388 | |||
389 | static void |
||
390 | fs_lower_opcode_tgsi_const_gen7(struct fs_compile_context *fcc, |
||
391 | struct toy_dst dst, int dim, struct toy_src idx) |
||
392 | { |
||
393 | struct toy_compiler *tc = &fcc->tc; |
||
394 | const struct toy_dst offset = |
||
395 | tdst_ud(tdst(TOY_FILE_MRF, fcc->first_free_mrf, 0)); |
||
396 | struct toy_src desc; |
||
397 | struct toy_inst *inst; |
||
398 | struct toy_dst tmp, real_dst[4]; |
||
399 | int i; |
||
400 | |||
401 | if (fs_lower_opcode_tgsi_const_pcb(fcc, dst, dim, idx)) |
||
402 | return; |
||
403 | |||
404 | /* |
||
405 | * In 4c1fdae0a01b3f92ec03b61aac1d3df500d51fc6, pull constant load was |
||
406 | * changed from OWord Block Read to ld to increase performance in the |
||
407 | * classic driver. Since we use the constant cache instead of the data |
||
408 | * cache, I wonder if we still want to follow the classic driver. |
||
409 | */ |
||
410 | |||
411 | /* set offset */ |
||
412 | inst = tc_MOV(tc, offset, tsrc_rect(idx, TOY_RECT_010)); |
||
413 | inst->exec_size = GEN6_EXECSIZE_8; |
||
414 | inst->mask_ctrl = GEN6_MASKCTRL_NOMASK; |
||
415 | |||
416 | desc = tsrc_imm_mdesc_sampler(tc, 1, 1, false, |
||
417 | GEN6_MSG_SAMPLER_SIMD4X2, |
||
418 | GEN6_MSG_SAMPLER_LD, |
||
419 | 0, |
||
420 | fcc->shader->bt.const_base + dim); |
||
421 | |||
422 | tmp = tc_alloc_tmp(tc); |
||
423 | inst = tc_SEND(tc, tmp, tsrc_from(offset), desc, GEN6_SFID_SAMPLER); |
||
424 | inst->exec_size = GEN6_EXECSIZE_8; |
||
425 | inst->mask_ctrl = GEN6_MASKCTRL_NOMASK; |
||
426 | |||
427 | tdst_transpose(dst, real_dst); |
||
428 | for (i = 0; i < 4; i++) { |
||
429 | const struct toy_src src = |
||
430 | tsrc_offset(tsrc_rect(tsrc_from(tmp), TOY_RECT_010), 0, i); |
||
431 | |||
432 | /* cast to type D to make sure these are raw moves */ |
||
433 | tc_MOV(tc, tdst_d(real_dst[i]), tsrc_d(src)); |
||
434 | } |
||
435 | } |
||
436 | |||
437 | static void |
||
438 | fs_lower_opcode_tgsi_imm(struct fs_compile_context *fcc, |
||
439 | struct toy_dst dst, int idx) |
||
440 | { |
||
441 | const uint32_t *imm; |
||
442 | struct toy_dst real_dst[4]; |
||
443 | int ch; |
||
444 | |||
445 | imm = toy_tgsi_get_imm(&fcc->tgsi, idx, NULL); |
||
446 | |||
447 | tdst_transpose(dst, real_dst); |
||
448 | /* raw moves */ |
||
449 | for (ch = 0; ch < 4; ch++) |
||
450 | tc_MOV(&fcc->tc, tdst_ud(real_dst[ch]), tsrc_imm_ud(imm[ch])); |
||
451 | } |
||
452 | |||
453 | static void |
||
454 | fs_lower_opcode_tgsi_sv(struct fs_compile_context *fcc, |
||
455 | struct toy_dst dst, int dim, int idx) |
||
456 | { |
||
457 | struct toy_compiler *tc = &fcc->tc; |
||
458 | const struct toy_tgsi *tgsi = &fcc->tgsi; |
||
459 | int slot; |
||
460 | |||
461 | assert(!dim); |
||
462 | |||
463 | slot = toy_tgsi_find_system_value(tgsi, idx); |
||
464 | if (slot < 0) |
||
465 | return; |
||
466 | |||
467 | switch (tgsi->system_values[slot].semantic_name) { |
||
468 | case TGSI_SEMANTIC_PRIMID: |
||
469 | case TGSI_SEMANTIC_INSTANCEID: |
||
470 | case TGSI_SEMANTIC_VERTEXID: |
||
471 | default: |
||
472 | tc_fail(tc, "unhandled system value"); |
||
473 | tc_MOV(tc, dst, tsrc_imm_d(0)); |
||
474 | break; |
||
475 | } |
||
476 | } |
||
477 | |||
478 | static void |
||
479 | fs_lower_opcode_tgsi_direct(struct fs_compile_context *fcc, |
||
480 | struct toy_inst *inst) |
||
481 | { |
||
482 | struct toy_compiler *tc = &fcc->tc; |
||
483 | int dim, idx; |
||
484 | |||
485 | assert(inst->src[0].file == TOY_FILE_IMM); |
||
486 | dim = inst->src[0].val32; |
||
487 | |||
488 | assert(inst->src[1].file == TOY_FILE_IMM); |
||
489 | idx = inst->src[1].val32; |
||
490 | |||
491 | switch (inst->opcode) { |
||
492 | case TOY_OPCODE_TGSI_IN: |
||
493 | fs_lower_opcode_tgsi_in(fcc, inst->dst, dim, idx); |
||
494 | break; |
||
495 | case TOY_OPCODE_TGSI_CONST: |
||
496 | if (ilo_dev_gen(tc->dev) >= ILO_GEN(7)) |
||
497 | fs_lower_opcode_tgsi_const_gen7(fcc, inst->dst, dim, inst->src[1]); |
||
498 | else |
||
499 | fs_lower_opcode_tgsi_const_gen6(fcc, inst->dst, dim, inst->src[1]); |
||
500 | break; |
||
501 | case TOY_OPCODE_TGSI_SV: |
||
502 | fs_lower_opcode_tgsi_sv(fcc, inst->dst, dim, idx); |
||
503 | break; |
||
504 | case TOY_OPCODE_TGSI_IMM: |
||
505 | assert(!dim); |
||
506 | fs_lower_opcode_tgsi_imm(fcc, inst->dst, idx); |
||
507 | break; |
||
508 | default: |
||
509 | tc_fail(tc, "unhandled TGSI fetch"); |
||
510 | break; |
||
511 | } |
||
512 | |||
513 | tc_discard_inst(tc, inst); |
||
514 | } |
||
515 | |||
516 | static void |
||
517 | fs_lower_opcode_tgsi_indirect(struct fs_compile_context *fcc, |
||
518 | struct toy_inst *inst) |
||
519 | { |
||
520 | struct toy_compiler *tc = &fcc->tc; |
||
521 | enum tgsi_file_type file; |
||
522 | int dim, idx; |
||
523 | struct toy_src indirect_dim, indirect_idx; |
||
524 | |||
525 | assert(inst->src[0].file == TOY_FILE_IMM); |
||
526 | file = inst->src[0].val32; |
||
527 | |||
528 | assert(inst->src[1].file == TOY_FILE_IMM); |
||
529 | dim = inst->src[1].val32; |
||
530 | indirect_dim = inst->src[2]; |
||
531 | |||
532 | assert(inst->src[3].file == TOY_FILE_IMM); |
||
533 | idx = inst->src[3].val32; |
||
534 | indirect_idx = inst->src[4]; |
||
535 | |||
536 | /* no dimension indirection */ |
||
537 | assert(indirect_dim.file == TOY_FILE_IMM); |
||
538 | dim += indirect_dim.val32; |
||
539 | |||
540 | switch (inst->opcode) { |
||
541 | case TOY_OPCODE_TGSI_INDIRECT_FETCH: |
||
542 | if (file == TGSI_FILE_CONSTANT) { |
||
543 | if (idx) { |
||
544 | struct toy_dst tmp = tc_alloc_tmp(tc); |
||
545 | |||
546 | tc_ADD(tc, tmp, indirect_idx, tsrc_imm_d(idx)); |
||
547 | indirect_idx = tsrc_from(tmp); |
||
548 | } |
||
549 | |||
550 | fs_lower_opcode_tgsi_indirect_const(fcc, inst->dst, dim, indirect_idx); |
||
551 | break; |
||
552 | } |
||
553 | /* fall through */ |
||
554 | case TOY_OPCODE_TGSI_INDIRECT_STORE: |
||
555 | default: |
||
556 | tc_fail(tc, "unhandled TGSI indirection"); |
||
557 | break; |
||
558 | } |
||
559 | |||
560 | tc_discard_inst(tc, inst); |
||
561 | } |
||
562 | |||
563 | /** |
||
564 | * Emit instructions to move sampling parameters to the message registers. |
||
565 | */ |
||
566 | static int |
||
567 | fs_add_sampler_params_gen6(struct toy_compiler *tc, int msg_type, |
||
568 | int base_mrf, int param_size, |
||
569 | struct toy_src *coords, int num_coords, |
||
570 | struct toy_src bias_or_lod, struct toy_src ref_or_si, |
||
571 | struct toy_src *ddx, struct toy_src *ddy, |
||
572 | int num_derivs) |
||
573 | { |
||
574 | int num_params, i; |
||
575 | |||
576 | assert(num_coords <= 4); |
||
577 | assert(num_derivs <= 3 && num_derivs <= num_coords); |
||
578 | |||
579 | #define SAMPLER_PARAM(p) (tdst(TOY_FILE_MRF, base_mrf + (p) * param_size, 0)) |
||
580 | switch (msg_type) { |
||
581 | case GEN6_MSG_SAMPLER_SAMPLE: |
||
582 | for (i = 0; i < num_coords; i++) |
||
583 | tc_MOV(tc, SAMPLER_PARAM(i), coords[i]); |
||
584 | num_params = num_coords; |
||
585 | break; |
||
586 | case GEN6_MSG_SAMPLER_SAMPLE_B: |
||
587 | case GEN6_MSG_SAMPLER_SAMPLE_L: |
||
588 | for (i = 0; i < num_coords; i++) |
||
589 | tc_MOV(tc, SAMPLER_PARAM(i), coords[i]); |
||
590 | tc_MOV(tc, SAMPLER_PARAM(4), bias_or_lod); |
||
591 | num_params = 5; |
||
592 | break; |
||
593 | case GEN6_MSG_SAMPLER_SAMPLE_C: |
||
594 | for (i = 0; i < num_coords; i++) |
||
595 | tc_MOV(tc, SAMPLER_PARAM(i), coords[i]); |
||
596 | tc_MOV(tc, SAMPLER_PARAM(4), ref_or_si); |
||
597 | num_params = 5; |
||
598 | break; |
||
599 | case GEN6_MSG_SAMPLER_SAMPLE_D: |
||
600 | for (i = 0; i < num_coords; i++) |
||
601 | tc_MOV(tc, SAMPLER_PARAM(i), coords[i]); |
||
602 | for (i = 0; i < num_derivs; i++) { |
||
603 | tc_MOV(tc, SAMPLER_PARAM(4 + i * 2), ddx[i]); |
||
604 | tc_MOV(tc, SAMPLER_PARAM(5 + i * 2), ddy[i]); |
||
605 | } |
||
606 | num_params = 4 + num_derivs * 2; |
||
607 | break; |
||
608 | case GEN6_MSG_SAMPLER_SAMPLE_B_C: |
||
609 | case GEN6_MSG_SAMPLER_SAMPLE_L_C: |
||
610 | for (i = 0; i < num_coords; i++) |
||
611 | tc_MOV(tc, SAMPLER_PARAM(i), coords[i]); |
||
612 | tc_MOV(tc, SAMPLER_PARAM(4), ref_or_si); |
||
613 | tc_MOV(tc, SAMPLER_PARAM(5), bias_or_lod); |
||
614 | num_params = 6; |
||
615 | break; |
||
616 | case GEN6_MSG_SAMPLER_LD: |
||
617 | assert(num_coords <= 3); |
||
618 | |||
619 | for (i = 0; i < num_coords; i++) |
||
620 | tc_MOV(tc, tdst_d(SAMPLER_PARAM(i)), coords[i]); |
||
621 | tc_MOV(tc, tdst_d(SAMPLER_PARAM(3)), bias_or_lod); |
||
622 | tc_MOV(tc, tdst_d(SAMPLER_PARAM(4)), ref_or_si); |
||
623 | num_params = 5; |
||
624 | break; |
||
625 | case GEN6_MSG_SAMPLER_RESINFO: |
||
626 | tc_MOV(tc, tdst_d(SAMPLER_PARAM(0)), bias_or_lod); |
||
627 | num_params = 1; |
||
628 | break; |
||
629 | default: |
||
630 | tc_fail(tc, "unknown sampler opcode"); |
||
631 | num_params = 0; |
||
632 | break; |
||
633 | } |
||
634 | #undef SAMPLER_PARAM |
||
635 | |||
636 | return num_params * param_size; |
||
637 | } |
||
638 | |||
639 | static int |
||
640 | fs_add_sampler_params_gen7(struct toy_compiler *tc, int msg_type, |
||
641 | int base_mrf, int param_size, |
||
642 | struct toy_src *coords, int num_coords, |
||
643 | struct toy_src bias_or_lod, struct toy_src ref_or_si, |
||
644 | struct toy_src *ddx, struct toy_src *ddy, |
||
645 | int num_derivs) |
||
646 | { |
||
647 | int num_params, i; |
||
648 | |||
649 | assert(num_coords <= 4); |
||
650 | assert(num_derivs <= 3 && num_derivs <= num_coords); |
||
651 | |||
652 | #define SAMPLER_PARAM(p) (tdst(TOY_FILE_MRF, base_mrf + (p) * param_size, 0)) |
||
653 | switch (msg_type) { |
||
654 | case GEN6_MSG_SAMPLER_SAMPLE: |
||
655 | for (i = 0; i < num_coords; i++) |
||
656 | tc_MOV(tc, SAMPLER_PARAM(i), coords[i]); |
||
657 | num_params = num_coords; |
||
658 | break; |
||
659 | case GEN6_MSG_SAMPLER_SAMPLE_B: |
||
660 | case GEN6_MSG_SAMPLER_SAMPLE_L: |
||
661 | tc_MOV(tc, SAMPLER_PARAM(0), bias_or_lod); |
||
662 | for (i = 0; i < num_coords; i++) |
||
663 | tc_MOV(tc, SAMPLER_PARAM(1 + i), coords[i]); |
||
664 | num_params = 1 + num_coords; |
||
665 | break; |
||
666 | case GEN6_MSG_SAMPLER_SAMPLE_C: |
||
667 | tc_MOV(tc, SAMPLER_PARAM(0), ref_or_si); |
||
668 | for (i = 0; i < num_coords; i++) |
||
669 | tc_MOV(tc, SAMPLER_PARAM(1 + i), coords[i]); |
||
670 | num_params = 1 + num_coords; |
||
671 | break; |
||
672 | case GEN6_MSG_SAMPLER_SAMPLE_D: |
||
673 | for (i = 0; i < num_coords; i++) { |
||
674 | tc_MOV(tc, SAMPLER_PARAM(i * 3), coords[i]); |
||
675 | if (i < num_derivs) { |
||
676 | tc_MOV(tc, SAMPLER_PARAM(i * 3 + 1), ddx[i]); |
||
677 | tc_MOV(tc, SAMPLER_PARAM(i * 3 + 2), ddy[i]); |
||
678 | } |
||
679 | } |
||
680 | num_params = num_coords * 3 - ((num_coords > num_derivs) ? 2 : 0); |
||
681 | break; |
||
682 | case GEN6_MSG_SAMPLER_SAMPLE_B_C: |
||
683 | case GEN6_MSG_SAMPLER_SAMPLE_L_C: |
||
684 | tc_MOV(tc, SAMPLER_PARAM(0), ref_or_si); |
||
685 | tc_MOV(tc, SAMPLER_PARAM(1), bias_or_lod); |
||
686 | for (i = 0; i < num_coords; i++) |
||
687 | tc_MOV(tc, SAMPLER_PARAM(2 + i), coords[i]); |
||
688 | num_params = 2 + num_coords; |
||
689 | break; |
||
690 | case GEN6_MSG_SAMPLER_LD: |
||
691 | assert(num_coords >= 1 && num_coords <= 3); |
||
692 | |||
693 | tc_MOV(tc, tdst_d(SAMPLER_PARAM(0)), coords[0]); |
||
694 | tc_MOV(tc, tdst_d(SAMPLER_PARAM(1)), bias_or_lod); |
||
695 | for (i = 1; i < num_coords; i++) |
||
696 | tc_MOV(tc, tdst_d(SAMPLER_PARAM(1 + i)), coords[i]); |
||
697 | num_params = 1 + num_coords; |
||
698 | break; |
||
699 | case GEN6_MSG_SAMPLER_RESINFO: |
||
700 | tc_MOV(tc, tdst_d(SAMPLER_PARAM(0)), bias_or_lod); |
||
701 | num_params = 1; |
||
702 | break; |
||
703 | default: |
||
704 | tc_fail(tc, "unknown sampler opcode"); |
||
705 | num_params = 0; |
||
706 | break; |
||
707 | } |
||
708 | #undef SAMPLER_PARAM |
||
709 | |||
710 | return num_params * param_size; |
||
711 | } |
||
712 | |||
713 | /** |
||
714 | * Set up message registers and return the message descriptor for sampling. |
||
715 | */ |
||
716 | static struct toy_src |
||
717 | fs_prepare_tgsi_sampling(struct fs_compile_context *fcc, |
||
718 | const struct toy_inst *inst, |
||
719 | int base_mrf, const uint32_t *saturate_coords, |
||
720 | unsigned *ret_sampler_index) |
||
721 | { |
||
722 | struct toy_compiler *tc = &fcc->tc; |
||
723 | unsigned simd_mode, msg_type, msg_len, sampler_index, binding_table_index; |
||
724 | struct toy_src coords[4], ddx[4], ddy[4], bias_or_lod, ref_or_si; |
||
725 | int num_coords, ref_pos, num_derivs; |
||
726 | int sampler_src, param_size, i; |
||
727 | |||
728 | switch (inst->exec_size) { |
||
729 | case GEN6_EXECSIZE_8: |
||
730 | simd_mode = GEN6_MSG_SAMPLER_SIMD8; |
||
731 | param_size = 1; |
||
732 | break; |
||
733 | case GEN6_EXECSIZE_16: |
||
734 | simd_mode = GEN6_MSG_SAMPLER_SIMD16; |
||
735 | param_size = 2; |
||
736 | break; |
||
737 | default: |
||
738 | tc_fail(tc, "unsupported execute size for sampling"); |
||
739 | return tsrc_null(); |
||
740 | break; |
||
741 | } |
||
742 | |||
743 | num_coords = tgsi_util_get_texture_coord_dim(inst->tex.target, &ref_pos); |
||
744 | tsrc_transpose(inst->src[0], coords); |
||
745 | bias_or_lod = tsrc_null(); |
||
746 | ref_or_si = tsrc_null(); |
||
747 | num_derivs = 0; |
||
748 | sampler_src = 1; |
||
749 | |||
750 | /* |
||
751 | * For TXD, |
||
752 | * |
||
753 | * src0 := (x, y, z, w) |
||
754 | * src1 := ddx |
||
755 | * src2 := ddy |
||
756 | * src3 := sampler |
||
757 | * |
||
758 | * For TEX2, TXB2, and TXL2, |
||
759 | * |
||
760 | * src0 := (x, y, z, w) |
||
761 | * src1 := (v or bias or lod, ...) |
||
762 | * src2 := sampler |
||
763 | * |
||
764 | * For TEX, TXB, TXL, and TXP, |
||
765 | * |
||
766 | * src0 := (x, y, z, w or bias or lod or projection) |
||
767 | * src1 := sampler |
||
768 | * |
||
769 | * For TXQ, |
||
770 | * |
||
771 | * src0 := (lod, ...) |
||
772 | * src1 := sampler |
||
773 | * |
||
774 | * For TXQ_LZ, |
||
775 | * |
||
776 | * src0 := sampler |
||
777 | * |
||
778 | * And for TXF, |
||
779 | * |
||
780 | * src0 := (x, y, z, w or lod) |
||
781 | * src1 := sampler |
||
782 | * |
||
783 | * State trackers should not generate opcode+texture combinations with |
||
784 | * which the two definitions conflict (e.g., TXB with SHADOW2DARRAY). |
||
785 | */ |
||
786 | switch (inst->opcode) { |
||
787 | case TOY_OPCODE_TGSI_TEX: |
||
788 | if (ref_pos >= 0) { |
||
789 | assert(ref_pos < 4); |
||
790 | |||
791 | msg_type = GEN6_MSG_SAMPLER_SAMPLE_C; |
||
792 | ref_or_si = coords[ref_pos]; |
||
793 | } |
||
794 | else { |
||
795 | msg_type = GEN6_MSG_SAMPLER_SAMPLE; |
||
796 | } |
||
797 | break; |
||
798 | case TOY_OPCODE_TGSI_TXD: |
||
799 | if (ref_pos >= 0) { |
||
800 | assert(ref_pos < 4); |
||
801 | |||
802 | msg_type = GEN7_MSG_SAMPLER_SAMPLE_D_C; |
||
803 | ref_or_si = coords[ref_pos]; |
||
804 | |||
805 | if (ilo_dev_gen(tc->dev) < ILO_GEN(7.5)) |
||
806 | tc_fail(tc, "TXD with shadow sampler not supported"); |
||
807 | } |
||
808 | else { |
||
809 | msg_type = GEN6_MSG_SAMPLER_SAMPLE_D; |
||
810 | } |
||
811 | |||
812 | tsrc_transpose(inst->src[1], ddx); |
||
813 | tsrc_transpose(inst->src[2], ddy); |
||
814 | num_derivs = num_coords; |
||
815 | sampler_src = 3; |
||
816 | break; |
||
817 | case TOY_OPCODE_TGSI_TXP: |
||
818 | if (ref_pos >= 0) { |
||
819 | assert(ref_pos < 3); |
||
820 | |||
821 | msg_type = GEN6_MSG_SAMPLER_SAMPLE_C; |
||
822 | ref_or_si = coords[ref_pos]; |
||
823 | } |
||
824 | else { |
||
825 | msg_type = GEN6_MSG_SAMPLER_SAMPLE; |
||
826 | } |
||
827 | |||
828 | /* project the coordinates */ |
||
829 | { |
||
830 | struct toy_dst tmp[4]; |
||
831 | |||
832 | tc_alloc_tmp4(tc, tmp); |
||
833 | |||
834 | tc_INV(tc, tmp[3], coords[3]); |
||
835 | for (i = 0; i < num_coords && i < 3; i++) { |
||
836 | tc_MUL(tc, tmp[i], coords[i], tsrc_from(tmp[3])); |
||
837 | coords[i] = tsrc_from(tmp[i]); |
||
838 | } |
||
839 | |||
840 | if (ref_pos >= i) { |
||
841 | tc_MUL(tc, tmp[ref_pos], ref_or_si, tsrc_from(tmp[3])); |
||
842 | ref_or_si = tsrc_from(tmp[ref_pos]); |
||
843 | } |
||
844 | } |
||
845 | break; |
||
846 | case TOY_OPCODE_TGSI_TXB: |
||
847 | if (ref_pos >= 0) { |
||
848 | assert(ref_pos < 3); |
||
849 | |||
850 | msg_type = GEN6_MSG_SAMPLER_SAMPLE_B_C; |
||
851 | ref_or_si = coords[ref_pos]; |
||
852 | } |
||
853 | else { |
||
854 | msg_type = GEN6_MSG_SAMPLER_SAMPLE_B; |
||
855 | } |
||
856 | |||
857 | bias_or_lod = coords[3]; |
||
858 | break; |
||
859 | case TOY_OPCODE_TGSI_TXL: |
||
860 | if (ref_pos >= 0) { |
||
861 | assert(ref_pos < 3); |
||
862 | |||
863 | msg_type = GEN6_MSG_SAMPLER_SAMPLE_L_C; |
||
864 | ref_or_si = coords[ref_pos]; |
||
865 | } |
||
866 | else { |
||
867 | msg_type = GEN6_MSG_SAMPLER_SAMPLE_L; |
||
868 | } |
||
869 | |||
870 | bias_or_lod = coords[3]; |
||
871 | break; |
||
872 | case TOY_OPCODE_TGSI_TXF: |
||
873 | msg_type = GEN6_MSG_SAMPLER_LD; |
||
874 | |||
875 | switch (inst->tex.target) { |
||
876 | case TGSI_TEXTURE_2D_MSAA: |
||
877 | case TGSI_TEXTURE_2D_ARRAY_MSAA: |
||
878 | assert(ref_pos >= 0 && ref_pos < 4); |
||
879 | /* lod is always 0 */ |
||
880 | bias_or_lod = tsrc_imm_d(0); |
||
881 | ref_or_si = coords[ref_pos]; |
||
882 | break; |
||
883 | default: |
||
884 | bias_or_lod = coords[3]; |
||
885 | break; |
||
886 | } |
||
887 | |||
888 | /* offset the coordinates */ |
||
889 | if (!tsrc_is_null(inst->tex.offsets[0])) { |
||
890 | struct toy_dst tmp[4]; |
||
891 | struct toy_src offsets[4]; |
||
892 | |||
893 | tc_alloc_tmp4(tc, tmp); |
||
894 | tsrc_transpose(inst->tex.offsets[0], offsets); |
||
895 | |||
896 | for (i = 0; i < num_coords; i++) { |
||
897 | tc_ADD(tc, tmp[i], coords[i], offsets[i]); |
||
898 | coords[i] = tsrc_from(tmp[i]); |
||
899 | } |
||
900 | } |
||
901 | |||
902 | sampler_src = 1; |
||
903 | break; |
||
904 | case TOY_OPCODE_TGSI_TXQ: |
||
905 | msg_type = GEN6_MSG_SAMPLER_RESINFO; |
||
906 | num_coords = 0; |
||
907 | bias_or_lod = coords[0]; |
||
908 | break; |
||
909 | case TOY_OPCODE_TGSI_TXQ_LZ: |
||
910 | msg_type = GEN6_MSG_SAMPLER_RESINFO; |
||
911 | num_coords = 0; |
||
912 | sampler_src = 0; |
||
913 | break; |
||
914 | case TOY_OPCODE_TGSI_TEX2: |
||
915 | if (ref_pos >= 0) { |
||
916 | assert(ref_pos < 5); |
||
917 | |||
918 | msg_type = GEN6_MSG_SAMPLER_SAMPLE_C; |
||
919 | |||
920 | if (ref_pos >= 4) { |
||
921 | struct toy_src src1[4]; |
||
922 | tsrc_transpose(inst->src[1], src1); |
||
923 | ref_or_si = src1[ref_pos - 4]; |
||
924 | } |
||
925 | else { |
||
926 | ref_or_si = coords[ref_pos]; |
||
927 | } |
||
928 | } |
||
929 | else { |
||
930 | msg_type = GEN6_MSG_SAMPLER_SAMPLE; |
||
931 | } |
||
932 | |||
933 | sampler_src = 2; |
||
934 | break; |
||
935 | case TOY_OPCODE_TGSI_TXB2: |
||
936 | if (ref_pos >= 0) { |
||
937 | assert(ref_pos < 4); |
||
938 | |||
939 | msg_type = GEN6_MSG_SAMPLER_SAMPLE_B_C; |
||
940 | ref_or_si = coords[ref_pos]; |
||
941 | } |
||
942 | else { |
||
943 | msg_type = GEN6_MSG_SAMPLER_SAMPLE_B; |
||
944 | } |
||
945 | |||
946 | { |
||
947 | struct toy_src src1[4]; |
||
948 | tsrc_transpose(inst->src[1], src1); |
||
949 | bias_or_lod = src1[0]; |
||
950 | } |
||
951 | |||
952 | sampler_src = 2; |
||
953 | break; |
||
954 | case TOY_OPCODE_TGSI_TXL2: |
||
955 | if (ref_pos >= 0) { |
||
956 | assert(ref_pos < 4); |
||
957 | |||
958 | msg_type = GEN6_MSG_SAMPLER_SAMPLE_L_C; |
||
959 | ref_or_si = coords[ref_pos]; |
||
960 | } |
||
961 | else { |
||
962 | msg_type = GEN6_MSG_SAMPLER_SAMPLE_L; |
||
963 | } |
||
964 | |||
965 | { |
||
966 | struct toy_src src1[4]; |
||
967 | tsrc_transpose(inst->src[1], src1); |
||
968 | bias_or_lod = src1[0]; |
||
969 | } |
||
970 | |||
971 | sampler_src = 2; |
||
972 | break; |
||
973 | default: |
||
974 | assert(!"unhandled sampling opcode"); |
||
975 | return tsrc_null(); |
||
976 | break; |
||
977 | } |
||
978 | |||
979 | assert(inst->src[sampler_src].file == TOY_FILE_IMM); |
||
980 | sampler_index = inst->src[sampler_src].val32; |
||
981 | binding_table_index = fcc->shader->bt.tex_base + sampler_index; |
||
982 | |||
983 | /* |
||
984 | * From the Sandy Bridge PRM, volume 4 part 1, page 18: |
||
985 | * |
||
986 | * "Note that the (cube map) coordinates delivered to the sampling |
||
987 | * engine must already have been divided by the component with the |
||
988 | * largest absolute value." |
||
989 | */ |
||
990 | switch (inst->tex.target) { |
||
991 | case TGSI_TEXTURE_CUBE: |
||
992 | case TGSI_TEXTURE_SHADOWCUBE: |
||
993 | case TGSI_TEXTURE_CUBE_ARRAY: |
||
994 | case TGSI_TEXTURE_SHADOWCUBE_ARRAY: |
||
995 | /* TXQ does not need coordinates */ |
||
996 | if (num_coords >= 3) { |
||
997 | struct toy_dst tmp[4]; |
||
998 | |||
999 | tc_alloc_tmp4(tc, tmp); |
||
1000 | |||
1001 | tc_SEL(tc, tmp[3], tsrc_absolute(coords[0]), |
||
1002 | tsrc_absolute(coords[1]), GEN6_COND_GE); |
||
1003 | tc_SEL(tc, tmp[3], tsrc_from(tmp[3]), |
||
1004 | tsrc_absolute(coords[2]), GEN6_COND_GE); |
||
1005 | tc_INV(tc, tmp[3], tsrc_from(tmp[3])); |
||
1006 | |||
1007 | for (i = 0; i < 3; i++) { |
||
1008 | tc_MUL(tc, tmp[i], coords[i], tsrc_from(tmp[3])); |
||
1009 | coords[i] = tsrc_from(tmp[i]); |
||
1010 | } |
||
1011 | } |
||
1012 | break; |
||
1013 | } |
||
1014 | |||
1015 | /* |
||
1016 | * Saturate (s, t, r). saturate_coords is set for sampler and coordinate |
||
1017 | * that uses linear filtering and PIPE_TEX_WRAP_CLAMP respectively. It is |
||
1018 | * so that sampling outside the border gets the correct colors. |
||
1019 | */ |
||
1020 | for (i = 0; i < MIN2(num_coords, 3); i++) { |
||
1021 | bool is_rect; |
||
1022 | |||
1023 | if (!(saturate_coords[i] & (1 << sampler_index))) |
||
1024 | continue; |
||
1025 | |||
1026 | switch (inst->tex.target) { |
||
1027 | case TGSI_TEXTURE_RECT: |
||
1028 | case TGSI_TEXTURE_SHADOWRECT: |
||
1029 | is_rect = true; |
||
1030 | break; |
||
1031 | default: |
||
1032 | is_rect = false; |
||
1033 | break; |
||
1034 | } |
||
1035 | |||
1036 | if (is_rect) { |
||
1037 | struct toy_src min, max; |
||
1038 | struct toy_dst tmp; |
||
1039 | |||
1040 | tc_fail(tc, "GL_CLAMP with rectangle texture unsupported"); |
||
1041 | tmp = tc_alloc_tmp(tc); |
||
1042 | |||
1043 | /* saturate to [0, width] or [0, height] */ |
||
1044 | /* TODO TXQ? */ |
||
1045 | min = tsrc_imm_f(0.0f); |
||
1046 | max = tsrc_imm_f(2048.0f); |
||
1047 | |||
1048 | tc_SEL(tc, tmp, coords[i], min, GEN6_COND_G); |
||
1049 | tc_SEL(tc, tmp, tsrc_from(tmp), max, GEN6_COND_L); |
||
1050 | |||
1051 | coords[i] = tsrc_from(tmp); |
||
1052 | } |
||
1053 | else { |
||
1054 | struct toy_dst tmp; |
||
1055 | struct toy_inst *inst2; |
||
1056 | |||
1057 | tmp = tc_alloc_tmp(tc); |
||
1058 | |||
1059 | /* saturate to [0.0f, 1.0f] */ |
||
1060 | inst2 = tc_MOV(tc, tmp, coords[i]); |
||
1061 | inst2->saturate = true; |
||
1062 | |||
1063 | coords[i] = tsrc_from(tmp); |
||
1064 | } |
||
1065 | } |
||
1066 | |||
1067 | /* set up sampler parameters */ |
||
1068 | if (ilo_dev_gen(tc->dev) >= ILO_GEN(7)) { |
||
1069 | msg_len = fs_add_sampler_params_gen7(tc, msg_type, base_mrf, param_size, |
||
1070 | coords, num_coords, bias_or_lod, ref_or_si, ddx, ddy, num_derivs); |
||
1071 | } |
||
1072 | else { |
||
1073 | msg_len = fs_add_sampler_params_gen6(tc, msg_type, base_mrf, param_size, |
||
1074 | coords, num_coords, bias_or_lod, ref_or_si, ddx, ddy, num_derivs); |
||
1075 | } |
||
1076 | |||
1077 | /* |
||
1078 | * From the Sandy Bridge PRM, volume 4 part 1, page 136: |
||
1079 | * |
||
1080 | * "The maximum message length allowed to the sampler is 11. This would |
||
1081 | * disallow sample_d, sample_b_c, and sample_l_c with a SIMD Mode of |
||
1082 | * SIMD16." |
||
1083 | */ |
||
1084 | if (msg_len > 11) |
||
1085 | tc_fail(tc, "maximum length for messages to the sampler is 11"); |
||
1086 | |||
1087 | if (ret_sampler_index) |
||
1088 | *ret_sampler_index = sampler_index; |
||
1089 | |||
1090 | return tsrc_imm_mdesc_sampler(tc, msg_len, 4 * param_size, |
||
1091 | false, simd_mode, msg_type, sampler_index, binding_table_index); |
||
1092 | } |
||
1093 | |||
1094 | static void |
||
1095 | fs_lower_opcode_tgsi_sampling(struct fs_compile_context *fcc, |
||
1096 | struct toy_inst *inst) |
||
1097 | { |
||
1098 | struct toy_compiler *tc = &fcc->tc; |
||
1099 | struct toy_dst dst[4], tmp[4]; |
||
1100 | struct toy_src desc; |
||
1101 | unsigned sampler_index; |
||
1102 | int swizzles[4], i; |
||
1103 | bool need_filter; |
||
1104 | |||
1105 | desc = fs_prepare_tgsi_sampling(fcc, inst, |
||
1106 | fcc->first_free_mrf, |
||
1107 | fcc->variant->saturate_tex_coords, |
||
1108 | &sampler_index); |
||
1109 | |||
1110 | switch (inst->opcode) { |
||
1111 | case TOY_OPCODE_TGSI_TXF: |
||
1112 | case TOY_OPCODE_TGSI_TXQ: |
||
1113 | case TOY_OPCODE_TGSI_TXQ_LZ: |
||
1114 | need_filter = false; |
||
1115 | break; |
||
1116 | default: |
||
1117 | need_filter = true; |
||
1118 | break; |
||
1119 | } |
||
1120 | |||
1121 | toy_compiler_lower_to_send(tc, inst, false, GEN6_SFID_SAMPLER); |
||
1122 | inst->src[0] = tsrc(TOY_FILE_MRF, fcc->first_free_mrf, 0); |
||
1123 | inst->src[1] = desc; |
||
1124 | for (i = 2; i < Elements(inst->src); i++) |
||
1125 | inst->src[i] = tsrc_null(); |
||
1126 | |||
1127 | /* write to temps first */ |
||
1128 | tc_alloc_tmp4(tc, tmp); |
||
1129 | for (i = 0; i < 4; i++) |
||
1130 | tmp[i].type = inst->dst.type; |
||
1131 | tdst_transpose(inst->dst, dst); |
||
1132 | inst->dst = tmp[0]; |
||
1133 | |||
1134 | tc_move_inst(tc, inst); |
||
1135 | |||
1136 | if (need_filter) { |
||
1137 | assert(sampler_index < fcc->variant->num_sampler_views); |
||
1138 | swizzles[0] = fcc->variant->sampler_view_swizzles[sampler_index].r; |
||
1139 | swizzles[1] = fcc->variant->sampler_view_swizzles[sampler_index].g; |
||
1140 | swizzles[2] = fcc->variant->sampler_view_swizzles[sampler_index].b; |
||
1141 | swizzles[3] = fcc->variant->sampler_view_swizzles[sampler_index].a; |
||
1142 | } |
||
1143 | else { |
||
1144 | swizzles[0] = PIPE_SWIZZLE_RED; |
||
1145 | swizzles[1] = PIPE_SWIZZLE_GREEN; |
||
1146 | swizzles[2] = PIPE_SWIZZLE_BLUE; |
||
1147 | swizzles[3] = PIPE_SWIZZLE_ALPHA; |
||
1148 | } |
||
1149 | |||
1150 | /* swizzle the results */ |
||
1151 | for (i = 0; i < 4; i++) { |
||
1152 | switch (swizzles[i]) { |
||
1153 | case PIPE_SWIZZLE_ZERO: |
||
1154 | tc_MOV(tc, dst[i], tsrc_imm_f(0.0f)); |
||
1155 | break; |
||
1156 | case PIPE_SWIZZLE_ONE: |
||
1157 | tc_MOV(tc, dst[i], tsrc_imm_f(1.0f)); |
||
1158 | break; |
||
1159 | default: |
||
1160 | tc_MOV(tc, dst[i], tsrc_from(tmp[swizzles[i]])); |
||
1161 | break; |
||
1162 | } |
||
1163 | } |
||
1164 | } |
||
1165 | |||
1166 | static void |
||
1167 | fs_lower_opcode_derivative(struct toy_compiler *tc, struct toy_inst *inst) |
||
1168 | { |
||
1169 | struct toy_dst dst[4]; |
||
1170 | struct toy_src src[4]; |
||
1171 | int i; |
||
1172 | |||
1173 | tdst_transpose(inst->dst, dst); |
||
1174 | tsrc_transpose(inst->src[0], src); |
||
1175 | |||
1176 | /* |
||
1177 | * Every four fragments are from a 2x2 subspan, with |
||
1178 | * |
||
1179 | * fragment 1 on the top-left, |
||
1180 | * fragment 2 on the top-right, |
||
1181 | * fragment 3 on the bottom-left, |
||
1182 | * fragment 4 on the bottom-right. |
||
1183 | * |
||
1184 | * DDX should thus produce |
||
1185 | * |
||
1186 | * dst = src.yyww - src.xxzz |
||
1187 | * |
||
1188 | * and DDY should produce |
||
1189 | * |
||
1190 | * dst = src.zzww - src.xxyy |
||
1191 | * |
||
1192 | * But since we are in GEN6_ALIGN_1, swizzling does not work and we have to |
||
1193 | * play with the region parameters. |
||
1194 | */ |
||
1195 | if (inst->opcode == TOY_OPCODE_DDX) { |
||
1196 | for (i = 0; i < 4; i++) { |
||
1197 | struct toy_src left, right; |
||
1198 | |||
1199 | left = tsrc_rect(src[i], TOY_RECT_220); |
||
1200 | right = tsrc_offset(left, 0, 1); |
||
1201 | |||
1202 | tc_ADD(tc, dst[i], right, tsrc_negate(left)); |
||
1203 | } |
||
1204 | } |
||
1205 | else { |
||
1206 | for (i = 0; i < 4; i++) { |
||
1207 | struct toy_src top, bottom; |
||
1208 | |||
1209 | /* approximate with dst = src.zzzz - src.xxxx */ |
||
1210 | top = tsrc_rect(src[i], TOY_RECT_440); |
||
1211 | bottom = tsrc_offset(top, 0, 2); |
||
1212 | |||
1213 | tc_ADD(tc, dst[i], bottom, tsrc_negate(top)); |
||
1214 | } |
||
1215 | } |
||
1216 | |||
1217 | tc_discard_inst(tc, inst); |
||
1218 | } |
||
1219 | |||
1220 | static void |
||
1221 | fs_lower_opcode_fb_write(struct toy_compiler *tc, struct toy_inst *inst) |
||
1222 | { |
||
1223 | /* fs_write_fb() has set up the message registers */ |
||
1224 | toy_compiler_lower_to_send(tc, inst, true, |
||
1225 | GEN6_SFID_DP_RC); |
||
1226 | } |
||
1227 | |||
1228 | static void |
||
1229 | fs_lower_opcode_kil(struct toy_compiler *tc, struct toy_inst *inst) |
||
1230 | { |
||
1231 | struct toy_dst pixel_mask_dst; |
||
1232 | struct toy_src f0, pixel_mask; |
||
1233 | struct toy_inst *tmp; |
||
1234 | |||
1235 | /* lower half of r1.7:ud */ |
||
1236 | pixel_mask_dst = tdst_uw(tdst(TOY_FILE_GRF, 1, 7 * 4)); |
||
1237 | pixel_mask = tsrc_rect(tsrc_from(pixel_mask_dst), TOY_RECT_010); |
||
1238 | |||
1239 | f0 = tsrc_rect(tsrc_uw(tsrc(TOY_FILE_ARF, GEN6_ARF_F0, 0)), TOY_RECT_010); |
||
1240 | |||
1241 | /* KILL or KILL_IF */ |
||
1242 | if (tsrc_is_null(inst->src[0])) { |
||
1243 | struct toy_src dummy = tsrc_uw(tsrc(TOY_FILE_GRF, 0, 0)); |
||
1244 | struct toy_dst f0_dst = tdst_uw(tdst(TOY_FILE_ARF, GEN6_ARF_F0, 0)); |
||
1245 | |||
1246 | /* create a mask that masks out all pixels */ |
||
1247 | tmp = tc_MOV(tc, f0_dst, tsrc_rect(tsrc_imm_uw(0xffff), TOY_RECT_010)); |
||
1248 | tmp->exec_size = GEN6_EXECSIZE_1; |
||
1249 | tmp->mask_ctrl = GEN6_MASKCTRL_NOMASK; |
||
1250 | |||
1251 | tc_CMP(tc, tdst_null(), dummy, dummy, GEN6_COND_NZ); |
||
1252 | |||
1253 | /* swapping the two src operands breaks glBitmap()!? */ |
||
1254 | tmp = tc_AND(tc, pixel_mask_dst, f0, pixel_mask); |
||
1255 | tmp->exec_size = GEN6_EXECSIZE_1; |
||
1256 | tmp->mask_ctrl = GEN6_MASKCTRL_NOMASK; |
||
1257 | } |
||
1258 | else { |
||
1259 | struct toy_src src[4]; |
||
1260 | int i; |
||
1261 | |||
1262 | tsrc_transpose(inst->src[0], src); |
||
1263 | /* mask out killed pixels */ |
||
1264 | for (i = 0; i < 4; i++) { |
||
1265 | tc_CMP(tc, tdst_null(), src[i], tsrc_imm_f(0.0f), |
||
1266 | GEN6_COND_GE); |
||
1267 | |||
1268 | /* swapping the two src operands breaks glBitmap()!? */ |
||
1269 | tmp = tc_AND(tc, pixel_mask_dst, f0, pixel_mask); |
||
1270 | tmp->exec_size = GEN6_EXECSIZE_1; |
||
1271 | tmp->mask_ctrl = GEN6_MASKCTRL_NOMASK; |
||
1272 | } |
||
1273 | } |
||
1274 | |||
1275 | tc_discard_inst(tc, inst); |
||
1276 | } |
||
1277 | |||
1278 | static void |
||
1279 | fs_lower_virtual_opcodes(struct fs_compile_context *fcc) |
||
1280 | { |
||
1281 | struct toy_compiler *tc = &fcc->tc; |
||
1282 | struct toy_inst *inst; |
||
1283 | |||
1284 | /* lower TGSI's first, as they might be lowered to other virtual opcodes */ |
||
1285 | tc_head(tc); |
||
1286 | while ((inst = tc_next(tc)) != NULL) { |
||
1287 | switch (inst->opcode) { |
||
1288 | case TOY_OPCODE_TGSI_IN: |
||
1289 | case TOY_OPCODE_TGSI_CONST: |
||
1290 | case TOY_OPCODE_TGSI_SV: |
||
1291 | case TOY_OPCODE_TGSI_IMM: |
||
1292 | fs_lower_opcode_tgsi_direct(fcc, inst); |
||
1293 | break; |
||
1294 | case TOY_OPCODE_TGSI_INDIRECT_FETCH: |
||
1295 | case TOY_OPCODE_TGSI_INDIRECT_STORE: |
||
1296 | fs_lower_opcode_tgsi_indirect(fcc, inst); |
||
1297 | break; |
||
1298 | case TOY_OPCODE_TGSI_TEX: |
||
1299 | case TOY_OPCODE_TGSI_TXB: |
||
1300 | case TOY_OPCODE_TGSI_TXD: |
||
1301 | case TOY_OPCODE_TGSI_TXL: |
||
1302 | case TOY_OPCODE_TGSI_TXP: |
||
1303 | case TOY_OPCODE_TGSI_TXF: |
||
1304 | case TOY_OPCODE_TGSI_TXQ: |
||
1305 | case TOY_OPCODE_TGSI_TXQ_LZ: |
||
1306 | case TOY_OPCODE_TGSI_TEX2: |
||
1307 | case TOY_OPCODE_TGSI_TXB2: |
||
1308 | case TOY_OPCODE_TGSI_TXL2: |
||
1309 | case TOY_OPCODE_TGSI_SAMPLE: |
||
1310 | case TOY_OPCODE_TGSI_SAMPLE_I: |
||
1311 | case TOY_OPCODE_TGSI_SAMPLE_I_MS: |
||
1312 | case TOY_OPCODE_TGSI_SAMPLE_B: |
||
1313 | case TOY_OPCODE_TGSI_SAMPLE_C: |
||
1314 | case TOY_OPCODE_TGSI_SAMPLE_C_LZ: |
||
1315 | case TOY_OPCODE_TGSI_SAMPLE_D: |
||
1316 | case TOY_OPCODE_TGSI_SAMPLE_L: |
||
1317 | case TOY_OPCODE_TGSI_GATHER4: |
||
1318 | case TOY_OPCODE_TGSI_SVIEWINFO: |
||
1319 | case TOY_OPCODE_TGSI_SAMPLE_POS: |
||
1320 | case TOY_OPCODE_TGSI_SAMPLE_INFO: |
||
1321 | fs_lower_opcode_tgsi_sampling(fcc, inst); |
||
1322 | break; |
||
1323 | } |
||
1324 | } |
||
1325 | |||
1326 | tc_head(tc); |
||
1327 | while ((inst = tc_next(tc)) != NULL) { |
||
1328 | switch (inst->opcode) { |
||
1329 | case TOY_OPCODE_INV: |
||
1330 | case TOY_OPCODE_LOG: |
||
1331 | case TOY_OPCODE_EXP: |
||
1332 | case TOY_OPCODE_SQRT: |
||
1333 | case TOY_OPCODE_RSQ: |
||
1334 | case TOY_OPCODE_SIN: |
||
1335 | case TOY_OPCODE_COS: |
||
1336 | case TOY_OPCODE_FDIV: |
||
1337 | case TOY_OPCODE_POW: |
||
1338 | case TOY_OPCODE_INT_DIV_QUOTIENT: |
||
1339 | case TOY_OPCODE_INT_DIV_REMAINDER: |
||
1340 | toy_compiler_lower_math(tc, inst); |
||
1341 | break; |
||
1342 | case TOY_OPCODE_DDX: |
||
1343 | case TOY_OPCODE_DDY: |
||
1344 | fs_lower_opcode_derivative(tc, inst); |
||
1345 | break; |
||
1346 | case TOY_OPCODE_FB_WRITE: |
||
1347 | fs_lower_opcode_fb_write(tc, inst); |
||
1348 | break; |
||
1349 | case TOY_OPCODE_KIL: |
||
1350 | fs_lower_opcode_kil(tc, inst); |
||
1351 | break; |
||
1352 | default: |
||
1353 | if (inst->opcode > 127) |
||
1354 | tc_fail(tc, "unhandled virtual opcode"); |
||
1355 | break; |
||
1356 | } |
||
1357 | } |
||
1358 | } |
||
1359 | |||
1360 | /** |
||
1361 | * Compile the shader. |
||
1362 | */ |
||
1363 | static bool |
||
1364 | fs_compile(struct fs_compile_context *fcc) |
||
1365 | { |
||
1366 | struct toy_compiler *tc = &fcc->tc; |
||
1367 | struct ilo_shader *sh = fcc->shader; |
||
1368 | |||
1369 | fs_lower_virtual_opcodes(fcc); |
||
1370 | toy_compiler_legalize_for_ra(tc); |
||
1371 | toy_compiler_optimize(tc); |
||
1372 | toy_compiler_allocate_registers(tc, |
||
1373 | fcc->first_free_grf, |
||
1374 | fcc->last_free_grf, |
||
1375 | fcc->num_grf_per_vrf); |
||
1376 | toy_compiler_legalize_for_asm(tc); |
||
1377 | |||
1378 | if (tc->fail) { |
||
1379 | ilo_err("failed to legalize FS instructions: %s\n", tc->reason); |
||
1380 | return false; |
||
1381 | } |
||
1382 | |||
1383 | if (ilo_debug & ILO_DEBUG_FS) { |
||
1384 | ilo_printf("legalized instructions:\n"); |
||
1385 | toy_compiler_dump(tc); |
||
1386 | ilo_printf("\n"); |
||
1387 | } |
||
1388 | |||
1389 | if (true) { |
||
1390 | sh->kernel = toy_compiler_assemble(tc, &sh->kernel_size); |
||
1391 | } |
||
1392 | else { |
||
1393 | static const uint32_t microcode[] = { |
||
1394 | /* fill in the microcode here */ |
||
1395 | 0x0, 0x0, 0x0, 0x0, |
||
1396 | }; |
||
1397 | const bool swap = true; |
||
1398 | |||
1399 | sh->kernel_size = sizeof(microcode); |
||
1400 | sh->kernel = MALLOC(sh->kernel_size); |
||
1401 | |||
1402 | if (sh->kernel) { |
||
1403 | const int num_dwords = sizeof(microcode) / 4; |
||
1404 | const uint32_t *src = microcode; |
||
1405 | uint32_t *dst = (uint32_t *) sh->kernel; |
||
1406 | int i; |
||
1407 | |||
1408 | for (i = 0; i < num_dwords; i += 4) { |
||
1409 | if (swap) { |
||
1410 | dst[i + 0] = src[i + 3]; |
||
1411 | dst[i + 1] = src[i + 2]; |
||
1412 | dst[i + 2] = src[i + 1]; |
||
1413 | dst[i + 3] = src[i + 0]; |
||
1414 | } |
||
1415 | else { |
||
1416 | memcpy(dst, src, 16); |
||
1417 | } |
||
1418 | } |
||
1419 | } |
||
1420 | } |
||
1421 | |||
1422 | if (!sh->kernel) { |
||
1423 | ilo_err("failed to compile FS: %s\n", tc->reason); |
||
1424 | return false; |
||
1425 | } |
||
1426 | |||
1427 | if (ilo_debug & ILO_DEBUG_FS) { |
||
1428 | ilo_printf("disassembly:\n"); |
||
1429 | toy_compiler_disassemble(tc->dev, sh->kernel, sh->kernel_size, false); |
||
1430 | ilo_printf("\n"); |
||
1431 | } |
||
1432 | |||
1433 | return true; |
||
1434 | } |
||
1435 | |||
1436 | /** |
||
1437 | * Emit instructions to write the color buffers (and the depth buffer). |
||
1438 | */ |
||
1439 | static void |
||
1440 | fs_write_fb(struct fs_compile_context *fcc) |
||
1441 | { |
||
1442 | struct toy_compiler *tc = &fcc->tc; |
||
1443 | int base_mrf = fcc->first_free_mrf; |
||
1444 | const struct toy_dst header = tdst_ud(tdst(TOY_FILE_MRF, base_mrf, 0)); |
||
1445 | bool header_present = false; |
||
1446 | struct toy_src desc; |
||
1447 | unsigned msg_type, ctrl; |
||
1448 | int color_slots[ILO_MAX_DRAW_BUFFERS], num_cbufs; |
||
1449 | int pos_slot = -1, cbuf, i; |
||
1450 | |||
1451 | for (i = 0; i < Elements(color_slots); i++) |
||
1452 | color_slots[i] = -1; |
||
1453 | |||
1454 | for (i = 0; i < fcc->tgsi.num_outputs; i++) { |
||
1455 | if (fcc->tgsi.outputs[i].semantic_name == TGSI_SEMANTIC_COLOR) { |
||
1456 | assert(fcc->tgsi.outputs[i].semantic_index < Elements(color_slots)); |
||
1457 | color_slots[fcc->tgsi.outputs[i].semantic_index] = i; |
||
1458 | } |
||
1459 | else if (fcc->tgsi.outputs[i].semantic_name == TGSI_SEMANTIC_POSITION) { |
||
1460 | pos_slot = i; |
||
1461 | } |
||
1462 | } |
||
1463 | |||
1464 | num_cbufs = fcc->variant->u.fs.num_cbufs; |
||
1465 | /* still need to send EOT (and probably depth) */ |
||
1466 | if (!num_cbufs) |
||
1467 | num_cbufs = 1; |
||
1468 | |||
1469 | /* we need the header to specify the pixel mask or render target */ |
||
1470 | if (fcc->tgsi.uses_kill || num_cbufs > 1) { |
||
1471 | const struct toy_src r0 = tsrc_ud(tsrc(TOY_FILE_GRF, 0, 0)); |
||
1472 | struct toy_inst *inst; |
||
1473 | |||
1474 | inst = tc_MOV(tc, header, r0); |
||
1475 | inst->mask_ctrl = GEN6_MASKCTRL_NOMASK; |
||
1476 | base_mrf += fcc->num_grf_per_vrf; |
||
1477 | |||
1478 | /* this is a two-register header */ |
||
1479 | if (fcc->dispatch_mode == GEN6_PS_DISPATCH_8) { |
||
1480 | inst = tc_MOV(tc, tdst_offset(header, 1, 0), tsrc_offset(r0, 1, 0)); |
||
1481 | inst->mask_ctrl = GEN6_MASKCTRL_NOMASK; |
||
1482 | base_mrf += fcc->num_grf_per_vrf; |
||
1483 | } |
||
1484 | |||
1485 | header_present = true; |
||
1486 | } |
||
1487 | |||
1488 | for (cbuf = 0; cbuf < num_cbufs; cbuf++) { |
||
1489 | const int slot = |
||
1490 | color_slots[(fcc->tgsi.props.fs_color0_writes_all_cbufs) ? 0 : cbuf]; |
||
1491 | int mrf = base_mrf, vrf; |
||
1492 | struct toy_src src[4]; |
||
1493 | |||
1494 | if (slot >= 0) { |
||
1495 | const unsigned undefined_mask = |
||
1496 | fcc->tgsi.outputs[slot].undefined_mask; |
||
1497 | const int index = fcc->tgsi.outputs[slot].index; |
||
1498 | |||
1499 | vrf = toy_tgsi_get_vrf(&fcc->tgsi, TGSI_FILE_OUTPUT, 0, index); |
||
1500 | if (vrf >= 0) { |
||
1501 | const struct toy_src tmp = tsrc(TOY_FILE_VRF, vrf, 0); |
||
1502 | tsrc_transpose(tmp, src); |
||
1503 | } |
||
1504 | else { |
||
1505 | /* use (0, 0, 0, 0) */ |
||
1506 | tsrc_transpose(tsrc_imm_f(0.0f), src); |
||
1507 | } |
||
1508 | |||
1509 | for (i = 0; i < 4; i++) { |
||
1510 | const struct toy_dst dst = tdst(TOY_FILE_MRF, mrf, 0); |
||
1511 | |||
1512 | if (undefined_mask & (1 << i)) |
||
1513 | src[i] = tsrc_imm_f(0.0f); |
||
1514 | |||
1515 | tc_MOV(tc, dst, src[i]); |
||
1516 | |||
1517 | mrf += fcc->num_grf_per_vrf; |
||
1518 | } |
||
1519 | } |
||
1520 | else { |
||
1521 | /* use (0, 0, 0, 0) */ |
||
1522 | for (i = 0; i < 4; i++) { |
||
1523 | const struct toy_dst dst = tdst(TOY_FILE_MRF, mrf, 0); |
||
1524 | |||
1525 | tc_MOV(tc, dst, tsrc_imm_f(0.0f)); |
||
1526 | mrf += fcc->num_grf_per_vrf; |
||
1527 | } |
||
1528 | } |
||
1529 | |||
1530 | /* select BLEND_STATE[rt] */ |
||
1531 | if (cbuf > 0) { |
||
1532 | struct toy_inst *inst; |
||
1533 | |||
1534 | inst = tc_MOV(tc, tdst_offset(header, 0, 2), tsrc_imm_ud(cbuf)); |
||
1535 | inst->mask_ctrl = GEN6_MASKCTRL_NOMASK; |
||
1536 | inst->exec_size = GEN6_EXECSIZE_1; |
||
1537 | inst->src[0].rect = TOY_RECT_010; |
||
1538 | } |
||
1539 | |||
1540 | if (cbuf == 0 && pos_slot >= 0) { |
||
1541 | const int index = fcc->tgsi.outputs[pos_slot].index; |
||
1542 | const struct toy_dst dst = tdst(TOY_FILE_MRF, mrf, 0); |
||
1543 | struct toy_src src[4]; |
||
1544 | int vrf; |
||
1545 | |||
1546 | vrf = toy_tgsi_get_vrf(&fcc->tgsi, TGSI_FILE_OUTPUT, 0, index); |
||
1547 | if (vrf >= 0) { |
||
1548 | const struct toy_src tmp = tsrc(TOY_FILE_VRF, vrf, 0); |
||
1549 | tsrc_transpose(tmp, src); |
||
1550 | } |
||
1551 | else { |
||
1552 | /* use (0, 0, 0, 0) */ |
||
1553 | tsrc_transpose(tsrc_imm_f(0.0f), src); |
||
1554 | } |
||
1555 | |||
1556 | /* only Z */ |
||
1557 | tc_MOV(tc, dst, src[2]); |
||
1558 | |||
1559 | mrf += fcc->num_grf_per_vrf; |
||
1560 | } |
||
1561 | |||
1562 | msg_type = (fcc->dispatch_mode == GEN6_PS_DISPATCH_16) ? |
||
1563 | GEN6_MSG_DP_RT_MODE_SIMD16 >> 8 : |
||
1564 | GEN6_MSG_DP_RT_MODE_SIMD8_LO >> 8; |
||
1565 | |||
1566 | ctrl = (cbuf == num_cbufs - 1) << 12 | |
||
1567 | msg_type << 8; |
||
1568 | |||
1569 | desc = tsrc_imm_mdesc_data_port(tc, cbuf == num_cbufs - 1, |
||
1570 | mrf - fcc->first_free_mrf, 0, |
||
1571 | header_present, false, |
||
1572 | GEN6_MSG_DP_RT_WRITE, |
||
1573 | ctrl, fcc->shader->bt.rt_base + cbuf); |
||
1574 | |||
1575 | tc_add2(tc, TOY_OPCODE_FB_WRITE, tdst_null(), |
||
1576 | tsrc(TOY_FILE_MRF, fcc->first_free_mrf, 0), desc); |
||
1577 | } |
||
1578 | } |
||
1579 | |||
1580 | /** |
||
1581 | * Set up shader outputs for fixed-function units. |
||
1582 | */ |
||
1583 | static void |
||
1584 | fs_setup_shader_out(struct ilo_shader *sh, const struct toy_tgsi *tgsi) |
||
1585 | { |
||
1586 | int i; |
||
1587 | |||
1588 | sh->out.count = tgsi->num_outputs; |
||
1589 | for (i = 0; i < tgsi->num_outputs; i++) { |
||
1590 | sh->out.register_indices[i] = tgsi->outputs[i].index; |
||
1591 | sh->out.semantic_names[i] = tgsi->outputs[i].semantic_name; |
||
1592 | sh->out.semantic_indices[i] = tgsi->outputs[i].semantic_index; |
||
1593 | |||
1594 | if (tgsi->outputs[i].semantic_name == TGSI_SEMANTIC_POSITION) |
||
1595 | sh->out.has_pos = true; |
||
1596 | } |
||
1597 | } |
||
1598 | |||
1599 | /** |
||
1600 | * Set up shader inputs for fixed-function units. |
||
1601 | */ |
||
1602 | static void |
||
1603 | fs_setup_shader_in(struct ilo_shader *sh, const struct toy_tgsi *tgsi, |
||
1604 | bool flatshade) |
||
1605 | { |
||
1606 | int i; |
||
1607 | |||
1608 | sh->in.count = tgsi->num_inputs; |
||
1609 | for (i = 0; i < tgsi->num_inputs; i++) { |
||
1610 | sh->in.semantic_names[i] = tgsi->inputs[i].semantic_name; |
||
1611 | sh->in.semantic_indices[i] = tgsi->inputs[i].semantic_index; |
||
1612 | sh->in.interp[i] = tgsi->inputs[i].interp; |
||
1613 | sh->in.centroid[i] = tgsi->inputs[i].centroid; |
||
1614 | |||
1615 | if (tgsi->inputs[i].semantic_name == TGSI_SEMANTIC_POSITION) { |
||
1616 | sh->in.has_pos = true; |
||
1617 | continue; |
||
1618 | } |
||
1619 | else if (tgsi->inputs[i].semantic_name == TGSI_SEMANTIC_FACE) { |
||
1620 | continue; |
||
1621 | } |
||
1622 | |||
1623 | switch (tgsi->inputs[i].interp) { |
||
1624 | case TGSI_INTERPOLATE_CONSTANT: |
||
1625 | sh->in.const_interp_enable |= 1 << i; |
||
1626 | break; |
||
1627 | case TGSI_INTERPOLATE_LINEAR: |
||
1628 | sh->in.has_linear_interp = true; |
||
1629 | |||
1630 | if (tgsi->inputs[i].centroid) { |
||
1631 | sh->in.barycentric_interpolation_mode |= |
||
1632 | GEN6_INTERP_NONPERSPECTIVE_CENTROID; |
||
1633 | } |
||
1634 | else { |
||
1635 | sh->in.barycentric_interpolation_mode |= |
||
1636 | GEN6_INTERP_NONPERSPECTIVE_PIXEL; |
||
1637 | } |
||
1638 | break; |
||
1639 | case TGSI_INTERPOLATE_COLOR: |
||
1640 | if (flatshade) { |
||
1641 | sh->in.const_interp_enable |= 1 << i; |
||
1642 | break; |
||
1643 | } |
||
1644 | /* fall through */ |
||
1645 | case TGSI_INTERPOLATE_PERSPECTIVE: |
||
1646 | if (tgsi->inputs[i].centroid) { |
||
1647 | sh->in.barycentric_interpolation_mode |= |
||
1648 | GEN6_INTERP_PERSPECTIVE_CENTROID; |
||
1649 | } |
||
1650 | else { |
||
1651 | sh->in.barycentric_interpolation_mode |= |
||
1652 | GEN6_INTERP_PERSPECTIVE_PIXEL; |
||
1653 | } |
||
1654 | break; |
||
1655 | default: |
||
1656 | break; |
||
1657 | } |
||
1658 | } |
||
1659 | } |
||
1660 | |||
1661 | static int |
||
1662 | fs_setup_payloads(struct fs_compile_context *fcc) |
||
1663 | { |
||
1664 | const struct ilo_shader *sh = fcc->shader; |
||
1665 | int grf, i; |
||
1666 | |||
1667 | grf = 0; |
||
1668 | |||
1669 | /* r0: header */ |
||
1670 | grf++; |
||
1671 | |||
1672 | /* r1-r2: coordinates and etc. */ |
||
1673 | grf += (fcc->dispatch_mode == GEN6_PS_DISPATCH_32) ? 2 : 1; |
||
1674 | |||
1675 | for (i = 0; i < Elements(fcc->payloads); i++) { |
||
1676 | const int reg_scale = |
||
1677 | (fcc->dispatch_mode == GEN6_PS_DISPATCH_8) ? 1 : 2; |
||
1678 | |||
1679 | /* r3-r26 or r32-r55: barycentric interpolation parameters */ |
||
1680 | if (sh->in.barycentric_interpolation_mode & |
||
1681 | (GEN6_INTERP_PERSPECTIVE_PIXEL)) { |
||
1682 | fcc->payloads[i].interp_perspective_pixel = grf; |
||
1683 | grf += 2 * reg_scale; |
||
1684 | } |
||
1685 | if (sh->in.barycentric_interpolation_mode & |
||
1686 | (GEN6_INTERP_PERSPECTIVE_CENTROID)) { |
||
1687 | fcc->payloads[i].interp_perspective_centroid = grf; |
||
1688 | grf += 2 * reg_scale; |
||
1689 | } |
||
1690 | if (sh->in.barycentric_interpolation_mode & |
||
1691 | (GEN6_INTERP_PERSPECTIVE_SAMPLE)) { |
||
1692 | fcc->payloads[i].interp_perspective_sample = grf; |
||
1693 | grf += 2 * reg_scale; |
||
1694 | } |
||
1695 | if (sh->in.barycentric_interpolation_mode & |
||
1696 | (GEN6_INTERP_NONPERSPECTIVE_PIXEL)) { |
||
1697 | fcc->payloads[i].interp_nonperspective_pixel = grf; |
||
1698 | grf += 2 * reg_scale; |
||
1699 | } |
||
1700 | if (sh->in.barycentric_interpolation_mode & |
||
1701 | (GEN6_INTERP_NONPERSPECTIVE_CENTROID)) { |
||
1702 | fcc->payloads[i].interp_nonperspective_centroid = grf; |
||
1703 | grf += 2 * reg_scale; |
||
1704 | } |
||
1705 | if (sh->in.barycentric_interpolation_mode & |
||
1706 | (GEN6_INTERP_NONPERSPECTIVE_SAMPLE)) { |
||
1707 | fcc->payloads[i].interp_nonperspective_sample = grf; |
||
1708 | grf += 2 * reg_scale; |
||
1709 | } |
||
1710 | |||
1711 | /* r27-r28 or r56-r57: interpoloated depth */ |
||
1712 | if (sh->in.has_pos) { |
||
1713 | fcc->payloads[i].source_depth = grf; |
||
1714 | grf += 1 * reg_scale; |
||
1715 | } |
||
1716 | |||
1717 | /* r29-r30 or r58-r59: interpoloated w */ |
||
1718 | if (sh->in.has_pos) { |
||
1719 | fcc->payloads[i].source_w = grf; |
||
1720 | grf += 1 * reg_scale; |
||
1721 | } |
||
1722 | |||
1723 | /* r31 or r60: position offset */ |
||
1724 | if (false) { |
||
1725 | fcc->payloads[i].pos_offset = grf; |
||
1726 | grf++; |
||
1727 | } |
||
1728 | |||
1729 | if (fcc->dispatch_mode != GEN6_PS_DISPATCH_32) |
||
1730 | break; |
||
1731 | } |
||
1732 | |||
1733 | return grf; |
||
1734 | } |
||
1735 | |||
1736 | /** |
||
1737 | * Translate the TGSI tokens. |
||
1738 | */ |
||
1739 | static bool |
||
1740 | fs_setup_tgsi(struct toy_compiler *tc, const struct tgsi_token *tokens, |
||
1741 | struct toy_tgsi *tgsi) |
||
1742 | { |
||
1743 | if (ilo_debug & ILO_DEBUG_FS) { |
||
1744 | ilo_printf("dumping fragment shader\n"); |
||
1745 | ilo_printf("\n"); |
||
1746 | |||
1747 | tgsi_dump(tokens, 0); |
||
1748 | ilo_printf("\n"); |
||
1749 | } |
||
1750 | |||
1751 | toy_compiler_translate_tgsi(tc, tokens, false, tgsi); |
||
1752 | if (tc->fail) { |
||
1753 | ilo_err("failed to translate FS TGSI tokens: %s\n", tc->reason); |
||
1754 | return false; |
||
1755 | } |
||
1756 | |||
1757 | if (ilo_debug & ILO_DEBUG_FS) { |
||
1758 | ilo_printf("TGSI translator:\n"); |
||
1759 | toy_tgsi_dump(tgsi); |
||
1760 | ilo_printf("\n"); |
||
1761 | toy_compiler_dump(tc); |
||
1762 | ilo_printf("\n"); |
||
1763 | } |
||
1764 | |||
1765 | return true; |
||
1766 | } |
||
1767 | |||
1768 | /** |
||
1769 | * Set up FS compile context. This includes translating the TGSI tokens. |
||
1770 | */ |
||
1771 | static bool |
||
1772 | fs_setup(struct fs_compile_context *fcc, |
||
1773 | const struct ilo_shader_state *state, |
||
1774 | const struct ilo_shader_variant *variant) |
||
1775 | { |
||
1776 | int num_consts; |
||
1777 | |||
1778 | memset(fcc, 0, sizeof(*fcc)); |
||
1779 | |||
1780 | fcc->shader = CALLOC_STRUCT(ilo_shader); |
||
1781 | if (!fcc->shader) |
||
1782 | return false; |
||
1783 | |||
1784 | fcc->variant = variant; |
||
1785 | |||
1786 | toy_compiler_init(&fcc->tc, state->info.dev); |
||
1787 | |||
1788 | fcc->dispatch_mode = GEN6_PS_DISPATCH_8; |
||
1789 | |||
1790 | fcc->tc.templ.access_mode = GEN6_ALIGN_1; |
||
1791 | if (fcc->dispatch_mode == GEN6_PS_DISPATCH_16) { |
||
1792 | fcc->tc.templ.qtr_ctrl = GEN6_QTRCTRL_1H; |
||
1793 | fcc->tc.templ.exec_size = GEN6_EXECSIZE_16; |
||
1794 | } |
||
1795 | else { |
||
1796 | fcc->tc.templ.qtr_ctrl = GEN6_QTRCTRL_1Q; |
||
1797 | fcc->tc.templ.exec_size = GEN6_EXECSIZE_8; |
||
1798 | } |
||
1799 | |||
1800 | fcc->tc.rect_linear_width = 8; |
||
1801 | |||
1802 | /* |
||
1803 | * The classic driver uses the sampler cache (gen6) or the data cache |
||
1804 | * (gen7). Why? |
||
1805 | */ |
||
1806 | fcc->const_cache = GEN6_SFID_DP_CC; |
||
1807 | |||
1808 | if (!fs_setup_tgsi(&fcc->tc, state->info.tokens, &fcc->tgsi)) { |
||
1809 | toy_compiler_cleanup(&fcc->tc); |
||
1810 | FREE(fcc->shader); |
||
1811 | return false; |
||
1812 | } |
||
1813 | |||
1814 | fs_setup_shader_in(fcc->shader, &fcc->tgsi, fcc->variant->u.fs.flatshade); |
||
1815 | fs_setup_shader_out(fcc->shader, &fcc->tgsi); |
||
1816 | |||
1817 | if (fcc->variant->use_pcb && !fcc->tgsi.const_indirect) { |
||
1818 | num_consts = (fcc->tgsi.const_count + 1) / 2; |
||
1819 | |||
1820 | /* |
||
1821 | * From the Sandy Bridge PRM, volume 2 part 1, page 287: |
||
1822 | * |
||
1823 | * "The sum of all four read length fields (each incremented to |
||
1824 | * represent the actual read length) must be less than or equal to |
||
1825 | * 64" |
||
1826 | * |
||
1827 | * Since we are usually under a high register pressure, do not allow |
||
1828 | * for more than 8. |
||
1829 | */ |
||
1830 | if (num_consts > 8) |
||
1831 | num_consts = 0; |
||
1832 | } |
||
1833 | else { |
||
1834 | num_consts = 0; |
||
1835 | } |
||
1836 | |||
1837 | fcc->shader->skip_cbuf0_upload = (!fcc->tgsi.const_count || num_consts); |
||
1838 | fcc->shader->pcb.cbuf0_size = num_consts * (sizeof(float) * 8); |
||
1839 | |||
1840 | fcc->first_const_grf = fs_setup_payloads(fcc); |
||
1841 | fcc->first_attr_grf = fcc->first_const_grf + num_consts; |
||
1842 | fcc->first_free_grf = fcc->first_attr_grf + fcc->shader->in.count * 2; |
||
1843 | fcc->last_free_grf = 127; |
||
1844 | |||
1845 | /* m0 is reserved for system routines */ |
||
1846 | fcc->first_free_mrf = 1; |
||
1847 | fcc->last_free_mrf = 15; |
||
1848 | |||
1849 | /* instructions are compressed with GEN6_EXECSIZE_16 */ |
||
1850 | fcc->num_grf_per_vrf = |
||
1851 | (fcc->dispatch_mode == GEN6_PS_DISPATCH_16) ? 2 : 1; |
||
1852 | |||
1853 | if (ilo_dev_gen(fcc->tc.dev) >= ILO_GEN(7)) { |
||
1854 | fcc->last_free_grf -= 15; |
||
1855 | fcc->first_free_mrf = fcc->last_free_grf + 1; |
||
1856 | fcc->last_free_mrf = fcc->first_free_mrf + 14; |
||
1857 | } |
||
1858 | |||
1859 | fcc->shader->in.start_grf = fcc->first_const_grf; |
||
1860 | fcc->shader->has_kill = fcc->tgsi.uses_kill; |
||
1861 | fcc->shader->dispatch_16 = |
||
1862 | (fcc->dispatch_mode == GEN6_PS_DISPATCH_16); |
||
1863 | |||
1864 | fcc->shader->bt.rt_base = 0; |
||
1865 | fcc->shader->bt.rt_count = fcc->variant->u.fs.num_cbufs; |
||
1866 | /* to send EOT */ |
||
1867 | if (!fcc->shader->bt.rt_count) |
||
1868 | fcc->shader->bt.rt_count = 1; |
||
1869 | |||
1870 | fcc->shader->bt.tex_base = fcc->shader->bt.rt_base + |
||
1871 | fcc->shader->bt.rt_count; |
||
1872 | fcc->shader->bt.tex_count = fcc->variant->num_sampler_views; |
||
1873 | |||
1874 | fcc->shader->bt.const_base = fcc->shader->bt.tex_base + |
||
1875 | fcc->shader->bt.tex_count; |
||
1876 | fcc->shader->bt.const_count = state->info.constant_buffer_count; |
||
1877 | |||
1878 | fcc->shader->bt.total_count = fcc->shader->bt.const_base + |
||
1879 | fcc->shader->bt.const_count; |
||
1880 | |||
1881 | return true; |
||
1882 | } |
||
1883 | |||
1884 | /** |
||
1885 | * Compile the fragment shader. |
||
1886 | */ |
||
1887 | struct ilo_shader * |
||
1888 | ilo_shader_compile_fs(const struct ilo_shader_state *state, |
||
1889 | const struct ilo_shader_variant *variant) |
||
1890 | { |
||
1891 | struct fs_compile_context fcc; |
||
1892 | |||
1893 | if (!fs_setup(&fcc, state, variant)) |
||
1894 | return NULL; |
||
1895 | |||
1896 | fs_write_fb(&fcc); |
||
1897 | |||
1898 | if (!fs_compile(&fcc)) { |
||
1899 | FREE(fcc.shader); |
||
1900 | fcc.shader = NULL; |
||
1901 | } |
||
1902 | |||
1903 | toy_tgsi_cleanup(&fcc.tgsi); |
||
1904 | toy_compiler_cleanup(&fcc.tc); |
||
1905 | |||
1906 | return fcc.shader; |
||
1907 | }>><>><>>>><>><>>><>>>>>>>>>>>>>>><>>>>>>>>>>>>>>>>>=>>>>>>>=>=>=>>=>>>>>>>=>=>=>>>>>>>>>> |