Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
5564 | serge | 1 | /* |
2 | * Copyright (c) 2014 Scott Mansell |
||
3 | * Copyright © 2014 Broadcom |
||
4 | * |
||
5 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
6 | * copy of this software and associated documentation files (the "Software"), |
||
7 | * to deal in the Software without restriction, including without limitation |
||
8 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
||
9 | * and/or sell copies of the Software, and to permit persons to whom the |
||
10 | * Software is furnished to do so, subject to the following conditions: |
||
11 | * |
||
12 | * The above copyright notice and this permission notice (including the next |
||
13 | * paragraph) shall be included in all copies or substantial portions of the |
||
14 | * Software. |
||
15 | * |
||
16 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||
17 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
18 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
||
19 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||
20 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
||
21 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
||
22 | * IN THE SOFTWARE. |
||
23 | */ |
||
24 | |||
#include <inttypes.h>   /* header name lost in extraction; restored per upstream vc4_program.c — verify */
#include "pipe/p_state.h"
#include "util/u_format.h"
#include "util/u_hash.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/u_pack_color.h"
#include "util/format_srgb.h"
#include "util/ralloc.h"
#include "util/hash_table.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_lowering.h"
#include "tgsi/tgsi_parse.h"
#include "nir/tgsi_to_nir.h"

#include "vc4_context.h"
#include "vc4_qpu.h"
#include "vc4_qir.h"
#ifdef USE_VC4_SIMULATOR
#include "simpenrose/simpenrose.h"
#endif
||
47 | |||
/* Compile-state key shared by the FS and VS keys: the per-sampler state
 * that affects the code we generate, plus which user clip planes are
 * enabled (presumably a bitmask — confirm against the state-upload code).
 */
struct vc4_key {
        struct vc4_uncompiled_shader *shader_state;
        struct {
                enum pipe_format format;
                unsigned compare_mode:1;  /* shadow compare enabled (see ntq_emit_tex) */
                unsigned compare_func:3;  /* PIPE_FUNC_* for the shadow compare */
                unsigned wrap_s:3;        /* PIPE_TEX_WRAP_* for the s coordinate */
                unsigned wrap_t:3;        /* PIPE_TEX_WRAP_* for the t coordinate */
                uint8_t swizzle[4];       /* sampler-view swizzle applied to texture results */
        } tex[VC4_MAX_TEXTURE_SAMPLERS];
        uint8_t ucp_enables;
};
||
60 | |||
/* Fragment-shader compile key: render-target, depth/stencil, blend and
 * primitive state that changes the generated fragment code.
 */
struct vc4_fs_key {
        struct vc4_key base;
        enum pipe_format color_format;
        bool depth_enabled;
        bool stencil_enabled;
        bool stencil_twoside;
        bool stencil_full_writemasks;
        bool is_points;                /* drawing point primitives */
        bool is_lines;                 /* drawing line primitives */
        bool alpha_test;
        bool point_coord_upper_left;   /* flips the point-sprite T coordinate (see emit_point_coord_input) */
        bool light_twoside;
        uint8_t alpha_test_func;       /* PIPE_FUNC_* for the alpha test */
        uint8_t logicop_func;
        uint32_t point_sprite_mask;

        struct pipe_rt_blend_state blend;
};
||
79 | |||
/* Vertex-shader compile key. */
struct vc4_vs_key {
        struct vc4_key base;

        /**
         * This is a proxy for the array of FS input semantics, which is
         * larger than we would want to put in the key.
         */
        uint64_t compiled_fs_id;

        /* Per-attribute vertex formats (consumed by emit_vertex_input). */
        enum pipe_format attr_formats[8];
        bool is_coord;
        bool per_vertex_point_size;
};
||
93 | |||
94 | static void |
||
95 | resize_qreg_array(struct vc4_compile *c, |
||
96 | struct qreg **regs, |
||
97 | uint32_t *size, |
||
98 | uint32_t decl_size) |
||
99 | { |
||
100 | if (*size >= decl_size) |
||
101 | return; |
||
102 | |||
103 | uint32_t old_size = *size; |
||
104 | *size = MAX2(*size * 2, decl_size); |
||
105 | *regs = reralloc(c, *regs, struct qreg, *size); |
||
106 | if (!*regs) { |
||
107 | fprintf(stderr, "Malloc failure\n"); |
||
108 | abort(); |
||
109 | } |
||
110 | |||
111 | for (uint32_t i = old_size; i < *size; i++) |
||
112 | (*regs)[i] = c->undef; |
||
113 | } |
||
114 | |||
115 | static struct qreg |
||
116 | indirect_uniform_load(struct vc4_compile *c, |
||
117 | struct qreg indirect_offset, |
||
118 | unsigned offset) |
||
119 | { |
||
120 | struct vc4_compiler_ubo_range *range = NULL; |
||
121 | unsigned i; |
||
122 | for (i = 0; i < c->num_uniform_ranges; i++) { |
||
123 | range = &c->ubo_ranges[i]; |
||
124 | if (offset >= range->src_offset && |
||
125 | offset < range->src_offset + range->size) { |
||
126 | break; |
||
127 | } |
||
128 | } |
||
129 | /* The driver-location-based offset always has to be within a declared |
||
130 | * uniform range. |
||
131 | */ |
||
132 | assert(range); |
||
133 | if (!range->used) { |
||
134 | range->used = true; |
||
135 | range->dst_offset = c->next_ubo_dst_offset; |
||
136 | c->next_ubo_dst_offset += range->size; |
||
137 | c->num_ubo_ranges++; |
||
138 | }; |
||
139 | |||
140 | offset -= range->src_offset; |
||
141 | /* Translate the user's TGSI register index from the TGSI register |
||
142 | * base to a byte offset. |
||
143 | */ |
||
144 | indirect_offset = qir_SHL(c, indirect_offset, qir_uniform_ui(c, 4)); |
||
145 | |||
146 | /* Adjust for where we stored the TGSI register base. */ |
||
147 | indirect_offset = qir_ADD(c, indirect_offset, |
||
148 | qir_uniform_ui(c, (range->dst_offset + |
||
149 | offset))); |
||
150 | indirect_offset = qir_MIN(c, indirect_offset, |
||
151 | qir_uniform_ui(c, (range->dst_offset + |
||
152 | range->size - 4))); |
||
153 | |||
154 | qir_TEX_DIRECT(c, indirect_offset, qir_uniform(c, QUNIFORM_UBO_ADDR, 0)); |
||
155 | struct qreg r4 = qir_TEX_RESULT(c); |
||
156 | c->num_texture_samples++; |
||
157 | return qir_MOV(c, r4); |
||
158 | } |
||
159 | |||
160 | static struct qreg * |
||
161 | ntq_get_dest(struct vc4_compile *c, nir_dest dest) |
||
162 | { |
||
163 | assert(!dest.is_ssa); |
||
164 | nir_register *reg = dest.reg.reg; |
||
165 | struct hash_entry *entry = _mesa_hash_table_search(c->def_ht, reg); |
||
166 | assert(reg->num_array_elems == 0); |
||
167 | assert(dest.reg.base_offset == 0); |
||
168 | |||
169 | struct qreg *qregs = entry->data; |
||
170 | return qregs; |
||
171 | } |
||
172 | |||
173 | static struct qreg |
||
174 | ntq_get_src(struct vc4_compile *c, nir_src src, int i) |
||
175 | { |
||
176 | struct hash_entry *entry; |
||
177 | if (src.is_ssa) { |
||
178 | entry = _mesa_hash_table_search(c->def_ht, src.ssa); |
||
179 | assert(i < src.ssa->num_components); |
||
180 | } else { |
||
181 | nir_register *reg = src.reg.reg; |
||
182 | entry = _mesa_hash_table_search(c->def_ht, reg); |
||
183 | assert(reg->num_array_elems == 0); |
||
184 | assert(src.reg.base_offset == 0); |
||
185 | assert(i < reg->num_components); |
||
186 | } |
||
187 | |||
188 | struct qreg *qregs = entry->data; |
||
189 | return qregs[i]; |
||
190 | } |
||
191 | |||
192 | static struct qreg |
||
193 | ntq_get_alu_src(struct vc4_compile *c, nir_alu_instr *instr, |
||
194 | unsigned src) |
||
195 | { |
||
196 | assert(util_is_power_of_two(instr->dest.write_mask)); |
||
197 | unsigned chan = ffs(instr->dest.write_mask) - 1; |
||
198 | struct qreg r = ntq_get_src(c, instr->src[src].src, |
||
199 | instr->src[src].swizzle[chan]); |
||
200 | |||
201 | assert(!instr->src[src].abs); |
||
202 | assert(!instr->src[src].negate); |
||
203 | |||
204 | return r; |
||
205 | }; |
||
206 | |||
207 | static struct qreg |
||
208 | get_swizzled_channel(struct vc4_compile *c, |
||
209 | struct qreg *srcs, int swiz) |
||
210 | { |
||
211 | switch (swiz) { |
||
212 | default: |
||
213 | case UTIL_FORMAT_SWIZZLE_NONE: |
||
214 | fprintf(stderr, "warning: unknown swizzle\n"); |
||
215 | /* FALLTHROUGH */ |
||
216 | case UTIL_FORMAT_SWIZZLE_0: |
||
217 | return qir_uniform_f(c, 0.0); |
||
218 | case UTIL_FORMAT_SWIZZLE_1: |
||
219 | return qir_uniform_f(c, 1.0); |
||
220 | case UTIL_FORMAT_SWIZZLE_X: |
||
221 | case UTIL_FORMAT_SWIZZLE_Y: |
||
222 | case UTIL_FORMAT_SWIZZLE_Z: |
||
223 | case UTIL_FORMAT_SWIZZLE_W: |
||
224 | return srcs[swiz]; |
||
225 | } |
||
226 | } |
||
227 | |||
228 | static inline struct qreg |
||
229 | qir_SAT(struct vc4_compile *c, struct qreg val) |
||
230 | { |
||
231 | return qir_FMAX(c, |
||
232 | qir_FMIN(c, val, qir_uniform_f(c, 1.0)), |
||
233 | qir_uniform_f(c, 0.0)); |
||
234 | } |
||
235 | |||
236 | static struct qreg |
||
237 | ntq_rcp(struct vc4_compile *c, struct qreg x) |
||
238 | { |
||
239 | struct qreg r = qir_RCP(c, x); |
||
240 | |||
241 | /* Apply a Newton-Raphson step to improve the accuracy. */ |
||
242 | r = qir_FMUL(c, r, qir_FSUB(c, |
||
243 | qir_uniform_f(c, 2.0), |
||
244 | qir_FMUL(c, x, r))); |
||
245 | |||
246 | return r; |
||
247 | } |
||
248 | |||
249 | static struct qreg |
||
250 | ntq_rsq(struct vc4_compile *c, struct qreg x) |
||
251 | { |
||
252 | struct qreg r = qir_RSQ(c, x); |
||
253 | |||
254 | /* Apply a Newton-Raphson step to improve the accuracy. */ |
||
255 | r = qir_FMUL(c, r, qir_FSUB(c, |
||
256 | qir_uniform_f(c, 1.5), |
||
257 | qir_FMUL(c, |
||
258 | qir_uniform_f(c, 0.5), |
||
259 | qir_FMUL(c, x, |
||
260 | qir_FMUL(c, r, r))))); |
||
261 | |||
262 | return r; |
||
263 | } |
||
264 | |||
/* Converts an sRGB-encoded channel to linear using the standard piecewise
 * definition: a linear segment below the 0.04045 cutoff and a 2.4-power
 * curve above it (CPU-side equivalents live in util/format_srgb.h).
 */
static struct qreg
qir_srgb_decode(struct vc4_compile *c, struct qreg srgb)
{
        struct qreg low = qir_FMUL(c, srgb, qir_uniform_f(c, 1.0 / 12.92));
        struct qreg high = qir_POW(c,
                                   qir_FMUL(c,
                                            qir_FADD(c,
                                                     srgb,
                                                     qir_uniform_f(c, 0.055)),
                                            qir_uniform_f(c, 1.0 / 1.055)),
                                   qir_uniform_f(c, 2.4));

        /* Flags on (srgb - cutoff): NS selects "low" for values below the
         * cutoff, "high" otherwise.
         */
        qir_SF(c, qir_FSUB(c, srgb, qir_uniform_f(c, 0.04045)));
        return qir_SEL_X_Y_NS(c, low, high);
}
||
280 | |||
/* Converts a linear channel to sRGB encoding: linear segment below the
 * 0.0031308 cutoff, 1/2.4 (~0.41666) power curve above it (CPU-side
 * equivalents live in util/format_srgb.h).
 */
static struct qreg
qir_srgb_encode(struct vc4_compile *c, struct qreg linear)
{
        struct qreg low = qir_FMUL(c, linear, qir_uniform_f(c, 12.92));
        struct qreg high = qir_FSUB(c,
                                    qir_FMUL(c,
                                             qir_uniform_f(c, 1.055),
                                             qir_POW(c,
                                                     linear,
                                                     qir_uniform_f(c, 0.41666))),
                                    qir_uniform_f(c, 0.055));

        /* Flags on (linear - cutoff): NS selects "low" below the cutoff. */
        qir_SF(c, qir_FSUB(c, linear, qir_uniform_f(c, 0.0031308)));
        return qir_SEL_X_Y_NS(c, low, high);
}
||
296 | |||
/* 32-bit (modular) unsigned multiply built on the hardware's 24-bit
 * multiplier (MUL24 presumably operates on the low 24 bits of each
 * operand — confirm against vc4_qir.h).
 *
 * Each operand is split into a low 24 bits and a high 8 bits (the SHR by
 * 24).  The three partial products affecting the low 32 bits of the result
 * are combined; the hi*hi product only contributes to bits >= 48 and is
 * dropped.
 */
static struct qreg
ntq_umul(struct vc4_compile *c, struct qreg src0, struct qreg src1)
{
        struct qreg src0_hi = qir_SHR(c, src0,
                                      qir_uniform_ui(c, 24));
        struct qreg src1_hi = qir_SHR(c, src1,
                                      qir_uniform_ui(c, 24));

        struct qreg hilo = qir_MUL24(c, src0_hi, src1);
        struct qreg lohi = qir_MUL24(c, src0, src1_hi);
        struct qreg lolo = qir_MUL24(c, src0, src1);

        return qir_ADD(c, lolo, qir_SHL(c,
                                        qir_ADD(c, hilo, lohi),
                                        qir_uniform_ui(c, 24)));
}
||
313 | |||
/* Emits QIR for a NIR texture instruction: gathers the instruction's
 * sources, writes the TMU coordinate/LOD registers paired with their
 * per-unit uniform config words, then unpacks and swizzles the 32-bit
 * result (delivered in r4) into the destination channels.
 */
static void
ntq_emit_tex(struct vc4_compile *c, nir_tex_instr *instr)
{
        struct qreg s, t, r, lod, proj, compare;
        bool is_txb = false, is_txl = false, has_proj = false;
        unsigned unit = instr->sampler_index;

        /* Collect the sources we care about from the NIR instruction. */
        for (unsigned i = 0; i < instr->num_srcs; i++) {
                switch (instr->src[i].src_type) {
                case nir_tex_src_coord:
                        s = ntq_get_src(c, instr->src[i].src, 0);
                        if (instr->sampler_dim != GLSL_SAMPLER_DIM_1D)
                                t = ntq_get_src(c, instr->src[i].src, 1);
                        if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE)
                                r = ntq_get_src(c, instr->src[i].src, 2);
                        break;
                case nir_tex_src_bias:
                        lod = ntq_get_src(c, instr->src[i].src, 0);
                        is_txb = true;
                        break;
                case nir_tex_src_lod:
                        lod = ntq_get_src(c, instr->src[i].src, 0);
                        is_txl = true;
                        break;
                case nir_tex_src_comparitor:
                        compare = ntq_get_src(c, instr->src[i].src, 0);
                        break;
                case nir_tex_src_projector:
                        /* Fold the projective divide into the coordinates
                         * up front.
                         */
                        proj = qir_RCP(c, ntq_get_src(c, instr->src[i].src, 0));
                        s = qir_FMUL(c, s, proj);
                        t = qir_FMUL(c, t, proj);
                        has_proj = true;
                        break;
                default:
                        unreachable("unknown texture source");
                }
        }

        /* Uniform config words consumed, in order, by the TMU register
         * writes below (next_texture_u tracks which is next).
         */
        struct qreg texture_u[] = {
                qir_uniform(c, QUNIFORM_TEXTURE_CONFIG_P0, unit),
                qir_uniform(c, QUNIFORM_TEXTURE_CONFIG_P1, unit),
                qir_uniform(c, QUNIFORM_CONSTANT, 0),
                qir_uniform(c, QUNIFORM_CONSTANT, 0),
        };
        uint32_t next_texture_u = 0;

        /* There is no native support for GL texture rectangle coordinates, so
         * we have to rescale from ([0, width], [0, height]) to ([0, 1], [0,
         * 1]).
         */
        if (instr->sampler_dim == GLSL_SAMPLER_DIM_RECT) {
                s = qir_FMUL(c, s,
                             qir_uniform(c, QUNIFORM_TEXRECT_SCALE_X, unit));
                t = qir_FMUL(c, t,
                             qir_uniform(c, QUNIFORM_TEXRECT_SCALE_Y, unit));
        }

        /* Cube maps and explicit-LOD lookups need the P2 config word. */
        if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE || is_txl) {
                texture_u[2] = qir_uniform(c, QUNIFORM_TEXTURE_CONFIG_P2,
                                           unit | (is_txl << 16));
        }

        if (instr->sampler_dim == GLSL_SAMPLER_DIM_CUBE) {
                /* Normalize the cube coordinates by the major axis. */
                struct qreg ma = qir_FMAXABS(c, qir_FMAXABS(c, s, t), r);
                struct qreg rcp_ma = qir_RCP(c, ma);
                s = qir_FMUL(c, s, rcp_ma);
                t = qir_FMUL(c, t, rcp_ma);
                r = qir_FMUL(c, r, rcp_ma);

                qir_TEX_R(c, r, texture_u[next_texture_u++]);
        } else if (c->key->tex[unit].wrap_s == PIPE_TEX_WRAP_CLAMP_TO_BORDER ||
                   c->key->tex[unit].wrap_s == PIPE_TEX_WRAP_CLAMP ||
                   c->key->tex[unit].wrap_t == PIPE_TEX_WRAP_CLAMP_TO_BORDER ||
                   c->key->tex[unit].wrap_t == PIPE_TEX_WRAP_CLAMP) {
                /* Border-color wrap modes supply the border through the R
                 * register instead of a cube face coordinate.
                 */
                qir_TEX_R(c, qir_uniform(c, QUNIFORM_TEXTURE_BORDER_COLOR, unit),
                          texture_u[next_texture_u++]);
        }

        /* PIPE_TEX_WRAP_CLAMP additionally clamps the coordinate itself to
         * [0, 1].
         */
        if (c->key->tex[unit].wrap_s == PIPE_TEX_WRAP_CLAMP) {
                s = qir_SAT(c, s);
        }

        if (c->key->tex[unit].wrap_t == PIPE_TEX_WRAP_CLAMP) {
                t = qir_SAT(c, t);
        }

        qir_TEX_T(c, t, texture_u[next_texture_u++]);

        if (is_txl || is_txb)
                qir_TEX_B(c, lod, texture_u[next_texture_u++]);

        /* S is written last; the write order of the TMU registers matters
         * (NOTE(review): the S write triggers the lookup per the VC4 TMU
         * design — confirm against the hardware docs).
         */
        qir_TEX_S(c, s, texture_u[next_texture_u++]);

        c->num_texture_samples++;
        struct qreg r4 = qir_TEX_RESULT(c);

        enum pipe_format format = c->key->tex[unit].format;

        struct qreg unpacked[4];
        if (util_format_is_depth_or_stencil(format)) {
                /* The 24-bit depth value sits above the low 8 bits; shift
                 * it down and scale to [0, 1], then optionally apply the
                 * shadow comparison.
                 */
                struct qreg depthf = qir_ITOF(c, qir_SHR(c, r4,
                                                         qir_uniform_ui(c, 8)));
                struct qreg normalized = qir_FMUL(c, depthf,
                                                  qir_uniform_f(c, 1.0f/0xffffff));

                struct qreg depth_output;

                struct qreg one = qir_uniform_f(c, 1.0f);
                if (c->key->tex[unit].compare_mode) {
                        if (has_proj)
                                compare = qir_FMUL(c, compare, proj);

                        /* Each case sets flags on a (compare, normalized)
                         * difference and selects 1.0 or 0.0 accordingly.
                         */
                        switch (c->key->tex[unit].compare_func) {
                        case PIPE_FUNC_NEVER:
                                depth_output = qir_uniform_f(c, 0.0f);
                                break;
                        case PIPE_FUNC_ALWAYS:
                                depth_output = one;
                                break;
                        case PIPE_FUNC_EQUAL:
                                qir_SF(c, qir_FSUB(c, compare, normalized));
                                depth_output = qir_SEL_X_0_ZS(c, one);
                                break;
                        case PIPE_FUNC_NOTEQUAL:
                                qir_SF(c, qir_FSUB(c, compare, normalized));
                                depth_output = qir_SEL_X_0_ZC(c, one);
                                break;
                        case PIPE_FUNC_GREATER:
                                qir_SF(c, qir_FSUB(c, compare, normalized));
                                depth_output = qir_SEL_X_0_NC(c, one);
                                break;
                        case PIPE_FUNC_GEQUAL:
                                qir_SF(c, qir_FSUB(c, normalized, compare));
                                depth_output = qir_SEL_X_0_NS(c, one);
                                break;
                        case PIPE_FUNC_LESS:
                                qir_SF(c, qir_FSUB(c, compare, normalized));
                                depth_output = qir_SEL_X_0_NS(c, one);
                                break;
                        case PIPE_FUNC_LEQUAL:
                                qir_SF(c, qir_FSUB(c, normalized, compare));
                                depth_output = qir_SEL_X_0_NC(c, one);
                                break;
                        }
                } else {
                        depth_output = normalized;
                }

                for (int i = 0; i < 4; i++)
                        unpacked[i] = depth_output;
        } else {
                /* Color results: unpack the four 8-bit channels from r4. */
                for (int i = 0; i < 4; i++)
                        unpacked[i] = qir_R4_UNPACK(c, r4, i);
        }

        /* Apply the texture format's channel swizzle... */
        const uint8_t *format_swiz = vc4_get_format_swizzle(format);
        struct qreg texture_output[4];
        for (int i = 0; i < 4; i++) {
                texture_output[i] = get_swizzled_channel(c, unpacked,
                                                         format_swiz[i]);
        }

        /* ...decode sRGB on the color channels only (alpha stays linear)... */
        if (util_format_is_srgb(format)) {
                for (int i = 0; i < 3; i++)
                        texture_output[i] = qir_srgb_decode(c,
                                                            texture_output[i]);
        }

        /* ...and finally the sampler-state swizzle into the destination. */
        struct qreg *dest = ntq_get_dest(c, instr->dest);
        for (int i = 0; i < 4; i++) {
                dest[i] = get_swizzled_channel(c, texture_output,
                                               c->key->tex[unit].swizzle[i]);
        }
}
||
488 | |||
489 | /** |
||
490 | * Computes x - floor(x), which is tricky because our FTOI truncates (rounds |
||
491 | * to zero). |
||
492 | */ |
||
493 | static struct qreg |
||
494 | ntq_ffract(struct vc4_compile *c, struct qreg src) |
||
495 | { |
||
496 | struct qreg trunc = qir_ITOF(c, qir_FTOI(c, src)); |
||
497 | struct qreg diff = qir_FSUB(c, src, trunc); |
||
498 | qir_SF(c, diff); |
||
499 | return qir_SEL_X_Y_NS(c, |
||
500 | qir_FADD(c, diff, qir_uniform_f(c, 1.0)), |
||
501 | diff); |
||
502 | } |
||
503 | |||
504 | /** |
||
505 | * Computes floor(x), which is tricky because our FTOI truncates (rounds to |
||
506 | * zero). |
||
507 | */ |
||
508 | static struct qreg |
||
509 | ntq_ffloor(struct vc4_compile *c, struct qreg src) |
||
510 | { |
||
511 | struct qreg trunc = qir_ITOF(c, qir_FTOI(c, src)); |
||
512 | |||
513 | /* This will be < 0 if we truncated and the truncation was of a value |
||
514 | * that was < 0 in the first place. |
||
515 | */ |
||
516 | qir_SF(c, qir_FSUB(c, src, trunc)); |
||
517 | |||
518 | return qir_SEL_X_Y_NS(c, |
||
519 | qir_FSUB(c, trunc, qir_uniform_f(c, 1.0)), |
||
520 | trunc); |
||
521 | } |
||
522 | |||
523 | /** |
||
524 | * Computes ceil(x), which is tricky because our FTOI truncates (rounds to |
||
525 | * zero). |
||
526 | */ |
||
527 | static struct qreg |
||
528 | ntq_fceil(struct vc4_compile *c, struct qreg src) |
||
529 | { |
||
530 | struct qreg trunc = qir_ITOF(c, qir_FTOI(c, src)); |
||
531 | |||
532 | /* This will be < 0 if we truncated and the truncation was of a value |
||
533 | * that was > 0 in the first place. |
||
534 | */ |
||
535 | qir_SF(c, qir_FSUB(c, trunc, src)); |
||
536 | |||
537 | return qir_SEL_X_Y_NS(c, |
||
538 | qir_FADD(c, trunc, qir_uniform_f(c, 1.0)), |
||
539 | trunc); |
||
540 | } |
||
541 | |||
/* Approximates sin(src) (src in radians) with a Taylor-style odd polynomial
 * evaluated on the phase wrapped into [-0.5, 0.5) periods.  The -0.5 phase
 * offset applied below flips the sign (sin(2*pi*(t-0.5)) = -sin(2*pi*t)),
 * which is why the coefficients start negative.
 */
static struct qreg
ntq_fsin(struct vc4_compile *c, struct qreg src)
{
        /* Odd-power series coefficients: -(2*pi)^k / k! with alternating
         * sign, expressed so x can be measured in periods.
         */
        float coeff[] = {
                -2.0 * M_PI,
                pow(2.0 * M_PI, 3) / (3 * 2 * 1),
                -pow(2.0 * M_PI, 5) / (5 * 4 * 3 * 2 * 1),
                pow(2.0 * M_PI, 7) / (7 * 6 * 5 * 4 * 3 * 2 * 1),
                -pow(2.0 * M_PI, 9) / (9 * 8 * 7 * 6 * 5 * 4 * 3 * 2 * 1),
        };

        /* Convert radians to periods... */
        struct qreg scaled_x =
                qir_FMUL(c,
                         src,
                         qir_uniform_f(c, 1.0f / (M_PI * 2.0f)));

        /* ...and wrap the phase into [-0.5, 0.5). */
        struct qreg x = qir_FADD(c,
                                 ntq_ffract(c, scaled_x),
                                 qir_uniform_f(c, -0.5));
        struct qreg x2 = qir_FMUL(c, x, x);
        struct qreg sum = qir_FMUL(c, x, qir_uniform_f(c, coeff[0]));
        for (int i = 1; i < ARRAY_SIZE(coeff); i++) {
                /* Advance x to the next odd power. */
                x = qir_FMUL(c, x, x2);
                sum = qir_FADD(c,
                               sum,
                               qir_FMUL(c,
                                        x,
                                        qir_uniform_f(c, coeff[i])));
        }
        return sum;
}
||
573 | |||
574 | static struct qreg |
||
575 | ntq_fcos(struct vc4_compile *c, struct qreg src) |
||
576 | { |
||
577 | float coeff[] = { |
||
578 | -1.0f, |
||
579 | pow(2.0 * M_PI, 2) / (2 * 1), |
||
580 | -pow(2.0 * M_PI, 4) / (4 * 3 * 2 * 1), |
||
581 | pow(2.0 * M_PI, 6) / (6 * 5 * 4 * 3 * 2 * 1), |
||
582 | -pow(2.0 * M_PI, 8) / (8 * 7 * 6 * 5 * 4 * 3 * 2 * 1), |
||
583 | pow(2.0 * M_PI, 10) / (10 * 9 * 8 * 7 * 6 * 5 * 4 * 3 * 2 * 1), |
||
584 | }; |
||
585 | |||
586 | struct qreg scaled_x = |
||
587 | qir_FMUL(c, src, |
||
588 | qir_uniform_f(c, 1.0f / (M_PI * 2.0f))); |
||
589 | struct qreg x_frac = qir_FADD(c, |
||
590 | ntq_ffract(c, scaled_x), |
||
591 | qir_uniform_f(c, -0.5)); |
||
592 | |||
593 | struct qreg sum = qir_uniform_f(c, coeff[0]); |
||
594 | struct qreg x2 = qir_FMUL(c, x_frac, x_frac); |
||
595 | struct qreg x = x2; /* Current x^2, x^4, or x^6 */ |
||
596 | for (int i = 1; i < ARRAY_SIZE(coeff); i++) { |
||
597 | if (i != 1) |
||
598 | x = qir_FMUL(c, x, x2); |
||
599 | |||
600 | struct qreg mul = qir_FMUL(c, |
||
601 | x, |
||
602 | qir_uniform_f(c, coeff[i])); |
||
603 | if (i == 0) |
||
604 | sum = mul; |
||
605 | else |
||
606 | sum = qir_FADD(c, sum, mul); |
||
607 | } |
||
608 | return sum; |
||
609 | } |
||
610 | |||
/* Computes sign(src): -1.0, 0.0, or 1.0. */
static struct qreg
ntq_fsign(struct vc4_compile *c, struct qreg src)
{
        qir_SF(c, src);
        /* The inner select yields 1.0 for nonzero (zero-flag clear) and 0.0
         * for zero; the outer one swaps in -1.0 when the negative flag is
         * set.
         */
        return qir_SEL_X_Y_NC(c,
                              qir_SEL_X_0_ZC(c, qir_uniform_f(c, 1.0)),
                              qir_uniform_f(c, -1.0));
}
||
619 | |||
/* Converts one channel of a vertex attribute from its raw VPM word(s) to a
 * float, according to the format's channel description.
 *
 * Returns c->undef for size/type combinations we can't handle; the caller
 * (emit_vertex_input) warns and substitutes 0.0.
 */
static struct qreg
get_channel_from_vpm(struct vc4_compile *c,
                     struct qreg *vpm_reads,
                     uint8_t swiz,
                     const struct util_format_description *desc)
{
        const struct util_format_channel_description *chan =
                &desc->channel[swiz];
        struct qreg temp;

        if (swiz > UTIL_FORMAT_SWIZZLE_W)
                /* SWIZZLE_0/1/NONE: constant channels. */
                return get_swizzled_channel(c, vpm_reads, swiz);
        else if (chan->size == 32 &&
                 chan->type == UTIL_FORMAT_TYPE_FLOAT) {
                /* 32-bit floats arrive in the right form already. */
                return get_swizzled_channel(c, vpm_reads, swiz);
        } else if (chan->size == 32 &&
                   chan->type == UTIL_FORMAT_TYPE_SIGNED) {
                if (chan->normalized) {
                        return qir_FMUL(c,
                                        qir_ITOF(c, vpm_reads[swiz]),
                                        qir_uniform_f(c,
                                                      1.0 / 0x7fffffff));
                } else {
                        return qir_ITOF(c, vpm_reads[swiz]);
                }
        } else if (chan->size == 8 &&
                   (chan->type == UTIL_FORMAT_TYPE_UNSIGNED ||
                    chan->type == UTIL_FORMAT_TYPE_SIGNED)) {
                /* All four 8-bit channels are packed in the first word. */
                struct qreg vpm = vpm_reads[0];
                if (chan->type == UTIL_FORMAT_TYPE_SIGNED) {
                        /* Flip each byte's sign bit so the unsigned unpack
                         * can be used; the bias is undone after unpacking.
                         */
                        temp = qir_XOR(c, vpm, qir_uniform_ui(c, 0x80808080));
                        if (chan->normalized) {
                                return qir_FSUB(c, qir_FMUL(c,
                                                            qir_UNPACK_8_F(c, temp, swiz),
                                                            qir_uniform_f(c, 2.0)),
                                                qir_uniform_f(c, 1.0));
                        } else {
                                return qir_FADD(c,
                                                qir_ITOF(c,
                                                         qir_UNPACK_8_I(c, temp,
                                                                        swiz)),
                                                qir_uniform_f(c, -128.0));
                        }
                } else {
                        if (chan->normalized) {
                                return qir_UNPACK_8_F(c, vpm, swiz);
                        } else {
                                return qir_ITOF(c, qir_UNPACK_8_I(c, vpm, swiz));
                        }
                }
        } else if (chan->size == 16 &&
                   (chan->type == UTIL_FORMAT_TYPE_UNSIGNED ||
                    chan->type == UTIL_FORMAT_TYPE_SIGNED)) {
                /* Two 16-bit channels are packed per word. */
                struct qreg vpm = vpm_reads[swiz / 2];

                /* Note that UNPACK_16F eats a half float, not ints, so we use
                 * UNPACK_16_I for all of these.
                 */
                if (chan->type == UTIL_FORMAT_TYPE_SIGNED) {
                        temp = qir_ITOF(c, qir_UNPACK_16_I(c, vpm, swiz % 2));
                        if (chan->normalized) {
                                return qir_FMUL(c, temp,
                                                qir_uniform_f(c, 1/32768.0f));
                        } else {
                                return temp;
                        }
                } else {
                        /* UNPACK_16I sign-extends, so we have to emit ANDs. */
                        temp = vpm;
                        if (swiz == 1 || swiz == 3)
                                temp = qir_UNPACK_16_I(c, temp, 1);
                        temp = qir_AND(c, temp, qir_uniform_ui(c, 0xffff));
                        temp = qir_ITOF(c, temp);

                        if (chan->normalized) {
                                return qir_FMUL(c, temp,
                                                qir_uniform_f(c, 1 / 65535.0));
                        } else {
                                return temp;
                        }
                }
        } else {
                /* Unsupported channel size/type combination. */
                return c->undef;
        }
}
||
705 | |||
/* Emits the VPM reads for one vertex attribute and converts its channels
 * into the four floats of c->inputs[attr*4 .. attr*4+3], warning once per
 * attribute and substituting 0.0 for unsupported formats.
 */
static void
emit_vertex_input(struct vc4_compile *c, int attr)
{
        enum pipe_format format = c->vs_key->attr_formats[attr];
        uint32_t attr_size = util_format_get_blocksize(format);
        struct qreg vpm_reads[4];

        /* Each VPM read is a 32-bit word, so round the attribute size up. */
        c->vattr_sizes[attr] = align(attr_size, 4);
        for (int i = 0; i < align(attr_size, 4) / 4; i++) {
                struct qreg vpm = { QFILE_VPM, attr * 4 + i };
                vpm_reads[i] = qir_MOV(c, vpm);
                c->num_inputs++;
        }

        bool format_warned = false;
        const struct util_format_description *desc =
                util_format_description(format);

        for (int i = 0; i < 4; i++) {
                uint8_t swiz = desc->swizzle[i];
                struct qreg result = get_channel_from_vpm(c, vpm_reads,
                                                          swiz, desc);

                /* QFILE_NULL marks a channel the converter couldn't handle. */
                if (result.file == QFILE_NULL) {
                        if (!format_warned) {
                                fprintf(stderr,
                                        "vtx element %d unsupported type: %s\n",
                                        attr, util_format_name(format));
                                format_warned = true;
                        }
                        result = qir_uniform_f(c, 0.0);
                }
                c->inputs[attr * 4 + i] = result;
        }
}
||
741 | |||
/* Sets up the four channels of gl_FragCoord: x/y straight from the
 * rasterizer, z rescaled from the 24-bit integer depth to [0, 1], and w as
 * the reciprocal of the incoming W.
 */
static void
emit_fragcoord_input(struct vc4_compile *c, int attr)
{
        c->inputs[attr * 4 + 0] = qir_FRAG_X(c);
        c->inputs[attr * 4 + 1] = qir_FRAG_Y(c);
        c->inputs[attr * 4 + 2] =
                qir_FMUL(c,
                         qir_ITOF(c, qir_FRAG_Z(c)),
                         qir_uniform_f(c, 1.0 / 0xffffff));
        c->inputs[attr * 4 + 3] = qir_RCP(c, qir_FRAG_W(c));
}
||
753 | |||
754 | static void |
||
755 | emit_point_coord_input(struct vc4_compile *c, int attr) |
||
756 | { |
||
757 | if (c->point_x.file == QFILE_NULL) { |
||
758 | c->point_x = qir_uniform_f(c, 0.0); |
||
759 | c->point_y = qir_uniform_f(c, 0.0); |
||
760 | } |
||
761 | |||
762 | c->inputs[attr * 4 + 0] = c->point_x; |
||
763 | if (c->fs_key->point_coord_upper_left) { |
||
764 | c->inputs[attr * 4 + 1] = qir_FSUB(c, |
||
765 | qir_uniform_f(c, 1.0), |
||
766 | c->point_y); |
||
767 | } else { |
||
768 | c->inputs[attr * 4 + 1] = c->point_y; |
||
769 | } |
||
770 | c->inputs[attr * 4 + 2] = qir_uniform_f(c, 0.0); |
||
771 | c->inputs[attr * 4 + 3] = qir_uniform_f(c, 1.0); |
||
772 | } |
||
773 | |||
/* Allocates the next varying slot (growing the input-semantics array as
 * needed), records its semantic/index/swizzle for the setup code, and
 * emits the QIR that produces the interpolated value.
 */
static struct qreg
emit_fragment_varying(struct vc4_compile *c, uint8_t semantic,
                      uint8_t index, uint8_t swizzle)
{
        uint32_t i = c->num_input_semantics++;
        struct qreg vary = {
                QFILE_VARY,
                i
        };

        if (c->num_input_semantics >= c->input_semantics_array_size) {
                c->input_semantics_array_size =
                        MAX2(4, c->input_semantics_array_size * 2);

                c->input_semantics = reralloc(c, c->input_semantics,
                                              struct vc4_varying_semantic,
                                              c->input_semantics_array_size);
        }

        c->input_semantics[i].semantic = semantic;
        c->input_semantics[i].index = index;
        c->input_semantics[i].swizzle = swizzle;

        /* Multiply the raw varying by W and add the C coefficient
         * (NOTE(review): matches VC4's varying interpolation scheme —
         * confirm against the QPU documentation).
         */
        return qir_VARY_ADD_C(c, qir_FMUL(c, vary, qir_FRAG_W(c)));
}
||
799 | |||
800 | static void |
||
801 | emit_fragment_input(struct vc4_compile *c, int attr, |
||
802 | unsigned semantic_name, unsigned semantic_index) |
||
803 | { |
||
804 | for (int i = 0; i < 4; i++) { |
||
805 | c->inputs[attr * 4 + i] = |
||
806 | emit_fragment_varying(c, |
||
807 | semantic_name, |
||
808 | semantic_index, |
||
809 | i); |
||
810 | c->num_inputs++; |
||
811 | } |
||
812 | } |
||
813 | |||
814 | static void |
||
815 | emit_face_input(struct vc4_compile *c, int attr) |
||
816 | { |
||
817 | c->inputs[attr * 4 + 0] = qir_FSUB(c, |
||
818 | qir_uniform_f(c, 1.0), |
||
819 | qir_FMUL(c, |
||
820 | qir_ITOF(c, qir_FRAG_REV_FLAG(c)), |
||
821 | qir_uniform_f(c, 2.0))); |
||
822 | c->inputs[attr * 4 + 1] = qir_uniform_f(c, 0.0); |
||
823 | c->inputs[attr * 4 + 2] = qir_uniform_f(c, 0.0); |
||
824 | c->inputs[attr * 4 + 3] = qir_uniform_f(c, 1.0); |
||
825 | } |
||
826 | |||
/* Grows the outputs array to cover decl_offset and records that slot's
 * semantic name/index/swizzle in the parallel output_semantics array.
 */
static void
add_output(struct vc4_compile *c,
           uint32_t decl_offset,
           uint8_t semantic_name,
           uint8_t semantic_index,
           uint8_t semantic_swizzle)
{
        uint32_t old_array_size = c->outputs_array_size;
        resize_qreg_array(c, &c->outputs, &c->outputs_array_size,
                          decl_offset + 1);

        /* Keep output_semantics the same size as the outputs array. */
        if (old_array_size != c->outputs_array_size) {
                c->output_semantics = reralloc(c,
                                               c->output_semantics,
                                               struct vc4_varying_semantic,
                                               c->outputs_array_size);
        }

        c->output_semantics[decl_offset].semantic = semantic_name;
        c->output_semantics[decl_offset].index = semantic_index;
        c->output_semantics[decl_offset].swizzle = semantic_swizzle;
}
||
849 | |||
850 | static void |
||
851 | declare_uniform_range(struct vc4_compile *c, uint32_t start, uint32_t size) |
||
852 | { |
||
853 | unsigned array_id = c->num_uniform_ranges++; |
||
854 | if (array_id >= c->ubo_ranges_array_size) { |
||
855 | c->ubo_ranges_array_size = MAX2(c->ubo_ranges_array_size * 2, |
||
856 | array_id + 1); |
||
857 | c->ubo_ranges = reralloc(c, c->ubo_ranges, |
||
858 | struct vc4_compiler_ubo_range, |
||
859 | c->ubo_ranges_array_size); |
||
860 | } |
||
861 | |||
862 | c->ubo_ranges[array_id].dst_offset = 0; |
||
863 | c->ubo_ranges[array_id].src_offset = start; |
||
864 | c->ubo_ranges[array_id].size = size; |
||
865 | c->ubo_ranges[array_id].used = false; |
||
866 | } |
||
867 | |||
/* Translates one NIR ALU instruction into QIR.
 *
 * NIR input is expected to be scalarized (see nir_lower_alu_to_scalar in
 * vc4_optimize_nir), so apart from the vecN ops each instruction writes a
 * single channel.  Comparison and select ops are implemented with the QPU
 * condition-flag pattern: qir_SF() sets the flags from a value, then a
 * qir_SEL_X_* op picks a result based on the Z (zero) or N (negative) flag.
 */
static void
ntq_emit_alu(struct vc4_compile *c, nir_alu_instr *instr)
{
        /* Vectors are special in that they have non-scalarized writemasks,
         * and just take the first swizzle channel for each argument in order
         * into each writemask channel.
         */
        if (instr->op == nir_op_vec2 ||
            instr->op == nir_op_vec3 ||
            instr->op == nir_op_vec4) {
                struct qreg srcs[4];
                /* Gather all sources first so that a dest channel written
                 * early can't clobber a source read later.
                 */
                for (int i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
                        srcs[i] = ntq_get_src(c, instr->src[i].src,
                                              instr->src[i].swizzle[0]);
                struct qreg *dest = ntq_get_dest(c, instr->dest.dest);
                for (int i = 0; i < nir_op_infos[instr->op].num_inputs; i++)
                        dest[i] = srcs[i];
                return;
        }

        /* General case: We can just grab the one used channel per src. */
        struct qreg src[nir_op_infos[instr->op].num_inputs];
        for (int i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
                src[i] = ntq_get_alu_src(c, instr, i);
        }

        /* Pick the channel to store the output in.  Exactly one writemask
         * bit must be set in the scalarized input.
         */
        assert(!instr->dest.saturate);
        struct qreg *dest = ntq_get_dest(c, instr->dest.dest);
        assert(util_is_power_of_two(instr->dest.write_mask));
        dest += ffs(instr->dest.write_mask) - 1;

        switch (instr->op) {
        case nir_op_fmov:
        case nir_op_imov:
                *dest = qir_MOV(c, src[0]);
                break;
        case nir_op_fmul:
                *dest = qir_FMUL(c, src[0], src[1]);
                break;
        case nir_op_fadd:
                *dest = qir_FADD(c, src[0], src[1]);
                break;
        case nir_op_fsub:
                *dest = qir_FSUB(c, src[0], src[1]);
                break;
        case nir_op_fmin:
                *dest = qir_FMIN(c, src[0], src[1]);
                break;
        case nir_op_fmax:
                *dest = qir_FMAX(c, src[0], src[1]);
                break;

        case nir_op_f2i:
        case nir_op_f2u:
                /* NOTE(review): f2u shares the signed FTOI conversion —
                 * presumably acceptable for the range of values seen here;
                 * confirm against the QPU conversion semantics.
                 */
                *dest = qir_FTOI(c, src[0]);
                break;
        case nir_op_i2f:
        case nir_op_u2f:
                *dest = qir_ITOF(c, src[0]);
                break;
        case nir_op_b2f:
                /* NIR booleans are 0/~0, so AND with 1.0's bit pattern
                 * yields float 0.0 or 1.0.
                 */
                *dest = qir_AND(c, src[0], qir_uniform_f(c, 1.0));
                break;
        case nir_op_b2i:
                *dest = qir_AND(c, src[0], qir_uniform_ui(c, 1));
                break;
        case nir_op_i2b:
        case nir_op_f2b:
                /* Nonzero (Z clear) -> ~0, else 0. */
                qir_SF(c, src[0]);
                *dest = qir_SEL_X_0_ZC(c, qir_uniform_ui(c, ~0));
                break;

        case nir_op_iadd:
                *dest = qir_ADD(c, src[0], src[1]);
                break;
        case nir_op_ushr:
                *dest = qir_SHR(c, src[0], src[1]);
                break;
        case nir_op_isub:
                *dest = qir_SUB(c, src[0], src[1]);
                break;
        case nir_op_ishr:
                *dest = qir_ASR(c, src[0], src[1]);
                break;
        case nir_op_ishl:
                *dest = qir_SHL(c, src[0], src[1]);
                break;
        case nir_op_imin:
                *dest = qir_MIN(c, src[0], src[1]);
                break;
        case nir_op_imax:
                *dest = qir_MAX(c, src[0], src[1]);
                break;
        case nir_op_iand:
                *dest = qir_AND(c, src[0], src[1]);
                break;
        case nir_op_ior:
                *dest = qir_OR(c, src[0], src[1]);
                break;
        case nir_op_ixor:
                *dest = qir_XOR(c, src[0], src[1]);
                break;
        case nir_op_inot:
                *dest = qir_NOT(c, src[0]);
                break;

        case nir_op_imul:
                /* Integer multiply goes through a helper (the QPU multiply
                 * unit is not a full 32x32 multiply).
                 */
                *dest = ntq_umul(c, src[0], src[1]);
                break;

        /* SET-style float comparisons produce 1.0/0.0; the flag source is
         * the sign/zero of (src0 - src1).
         */
        case nir_op_seq:
                qir_SF(c, qir_FSUB(c, src[0], src[1]));
                *dest = qir_SEL_X_0_ZS(c, qir_uniform_f(c, 1.0));
                break;
        case nir_op_sne:
                qir_SF(c, qir_FSUB(c, src[0], src[1]));
                *dest = qir_SEL_X_0_ZC(c, qir_uniform_f(c, 1.0));
                break;
        case nir_op_sge:
                qir_SF(c, qir_FSUB(c, src[0], src[1]));
                *dest = qir_SEL_X_0_NC(c, qir_uniform_f(c, 1.0));
                break;
        case nir_op_slt:
                qir_SF(c, qir_FSUB(c, src[0], src[1]));
                *dest = qir_SEL_X_0_NS(c, qir_uniform_f(c, 1.0));
                break;
        /* Boolean comparisons produce ~0/0 instead of 1.0/0.0. */
        case nir_op_feq:
                qir_SF(c, qir_FSUB(c, src[0], src[1]));
                *dest = qir_SEL_X_0_ZS(c, qir_uniform_ui(c, ~0));
                break;
        case nir_op_fne:
                qir_SF(c, qir_FSUB(c, src[0], src[1]));
                *dest = qir_SEL_X_0_ZC(c, qir_uniform_ui(c, ~0));
                break;
        case nir_op_fge:
                qir_SF(c, qir_FSUB(c, src[0], src[1]));
                *dest = qir_SEL_X_0_NC(c, qir_uniform_ui(c, ~0));
                break;
        case nir_op_flt:
                qir_SF(c, qir_FSUB(c, src[0], src[1]));
                *dest = qir_SEL_X_0_NS(c, qir_uniform_ui(c, ~0));
                break;
        case nir_op_ieq:
                qir_SF(c, qir_SUB(c, src[0], src[1]));
                *dest = qir_SEL_X_0_ZS(c, qir_uniform_ui(c, ~0));
                break;
        case nir_op_ine:
                qir_SF(c, qir_SUB(c, src[0], src[1]));
                *dest = qir_SEL_X_0_ZC(c, qir_uniform_ui(c, ~0));
                break;
        case nir_op_ige:
                qir_SF(c, qir_SUB(c, src[0], src[1]));
                *dest = qir_SEL_X_0_NC(c, qir_uniform_ui(c, ~0));
                break;
        case nir_op_ilt:
                qir_SF(c, qir_SUB(c, src[0], src[1]));
                *dest = qir_SEL_X_0_NS(c, qir_uniform_ui(c, ~0));
                break;

        case nir_op_bcsel:
                /* Boolean condition is 0/~0, so N is set when true. */
                qir_SF(c, src[0]);
                *dest = qir_SEL_X_Y_NS(c, src[1], src[2]);
                break;
        case nir_op_fcsel:
                /* Float condition: any nonzero value selects src[1]. */
                qir_SF(c, src[0]);
                *dest = qir_SEL_X_Y_ZC(c, src[1], src[2]);
                break;

        case nir_op_frcp:
                *dest = ntq_rcp(c, src[0]);
                break;
        case nir_op_frsq:
                *dest = ntq_rsq(c, src[0]);
                break;
        case nir_op_fexp2:
                *dest = qir_EXP2(c, src[0]);
                break;
        case nir_op_flog2:
                *dest = qir_LOG2(c, src[0]);
                break;

        case nir_op_ftrunc:
                /* Round-trip through integer truncates toward zero. */
                *dest = qir_ITOF(c, qir_FTOI(c, src[0]));
                break;
        case nir_op_fceil:
                *dest = ntq_fceil(c, src[0]);
                break;
        case nir_op_ffract:
                *dest = ntq_ffract(c, src[0]);
                break;
        case nir_op_ffloor:
                *dest = ntq_ffloor(c, src[0]);
                break;

        case nir_op_fsin:
                *dest = ntq_fsin(c, src[0]);
                break;
        case nir_op_fcos:
                *dest = ntq_fcos(c, src[0]);
                break;

        case nir_op_fsign:
                *dest = ntq_fsign(c, src[0]);
                break;

        case nir_op_fabs:
                /* FMAXABS(x, x) == |x| using the abs-input modifier. */
                *dest = qir_FMAXABS(c, src[0], src[0]);
                break;
        case nir_op_iabs:
                *dest = qir_MAX(c, src[0],
                                qir_SUB(c, qir_uniform_ui(c, 0), src[0]));
                break;

        default:
                fprintf(stderr, "unknown NIR ALU inst: ");
                nir_print_instr(&instr->instr, stderr);
                fprintf(stderr, "\n");
                abort();
        }
}
||
1089 | |||
/* Emits QIR that multiplies one color channel value by a pipe blend factor.
 *
 * \param dst      the four destination (tile buffer) channels
 * \param src      the four source (shader output) channels
 * \param val      the channel value being scaled by the factor
 * \param factor   a PIPE_BLENDFACTOR_* enum value
 * \param channel  which channel (0-3) \p val is, for per-channel factors
 *
 * Returns val * factor.  The dual-source SRC1 factors are unsupported and
 * fall through to returning val unchanged.
 */
static struct qreg
vc4_blend_channel(struct vc4_compile *c,
                  struct qreg *dst,
                  struct qreg *src,
                  struct qreg val,
                  unsigned factor,
                  int channel)
{
        switch(factor) {
        case PIPE_BLENDFACTOR_ONE:
                return val;
        case PIPE_BLENDFACTOR_SRC_COLOR:
                return qir_FMUL(c, val, src[channel]);
        case PIPE_BLENDFACTOR_SRC_ALPHA:
                return qir_FMUL(c, val, src[3]);
        case PIPE_BLENDFACTOR_DST_ALPHA:
                return qir_FMUL(c, val, dst[3]);
        case PIPE_BLENDFACTOR_DST_COLOR:
                return qir_FMUL(c, val, dst[channel]);
        case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
                /* min(As, 1 - Ad) for RGB; alpha uses factor 1. */
                if (channel != 3) {
                        return qir_FMUL(c,
                                        val,
                                        qir_FMIN(c,
                                                 src[3],
                                                 qir_FSUB(c,
                                                          qir_uniform_f(c, 1.0),
                                                          dst[3])));
                } else {
                        return val;
                }
        case PIPE_BLENDFACTOR_CONST_COLOR:
                return qir_FMUL(c, val,
                                qir_uniform(c, QUNIFORM_BLEND_CONST_COLOR,
                                            channel));
        case PIPE_BLENDFACTOR_CONST_ALPHA:
                return qir_FMUL(c, val,
                                qir_uniform(c, QUNIFORM_BLEND_CONST_COLOR, 3));
        case PIPE_BLENDFACTOR_ZERO:
                return qir_uniform_f(c, 0.0);
        /* INV_* factors are val * (1 - x). */
        case PIPE_BLENDFACTOR_INV_SRC_COLOR:
                return qir_FMUL(c, val, qir_FSUB(c, qir_uniform_f(c, 1.0),
                                                 src[channel]));
        case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
                return qir_FMUL(c, val, qir_FSUB(c, qir_uniform_f(c, 1.0),
                                                 src[3]));
        case PIPE_BLENDFACTOR_INV_DST_ALPHA:
                return qir_FMUL(c, val, qir_FSUB(c, qir_uniform_f(c, 1.0),
                                                 dst[3]));
        case PIPE_BLENDFACTOR_INV_DST_COLOR:
                return qir_FMUL(c, val, qir_FSUB(c, qir_uniform_f(c, 1.0),
                                                 dst[channel]));
        case PIPE_BLENDFACTOR_INV_CONST_COLOR:
                return qir_FMUL(c, val,
                                qir_FSUB(c, qir_uniform_f(c, 1.0),
                                         qir_uniform(c,
                                                     QUNIFORM_BLEND_CONST_COLOR,
                                                     channel)));
        case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
                return qir_FMUL(c, val,
                                qir_FSUB(c, qir_uniform_f(c, 1.0),
                                         qir_uniform(c,
                                                     QUNIFORM_BLEND_CONST_COLOR,
                                                     3)));

        default:
        case PIPE_BLENDFACTOR_SRC1_COLOR:
        case PIPE_BLENDFACTOR_SRC1_ALPHA:
        case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
        case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
                /* Unsupported. */
                fprintf(stderr, "Unknown blend factor %d\n", factor);
                return val;
        }
}
||
1165 | |||
1166 | static struct qreg |
||
1167 | vc4_blend_func(struct vc4_compile *c, |
||
1168 | struct qreg src, struct qreg dst, |
||
1169 | unsigned func) |
||
1170 | { |
||
1171 | switch (func) { |
||
1172 | case PIPE_BLEND_ADD: |
||
1173 | return qir_FADD(c, src, dst); |
||
1174 | case PIPE_BLEND_SUBTRACT: |
||
1175 | return qir_FSUB(c, src, dst); |
||
1176 | case PIPE_BLEND_REVERSE_SUBTRACT: |
||
1177 | return qir_FSUB(c, dst, src); |
||
1178 | case PIPE_BLEND_MIN: |
||
1179 | return qir_FMIN(c, src, dst); |
||
1180 | case PIPE_BLEND_MAX: |
||
1181 | return qir_FMAX(c, src, dst); |
||
1182 | |||
1183 | default: |
||
1184 | /* Unsupported. */ |
||
1185 | fprintf(stderr, "Unknown blend func %d\n", func); |
||
1186 | return src; |
||
1187 | |||
1188 | } |
||
1189 | } |
||
1190 | |||
/**
 * Implements fixed function blending in shader code.
 *
 * VC4 doesn't have any hardware support for blending.  Instead, you read the
 * current contents of the destination from the tile buffer after having
 * waited for the scoreboard (which is handled by vc4_qpu_emit.c), then do
 * math using your output color and that destination value, and update the
 * output color appropriately.
 *
 * \param result     receives the four blended output channels
 * \param dst_color  the four destination (tile buffer) channels
 * \param src_color  the four source (shader output) channels
 */
static void
vc4_blend(struct vc4_compile *c, struct qreg *result,
          struct qreg *dst_color, struct qreg *src_color)
{
        struct pipe_rt_blend_state *blend = &c->fs_key->blend;

        /* Blending disabled: pass the source color straight through. */
        if (!blend->blend_enable) {
                for (int i = 0; i < 4; i++)
                        result[i] = src_color[i];
                return;
        }

        /* Clamp both colors to [0, 1] before applying the factors. */
        struct qreg clamped_src[4];
        struct qreg clamped_dst[4];
        for (int i = 0; i < 4; i++) {
                clamped_src[i] = qir_SAT(c, src_color[i]);
                clamped_dst[i] = qir_SAT(c, dst_color[i]);
        }
        src_color = clamped_src;
        dst_color = clamped_dst;

        /* Scale RGB by the rgb factors and alpha by the alpha factors. */
        struct qreg src_blend[4], dst_blend[4];
        for (int i = 0; i < 3; i++) {
                src_blend[i] = vc4_blend_channel(c,
                                                 dst_color, src_color,
                                                 src_color[i],
                                                 blend->rgb_src_factor, i);
                dst_blend[i] = vc4_blend_channel(c,
                                                 dst_color, src_color,
                                                 dst_color[i],
                                                 blend->rgb_dst_factor, i);
        }
        src_blend[3] = vc4_blend_channel(c,
                                         dst_color, src_color,
                                         src_color[3],
                                         blend->alpha_src_factor, 3);
        dst_blend[3] = vc4_blend_channel(c,
                                         dst_color, src_color,
                                         dst_color[3],
                                         blend->alpha_dst_factor, 3);

        /* Combine the scaled terms with the rgb/alpha blend equations. */
        for (int i = 0; i < 3; i++) {
                result[i] = vc4_blend_func(c,
                                           src_blend[i], dst_blend[i],
                                           blend->rgb_func);
        }
        result[3] = vc4_blend_func(c,
                                   src_blend[3], dst_blend[3],
                                   blend->alpha_func);
}
||
1250 | |||
1251 | static void |
||
1252 | clip_distance_discard(struct vc4_compile *c) |
||
1253 | { |
||
1254 | for (int i = 0; i < PIPE_MAX_CLIP_PLANES; i++) { |
||
1255 | if (!(c->key->ucp_enables & (1 << i))) |
||
1256 | continue; |
||
1257 | |||
1258 | struct qreg dist = emit_fragment_varying(c, |
||
1259 | TGSI_SEMANTIC_CLIPDIST, |
||
1260 | i, |
||
1261 | TGSI_SWIZZLE_X); |
||
1262 | |||
1263 | qir_SF(c, dist); |
||
1264 | |||
1265 | if (c->discard.file == QFILE_NULL) |
||
1266 | c->discard = qir_uniform_ui(c, 0); |
||
1267 | |||
1268 | c->discard = qir_SEL_X_Y_NS(c, qir_uniform_ui(c, ~0), |
||
1269 | c->discard); |
||
1270 | } |
||
1271 | } |
||
1272 | |||
1273 | static void |
||
1274 | alpha_test_discard(struct vc4_compile *c) |
||
1275 | { |
||
1276 | struct qreg src_alpha; |
||
1277 | struct qreg alpha_ref = qir_uniform(c, QUNIFORM_ALPHA_REF, 0); |
||
1278 | |||
1279 | if (!c->fs_key->alpha_test) |
||
1280 | return; |
||
1281 | |||
1282 | if (c->output_color_index != -1) |
||
1283 | src_alpha = c->outputs[c->output_color_index + 3]; |
||
1284 | else |
||
1285 | src_alpha = qir_uniform_f(c, 1.0); |
||
1286 | |||
1287 | if (c->discard.file == QFILE_NULL) |
||
1288 | c->discard = qir_uniform_ui(c, 0); |
||
1289 | |||
1290 | switch (c->fs_key->alpha_test_func) { |
||
1291 | case PIPE_FUNC_NEVER: |
||
1292 | c->discard = qir_uniform_ui(c, ~0); |
||
1293 | break; |
||
1294 | case PIPE_FUNC_ALWAYS: |
||
1295 | break; |
||
1296 | case PIPE_FUNC_EQUAL: |
||
1297 | qir_SF(c, qir_FSUB(c, src_alpha, alpha_ref)); |
||
1298 | c->discard = qir_SEL_X_Y_ZS(c, c->discard, |
||
1299 | qir_uniform_ui(c, ~0)); |
||
1300 | break; |
||
1301 | case PIPE_FUNC_NOTEQUAL: |
||
1302 | qir_SF(c, qir_FSUB(c, src_alpha, alpha_ref)); |
||
1303 | c->discard = qir_SEL_X_Y_ZC(c, c->discard, |
||
1304 | qir_uniform_ui(c, ~0)); |
||
1305 | break; |
||
1306 | case PIPE_FUNC_GREATER: |
||
1307 | qir_SF(c, qir_FSUB(c, src_alpha, alpha_ref)); |
||
1308 | c->discard = qir_SEL_X_Y_NC(c, c->discard, |
||
1309 | qir_uniform_ui(c, ~0)); |
||
1310 | break; |
||
1311 | case PIPE_FUNC_GEQUAL: |
||
1312 | qir_SF(c, qir_FSUB(c, alpha_ref, src_alpha)); |
||
1313 | c->discard = qir_SEL_X_Y_NS(c, c->discard, |
||
1314 | qir_uniform_ui(c, ~0)); |
||
1315 | break; |
||
1316 | case PIPE_FUNC_LESS: |
||
1317 | qir_SF(c, qir_FSUB(c, src_alpha, alpha_ref)); |
||
1318 | c->discard = qir_SEL_X_Y_NS(c, c->discard, |
||
1319 | qir_uniform_ui(c, ~0)); |
||
1320 | break; |
||
1321 | case PIPE_FUNC_LEQUAL: |
||
1322 | qir_SF(c, qir_FSUB(c, alpha_ref, src_alpha)); |
||
1323 | c->discard = qir_SEL_X_Y_NC(c, c->discard, |
||
1324 | qir_uniform_ui(c, ~0)); |
||
1325 | break; |
||
1326 | } |
||
1327 | } |
||
1328 | |||
/* Emits QIR implementing a GL logic op on the packed source and destination
 * colors, returning the combined packed value.  Operates bitwise on the
 * already-packed 8888 values, so it happens after color packing in
 * emit_frag_end().  PIPE_LOGICOP_COPY (and anything unknown) returns src
 * unchanged.
 */
static struct qreg
vc4_logicop(struct vc4_compile *c, struct qreg src, struct qreg dst)
{
        switch (c->fs_key->logicop_func) {
        case PIPE_LOGICOP_CLEAR:
                return qir_uniform_f(c, 0.0);
        case PIPE_LOGICOP_NOR:
                return qir_NOT(c, qir_OR(c, src, dst));
        case PIPE_LOGICOP_AND_INVERTED:
                return qir_AND(c, qir_NOT(c, src), dst);
        case PIPE_LOGICOP_COPY_INVERTED:
                return qir_NOT(c, src);
        case PIPE_LOGICOP_AND_REVERSE:
                return qir_AND(c, src, qir_NOT(c, dst));
        case PIPE_LOGICOP_INVERT:
                return qir_NOT(c, dst);
        case PIPE_LOGICOP_XOR:
                return qir_XOR(c, src, dst);
        case PIPE_LOGICOP_NAND:
                return qir_NOT(c, qir_AND(c, src, dst));
        case PIPE_LOGICOP_AND:
                return qir_AND(c, src, dst);
        case PIPE_LOGICOP_EQUIV:
                return qir_NOT(c, qir_XOR(c, src, dst));
        case PIPE_LOGICOP_NOOP:
                return dst;
        case PIPE_LOGICOP_OR_INVERTED:
                return qir_OR(c, qir_NOT(c, src), dst);
        case PIPE_LOGICOP_OR_REVERSE:
                return qir_OR(c, src, qir_NOT(c, dst));
        case PIPE_LOGICOP_OR:
                return qir_OR(c, src, dst);
        case PIPE_LOGICOP_SET:
                return qir_uniform_ui(c, ~0);
        case PIPE_LOGICOP_COPY:
        default:
                return src;
        }
}
||
1368 | |||
/* Emits the fragment shader epilogue: discard accumulation, blending,
 * logic ops, colormask handling, stencil/depth setup, color packing, and
 * the final TLB color write.
 *
 * Ordering matters here: the destination color is read from the tile
 * buffer (only when blending/colormask/logicop require it), blending is
 * done in linear space (with sRGB decode/encode around it when needed),
 * and the logic op and colormask operate on the packed 8888 values.
 */
static void
emit_frag_end(struct vc4_compile *c)
{
        clip_distance_discard(c);
        alpha_test_discard(c);

        enum pipe_format color_format = c->fs_key->color_format;
        const uint8_t *format_swiz = vc4_get_format_swizzle(color_format);
        struct qreg tlb_read_color[4] = { c->undef, c->undef, c->undef, c->undef };
        struct qreg dst_color[4] = { c->undef, c->undef, c->undef, c->undef };
        struct qreg linear_dst_color[4] = { c->undef, c->undef, c->undef, c->undef };
        struct qreg packed_dst_color = c->undef;

        /* Only read back the destination when something actually consumes
         * it: blending, a partial colormask, or a non-COPY logic op.
         */
        if (c->fs_key->blend.blend_enable ||
            c->fs_key->blend.colormask != 0xf ||
            c->fs_key->logicop_func != PIPE_LOGICOP_COPY) {
                struct qreg r4 = qir_TLB_COLOR_READ(c);
                for (int i = 0; i < 4; i++)
                        tlb_read_color[i] = qir_R4_UNPACK(c, r4, i);
                for (int i = 0; i < 4; i++) {
                        dst_color[i] = get_swizzled_channel(c,
                                                            tlb_read_color,
                                                            format_swiz[i]);
                        /* Blend in linear space: decode sRGB RGB channels
                         * (alpha stays linear).
                         */
                        if (util_format_is_srgb(color_format) && i != 3) {
                                linear_dst_color[i] =
                                        qir_srgb_decode(c, dst_color[i]);
                        } else {
                                linear_dst_color[i] = dst_color[i];
                        }
                }

                /* Save the packed value for logic ops.  Can't reuse r4
                 * because other things might smash it (like sRGB)
                 */
                packed_dst_color = qir_MOV(c, r4);
        }

        struct qreg blend_color[4];
        struct qreg undef_array[4] = {
                c->undef, c->undef, c->undef, c->undef
        };
        vc4_blend(c, blend_color, linear_dst_color,
                  (c->output_color_index != -1 ?
                   c->outputs + c->output_color_index :
                   undef_array));

        /* Re-encode RGB back to sRGB after blending. */
        if (util_format_is_srgb(color_format)) {
                for (int i = 0; i < 3; i++)
                        blend_color[i] = qir_srgb_encode(c, blend_color[i]);
        }

        /* Debug: Sometimes you're getting a black output and just want to see
         * if the FS is getting executed at all.  Spam magenta into the color
         * output.
         */
        if (0) {
                blend_color[0] = qir_uniform_f(c, 1.0);
                blend_color[1] = qir_uniform_f(c, 0.0);
                blend_color[2] = qir_uniform_f(c, 1.0);
                blend_color[3] = qir_uniform_f(c, 0.5);
        }

        /* Reorder channels from canonical RGBA into the format's layout. */
        struct qreg swizzled_outputs[4];
        for (int i = 0; i < 4; i++) {
                swizzled_outputs[i] = get_swizzled_channel(c, blend_color,
                                                           format_swiz[i]);
        }

        if (c->discard.file != QFILE_NULL)
                qir_TLB_DISCARD_SETUP(c, c->discard);

        /* Up to three stencil config words depending on the state. */
        if (c->fs_key->stencil_enabled) {
                qir_TLB_STENCIL_SETUP(c, qir_uniform(c, QUNIFORM_STENCIL, 0));
                if (c->fs_key->stencil_twoside) {
                        qir_TLB_STENCIL_SETUP(c, qir_uniform(c, QUNIFORM_STENCIL, 1));
                }
                if (c->fs_key->stencil_full_writemasks) {
                        qir_TLB_STENCIL_SETUP(c, qir_uniform(c, QUNIFORM_STENCIL, 2));
                }
        }

        if (c->fs_key->depth_enabled) {
                struct qreg z;
                if (c->output_position_index != -1) {
                        /* Shader-written depth: scale [0,1] to 24-bit Z. */
                        z = qir_FTOI(c, qir_FMUL(c, c->outputs[c->output_position_index + 2],
                                                 qir_uniform_f(c, 0xffffff)));
                } else {
                        z = qir_FRAG_Z(c);
                }
                qir_TLB_Z_WRITE(c, z);
        }

        /* Pack the float channels into an 8888 value: first channel packs
         * all four bytes, remaining channels overwrite their byte.
         */
        struct qreg packed_color = c->undef;
        for (int i = 0; i < 4; i++) {
                if (swizzled_outputs[i].file == QFILE_NULL)
                        continue;
                if (packed_color.file == QFILE_NULL) {
                        packed_color = qir_PACK_8888_F(c, swizzled_outputs[i]);
                } else {
                        packed_color = qir_PACK_8_F(c,
                                                    packed_color,
                                                    swizzled_outputs[i],
                                                    i);
                }
        }

        if (packed_color.file == QFILE_NULL)
                packed_color = qir_uniform_ui(c, 0);

        if (c->fs_key->logicop_func != PIPE_LOGICOP_COPY) {
                packed_color = vc4_logicop(c, packed_color, packed_dst_color);
        }

        /* If the bit isn't set in the color mask, then just return the
         * original dst color, instead.
         */
        uint32_t colormask = 0xffffffff;
        for (int i = 0; i < 4; i++) {
                if (format_swiz[i] < 4 &&
                    !(c->fs_key->blend.colormask & (1 << format_swiz[i]))) {
                        colormask &= ~(0xff << (i * 8));
                }
        }
        if (colormask != 0xffffffff) {
                packed_color = qir_OR(c,
                                      qir_AND(c, packed_color,
                                              qir_uniform_ui(c, colormask)),
                                      qir_AND(c, packed_dst_color,
                                              qir_uniform_ui(c, ~colormask)));
        }

        qir_emit(c, qir_inst(QOP_TLB_COLOR_WRITE, c->undef,
                             packed_color, c->undef));
}
||
1503 | |||
1504 | static void |
||
1505 | emit_scaled_viewport_write(struct vc4_compile *c, struct qreg rcp_w) |
||
1506 | { |
||
1507 | struct qreg xyi[2]; |
||
1508 | |||
1509 | for (int i = 0; i < 2; i++) { |
||
1510 | struct qreg scale = |
||
1511 | qir_uniform(c, QUNIFORM_VIEWPORT_X_SCALE + i, 0); |
||
1512 | |||
1513 | xyi[i] = qir_FTOI(c, qir_FMUL(c, |
||
1514 | qir_FMUL(c, |
||
1515 | c->outputs[c->output_position_index + i], |
||
1516 | scale), |
||
1517 | rcp_w)); |
||
1518 | } |
||
1519 | |||
1520 | qir_VPM_WRITE(c, qir_PACK_SCALED(c, xyi[0], xyi[1])); |
||
1521 | } |
||
1522 | |||
1523 | static void |
||
1524 | emit_zs_write(struct vc4_compile *c, struct qreg rcp_w) |
||
1525 | { |
||
1526 | struct qreg zscale = qir_uniform(c, QUNIFORM_VIEWPORT_Z_SCALE, 0); |
||
1527 | struct qreg zoffset = qir_uniform(c, QUNIFORM_VIEWPORT_Z_OFFSET, 0); |
||
1528 | |||
1529 | qir_VPM_WRITE(c, qir_FADD(c, qir_FMUL(c, qir_FMUL(c, |
||
1530 | c->outputs[c->output_position_index + 2], |
||
1531 | zscale), |
||
1532 | rcp_w), |
||
1533 | zoffset)); |
||
1534 | } |
||
1535 | |||
/* Writes the reciprocal of the clip-space W coordinate to the VPM. */
static void
emit_rcp_wc_write(struct vc4_compile *c, struct qreg rcp_w)
{
        qir_VPM_WRITE(c, rcp_w);
}
||
1541 | |||
1542 | static void |
||
1543 | emit_point_size_write(struct vc4_compile *c) |
||
1544 | { |
||
1545 | struct qreg point_size; |
||
1546 | |||
1547 | if (c->output_point_size_index != -1) |
||
1548 | point_size = c->outputs[c->output_point_size_index + 3]; |
||
1549 | else |
||
1550 | point_size = qir_uniform_f(c, 1.0); |
||
1551 | |||
1552 | /* Workaround: HW-2726 PTB does not handle zero-size points (BCM2835, |
||
1553 | * BCM21553). |
||
1554 | */ |
||
1555 | point_size = qir_FMAX(c, point_size, qir_uniform_f(c, .125)); |
||
1556 | |||
1557 | qir_VPM_WRITE(c, point_size); |
||
1558 | } |
||
1559 | |||
1560 | /** |
||
1561 | * Emits a VPM read of the stub vertex attribute set up by vc4_draw.c. |
||
1562 | * |
||
1563 | * The simulator insists that there be at least one vertex attribute, so |
||
1564 | * vc4_draw.c will emit one if it wouldn't have otherwise. The simulator also |
||
1565 | * insists that all vertex attributes loaded get read by the VS/CS, so we have |
||
1566 | * to consume it here. |
||
1567 | */ |
||
static void
emit_stub_vpm_read(struct vc4_compile *c)
{
        /* Nothing to do when the shader reads real vertex attributes. */
        if (c->num_inputs)
                return;

        /* Consume the one stub attribute vc4_draw.c sets up: record its
         * size, emit a VPM read whose result is discarded, and count it
         * as an input.
         */
        c->vattr_sizes[0] = 4;
        struct qreg vpm = { QFILE_VPM, 0 };
        (void)qir_MOV(c, vpm);
        c->num_inputs++;
}
||
1579 | |||
/* Computes clip distances for the enabled user clip planes and appends them
 * as CLIPDIST outputs, for the FS-side clip_distance_discard() to consume.
 *
 * The distance for each enabled plane is the dot product of the clip
 * position (CLIPVERTEX if written, else POSITION) with the plane's
 * QUNIFORM_USER_CLIP_PLANE coefficients.  Does nothing when the shader
 * writes neither position output.
 */
static void
emit_ucp_clipdistance(struct vc4_compile *c)
{
        unsigned cv;
        if (c->output_clipvertex_index != -1)
                cv = c->output_clipvertex_index;
        else if (c->output_position_index != -1)
                cv = c->output_position_index;
        else
                return;

        for (int plane = 0; plane < PIPE_MAX_CLIP_PLANES; plane++) {
                if (!(c->key->ucp_enables & (1 << plane)))
                        continue;

                /* Pick the next outputs[] that hasn't been written to, since
                 * there are no other program writes left to be processed at
                 * this point.  If something had been declared but not written
                 * (like a w component), we'll just smash over the top of it.
                 */
                uint32_t output_index = c->num_outputs++;
                add_output(c, output_index,
                           TGSI_SEMANTIC_CLIPDIST,
                           plane,
                           TGSI_SWIZZLE_X);


                /* dist = dot(clip_pos, plane_coefficients) */
                struct qreg dist = qir_uniform_f(c, 0.0);
                for (int i = 0; i < 4; i++) {
                        struct qreg pos_chan = c->outputs[cv + i];
                        struct qreg ucp =
                                qir_uniform(c, QUNIFORM_USER_CLIP_PLANE,
                                            plane * 4 + i);
                        dist = qir_FADD(c, dist, qir_FMUL(c, pos_chan, ucp));
                }

                c->outputs[output_index] = dist;
        }
}
||
1619 | |||
/* Emits the vertex shader epilogue: fixed-function outputs (screen X/Y,
 * Z, 1/Wc, optional point size) followed by one VPM write per FS input,
 * matched against this shader's declared outputs by semantic/index/swizzle.
 *
 * \param fs_inputs      the fragment shader's input varyings, in the order
 *                       the FS will read them
 * \param num_fs_inputs  number of entries in \p fs_inputs
 */
static void
emit_vert_end(struct vc4_compile *c,
              struct vc4_varying_semantic *fs_inputs,
              uint32_t num_fs_inputs)
{
        struct qreg rcp_w = qir_RCP(c, c->outputs[c->output_position_index + 3]);

        emit_stub_vpm_read(c);
        emit_ucp_clipdistance(c);

        emit_scaled_viewport_write(c, rcp_w);
        emit_zs_write(c, rcp_w);
        emit_rcp_wc_write(c, rcp_w);
        if (c->vs_key->per_vertex_point_size)
                emit_point_size_write(c);

        /* Write varyings in the order the FS consumes them, searching our
         * outputs for a matching semantic (O(n*m), but n and m are small).
         */
        for (int i = 0; i < num_fs_inputs; i++) {
                struct vc4_varying_semantic *input = &fs_inputs[i];
                int j;

                for (j = 0; j < c->num_outputs; j++) {
                        struct vc4_varying_semantic *output =
                                &c->output_semantics[j];

                        if (input->semantic == output->semantic &&
                            input->index == output->index &&
                            input->swizzle == output->swizzle) {
                                qir_VPM_WRITE(c, c->outputs[j]);
                                break;
                        }
                }
                /* Emit padding if we didn't find a declared VS output for
                 * this FS input.
                 */
                if (j == c->num_outputs)
                        qir_VPM_WRITE(c, qir_uniform_f(c, 0.0));
        }
}
||
1658 | |||
1659 | static void |
||
1660 | emit_coord_end(struct vc4_compile *c) |
||
1661 | { |
||
1662 | struct qreg rcp_w = qir_RCP(c, c->outputs[c->output_position_index + 3]); |
||
1663 | |||
1664 | emit_stub_vpm_read(c); |
||
1665 | |||
1666 | for (int i = 0; i < 4; i++) |
||
1667 | qir_VPM_WRITE(c, c->outputs[c->output_position_index + i]); |
||
1668 | |||
1669 | emit_scaled_viewport_write(c, rcp_w); |
||
1670 | emit_zs_write(c, rcp_w); |
||
1671 | emit_rcp_wc_write(c, rcp_w); |
||
1672 | if (c->vs_key->per_vertex_point_size) |
||
1673 | emit_point_size_write(c); |
||
1674 | } |
||
1675 | |||
1676 | static void |
||
1677 | vc4_optimize_nir(struct nir_shader *s) |
||
1678 | { |
||
1679 | bool progress; |
||
1680 | |||
1681 | do { |
||
1682 | progress = false; |
||
1683 | |||
1684 | nir_lower_vars_to_ssa(s); |
||
1685 | nir_lower_alu_to_scalar(s); |
||
1686 | |||
1687 | progress = nir_copy_prop(s) || progress; |
||
1688 | progress = nir_opt_dce(s) || progress; |
||
1689 | progress = nir_opt_cse(s) || progress; |
||
1690 | progress = nir_opt_peephole_select(s) || progress; |
||
1691 | progress = nir_opt_algebraic(s) || progress; |
||
1692 | progress = nir_opt_constant_folding(s) || progress; |
||
1693 | } while (progress); |
||
1694 | } |
||
1695 | |||
1696 | static int |
||
1697 | driver_location_compare(const void *in_a, const void *in_b) |
||
1698 | { |
||
1699 | const nir_variable *const *a = in_a; |
||
1700 | const nir_variable *const *b = in_b; |
||
1701 | |||
1702 | return (*a)->data.driver_location - (*b)->data.driver_location; |
||
1703 | } |
||
1704 | |||
1705 | static void |
||
1706 | ntq_setup_inputs(struct vc4_compile *c) |
||
1707 | { |
||
1708 | unsigned num_entries = 0; |
||
1709 | foreach_list_typed(nir_variable, var, node, &c->s->inputs) |
||
1710 | num_entries++; |
||
1711 | |||
1712 | nir_variable *vars[num_entries]; |
||
1713 | |||
1714 | unsigned i = 0; |
||
1715 | foreach_list_typed(nir_variable, var, node, &c->s->inputs) |
||
1716 | vars[i++] = var; |
||
1717 | |||
1718 | /* Sort the variables so that we emit the input setup in |
||
1719 | * driver_location order. This is required for VPM reads, whose data |
||
1720 | * is fetched into the VPM in driver_location (TGSI register index) |
||
1721 | * order. |
||
1722 | */ |
||
1723 | qsort(&vars, num_entries, sizeof(*vars), driver_location_compare); |
||
1724 | |||
1725 | for (unsigned i = 0; i < num_entries; i++) { |
||
1726 | nir_variable *var = vars[i]; |
||
1727 | unsigned array_len = MAX2(glsl_get_length(var->type), 1); |
||
1728 | /* XXX: map loc slots to semantics */ |
||
1729 | unsigned semantic_name = var->data.location; |
||
1730 | unsigned semantic_index = var->data.index; |
||
1731 | unsigned loc = var->data.driver_location; |
||
1732 | |||
1733 | assert(array_len == 1); |
||
1734 | resize_qreg_array(c, &c->inputs, &c->inputs_array_size, |
||
1735 | (loc + 1) * 4); |
||
1736 | |||
1737 | if (c->stage == QSTAGE_FRAG) { |
||
1738 | if (semantic_name == TGSI_SEMANTIC_POSITION) { |
||
1739 | emit_fragcoord_input(c, loc); |
||
1740 | } else if (semantic_name == TGSI_SEMANTIC_FACE) { |
||
1741 | emit_face_input(c, loc); |
||
1742 | } else if (semantic_name == TGSI_SEMANTIC_GENERIC && |
||
1743 | (c->fs_key->point_sprite_mask & |
||
1744 | (1 << semantic_index))) { |
||
1745 | emit_point_coord_input(c, loc); |
||
1746 | } else { |
||
1747 | emit_fragment_input(c, loc, |
||
1748 | semantic_name, |
||
1749 | semantic_index); |
||
1750 | } |
||
1751 | } else { |
||
1752 | emit_vertex_input(c, loc); |
||
1753 | } |
||
1754 | } |
||
1755 | } |
||
1756 | |||
/* Registers the shader's output variables: four consecutive c->outputs
 * slots per variable (driver_location * 4 gives the base slot), and
 * records the base index of the special POSITION/CLIPVERTEX/COLOR/PSIZE
 * outputs for the epilogue emitters.
 */
static void
ntq_setup_outputs(struct vc4_compile *c)
{
        foreach_list_typed(nir_variable, var, node, &c->s->outputs) {
                unsigned array_len = MAX2(glsl_get_length(var->type), 1);
                /* XXX: map loc slots to semantics */
                unsigned semantic_name = var->data.location;
                unsigned semantic_index = var->data.index;
                unsigned loc = var->data.driver_location * 4;

                /* Output arrays aren't supported yet. */
                assert(array_len == 1);

                for (int i = 0; i < 4; i++) {
                        add_output(c,
                                   loc + i,
                                   semantic_name,
                                   semantic_index,
                                   i);
                }

                switch (semantic_name) {
                case TGSI_SEMANTIC_POSITION:
                        c->output_position_index = loc;
                        break;
                case TGSI_SEMANTIC_CLIPVERTEX:
                        c->output_clipvertex_index = loc;
                        break;
                case TGSI_SEMANTIC_COLOR:
                        c->output_color_index = loc;
                        break;
                case TGSI_SEMANTIC_PSIZE:
                        c->output_point_size_index = loc;
                        break;
                }

        }
}
||
1794 | |||
1795 | static void |
||
1796 | ntq_setup_uniforms(struct vc4_compile *c) |
||
1797 | { |
||
1798 | foreach_list_typed(nir_variable, var, node, &c->s->uniforms) { |
||
1799 | unsigned array_len = MAX2(glsl_get_length(var->type), 1); |
||
1800 | unsigned array_elem_size = 4 * sizeof(float); |
||
1801 | |||
1802 | declare_uniform_range(c, var->data.driver_location * array_elem_size, |
||
1803 | array_len * array_elem_size); |
||
1804 | |||
1805 | } |
||
1806 | } |
||
1807 | |||
1808 | /** |
||
1809 | * Sets up the mapping from nir_register to struct qreg *. |
||
1810 | * |
||
1811 | * Each nir_register gets a struct qreg per 32-bit component being stored. |
||
1812 | */ |
||
1813 | static void |
||
1814 | ntq_setup_registers(struct vc4_compile *c, struct exec_list *list) |
||
1815 | { |
||
1816 | foreach_list_typed(nir_register, nir_reg, node, list) { |
||
1817 | unsigned array_len = MAX2(nir_reg->num_array_elems, 1); |
||
1818 | struct qreg *qregs = ralloc_array(c->def_ht, struct qreg, |
||
1819 | array_len * |
||
1820 | nir_reg->num_components); |
||
1821 | |||
1822 | _mesa_hash_table_insert(c->def_ht, nir_reg, qregs); |
||
1823 | |||
1824 | for (int i = 0; i < array_len * nir_reg->num_components; i++) |
||
1825 | qregs[i] = qir_uniform_ui(c, 0); |
||
1826 | } |
||
1827 | } |
||
1828 | |||
1829 | static void |
||
1830 | ntq_emit_load_const(struct vc4_compile *c, nir_load_const_instr *instr) |
||
1831 | { |
||
1832 | struct qreg *qregs = ralloc_array(c->def_ht, struct qreg, |
||
1833 | instr->def.num_components); |
||
1834 | for (int i = 0; i < instr->def.num_components; i++) |
||
1835 | qregs[i] = qir_uniform_ui(c, instr->value.u[i]); |
||
1836 | |||
1837 | _mesa_hash_table_insert(c->def_ht, &instr->def, qregs); |
||
1838 | } |
||
1839 | |||
/*
 * Emits QIR for one NIR intrinsic instruction.
 *
 * Uniform/input loads fill the destination qregs; output stores MOV
 * their sources into c->outputs; discards accumulate a condition into
 * c->discard.  Unhandled intrinsics are reported to stderr but are not
 * fatal.
 */
static void
ntq_emit_intrinsic(struct vc4_compile *c, nir_intrinsic_instr *instr)
{
        const nir_intrinsic_info *info = &nir_intrinsic_infos[instr->intrinsic];
        struct qreg *dest = NULL;

        if (info->has_dest) {
                dest = ntq_get_dest(c, instr->dest);
        }

        switch (instr->intrinsic) {
        case nir_intrinsic_load_uniform:
                /* const_index[1] == 1: only single (non-array) loads
                 * are expected after lowering.
                 */
                assert(instr->const_index[1] == 1);

                /* const_index[0] is a vec4 slot; scale to component
                 * granularity.
                 */
                for (int i = 0; i < instr->num_components; i++) {
                        dest[i] = qir_uniform(c, QUNIFORM_UNIFORM,
                                              instr->const_index[0] * 4 + i);
                }
                break;

        case nir_intrinsic_load_uniform_indirect:
                assert(instr->const_index[1] == 1);

                /* src[0] supplies the dynamic offset; the constant part
                 * is converted to a byte offset here.
                 */
                for (int i = 0; i < instr->num_components; i++) {
                        dest[i] = indirect_uniform_load(c,
                                                        ntq_get_src(c, instr->src[0], 0),
                                                        (instr->const_index[0] *
                                                         4 + i) * sizeof(float));
                }

                break;

        case nir_intrinsic_load_input:
                assert(instr->const_index[1] == 1);

                /* Inputs were pre-loaded into c->inputs[] during setup. */
                for (int i = 0; i < instr->num_components; i++)
                        dest[i] = c->inputs[instr->const_index[0] * 4 + i];

                break;

        case nir_intrinsic_store_output:
                for (int i = 0; i < instr->num_components; i++) {
                        c->outputs[instr->const_index[0] * 4 + i] =
                                qir_MOV(c, ntq_get_src(c, instr->src[0], i));
                }
                /* Track the high-water mark of written output slots. */
                c->num_outputs = MAX2(c->num_outputs,
                                      instr->const_index[0] * 4 +
                                      instr->num_components + 1);
                break;

        case nir_intrinsic_discard:
                /* Unconditional discard: all-ones condition. */
                c->discard = qir_uniform_ui(c, ~0);
                break;

        case nir_intrinsic_discard_if:
                /* OR this discard's condition into any previous one,
                 * seeding with 0 (no discard) the first time.
                 */
                if (c->discard.file == QFILE_NULL)
                        c->discard = qir_uniform_ui(c, 0);
                c->discard = qir_OR(c, c->discard,
                                    ntq_get_src(c, instr->src[0], 0));
                break;

        default:
                fprintf(stderr, "Unknown intrinsic: ");
                nir_print_instr(&instr->instr, stderr);
                fprintf(stderr, "\n");
                break;
        }
}
||
1908 | |||
1909 | static void |
||
1910 | ntq_emit_if(struct vc4_compile *c, nir_if *if_stmt) |
||
1911 | { |
||
1912 | fprintf(stderr, "general IF statements not handled.\n"); |
||
1913 | } |
||
1914 | |||
1915 | static void |
||
1916 | ntq_emit_instr(struct vc4_compile *c, nir_instr *instr) |
||
1917 | { |
||
1918 | switch (instr->type) { |
||
1919 | case nir_instr_type_alu: |
||
1920 | ntq_emit_alu(c, nir_instr_as_alu(instr)); |
||
1921 | break; |
||
1922 | |||
1923 | case nir_instr_type_intrinsic: |
||
1924 | ntq_emit_intrinsic(c, nir_instr_as_intrinsic(instr)); |
||
1925 | break; |
||
1926 | |||
1927 | case nir_instr_type_load_const: |
||
1928 | ntq_emit_load_const(c, nir_instr_as_load_const(instr)); |
||
1929 | break; |
||
1930 | |||
1931 | case nir_instr_type_tex: |
||
1932 | ntq_emit_tex(c, nir_instr_as_tex(instr)); |
||
1933 | break; |
||
1934 | |||
1935 | default: |
||
1936 | fprintf(stderr, "Unknown NIR instr type: "); |
||
1937 | nir_print_instr(instr, stderr); |
||
1938 | fprintf(stderr, "\n"); |
||
1939 | abort(); |
||
1940 | } |
||
1941 | } |
||
1942 | |||
1943 | static void |
||
1944 | ntq_emit_block(struct vc4_compile *c, nir_block *block) |
||
1945 | { |
||
1946 | nir_foreach_instr(block, instr) { |
||
1947 | ntq_emit_instr(c, instr); |
||
1948 | } |
||
1949 | } |
||
1950 | |||
1951 | static void |
||
1952 | ntq_emit_cf_list(struct vc4_compile *c, struct exec_list *list) |
||
1953 | { |
||
1954 | foreach_list_typed(nir_cf_node, node, node, list) { |
||
1955 | switch (node->type) { |
||
1956 | /* case nir_cf_node_loop: */ |
||
1957 | case nir_cf_node_block: |
||
1958 | ntq_emit_block(c, nir_cf_node_as_block(node)); |
||
1959 | break; |
||
1960 | |||
1961 | case nir_cf_node_if: |
||
1962 | ntq_emit_if(c, nir_cf_node_as_if(node)); |
||
1963 | break; |
||
1964 | |||
1965 | default: |
||
1966 | assert(0); |
||
1967 | } |
||
1968 | } |
||
1969 | } |
||
1970 | |||
/* Emits QIR for one function implementation: declares its local
 * registers first, then walks the body's control-flow list.
 */
static void
ntq_emit_impl(struct vc4_compile *c, nir_function_impl *impl)
{
        ntq_setup_registers(c, &impl->registers);
        ntq_emit_cf_list(c, &impl->body);
}
||
1977 | |||
/* Top-level NIR-to-QIR translation: declares shader-level state
 * (inputs, outputs, uniforms, global registers), then emits the body
 * of the shader's single "main" function.
 */
static void
nir_to_qir(struct vc4_compile *c)
{
        ntq_setup_inputs(c);
        ntq_setup_outputs(c);
        ntq_setup_uniforms(c);
        ntq_setup_registers(c, &c->s->registers);

        /* Find the main function and emit the body. */
        nir_foreach_overload(c->s, overload) {
                assert(strcmp(overload->function->name, "main") == 0);
                assert(overload->impl);
                ntq_emit_impl(c, overload->impl);
        }
}
||
1993 | |||
/* NIR compiler options for vc4: request lowering of operations this
 * backend doesn't emit directly, so only supported ALU forms reach the
 * QIR emitters.
 */
static const nir_shader_compiler_options nir_options = {
        .lower_ffma = true,
        .lower_flrp = true,
        .lower_fpow = true,
        .lower_fsat = true,
        .lower_fsqrt = true,
        .lower_negate = true,
};
||
2002 | |||
2003 | static bool |
||
2004 | count_nir_instrs_in_block(nir_block *block, void *state) |
||
2005 | { |
||
2006 | int *count = (int *) state; |
||
2007 | nir_foreach_instr(block, instr) { |
||
2008 | *count = *count + 1; |
||
2009 | } |
||
2010 | return true; |
||
2011 | } |
||
2012 | |||
2013 | static int |
||
2014 | count_nir_instrs(nir_shader *nir) |
||
2015 | { |
||
2016 | int count = 0; |
||
2017 | nir_foreach_overload(nir, overload) { |
||
2018 | if (!overload->impl) |
||
2019 | continue; |
||
2020 | nir_foreach_block(overload->impl, count_nir_instrs_in_block, &count); |
||
2021 | } |
||
2022 | return count; |
||
2023 | } |
||
2024 | |||
/* Compiles one shader variant for the given key:
 * TGSI -> (optional two-sided-color lowering) -> NIR -> NIR
 * optimization -> QIR -> QIR optimization -> QPU code generation.
 * Returns the compile context holding the generated instructions and
 * uniform stream.
 */
static struct vc4_compile *
vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage,
               struct vc4_key *key)
{
        struct vc4_compile *c = qir_compile_init();

        c->stage = stage;
        c->shader_state = &key->shader_state->base;
        c->program_id = key->shader_state->program_id;
        c->variant_id = key->shader_state->compiled_variant_count++;

        c->key = key;
        switch (stage) {
        case QSTAGE_FRAG:
                c->fs_key = (struct vc4_fs_key *)key;
                /* Point and line primitives consume implicit varyings
                 * (the ~0 semantic marks them as non-VS-output).
                 */
                if (c->fs_key->is_points) {
                        c->point_x = emit_fragment_varying(c, ~0, ~0, 0);
                        c->point_y = emit_fragment_varying(c, ~0, ~0, 0);
                } else if (c->fs_key->is_lines) {
                        c->line_x = emit_fragment_varying(c, ~0, ~0, 0);
                }
                break;
        case QSTAGE_VERT:
                c->vs_key = (struct vc4_vs_key *)key;
                break;
        case QSTAGE_COORD:
                c->vs_key = (struct vc4_vs_key *)key;
                break;
        }

        const struct tgsi_token *tokens = key->shader_state->base.tokens;
        if (c->fs_key && c->fs_key->light_twoside) {
                /* Lower two-sided color selection in TGSI, caching the
                 * result on the shader state so it's done once per
                 * shader rather than per variant.
                 */
                if (!key->shader_state->twoside_tokens) {
                        const struct tgsi_lowering_config lowering_config = {
                                .color_two_side = true,
                        };
                        struct tgsi_shader_info info;
                        key->shader_state->twoside_tokens =
                                tgsi_transform_lowering(&lowering_config,
                                                        key->shader_state->base.tokens,
                                                        &info);

                        /* If no transformation occurred, then NULL is
                         * returned and we just use our original tokens.
                         */
                        if (!key->shader_state->twoside_tokens) {
                                key->shader_state->twoside_tokens =
                                        key->shader_state->base.tokens;
                        }
                }
                tokens = key->shader_state->twoside_tokens;
        }

        if (vc4_debug & VC4_DEBUG_TGSI) {
                fprintf(stderr, "%s prog %d/%d TGSI:\n",
                        qir_get_stage_name(c->stage),
                        c->program_id, c->variant_id);
                tgsi_dump(tokens, 0);
        }

        c->s = tgsi_to_nir(tokens, &nir_options);
        nir_opt_global_to_local(c->s);
        nir_convert_to_ssa(c->s);
        nir_lower_idiv(c->s);

        vc4_optimize_nir(c->s);

        nir_remove_dead_variables(c->s);

        /* The QIR emitters work on non-SSA NIR. */
        nir_convert_from_ssa(c->s);

        if (vc4_debug & VC4_DEBUG_SHADERDB) {
                fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d NIR instructions\n",
                        qir_get_stage_name(c->stage),
                        c->program_id, c->variant_id,
                        count_nir_instrs(c->s));
        }

        if (vc4_debug & VC4_DEBUG_NIR) {
                fprintf(stderr, "%s prog %d/%d NIR:\n",
                        qir_get_stage_name(c->stage),
                        c->program_id, c->variant_id);
                nir_print_shader(c->s, stderr);
        }

        nir_to_qir(c);

        switch (stage) {
        case QSTAGE_FRAG:
                emit_frag_end(c);
                break;
        case QSTAGE_VERT:
                /* Vertex shaders are emitted against the currently
                 * bound FS's input semantics.
                 */
                emit_vert_end(c,
                              vc4->prog.fs->input_semantics,
                              vc4->prog.fs->num_inputs);
                break;
        case QSTAGE_COORD:
                emit_coord_end(c);
                break;
        }

        if (vc4_debug & VC4_DEBUG_QIR) {
                fprintf(stderr, "%s prog %d/%d pre-opt QIR:\n",
                        qir_get_stage_name(c->stage),
                        c->program_id, c->variant_id);
                qir_dump(c);
        }

        qir_optimize(c);
        qir_lower_uniforms(c);

        if (vc4_debug & VC4_DEBUG_QIR) {
                fprintf(stderr, "%s prog %d/%d QIR:\n",
                        qir_get_stage_name(c->stage),
                        c->program_id, c->variant_id);
                qir_dump(c);
        }

        qir_reorder_uniforms(c);
        vc4_generate_code(vc4, c);

        if (vc4_debug & VC4_DEBUG_SHADERDB) {
                fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d instructions\n",
                        qir_get_stage_name(c->stage),
                        c->program_id, c->variant_id,
                        c->qpu_inst_count);
                fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d uniforms\n",
                        qir_get_stage_name(c->stage),
                        c->program_id, c->variant_id,
                        c->num_uniforms);
        }

        /* The NIR shader is no longer needed once QPU code exists. */
        ralloc_free(c->s);

        return c;
}
||
2160 | |||
2161 | static void * |
||
2162 | vc4_shader_state_create(struct pipe_context *pctx, |
||
2163 | const struct pipe_shader_state *cso) |
||
2164 | { |
||
2165 | struct vc4_context *vc4 = vc4_context(pctx); |
||
2166 | struct vc4_uncompiled_shader *so = CALLOC_STRUCT(vc4_uncompiled_shader); |
||
2167 | if (!so) |
||
2168 | return NULL; |
||
2169 | |||
2170 | so->base.tokens = tgsi_dup_tokens(cso->tokens); |
||
2171 | so->program_id = vc4->next_uncompiled_program_id++; |
||
2172 | |||
2173 | return so; |
||
2174 | } |
||
2175 | |||
2176 | static void |
||
2177 | copy_uniform_state_to_shader(struct vc4_compiled_shader *shader, |
||
2178 | struct vc4_compile *c) |
||
2179 | { |
||
2180 | int count = c->num_uniforms; |
||
2181 | struct vc4_shader_uniform_info *uinfo = &shader->uniforms; |
||
2182 | |||
2183 | uinfo->count = count; |
||
2184 | uinfo->data = ralloc_array(shader, uint32_t, count); |
||
2185 | memcpy(uinfo->data, c->uniform_data, |
||
2186 | count * sizeof(*uinfo->data)); |
||
2187 | uinfo->contents = ralloc_array(shader, enum quniform_contents, count); |
||
2188 | memcpy(uinfo->contents, c->uniform_contents, |
||
2189 | count * sizeof(*uinfo->contents)); |
||
2190 | uinfo->num_texture_samples = c->num_texture_samples; |
||
2191 | } |
||
2192 | |||
2193 | static struct vc4_compiled_shader * |
||
2194 | vc4_get_compiled_shader(struct vc4_context *vc4, enum qstage stage, |
||
2195 | struct vc4_key *key) |
||
2196 | { |
||
2197 | struct hash_table *ht; |
||
2198 | uint32_t key_size; |
||
2199 | if (stage == QSTAGE_FRAG) { |
||
2200 | ht = vc4->fs_cache; |
||
2201 | key_size = sizeof(struct vc4_fs_key); |
||
2202 | } else { |
||
2203 | ht = vc4->vs_cache; |
||
2204 | key_size = sizeof(struct vc4_vs_key); |
||
2205 | } |
||
2206 | |||
2207 | struct vc4_compiled_shader *shader; |
||
2208 | struct hash_entry *entry = _mesa_hash_table_search(ht, key); |
||
2209 | if (entry) |
||
2210 | return entry->data; |
||
2211 | |||
2212 | struct vc4_compile *c = vc4_shader_ntq(vc4, stage, key); |
||
2213 | shader = rzalloc(NULL, struct vc4_compiled_shader); |
||
2214 | |||
2215 | shader->program_id = vc4->next_compiled_program_id++; |
||
2216 | if (stage == QSTAGE_FRAG) { |
||
2217 | bool input_live[c->num_input_semantics]; |
||
2218 | struct simple_node *node; |
||
2219 | |||
2220 | memset(input_live, 0, sizeof(input_live)); |
||
2221 | foreach(node, &c->instructions) { |
||
2222 | struct qinst *inst = (struct qinst *)node; |
||
2223 | for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) { |
||
2224 | if (inst->src[i].file == QFILE_VARY) |
||
2225 | input_live[inst->src[i].index] = true; |
||
2226 | } |
||
2227 | } |
||
2228 | |||
2229 | shader->input_semantics = ralloc_array(shader, |
||
2230 | struct vc4_varying_semantic, |
||
2231 | c->num_input_semantics); |
||
2232 | |||
2233 | for (int i = 0; i < c->num_input_semantics; i++) { |
||
2234 | struct vc4_varying_semantic *sem = &c->input_semantics[i]; |
||
2235 | |||
2236 | if (!input_live[i]) |
||
2237 | continue; |
||
2238 | |||
2239 | /* Skip non-VS-output inputs. */ |
||
2240 | if (sem->semantic == (uint8_t)~0) |
||
2241 | continue; |
||
2242 | |||
2243 | if (sem->semantic == TGSI_SEMANTIC_COLOR || |
||
2244 | sem->semantic == TGSI_SEMANTIC_BCOLOR) { |
||
2245 | shader->color_inputs |= (1 << shader->num_inputs); |
||
2246 | } |
||
2247 | |||
2248 | shader->input_semantics[shader->num_inputs] = *sem; |
||
2249 | shader->num_inputs++; |
||
2250 | } |
||
2251 | } else { |
||
2252 | shader->num_inputs = c->num_inputs; |
||
2253 | |||
2254 | shader->vattr_offsets[0] = 0; |
||
2255 | for (int i = 0; i < 8; i++) { |
||
2256 | shader->vattr_offsets[i + 1] = |
||
2257 | shader->vattr_offsets[i] + c->vattr_sizes[i]; |
||
2258 | |||
2259 | if (c->vattr_sizes[i]) |
||
2260 | shader->vattrs_live |= (1 << i); |
||
2261 | } |
||
2262 | } |
||
2263 | |||
2264 | copy_uniform_state_to_shader(shader, c); |
||
2265 | shader->bo = vc4_bo_alloc_mem(vc4->screen, c->qpu_insts, |
||
2266 | c->qpu_inst_count * sizeof(uint64_t), |
||
2267 | "code"); |
||
2268 | |||
2269 | /* Copy the compiler UBO range state to the compiled shader, dropping |
||
2270 | * out arrays that were never referenced by an indirect load. |
||
2271 | * |
||
2272 | * (Note that QIR dead code elimination of an array access still |
||
2273 | * leaves that array alive, though) |
||
2274 | */ |
||
2275 | if (c->num_ubo_ranges) { |
||
2276 | shader->num_ubo_ranges = c->num_ubo_ranges; |
||
2277 | shader->ubo_ranges = ralloc_array(shader, struct vc4_ubo_range, |
||
2278 | c->num_ubo_ranges); |
||
2279 | uint32_t j = 0; |
||
2280 | for (int i = 0; i < c->num_uniform_ranges; i++) { |
||
2281 | struct vc4_compiler_ubo_range *range = |
||
2282 | &c->ubo_ranges[i]; |
||
2283 | if (!range->used) |
||
2284 | continue; |
||
2285 | |||
2286 | shader->ubo_ranges[j].dst_offset = range->dst_offset; |
||
2287 | shader->ubo_ranges[j].src_offset = range->src_offset; |
||
2288 | shader->ubo_ranges[j].size = range->size; |
||
2289 | shader->ubo_size += c->ubo_ranges[i].size; |
||
2290 | j++; |
||
2291 | } |
||
2292 | } |
||
2293 | if (shader->ubo_size) { |
||
2294 | fprintf(stderr, "SHADER-DB: %s prog %d/%d: %d UBO uniforms\n", |
||
2295 | qir_get_stage_name(c->stage), |
||
2296 | c->program_id, c->variant_id, |
||
2297 | shader->ubo_size / 4); |
||
2298 | } |
||
2299 | |||
2300 | qir_compile_destroy(c); |
||
2301 | |||
2302 | struct vc4_key *dup_key; |
||
2303 | dup_key = ralloc_size(shader, key_size); |
||
2304 | memcpy(dup_key, key, key_size); |
||
2305 | _mesa_hash_table_insert(ht, dup_key, shader); |
||
2306 | |||
2307 | return shader; |
||
2308 | } |
||
2309 | |||
/* Fills in the key fields shared between FS and VS keys: per-unit
 * sampler/texture-view state and the user clip plane enable mask.
 */
static void
vc4_setup_shared_key(struct vc4_context *vc4, struct vc4_key *key,
                     struct vc4_texture_stateobj *texstate)
{
        for (int i = 0; i < texstate->num_textures; i++) {
                struct pipe_sampler_view *sampler = texstate->textures[i];
                struct pipe_sampler_state *sampler_state =
                        texstate->samplers[i];

                /* NOTE(review): assumes samplers[i] is non-NULL whenever
                 * textures[i] is bound — confirm against the state
                 * tracker's binding behavior.
                 */
                if (sampler) {
                        key->tex[i].format = sampler->format;
                        key->tex[i].swizzle[0] = sampler->swizzle_r;
                        key->tex[i].swizzle[1] = sampler->swizzle_g;
                        key->tex[i].swizzle[2] = sampler->swizzle_b;
                        key->tex[i].swizzle[3] = sampler->swizzle_a;
                        key->tex[i].compare_mode = sampler_state->compare_mode;
                        key->tex[i].compare_func = sampler_state->compare_func;
                        key->tex[i].wrap_s = sampler_state->wrap_s;
                        key->tex[i].wrap_t = sampler_state->wrap_t;
                }
        }

        key->ucp_enables = vc4->rasterizer->base.clip_plane_enable;
}
||
2334 | |||
/* Rebuilds the fragment shader key from current pipe state and fetches
 * (or compiles) the matching FS variant.  Returns early when none of
 * the FS-affecting dirty bits are set.
 */
static void
vc4_update_compiled_fs(struct vc4_context *vc4, uint8_t prim_mode)
{
        struct vc4_fs_key local_key;
        struct vc4_fs_key *key = &local_key;

        if (!(vc4->dirty & (VC4_DIRTY_PRIM_MODE |
                            VC4_DIRTY_BLEND |
                            VC4_DIRTY_FRAMEBUFFER |
                            VC4_DIRTY_ZSA |
                            VC4_DIRTY_RASTERIZER |
                            VC4_DIRTY_FRAGTEX |
                            VC4_DIRTY_TEXSTATE |
                            VC4_DIRTY_UNCOMPILED_FS))) {
                return;
        }

        /* Zero the whole key first: the cache hashes/compares its raw
         * bytes (see fs_cache_hash()/fs_cache_compare()).
         */
        memset(key, 0, sizeof(*key));
        vc4_setup_shared_key(vc4, &key->base, &vc4->fragtex);
        key->base.shader_state = vc4->prog.bind_fs;
        key->is_points = (prim_mode == PIPE_PRIM_POINTS);
        key->is_lines = (prim_mode >= PIPE_PRIM_LINES &&
                         prim_mode <= PIPE_PRIM_LINE_STRIP);
        key->blend = vc4->blend->rt[0];
        if (vc4->blend->logicop_enable) {
                key->logicop_func = vc4->blend->logicop_func;
        } else {
                key->logicop_func = PIPE_LOGICOP_COPY;
        }
        if (vc4->framebuffer.cbufs[0])
                key->color_format = vc4->framebuffer.cbufs[0]->format;

        key->stencil_enabled = vc4->zsa->stencil_uniforms[0] != 0;
        key->stencil_twoside = vc4->zsa->stencil_uniforms[1] != 0;
        key->stencil_full_writemasks = vc4->zsa->stencil_uniforms[2] != 0;
        /* Stencil implies depth handling in the shader. */
        key->depth_enabled = (vc4->zsa->base.depth.enabled ||
                              key->stencil_enabled);
        if (vc4->zsa->base.alpha.enabled) {
                key->alpha_test = true;
                key->alpha_test_func = vc4->zsa->base.alpha.func;
        }

        if (key->is_points) {
                key->point_sprite_mask =
                        vc4->rasterizer->base.sprite_coord_enable;
                key->point_coord_upper_left =
                        (vc4->rasterizer->base.sprite_coord_mode ==
                         PIPE_SPRITE_COORD_UPPER_LEFT);
        }

        key->light_twoside = vc4->rasterizer->base.light_twoside;

        struct vc4_compiled_shader *old_fs = vc4->prog.fs;
        vc4->prog.fs = vc4_get_compiled_shader(vc4, QSTAGE_FRAG, &key->base);
        if (vc4->prog.fs == old_fs)
                return;

        vc4->dirty |= VC4_DIRTY_COMPILED_FS;
        /* If the set of color inputs changed under flat shading, the
         * flat-shade flags need re-emission.
         */
        if (vc4->rasterizer->base.flatshade &&
            old_fs && vc4->prog.fs->color_inputs != old_fs->color_inputs) {
                vc4->dirty |= VC4_DIRTY_FLAT_SHADE_FLAGS;
        }
}
||
2398 | |||
/* Rebuilds the vertex shader key from current pipe state and fetches
 * (or compiles) both the VS variant and the matching coordinate-shader
 * variant.  Returns early when none of the VS-affecting dirty bits are
 * set.
 */
static void
vc4_update_compiled_vs(struct vc4_context *vc4, uint8_t prim_mode)
{
        struct vc4_vs_key local_key;
        struct vc4_vs_key *key = &local_key;

        if (!(vc4->dirty & (VC4_DIRTY_PRIM_MODE |
                            VC4_DIRTY_RASTERIZER |
                            VC4_DIRTY_VERTTEX |
                            VC4_DIRTY_TEXSTATE |
                            VC4_DIRTY_VTXSTATE |
                            VC4_DIRTY_UNCOMPILED_VS |
                            VC4_DIRTY_COMPILED_FS))) {
                return;
        }

        /* Zero the key first: the cache hashes/compares its raw bytes. */
        memset(key, 0, sizeof(*key));
        vc4_setup_shared_key(vc4, &key->base, &vc4->verttex);
        key->base.shader_state = vc4->prog.bind_vs;
        /* The VS is specialized against the currently compiled FS. */
        key->compiled_fs_id = vc4->prog.fs->program_id;

        for (int i = 0; i < ARRAY_SIZE(key->attr_formats); i++)
                key->attr_formats[i] = vc4->vtx->pipe[i].src_format;

        key->per_vertex_point_size =
                (prim_mode == PIPE_PRIM_POINTS &&
                 vc4->rasterizer->base.point_size_per_vertex);

        vc4->prog.vs = vc4_get_compiled_shader(vc4, QSTAGE_VERT, &key->base);
        /* Reuse the same key with is_coord set to get the coordinate
         * shader variant.
         */
        key->is_coord = true;
        vc4->prog.cs = vc4_get_compiled_shader(vc4, QSTAGE_COORD, &key->base);
}
||
2431 | |||
/* Draw-time entry point: refreshes the compiled FS before the VS, since
 * the VS key includes the compiled FS's program id and the VS is
 * emitted against the FS's input semantics.
 */
void
vc4_update_compiled_shaders(struct vc4_context *vc4, uint8_t prim_mode)
{
        vc4_update_compiled_fs(vc4, prim_mode);
        vc4_update_compiled_vs(vc4, prim_mode);
}
||
2438 | |||
2439 | static uint32_t |
||
2440 | fs_cache_hash(const void *key) |
||
2441 | { |
||
2442 | return _mesa_hash_data(key, sizeof(struct vc4_fs_key)); |
||
2443 | } |
||
2444 | |||
2445 | static uint32_t |
||
2446 | vs_cache_hash(const void *key) |
||
2447 | { |
||
2448 | return _mesa_hash_data(key, sizeof(struct vc4_vs_key)); |
||
2449 | } |
||
2450 | |||
2451 | static bool |
||
2452 | fs_cache_compare(const void *key1, const void *key2) |
||
2453 | { |
||
2454 | return memcmp(key1, key2, sizeof(struct vc4_fs_key)) == 0; |
||
2455 | } |
||
2456 | |||
2457 | static bool |
||
2458 | vs_cache_compare(const void *key1, const void *key2) |
||
2459 | { |
||
2460 | return memcmp(key1, key2, sizeof(struct vc4_vs_key)) == 0; |
||
2461 | } |
||
2462 | |||
2463 | static void |
||
2464 | delete_from_cache_if_matches(struct hash_table *ht, |
||
2465 | struct hash_entry *entry, |
||
2466 | struct vc4_uncompiled_shader *so) |
||
2467 | { |
||
2468 | const struct vc4_key *key = entry->key; |
||
2469 | |||
2470 | if (key->shader_state == so) { |
||
2471 | struct vc4_compiled_shader *shader = entry->data; |
||
2472 | _mesa_hash_table_remove(ht, entry); |
||
2473 | vc4_bo_unreference(&shader->bo); |
||
2474 | ralloc_free(shader); |
||
2475 | } |
||
2476 | } |
||
2477 | |||
/* Pipe CSO delete hook for shaders: evicts every compiled variant of
 * this uncompiled shader from both caches, then frees the token copies
 * and the CSO itself.
 */
static void
vc4_shader_state_delete(struct pipe_context *pctx, void *hwcso)
{
        struct vc4_context *vc4 = vc4_context(pctx);
        struct vc4_uncompiled_shader *so = hwcso;

        struct hash_entry *entry;
        hash_table_foreach(vc4->fs_cache, entry)
                delete_from_cache_if_matches(vc4->fs_cache, entry, so);
        hash_table_foreach(vc4->vs_cache, entry)
                delete_from_cache_if_matches(vc4->vs_cache, entry, so);

        /* twoside_tokens may alias base.tokens when the two-side
         * lowering made no change; avoid a double free in that case.
         */
        if (so->twoside_tokens != so->base.tokens)
                free((void *)so->twoside_tokens);
        free((void *)so->base.tokens);
        free(so);
}
||
2495 | |||
2496 | static uint32_t translate_wrap(uint32_t p_wrap, bool using_nearest) |
||
2497 | { |
||
2498 | switch (p_wrap) { |
||
2499 | case PIPE_TEX_WRAP_REPEAT: |
||
2500 | return 0; |
||
2501 | case PIPE_TEX_WRAP_CLAMP_TO_EDGE: |
||
2502 | return 1; |
||
2503 | case PIPE_TEX_WRAP_MIRROR_REPEAT: |
||
2504 | return 2; |
||
2505 | case PIPE_TEX_WRAP_CLAMP_TO_BORDER: |
||
2506 | return 3; |
||
2507 | case PIPE_TEX_WRAP_CLAMP: |
||
2508 | return (using_nearest ? 1 : 3); |
||
2509 | default: |
||
2510 | fprintf(stderr, "Unknown wrap mode %d\n", p_wrap); |
||
2511 | assert(!"not reached"); |
||
2512 | return 0; |
||
2513 | } |
||
2514 | } |
||
2515 | |||
/* Emits texture parameter word P0 for one unit as a relocation against
 * the texture BO: base offset (in 4KB units, from the >> 12), mip
 * level count, cube-map mode flag, and the low 4 bits of the vc4
 * texture type.
 */
static void
write_texture_p0(struct vc4_context *vc4,
                 struct vc4_texture_stateobj *texstate,
                 uint32_t unit)
{
        struct pipe_sampler_view *texture = texstate->textures[unit];
        struct vc4_resource *rsc = vc4_resource(texture->texture);

        cl_reloc(vc4, &vc4->uniforms, rsc->bo,
                 VC4_SET_FIELD(rsc->slices[0].offset >> 12, VC4_TEX_P0_OFFSET) |
                 VC4_SET_FIELD(texture->u.tex.last_level -
                               texture->u.tex.first_level, VC4_TEX_P0_MIPLVLS) |
                 VC4_SET_FIELD(texture->target == PIPE_TEXTURE_CUBE,
                               VC4_TEX_P0_CMMODE) |
                 VC4_SET_FIELD(rsc->vc4_format & 15, VC4_TEX_P0_TYPE));
}
||
2532 | |||
2533 | static void |
||
2534 | write_texture_p1(struct vc4_context *vc4, |
||
2535 | struct vc4_texture_stateobj *texstate, |
||
2536 | uint32_t unit) |
||
2537 | { |
||
2538 | struct pipe_sampler_view *texture = texstate->textures[unit]; |
||
2539 | struct vc4_resource *rsc = vc4_resource(texture->texture); |
||
2540 | struct pipe_sampler_state *sampler = texstate->samplers[unit]; |
||
2541 | static const uint8_t minfilter_map[6] = { |
||
2542 | VC4_TEX_P1_MINFILT_NEAR_MIP_NEAR, |
||
2543 | VC4_TEX_P1_MINFILT_LIN_MIP_NEAR, |
||
2544 | VC4_TEX_P1_MINFILT_NEAR_MIP_LIN, |
||
2545 | VC4_TEX_P1_MINFILT_LIN_MIP_LIN, |
||
2546 | VC4_TEX_P1_MINFILT_NEAREST, |
||
2547 | VC4_TEX_P1_MINFILT_LINEAR, |
||
2548 | }; |
||
2549 | static const uint32_t magfilter_map[] = { |
||
2550 | [PIPE_TEX_FILTER_NEAREST] = VC4_TEX_P1_MAGFILT_NEAREST, |
||
2551 | [PIPE_TEX_FILTER_LINEAR] = VC4_TEX_P1_MAGFILT_LINEAR, |
||
2552 | }; |
||
2553 | |||
2554 | bool either_nearest = |
||
2555 | (sampler->mag_img_filter == PIPE_TEX_MIPFILTER_NEAREST || |
||
2556 | sampler->min_img_filter == PIPE_TEX_MIPFILTER_NEAREST); |
||
2557 | |||
2558 | cl_aligned_u32(&vc4->uniforms, |
||
2559 | VC4_SET_FIELD(rsc->vc4_format >> 4, VC4_TEX_P1_TYPE4) | |
||
2560 | VC4_SET_FIELD(texture->texture->height0 & 2047, |
||
2561 | VC4_TEX_P1_HEIGHT) | |
||
2562 | VC4_SET_FIELD(texture->texture->width0 & 2047, |
||
2563 | VC4_TEX_P1_WIDTH) | |
||
2564 | VC4_SET_FIELD(magfilter_map[sampler->mag_img_filter], |
||
2565 | VC4_TEX_P1_MAGFILT) | |
||
2566 | VC4_SET_FIELD(minfilter_map[sampler->min_mip_filter * 2 + |
||
2567 | sampler->min_img_filter], |
||
2568 | VC4_TEX_P1_MINFILT) | |
||
2569 | VC4_SET_FIELD(translate_wrap(sampler->wrap_s, either_nearest), |
||
2570 | VC4_TEX_P1_WRAP_S) | |
||
2571 | VC4_SET_FIELD(translate_wrap(sampler->wrap_t, either_nearest), |
||
2572 | VC4_TEX_P1_WRAP_T)); |
||
2573 | } |
||
2574 | |||
/* Emits texture parameter word P2: the cube-map-stride parameter type,
 * the cube map stride (in 4KB units, from the >> 12), and the
 * base-level-of-detail flag.  `data` packs the texture unit in its low
 * 16 bits and the BSLOD flag in bit 16.
 */
static void
write_texture_p2(struct vc4_context *vc4,
                 struct vc4_texture_stateobj *texstate,
                 uint32_t data)
{
        uint32_t unit = data & 0xffff;
        struct pipe_sampler_view *texture = texstate->textures[unit];
        struct vc4_resource *rsc = vc4_resource(texture->texture);

        cl_aligned_u32(&vc4->uniforms,
                       VC4_SET_FIELD(VC4_TEX_P2_PTYPE_CUBE_MAP_STRIDE,
                                     VC4_TEX_P2_PTYPE) |
                       VC4_SET_FIELD(rsc->cube_map_stride >> 12, VC4_TEX_P2_CMST) |
                       VC4_SET_FIELD((data >> 16) & 1, VC4_TEX_P2_BSLOD));
}
||
2590 | |||
2591 | |||
/* Builds a 4-entry util_format swizzle initializer from channel
 * letters, e.g. SWIZ(X, Y, Z, W).
 */
#define SWIZ(x,y,z,w) {                                 \
        UTIL_FORMAT_SWIZZLE_##x,                        \
        UTIL_FORMAT_SWIZZLE_##y,                        \
        UTIL_FORMAT_SWIZZLE_##z,                        \
        UTIL_FORMAT_SWIZZLE_##w                         \
}
||
2598 | |||
/* Emits the border color uniform for one texture unit, packed into the
 * channel layout matching the resource's vc4 texture format.
 */
static void
write_texture_border_color(struct vc4_context *vc4,
                           struct vc4_texture_stateobj *texstate,
                           uint32_t unit)
{
        struct pipe_sampler_state *sampler = texstate->samplers[unit];
        struct pipe_sampler_view *texture = texstate->textures[unit];
        struct vc4_resource *rsc = vc4_resource(texture->texture);
        union util_color uc;

        const struct util_format_description *tex_format_desc =
                util_format_description(texture->format);

        float border_color[4];
        for (int i = 0; i < 4; i++)
                border_color[i] = sampler->border_color.f[i];
        /* For sRGB formats, convert the RGB channels (alpha stays as
         * given).
         */
        if (util_format_is_srgb(texture->format)) {
                for (int i = 0; i < 3; i++)
                        border_color[i] =
                                util_format_linear_to_srgb_float(border_color[i]);
        }

        /* Turn the border color into the layout of channels that it would
         * have when stored as texture contents.
         */
        float storage_color[4];
        util_format_unswizzle_4f(storage_color,
                                 border_color,
                                 tex_format_desc->swizzle);

        /* Now, pack so that when the vc4_format-sampled texture contents are
         * replaced with our border color, the vc4_get_format_swizzle()
         * swizzling will get the right channels.
         */
        if (util_format_is_depth_or_stencil(texture->format)) {
                uc.ui[0] = util_pack_z(PIPE_FORMAT_Z24X8_UNORM,
                                       sampler->border_color.f[0]) << 8;
        } else {
                /* NOTE(review): the 4444/565 cases pack through 8-bit
                 * RGBA formats — presumably matching how the hardware
                 * expands those types; confirm against the format docs.
                 */
                switch (rsc->vc4_format) {
                default:
                case VC4_TEXTURE_TYPE_RGBA8888:
                        util_pack_color(storage_color,
                                        PIPE_FORMAT_R8G8B8A8_UNORM, &uc);
                        break;
                case VC4_TEXTURE_TYPE_RGBA4444:
                        util_pack_color(storage_color,
                                        PIPE_FORMAT_A8B8G8R8_UNORM, &uc);
                        break;
                case VC4_TEXTURE_TYPE_RGB565:
                        util_pack_color(storage_color,
                                        PIPE_FORMAT_B8G8R8A8_UNORM, &uc);
                        break;
                case VC4_TEXTURE_TYPE_ALPHA:
                        uc.ui[0] = float_to_ubyte(storage_color[0]) << 24;
                        break;
                case VC4_TEXTURE_TYPE_LUMALPHA:
                        uc.ui[0] = ((float_to_ubyte(storage_color[1]) << 24) |
                                    (float_to_ubyte(storage_color[0]) << 0));
                        break;
                }
        }

        cl_aligned_u32(&vc4->uniforms, uc.ui[0]);
}
||
2663 | |||
2664 | static uint32_t |
||
2665 | get_texrect_scale(struct vc4_texture_stateobj *texstate, |
||
2666 | enum quniform_contents contents, |
||
2667 | uint32_t data) |
||
2668 | { |
||
2669 | struct pipe_sampler_view *texture = texstate->textures[data]; |
||
2670 | uint32_t dim; |
||
2671 | |||
2672 | if (contents == QUNIFORM_TEXRECT_SCALE_X) |
||
2673 | dim = texture->texture->width0; |
||
2674 | else |
||
2675 | dim = texture->texture->height0; |
||
2676 | |||
2677 | return fui(1.0f / dim); |
||
2678 | } |
||
2679 | |||
2680 | static struct vc4_bo * |
||
2681 | vc4_upload_ubo(struct vc4_context *vc4, struct vc4_compiled_shader *shader, |
||
2682 | const uint32_t *gallium_uniforms) |
||
2683 | { |
||
2684 | if (!shader->ubo_size) |
||
2685 | return NULL; |
||
2686 | |||
2687 | struct vc4_bo *ubo = vc4_bo_alloc(vc4->screen, shader->ubo_size, "ubo"); |
||
2688 | uint32_t *data = vc4_bo_map(ubo); |
||
2689 | for (uint32_t i = 0; i < shader->num_ubo_ranges; i++) { |
||
2690 | memcpy(data + shader->ubo_ranges[i].dst_offset, |
||
2691 | gallium_uniforms + shader->ubo_ranges[i].src_offset, |
||
2692 | shader->ubo_ranges[i].size); |
||
2693 | } |
||
2694 | |||
2695 | return ubo; |
||
2696 | } |
||
2697 | |||
/*
 * Emits the uniform stream for one draw call: one 32-bit value (or texture
 * relocation) per entry in the compiled shader's uniform list, in exactly
 * the order the shader will consume them, sourced from current context
 * state.
 */
void
vc4_write_uniforms(struct vc4_context *vc4, struct vc4_compiled_shader *shader,
                   struct vc4_constbuf_stateobj *cb,
                   struct vc4_texture_stateobj *texstate)
{
        struct vc4_shader_uniform_info *uinfo = &shader->uniforms;
        /* Gallium constant buffer 0 holds the ordinary shader uniforms. */
        const uint32_t *gallium_uniforms = cb->cb[0].user_buffer;
        /* Indirectly-addressed uniform ranges go in a separate BO. */
        struct vc4_bo *ubo = vc4_upload_ubo(vc4, shader, gallium_uniforms);

        /* Reserve worst-case space up front: 4 bytes per uniform entry plus
         * 4 bytes per texture sample's relocation.
         */
        cl_ensure_space(&vc4->uniforms, (uinfo->count +
                                         uinfo->num_texture_samples) * 4);

        cl_start_shader_reloc(&vc4->uniforms, uinfo->num_texture_samples);

        for (int i = 0; i < uinfo->count; i++) {

                switch (uinfo->contents[i]) {
                case QUNIFORM_CONSTANT:
                        /* data[i] is the literal value itself. */
                        cl_aligned_u32(&vc4->uniforms, uinfo->data[i]);
                        break;
                case QUNIFORM_UNIFORM:
                        /* data[i] indexes into gallium constant buffer 0. */
                        cl_aligned_u32(&vc4->uniforms,
                                       gallium_uniforms[uinfo->data[i]]);
                        break;
                case QUNIFORM_VIEWPORT_X_SCALE:
                        /* NOTE(review): the 16.0f factor presumably converts
                         * to the hardware's subpixel fixed-point coordinate
                         * format — confirm against the viewport/setup code.
                         */
                        cl_aligned_f(&vc4->uniforms, vc4->viewport.scale[0] * 16.0f);
                        break;
                case QUNIFORM_VIEWPORT_Y_SCALE:
                        cl_aligned_f(&vc4->uniforms, vc4->viewport.scale[1] * 16.0f);
                        break;

                case QUNIFORM_VIEWPORT_Z_OFFSET:
                        cl_aligned_f(&vc4->uniforms, vc4->viewport.translate[2]);
                        break;
                case QUNIFORM_VIEWPORT_Z_SCALE:
                        cl_aligned_f(&vc4->uniforms, vc4->viewport.scale[2]);
                        break;

                case QUNIFORM_USER_CLIP_PLANE:
                        /* data[i] / 4 selects the clip plane, data[i] % 4 the
                         * component within it.
                         */
                        cl_aligned_f(&vc4->uniforms,
                                     vc4->clip.ucp[uinfo->data[i] / 4][uinfo->data[i] % 4]);
                        break;

                /* The three texture config words and the border color are
                 * emitted by helpers; data[i] is the texture unit index.
                 */
                case QUNIFORM_TEXTURE_CONFIG_P0:
                        write_texture_p0(vc4, texstate, uinfo->data[i]);
                        break;

                case QUNIFORM_TEXTURE_CONFIG_P1:
                        write_texture_p1(vc4, texstate, uinfo->data[i]);
                        break;

                case QUNIFORM_TEXTURE_CONFIG_P2:
                        write_texture_p2(vc4, texstate, uinfo->data[i]);
                        break;

                case QUNIFORM_UBO_ADDR:
                        /* Relocation to the BO uploaded above (may be NULL
                         * only if the compiler never emits this entry for a
                         * shader without UBO ranges).
                         */
                        cl_aligned_reloc(vc4, &vc4->uniforms, ubo, 0);
                        break;

                case QUNIFORM_TEXTURE_BORDER_COLOR:
                        write_texture_border_color(vc4, texstate, uinfo->data[i]);
                        break;

                case QUNIFORM_TEXRECT_SCALE_X:
                case QUNIFORM_TEXRECT_SCALE_Y:
                        /* 1/width or 1/height, for unnormalized RECT coords. */
                        cl_aligned_u32(&vc4->uniforms,
                                       get_texrect_scale(texstate,
                                                         uinfo->contents[i],
                                                         uinfo->data[i]));
                        break;

                case QUNIFORM_BLEND_CONST_COLOR:
                        /* data[i] selects the R/G/B/A channel. */
                        cl_aligned_f(&vc4->uniforms,
                                     CLAMP(vc4->blend_color.color[uinfo->data[i]], 0, 1));
                        break;

                case QUNIFORM_STENCIL:
                        /* data[i] indexes the pre-packed stencil config
                         * words; for the first two (<= 1) the current
                         * stencil ref value is OR'd into bits 15:8.
                         */
                        cl_aligned_u32(&vc4->uniforms,
                                       vc4->zsa->stencil_uniforms[uinfo->data[i]] |
                                       (uinfo->data[i] <= 1 ?
                                        (vc4->stencil_ref.ref_value[uinfo->data[i]] << 8) :
                                        0));
                        break;

                case QUNIFORM_ALPHA_REF:
                        cl_aligned_f(&vc4->uniforms,
                                     vc4->zsa->base.alpha.ref_value);
                        break;
                }
#if 0
                uint32_t written_val = *(uint32_t *)(vc4->uniforms.next - 4);
                fprintf(stderr, "%p: %d / 0x%08x (%f)\n",
                        shader, i, written_val, uif(written_val));
#endif
        }
}
||
2794 | |||
2795 | static void |
||
2796 | vc4_fp_state_bind(struct pipe_context *pctx, void *hwcso) |
||
2797 | { |
||
2798 | struct vc4_context *vc4 = vc4_context(pctx); |
||
2799 | vc4->prog.bind_fs = hwcso; |
||
2800 | vc4->dirty |= VC4_DIRTY_UNCOMPILED_FS; |
||
2801 | } |
||
2802 | |||
2803 | static void |
||
2804 | vc4_vp_state_bind(struct pipe_context *pctx, void *hwcso) |
||
2805 | { |
||
2806 | struct vc4_context *vc4 = vc4_context(pctx); |
||
2807 | vc4->prog.bind_vs = hwcso; |
||
2808 | vc4->dirty |= VC4_DIRTY_UNCOMPILED_VS; |
||
2809 | } |
||
2810 | |||
2811 | void |
||
2812 | vc4_program_init(struct pipe_context *pctx) |
||
2813 | { |
||
2814 | struct vc4_context *vc4 = vc4_context(pctx); |
||
2815 | |||
2816 | pctx->create_vs_state = vc4_shader_state_create; |
||
2817 | pctx->delete_vs_state = vc4_shader_state_delete; |
||
2818 | |||
2819 | pctx->create_fs_state = vc4_shader_state_create; |
||
2820 | pctx->delete_fs_state = vc4_shader_state_delete; |
||
2821 | |||
2822 | pctx->bind_fs_state = vc4_fp_state_bind; |
||
2823 | pctx->bind_vs_state = vc4_vp_state_bind; |
||
2824 | |||
2825 | vc4->fs_cache = _mesa_hash_table_create(pctx, fs_cache_hash, |
||
2826 | fs_cache_compare); |
||
2827 | vc4->vs_cache = _mesa_hash_table_create(pctx, vs_cache_hash, |
||
2828 | vs_cache_compare); |
||
2829 | } |
||
2830 | |||
2831 | void |
||
2832 | vc4_program_fini(struct pipe_context *pctx) |
||
2833 | { |
||
2834 | struct vc4_context *vc4 = vc4_context(pctx); |
||
2835 | |||
2836 | struct hash_entry *entry; |
||
2837 | hash_table_foreach(vc4->fs_cache, entry) { |
||
2838 | struct vc4_compiled_shader *shader = entry->data; |
||
2839 | vc4_bo_unreference(&shader->bo); |
||
2840 | ralloc_free(shader); |
||
2841 | _mesa_hash_table_remove(vc4->fs_cache, entry); |
||
2842 | } |
||
2843 | |||
2844 | hash_table_foreach(vc4->vs_cache, entry) { |
||
2845 | struct vc4_compiled_shader *shader = entry->data; |
||
2846 | vc4_bo_unreference(&shader->bo); |
||
2847 | ralloc_free(shader); |
||
2848 | _mesa_hash_table_remove(vc4->vs_cache, entry); |
||
2849 | } |
||
2850 | }><>=>>>><>><>><>><>>>>=>>>><>>><>>>>>>>>>>><>>>>>>><>>>><>><>>>>>>>>><>>>>>>>>>>>>>>>>>>>>>>><>>>>>>> |