Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
4358 | Serge | 1 | /* |
2 | * Copyright 2010 Christoph Bumiller |
||
3 | * |
||
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
5 | * copy of this software and associated documentation files (the "Software"), |
||
6 | * to deal in the Software without restriction, including without limitation |
||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
||
8 | * and/or sell copies of the Software, and to permit persons to whom the |
||
9 | * Software is furnished to do so, subject to the following conditions: |
||
10 | * |
||
11 | * The above copyright notice and this permission notice shall be included in |
||
12 | * all copies or substantial portions of the Software. |
||
13 | * |
||
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
||
17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
||
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
||
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
||
20 | * OTHER DEALINGS IN THE SOFTWARE. |
||
21 | */ |
||
22 | |||
23 | #include "pipe/p_defines.h" |
||
24 | |||
25 | #include "nvc0_context.h" |
||
26 | |||
27 | #include "nv50/codegen/nv50_ir_driver.h" |
||
28 | #include "nve4_compute.h" |
||
29 | |||
30 | /* NOTE: Using a[0x270] in FP may cause an error even if we're using less than |
||
31 | * 124 scalar varying values. |
||
32 | */ |
||
33 | static uint32_t |
||
34 | nvc0_shader_input_address(unsigned sn, unsigned si, unsigned ubase) |
||
35 | { |
||
36 | switch (sn) { |
||
37 | case NV50_SEMANTIC_TESSFACTOR: return 0x000 + si * 0x4; |
||
38 | case TGSI_SEMANTIC_PRIMID: return 0x060; |
||
39 | case TGSI_SEMANTIC_PSIZE: return 0x06c; |
||
40 | case TGSI_SEMANTIC_POSITION: return 0x070; |
||
41 | case TGSI_SEMANTIC_GENERIC: return ubase + si * 0x10; |
||
42 | case TGSI_SEMANTIC_FOG: return 0x2e8; |
||
43 | case TGSI_SEMANTIC_COLOR: return 0x280 + si * 0x10; |
||
44 | case TGSI_SEMANTIC_BCOLOR: return 0x2a0 + si * 0x10; |
||
45 | case NV50_SEMANTIC_CLIPDISTANCE: return 0x2c0 + si * 0x4; |
||
46 | case TGSI_SEMANTIC_CLIPDIST: return 0x2c0 + si * 0x10; |
||
47 | case TGSI_SEMANTIC_CLIPVERTEX: return 0x270; |
||
48 | case TGSI_SEMANTIC_PCOORD: return 0x2e0; |
||
49 | case NV50_SEMANTIC_TESSCOORD: return 0x2f0; |
||
50 | case TGSI_SEMANTIC_INSTANCEID: return 0x2f8; |
||
51 | case TGSI_SEMANTIC_VERTEXID: return 0x2fc; |
||
52 | case TGSI_SEMANTIC_TEXCOORD: return 0x300 + si * 0x10; |
||
53 | case TGSI_SEMANTIC_FACE: return 0x3fc; |
||
54 | case NV50_SEMANTIC_INVOCATIONID: return ~0; |
||
55 | default: |
||
56 | assert(!"invalid TGSI input semantic"); |
||
57 | return ~0; |
||
58 | } |
||
59 | } |
||
60 | |||
61 | static uint32_t |
||
62 | nvc0_shader_output_address(unsigned sn, unsigned si, unsigned ubase) |
||
63 | { |
||
64 | switch (sn) { |
||
65 | case NV50_SEMANTIC_TESSFACTOR: return 0x000 + si * 0x4; |
||
66 | case TGSI_SEMANTIC_PRIMID: return 0x060; |
||
67 | case NV50_SEMANTIC_LAYER: return 0x064; |
||
68 | case NV50_SEMANTIC_VIEWPORTINDEX: return 0x068; |
||
69 | case TGSI_SEMANTIC_PSIZE: return 0x06c; |
||
70 | case TGSI_SEMANTIC_POSITION: return 0x070; |
||
71 | case TGSI_SEMANTIC_GENERIC: return ubase + si * 0x10; |
||
72 | case TGSI_SEMANTIC_FOG: return 0x2e8; |
||
73 | case TGSI_SEMANTIC_COLOR: return 0x280 + si * 0x10; |
||
74 | case TGSI_SEMANTIC_BCOLOR: return 0x2a0 + si * 0x10; |
||
75 | case NV50_SEMANTIC_CLIPDISTANCE: return 0x2c0 + si * 0x4; |
||
76 | case TGSI_SEMANTIC_CLIPDIST: return 0x2c0 + si * 0x10; |
||
77 | case TGSI_SEMANTIC_CLIPVERTEX: return 0x270; |
||
78 | case TGSI_SEMANTIC_TEXCOORD: return 0x300 + si * 0x10; |
||
79 | case TGSI_SEMANTIC_EDGEFLAG: return ~0; |
||
80 | default: |
||
81 | assert(!"invalid TGSI output semantic"); |
||
82 | return ~0; |
||
83 | } |
||
84 | } |
||
85 | |||
86 | static int |
||
87 | nvc0_vp_assign_input_slots(struct nv50_ir_prog_info *info) |
||
88 | { |
||
89 | unsigned i, c, n; |
||
90 | |||
91 | for (n = 0, i = 0; i < info->numInputs; ++i) { |
||
92 | switch (info->in[i].sn) { |
||
93 | case TGSI_SEMANTIC_INSTANCEID: /* for SM4 only, in TGSI they're SVs */ |
||
94 | case TGSI_SEMANTIC_VERTEXID: |
||
95 | info->in[i].mask = 0x1; |
||
96 | info->in[i].slot[0] = |
||
97 | nvc0_shader_input_address(info->in[i].sn, 0, 0) / 4; |
||
98 | continue; |
||
99 | default: |
||
100 | break; |
||
101 | } |
||
102 | for (c = 0; c < 4; ++c) |
||
103 | info->in[i].slot[c] = (0x80 + n * 0x10 + c * 0x4) / 4; |
||
104 | ++n; |
||
105 | } |
||
106 | |||
107 | return 0; |
||
108 | } |
||
109 | |||
110 | static int |
||
111 | nvc0_sp_assign_input_slots(struct nv50_ir_prog_info *info) |
||
112 | { |
||
113 | unsigned ubase = MAX2(0x80, 0x20 + info->numPatchConstants * 0x10); |
||
114 | unsigned offset; |
||
115 | unsigned i, c; |
||
116 | |||
117 | for (i = 0; i < info->numInputs; ++i) { |
||
118 | offset = nvc0_shader_input_address(info->in[i].sn, |
||
119 | info->in[i].si, ubase); |
||
120 | if (info->in[i].patch && offset >= 0x20) |
||
121 | offset = 0x20 + info->in[i].si * 0x10; |
||
122 | |||
123 | if (info->in[i].sn == NV50_SEMANTIC_TESSCOORD) |
||
124 | info->in[i].mask &= 3; |
||
125 | |||
126 | for (c = 0; c < 4; ++c) |
||
127 | info->in[i].slot[c] = (offset + c * 0x4) / 4; |
||
128 | } |
||
129 | |||
130 | return 0; |
||
131 | } |
||
132 | |||
133 | static int |
||
134 | nvc0_fp_assign_output_slots(struct nv50_ir_prog_info *info) |
||
135 | { |
||
136 | unsigned count = info->prop.fp.numColourResults * 4; |
||
137 | unsigned i, c; |
||
138 | |||
139 | for (i = 0; i < info->numOutputs; ++i) |
||
140 | if (info->out[i].sn == TGSI_SEMANTIC_COLOR) |
||
141 | for (c = 0; c < 4; ++c) |
||
142 | info->out[i].slot[c] = info->out[i].si * 4 + c; |
||
143 | |||
144 | if (info->io.sampleMask < PIPE_MAX_SHADER_OUTPUTS) |
||
145 | info->out[info->io.sampleMask].slot[0] = count++; |
||
146 | else |
||
147 | if (info->target >= 0xe0) |
||
148 | count++; /* on Kepler, depth is always last colour reg + 2 */ |
||
149 | |||
150 | if (info->io.fragDepth < PIPE_MAX_SHADER_OUTPUTS) |
||
151 | info->out[info->io.fragDepth].slot[2] = count; |
||
152 | |||
153 | return 0; |
||
154 | } |
||
155 | |||
156 | static int |
||
157 | nvc0_sp_assign_output_slots(struct nv50_ir_prog_info *info) |
||
158 | { |
||
159 | unsigned ubase = MAX2(0x80, 0x20 + info->numPatchConstants * 0x10); |
||
160 | unsigned offset; |
||
161 | unsigned i, c; |
||
162 | |||
163 | for (i = 0; i < info->numOutputs; ++i) { |
||
164 | offset = nvc0_shader_output_address(info->out[i].sn, |
||
165 | info->out[i].si, ubase); |
||
166 | if (info->out[i].patch && offset >= 0x20) |
||
167 | offset = 0x20 + info->out[i].si * 0x10; |
||
168 | |||
169 | for (c = 0; c < 4; ++c) |
||
170 | info->out[i].slot[c] = (offset + c * 0x4) / 4; |
||
171 | } |
||
172 | |||
173 | return 0; |
||
174 | } |
||
175 | |||
176 | static int |
||
177 | nvc0_program_assign_varying_slots(struct nv50_ir_prog_info *info) |
||
178 | { |
||
179 | int ret; |
||
180 | |||
181 | if (info->type == PIPE_SHADER_VERTEX) |
||
182 | ret = nvc0_vp_assign_input_slots(info); |
||
183 | else |
||
184 | ret = nvc0_sp_assign_input_slots(info); |
||
185 | if (ret) |
||
186 | return ret; |
||
187 | |||
188 | if (info->type == PIPE_SHADER_FRAGMENT) |
||
189 | ret = nvc0_fp_assign_output_slots(info); |
||
190 | else |
||
191 | ret = nvc0_sp_assign_output_slots(info); |
||
192 | return ret; |
||
193 | } |
||
194 | |||
195 | static INLINE void |
||
196 | nvc0_vtgp_hdr_update_oread(struct nvc0_program *vp, uint8_t slot) |
||
197 | { |
||
198 | uint8_t min = (vp->hdr[4] >> 12) & 0xff; |
||
199 | uint8_t max = (vp->hdr[4] >> 24); |
||
200 | |||
201 | min = MIN2(min, slot); |
||
202 | max = MAX2(max, slot); |
||
203 | |||
204 | vp->hdr[4] = (max << 24) | (min << 12); |
||
205 | } |
||
206 | |||
207 | /* Common part of header generation for VP, TCP, TEP and GP. */ |
||
208 | static int |
||
209 | nvc0_vtgp_gen_header(struct nvc0_program *vp, struct nv50_ir_prog_info *info) |
||
210 | { |
||
211 | unsigned i, c, a; |
||
212 | |||
213 | for (i = 0; i < info->numInputs; ++i) { |
||
214 | if (info->in[i].patch) |
||
215 | continue; |
||
216 | for (c = 0; c < 4; ++c) { |
||
217 | a = info->in[i].slot[c]; |
||
218 | if (info->in[i].mask & (1 << c)) { |
||
219 | if (info->in[i].sn != NV50_SEMANTIC_TESSCOORD) |
||
220 | vp->hdr[5 + a / 32] |= 1 << (a % 32); |
||
221 | else |
||
222 | nvc0_vtgp_hdr_update_oread(vp, info->in[i].slot[c]); |
||
223 | } |
||
224 | } |
||
225 | } |
||
226 | |||
227 | for (i = 0; i < info->numOutputs; ++i) { |
||
228 | if (info->out[i].patch) |
||
229 | continue; |
||
230 | for (c = 0; c < 4; ++c) { |
||
231 | if (!(info->out[i].mask & (1 << c))) |
||
232 | continue; |
||
233 | assert(info->out[i].slot[c] >= 0x40 / 4); |
||
234 | a = info->out[i].slot[c] - 0x40 / 4; |
||
235 | vp->hdr[13 + a / 32] |= 1 << (a % 32); |
||
236 | if (info->out[i].oread) |
||
237 | nvc0_vtgp_hdr_update_oread(vp, info->out[i].slot[c]); |
||
238 | } |
||
239 | } |
||
240 | |||
241 | for (i = 0; i < info->numSysVals; ++i) { |
||
242 | switch (info->sv[i].sn) { |
||
243 | case TGSI_SEMANTIC_PRIMID: |
||
244 | vp->hdr[5] |= 1 << 24; |
||
245 | break; |
||
246 | case TGSI_SEMANTIC_INSTANCEID: |
||
247 | vp->hdr[10] |= 1 << 30; |
||
248 | break; |
||
249 | case TGSI_SEMANTIC_VERTEXID: |
||
250 | vp->hdr[10] |= 1 << 31; |
||
251 | break; |
||
252 | default: |
||
253 | break; |
||
254 | } |
||
255 | } |
||
256 | |||
257 | vp->vp.clip_enable = info->io.clipDistanceMask; |
||
258 | for (i = 0; i < 8; ++i) |
||
259 | if (info->io.cullDistanceMask & (1 << i)) |
||
260 | vp->vp.clip_mode |= 1 << (i * 4); |
||
261 | |||
262 | if (info->io.genUserClip < 0) |
||
263 | vp->vp.num_ucps = PIPE_MAX_CLIP_PLANES + 1; /* prevent rebuilding */ |
||
264 | |||
265 | return 0; |
||
266 | } |
||
267 | |||
268 | static int |
||
269 | nvc0_vp_gen_header(struct nvc0_program *vp, struct nv50_ir_prog_info *info) |
||
270 | { |
||
271 | vp->hdr[0] = 0x20061 | (1 << 10); |
||
272 | vp->hdr[4] = 0xff000; |
||
273 | |||
274 | vp->hdr[18] = info->io.clipDistanceMask; |
||
275 | |||
276 | return nvc0_vtgp_gen_header(vp, info); |
||
277 | } |
||
278 | |||
#if defined(PIPE_SHADER_HULL) || defined(PIPE_SHADER_DOMAIN)
/**
 * Derive tp->tp.tess_mode from the tessellation properties in `info`.
 *
 * tess_mode is set to ~0 when the output primitive is PIPE_PRIM_MAX or the
 * domain is not lines/triangles/quads. Otherwise it combines the domain
 * primitive, the CW flag (triangles with winding > 0 only), the CONNECTED
 * flag (unless the output primitive is points) and the spacing mode.
 */
static void
nvc0_tp_get_tess_mode(struct nvc0_program *tp, struct nv50_ir_prog_info *info)
{
   if (info->prop.tp.outputPrim == PIPE_PRIM_MAX) {
      tp->tp.tess_mode = ~0;
      return;
   }

   /* domain primitive */
   if (info->prop.tp.domain == PIPE_PRIM_LINES) {
      tp->tp.tess_mode = NVC0_3D_TESS_MODE_PRIM_ISOLINES;
   } else if (info->prop.tp.domain == PIPE_PRIM_TRIANGLES) {
      tp->tp.tess_mode = NVC0_3D_TESS_MODE_PRIM_TRIANGLES;
      if (info->prop.tp.winding > 0)
         tp->tp.tess_mode |= NVC0_3D_TESS_MODE_CW;
   } else if (info->prop.tp.domain == PIPE_PRIM_QUADS) {
      tp->tp.tess_mode = NVC0_3D_TESS_MODE_PRIM_QUADS;
   } else {
      tp->tp.tess_mode = ~0;
      return;
   }

   if (info->prop.tp.outputPrim != PIPE_PRIM_POINTS)
      tp->tp.tess_mode |= NVC0_3D_TESS_MODE_CONNECTED;

   /* spacing / partitioning */
   switch (info->prop.tp.partitioning) {
   case PIPE_TESS_PART_INTEGER:
   case PIPE_TESS_PART_POW2:
      tp->tp.tess_mode |= NVC0_3D_TESS_MODE_SPACING_EQUAL;
      break;
   case PIPE_TESS_PART_FRACT_ODD:
      tp->tp.tess_mode |= NVC0_3D_TESS_MODE_SPACING_FRACTIONAL_ODD;
      break;
   case PIPE_TESS_PART_FRACT_EVEN:
      tp->tp.tess_mode |= NVC0_3D_TESS_MODE_SPACING_FRACTIONAL_EVEN;
      break;
   default:
      assert(!"invalid tessellator partitioning");
      break;
   }
}
#endif
||
323 | |||
#ifdef PIPE_SHADER_HULL
/**
 * Build the program header for a tessellation control program.
 *
 * Records the input patch size, writes the output patch constant count to
 * hdr[1] and the output patch size to hdr[2], runs the common header code
 * and finally derives the tessellation mode.
 *
 * Returns 0 on success, or the non-zero result of nvc0_vtgp_gen_header()
 * (consistent with the VP and GP header builders).
 */
static int
nvc0_tcp_gen_header(struct nvc0_program *tcp, struct nv50_ir_prog_info *info)
{
   unsigned opcs = 6; /* output patch constants (at least the TessFactors) */
   int ret;

   tcp->tp.input_patch_size = info->prop.tp.inputPatchSize;

   if (info->numPatchConstants)
      opcs = 8 + info->numPatchConstants * 4;

   tcp->hdr[0] = 0x20061 | (2 << 10);

   tcp->hdr[1] = opcs << 24;
   tcp->hdr[2] = info->prop.tp.outputPatchSize << 24;

   tcp->hdr[4] = 0xff000; /* initial min/max parallel output read address */

   /* do not silently drop a failure of the common header code */
   ret = nvc0_vtgp_gen_header(tcp, info);
   if (ret)
      return ret;

   nvc0_tp_get_tess_mode(tcp, info);

   return 0;
}
#endif
||
349 | |||
#ifdef PIPE_SHADER_DOMAIN
/**
 * Build the program header for a tessellation evaluation program.
 *
 * Sets input_patch_size to ~0, seeds hdr[0] and hdr[4], runs the common
 * header code, derives the tessellation mode and sets two bits in hdr[18].
 *
 * Returns 0 on success, or the non-zero result of nvc0_vtgp_gen_header()
 * (consistent with the VP and GP header builders).
 */
static int
nvc0_tep_gen_header(struct nvc0_program *tep, struct nv50_ir_prog_info *info)
{
   int ret;

   tep->tp.input_patch_size = ~0;

   tep->hdr[0] = 0x20061 | (3 << 10);
   tep->hdr[4] = 0xff000;

   /* do not silently drop a failure of the common header code */
   ret = nvc0_vtgp_gen_header(tep, info);
   if (ret)
      return ret;

   nvc0_tp_get_tess_mode(tep, info);

   tep->hdr[18] |= 0x3 << 12; /* ? */

   return 0;
}
#endif
||
368 | |||
369 | static int |
||
370 | nvc0_gp_gen_header(struct nvc0_program *gp, struct nv50_ir_prog_info *info) |
||
371 | { |
||
372 | gp->hdr[0] = 0x20061 | (4 << 10); |
||
373 | |||
374 | gp->hdr[2] = MIN2(info->prop.gp.instanceCount, 32) << 24; |
||
375 | |||
376 | switch (info->prop.gp.outputPrim) { |
||
377 | case PIPE_PRIM_POINTS: |
||
378 | gp->hdr[3] = 0x01000000; |
||
379 | gp->hdr[0] |= 0xf0000000; |
||
380 | break; |
||
381 | case PIPE_PRIM_LINE_STRIP: |
||
382 | gp->hdr[3] = 0x06000000; |
||
383 | gp->hdr[0] |= 0x10000000; |
||
384 | break; |
||
385 | case PIPE_PRIM_TRIANGLE_STRIP: |
||
386 | gp->hdr[3] = 0x07000000; |
||
387 | gp->hdr[0] |= 0x10000000; |
||
388 | break; |
||
389 | default: |
||
390 | assert(0); |
||
391 | break; |
||
392 | } |
||
393 | |||
394 | gp->hdr[4] = info->prop.gp.maxVertices & 0x1ff; |
||
395 | |||
396 | return nvc0_vtgp_gen_header(gp, info); |
||
397 | } |
||
398 | |||
399 | #define NVC0_INTERP_FLAT (1 << 0) |
||
400 | #define NVC0_INTERP_PERSPECTIVE (2 << 0) |
||
401 | #define NVC0_INTERP_LINEAR (3 << 0) |
||
402 | #define NVC0_INTERP_CENTROID (1 << 2) |
||
403 | |||
404 | static uint8_t |
||
405 | nvc0_hdr_interp_mode(const struct nv50_ir_varying *var) |
||
406 | { |
||
407 | if (var->linear) |
||
408 | return NVC0_INTERP_LINEAR; |
||
409 | if (var->flat) |
||
410 | return NVC0_INTERP_FLAT; |
||
411 | return NVC0_INTERP_PERSPECTIVE; |
||
412 | } |
||
413 | |||
414 | static int |
||
415 | nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info *info) |
||
416 | { |
||
417 | unsigned i, c, a, m; |
||
418 | |||
419 | /* just 00062 on Kepler */ |
||
420 | fp->hdr[0] = 0x20062 | (5 << 10); |
||
421 | fp->hdr[5] = 0x80000000; /* getting a trap if FRAG_COORD_UMASK.w = 0 */ |
||
422 | |||
423 | if (info->prop.fp.usesDiscard) |
||
424 | fp->hdr[0] |= 0x8000; |
||
425 | if (info->prop.fp.numColourResults > 1) |
||
426 | fp->hdr[0] |= 0x4000; |
||
427 | if (info->io.sampleMask < PIPE_MAX_SHADER_OUTPUTS) |
||
428 | fp->hdr[19] |= 0x1; |
||
429 | if (info->prop.fp.writesDepth) { |
||
430 | fp->hdr[19] |= 0x2; |
||
431 | fp->flags[0] = 0x11; /* deactivate ZCULL */ |
||
432 | } |
||
433 | |||
434 | for (i = 0; i < info->numInputs; ++i) { |
||
435 | m = nvc0_hdr_interp_mode(&info->in[i]); |
||
436 | for (c = 0; c < 4; ++c) { |
||
437 | if (!(info->in[i].mask & (1 << c))) |
||
438 | continue; |
||
439 | a = info->in[i].slot[c]; |
||
440 | if (info->in[i].slot[0] >= (0x060 / 4) && |
||
441 | info->in[i].slot[0] <= (0x07c / 4)) { |
||
442 | fp->hdr[5] |= 1 << (24 + (a - 0x060 / 4)); |
||
443 | } else |
||
444 | if (info->in[i].slot[0] >= (0x2c0 / 4) && |
||
445 | info->in[i].slot[0] <= (0x2fc / 4)) { |
||
446 | fp->hdr[14] |= (1 << (a - 0x280 / 4)) & 0x07ff0000; |
||
447 | } else { |
||
448 | if (info->in[i].slot[c] < (0x040 / 4) || |
||
449 | info->in[i].slot[c] > (0x380 / 4)) |
||
450 | continue; |
||
451 | a *= 2; |
||
452 | if (info->in[i].slot[0] >= (0x300 / 4)) |
||
453 | a -= 32; |
||
454 | fp->hdr[4 + a / 32] |= m << (a % 32); |
||
455 | } |
||
456 | } |
||
457 | } |
||
458 | |||
459 | for (i = 0; i < info->numOutputs; ++i) { |
||
460 | if (info->out[i].sn == TGSI_SEMANTIC_COLOR) |
||
461 | fp->hdr[18] |= info->out[i].mask << info->out[i].slot[0]; |
||
462 | } |
||
463 | |||
464 | fp->fp.early_z = info->prop.fp.earlyFragTests; |
||
465 | |||
466 | return 0; |
||
467 | } |
||
468 | |||
469 | static struct nvc0_transform_feedback_state * |
||
470 | nvc0_program_create_tfb_state(const struct nv50_ir_prog_info *info, |
||
471 | const struct pipe_stream_output_info *pso) |
||
472 | { |
||
473 | struct nvc0_transform_feedback_state *tfb; |
||
474 | unsigned b, i, c; |
||
475 | |||
476 | tfb = MALLOC_STRUCT(nvc0_transform_feedback_state); |
||
477 | if (!tfb) |
||
478 | return NULL; |
||
479 | for (b = 0; b < 4; ++b) { |
||
480 | tfb->stride[b] = pso->stride[b] * 4; |
||
481 | tfb->varying_count[b] = 0; |
||
482 | } |
||
483 | memset(tfb->varying_index, 0xff, sizeof(tfb->varying_index)); /* = skip */ |
||
484 | |||
485 | for (i = 0; i < pso->num_outputs; ++i) { |
||
486 | unsigned s = pso->output[i].start_component; |
||
487 | unsigned p = pso->output[i].dst_offset; |
||
488 | b = pso->output[i].output_buffer; |
||
489 | |||
490 | for (c = 0; c < pso->output[i].num_components; ++c) |
||
491 | tfb->varying_index[b][p++] = |
||
492 | info->out[pso->output[i].register_index].slot[s + c]; |
||
493 | |||
494 | tfb->varying_count[b] = MAX2(tfb->varying_count[b], p); |
||
495 | } |
||
496 | for (b = 0; b < 4; ++b) // zero unused indices (looks nicer) |
||
497 | for (c = tfb->varying_count[b]; c & 3; ++c) |
||
498 | tfb->varying_index[b][c] = 0; |
||
499 | |||
500 | return tfb; |
||
501 | } |
||
502 | |||
#ifdef DEBUG
/**
 * Print a program's header words (skipped for compute programs) and its
 * binary code, eight 32-bit words per line, via debug_printf().
 */
static void
nvc0_program_dump(struct nvc0_program *prog)
{
   unsigned pos;

   if (prog->type != PIPE_SHADER_COMPUTE) {
      for (pos = 0; pos < sizeof(prog->hdr) / sizeof(prog->hdr[0]); ++pos)
         /* The byte offset is a size_t expression; cast it so the argument
          * matches the conversion specifier on every platform. */
         debug_printf("HDR[%02x] = 0x%08x\n",
                      (unsigned)(pos * sizeof(prog->hdr[0])), prog->hdr[pos]);
   }
   debug_printf("shader binary code (0x%x bytes):", prog->code_size);
   for (pos = 0; pos < prog->code_size / 4; ++pos) {
      if ((pos % 8) == 0)
         debug_printf("\n");
      debug_printf("%08x ", prog->code[pos]);
   }
   debug_printf("\n");
}
#endif
||
523 | |||
524 | boolean |
||
525 | nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset) |
||
526 | { |
||
527 | struct nv50_ir_prog_info *info; |
||
528 | int ret; |
||
529 | |||
530 | info = CALLOC_STRUCT(nv50_ir_prog_info); |
||
531 | if (!info) |
||
532 | return FALSE; |
||
533 | |||
534 | info->type = prog->type; |
||
535 | info->target = chipset; |
||
536 | info->bin.sourceRep = NV50_PROGRAM_IR_TGSI; |
||
537 | info->bin.source = (void *)prog->pipe.tokens; |
||
538 | |||
539 | info->io.genUserClip = prog->vp.num_ucps; |
||
540 | info->io.ucpBase = 256; |
||
541 | info->io.ucpCBSlot = 15; |
||
542 | |||
543 | if (prog->type == PIPE_SHADER_COMPUTE) { |
||
544 | if (chipset >= NVISA_GK104_CHIPSET) { |
||
545 | info->io.resInfoCBSlot = 0; |
||
546 | info->io.texBindBase = NVE4_CP_INPUT_TEX(0); |
||
547 | info->io.suInfoBase = NVE4_CP_INPUT_SUF(0); |
||
548 | info->prop.cp.gridInfoBase = NVE4_CP_INPUT_GRID_INFO(0); |
||
549 | } |
||
550 | info->io.msInfoCBSlot = 0; |
||
551 | info->io.msInfoBase = NVE4_CP_INPUT_MS_OFFSETS; |
||
552 | } else { |
||
553 | if (chipset >= NVISA_GK104_CHIPSET) { |
||
554 | info->io.resInfoCBSlot = 15; |
||
555 | info->io.texBindBase = 0x20; |
||
556 | info->io.suInfoBase = 0; /* TODO */ |
||
557 | } |
||
558 | info->io.msInfoCBSlot = 15; |
||
559 | info->io.msInfoBase = 0; /* TODO */ |
||
560 | } |
||
561 | |||
562 | info->assignSlots = nvc0_program_assign_varying_slots; |
||
563 | |||
564 | #ifdef DEBUG |
||
565 | info->optLevel = debug_get_num_option("NV50_PROG_OPTIMIZE", 3); |
||
566 | info->dbgFlags = debug_get_num_option("NV50_PROG_DEBUG", 0); |
||
567 | #else |
||
568 | info->optLevel = 3; |
||
569 | #endif |
||
570 | |||
571 | ret = nv50_ir_generate_code(info); |
||
572 | if (ret) { |
||
573 | NOUVEAU_ERR("shader translation failed: %i\n", ret); |
||
574 | goto out; |
||
575 | } |
||
576 | if (prog->type != PIPE_SHADER_COMPUTE) |
||
577 | FREE(info->bin.syms); |
||
578 | |||
579 | prog->code = info->bin.code; |
||
580 | prog->code_size = info->bin.codeSize; |
||
581 | prog->immd_data = info->immd.buf; |
||
582 | prog->immd_size = info->immd.bufSize; |
||
583 | prog->relocs = info->bin.relocData; |
||
584 | prog->num_gprs = MAX2(4, (info->bin.maxGPR + 1)); |
||
585 | prog->num_barriers = info->numBarriers; |
||
586 | |||
587 | prog->vp.need_vertex_id = info->io.vertexId < PIPE_MAX_SHADER_INPUTS; |
||
588 | |||
589 | if (info->io.edgeFlagOut < PIPE_MAX_ATTRIBS) |
||
590 | info->out[info->io.edgeFlagOut].mask = 0; /* for headergen */ |
||
591 | prog->vp.edgeflag = info->io.edgeFlagIn; |
||
592 | |||
593 | switch (prog->type) { |
||
594 | case PIPE_SHADER_VERTEX: |
||
595 | ret = nvc0_vp_gen_header(prog, info); |
||
596 | break; |
||
597 | #ifdef PIPE_SHADER_HULL |
||
598 | case PIPE_SHADER_HULL: |
||
599 | ret = nvc0_tcp_gen_header(prog, info); |
||
600 | break; |
||
601 | #endif |
||
602 | #ifdef PIPE_SHADER_DOMAIN |
||
603 | case PIPE_SHADER_DOMAIN: |
||
604 | ret = nvc0_tep_gen_header(prog, info); |
||
605 | break; |
||
606 | #endif |
||
607 | case PIPE_SHADER_GEOMETRY: |
||
608 | ret = nvc0_gp_gen_header(prog, info); |
||
609 | break; |
||
610 | case PIPE_SHADER_FRAGMENT: |
||
611 | ret = nvc0_fp_gen_header(prog, info); |
||
612 | break; |
||
613 | case PIPE_SHADER_COMPUTE: |
||
614 | prog->cp.syms = info->bin.syms; |
||
615 | prog->cp.num_syms = info->bin.numSyms; |
||
616 | break; |
||
617 | default: |
||
618 | ret = -1; |
||
619 | NOUVEAU_ERR("unknown program type: %u\n", prog->type); |
||
620 | break; |
||
621 | } |
||
622 | if (ret) |
||
623 | goto out; |
||
624 | |||
625 | if (info->bin.tlsSpace) { |
||
626 | assert(info->bin.tlsSpace < (1 << 24)); |
||
627 | prog->hdr[0] |= 1 << 26; |
||
628 | prog->hdr[1] |= info->bin.tlsSpace; /* l[] size */ |
||
629 | prog->need_tls = TRUE; |
||
630 | } |
||
631 | /* TODO: factor 2 only needed where joinat/precont is used, |
||
632 | * and we only have to count non-uniform branches |
||
633 | */ |
||
634 | /* |
||
635 | if ((info->maxCFDepth * 2) > 16) { |
||
636 | prog->hdr[2] |= (((info->maxCFDepth * 2) + 47) / 48) * 0x200; |
||
637 | prog->need_tls = TRUE; |
||
638 | } |
||
639 | */ |
||
640 | if (info->io.globalAccess) |
||
641 | prog->hdr[0] |= 1 << 16; |
||
642 | |||
643 | if (prog->pipe.stream_output.num_outputs) |
||
644 | prog->tfb = nvc0_program_create_tfb_state(info, |
||
645 | &prog->pipe.stream_output); |
||
646 | |||
647 | out: |
||
648 | FREE(info); |
||
649 | return !ret; |
||
650 | } |
||
651 | |||
652 | boolean |
||
653 | nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog) |
||
654 | { |
||
655 | struct nvc0_screen *screen = nvc0->screen; |
||
656 | const boolean is_cp = prog->type == PIPE_SHADER_COMPUTE; |
||
657 | int ret; |
||
658 | uint32_t size = prog->code_size + (is_cp ? 0 : NVC0_SHADER_HEADER_SIZE); |
||
659 | uint32_t lib_pos = screen->lib_code->start; |
||
660 | uint32_t code_pos; |
||
661 | |||
662 | /* c[] bindings need to be aligned to 0x100, but we could use relocations |
||
663 | * to save space. */ |
||
664 | if (prog->immd_size) { |
||
665 | prog->immd_base = size; |
||
666 | size = align(size, 0x40); |
||
667 | size += prog->immd_size + 0xc0; /* add 0xc0 for align 0x40 -> 0x100 */ |
||
668 | } |
||
669 | /* On Fermi, SP_START_ID must be aligned to 0x40. |
||
670 | * On Kepler, the first instruction must be aligned to 0x80 because |
||
671 | * latency information is expected only at certain positions. |
||
672 | */ |
||
673 | if (screen->base.class_3d >= NVE4_3D_CLASS) |
||
674 | size = size + (is_cp ? 0x40 : 0x70); |
||
675 | size = align(size, 0x40); |
||
676 | |||
677 | ret = nouveau_heap_alloc(screen->text_heap, size, prog, &prog->mem); |
||
678 | if (ret) { |
||
679 | struct nouveau_heap *heap = screen->text_heap; |
||
680 | struct nouveau_heap *iter; |
||
681 | for (iter = heap; iter && iter->next != heap; iter = iter->next) { |
||
682 | struct nvc0_program *evict = iter->priv; |
||
683 | if (evict) |
||
684 | nouveau_heap_free(&evict->mem); |
||
685 | } |
||
686 | debug_printf("WARNING: out of code space, evicting all shaders.\n"); |
||
687 | ret = nouveau_heap_alloc(heap, size, prog, &prog->mem); |
||
688 | if (ret) { |
||
689 | NOUVEAU_ERR("shader too large (0x%x) to fit in code space ?\n", size); |
||
690 | return FALSE; |
||
691 | } |
||
692 | IMMED_NVC0(nvc0->base.pushbuf, NVC0_3D(SERIALIZE), 0); |
||
693 | } |
||
694 | prog->code_base = prog->mem->start; |
||
695 | prog->immd_base = align(prog->mem->start + prog->immd_base, 0x100); |
||
696 | assert((prog->immd_size == 0) || (prog->immd_base + prog->immd_size <= |
||
697 | prog->mem->start + prog->mem->size)); |
||
698 | |||
699 | if (!is_cp) { |
||
700 | if (screen->base.class_3d >= NVE4_3D_CLASS) { |
||
701 | switch (prog->mem->start & 0xff) { |
||
702 | case 0x40: prog->code_base += 0x70; break; |
||
703 | case 0x80: prog->code_base += 0x30; break; |
||
704 | case 0xc0: prog->code_base += 0x70; break; |
||
705 | default: |
||
706 | prog->code_base += 0x30; |
||
707 | assert((prog->mem->start & 0xff) == 0x00); |
||
708 | break; |
||
709 | } |
||
710 | } |
||
711 | code_pos = prog->code_base + NVC0_SHADER_HEADER_SIZE; |
||
712 | } else { |
||
713 | if (screen->base.class_3d >= NVE4_3D_CLASS) { |
||
714 | if (prog->mem->start & 0x40) |
||
715 | prog->code_base += 0x40; |
||
716 | assert((prog->code_base & 0x7f) == 0x00); |
||
717 | } |
||
718 | code_pos = prog->code_base; |
||
719 | } |
||
720 | |||
721 | if (prog->relocs) |
||
722 | nv50_ir_relocate_code(prog->relocs, prog->code, code_pos, lib_pos, 0); |
||
723 | |||
724 | #ifdef DEBUG |
||
725 | if (debug_get_bool_option("NV50_PROG_DEBUG", FALSE)) |
||
726 | nvc0_program_dump(prog); |
||
727 | #endif |
||
728 | |||
729 | if (!is_cp) |
||
730 | nvc0->base.push_data(&nvc0->base, screen->text, prog->code_base, |
||
731 | NOUVEAU_BO_VRAM, NVC0_SHADER_HEADER_SIZE, prog->hdr); |
||
732 | nvc0->base.push_data(&nvc0->base, screen->text, code_pos, |
||
733 | NOUVEAU_BO_VRAM, prog->code_size, prog->code); |
||
734 | if (prog->immd_size) |
||
735 | nvc0->base.push_data(&nvc0->base, |
||
736 | screen->text, prog->immd_base, NOUVEAU_BO_VRAM, |
||
737 | prog->immd_size, prog->immd_data); |
||
738 | |||
739 | BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(MEM_BARRIER), 1); |
||
740 | PUSH_DATA (nvc0->base.pushbuf, 0x1011); |
||
741 | |||
742 | return TRUE; |
||
743 | } |
||
744 | |||
745 | /* Upload code for builtin functions like integer division emulation. */ |
||
746 | void |
||
747 | nvc0_program_library_upload(struct nvc0_context *nvc0) |
||
748 | { |
||
749 | struct nvc0_screen *screen = nvc0->screen; |
||
750 | int ret; |
||
751 | uint32_t size; |
||
752 | const uint32_t *code; |
||
753 | |||
754 | if (screen->lib_code) |
||
755 | return; |
||
756 | |||
757 | nv50_ir_get_target_library(screen->base.device->chipset, &code, &size); |
||
758 | if (!size) |
||
759 | return; |
||
760 | |||
761 | ret = nouveau_heap_alloc(screen->text_heap, align(size, 0x100), NULL, |
||
762 | &screen->lib_code); |
||
763 | if (ret) |
||
764 | return; |
||
765 | |||
766 | nvc0->base.push_data(&nvc0->base, |
||
767 | screen->text, screen->lib_code->start, NOUVEAU_BO_VRAM, |
||
768 | size, code); |
||
769 | /* no need for a memory barrier, will be emitted with first program */ |
||
770 | } |
||
771 | |||
772 | void |
||
773 | nvc0_program_destroy(struct nvc0_context *nvc0, struct nvc0_program *prog) |
||
774 | { |
||
775 | const struct pipe_shader_state pipe = prog->pipe; |
||
776 | const ubyte type = prog->type; |
||
777 | |||
778 | if (prog->mem) |
||
779 | nouveau_heap_free(&prog->mem); |
||
780 | if (prog->code) |
||
781 | FREE(prog->code); /* may be 0 for hardcoded shaders */ |
||
782 | FREE(prog->immd_data); |
||
783 | FREE(prog->relocs); |
||
784 | if (prog->type == PIPE_SHADER_COMPUTE && prog->cp.syms) |
||
785 | FREE(prog->cp.syms); |
||
786 | if (prog->tfb) { |
||
787 | if (nvc0->state.tfb == prog->tfb) |
||
788 | nvc0->state.tfb = NULL; |
||
789 | FREE(prog->tfb); |
||
790 | } |
||
791 | |||
792 | memset(prog, 0, sizeof(*prog)); |
||
793 | |||
794 | prog->pipe = pipe; |
||
795 | prog->type = type; |
||
796 | } |
||
797 | |||
798 | uint32_t |
||
799 | nvc0_program_symbol_offset(const struct nvc0_program *prog, uint32_t label) |
||
800 | { |
||
801 | const struct nv50_ir_prog_symbol *syms = |
||
802 | (const struct nv50_ir_prog_symbol *)prog->cp.syms; |
||
803 | unsigned base = 0; |
||
804 | unsigned i; |
||
805 | if (prog->type != PIPE_SHADER_COMPUTE) |
||
806 | base = NVC0_SHADER_HEADER_SIZE; |
||
807 | for (i = 0; i < prog->cp.num_syms; ++i) |
||
808 | if (syms[i].label == label) |
||
809 | return prog->code_base + base + syms[i].offset; |
||
810 | return prog->code_base; /* no symbols or symbol not found */ |
||
811 | }>= |