Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
5564 | serge | 1 | /* |
2 | * Copyright 2010 Christoph Bumiller |
||
3 | * |
||
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
5 | * copy of this software and associated documentation files (the "Software"), |
||
6 | * to deal in the Software without restriction, including without limitation |
||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
||
8 | * and/or sell copies of the Software, and to permit persons to whom the |
||
9 | * Software is furnished to do so, subject to the following conditions: |
||
10 | * |
||
11 | * The above copyright notice and this permission notice shall be included in |
||
12 | * all copies or substantial portions of the Software. |
||
13 | * |
||
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
||
17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
||
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
||
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
||
20 | * OTHER DEALINGS IN THE SOFTWARE. |
||
21 | */ |
||
22 | |||
23 | #include "pipe/p_defines.h" |
||
24 | |||
25 | #include "nvc0/nvc0_context.h" |
||
26 | |||
27 | #include "codegen/nv50_ir_driver.h" |
||
28 | #include "nvc0/nve4_compute.h" |
||
29 | |||
30 | /* NOTE: Using a[0x270] in FP may cause an error even if we're using less than |
||
31 | * 124 scalar varying values. |
||
32 | */ |
||
33 | static uint32_t |
||
34 | nvc0_shader_input_address(unsigned sn, unsigned si, unsigned ubase) |
||
35 | { |
||
36 | switch (sn) { |
||
37 | case NV50_SEMANTIC_TESSFACTOR: return 0x000 + si * 0x4; |
||
38 | case TGSI_SEMANTIC_PRIMID: return 0x060; |
||
39 | case TGSI_SEMANTIC_LAYER: return 0x064; |
||
40 | case TGSI_SEMANTIC_VIEWPORT_INDEX:return 0x068; |
||
41 | case TGSI_SEMANTIC_PSIZE: return 0x06c; |
||
42 | case TGSI_SEMANTIC_POSITION: return 0x070; |
||
43 | case TGSI_SEMANTIC_GENERIC: return ubase + si * 0x10; |
||
44 | case TGSI_SEMANTIC_FOG: return 0x2e8; |
||
45 | case TGSI_SEMANTIC_COLOR: return 0x280 + si * 0x10; |
||
46 | case TGSI_SEMANTIC_BCOLOR: return 0x2a0 + si * 0x10; |
||
47 | case NV50_SEMANTIC_CLIPDISTANCE: return 0x2c0 + si * 0x4; |
||
48 | case TGSI_SEMANTIC_CLIPDIST: return 0x2c0 + si * 0x10; |
||
49 | case TGSI_SEMANTIC_CLIPVERTEX: return 0x270; |
||
50 | case TGSI_SEMANTIC_PCOORD: return 0x2e0; |
||
51 | case NV50_SEMANTIC_TESSCOORD: return 0x2f0; |
||
52 | case TGSI_SEMANTIC_INSTANCEID: return 0x2f8; |
||
53 | case TGSI_SEMANTIC_VERTEXID: return 0x2fc; |
||
54 | case TGSI_SEMANTIC_TEXCOORD: return 0x300 + si * 0x10; |
||
55 | case TGSI_SEMANTIC_FACE: return 0x3fc; |
||
56 | default: |
||
57 | assert(!"invalid TGSI input semantic"); |
||
58 | return ~0; |
||
59 | } |
||
60 | } |
||
61 | |||
62 | static uint32_t |
||
63 | nvc0_shader_output_address(unsigned sn, unsigned si, unsigned ubase) |
||
64 | { |
||
65 | switch (sn) { |
||
66 | case NV50_SEMANTIC_TESSFACTOR: return 0x000 + si * 0x4; |
||
67 | case TGSI_SEMANTIC_PRIMID: return 0x060; |
||
68 | case TGSI_SEMANTIC_LAYER: return 0x064; |
||
69 | case TGSI_SEMANTIC_VIEWPORT_INDEX:return 0x068; |
||
70 | case TGSI_SEMANTIC_PSIZE: return 0x06c; |
||
71 | case TGSI_SEMANTIC_POSITION: return 0x070; |
||
72 | case TGSI_SEMANTIC_GENERIC: return ubase + si * 0x10; |
||
73 | case TGSI_SEMANTIC_FOG: return 0x2e8; |
||
74 | case TGSI_SEMANTIC_COLOR: return 0x280 + si * 0x10; |
||
75 | case TGSI_SEMANTIC_BCOLOR: return 0x2a0 + si * 0x10; |
||
76 | case NV50_SEMANTIC_CLIPDISTANCE: return 0x2c0 + si * 0x4; |
||
77 | case TGSI_SEMANTIC_CLIPDIST: return 0x2c0 + si * 0x10; |
||
78 | case TGSI_SEMANTIC_CLIPVERTEX: return 0x270; |
||
79 | case TGSI_SEMANTIC_TEXCOORD: return 0x300 + si * 0x10; |
||
80 | case TGSI_SEMANTIC_EDGEFLAG: return ~0; |
||
81 | default: |
||
82 | assert(!"invalid TGSI output semantic"); |
||
83 | return ~0; |
||
84 | } |
||
85 | } |
||
86 | |||
87 | static int |
||
88 | nvc0_vp_assign_input_slots(struct nv50_ir_prog_info *info) |
||
89 | { |
||
90 | unsigned i, c, n; |
||
91 | |||
92 | for (n = 0, i = 0; i < info->numInputs; ++i) { |
||
93 | switch (info->in[i].sn) { |
||
94 | case TGSI_SEMANTIC_INSTANCEID: /* for SM4 only, in TGSI they're SVs */ |
||
95 | case TGSI_SEMANTIC_VERTEXID: |
||
96 | info->in[i].mask = 0x1; |
||
97 | info->in[i].slot[0] = |
||
98 | nvc0_shader_input_address(info->in[i].sn, 0, 0) / 4; |
||
99 | continue; |
||
100 | default: |
||
101 | break; |
||
102 | } |
||
103 | for (c = 0; c < 4; ++c) |
||
104 | info->in[i].slot[c] = (0x80 + n * 0x10 + c * 0x4) / 4; |
||
105 | ++n; |
||
106 | } |
||
107 | |||
108 | return 0; |
||
109 | } |
||
110 | |||
111 | static int |
||
112 | nvc0_sp_assign_input_slots(struct nv50_ir_prog_info *info) |
||
113 | { |
||
114 | unsigned ubase = MAX2(0x80, 0x20 + info->numPatchConstants * 0x10); |
||
115 | unsigned offset; |
||
116 | unsigned i, c; |
||
117 | |||
118 | for (i = 0; i < info->numInputs; ++i) { |
||
119 | offset = nvc0_shader_input_address(info->in[i].sn, |
||
120 | info->in[i].si, ubase); |
||
121 | if (info->in[i].patch && offset >= 0x20) |
||
122 | offset = 0x20 + info->in[i].si * 0x10; |
||
123 | |||
124 | if (info->in[i].sn == NV50_SEMANTIC_TESSCOORD) |
||
125 | info->in[i].mask &= 3; |
||
126 | |||
127 | for (c = 0; c < 4; ++c) |
||
128 | info->in[i].slot[c] = (offset + c * 0x4) / 4; |
||
129 | } |
||
130 | |||
131 | return 0; |
||
132 | } |
||
133 | |||
134 | static int |
||
135 | nvc0_fp_assign_output_slots(struct nv50_ir_prog_info *info) |
||
136 | { |
||
137 | unsigned count = info->prop.fp.numColourResults * 4; |
||
138 | unsigned i, c; |
||
139 | |||
140 | for (i = 0; i < info->numOutputs; ++i) |
||
141 | if (info->out[i].sn == TGSI_SEMANTIC_COLOR) |
||
142 | for (c = 0; c < 4; ++c) |
||
143 | info->out[i].slot[c] = info->out[i].si * 4 + c; |
||
144 | |||
145 | if (info->io.sampleMask < PIPE_MAX_SHADER_OUTPUTS) |
||
146 | info->out[info->io.sampleMask].slot[0] = count++; |
||
147 | else |
||
148 | if (info->target >= 0xe0) |
||
149 | count++; /* on Kepler, depth is always last colour reg + 2 */ |
||
150 | |||
151 | if (info->io.fragDepth < PIPE_MAX_SHADER_OUTPUTS) |
||
152 | info->out[info->io.fragDepth].slot[2] = count; |
||
153 | |||
154 | return 0; |
||
155 | } |
||
156 | |||
157 | static int |
||
158 | nvc0_sp_assign_output_slots(struct nv50_ir_prog_info *info) |
||
159 | { |
||
160 | unsigned ubase = MAX2(0x80, 0x20 + info->numPatchConstants * 0x10); |
||
161 | unsigned offset; |
||
162 | unsigned i, c; |
||
163 | |||
164 | for (i = 0; i < info->numOutputs; ++i) { |
||
165 | offset = nvc0_shader_output_address(info->out[i].sn, |
||
166 | info->out[i].si, ubase); |
||
167 | if (info->out[i].patch && offset >= 0x20) |
||
168 | offset = 0x20 + info->out[i].si * 0x10; |
||
169 | |||
170 | for (c = 0; c < 4; ++c) |
||
171 | info->out[i].slot[c] = (offset + c * 0x4) / 4; |
||
172 | } |
||
173 | |||
174 | return 0; |
||
175 | } |
||
176 | |||
177 | static int |
||
178 | nvc0_program_assign_varying_slots(struct nv50_ir_prog_info *info) |
||
179 | { |
||
180 | int ret; |
||
181 | |||
182 | if (info->type == PIPE_SHADER_VERTEX) |
||
183 | ret = nvc0_vp_assign_input_slots(info); |
||
184 | else |
||
185 | ret = nvc0_sp_assign_input_slots(info); |
||
186 | if (ret) |
||
187 | return ret; |
||
188 | |||
189 | if (info->type == PIPE_SHADER_FRAGMENT) |
||
190 | ret = nvc0_fp_assign_output_slots(info); |
||
191 | else |
||
192 | ret = nvc0_sp_assign_output_slots(info); |
||
193 | return ret; |
||
194 | } |
||
195 | |||
196 | static INLINE void |
||
197 | nvc0_vtgp_hdr_update_oread(struct nvc0_program *vp, uint8_t slot) |
||
198 | { |
||
199 | uint8_t min = (vp->hdr[4] >> 12) & 0xff; |
||
200 | uint8_t max = (vp->hdr[4] >> 24); |
||
201 | |||
202 | min = MIN2(min, slot); |
||
203 | max = MAX2(max, slot); |
||
204 | |||
205 | vp->hdr[4] = (max << 24) | (min << 12); |
||
206 | } |
||
207 | |||
208 | /* Common part of header generation for VP, TCP, TEP and GP. */ |
||
209 | static int |
||
210 | nvc0_vtgp_gen_header(struct nvc0_program *vp, struct nv50_ir_prog_info *info) |
||
211 | { |
||
212 | unsigned i, c, a; |
||
213 | |||
214 | for (i = 0; i < info->numInputs; ++i) { |
||
215 | if (info->in[i].patch) |
||
216 | continue; |
||
217 | for (c = 0; c < 4; ++c) { |
||
218 | a = info->in[i].slot[c]; |
||
219 | if (info->in[i].mask & (1 << c)) { |
||
220 | if (info->in[i].sn != NV50_SEMANTIC_TESSCOORD) |
||
221 | vp->hdr[5 + a / 32] |= 1 << (a % 32); |
||
222 | else |
||
223 | nvc0_vtgp_hdr_update_oread(vp, info->in[i].slot[c]); |
||
224 | } |
||
225 | } |
||
226 | } |
||
227 | |||
228 | for (i = 0; i < info->numOutputs; ++i) { |
||
229 | if (info->out[i].patch) |
||
230 | continue; |
||
231 | for (c = 0; c < 4; ++c) { |
||
232 | if (!(info->out[i].mask & (1 << c))) |
||
233 | continue; |
||
234 | assert(info->out[i].slot[c] >= 0x40 / 4); |
||
235 | a = info->out[i].slot[c] - 0x40 / 4; |
||
236 | vp->hdr[13 + a / 32] |= 1 << (a % 32); |
||
237 | if (info->out[i].oread) |
||
238 | nvc0_vtgp_hdr_update_oread(vp, info->out[i].slot[c]); |
||
239 | } |
||
240 | } |
||
241 | |||
242 | for (i = 0; i < info->numSysVals; ++i) { |
||
243 | switch (info->sv[i].sn) { |
||
244 | case TGSI_SEMANTIC_PRIMID: |
||
245 | vp->hdr[5] |= 1 << 24; |
||
246 | break; |
||
247 | case TGSI_SEMANTIC_INSTANCEID: |
||
248 | vp->hdr[10] |= 1 << 30; |
||
249 | break; |
||
250 | case TGSI_SEMANTIC_VERTEXID: |
||
251 | vp->hdr[10] |= 1 << 31; |
||
252 | break; |
||
253 | default: |
||
254 | break; |
||
255 | } |
||
256 | } |
||
257 | |||
258 | vp->vp.clip_enable = info->io.clipDistanceMask; |
||
259 | for (i = 0; i < 8; ++i) |
||
260 | if (info->io.cullDistanceMask & (1 << i)) |
||
261 | vp->vp.clip_mode |= 1 << (i * 4); |
||
262 | |||
263 | if (info->io.genUserClip < 0) |
||
264 | vp->vp.num_ucps = PIPE_MAX_CLIP_PLANES + 1; /* prevent rebuilding */ |
||
265 | |||
266 | return 0; |
||
267 | } |
||
268 | |||
269 | static int |
||
270 | nvc0_vp_gen_header(struct nvc0_program *vp, struct nv50_ir_prog_info *info) |
||
271 | { |
||
272 | vp->hdr[0] = 0x20061 | (1 << 10); |
||
273 | vp->hdr[4] = 0xff000; |
||
274 | |||
275 | vp->hdr[18] = info->io.clipDistanceMask; |
||
276 | |||
277 | return nvc0_vtgp_gen_header(vp, info); |
||
278 | } |
||
279 | |||
280 | #if defined(PIPE_SHADER_HULL) || defined(PIPE_SHADER_DOMAIN) |
||
281 | static void |
||
282 | nvc0_tp_get_tess_mode(struct nvc0_program *tp, struct nv50_ir_prog_info *info) |
||
283 | { |
||
284 | if (info->prop.tp.outputPrim == PIPE_PRIM_MAX) { |
||
285 | tp->tp.tess_mode = ~0; |
||
286 | return; |
||
287 | } |
||
288 | switch (info->prop.tp.domain) { |
||
289 | case PIPE_PRIM_LINES: |
||
290 | tp->tp.tess_mode = NVC0_3D_TESS_MODE_PRIM_ISOLINES; |
||
291 | break; |
||
292 | case PIPE_PRIM_TRIANGLES: |
||
293 | tp->tp.tess_mode = NVC0_3D_TESS_MODE_PRIM_TRIANGLES; |
||
294 | if (info->prop.tp.winding > 0) |
||
295 | tp->tp.tess_mode |= NVC0_3D_TESS_MODE_CW; |
||
296 | break; |
||
297 | case PIPE_PRIM_QUADS: |
||
298 | tp->tp.tess_mode = NVC0_3D_TESS_MODE_PRIM_QUADS; |
||
299 | break; |
||
300 | default: |
||
301 | tp->tp.tess_mode = ~0; |
||
302 | return; |
||
303 | } |
||
304 | if (info->prop.tp.outputPrim != PIPE_PRIM_POINTS) |
||
305 | tp->tp.tess_mode |= NVC0_3D_TESS_MODE_CONNECTED; |
||
306 | |||
307 | switch (info->prop.tp.partitioning) { |
||
308 | case PIPE_TESS_PART_INTEGER: |
||
309 | case PIPE_TESS_PART_POW2: |
||
310 | tp->tp.tess_mode |= NVC0_3D_TESS_MODE_SPACING_EQUAL; |
||
311 | break; |
||
312 | case PIPE_TESS_PART_FRACT_ODD: |
||
313 | tp->tp.tess_mode |= NVC0_3D_TESS_MODE_SPACING_FRACTIONAL_ODD; |
||
314 | break; |
||
315 | case PIPE_TESS_PART_FRACT_EVEN: |
||
316 | tp->tp.tess_mode |= NVC0_3D_TESS_MODE_SPACING_FRACTIONAL_EVEN; |
||
317 | break; |
||
318 | default: |
||
319 | assert(!"invalid tessellator partitioning"); |
||
320 | break; |
||
321 | } |
||
322 | } |
||
323 | #endif |
||
324 | |||
325 | #ifdef PIPE_SHADER_HULL |
||
326 | static int |
||
327 | nvc0_tcp_gen_header(struct nvc0_program *tcp, struct nv50_ir_prog_info *info) |
||
328 | { |
||
329 | unsigned opcs = 6; /* output patch constants (at least the TessFactors) */ |
||
330 | |||
331 | tcp->tp.input_patch_size = info->prop.tp.inputPatchSize; |
||
332 | |||
333 | if (info->numPatchConstants) |
||
334 | opcs = 8 + info->numPatchConstants * 4; |
||
335 | |||
336 | tcp->hdr[0] = 0x20061 | (2 << 10); |
||
337 | |||
338 | tcp->hdr[1] = opcs << 24; |
||
339 | tcp->hdr[2] = info->prop.tp.outputPatchSize << 24; |
||
340 | |||
341 | tcp->hdr[4] = 0xff000; /* initial min/max parallel output read address */ |
||
342 | |||
343 | nvc0_vtgp_gen_header(tcp, info); |
||
344 | |||
345 | nvc0_tp_get_tess_mode(tcp, info); |
||
346 | |||
347 | return 0; |
||
348 | } |
||
349 | #endif |
||
350 | |||
351 | #ifdef PIPE_SHADER_DOMAIN |
||
352 | static int |
||
353 | nvc0_tep_gen_header(struct nvc0_program *tep, struct nv50_ir_prog_info *info) |
||
354 | { |
||
355 | tep->tp.input_patch_size = ~0; |
||
356 | |||
357 | tep->hdr[0] = 0x20061 | (3 << 10); |
||
358 | tep->hdr[4] = 0xff000; |
||
359 | |||
360 | nvc0_vtgp_gen_header(tep, info); |
||
361 | |||
362 | nvc0_tp_get_tess_mode(tep, info); |
||
363 | |||
364 | tep->hdr[18] |= 0x3 << 12; /* ? */ |
||
365 | |||
366 | return 0; |
||
367 | } |
||
368 | #endif |
||
369 | |||
370 | static int |
||
371 | nvc0_gp_gen_header(struct nvc0_program *gp, struct nv50_ir_prog_info *info) |
||
372 | { |
||
373 | gp->hdr[0] = 0x20061 | (4 << 10); |
||
374 | |||
375 | gp->hdr[2] = MIN2(info->prop.gp.instanceCount, 32) << 24; |
||
376 | |||
377 | switch (info->prop.gp.outputPrim) { |
||
378 | case PIPE_PRIM_POINTS: |
||
379 | gp->hdr[3] = 0x01000000; |
||
380 | gp->hdr[0] |= 0xf0000000; |
||
381 | break; |
||
382 | case PIPE_PRIM_LINE_STRIP: |
||
383 | gp->hdr[3] = 0x06000000; |
||
384 | gp->hdr[0] |= 0x10000000; |
||
385 | break; |
||
386 | case PIPE_PRIM_TRIANGLE_STRIP: |
||
387 | gp->hdr[3] = 0x07000000; |
||
388 | gp->hdr[0] |= 0x10000000; |
||
389 | break; |
||
390 | default: |
||
391 | assert(0); |
||
392 | break; |
||
393 | } |
||
394 | |||
395 | gp->hdr[4] = MIN2(info->prop.gp.maxVertices, 1024); |
||
396 | |||
397 | return nvc0_vtgp_gen_header(gp, info); |
||
398 | } |
||
399 | |||
400 | #define NVC0_INTERP_FLAT (1 << 0) |
||
401 | #define NVC0_INTERP_PERSPECTIVE (2 << 0) |
||
402 | #define NVC0_INTERP_LINEAR (3 << 0) |
||
403 | #define NVC0_INTERP_CENTROID (1 << 2) |
||
404 | |||
405 | static uint8_t |
||
406 | nvc0_hdr_interp_mode(const struct nv50_ir_varying *var) |
||
407 | { |
||
408 | if (var->linear) |
||
409 | return NVC0_INTERP_LINEAR; |
||
410 | if (var->flat) |
||
411 | return NVC0_INTERP_FLAT; |
||
412 | return NVC0_INTERP_PERSPECTIVE; |
||
413 | } |
||
414 | |||
415 | static int |
||
416 | nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info *info) |
||
417 | { |
||
418 | unsigned i, c, a, m; |
||
419 | |||
420 | /* just 00062 on Kepler */ |
||
421 | fp->hdr[0] = 0x20062 | (5 << 10); |
||
422 | fp->hdr[5] = 0x80000000; /* getting a trap if FRAG_COORD_UMASK.w = 0 */ |
||
423 | |||
424 | if (info->prop.fp.usesDiscard) |
||
425 | fp->hdr[0] |= 0x8000; |
||
426 | if (info->prop.fp.numColourResults > 1) |
||
427 | fp->hdr[0] |= 0x4000; |
||
428 | if (info->io.sampleMask < PIPE_MAX_SHADER_OUTPUTS) |
||
429 | fp->hdr[19] |= 0x1; |
||
430 | if (info->prop.fp.writesDepth) { |
||
431 | fp->hdr[19] |= 0x2; |
||
432 | fp->flags[0] = 0x11; /* deactivate ZCULL */ |
||
433 | } |
||
434 | |||
435 | for (i = 0; i < info->numInputs; ++i) { |
||
436 | m = nvc0_hdr_interp_mode(&info->in[i]); |
||
437 | for (c = 0; c < 4; ++c) { |
||
438 | if (!(info->in[i].mask & (1 << c))) |
||
439 | continue; |
||
440 | a = info->in[i].slot[c]; |
||
441 | if (info->in[i].slot[0] >= (0x060 / 4) && |
||
442 | info->in[i].slot[0] <= (0x07c / 4)) { |
||
443 | fp->hdr[5] |= 1 << (24 + (a - 0x060 / 4)); |
||
444 | } else |
||
445 | if (info->in[i].slot[0] >= (0x2c0 / 4) && |
||
446 | info->in[i].slot[0] <= (0x2fc / 4)) { |
||
447 | fp->hdr[14] |= (1 << (a - 0x280 / 4)) & 0x07ff0000; |
||
448 | } else { |
||
449 | if (info->in[i].slot[c] < (0x040 / 4) || |
||
450 | info->in[i].slot[c] > (0x380 / 4)) |
||
451 | continue; |
||
452 | a *= 2; |
||
453 | if (info->in[i].slot[0] >= (0x300 / 4)) |
||
454 | a -= 32; |
||
455 | fp->hdr[4 + a / 32] |= m << (a % 32); |
||
456 | } |
||
457 | } |
||
458 | } |
||
459 | |||
460 | for (i = 0; i < info->numOutputs; ++i) { |
||
461 | if (info->out[i].sn == TGSI_SEMANTIC_COLOR) |
||
462 | fp->hdr[18] |= info->out[i].mask << info->out[i].slot[0]; |
||
463 | } |
||
464 | |||
465 | fp->fp.early_z = info->prop.fp.earlyFragTests; |
||
466 | |||
467 | return 0; |
||
468 | } |
||
469 | |||
470 | static struct nvc0_transform_feedback_state * |
||
471 | nvc0_program_create_tfb_state(const struct nv50_ir_prog_info *info, |
||
472 | const struct pipe_stream_output_info *pso) |
||
473 | { |
||
474 | struct nvc0_transform_feedback_state *tfb; |
||
475 | unsigned b, i, c; |
||
476 | |||
477 | tfb = MALLOC_STRUCT(nvc0_transform_feedback_state); |
||
478 | if (!tfb) |
||
479 | return NULL; |
||
480 | for (b = 0; b < 4; ++b) { |
||
481 | tfb->stride[b] = pso->stride[b] * 4; |
||
482 | tfb->varying_count[b] = 0; |
||
483 | } |
||
484 | memset(tfb->varying_index, 0xff, sizeof(tfb->varying_index)); /* = skip */ |
||
485 | |||
486 | for (i = 0; i < pso->num_outputs; ++i) { |
||
487 | unsigned s = pso->output[i].start_component; |
||
488 | unsigned p = pso->output[i].dst_offset; |
||
489 | b = pso->output[i].output_buffer; |
||
490 | |||
491 | for (c = 0; c < pso->output[i].num_components; ++c) |
||
492 | tfb->varying_index[b][p++] = |
||
493 | info->out[pso->output[i].register_index].slot[s + c]; |
||
494 | |||
495 | tfb->varying_count[b] = MAX2(tfb->varying_count[b], p); |
||
496 | tfb->stream[b] = pso->output[i].stream; |
||
497 | } |
||
498 | for (b = 0; b < 4; ++b) // zero unused indices (looks nicer) |
||
499 | for (c = tfb->varying_count[b]; c & 3; ++c) |
||
500 | tfb->varying_index[b][c] = 0; |
||
501 | |||
502 | return tfb; |
||
503 | } |
||
504 | |||
505 | #ifdef DEBUG |
||
506 | static void |
||
507 | nvc0_program_dump(struct nvc0_program *prog) |
||
508 | { |
||
509 | unsigned pos; |
||
510 | |||
511 | if (prog->type != PIPE_SHADER_COMPUTE) { |
||
512 | for (pos = 0; pos < sizeof(prog->hdr) / sizeof(prog->hdr[0]); ++pos) |
||
513 | debug_printf("HDR[%02"PRIxPTR"] = 0x%08x\n", |
||
514 | pos * sizeof(prog->hdr[0]), prog->hdr[pos]); |
||
515 | } |
||
516 | debug_printf("shader binary code (0x%x bytes):", prog->code_size); |
||
517 | for (pos = 0; pos < prog->code_size / 4; ++pos) { |
||
518 | if ((pos % 8) == 0) |
||
519 | debug_printf("\n"); |
||
520 | debug_printf("%08x ", prog->code[pos]); |
||
521 | } |
||
522 | debug_printf("\n"); |
||
523 | } |
||
524 | #endif |
||
525 | |||
526 | boolean |
||
527 | nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset) |
||
528 | { |
||
529 | struct nv50_ir_prog_info *info; |
||
530 | int ret; |
||
531 | |||
532 | info = CALLOC_STRUCT(nv50_ir_prog_info); |
||
533 | if (!info) |
||
534 | return FALSE; |
||
535 | |||
536 | info->type = prog->type; |
||
537 | info->target = chipset; |
||
538 | info->bin.sourceRep = NV50_PROGRAM_IR_TGSI; |
||
539 | info->bin.source = (void *)prog->pipe.tokens; |
||
540 | |||
541 | info->io.genUserClip = prog->vp.num_ucps; |
||
542 | info->io.ucpBase = 256; |
||
543 | info->io.ucpCBSlot = 15; |
||
544 | info->io.sampleInterp = prog->fp.sample_interp; |
||
545 | |||
546 | if (prog->type == PIPE_SHADER_COMPUTE) { |
||
547 | if (chipset >= NVISA_GK104_CHIPSET) { |
||
548 | info->io.resInfoCBSlot = 0; |
||
549 | info->io.texBindBase = NVE4_CP_INPUT_TEX(0); |
||
550 | info->io.suInfoBase = NVE4_CP_INPUT_SUF(0); |
||
551 | info->prop.cp.gridInfoBase = NVE4_CP_INPUT_GRID_INFO(0); |
||
552 | } |
||
553 | info->io.msInfoCBSlot = 0; |
||
554 | info->io.msInfoBase = NVE4_CP_INPUT_MS_OFFSETS; |
||
555 | } else { |
||
556 | if (chipset >= NVISA_GK104_CHIPSET) { |
||
557 | info->io.texBindBase = 0x20; |
||
558 | info->io.suInfoBase = 0; /* TODO */ |
||
559 | } |
||
560 | info->io.resInfoCBSlot = 15; |
||
561 | info->io.sampleInfoBase = 256 + 128; |
||
562 | info->io.msInfoCBSlot = 15; |
||
563 | info->io.msInfoBase = 0; /* TODO */ |
||
564 | } |
||
565 | |||
566 | info->assignSlots = nvc0_program_assign_varying_slots; |
||
567 | |||
568 | #ifdef DEBUG |
||
569 | info->optLevel = debug_get_num_option("NV50_PROG_OPTIMIZE", 3); |
||
570 | info->dbgFlags = debug_get_num_option("NV50_PROG_DEBUG", 0); |
||
571 | #else |
||
572 | info->optLevel = 3; |
||
573 | #endif |
||
574 | |||
575 | ret = nv50_ir_generate_code(info); |
||
576 | if (ret) { |
||
577 | NOUVEAU_ERR("shader translation failed: %i\n", ret); |
||
578 | goto out; |
||
579 | } |
||
580 | if (prog->type != PIPE_SHADER_COMPUTE) |
||
581 | FREE(info->bin.syms); |
||
582 | |||
583 | prog->code = info->bin.code; |
||
584 | prog->code_size = info->bin.codeSize; |
||
585 | prog->immd_data = info->immd.buf; |
||
586 | prog->immd_size = info->immd.bufSize; |
||
587 | prog->relocs = info->bin.relocData; |
||
588 | prog->num_gprs = MAX2(4, (info->bin.maxGPR + 1)); |
||
589 | prog->num_barriers = info->numBarriers; |
||
590 | |||
591 | prog->vp.need_vertex_id = info->io.vertexId < PIPE_MAX_SHADER_INPUTS; |
||
592 | |||
593 | if (info->io.edgeFlagOut < PIPE_MAX_ATTRIBS) |
||
594 | info->out[info->io.edgeFlagOut].mask = 0; /* for headergen */ |
||
595 | prog->vp.edgeflag = info->io.edgeFlagIn; |
||
596 | |||
597 | switch (prog->type) { |
||
598 | case PIPE_SHADER_VERTEX: |
||
599 | ret = nvc0_vp_gen_header(prog, info); |
||
600 | break; |
||
601 | #ifdef PIPE_SHADER_HULL |
||
602 | case PIPE_SHADER_HULL: |
||
603 | ret = nvc0_tcp_gen_header(prog, info); |
||
604 | break; |
||
605 | #endif |
||
606 | #ifdef PIPE_SHADER_DOMAIN |
||
607 | case PIPE_SHADER_DOMAIN: |
||
608 | ret = nvc0_tep_gen_header(prog, info); |
||
609 | break; |
||
610 | #endif |
||
611 | case PIPE_SHADER_GEOMETRY: |
||
612 | ret = nvc0_gp_gen_header(prog, info); |
||
613 | break; |
||
614 | case PIPE_SHADER_FRAGMENT: |
||
615 | ret = nvc0_fp_gen_header(prog, info); |
||
616 | break; |
||
617 | case PIPE_SHADER_COMPUTE: |
||
618 | prog->cp.syms = info->bin.syms; |
||
619 | prog->cp.num_syms = info->bin.numSyms; |
||
620 | break; |
||
621 | default: |
||
622 | ret = -1; |
||
623 | NOUVEAU_ERR("unknown program type: %u\n", prog->type); |
||
624 | break; |
||
625 | } |
||
626 | if (ret) |
||
627 | goto out; |
||
628 | |||
629 | if (info->bin.tlsSpace) { |
||
630 | assert(info->bin.tlsSpace < (1 << 24)); |
||
631 | prog->hdr[0] |= 1 << 26; |
||
632 | prog->hdr[1] |= align(info->bin.tlsSpace, 0x10); /* l[] size */ |
||
633 | prog->need_tls = TRUE; |
||
634 | } |
||
635 | /* TODO: factor 2 only needed where joinat/precont is used, |
||
636 | * and we only have to count non-uniform branches |
||
637 | */ |
||
638 | /* |
||
639 | if ((info->maxCFDepth * 2) > 16) { |
||
640 | prog->hdr[2] |= (((info->maxCFDepth * 2) + 47) / 48) * 0x200; |
||
641 | prog->need_tls = TRUE; |
||
642 | } |
||
643 | */ |
||
644 | if (info->io.globalAccess) |
||
645 | prog->hdr[0] |= 1 << 16; |
||
646 | if (info->io.fp64) |
||
647 | prog->hdr[0] |= 1 << 27; |
||
648 | |||
649 | if (prog->pipe.stream_output.num_outputs) |
||
650 | prog->tfb = nvc0_program_create_tfb_state(info, |
||
651 | &prog->pipe.stream_output); |
||
652 | |||
653 | out: |
||
654 | FREE(info); |
||
655 | return !ret; |
||
656 | } |
||
657 | |||
658 | boolean |
||
659 | nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog) |
||
660 | { |
||
661 | struct nvc0_screen *screen = nvc0->screen; |
||
662 | const boolean is_cp = prog->type == PIPE_SHADER_COMPUTE; |
||
663 | int ret; |
||
664 | uint32_t size = prog->code_size + (is_cp ? 0 : NVC0_SHADER_HEADER_SIZE); |
||
665 | uint32_t lib_pos = screen->lib_code->start; |
||
666 | uint32_t code_pos; |
||
667 | |||
668 | /* c[] bindings need to be aligned to 0x100, but we could use relocations |
||
669 | * to save space. */ |
||
670 | if (prog->immd_size) { |
||
671 | prog->immd_base = size; |
||
672 | size = align(size, 0x40); |
||
673 | size += prog->immd_size + 0xc0; /* add 0xc0 for align 0x40 -> 0x100 */ |
||
674 | } |
||
675 | /* On Fermi, SP_START_ID must be aligned to 0x40. |
||
676 | * On Kepler, the first instruction must be aligned to 0x80 because |
||
677 | * latency information is expected only at certain positions. |
||
678 | */ |
||
679 | if (screen->base.class_3d >= NVE4_3D_CLASS) |
||
680 | size = size + (is_cp ? 0x40 : 0x70); |
||
681 | size = align(size, 0x40); |
||
682 | |||
683 | ret = nouveau_heap_alloc(screen->text_heap, size, prog, &prog->mem); |
||
684 | if (ret) { |
||
685 | struct nouveau_heap *heap = screen->text_heap; |
||
686 | /* Note that the code library, which is allocated before anything else, |
||
687 | * does not have a priv pointer. We can stop once we hit it. |
||
688 | */ |
||
689 | while (heap->next && heap->next->priv) { |
||
690 | struct nvc0_program *evict = heap->next->priv; |
||
691 | nouveau_heap_free(&evict->mem); |
||
692 | } |
||
693 | debug_printf("WARNING: out of code space, evicting all shaders.\n"); |
||
694 | ret = nouveau_heap_alloc(heap, size, prog, &prog->mem); |
||
695 | if (ret) { |
||
696 | NOUVEAU_ERR("shader too large (0x%x) to fit in code space ?\n", size); |
||
697 | return FALSE; |
||
698 | } |
||
699 | IMMED_NVC0(nvc0->base.pushbuf, NVC0_3D(SERIALIZE), 0); |
||
700 | } |
||
701 | prog->code_base = prog->mem->start; |
||
702 | prog->immd_base = align(prog->mem->start + prog->immd_base, 0x100); |
||
703 | assert((prog->immd_size == 0) || (prog->immd_base + prog->immd_size <= |
||
704 | prog->mem->start + prog->mem->size)); |
||
705 | |||
706 | if (!is_cp) { |
||
707 | if (screen->base.class_3d >= NVE4_3D_CLASS) { |
||
708 | switch (prog->mem->start & 0xff) { |
||
709 | case 0x40: prog->code_base += 0x70; break; |
||
710 | case 0x80: prog->code_base += 0x30; break; |
||
711 | case 0xc0: prog->code_base += 0x70; break; |
||
712 | default: |
||
713 | prog->code_base += 0x30; |
||
714 | assert((prog->mem->start & 0xff) == 0x00); |
||
715 | break; |
||
716 | } |
||
717 | } |
||
718 | code_pos = prog->code_base + NVC0_SHADER_HEADER_SIZE; |
||
719 | } else { |
||
720 | if (screen->base.class_3d >= NVE4_3D_CLASS) { |
||
721 | if (prog->mem->start & 0x40) |
||
722 | prog->code_base += 0x40; |
||
723 | assert((prog->code_base & 0x7f) == 0x00); |
||
724 | } |
||
725 | code_pos = prog->code_base; |
||
726 | } |
||
727 | |||
728 | if (prog->relocs) |
||
729 | nv50_ir_relocate_code(prog->relocs, prog->code, code_pos, lib_pos, 0); |
||
730 | |||
731 | #ifdef DEBUG |
||
732 | if (debug_get_bool_option("NV50_PROG_DEBUG", FALSE)) |
||
733 | nvc0_program_dump(prog); |
||
734 | #endif |
||
735 | |||
736 | if (!is_cp) |
||
737 | nvc0->base.push_data(&nvc0->base, screen->text, prog->code_base, |
||
738 | NOUVEAU_BO_VRAM, NVC0_SHADER_HEADER_SIZE, prog->hdr); |
||
739 | nvc0->base.push_data(&nvc0->base, screen->text, code_pos, |
||
740 | NOUVEAU_BO_VRAM, prog->code_size, prog->code); |
||
741 | if (prog->immd_size) |
||
742 | nvc0->base.push_data(&nvc0->base, |
||
743 | screen->text, prog->immd_base, NOUVEAU_BO_VRAM, |
||
744 | prog->immd_size, prog->immd_data); |
||
745 | |||
746 | BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(MEM_BARRIER), 1); |
||
747 | PUSH_DATA (nvc0->base.pushbuf, 0x1011); |
||
748 | |||
749 | return TRUE; |
||
750 | } |
||
751 | |||
752 | /* Upload code for builtin functions like integer division emulation. */ |
||
753 | void |
||
754 | nvc0_program_library_upload(struct nvc0_context *nvc0) |
||
755 | { |
||
756 | struct nvc0_screen *screen = nvc0->screen; |
||
757 | int ret; |
||
758 | uint32_t size; |
||
759 | const uint32_t *code; |
||
760 | |||
761 | if (screen->lib_code) |
||
762 | return; |
||
763 | |||
764 | nv50_ir_get_target_library(screen->base.device->chipset, &code, &size); |
||
765 | if (!size) |
||
766 | return; |
||
767 | |||
768 | ret = nouveau_heap_alloc(screen->text_heap, align(size, 0x100), NULL, |
||
769 | &screen->lib_code); |
||
770 | if (ret) |
||
771 | return; |
||
772 | |||
773 | nvc0->base.push_data(&nvc0->base, |
||
774 | screen->text, screen->lib_code->start, NOUVEAU_BO_VRAM, |
||
775 | size, code); |
||
776 | /* no need for a memory barrier, will be emitted with first program */ |
||
777 | } |
||
778 | |||
779 | void |
||
780 | nvc0_program_destroy(struct nvc0_context *nvc0, struct nvc0_program *prog) |
||
781 | { |
||
782 | const struct pipe_shader_state pipe = prog->pipe; |
||
783 | const ubyte type = prog->type; |
||
784 | |||
785 | if (prog->mem) |
||
786 | nouveau_heap_free(&prog->mem); |
||
787 | FREE(prog->code); /* may be 0 for hardcoded shaders */ |
||
788 | FREE(prog->immd_data); |
||
789 | FREE(prog->relocs); |
||
790 | if (prog->type == PIPE_SHADER_COMPUTE && prog->cp.syms) |
||
791 | FREE(prog->cp.syms); |
||
792 | if (prog->tfb) { |
||
793 | if (nvc0->state.tfb == prog->tfb) |
||
794 | nvc0->state.tfb = NULL; |
||
795 | FREE(prog->tfb); |
||
796 | } |
||
797 | |||
798 | memset(prog, 0, sizeof(*prog)); |
||
799 | |||
800 | prog->pipe = pipe; |
||
801 | prog->type = type; |
||
802 | } |
||
803 | |||
804 | uint32_t |
||
805 | nvc0_program_symbol_offset(const struct nvc0_program *prog, uint32_t label) |
||
806 | { |
||
807 | const struct nv50_ir_prog_symbol *syms = |
||
808 | (const struct nv50_ir_prog_symbol *)prog->cp.syms; |
||
809 | unsigned base = 0; |
||
810 | unsigned i; |
||
811 | if (prog->type != PIPE_SHADER_COMPUTE) |
||
812 | base = NVC0_SHADER_HEADER_SIZE; |
||
813 | for (i = 0; i < prog->cp.num_syms; ++i) |
||
814 | if (syms[i].label == label) |
||
815 | return prog->code_base + base + syms[i].offset; |
||
816 | return prog->code_base; /* no symbols or symbol not found */ |
||
817 | }>= |