Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
4358 | Serge | 1 | /* |
2 | * Copyright 2010 Christoph Bumiller |
||
3 | * |
||
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
5 | * copy of this software and associated documentation files (the "Software"), |
||
6 | * to deal in the Software without restriction, including without limitation |
||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
||
8 | * and/or sell copies of the Software, and to permit persons to whom the |
||
9 | * Software is furnished to do so, subject to the following conditions: |
||
10 | * |
||
11 | * The above copyright notice and this permission notice shall be included in |
||
12 | * all copies or substantial portions of the Software. |
||
13 | * |
||
14 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||
15 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
16 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
||
17 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR |
||
18 | * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, |
||
19 | * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR |
||
20 | * OTHER DEALINGS IN THE SOFTWARE. |
||
21 | */ |
||
22 | |||
23 | #include "nv50_program.h" |
||
24 | #include "nv50_context.h" |
||
25 | |||
26 | #include "codegen/nv50_ir_driver.h" |
||
27 | |||
/**
 * Count the set bits in the low nibble of \p val.
 *
 * Bits 4 and above are ignored, so the result is always in the range 0..4.
 */
static INLINE unsigned
bitcount4(const uint32_t val)
{
   /* Add neighbouring bits into two 2-bit partial sums, then add those. */
   const unsigned nibble = val & 0xf;
   const unsigned pairs = (nibble & 0x5) + ((nibble >> 1) & 0x5);

   return (pairs & 0x3) + (pairs >> 2);
}
||
35 | |||
36 | static int |
||
37 | nv50_vertprog_assign_slots(struct nv50_ir_prog_info *info) |
||
38 | { |
||
39 | struct nv50_program *prog = (struct nv50_program *)info->driverPriv; |
||
40 | unsigned i, n, c; |
||
41 | |||
42 | n = 0; |
||
43 | for (i = 0; i < info->numInputs; ++i) { |
||
44 | prog->in[i].id = i; |
||
45 | prog->in[i].sn = info->in[i].sn; |
||
46 | prog->in[i].si = info->in[i].si; |
||
47 | prog->in[i].hw = n; |
||
48 | prog->in[i].mask = info->in[i].mask; |
||
49 | |||
50 | prog->vp.attrs[(4 * i) / 32] |= info->in[i].mask << ((4 * i) % 32); |
||
51 | |||
52 | for (c = 0; c < 4; ++c) |
||
53 | if (info->in[i].mask & (1 << c)) |
||
54 | info->in[i].slot[c] = n++; |
||
55 | } |
||
56 | prog->in_nr = info->numInputs; |
||
57 | |||
58 | for (i = 0; i < info->numSysVals; ++i) { |
||
59 | switch (info->sv[i].sn) { |
||
60 | case TGSI_SEMANTIC_INSTANCEID: |
||
61 | prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_INSTANCE_ID; |
||
62 | continue; |
||
63 | case TGSI_SEMANTIC_VERTEXID: |
||
64 | prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID; |
||
65 | prog->vp.attrs[2] |= NV50_3D_VP_GP_BUILTIN_ATTR_EN_UNK12; |
||
66 | continue; |
||
67 | default: |
||
68 | break; |
||
69 | } |
||
70 | } |
||
71 | |||
72 | /* |
||
73 | * Corner case: VP has no inputs, but we will still need to submit data to |
||
74 | * draw it. HW will shout at us and won't draw anything if we don't enable |
||
75 | * any input, so let's just pretend it's the first one. |
||
76 | */ |
||
77 | if (prog->vp.attrs[0] == 0 && |
||
78 | prog->vp.attrs[1] == 0 && |
||
79 | prog->vp.attrs[2] == 0) |
||
80 | prog->vp.attrs[0] |= 0xf; |
||
81 | |||
82 | /* VertexID before InstanceID */ |
||
83 | if (info->io.vertexId < info->numSysVals) |
||
84 | info->sv[info->io.vertexId].slot[0] = n++; |
||
85 | if (info->io.instanceId < info->numSysVals) |
||
86 | info->sv[info->io.instanceId].slot[0] = n++; |
||
87 | |||
88 | n = 0; |
||
89 | for (i = 0; i < info->numOutputs; ++i) { |
||
90 | switch (info->out[i].sn) { |
||
91 | case TGSI_SEMANTIC_PSIZE: |
||
92 | prog->vp.psiz = i; |
||
93 | break; |
||
94 | case TGSI_SEMANTIC_CLIPDIST: |
||
95 | prog->vp.clpd[info->out[i].si] = n; |
||
96 | break; |
||
97 | case TGSI_SEMANTIC_EDGEFLAG: |
||
98 | prog->vp.edgeflag = i; |
||
99 | break; |
||
100 | case TGSI_SEMANTIC_BCOLOR: |
||
101 | prog->vp.bfc[info->out[i].si] = i; |
||
102 | break; |
||
103 | default: |
||
104 | break; |
||
105 | } |
||
106 | prog->out[i].id = i; |
||
107 | prog->out[i].sn = info->out[i].sn; |
||
108 | prog->out[i].si = info->out[i].si; |
||
109 | prog->out[i].hw = n; |
||
110 | prog->out[i].mask = info->out[i].mask; |
||
111 | |||
112 | for (c = 0; c < 4; ++c) |
||
113 | if (info->out[i].mask & (1 << c)) |
||
114 | info->out[i].slot[c] = n++; |
||
115 | } |
||
116 | prog->out_nr = info->numOutputs; |
||
117 | prog->max_out = n; |
||
118 | |||
119 | if (prog->vp.psiz < info->numOutputs) |
||
120 | prog->vp.psiz = prog->out[prog->vp.psiz].hw; |
||
121 | |||
122 | return 0; |
||
123 | } |
||
124 | |||
125 | static int |
||
126 | nv50_fragprog_assign_slots(struct nv50_ir_prog_info *info) |
||
127 | { |
||
128 | struct nv50_program *prog = (struct nv50_program *)info->driverPriv; |
||
129 | unsigned i, n, m, c; |
||
130 | unsigned nvary; |
||
131 | unsigned nflat; |
||
132 | unsigned nintp = 0; |
||
133 | |||
134 | /* count recorded non-flat inputs */ |
||
135 | for (m = 0, i = 0; i < info->numInputs; ++i) { |
||
136 | switch (info->in[i].sn) { |
||
137 | case TGSI_SEMANTIC_POSITION: |
||
138 | case TGSI_SEMANTIC_FACE: |
||
139 | continue; |
||
140 | default: |
||
141 | m += info->in[i].flat ? 0 : 1; |
||
142 | break; |
||
143 | } |
||
144 | } |
||
145 | /* careful: id may be != i in info->in[prog->in[i].id] */ |
||
146 | |||
147 | /* Fill prog->in[] so that non-flat inputs are first and |
||
148 | * kick out special inputs that don't use the RESULT_MAP. |
||
149 | */ |
||
150 | for (n = 0, i = 0; i < info->numInputs; ++i) { |
||
151 | if (info->in[i].sn == TGSI_SEMANTIC_POSITION) { |
||
152 | prog->fp.interp |= info->in[i].mask << 24; |
||
153 | for (c = 0; c < 4; ++c) |
||
154 | if (info->in[i].mask & (1 << c)) |
||
155 | info->in[i].slot[c] = nintp++; |
||
156 | } else |
||
157 | if (info->in[i].sn == TGSI_SEMANTIC_FACE) { |
||
158 | info->in[i].slot[0] = 255; |
||
159 | } else { |
||
160 | unsigned j = info->in[i].flat ? m++ : n++; |
||
161 | |||
162 | if (info->in[i].sn == TGSI_SEMANTIC_COLOR) |
||
163 | prog->vp.bfc[info->in[i].si] = j; |
||
164 | |||
165 | prog->in[j].id = i; |
||
166 | prog->in[j].mask = info->in[i].mask; |
||
167 | prog->in[j].sn = info->in[i].sn; |
||
168 | prog->in[j].si = info->in[i].si; |
||
169 | prog->in[j].linear = info->in[i].linear; |
||
170 | |||
171 | prog->in_nr++; |
||
172 | } |
||
173 | } |
||
174 | if (!(prog->fp.interp & (8 << 24))) { |
||
175 | ++nintp; |
||
176 | prog->fp.interp |= 8 << 24; |
||
177 | } |
||
178 | |||
179 | for (i = 0; i < prog->in_nr; ++i) { |
||
180 | int j = prog->in[i].id; |
||
181 | |||
182 | prog->in[i].hw = nintp; |
||
183 | for (c = 0; c < 4; ++c) |
||
184 | if (prog->in[i].mask & (1 << c)) |
||
185 | info->in[j].slot[c] = nintp++; |
||
186 | } |
||
187 | /* (n == m) if m never increased, i.e. no flat inputs */ |
||
188 | nflat = (n < m) ? (nintp - prog->in[n].hw) : 0; |
||
189 | nintp -= bitcount4(prog->fp.interp >> 24); /* subtract position inputs */ |
||
190 | nvary = nintp - nflat; |
||
191 | |||
192 | prog->fp.interp |= nvary << NV50_3D_FP_INTERPOLANT_CTRL_COUNT_NONFLAT__SHIFT; |
||
193 | prog->fp.interp |= nintp << NV50_3D_FP_INTERPOLANT_CTRL_COUNT__SHIFT; |
||
194 | |||
195 | /* put front/back colors right after HPOS */ |
||
196 | prog->fp.colors = 4 << NV50_3D_SEMANTIC_COLOR_FFC0_ID__SHIFT; |
||
197 | for (i = 0; i < 2; ++i) |
||
198 | if (prog->vp.bfc[i] < 0xff) |
||
199 | prog->fp.colors += bitcount4(prog->in[prog->vp.bfc[i]].mask) << 16; |
||
200 | |||
201 | /* FP outputs */ |
||
202 | |||
203 | if (info->prop.fp.numColourResults > 1) |
||
204 | prog->fp.flags[0] |= NV50_3D_FP_CONTROL_MULTIPLE_RESULTS; |
||
205 | |||
206 | for (i = 0; i < info->numOutputs; ++i) { |
||
207 | prog->out[i].id = i; |
||
208 | prog->out[i].sn = info->out[i].sn; |
||
209 | prog->out[i].si = info->out[i].si; |
||
210 | prog->out[i].mask = info->out[i].mask; |
||
211 | |||
212 | if (i == info->io.fragDepth || i == info->io.sampleMask) |
||
213 | continue; |
||
214 | prog->out[i].hw = info->out[i].si * 4; |
||
215 | |||
216 | for (c = 0; c < 4; ++c) |
||
217 | info->out[i].slot[c] = prog->out[i].hw + c; |
||
218 | |||
219 | prog->max_out = MAX2(prog->max_out, prog->out[i].hw + 4); |
||
220 | } |
||
221 | |||
222 | if (info->io.sampleMask < PIPE_MAX_SHADER_OUTPUTS) |
||
223 | info->out[info->io.sampleMask].slot[0] = prog->max_out++; |
||
224 | |||
225 | if (info->io.fragDepth < PIPE_MAX_SHADER_OUTPUTS) |
||
226 | info->out[info->io.fragDepth].slot[2] = prog->max_out++; |
||
227 | |||
228 | if (!prog->max_out) |
||
229 | prog->max_out = 4; |
||
230 | |||
231 | return 0; |
||
232 | } |
||
233 | |||
234 | static int |
||
235 | nv50_program_assign_varying_slots(struct nv50_ir_prog_info *info) |
||
236 | { |
||
237 | switch (info->type) { |
||
238 | case PIPE_SHADER_VERTEX: |
||
239 | return nv50_vertprog_assign_slots(info); |
||
240 | case PIPE_SHADER_GEOMETRY: |
||
241 | return nv50_vertprog_assign_slots(info); |
||
242 | case PIPE_SHADER_FRAGMENT: |
||
243 | return nv50_fragprog_assign_slots(info); |
||
244 | default: |
||
245 | return -1; |
||
246 | } |
||
247 | } |
||
248 | |||
249 | static struct nv50_stream_output_state * |
||
250 | nv50_program_create_strmout_state(const struct nv50_ir_prog_info *info, |
||
251 | const struct pipe_stream_output_info *pso) |
||
252 | { |
||
253 | struct nv50_stream_output_state *so; |
||
254 | unsigned b, i, c; |
||
255 | unsigned base[4]; |
||
256 | |||
257 | so = MALLOC_STRUCT(nv50_stream_output_state); |
||
258 | if (!so) |
||
259 | return NULL; |
||
260 | memset(so->map, 0xff, sizeof(so->map)); |
||
261 | |||
262 | for (b = 0; b < 4; ++b) |
||
263 | so->num_attribs[b] = 0; |
||
264 | for (i = 0; i < pso->num_outputs; ++i) { |
||
265 | unsigned end = pso->output[i].dst_offset + pso->output[i].num_components; |
||
266 | b = pso->output[i].output_buffer; |
||
267 | assert(b < 4); |
||
268 | so->num_attribs[b] = MAX2(so->num_attribs[b], end); |
||
269 | } |
||
270 | |||
271 | so->ctrl = NV50_3D_STRMOUT_BUFFERS_CTRL_INTERLEAVED; |
||
272 | |||
273 | so->stride[0] = pso->stride[0] * 4; |
||
274 | base[0] = 0; |
||
275 | for (b = 1; b < 4; ++b) { |
||
276 | assert(!so->num_attribs[b] || so->num_attribs[b] == pso->stride[b]); |
||
277 | so->stride[b] = so->num_attribs[b] * 4; |
||
278 | if (so->num_attribs[b]) |
||
279 | so->ctrl = (b + 1) << NV50_3D_STRMOUT_BUFFERS_CTRL_SEPARATE__SHIFT; |
||
280 | base[b] = align(base[b - 1] + so->num_attribs[b - 1], 4); |
||
281 | } |
||
282 | if (so->ctrl & NV50_3D_STRMOUT_BUFFERS_CTRL_INTERLEAVED) { |
||
283 | assert(so->stride[0] < NV50_3D_STRMOUT_BUFFERS_CTRL_STRIDE__MAX); |
||
284 | so->ctrl |= so->stride[0] << NV50_3D_STRMOUT_BUFFERS_CTRL_STRIDE__SHIFT; |
||
285 | } |
||
286 | |||
287 | so->map_size = base[3] + so->num_attribs[3]; |
||
288 | |||
289 | for (i = 0; i < pso->num_outputs; ++i) { |
||
290 | const unsigned s = pso->output[i].start_component; |
||
291 | const unsigned p = pso->output[i].dst_offset; |
||
292 | const unsigned r = pso->output[i].register_index; |
||
293 | b = pso->output[i].output_buffer; |
||
294 | |||
295 | for (c = 0; c < pso->output[i].num_components; ++c) |
||
296 | so->map[base[b] + p + c] = info->out[r].slot[s + c]; |
||
297 | } |
||
298 | |||
299 | return so; |
||
300 | } |
||
301 | |||
302 | boolean |
||
303 | nv50_program_translate(struct nv50_program *prog, uint16_t chipset) |
||
304 | { |
||
305 | struct nv50_ir_prog_info *info; |
||
306 | int ret; |
||
307 | const uint8_t map_undef = (prog->type == PIPE_SHADER_VERTEX) ? 0x40 : 0x80; |
||
308 | |||
309 | info = CALLOC_STRUCT(nv50_ir_prog_info); |
||
310 | if (!info) |
||
311 | return FALSE; |
||
312 | |||
313 | info->type = prog->type; |
||
314 | info->target = chipset; |
||
315 | info->bin.sourceRep = NV50_PROGRAM_IR_TGSI; |
||
316 | info->bin.source = (void *)prog->pipe.tokens; |
||
317 | |||
318 | info->io.ucpCBSlot = 15; |
||
319 | info->io.ucpBase = 0; |
||
320 | info->io.genUserClip = prog->vp.clpd_nr; |
||
321 | |||
322 | info->assignSlots = nv50_program_assign_varying_slots; |
||
323 | |||
324 | prog->vp.bfc[0] = 0xff; |
||
325 | prog->vp.bfc[1] = 0xff; |
||
326 | prog->vp.edgeflag = 0xff; |
||
327 | prog->vp.clpd[0] = map_undef; |
||
328 | prog->vp.clpd[1] = map_undef; |
||
329 | prog->vp.psiz = map_undef; |
||
330 | prog->gp.primid = 0x80; |
||
331 | |||
332 | info->driverPriv = prog; |
||
333 | |||
334 | #ifdef DEBUG |
||
335 | info->optLevel = debug_get_num_option("NV50_PROG_OPTIMIZE", 3); |
||
336 | info->dbgFlags = debug_get_num_option("NV50_PROG_DEBUG", 0); |
||
337 | #else |
||
338 | info->optLevel = 3; |
||
339 | #endif |
||
340 | |||
341 | ret = nv50_ir_generate_code(info); |
||
342 | if (ret) { |
||
343 | NOUVEAU_ERR("shader translation failed: %i\n", ret); |
||
344 | goto out; |
||
345 | } |
||
346 | FREE(info->bin.syms); |
||
347 | |||
348 | prog->code = info->bin.code; |
||
349 | prog->code_size = info->bin.codeSize; |
||
350 | prog->fixups = info->bin.relocData; |
||
351 | prog->max_gpr = MAX2(4, (info->bin.maxGPR >> 1) + 1); |
||
352 | prog->tls_space = info->bin.tlsSpace; |
||
353 | |||
354 | if (prog->type == PIPE_SHADER_FRAGMENT) { |
||
355 | if (info->prop.fp.writesDepth) { |
||
356 | prog->fp.flags[0] |= NV50_3D_FP_CONTROL_EXPORTS_Z; |
||
357 | prog->fp.flags[1] = 0x11; |
||
358 | } |
||
359 | if (info->prop.fp.usesDiscard) |
||
360 | prog->fp.flags[0] |= NV50_3D_FP_CONTROL_USES_KIL; |
||
361 | } |
||
362 | |||
363 | if (prog->pipe.stream_output.num_outputs) |
||
364 | prog->so = nv50_program_create_strmout_state(info, |
||
365 | &prog->pipe.stream_output); |
||
366 | |||
367 | out: |
||
368 | FREE(info); |
||
369 | return !ret; |
||
370 | } |
||
371 | |||
372 | boolean |
||
373 | nv50_program_upload_code(struct nv50_context *nv50, struct nv50_program *prog) |
||
374 | { |
||
375 | struct nouveau_heap *heap; |
||
376 | int ret; |
||
377 | uint32_t size = align(prog->code_size, 0x40); |
||
378 | |||
379 | switch (prog->type) { |
||
380 | case PIPE_SHADER_VERTEX: heap = nv50->screen->vp_code_heap; break; |
||
381 | case PIPE_SHADER_GEOMETRY: heap = nv50->screen->fp_code_heap; break; |
||
382 | case PIPE_SHADER_FRAGMENT: heap = nv50->screen->gp_code_heap; break; |
||
383 | default: |
||
384 | assert(!"invalid program type"); |
||
385 | return FALSE; |
||
386 | } |
||
387 | |||
388 | ret = nouveau_heap_alloc(heap, size, prog, &prog->mem); |
||
389 | if (ret) { |
||
390 | /* Out of space: evict everything to compactify the code segment, hoping |
||
391 | * the working set is much smaller and drifts slowly. Improve me ! |
||
392 | */ |
||
393 | while (heap->next) { |
||
394 | struct nv50_program *evict = heap->next->priv; |
||
395 | if (evict) |
||
396 | nouveau_heap_free(&evict->mem); |
||
397 | } |
||
398 | debug_printf("WARNING: out of code space, evicting all shaders.\n"); |
||
399 | ret = nouveau_heap_alloc(heap, size, prog, &prog->mem); |
||
400 | if (ret) { |
||
401 | NOUVEAU_ERR("shader too large (0x%x) to fit in code space ?\n", size); |
||
402 | return FALSE; |
||
403 | } |
||
404 | } |
||
405 | prog->code_base = prog->mem->start; |
||
406 | |||
407 | ret = nv50_tls_realloc(nv50->screen, prog->tls_space); |
||
408 | if (ret < 0) |
||
409 | return FALSE; |
||
410 | if (ret > 0) |
||
411 | nv50->state.new_tls_space = TRUE; |
||
412 | |||
413 | if (prog->fixups) |
||
414 | nv50_ir_relocate_code(prog->fixups, prog->code, prog->code_base, 0, 0); |
||
415 | |||
416 | nv50_sifc_linear_u8(&nv50->base, nv50->screen->code, |
||
417 | (prog->type << NV50_CODE_BO_SIZE_LOG2) + prog->code_base, |
||
418 | NOUVEAU_BO_VRAM, prog->code_size, prog->code); |
||
419 | |||
420 | BEGIN_NV04(nv50->base.pushbuf, NV50_3D(CODE_CB_FLUSH), 1); |
||
421 | PUSH_DATA (nv50->base.pushbuf, 0); |
||
422 | |||
423 | return TRUE; |
||
424 | } |
||
425 | |||
426 | void |
||
427 | nv50_program_destroy(struct nv50_context *nv50, struct nv50_program *p) |
||
428 | { |
||
429 | const struct pipe_shader_state pipe = p->pipe; |
||
430 | const ubyte type = p->type; |
||
431 | |||
432 | if (p->mem) |
||
433 | nouveau_heap_free(&p->mem); |
||
434 | |||
435 | FREE(p->code); |
||
436 | |||
437 | FREE(p->fixups); |
||
438 | |||
439 | FREE(p->so); |
||
440 | |||
441 | memset(p, 0, sizeof(*p)); |
||
442 | |||
443 | p->pipe = pipe; |
||
444 | p->type = type; |
||
445 | }><>>>>><>>><>>>>>>>>>><>>>><>><>><>>><>>>><>><>><>>><>>>>><>>>>>>><>>><>> |