Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
5564 | serge | 1 | /* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */ |
2 | |||
3 | /* |
||
4 | * Copyright (C) 2013 Rob Clark |
||
5 | * |
||
6 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
7 | * copy of this software and associated documentation files (the "Software"), |
||
8 | * to deal in the Software without restriction, including without limitation |
||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
||
10 | * and/or sell copies of the Software, and to permit persons to whom the |
||
11 | * Software is furnished to do so, subject to the following conditions: |
||
12 | * |
||
13 | * The above copyright notice and this permission notice (including the next |
||
14 | * paragraph) shall be included in all copies or substantial portions of the |
||
15 | * Software. |
||
16 | * |
||
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
||
20 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
||
23 | * SOFTWARE. |
||
24 | * |
||
25 | * Authors: |
||
26 | * Rob Clark |
||
27 | */ |
||
28 | |||
29 | #include "pipe/p_state.h" |
||
30 | #include "util/u_string.h" |
||
31 | #include "util/u_memory.h" |
||
32 | #include "util/u_inlines.h" |
||
33 | #include "util/u_format.h" |
||
34 | |||
35 | #include "freedreno_program.h" |
||
36 | |||
37 | #include "fd3_program.h" |
||
38 | #include "fd3_emit.h" |
||
39 | #include "fd3_texture.h" |
||
40 | #include "fd3_format.h" |
||
41 | |||
42 | static void |
||
43 | delete_shader_stateobj(struct fd3_shader_stateobj *so) |
||
44 | { |
||
45 | ir3_shader_destroy(so->shader); |
||
46 | free(so); |
||
47 | } |
||
48 | |||
49 | static struct fd3_shader_stateobj * |
||
50 | create_shader_stateobj(struct pipe_context *pctx, const struct pipe_shader_state *cso, |
||
51 | enum shader_t type) |
||
52 | { |
||
53 | struct fd3_shader_stateobj *so = CALLOC_STRUCT(fd3_shader_stateobj); |
||
54 | so->shader = ir3_shader_create(pctx, cso->tokens, type); |
||
55 | return so; |
||
56 | } |
||
57 | |||
58 | static void * |
||
59 | fd3_fp_state_create(struct pipe_context *pctx, |
||
60 | const struct pipe_shader_state *cso) |
||
61 | { |
||
62 | return create_shader_stateobj(pctx, cso, SHADER_FRAGMENT); |
||
63 | } |
||
64 | |||
/* delete_fs_state hook: 'hwcso' is the object fd3_fp_state_create() returned. */
static void
fd3_fp_state_delete(struct pipe_context *pctx, void *hwcso)
{
	delete_shader_stateobj(hwcso);
}
||
71 | |||
72 | static void * |
||
73 | fd3_vp_state_create(struct pipe_context *pctx, |
||
74 | const struct pipe_shader_state *cso) |
||
75 | { |
||
76 | return create_shader_stateobj(pctx, cso, SHADER_VERTEX); |
||
77 | } |
||
78 | |||
/* delete_vs_state hook: 'hwcso' is the object fd3_vp_state_create() returned. */
static void
fd3_vp_state_delete(struct pipe_context *pctx, void *hwcso)
{
	delete_shader_stateobj(hwcso);
}
||
85 | |||
86 | static void |
||
87 | emit_shader(struct fd_ringbuffer *ring, const struct ir3_shader_variant *so) |
||
88 | { |
||
89 | const struct ir3_info *si = &so->info; |
||
90 | enum adreno_state_block sb; |
||
91 | enum adreno_state_src src; |
||
92 | uint32_t i, sz, *bin; |
||
93 | |||
94 | if (so->type == SHADER_VERTEX) { |
||
95 | sb = SB_VERT_SHADER; |
||
96 | } else { |
||
97 | sb = SB_FRAG_SHADER; |
||
98 | } |
||
99 | |||
100 | if (fd_mesa_debug & FD_DBG_DIRECT) { |
||
101 | sz = si->sizedwords; |
||
102 | src = SS_DIRECT; |
||
103 | bin = fd_bo_map(so->bo); |
||
104 | } else { |
||
105 | sz = 0; |
||
106 | src = SS_INDIRECT; |
||
107 | bin = NULL; |
||
108 | } |
||
109 | |||
110 | OUT_PKT3(ring, CP_LOAD_STATE, 2 + sz); |
||
111 | OUT_RING(ring, CP_LOAD_STATE_0_DST_OFF(0) | |
||
112 | CP_LOAD_STATE_0_STATE_SRC(src) | |
||
113 | CP_LOAD_STATE_0_STATE_BLOCK(sb) | |
||
114 | CP_LOAD_STATE_0_NUM_UNIT(so->instrlen)); |
||
115 | if (bin) { |
||
116 | OUT_RING(ring, CP_LOAD_STATE_1_EXT_SRC_ADDR(0) | |
||
117 | CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER)); |
||
118 | } else { |
||
119 | OUT_RELOC(ring, so->bo, 0, |
||
120 | CP_LOAD_STATE_1_STATE_TYPE(ST_SHADER), 0); |
||
121 | } |
||
122 | for (i = 0; i < sz; i++) { |
||
123 | OUT_RING(ring, bin[i]); |
||
124 | } |
||
125 | } |
||
126 | |||
127 | void |
||
128 | fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit, |
||
129 | int nr, struct pipe_surface **bufs) |
||
130 | { |
||
131 | const struct ir3_shader_variant *vp, *fp; |
||
132 | const struct ir3_info *vsi, *fsi; |
||
133 | enum a3xx_instrbuffermode fpbuffer, vpbuffer; |
||
134 | uint32_t fpbuffersz, vpbuffersz, fsoff; |
||
135 | uint32_t pos_regid, posz_regid, psize_regid, color_regid[4] = {0}; |
||
136 | int constmode; |
||
137 | int i, j, k; |
||
138 | |||
139 | vp = fd3_emit_get_vp(emit); |
||
140 | |||
141 | if (emit->key.binning_pass) { |
||
142 | /* use dummy stateobj to simplify binning vs non-binning: */ |
||
143 | static const struct ir3_shader_variant binning_fp = {}; |
||
144 | fp = &binning_fp; |
||
145 | } else { |
||
146 | fp = fd3_emit_get_fp(emit); |
||
147 | } |
||
148 | |||
149 | vsi = &vp->info; |
||
150 | fsi = &fp->info; |
||
151 | |||
152 | fpbuffer = BUFFER; |
||
153 | vpbuffer = BUFFER; |
||
154 | fpbuffersz = fp->instrlen; |
||
155 | vpbuffersz = vp->instrlen; |
||
156 | |||
157 | /* |
||
158 | * Decide whether to use BUFFER or CACHE mode for VS and FS. It |
||
159 | * appears like 256 is the hard limit, but when the combined size |
||
160 | * exceeds 128 then blob will try to keep FS in BUFFER mode and |
||
161 | * switch to CACHE for VS until VS is too large. The blob seems |
||
162 | * to switch FS out of BUFFER mode at slightly under 128. But |
||
163 | * a bit fuzzy on the decision tree, so use slightly conservative |
||
164 | * limits. |
||
165 | * |
||
166 | * TODO check if these thresholds for BUFFER vs CACHE mode are the |
||
167 | * same for all a3xx or whether we need to consider the gpuid |
||
168 | */ |
||
169 | |||
170 | if ((fpbuffersz + vpbuffersz) > 128) { |
||
171 | if (fpbuffersz < 112) { |
||
172 | /* FP:BUFFER VP:CACHE */ |
||
173 | vpbuffer = CACHE; |
||
174 | vpbuffersz = 256 - fpbuffersz; |
||
175 | } else if (vpbuffersz < 112) { |
||
176 | /* FP:CACHE VP:BUFFER */ |
||
177 | fpbuffer = CACHE; |
||
178 | fpbuffersz = 256 - vpbuffersz; |
||
179 | } else { |
||
180 | /* FP:CACHE VP:CACHE */ |
||
181 | vpbuffer = fpbuffer = CACHE; |
||
182 | vpbuffersz = fpbuffersz = 192; |
||
183 | } |
||
184 | } |
||
185 | |||
186 | if (fpbuffer == BUFFER) { |
||
187 | fsoff = 128 - fpbuffersz; |
||
188 | } else { |
||
189 | fsoff = 256 - fpbuffersz; |
||
190 | } |
||
191 | |||
192 | /* seems like vs->constlen + fs->constlen > 256, then CONSTMODE=1 */ |
||
193 | constmode = ((vp->constlen + fp->constlen) > 256) ? 1 : 0; |
||
194 | |||
195 | pos_regid = ir3_find_output_regid(vp, |
||
196 | ir3_semantic_name(TGSI_SEMANTIC_POSITION, 0)); |
||
197 | posz_regid = ir3_find_output_regid(fp, |
||
198 | ir3_semantic_name(TGSI_SEMANTIC_POSITION, 0)); |
||
199 | psize_regid = ir3_find_output_regid(vp, |
||
200 | ir3_semantic_name(TGSI_SEMANTIC_PSIZE, 0)); |
||
201 | if (fp->color0_mrt) { |
||
202 | color_regid[0] = color_regid[1] = color_regid[2] = color_regid[3] = |
||
203 | ir3_find_output_regid(fp, ir3_semantic_name(TGSI_SEMANTIC_COLOR, 0)); |
||
204 | } else { |
||
205 | for (int i = 0; i < fp->outputs_count; i++) { |
||
206 | ir3_semantic sem = fp->outputs[i].semantic; |
||
207 | unsigned idx = sem2idx(sem); |
||
208 | if (sem2name(sem) != TGSI_SEMANTIC_COLOR) |
||
209 | continue; |
||
210 | assert(idx < 4); |
||
211 | color_regid[idx] = fp->outputs[i].regid; |
||
212 | } |
||
213 | } |
||
214 | |||
215 | /* adjust regids for alpha output formats. there is no alpha render |
||
216 | * format, so it's just treated like red |
||
217 | */ |
||
218 | for (i = 0; i < nr; i++) |
||
219 | if (util_format_is_alpha(pipe_surface_format(bufs[i]))) |
||
220 | color_regid[i] += 3; |
||
221 | |||
222 | /* we could probably divide this up into things that need to be |
||
223 | * emitted if frag-prog is dirty vs if vert-prog is dirty.. |
||
224 | */ |
||
225 | |||
226 | OUT_PKT0(ring, REG_A3XX_HLSQ_CONTROL_0_REG, 6); |
||
227 | OUT_RING(ring, A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(FOUR_QUADS) | |
||
228 | A3XX_HLSQ_CONTROL_0_REG_CONSTMODE(constmode) | |
||
229 | /* NOTE: I guess SHADERRESTART and CONSTFULLUPDATE maybe |
||
230 | * flush some caches? I think we only need to set those |
||
231 | * bits if we have updated const or shader.. |
||
232 | */ |
||
233 | A3XX_HLSQ_CONTROL_0_REG_SPSHADERRESTART | |
||
234 | A3XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE); |
||
235 | OUT_RING(ring, A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(TWO_QUADS) | |
||
236 | A3XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE | |
||
237 | COND(fp->frag_coord, A3XX_HLSQ_CONTROL_1_REG_ZWCOORD)); |
||
238 | OUT_RING(ring, A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(31)); |
||
239 | OUT_RING(ring, A3XX_HLSQ_CONTROL_3_REG_REGID(fp->pos_regid)); |
||
240 | OUT_RING(ring, A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(vp->constlen) | |
||
241 | A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET(0) | |
||
242 | A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH(vpbuffersz)); |
||
243 | OUT_RING(ring, A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH(fp->constlen) | |
||
244 | A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET(128) | |
||
245 | A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH(fpbuffersz)); |
||
246 | |||
247 | OUT_PKT0(ring, REG_A3XX_SP_SP_CTRL_REG, 1); |
||
248 | OUT_RING(ring, A3XX_SP_SP_CTRL_REG_CONSTMODE(constmode) | |
||
249 | COND(emit->key.binning_pass, A3XX_SP_SP_CTRL_REG_BINNING) | |
||
250 | A3XX_SP_SP_CTRL_REG_SLEEPMODE(1) | |
||
251 | A3XX_SP_SP_CTRL_REG_L0MODE(0)); |
||
252 | |||
253 | OUT_PKT0(ring, REG_A3XX_SP_VS_LENGTH_REG, 1); |
||
254 | OUT_RING(ring, A3XX_SP_VS_LENGTH_REG_SHADERLENGTH(vp->instrlen)); |
||
255 | |||
256 | OUT_PKT0(ring, REG_A3XX_SP_VS_CTRL_REG0, 3); |
||
257 | OUT_RING(ring, A3XX_SP_VS_CTRL_REG0_THREADMODE(MULTI) | |
||
258 | A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE(vpbuffer) | |
||
259 | COND(vpbuffer == CACHE, A3XX_SP_VS_CTRL_REG0_CACHEINVALID) | |
||
260 | A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(vsi->max_half_reg + 1) | |
||
261 | A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(vsi->max_reg + 1) | |
||
262 | A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP(0) | |
||
263 | A3XX_SP_VS_CTRL_REG0_THREADSIZE(TWO_QUADS) | |
||
264 | A3XX_SP_VS_CTRL_REG0_SUPERTHREADMODE | |
||
265 | COND(vp->has_samp, A3XX_SP_VS_CTRL_REG0_PIXLODENABLE) | |
||
266 | A3XX_SP_VS_CTRL_REG0_LENGTH(vpbuffersz)); |
||
267 | OUT_RING(ring, A3XX_SP_VS_CTRL_REG1_CONSTLENGTH(vp->constlen) | |
||
268 | A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING(vp->total_in) | |
||
269 | A3XX_SP_VS_CTRL_REG1_CONSTFOOTPRINT(MAX2(vp->constlen + 1, 0))); |
||
270 | OUT_RING(ring, A3XX_SP_VS_PARAM_REG_POSREGID(pos_regid) | |
||
271 | A3XX_SP_VS_PARAM_REG_PSIZEREGID(psize_regid) | |
||
272 | A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(align(fp->total_in, 4) / 4)); |
||
273 | |||
274 | for (i = 0, j = -1; (i < 8) && (j < (int)fp->inputs_count); i++) { |
||
275 | uint32_t reg = 0; |
||
276 | |||
277 | OUT_PKT0(ring, REG_A3XX_SP_VS_OUT_REG(i), 1); |
||
278 | |||
279 | j = ir3_next_varying(fp, j); |
||
280 | if (j < fp->inputs_count) { |
||
281 | k = ir3_find_output(vp, fp->inputs[j].semantic); |
||
282 | reg |= A3XX_SP_VS_OUT_REG_A_REGID(vp->outputs[k].regid); |
||
283 | reg |= A3XX_SP_VS_OUT_REG_A_COMPMASK(fp->inputs[j].compmask); |
||
284 | } |
||
285 | |||
286 | j = ir3_next_varying(fp, j); |
||
287 | if (j < fp->inputs_count) { |
||
288 | k = ir3_find_output(vp, fp->inputs[j].semantic); |
||
289 | reg |= A3XX_SP_VS_OUT_REG_B_REGID(vp->outputs[k].regid); |
||
290 | reg |= A3XX_SP_VS_OUT_REG_B_COMPMASK(fp->inputs[j].compmask); |
||
291 | } |
||
292 | |||
293 | OUT_RING(ring, reg); |
||
294 | } |
||
295 | |||
296 | for (i = 0, j = -1; (i < 4) && (j < (int)fp->inputs_count); i++) { |
||
297 | uint32_t reg = 0; |
||
298 | |||
299 | OUT_PKT0(ring, REG_A3XX_SP_VS_VPC_DST_REG(i), 1); |
||
300 | |||
301 | j = ir3_next_varying(fp, j); |
||
302 | if (j < fp->inputs_count) |
||
303 | reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC0(fp->inputs[j].inloc); |
||
304 | j = ir3_next_varying(fp, j); |
||
305 | if (j < fp->inputs_count) |
||
306 | reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC1(fp->inputs[j].inloc); |
||
307 | j = ir3_next_varying(fp, j); |
||
308 | if (j < fp->inputs_count) |
||
309 | reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC2(fp->inputs[j].inloc); |
||
310 | j = ir3_next_varying(fp, j); |
||
311 | if (j < fp->inputs_count) |
||
312 | reg |= A3XX_SP_VS_VPC_DST_REG_OUTLOC3(fp->inputs[j].inloc); |
||
313 | |||
314 | OUT_RING(ring, reg); |
||
315 | } |
||
316 | |||
317 | OUT_PKT0(ring, REG_A3XX_SP_VS_OBJ_OFFSET_REG, 2); |
||
318 | OUT_RING(ring, A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(0) | |
||
319 | A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(0)); |
||
320 | OUT_RELOC(ring, vp->bo, 0, 0, 0); /* SP_VS_OBJ_START_REG */ |
||
321 | |||
322 | if (emit->key.binning_pass) { |
||
323 | OUT_PKT0(ring, REG_A3XX_SP_FS_LENGTH_REG, 1); |
||
324 | OUT_RING(ring, 0x00000000); |
||
325 | |||
326 | OUT_PKT0(ring, REG_A3XX_SP_FS_CTRL_REG0, 2); |
||
327 | OUT_RING(ring, A3XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) | |
||
328 | A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(BUFFER)); |
||
329 | OUT_RING(ring, 0x00000000); |
||
330 | |||
331 | OUT_PKT0(ring, REG_A3XX_SP_FS_OBJ_OFFSET_REG, 1); |
||
332 | OUT_RING(ring, A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(128) | |
||
333 | A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(0)); |
||
334 | } else { |
||
335 | OUT_PKT0(ring, REG_A3XX_SP_FS_LENGTH_REG, 1); |
||
336 | OUT_RING(ring, A3XX_SP_FS_LENGTH_REG_SHADERLENGTH(fp->instrlen)); |
||
337 | |||
338 | OUT_PKT0(ring, REG_A3XX_SP_FS_CTRL_REG0, 2); |
||
339 | OUT_RING(ring, A3XX_SP_FS_CTRL_REG0_THREADMODE(MULTI) | |
||
340 | A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(fpbuffer) | |
||
341 | COND(fpbuffer == CACHE, A3XX_SP_FS_CTRL_REG0_CACHEINVALID) | |
||
342 | A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(fsi->max_half_reg + 1) | |
||
343 | A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(fsi->max_reg + 1) | |
||
344 | A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(1) | |
||
345 | A3XX_SP_FS_CTRL_REG0_THREADSIZE(FOUR_QUADS) | |
||
346 | A3XX_SP_FS_CTRL_REG0_SUPERTHREADMODE | |
||
347 | COND(fp->has_samp > 0, A3XX_SP_FS_CTRL_REG0_PIXLODENABLE) | |
||
348 | A3XX_SP_FS_CTRL_REG0_LENGTH(fpbuffersz)); |
||
349 | OUT_RING(ring, A3XX_SP_FS_CTRL_REG1_CONSTLENGTH(fp->constlen) | |
||
350 | A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING(fp->total_in) | |
||
351 | A3XX_SP_FS_CTRL_REG1_CONSTFOOTPRINT(MAX2(fp->constlen + 1, 0)) | |
||
352 | A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET(63)); |
||
353 | |||
354 | OUT_PKT0(ring, REG_A3XX_SP_FS_OBJ_OFFSET_REG, 2); |
||
355 | OUT_RING(ring, A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET( |
||
356 | MAX2(128, vp->constlen)) | |
||
357 | A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(fsoff)); |
||
358 | OUT_RELOC(ring, fp->bo, 0, 0, 0); /* SP_FS_OBJ_START_REG */ |
||
359 | } |
||
360 | |||
361 | OUT_PKT0(ring, REG_A3XX_SP_FS_OUTPUT_REG, 1); |
||
362 | OUT_RING(ring, |
||
363 | COND(fp->writes_pos, A3XX_SP_FS_OUTPUT_REG_DEPTH_ENABLE) | |
||
364 | A3XX_SP_FS_OUTPUT_REG_DEPTH_REGID(posz_regid) | |
||
365 | A3XX_SP_FS_OUTPUT_REG_MRT(MAX2(1, nr) - 1)); |
||
366 | |||
367 | OUT_PKT0(ring, REG_A3XX_SP_FS_MRT_REG(0), 4); |
||
368 | for (i = 0; i < 4; i++) { |
||
369 | uint32_t mrt_reg = A3XX_SP_FS_MRT_REG_REGID(color_regid[i]) | |
||
370 | COND(fp->key.half_precision, A3XX_SP_FS_MRT_REG_HALF_PRECISION); |
||
371 | |||
372 | if (i < nr) { |
||
373 | enum pipe_format fmt = pipe_surface_format(bufs[i]); |
||
374 | mrt_reg |= COND(util_format_is_pure_uint(fmt), A3XX_SP_FS_MRT_REG_UINT) | |
||
375 | COND(util_format_is_pure_sint(fmt), A3XX_SP_FS_MRT_REG_SINT); |
||
376 | } |
||
377 | OUT_RING(ring, mrt_reg); |
||
378 | } |
||
379 | |||
380 | if (emit->key.binning_pass) { |
||
381 | OUT_PKT0(ring, REG_A3XX_VPC_ATTR, 2); |
||
382 | OUT_RING(ring, A3XX_VPC_ATTR_THRDASSIGN(1) | |
||
383 | A3XX_VPC_ATTR_LMSIZE(1) | |
||
384 | COND(vp->writes_psize, A3XX_VPC_ATTR_PSIZE)); |
||
385 | OUT_RING(ring, 0x00000000); |
||
386 | } else { |
||
387 | uint32_t vinterp[4], flatshade[2], vpsrepl[4]; |
||
388 | |||
389 | memset(vinterp, 0, sizeof(vinterp)); |
||
390 | memset(flatshade, 0, sizeof(flatshade)); |
||
391 | memset(vpsrepl, 0, sizeof(vpsrepl)); |
||
392 | |||
393 | /* figure out VARYING_INTERP / FLAT_SHAD register values: */ |
||
394 | for (j = -1; (j = ir3_next_varying(fp, j)) < (int)fp->inputs_count; ) { |
||
395 | uint32_t interp = fp->inputs[j].interpolate; |
||
396 | |||
397 | /* TODO might be cleaner to just +8 in SP_VS_VPC_DST_REG |
||
398 | * instead.. rather than -8 everywhere else.. |
||
399 | */ |
||
400 | uint32_t inloc = fp->inputs[j].inloc - 8; |
||
401 | |||
402 | /* currently assuming varyings aligned to 4 (not |
||
403 | * packed): |
||
404 | */ |
||
405 | debug_assert((inloc % 4) == 0); |
||
406 | |||
407 | if ((interp == TGSI_INTERPOLATE_CONSTANT) || |
||
408 | ((interp == TGSI_INTERPOLATE_COLOR) && emit->rasterflat)) { |
||
409 | uint32_t loc = inloc; |
||
410 | for (i = 0; i < 4; i++, loc++) { |
||
411 | vinterp[loc / 16] |= FLAT << ((loc % 16) * 2); |
||
412 | flatshade[loc / 32] |= 1 << (loc % 32); |
||
413 | } |
||
414 | } |
||
415 | |||
416 | /* Replace the .xy coordinates with S/T from the point sprite. Set |
||
417 | * interpolation bits for .zw such that they become .01 |
||
418 | */ |
||
419 | if (emit->sprite_coord_enable & (1 << sem2idx(fp->inputs[j].semantic))) { |
||
420 | vpsrepl[inloc / 16] |= (emit->sprite_coord_mode ? 0x0d : 0x09) |
||
421 | << ((inloc % 16) * 2); |
||
422 | vinterp[(inloc + 2) / 16] |= 2 << (((inloc + 2) % 16) * 2); |
||
423 | vinterp[(inloc + 3) / 16] |= 3 << (((inloc + 3) % 16) * 2); |
||
424 | } |
||
425 | } |
||
426 | |||
427 | OUT_PKT0(ring, REG_A3XX_VPC_ATTR, 2); |
||
428 | OUT_RING(ring, A3XX_VPC_ATTR_TOTALATTR(fp->total_in) | |
||
429 | A3XX_VPC_ATTR_THRDASSIGN(1) | |
||
430 | A3XX_VPC_ATTR_LMSIZE(1) | |
||
431 | COND(vp->writes_psize, A3XX_VPC_ATTR_PSIZE)); |
||
432 | OUT_RING(ring, A3XX_VPC_PACK_NUMFPNONPOSVAR(fp->total_in) | |
||
433 | A3XX_VPC_PACK_NUMNONPOSVSVAR(fp->total_in)); |
||
434 | |||
435 | OUT_PKT0(ring, REG_A3XX_VPC_VARYING_INTERP_MODE(0), 4); |
||
436 | OUT_RING(ring, vinterp[0]); /* VPC_VARYING_INTERP[0].MODE */ |
||
437 | OUT_RING(ring, vinterp[1]); /* VPC_VARYING_INTERP[1].MODE */ |
||
438 | OUT_RING(ring, vinterp[2]); /* VPC_VARYING_INTERP[2].MODE */ |
||
439 | OUT_RING(ring, vinterp[3]); /* VPC_VARYING_INTERP[3].MODE */ |
||
440 | |||
441 | OUT_PKT0(ring, REG_A3XX_VPC_VARYING_PS_REPL_MODE(0), 4); |
||
442 | OUT_RING(ring, vpsrepl[0]); /* VPC_VARYING_PS_REPL[0].MODE */ |
||
443 | OUT_RING(ring, vpsrepl[1]); /* VPC_VARYING_PS_REPL[1].MODE */ |
||
444 | OUT_RING(ring, vpsrepl[2]); /* VPC_VARYING_PS_REPL[2].MODE */ |
||
445 | OUT_RING(ring, vpsrepl[3]); /* VPC_VARYING_PS_REPL[3].MODE */ |
||
446 | |||
447 | OUT_PKT0(ring, REG_A3XX_SP_FS_FLAT_SHAD_MODE_REG_0, 2); |
||
448 | OUT_RING(ring, flatshade[0]); /* SP_FS_FLAT_SHAD_MODE_REG_0 */ |
||
449 | OUT_RING(ring, flatshade[1]); /* SP_FS_FLAT_SHAD_MODE_REG_1 */ |
||
450 | } |
||
451 | |||
452 | OUT_PKT0(ring, REG_A3XX_VFD_VS_THREADING_THRESHOLD, 1); |
||
453 | OUT_RING(ring, A3XX_VFD_VS_THREADING_THRESHOLD_REGID_THRESHOLD(15) | |
||
454 | A3XX_VFD_VS_THREADING_THRESHOLD_REGID_VTXCNT(252)); |
||
455 | |||
456 | if (vpbuffer == BUFFER) |
||
457 | emit_shader(ring, vp); |
||
458 | |||
459 | OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1); |
||
460 | OUT_RING(ring, 0x00000000); /* VFD_PERFCOUNTER0_SELECT */ |
||
461 | |||
462 | if (!emit->key.binning_pass) { |
||
463 | if (fpbuffer == BUFFER) |
||
464 | emit_shader(ring, fp); |
||
465 | |||
466 | OUT_PKT0(ring, REG_A3XX_VFD_PERFCOUNTER0_SELECT, 1); |
||
467 | OUT_RING(ring, 0x00000000); /* VFD_PERFCOUNTER0_SELECT */ |
||
468 | } |
||
469 | } |
||
470 | |||
471 | void |
||
472 | fd3_prog_init(struct pipe_context *pctx) |
||
473 | { |
||
474 | pctx->create_fs_state = fd3_fp_state_create; |
||
475 | pctx->delete_fs_state = fd3_fp_state_delete; |
||
476 | |||
477 | pctx->create_vs_state = fd3_vp_state_create; |
||
478 | pctx->delete_vs_state = fd3_vp_state_delete; |
||
479 | |||
480 | fd_prog_init(pctx); |
||
481 | }><>><>><>><>><>><>>>>>>>>>>>>>>>>>>>>> |