Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
4358 | Serge | 1 | /* |
2 | Copyright (C) Intel Corp. 2006. All Rights Reserved. |
||
3 | Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to |
||
4 | develop this 3D driver. |
||
5 | |||
6 | Permission is hereby granted, free of charge, to any person obtaining |
||
7 | a copy of this software and associated documentation files (the |
||
8 | "Software"), to deal in the Software without restriction, including |
||
9 | without limitation the rights to use, copy, modify, merge, publish, |
||
10 | distribute, sublicense, and/or sell copies of the Software, and to |
||
11 | permit persons to whom the Software is furnished to do so, subject to |
||
12 | the following conditions: |
||
13 | |||
14 | The above copyright notice and this permission notice (including the |
||
15 | next paragraph) shall be included in all copies or substantial |
||
16 | portions of the Software. |
||
17 | |||
18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
||
19 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||
20 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
||
21 | IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
||
22 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
||
23 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
||
24 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||
25 | |||
26 | **********************************************************************/ |
||
27 | /* |
||
28 | * Authors: |
||
29 | * Keith Whitwell |
||
30 | */ |
||
31 | |||
32 | |||
33 | |||
34 | #include "main/glheader.h" |
||
35 | #include "main/context.h" |
||
36 | #include "main/macros.h" |
||
37 | #include "main/enums.h" |
||
38 | #include "program/prog_parameter.h" |
||
39 | #include "program/prog_print.h" |
||
40 | #include "program/prog_statevars.h" |
||
41 | #include "intel_batchbuffer.h" |
||
42 | #include "intel_regions.h" |
||
43 | #include "brw_context.h" |
||
44 | #include "brw_defines.h" |
||
45 | #include "brw_state.h" |
||
46 | #include "brw_util.h" |
||
47 | |||
48 | |||
49 | /** |
||
50 | * Partition the CURBE between the various users of constant values: |
||
51 | * Note that vertex and fragment shaders can now fetch constants out |
||
52 | * of constant buffers. We no longer allocatea block of the GRF for |
||
53 | * constants. That greatly reduces the demand for space in the CURBE. |
||
54 | * Some of the comments within are dated... |
||
55 | */ |
||
56 | static void calculate_curbe_offsets( struct brw_context *brw ) |
||
57 | { |
||
58 | struct gl_context *ctx = &brw->ctx; |
||
59 | /* CACHE_NEW_WM_PROG */ |
||
60 | const GLuint nr_fp_regs = (brw->wm.prog_data->nr_params + 15) / 16; |
||
61 | |||
62 | /* BRW_NEW_VERTEX_PROGRAM */ |
||
63 | const GLuint nr_vp_regs = (brw->vs.prog_data->base.nr_params + 15) / 16; |
||
64 | GLuint nr_clip_regs = 0; |
||
65 | GLuint total_regs; |
||
66 | |||
67 | /* _NEW_TRANSFORM */ |
||
68 | if (ctx->Transform.ClipPlanesEnabled) { |
||
69 | GLuint nr_planes = 6 + _mesa_bitcount_64(ctx->Transform.ClipPlanesEnabled); |
||
70 | nr_clip_regs = (nr_planes * 4 + 15) / 16; |
||
71 | } |
||
72 | |||
73 | |||
74 | total_regs = nr_fp_regs + nr_vp_regs + nr_clip_regs; |
||
75 | |||
76 | /* This can happen - what to do? Probably rather than falling |
||
77 | * back, the best thing to do is emit programs which code the |
||
78 | * constants as immediate values. Could do this either as a static |
||
79 | * cap on WM and VS, or adaptively. |
||
80 | * |
||
81 | * Unfortunately, this is currently dependent on the results of the |
||
82 | * program generation process (in the case of wm), so this would |
||
83 | * introduce the need to re-generate programs in the event of a |
||
84 | * curbe allocation failure. |
||
85 | */ |
||
86 | /* Max size is 32 - just large enough to |
||
87 | * hold the 128 parameters allowed by |
||
88 | * the fragment and vertex program |
||
89 | * api's. It's not clear what happens |
||
90 | * when both VP and FP want to use 128 |
||
91 | * parameters, though. |
||
92 | */ |
||
93 | assert(total_regs <= 32); |
||
94 | |||
95 | /* Lazy resize: |
||
96 | */ |
||
97 | if (nr_fp_regs > brw->curbe.wm_size || |
||
98 | nr_vp_regs > brw->curbe.vs_size || |
||
99 | nr_clip_regs != brw->curbe.clip_size || |
||
100 | (total_regs < brw->curbe.total_size / 4 && |
||
101 | brw->curbe.total_size > 16)) { |
||
102 | |||
103 | GLuint reg = 0; |
||
104 | |||
105 | /* Calculate a new layout: |
||
106 | */ |
||
107 | reg = 0; |
||
108 | brw->curbe.wm_start = reg; |
||
109 | brw->curbe.wm_size = nr_fp_regs; reg += nr_fp_regs; |
||
110 | brw->curbe.clip_start = reg; |
||
111 | brw->curbe.clip_size = nr_clip_regs; reg += nr_clip_regs; |
||
112 | brw->curbe.vs_start = reg; |
||
113 | brw->curbe.vs_size = nr_vp_regs; reg += nr_vp_regs; |
||
114 | brw->curbe.total_size = reg; |
||
115 | |||
116 | if (0) |
||
117 | printf("curbe wm %d+%d clip %d+%d vs %d+%d\n", |
||
118 | brw->curbe.wm_start, |
||
119 | brw->curbe.wm_size, |
||
120 | brw->curbe.clip_start, |
||
121 | brw->curbe.clip_size, |
||
122 | brw->curbe.vs_start, |
||
123 | brw->curbe.vs_size ); |
||
124 | |||
125 | brw->state.dirty.brw |= BRW_NEW_CURBE_OFFSETS; |
||
126 | } |
||
127 | } |
||
128 | |||
129 | |||
130 | const struct brw_tracked_state brw_curbe_offsets = { |
||
131 | .dirty = { |
||
132 | .mesa = _NEW_TRANSFORM, |
||
133 | .brw = BRW_NEW_VERTEX_PROGRAM | BRW_NEW_CONTEXT, |
||
134 | .cache = CACHE_NEW_WM_PROG |
||
135 | }, |
||
136 | .emit = calculate_curbe_offsets |
||
137 | }; |
||
138 | |||
139 | |||
140 | |||
141 | |||
142 | /* Define the number of curbes within CS's urb allocation. Multiple |
||
143 | * urb entries -> multiple curbes. These will be used by |
||
144 | * fixed-function hardware in a double-buffering scheme to avoid a |
||
145 | * pipeline stall each time the contents of the curbe is changed. |
||
146 | */ |
||
147 | void brw_upload_cs_urb_state(struct brw_context *brw) |
||
148 | { |
||
149 | BEGIN_BATCH(2); |
||
150 | /* It appears that this is the state packet for the CS unit, ie. the |
||
151 | * urb entries detailed here are housed in the CS range from the |
||
152 | * URB_FENCE command. |
||
153 | */ |
||
154 | OUT_BATCH(CMD_CS_URB_STATE << 16 | (2-2)); |
||
155 | |||
156 | /* BRW_NEW_URB_FENCE */ |
||
157 | if (brw->urb.csize == 0) { |
||
158 | OUT_BATCH(0); |
||
159 | } else { |
||
160 | /* BRW_NEW_URB_FENCE */ |
||
161 | assert(brw->urb.nr_cs_entries); |
||
162 | OUT_BATCH((brw->urb.csize - 1) << 4 | brw->urb.nr_cs_entries); |
||
163 | } |
||
164 | CACHED_BATCH(); |
||
165 | } |
||
166 | |||
167 | static GLfloat fixed_plane[6][4] = { |
||
168 | { 0, 0, -1, 1 }, |
||
169 | { 0, 0, 1, 1 }, |
||
170 | { 0, -1, 0, 1 }, |
||
171 | { 0, 1, 0, 1 }, |
||
172 | {-1, 0, 0, 1 }, |
||
173 | { 1, 0, 0, 1 } |
||
174 | }; |
||
175 | |||
176 | /* Upload a new set of constants. Too much variability to go into the |
||
177 | * cache mechanism, but maybe would benefit from a comparison against |
||
178 | * the current uploaded set of constants. |
||
179 | */ |
||
180 | static void |
||
181 | brw_upload_constant_buffer(struct brw_context *brw) |
||
182 | { |
||
183 | struct gl_context *ctx = &brw->ctx; |
||
184 | const GLuint sz = brw->curbe.total_size; |
||
185 | const GLuint bufsz = sz * 16 * sizeof(GLfloat); |
||
186 | GLfloat *buf; |
||
187 | GLuint i; |
||
188 | gl_clip_plane *clip_planes; |
||
189 | |||
190 | if (sz == 0) { |
||
191 | brw->curbe.last_bufsz = 0; |
||
192 | goto emit; |
||
193 | } |
||
194 | |||
195 | buf = brw->curbe.next_buf; |
||
196 | |||
197 | /* fragment shader constants */ |
||
198 | if (brw->curbe.wm_size) { |
||
199 | GLuint offset = brw->curbe.wm_start * 16; |
||
200 | |||
201 | /* copy float constants */ |
||
202 | for (i = 0; i < brw->wm.prog_data->nr_params; i++) { |
||
203 | buf[offset + i] = *brw->wm.prog_data->param[i]; |
||
204 | } |
||
205 | } |
||
206 | |||
207 | /* clipper constants */ |
||
208 | if (brw->curbe.clip_size) { |
||
209 | GLuint offset = brw->curbe.clip_start * 16; |
||
210 | GLuint j; |
||
211 | |||
212 | /* If any planes are going this way, send them all this way: |
||
213 | */ |
||
214 | for (i = 0; i < 6; i++) { |
||
215 | buf[offset + i * 4 + 0] = fixed_plane[i][0]; |
||
216 | buf[offset + i * 4 + 1] = fixed_plane[i][1]; |
||
217 | buf[offset + i * 4 + 2] = fixed_plane[i][2]; |
||
218 | buf[offset + i * 4 + 3] = fixed_plane[i][3]; |
||
219 | } |
||
220 | |||
221 | /* Clip planes: _NEW_TRANSFORM plus _NEW_PROJECTION to get to |
||
222 | * clip-space: |
||
223 | */ |
||
224 | clip_planes = brw_select_clip_planes(ctx); |
||
225 | for (j = 0; j < MAX_CLIP_PLANES; j++) { |
||
226 | if (ctx->Transform.ClipPlanesEnabled & (1< |
||
227 | buf[offset + i * 4 + 0] = clip_planes[j][0]; |
||
228 | buf[offset + i * 4 + 1] = clip_planes[j][1]; |
||
229 | buf[offset + i * 4 + 2] = clip_planes[j][2]; |
||
230 | buf[offset + i * 4 + 3] = clip_planes[j][3]; |
||
231 | i++; |
||
232 | } |
||
233 | } |
||
234 | } |
||
235 | |||
236 | /* vertex shader constants */ |
||
237 | if (brw->curbe.vs_size) { |
||
238 | GLuint offset = brw->curbe.vs_start * 16; |
||
239 | |||
240 | for (i = 0; i < brw->vs.prog_data->base.nr_params; i++) { |
||
241 | buf[offset + i] = *brw->vs.prog_data->base.param[i]; |
||
242 | } |
||
243 | } |
||
244 | |||
245 | if (0) { |
||
246 | for (i = 0; i < sz*16; i+=4) |
||
247 | printf("curbe %d.%d: %f %f %f %f\n", i/8, i&4, |
||
248 | buf[i+0], buf[i+1], buf[i+2], buf[i+3]); |
||
249 | |||
250 | printf("last_buf %p buf %p sz %d/%d cmp %d\n", |
||
251 | brw->curbe.last_buf, buf, |
||
252 | bufsz, brw->curbe.last_bufsz, |
||
253 | brw->curbe.last_buf ? memcmp(buf, brw->curbe.last_buf, bufsz) : -1); |
||
254 | } |
||
255 | |||
256 | if (brw->curbe.curbe_bo != NULL && |
||
257 | bufsz == brw->curbe.last_bufsz && |
||
258 | memcmp(buf, brw->curbe.last_buf, bufsz) == 0) { |
||
259 | /* constants have not changed */ |
||
260 | } else { |
||
261 | /* Update the record of what our last set of constants was. We |
||
262 | * don't just flip the pointers because we don't fill in the |
||
263 | * data in the padding between the entries. |
||
264 | */ |
||
265 | memcpy(brw->curbe.last_buf, buf, bufsz); |
||
266 | brw->curbe.last_bufsz = bufsz; |
||
267 | |||
268 | if (brw->curbe.curbe_bo != NULL && |
||
269 | brw->curbe.curbe_next_offset + bufsz > brw->curbe.curbe_bo->size) |
||
270 | { |
||
271 | drm_intel_gem_bo_unmap_gtt(brw->curbe.curbe_bo); |
||
272 | drm_intel_bo_unreference(brw->curbe.curbe_bo); |
||
273 | brw->curbe.curbe_bo = NULL; |
||
274 | } |
||
275 | |||
276 | if (brw->curbe.curbe_bo == NULL) { |
||
277 | /* Allocate a single page for CURBE entries for this batchbuffer. |
||
278 | * They're generally around 64b. |
||
279 | */ |
||
280 | brw->curbe.curbe_bo = drm_intel_bo_alloc(brw->bufmgr, "CURBE", |
||
281 | 4096, 1 << 6); |
||
282 | brw->curbe.curbe_next_offset = 0; |
||
283 | drm_intel_gem_bo_map_gtt(brw->curbe.curbe_bo); |
||
284 | assert(bufsz < 4096); |
||
285 | } |
||
286 | |||
287 | brw->curbe.curbe_offset = brw->curbe.curbe_next_offset; |
||
288 | brw->curbe.curbe_next_offset += bufsz; |
||
289 | brw->curbe.curbe_next_offset = ALIGN(brw->curbe.curbe_next_offset, 64); |
||
290 | |||
291 | /* Copy data to the buffer: |
||
292 | */ |
||
293 | memcpy(brw->curbe.curbe_bo->virtual + brw->curbe.curbe_offset, |
||
294 | buf, |
||
295 | bufsz); |
||
296 | } |
||
297 | |||
298 | /* Because this provokes an action (ie copy the constants into the |
||
299 | * URB), it shouldn't be shortcircuited if identical to the |
||
300 | * previous time - because eg. the urb destination may have |
||
301 | * changed, or the urb contents different to last time. |
||
302 | * |
||
303 | * Note that the data referred to is actually copied internally, |
||
304 | * not just used in place according to passed pointer. |
||
305 | * |
||
306 | * It appears that the CS unit takes care of using each available |
||
307 | * URB entry (Const URB Entry == CURBE) in turn, and issuing |
||
308 | * flushes as necessary when doublebuffering of CURBEs isn't |
||
309 | * possible. |
||
310 | */ |
||
311 | |||
312 | emit: |
||
313 | BEGIN_BATCH(2); |
||
314 | if (brw->curbe.total_size == 0) { |
||
315 | OUT_BATCH((CMD_CONST_BUFFER << 16) | (2 - 2)); |
||
316 | OUT_BATCH(0); |
||
317 | } else { |
||
318 | OUT_BATCH((CMD_CONST_BUFFER << 16) | (1 << 8) | (2 - 2)); |
||
319 | OUT_RELOC(brw->curbe.curbe_bo, |
||
320 | I915_GEM_DOMAIN_INSTRUCTION, 0, |
||
321 | (brw->curbe.total_size - 1) + brw->curbe.curbe_offset); |
||
322 | } |
||
323 | ADVANCE_BATCH(); |
||
324 | } |
||
325 | |||
326 | const struct brw_tracked_state brw_constant_buffer = { |
||
327 | .dirty = { |
||
328 | .mesa = _NEW_PROGRAM_CONSTANTS, |
||
329 | .brw = (BRW_NEW_FRAGMENT_PROGRAM | |
||
330 | BRW_NEW_VERTEX_PROGRAM | |
||
331 | BRW_NEW_URB_FENCE | /* Implicit - hardware requires this, not used above */ |
||
332 | BRW_NEW_PSP | /* Implicit - hardware requires this, not used above */ |
||
333 | BRW_NEW_CURBE_OFFSETS | |
||
334 | BRW_NEW_BATCH), |
||
335 | .cache = (CACHE_NEW_WM_PROG) |
||
336 | }, |
||
337 | .emit = brw_upload_constant_buffer, |
||
338 | };><>><>><>>><>>> |
||
339 |