Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
4358 | Serge | 1 | /* |
2 | Copyright (C) Intel Corp. 2006. All Rights Reserved. |
||
3 | Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to |
||
4 | develop this 3D driver. |
||
5 | |||
6 | Permission is hereby granted, free of charge, to any person obtaining |
||
7 | a copy of this software and associated documentation files (the |
||
8 | "Software"), to deal in the Software without restriction, including |
||
9 | without limitation the rights to use, copy, modify, merge, publish, |
||
10 | distribute, sublicense, and/or sell copies of the Software, and to |
||
11 | permit persons to whom the Software is furnished to do so, subject to |
||
12 | the following conditions: |
||
13 | |||
14 | The above copyright notice and this permission notice (including the |
||
15 | next paragraph) shall be included in all copies or substantial |
||
16 | portions of the Software. |
||
17 | |||
18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
||
19 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||
20 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
||
21 | IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
||
22 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
||
23 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
||
24 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||
25 | |||
26 | **********************************************************************/ |
||
27 | /* |
||
28 | * Authors: |
||
29 | * Keith Whitwell |
||
30 | */ |
||
31 | |||
32 | #include |
||
33 | #include "main/imports.h" |
||
34 | #include "main/enums.h" |
||
35 | #include "main/shaderobj.h" |
||
36 | #include "program/prog_parameter.h" |
||
37 | #include "program/program.h" |
||
38 | #include "program/programopt.h" |
||
39 | #include "tnl/tnl.h" |
||
40 | #include "glsl/ralloc.h" |
||
41 | |||
42 | #include "brw_context.h" |
||
43 | #include "brw_wm.h" |
||
44 | |||
45 | static unsigned |
||
46 | get_new_program_id(struct intel_screen *screen) |
||
47 | { |
||
48 | // static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER; |
||
49 | // pthread_mutex_lock(&m); |
||
50 | unsigned id = screen->program_id++; |
||
51 | // pthread_mutex_unlock(&m); |
||
52 | return id; |
||
53 | } |
||
54 | |||
55 | static void brwBindProgram( struct gl_context *ctx, |
||
56 | GLenum target, |
||
57 | struct gl_program *prog ) |
||
58 | { |
||
59 | struct brw_context *brw = brw_context(ctx); |
||
60 | |||
61 | switch (target) { |
||
62 | case GL_VERTEX_PROGRAM_ARB: |
||
63 | brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM; |
||
64 | break; |
||
65 | case GL_FRAGMENT_PROGRAM_ARB: |
||
66 | brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM; |
||
67 | break; |
||
68 | } |
||
69 | } |
||
70 | |||
71 | static struct gl_program *brwNewProgram( struct gl_context *ctx, |
||
72 | GLenum target, |
||
73 | GLuint id ) |
||
74 | { |
||
75 | struct brw_context *brw = brw_context(ctx); |
||
76 | |||
77 | switch (target) { |
||
78 | case GL_VERTEX_PROGRAM_ARB: { |
||
79 | struct brw_vertex_program *prog = CALLOC_STRUCT(brw_vertex_program); |
||
80 | if (prog) { |
||
81 | prog->id = get_new_program_id(brw->intelScreen); |
||
82 | |||
83 | return _mesa_init_vertex_program( ctx, &prog->program, |
||
84 | target, id ); |
||
85 | } |
||
86 | else |
||
87 | return NULL; |
||
88 | } |
||
89 | |||
90 | case GL_FRAGMENT_PROGRAM_ARB: { |
||
91 | struct brw_fragment_program *prog = CALLOC_STRUCT(brw_fragment_program); |
||
92 | if (prog) { |
||
93 | prog->id = get_new_program_id(brw->intelScreen); |
||
94 | |||
95 | return _mesa_init_fragment_program( ctx, &prog->program, |
||
96 | target, id ); |
||
97 | } |
||
98 | else |
||
99 | return NULL; |
||
100 | } |
||
101 | |||
102 | default: |
||
103 | return _mesa_new_program(ctx, target, id); |
||
104 | } |
||
105 | } |
||
106 | |||
/**
 * Installed as the DeleteProgram driver hook.  The driver adds no cleanup
 * of its own here; the program is handed straight to
 * _mesa_delete_program().
 */
static void brwDeleteProgram( struct gl_context *ctx,
                              struct gl_program *prog )
{
   _mesa_delete_program(ctx, prog);
}
||
112 | |||
113 | |||
114 | static GLboolean |
||
115 | brwIsProgramNative(struct gl_context *ctx, |
||
116 | GLenum target, |
||
117 | struct gl_program *prog) |
||
118 | { |
||
119 | return true; |
||
120 | } |
||
121 | |||
122 | static GLboolean |
||
123 | brwProgramStringNotify(struct gl_context *ctx, |
||
124 | GLenum target, |
||
125 | struct gl_program *prog) |
||
126 | { |
||
127 | struct brw_context *brw = brw_context(ctx); |
||
128 | |||
129 | switch (target) { |
||
130 | case GL_FRAGMENT_PROGRAM_ARB: { |
||
131 | struct gl_fragment_program *fprog = (struct gl_fragment_program *) prog; |
||
132 | struct brw_fragment_program *newFP = brw_fragment_program(fprog); |
||
133 | const struct brw_fragment_program *curFP = |
||
134 | brw_fragment_program_const(brw->fragment_program); |
||
135 | |||
136 | if (newFP == curFP) |
||
137 | brw->state.dirty.brw |= BRW_NEW_FRAGMENT_PROGRAM; |
||
138 | newFP->id = get_new_program_id(brw->intelScreen); |
||
139 | break; |
||
140 | } |
||
141 | case GL_VERTEX_PROGRAM_ARB: { |
||
142 | struct gl_vertex_program *vprog = (struct gl_vertex_program *) prog; |
||
143 | struct brw_vertex_program *newVP = brw_vertex_program(vprog); |
||
144 | const struct brw_vertex_program *curVP = |
||
145 | brw_vertex_program_const(brw->vertex_program); |
||
146 | |||
147 | if (newVP == curVP) |
||
148 | brw->state.dirty.brw |= BRW_NEW_VERTEX_PROGRAM; |
||
149 | if (newVP->program.IsPositionInvariant) { |
||
150 | _mesa_insert_mvp_code(ctx, &newVP->program); |
||
151 | } |
||
152 | newVP->id = get_new_program_id(brw->intelScreen); |
||
153 | |||
154 | /* Also tell tnl about it: |
||
155 | */ |
||
156 | _tnl_program_string(ctx, target, prog); |
||
157 | break; |
||
158 | } |
||
159 | default: |
||
160 | /* |
||
161 | * driver->ProgramStringNotify is only called for ARB programs, fixed |
||
162 | * function vertex programs, and ir_to_mesa (which isn't used by the |
||
163 | * i965 back-end). Therefore, even after geometry shaders are added, |
||
164 | * this function should only ever be called with a target of |
||
165 | * GL_VERTEX_PROGRAM_ARB or GL_FRAGMENT_PROGRAM_ARB. |
||
166 | */ |
||
167 | assert(!"Unexpected target in brwProgramStringNotify"); |
||
168 | break; |
||
169 | } |
||
170 | |||
171 | brw_add_texrect_params(prog); |
||
172 | |||
173 | return true; |
||
174 | } |
||
175 | |||
176 | void |
||
177 | brw_add_texrect_params(struct gl_program *prog) |
||
178 | { |
||
179 | for (int texunit = 0; texunit < BRW_MAX_TEX_UNIT; texunit++) { |
||
180 | if (!(prog->TexturesUsed[texunit] & (1 << TEXTURE_RECT_INDEX))) |
||
181 | continue; |
||
182 | |||
183 | int tokens[STATE_LENGTH] = { |
||
184 | STATE_INTERNAL, |
||
185 | STATE_TEXRECT_SCALE, |
||
186 | texunit, |
||
187 | 0, |
||
188 | |||
189 | }; |
||
190 | |||
191 | _mesa_add_state_reference(prog->Parameters, (gl_state_index *)tokens); |
||
192 | } |
||
193 | } |
||
194 | |||
/* Per-thread scratch space is a power-of-two multiple of 1KB.
 *
 * Returns the smallest such size that is not below `size`; anything at or
 * under 1KB (including zero and negative requests) yields 1024.
 */
int
brw_get_scratch_size(int size)
{
   int scratch = 1024;

   while (scratch < size)
      scratch *= 2;

   return scratch;
}
||
206 | |||
207 | void |
||
208 | brw_get_scratch_bo(struct brw_context *brw, |
||
209 | drm_intel_bo **scratch_bo, int size) |
||
210 | { |
||
211 | drm_intel_bo *old_bo = *scratch_bo; |
||
212 | |||
213 | if (old_bo && old_bo->size < size) { |
||
214 | drm_intel_bo_unreference(old_bo); |
||
215 | old_bo = NULL; |
||
216 | } |
||
217 | |||
218 | if (!old_bo) { |
||
219 | *scratch_bo = drm_intel_bo_alloc(brw->bufmgr, "scratch bo", size, 4096); |
||
220 | } |
||
221 | } |
||
222 | |||
223 | void brwInitFragProgFuncs( struct dd_function_table *functions ) |
||
224 | { |
||
225 | assert(functions->ProgramStringNotify == _tnl_program_string); |
||
226 | |||
227 | functions->BindProgram = brwBindProgram; |
||
228 | functions->NewProgram = brwNewProgram; |
||
229 | functions->DeleteProgram = brwDeleteProgram; |
||
230 | functions->IsProgramNative = brwIsProgramNative; |
||
231 | functions->ProgramStringNotify = brwProgramStringNotify; |
||
232 | |||
233 | functions->NewShader = brw_new_shader; |
||
234 | functions->NewShaderProgram = brw_new_shader_program; |
||
235 | functions->LinkShader = brw_link_shader; |
||
236 | } |
||
237 | |||
238 | void |
||
239 | brw_init_shader_time(struct brw_context *brw) |
||
240 | { |
||
241 | const int max_entries = 4096; |
||
242 | brw->shader_time.bo = drm_intel_bo_alloc(brw->bufmgr, "shader time", |
||
243 | max_entries * SHADER_TIME_STRIDE, |
||
244 | 4096); |
||
245 | brw->shader_time.shader_programs = rzalloc_array(brw, struct gl_shader_program *, |
||
246 | max_entries); |
||
247 | brw->shader_time.programs = rzalloc_array(brw, struct gl_program *, |
||
248 | max_entries); |
||
249 | brw->shader_time.types = rzalloc_array(brw, enum shader_time_shader_type, |
||
250 | max_entries); |
||
251 | brw->shader_time.cumulative = rzalloc_array(brw, uint64_t, |
||
252 | max_entries); |
||
253 | brw->shader_time.max_entries = max_entries; |
||
254 | } |
||
255 | |||
/**
 * qsort() comparator for an array of pointers to uint64_t values; orders
 * the pointers by ascending pointed-to value.
 *
 * \return -1, 0 or 1.  The result is built from comparisons rather than a
 * subtraction, because a 64-bit difference would not survive being
 * narrowed to int.
 */
static int
compare_time(const void *a, const void *b)
{
   const uint64_t lhs = **(uint64_t * const *)a;
   const uint64_t rhs = **(uint64_t * const *)b;

   return (lhs > rhs) - (lhs < rhs);
}
||
270 | |||
271 | static void |
||
272 | get_written_and_reset(struct brw_context *brw, int i, |
||
273 | uint64_t *written, uint64_t *reset) |
||
274 | { |
||
275 | enum shader_time_shader_type type = brw->shader_time.types[i]; |
||
276 | assert(type == ST_VS || type == ST_FS8 || type == ST_FS16); |
||
277 | |||
278 | /* Find where we recorded written and reset. */ |
||
279 | int wi, ri; |
||
280 | |||
281 | for (wi = i; brw->shader_time.types[wi] != type + 1; wi++) |
||
282 | ; |
||
283 | |||
284 | for (ri = i; brw->shader_time.types[ri] != type + 2; ri++) |
||
285 | ; |
||
286 | |||
287 | *written = brw->shader_time.cumulative[wi]; |
||
288 | *reset = brw->shader_time.cumulative[ri]; |
||
289 | } |
||
290 | |||
/**
 * Prints one row of the shader-time report to stdout.
 *
 * \param stage       text for the first column.
 * \param name        text for the second column.
 * \param shader_num  id to print, or -1 to leave the id column blank.
 * \param time        cycle count for this row.
 * \param total       overall cycle count; the percentage column shows
 *                    \p time relative to it.
 */
static void
print_shader_time_line(const char *stage, const char *name,
                       int shader_num, uint64_t time, uint64_t total)
{
   const double gcycles = (double)time / 1000000000.0;
   const double percent = (double)time / total * 100.0;

   printf("%-6s%-6s", stage, name);

   if (shader_num == -1)
      printf(" : ");
   else
      printf("%4d: ", shader_num);

   printf("%16lld (%7.2f Gcycles) %4.1f%%\n",
          (long long)time, gcycles, percent);
}
||
307 | |||
308 | static void |
||
309 | brw_report_shader_time(struct brw_context *brw) |
||
310 | { |
||
311 | if (!brw->shader_time.bo || !brw->shader_time.num_entries) |
||
312 | return; |
||
313 | |||
314 | uint64_t scaled[brw->shader_time.num_entries]; |
||
315 | uint64_t *sorted[brw->shader_time.num_entries]; |
||
316 | uint64_t total_by_type[ST_FS16 + 1]; |
||
317 | memset(total_by_type, 0, sizeof(total_by_type)); |
||
318 | double total = 0; |
||
319 | for (int i = 0; i < brw->shader_time.num_entries; i++) { |
||
320 | uint64_t written = 0, reset = 0; |
||
321 | enum shader_time_shader_type type = brw->shader_time.types[i]; |
||
322 | |||
323 | sorted[i] = &scaled[i]; |
||
324 | |||
325 | switch (type) { |
||
326 | case ST_VS_WRITTEN: |
||
327 | case ST_VS_RESET: |
||
328 | case ST_FS8_WRITTEN: |
||
329 | case ST_FS8_RESET: |
||
330 | case ST_FS16_WRITTEN: |
||
331 | case ST_FS16_RESET: |
||
332 | /* We'll handle these when along with the time. */ |
||
333 | scaled[i] = 0; |
||
334 | continue; |
||
335 | |||
336 | case ST_VS: |
||
337 | case ST_FS8: |
||
338 | case ST_FS16: |
||
339 | get_written_and_reset(brw, i, &written, &reset); |
||
340 | break; |
||
341 | |||
342 | default: |
||
343 | /* I sometimes want to print things that aren't the 3 shader times. |
||
344 | * Just print the sum in that case. |
||
345 | */ |
||
346 | written = 1; |
||
347 | reset = 0; |
||
348 | break; |
||
349 | } |
||
350 | |||
351 | uint64_t time = brw->shader_time.cumulative[i]; |
||
352 | if (written) { |
||
353 | scaled[i] = time / written * (written + reset); |
||
354 | } else { |
||
355 | scaled[i] = time; |
||
356 | } |
||
357 | |||
358 | switch (type) { |
||
359 | case ST_VS: |
||
360 | case ST_FS8: |
||
361 | case ST_FS16: |
||
362 | total_by_type[type] += scaled[i]; |
||
363 | break; |
||
364 | default: |
||
365 | break; |
||
366 | } |
||
367 | |||
368 | total += scaled[i]; |
||
369 | } |
||
370 | |||
371 | if (total == 0) { |
||
372 | printf("No shader time collected yet\n"); |
||
373 | return; |
||
374 | } |
||
375 | |||
376 | qsort(sorted, brw->shader_time.num_entries, sizeof(sorted[0]), compare_time); |
||
377 | |||
378 | printf("\n"); |
||
379 | printf("type ID cycles spent %% of total\n"); |
||
380 | for (int s = 0; s < brw->shader_time.num_entries; s++) { |
||
381 | const char *shader_name; |
||
382 | const char *stage; |
||
383 | /* Work back from the sorted pointers times to a time to print. */ |
||
384 | int i = sorted[s] - scaled; |
||
385 | |||
386 | if (scaled[i] == 0) |
||
387 | continue; |
||
388 | |||
389 | int shader_num = -1; |
||
390 | if (brw->shader_time.shader_programs[i]) { |
||
391 | shader_num = brw->shader_time.shader_programs[i]->Name; |
||
392 | |||
393 | /* The fixed function fragment shader generates GLSL IR with a Name |
||
394 | * of 0, and nothing else does. |
||
395 | */ |
||
396 | if (shader_num == 0 && |
||
397 | (brw->shader_time.types[i] == ST_FS8 || |
||
398 | brw->shader_time.types[i] == ST_FS16)) { |
||
399 | shader_name = "ff"; |
||
400 | shader_num = -1; |
||
401 | } else { |
||
402 | shader_name = "glsl"; |
||
403 | } |
||
404 | } else if (brw->shader_time.programs[i]) { |
||
405 | shader_num = brw->shader_time.programs[i]->Id; |
||
406 | if (shader_num == 0) { |
||
407 | shader_name = "ff"; |
||
408 | shader_num = -1; |
||
409 | } else { |
||
410 | shader_name = "prog"; |
||
411 | } |
||
412 | } else { |
||
413 | shader_name = "other"; |
||
414 | } |
||
415 | |||
416 | switch (brw->shader_time.types[i]) { |
||
417 | case ST_VS: |
||
418 | stage = "vs"; |
||
419 | break; |
||
420 | case ST_FS8: |
||
421 | stage = "fs8"; |
||
422 | break; |
||
423 | case ST_FS16: |
||
424 | stage = "fs16"; |
||
425 | break; |
||
426 | default: |
||
427 | stage = "other"; |
||
428 | break; |
||
429 | } |
||
430 | |||
431 | print_shader_time_line(stage, shader_name, shader_num, |
||
432 | scaled[i], total); |
||
433 | } |
||
434 | |||
435 | printf("\n"); |
||
436 | print_shader_time_line("total", "vs", -1, total_by_type[ST_VS], total); |
||
437 | print_shader_time_line("total", "fs8", -1, total_by_type[ST_FS8], total); |
||
438 | print_shader_time_line("total", "fs16", -1, total_by_type[ST_FS16], total); |
||
439 | } |
||
440 | |||
441 | static void |
||
442 | brw_collect_shader_time(struct brw_context *brw) |
||
443 | { |
||
444 | if (!brw->shader_time.bo) |
||
445 | return; |
||
446 | |||
447 | /* This probably stalls on the last rendering. We could fix that by |
||
448 | * delaying reading the reports, but it doesn't look like it's a big |
||
449 | * overhead compared to the cost of tracking the time in the first place. |
||
450 | */ |
||
451 | drm_intel_bo_map(brw->shader_time.bo, true); |
||
452 | |||
453 | uint32_t *times = brw->shader_time.bo->virtual; |
||
454 | |||
455 | for (int i = 0; i < brw->shader_time.num_entries; i++) { |
||
456 | brw->shader_time.cumulative[i] += times[i * SHADER_TIME_STRIDE / 4]; |
||
457 | } |
||
458 | |||
459 | /* Zero the BO out to clear it out for our next collection. |
||
460 | */ |
||
461 | memset(times, 0, brw->shader_time.bo->size); |
||
462 | drm_intel_bo_unmap(brw->shader_time.bo); |
||
463 | } |
||
464 | |||
465 | void |
||
466 | brw_collect_and_report_shader_time(struct brw_context *brw) |
||
467 | { |
||
468 | brw_collect_shader_time(brw); |
||
469 | |||
470 | if (brw->shader_time.report_time == 0 || |
||
471 | get_time() - brw->shader_time.report_time >= 1.0) { |
||
472 | brw_report_shader_time(brw); |
||
473 | brw->shader_time.report_time = get_time(); |
||
474 | } |
||
475 | } |
||
476 | |||
477 | /** |
||
478 | * Chooses an index in the shader_time buffer and sets up tracking information |
||
479 | * for our printouts. |
||
480 | * |
||
481 | * Note that this holds on to references to the underlying programs, which may |
||
482 | * change their lifetimes compared to normal operation. |
||
483 | */ |
||
484 | int |
||
485 | brw_get_shader_time_index(struct brw_context *brw, |
||
486 | struct gl_shader_program *shader_prog, |
||
487 | struct gl_program *prog, |
||
488 | enum shader_time_shader_type type) |
||
489 | { |
||
490 | struct gl_context *ctx = &brw->ctx; |
||
491 | |||
492 | int shader_time_index = brw->shader_time.num_entries++; |
||
493 | assert(shader_time_index < brw->shader_time.max_entries); |
||
494 | brw->shader_time.types[shader_time_index] = type; |
||
495 | |||
496 | _mesa_reference_shader_program(ctx, |
||
497 | &brw->shader_time.shader_programs[shader_time_index], |
||
498 | shader_prog); |
||
499 | |||
500 | _mesa_reference_program(ctx, |
||
501 | &brw->shader_time.programs[shader_time_index], |
||
502 | prog); |
||
503 | |||
504 | return shader_time_index; |
||
505 | } |
||
506 | |||
507 | void |
||
508 | brw_destroy_shader_time(struct brw_context *brw) |
||
509 | { |
||
510 | drm_intel_bo_unreference(brw->shader_time.bo); |
||
511 | brw->shader_time.bo = NULL; |
||
512 | }>>>>>>>><>> |