WebSVN – Kolibri OS – Blame – /contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_program.c

Rev	Author	Line No.	Line
4358	Serge	1	/*
		2	Copyright (C) Intel Corp. 2006. All Rights Reserved.
		3	Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
		4	develop this 3D driver.
		5
		6	Permission is hereby granted, free of charge, to any person obtaining
		7	a copy of this software and associated documentation files (the
		8	"Software"), to deal in the Software without restriction, including
		9	without limitation the rights to use, copy, modify, merge, publish,
		10	distribute, sublicense, and/or sell copies of the Software, and to
		11	permit persons to whom the Software is furnished to do so, subject to
		12	the following conditions:
		13
		14	The above copyright notice and this permission notice (including the
		15	next paragraph) shall be included in all copies or substantial
		16	portions of the Software.
		17
		18	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
		19	EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
		20	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
		21	IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
		22	LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
		23	OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
		24	WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
		25
		26	**********************************************************************/
		27	/*
		28	* Authors:
		29	* Keith Whitwell
		30	*/
		31
#include <limits.h>
#include <pthread.h>
#include "main/imports.h"
#include "main/enums.h"
#include "main/shaderobj.h"
#include "program/prog_parameter.h"
#include "program/program.h"
#include "program/programopt.h"
#include "tnl/tnl.h"
#include "glsl/ralloc.h"

#include "brw_context.h"
#include "brw_wm.h"
		44
		45	static unsigned
		46	get_new_program_id(struct intel_screen *screen)
		47	{
		48	// static pthread_mutex_t m = PTHREAD_MUTEX_INITIALIZER;
		49	// pthread_mutex_lock(&m);
		50	unsigned id = screen->program_id++;
		51	// pthread_mutex_unlock(&m);
		52	return id;
		53	}
		54
		55	static void brwBindProgram( struct gl_context *ctx,
		56	GLenum target,
		57	struct gl_program *prog )
		58	{
		59	struct brw_context *brw = brw_context(ctx);
		60
		61	switch (target) {
		62	case GL_VERTEX_PROGRAM_ARB:
		63	brw->state.dirty.brw \|= BRW_NEW_VERTEX_PROGRAM;
		64	break;
		65	case GL_FRAGMENT_PROGRAM_ARB:
		66	brw->state.dirty.brw \|= BRW_NEW_FRAGMENT_PROGRAM;
		67	break;
		68	}
		69	}
		70
		71	static struct gl_program brwNewProgram( struct gl_context ctx,
		72	GLenum target,
		73	GLuint id )
		74	{
		75	struct brw_context *brw = brw_context(ctx);
		76
		77	switch (target) {
		78	case GL_VERTEX_PROGRAM_ARB: {
		79	struct brw_vertex_program *prog = CALLOC_STRUCT(brw_vertex_program);
		80	if (prog) {
		81	prog->id = get_new_program_id(brw->intelScreen);
		82
		83	return _mesa_init_vertex_program( ctx, &prog->program,
		84	target, id );
		85	}
		86	else
		87	return NULL;
		88	}
		89
		90	case GL_FRAGMENT_PROGRAM_ARB: {
		91	struct brw_fragment_program *prog = CALLOC_STRUCT(brw_fragment_program);
		92	if (prog) {
		93	prog->id = get_new_program_id(brw->intelScreen);
		94
		95	return _mesa_init_fragment_program( ctx, &prog->program,
		96	target, id );
		97	}
		98	else
		99	return NULL;
		100	}
		101
		102	default:
		103	return _mesa_new_program(ctx, target, id);
		104	}
		105	}
		106
		107	static void brwDeleteProgram( struct gl_context *ctx,
		108	struct gl_program *prog )
		109	{
		110	_mesa_delete_program( ctx, prog );
		111	}
		112
		113
		114	static GLboolean
		115	brwIsProgramNative(struct gl_context *ctx,
		116	GLenum target,
		117	struct gl_program *prog)
		118	{
		119	return true;
		120	}
		121
		122	static GLboolean
		123	brwProgramStringNotify(struct gl_context *ctx,
		124	GLenum target,
		125	struct gl_program *prog)
		126	{
		127	struct brw_context *brw = brw_context(ctx);
		128
		129	switch (target) {
		130	case GL_FRAGMENT_PROGRAM_ARB: {
		131	struct gl_fragment_program fprog = (struct gl_fragment_program ) prog;
		132	struct brw_fragment_program *newFP = brw_fragment_program(fprog);
		133	const struct brw_fragment_program *curFP =
		134	brw_fragment_program_const(brw->fragment_program);
		135
		136	if (newFP == curFP)
		137	brw->state.dirty.brw \|= BRW_NEW_FRAGMENT_PROGRAM;
		138	newFP->id = get_new_program_id(brw->intelScreen);
		139	break;
		140	}
		141	case GL_VERTEX_PROGRAM_ARB: {
		142	struct gl_vertex_program vprog = (struct gl_vertex_program ) prog;
		143	struct brw_vertex_program *newVP = brw_vertex_program(vprog);
		144	const struct brw_vertex_program *curVP =
		145	brw_vertex_program_const(brw->vertex_program);
		146
		147	if (newVP == curVP)
		148	brw->state.dirty.brw \|= BRW_NEW_VERTEX_PROGRAM;
		149	if (newVP->program.IsPositionInvariant) {
		150	_mesa_insert_mvp_code(ctx, &newVP->program);
		151	}
		152	newVP->id = get_new_program_id(brw->intelScreen);
		153
		154	/* Also tell tnl about it:
		155	*/
		156	_tnl_program_string(ctx, target, prog);
		157	break;
		158	}
		159	default:
		160	/*
		161	* driver->ProgramStringNotify is only called for ARB programs, fixed
		162	* function vertex programs, and ir_to_mesa (which isn't used by the
		163	* i965 back-end). Therefore, even after geometry shaders are added,
		164	* this function should only ever be called with a target of
		165	* GL_VERTEX_PROGRAM_ARB or GL_FRAGMENT_PROGRAM_ARB.
		166	*/
		167	assert(!"Unexpected target in brwProgramStringNotify");
		168	break;
		169	}
		170
		171	brw_add_texrect_params(prog);
		172
		173	return true;
		174	}
		175
		176	void
		177	brw_add_texrect_params(struct gl_program *prog)
		178	{
		179	for (int texunit = 0; texunit < BRW_MAX_TEX_UNIT; texunit++) {
		180	if (!(prog->TexturesUsed[texunit] & (1 << TEXTURE_RECT_INDEX)))
		181	continue;
		182
		183	int tokens[STATE_LENGTH] = {
		184	STATE_INTERNAL,
		185	STATE_TEXRECT_SCALE,
		186	texunit,
		187	0,
		188
		189	};
		190
		191	_mesa_add_state_reference(prog->Parameters, (gl_state_index *)tokens);
		192	}
		193	}
		194
		195	/* Per-thread scratch space is a power-of-two multiple of 1KB. */
		196	int
		197	brw_get_scratch_size(int size)
		198	{
		199	int i;
		200
		201	for (i = 1024; i < size; i *= 2)
		202	;
		203
		204	return i;
		205	}
		206
		207	void
		208	brw_get_scratch_bo(struct brw_context *brw,
		209	drm_intel_bo **scratch_bo, int size)
		210	{
		211	drm_intel_bo old_bo = scratch_bo;
		212
		213	if (old_bo && old_bo->size < size) {
		214	drm_intel_bo_unreference(old_bo);
		215	old_bo = NULL;
		216	}
		217
		218	if (!old_bo) {
		219	*scratch_bo = drm_intel_bo_alloc(brw->bufmgr, "scratch bo", size, 4096);
		220	}
		221	}
		222
		223	void brwInitFragProgFuncs( struct dd_function_table *functions )
		224	{
		225	assert(functions->ProgramStringNotify == _tnl_program_string);
		226
		227	functions->BindProgram = brwBindProgram;
		228	functions->NewProgram = brwNewProgram;
		229	functions->DeleteProgram = brwDeleteProgram;
		230	functions->IsProgramNative = brwIsProgramNative;
		231	functions->ProgramStringNotify = brwProgramStringNotify;
		232
		233	functions->NewShader = brw_new_shader;
		234	functions->NewShaderProgram = brw_new_shader_program;
		235	functions->LinkShader = brw_link_shader;
		236	}
		237
		238	void
		239	brw_init_shader_time(struct brw_context *brw)
		240	{
		241	const int max_entries = 4096;
		242	brw->shader_time.bo = drm_intel_bo_alloc(brw->bufmgr, "shader time",
		243	max_entries * SHADER_TIME_STRIDE,
		244	4096);
		245	brw->shader_time.shader_programs = rzalloc_array(brw, struct gl_shader_program *,
		246	max_entries);
		247	brw->shader_time.programs = rzalloc_array(brw, struct gl_program *,
		248	max_entries);
		249	brw->shader_time.types = rzalloc_array(brw, enum shader_time_shader_type,
		250	max_entries);
		251	brw->shader_time.cumulative = rzalloc_array(brw, uint64_t,
		252	max_entries);
		253	brw->shader_time.max_entries = max_entries;
		254	}
		255
		256	static int
		257	compare_time(const void a, const void b)
		258	{
		259	uint64_t * const *a_val = a;
		260	uint64_t * const *b_val = b;
		261
		262	/* We don't just subtract because we're turning the value to an int. */
		263	if (a_val < b_val)
		264	return -1;
		265	else if (a_val == b_val)
		266	return 0;
		267	else
		268	return 1;
		269	}
		270
		271	static void
		272	get_written_and_reset(struct brw_context *brw, int i,
		273	uint64_t written, uint64_t reset)
		274	{
		275	enum shader_time_shader_type type = brw->shader_time.types[i];
		276	assert(type == ST_VS \|\| type == ST_FS8 \|\| type == ST_FS16);
		277
		278	/* Find where we recorded written and reset. */
		279	int wi, ri;
		280
		281	for (wi = i; brw->shader_time.types[wi] != type + 1; wi++)
		282	;
		283
		284	for (ri = i; brw->shader_time.types[ri] != type + 2; ri++)
		285	;
		286
		287	*written = brw->shader_time.cumulative[wi];
		288	*reset = brw->shader_time.cumulative[ri];
		289	}
		290
		291	static void
		292	print_shader_time_line(const char stage, const char name,
		293	int shader_num, uint64_t time, uint64_t total)
		294	{
		295	printf("%-6s%-6s", stage, name);
		296
		297	if (shader_num != -1)
		298	printf("%4d: ", shader_num);
		299	else
		300	printf(" : ");
		301
		302	printf("%16lld (%7.2f Gcycles) %4.1f%%\n",
		303	(long long)time,
		304	(double)time / 1000000000.0,
		305	(double)time / total * 100.0);
		306	}
		307
		308	static void
		309	brw_report_shader_time(struct brw_context *brw)
		310	{
		311	if (!brw->shader_time.bo \|\| !brw->shader_time.num_entries)
		312	return;
		313
		314	uint64_t scaled[brw->shader_time.num_entries];
		315	uint64_t *sorted[brw->shader_time.num_entries];
		316	uint64_t total_by_type[ST_FS16 + 1];
		317	memset(total_by_type, 0, sizeof(total_by_type));
		318	double total = 0;
		319	for (int i = 0; i < brw->shader_time.num_entries; i++) {
		320	uint64_t written = 0, reset = 0;
		321	enum shader_time_shader_type type = brw->shader_time.types[i];
		322
		323	sorted[i] = &scaled[i];
		324
		325	switch (type) {
		326	case ST_VS_WRITTEN:
		327	case ST_VS_RESET:
		328	case ST_FS8_WRITTEN:
		329	case ST_FS8_RESET:
		330	case ST_FS16_WRITTEN:
		331	case ST_FS16_RESET:
		332	/* We'll handle these when along with the time. */
		333	scaled[i] = 0;
		334	continue;
		335
		336	case ST_VS:
		337	case ST_FS8:
		338	case ST_FS16:
		339	get_written_and_reset(brw, i, &written, &reset);
		340	break;
		341
		342	default:
		343	/* I sometimes want to print things that aren't the 3 shader times.
		344	* Just print the sum in that case.
		345	*/
		346	written = 1;
		347	reset = 0;
		348	break;
		349	}
		350
		351	uint64_t time = brw->shader_time.cumulative[i];
		352	if (written) {
		353	scaled[i] = time / written * (written + reset);
		354	} else {
		355	scaled[i] = time;
		356	}
		357
		358	switch (type) {
		359	case ST_VS:
		360	case ST_FS8:
		361	case ST_FS16:
		362	total_by_type[type] += scaled[i];
		363	break;
		364	default:
		365	break;
		366	}
		367
		368	total += scaled[i];
		369	}
		370
		371	if (total == 0) {
		372	printf("No shader time collected yet\n");
		373	return;
		374	}
		375
		376	qsort(sorted, brw->shader_time.num_entries, sizeof(sorted[0]), compare_time);
		377
		378	printf("\n");
		379	printf("type ID cycles spent %% of total\n");
		380	for (int s = 0; s < brw->shader_time.num_entries; s++) {
		381	const char *shader_name;
		382	const char *stage;
		383	/* Work back from the sorted pointers times to a time to print. */
		384	int i = sorted[s] - scaled;
		385
		386	if (scaled[i] == 0)
		387	continue;
		388
		389	int shader_num = -1;
		390	if (brw->shader_time.shader_programs[i]) {
		391	shader_num = brw->shader_time.shader_programs[i]->Name;
		392
		393	/* The fixed function fragment shader generates GLSL IR with a Name
		394	* of 0, and nothing else does.
		395	*/
		396	if (shader_num == 0 &&
		397	(brw->shader_time.types[i] == ST_FS8 \|\|
		398	brw->shader_time.types[i] == ST_FS16)) {
		399	shader_name = "ff";
		400	shader_num = -1;
		401	} else {
		402	shader_name = "glsl";
		403	}
		404	} else if (brw->shader_time.programs[i]) {
		405	shader_num = brw->shader_time.programs[i]->Id;
		406	if (shader_num == 0) {
		407	shader_name = "ff";
		408	shader_num = -1;
		409	} else {
		410	shader_name = "prog";
		411	}
		412	} else {
		413	shader_name = "other";
		414	}
		415
		416	switch (brw->shader_time.types[i]) {
		417	case ST_VS:
		418	stage = "vs";
		419	break;
		420	case ST_FS8:
		421	stage = "fs8";
		422	break;
		423	case ST_FS16:
		424	stage = "fs16";
		425	break;
		426	default:
		427	stage = "other";
		428	break;
		429	}
		430
		431	print_shader_time_line(stage, shader_name, shader_num,
		432	scaled[i], total);
		433	}
		434
		435	printf("\n");
		436	print_shader_time_line("total", "vs", -1, total_by_type[ST_VS], total);
		437	print_shader_time_line("total", "fs8", -1, total_by_type[ST_FS8], total);
		438	print_shader_time_line("total", "fs16", -1, total_by_type[ST_FS16], total);
		439	}
		440
		441	static void
		442	brw_collect_shader_time(struct brw_context *brw)
		443	{
		444	if (!brw->shader_time.bo)
		445	return;
		446
		447	/* This probably stalls on the last rendering. We could fix that by
		448	* delaying reading the reports, but it doesn't look like it's a big
		449	* overhead compared to the cost of tracking the time in the first place.
		450	*/
		451	drm_intel_bo_map(brw->shader_time.bo, true);
		452
		453	uint32_t *times = brw->shader_time.bo->virtual;
		454
		455	for (int i = 0; i < brw->shader_time.num_entries; i++) {
		456	brw->shader_time.cumulative[i] += times[i * SHADER_TIME_STRIDE / 4];
		457	}
		458
		459	/* Zero the BO out to clear it out for our next collection.
		460	*/
		461	memset(times, 0, brw->shader_time.bo->size);
		462	drm_intel_bo_unmap(brw->shader_time.bo);
		463	}
		464
		465	void
		466	brw_collect_and_report_shader_time(struct brw_context *brw)
		467	{
		468	brw_collect_shader_time(brw);
		469
		470	if (brw->shader_time.report_time == 0 \|\|
		471	get_time() - brw->shader_time.report_time >= 1.0) {
		472	brw_report_shader_time(brw);
		473	brw->shader_time.report_time = get_time();
		474	}
		475	}
		476
		477	/**
		478	* Chooses an index in the shader_time buffer and sets up tracking information
		479	* for our printouts.
		480	*
		481	* Note that this holds on to references to the underlying programs, which may
		482	* change their lifetimes compared to normal operation.
		483	*/
		484	int
		485	brw_get_shader_time_index(struct brw_context *brw,
		486	struct gl_shader_program *shader_prog,
		487	struct gl_program *prog,
		488	enum shader_time_shader_type type)
		489	{
		490	struct gl_context *ctx = &brw->ctx;
		491
		492	int shader_time_index = brw->shader_time.num_entries++;
		493	assert(shader_time_index < brw->shader_time.max_entries);
		494	brw->shader_time.types[shader_time_index] = type;
		495
		496	_mesa_reference_shader_program(ctx,
		497	&brw->shader_time.shader_programs[shader_time_index],
		498	shader_prog);
		499
		500	_mesa_reference_program(ctx,
		501	&brw->shader_time.programs[shader_time_index],
		502	prog);
		503
		504	return shader_time_index;
		505	}
		506
		507	void
		508	brw_destroy_shader_time(struct brw_context *brw)
		509	{
		510	drm_intel_bo_unreference(brw->shader_time.bo);
		511	brw->shader_time.bo = NULL;
		512	}

Subversion Repositories Kolibri OS

(root)/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_program.c – Rev 4358