/*
 Copyright (C) Intel Corp.  2006.  All Rights Reserved.
 Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to
 develop this 3D driver.

 Permission is hereby granted, free of charge, to any person obtaining
 a copy of this software and associated documentation files (the
 "Software"), to deal in the Software without restriction, including
 without limitation the rights to use, copy, modify, merge, publish,
 distribute, sublicense, and/or sell copies of the Software, and to
 permit persons to whom the Software is furnished to do so, subject to
 the following conditions:

 The above copyright notice and this permission notice (including the
 next paragraph) shall be included in all copies or substantial
 portions of the Software.

 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

 **********************************************************************/
/*
 * Authors:
 *   Keith Whitwell
 */

#include "main/context.h"
#include "main/blend.h"
#include "main/mtypes.h"
#include "main/samplerobj.h"
#include "program/prog_parameter.h"

#include "intel_mipmap_tree.h"
#include "intel_batchbuffer.h"
#include "intel_tex.h"
#include "intel_fbo.h"
#include "intel_buffer_objects.h"

#include "brw_context.h"
#include "brw_state.h"
#include "brw_defines.h"
#include "brw_wm.h"
GLuint
translate_tex_target(GLenum target)
{
   switch (target) {
   case GL_TEXTURE_1D:
   case GL_TEXTURE_1D_ARRAY_EXT:
      return BRW_SURFACE_1D;

   case GL_TEXTURE_RECTANGLE_NV:
      return BRW_SURFACE_2D;

   case GL_TEXTURE_2D:
   case GL_TEXTURE_2D_ARRAY_EXT:
   case GL_TEXTURE_EXTERNAL_OES:
   case GL_TEXTURE_2D_MULTISAMPLE:
   case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
      return BRW_SURFACE_2D;

   case GL_TEXTURE_3D:
      return BRW_SURFACE_3D;

   case GL_TEXTURE_CUBE_MAP:
   case GL_TEXTURE_CUBE_MAP_ARRAY:
      return BRW_SURFACE_CUBE;

   default:
      assert(0);
      return 0;
   }
}
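
/**
 * Translate an I915_TILING_* value into the tiling bits of SURFACE_STATE
 * DWORD 3.  Linear (untiled) surfaces yield 0.
 */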
uint32_t
brw_get_surface_tiling_bits(uint32_t tiling)
{
   switch (tiling) {
   case I915_TILING_X:
      return BRW_SURFACE_TILED;
   case I915_TILING_Y:
      return BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y;
   default:
      return 0;
   }
}
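
/**
 * Encode the multisample count field of SURFACE_STATE.  This pre-Gen7
 * path only distinguishes singlesampled from multisampled surfaces, so
 * any sample count greater than one is encoded as MULTISAMPLECOUNT_4.
 */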
uint32_t
brw_get_surface_num_multisamples(unsigned num_samples)
{
   if (num_samples > 1)
      return BRW_SURFACE_MULTISAMPLECOUNT_4;
   else
      return BRW_SURFACE_MULTISAMPLECOUNT_1;
}

/**
 * Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
 * swizzling.
 */
int
brw_get_texture_swizzle(const struct gl_context *ctx,
                        const struct gl_texture_object *t)
{
   const struct gl_texture_image *img = t->Image[0][t->BaseLevel];

   int swizzles[SWIZZLE_NIL + 1] = {
      SWIZZLE_X,
      SWIZZLE_Y,
      SWIZZLE_Z,
      SWIZZLE_W,
      SWIZZLE_ZERO,
      SWIZZLE_ONE,
      SWIZZLE_NIL
   };

   if (img->_BaseFormat == GL_DEPTH_COMPONENT ||
       img->_BaseFormat == GL_DEPTH_STENCIL) {
      GLenum depth_mode = t->DepthMode;

      /* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
       * with depth component data specified with a sized internal format.
       * Otherwise, it's left at the old default, GL_LUMINANCE.
       */
      if (_mesa_is_gles3(ctx) &&
          img->InternalFormat != GL_DEPTH_COMPONENT &&
          img->InternalFormat != GL_DEPTH_STENCIL) {
         depth_mode = GL_RED;
      }

      switch (depth_mode) {
      case GL_ALPHA:
         swizzles[0] = SWIZZLE_ZERO;
         swizzles[1] = SWIZZLE_ZERO;
         swizzles[2] = SWIZZLE_ZERO;
         swizzles[3] = SWIZZLE_X;
         break;
      case GL_LUMINANCE:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_ONE;
         break;
      case GL_INTENSITY:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_X;
         swizzles[2] = SWIZZLE_X;
         swizzles[3] = SWIZZLE_X;
         break;
      case GL_RED:
         swizzles[0] = SWIZZLE_X;
         swizzles[1] = SWIZZLE_ZERO;
         swizzles[2] = SWIZZLE_ZERO;
         swizzles[3] = SWIZZLE_ONE;
         break;
      }
   }

   /* If the texture's format is alpha-only, force R, G, and B to
    * 0.0. Similarly, if the texture's format has no alpha channel,
    * force the alpha value read to 1.0. This allows for the
    * implementation to use an RGBA texture for any of these formats
    * without leaking any unexpected values.
    */
   switch (img->_BaseFormat) {
   case GL_ALPHA:
      swizzles[0] = SWIZZLE_ZERO;
      swizzles[1] = SWIZZLE_ZERO;
      swizzles[2] = SWIZZLE_ZERO;
      break;
   case GL_RED:
   case GL_RG:
   case GL_RGB:
      if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
         swizzles[3] = SWIZZLE_ONE;
      break;
   }
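
   /* With an identity EXT_texture_swizzle state, GET_SWZ(t->_Swizzle, i)
    * is simply i, so e.g. a depth texture with DepthMode GL_LUMINANCE
    * yields MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_ONE):
    * the depth value broadcast to RGB, with alpha reading 1.0.
    */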
   return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
                        swizzles[GET_SWZ(t->_Swizzle, 1)],
                        swizzles[GET_SWZ(t->_Swizzle, 2)],
                        swizzles[GET_SWZ(t->_Swizzle, 3)]);
}
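
/**
 * Set up a SURFACE_STATE buffer surface for a GL_TEXTURE_BUFFER texture.
 * The element count is spread across the width (7 bits), height (13 bits)
 * and depth (7 bits) fields, and the pitch field holds the texel size.
 */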
192
 
193
 
194
static void
195
brw_update_buffer_texture_surface(struct gl_context *ctx,
196
                                  unsigned unit,
197
                                  uint32_t *binding_table,
198
                                  unsigned surf_index)
199
{
200
   struct brw_context *brw = brw_context(ctx);
201
   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
202
   uint32_t *surf;
203
   struct intel_buffer_object *intel_obj =
204
      intel_buffer_object(tObj->BufferObject);
205
   drm_intel_bo *bo = intel_obj ? intel_obj->buffer : NULL;
206
   gl_format format = tObj->_BufferObjectFormat;
207
   uint32_t brw_format = brw_format_for_mesa_format(format);
208
   int texel_size = _mesa_get_format_bytes(format);
209
 
210
   if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
211
      _mesa_problem(NULL, "bad format %s for texture buffer\n",
212
		    _mesa_get_format_name(format));
213
   }
214
 
215
   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
216
			  6 * 4, 32, &binding_table[surf_index]);
217
 
218
   surf[0] = (BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
219
	      (brw_format_for_mesa_format(format) << BRW_SURFACE_FORMAT_SHIFT));
220
 
221
   if (brw->gen >= 6)
222
      surf[0] |= BRW_SURFACE_RC_READ_WRITE;
223
 
224
   if (bo) {
225
      surf[1] = bo->offset; /* reloc */
226
 
227
      /* Emit relocation to surface contents. */
228
      drm_intel_bo_emit_reloc(brw->batch.bo,
229
			      binding_table[surf_index] + 4,
230
			      bo, 0, I915_GEM_DOMAIN_SAMPLER, 0);
231
 
232
      int w = intel_obj->Base.Size / texel_size;
233
      surf[2] = ((w & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
234
		 ((w >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT);
235
      surf[3] = (((w >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
236
		 (texel_size - 1) << BRW_SURFACE_PITCH_SHIFT);
237
   } else {
238
      surf[1] = 0;
239
      surf[2] = 0;
240
      surf[3] = 0;
241
   }
242
 
243
   surf[4] = 0;
244
   surf[5] = 0;
245
}
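
/**
 * Construct SURFACE_STATE for a texture image.  Buffer textures are
 * handed off to brw_update_buffer_texture_surface() instead, since they
 * use the buffer surface layout rather than a miptree.
 */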
static void
brw_update_texture_surface(struct gl_context *ctx,
                           unsigned unit,
                           uint32_t *binding_table,
                           unsigned surf_index)
{
   struct brw_context *brw = brw_context(ctx);
   struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
   struct intel_texture_object *intelObj = intel_texture_object(tObj);
   struct intel_mipmap_tree *mt = intelObj->mt;
   struct gl_texture_image *firstImage = tObj->Image[0][tObj->BaseLevel];
   struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
   uint32_t *surf;
   uint32_t tile_x, tile_y;

   if (tObj->Target == GL_TEXTURE_BUFFER) {
      brw_update_buffer_texture_surface(ctx, unit, binding_table, surf_index);
      return;
   }

   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
                          6 * 4, 32, &binding_table[surf_index]);

   surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT |
              BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
              BRW_SURFACE_CUBEFACE_ENABLES |
              (translate_tex_format(brw,
                                    mt->format,
                                    tObj->DepthMode,
                                    sampler->sRGBDecode) <<
               BRW_SURFACE_FORMAT_SHIFT));

   surf[1] = intelObj->mt->region->bo->offset + intelObj->mt->offset; /* reloc */
   surf[1] += intel_miptree_get_tile_offsets(intelObj->mt, firstImage->Level, 0,
                                             &tile_x, &tile_y);

   surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT |
              (mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   surf[3] = (brw_get_surface_tiling_bits(intelObj->mt->region->tiling) |
              (mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT |
              (intelObj->mt->region->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);

   surf[4] = brw_get_surface_num_multisamples(intelObj->mt->num_samples);

   assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
   /* Note that the low bits of these fields are missing, so
    * there's the possibility of getting in trouble.
    */
   assert(tile_x % 4 == 0);
   assert(tile_y % 2 == 0);
   surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
              (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
              (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));

   /* Emit relocation to surface contents. */
   drm_intel_bo_emit_reloc(brw->batch.bo,
                           binding_table[surf_index] + 4,
                           intelObj->mt->region->bo,
                           surf[1] - intelObj->mt->region->bo->offset,
                           I915_GEM_DOMAIN_SAMPLER, 0);
}

/**
 * Create the constant buffer surface.  Vertex/fragment shader constants will be
 * read from this buffer with Data Port Read instructions/messages.
 */
static void
brw_create_constant_surface(struct brw_context *brw,
                            drm_intel_bo *bo,
                            uint32_t offset,
                            uint32_t size,
                            uint32_t *out_offset,
                            bool dword_pitch)
{
   uint32_t stride = dword_pitch ? 4 : 16;
   uint32_t elements = ALIGN(size, stride) / stride;
   const GLint w = elements - 1;
   uint32_t *surf;

   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
                          6 * 4, 32, out_offset);

   surf[0] = (BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
              BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
              BRW_SURFACEFORMAT_R32G32B32A32_FLOAT << BRW_SURFACE_FORMAT_SHIFT);

   if (brw->gen >= 6)
      surf[0] |= BRW_SURFACE_RC_READ_WRITE;

   surf[1] = bo->offset + offset; /* reloc */

   surf[2] = ((w & 0x7f) << BRW_SURFACE_WIDTH_SHIFT |
              ((w >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT);

   surf[3] = (((w >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT |
              (stride - 1) << BRW_SURFACE_PITCH_SHIFT);

   surf[4] = 0;
   surf[5] = 0;

   /* Emit relocation to surface contents.  The 965 PRM, Volume 4, section
    * 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
    * physical cache.  It is mapped in hardware to the sampler cache."
    */
   drm_intel_bo_emit_reloc(brw->batch.bo,
                           *out_offset + 4,
                           bo, offset,
                           I915_GEM_DOMAIN_SAMPLER, 0);
}
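
/*
 * A note on dword_pitch: when set, the constant buffer is laid out as
 * packed dwords (stride 4) rather than vec4 elements (stride 16).  In
 * this file the WM pull-constant path and fragment-shader UBO surfaces
 * request dword pitch, presumably to match the per-channel reads the
 * fragment backend issues; vertex-stage UBO surfaces keep the 16-byte
 * vec4 stride.
 */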

/**
 * Set up a binding table entry for use by stream output logic (transform
 * feedback).
 *
 * buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
 */
void
brw_update_sol_surface(struct brw_context *brw,
                       struct gl_buffer_object *buffer_obj,
                       uint32_t *out_offset, unsigned num_vector_components,
                       unsigned stride_dwords, unsigned offset_dwords)
{
   struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
   drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo, INTEL_WRITE_PART);
   uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32,
                                    out_offset);
   uint32_t pitch_minus_1 = 4 * stride_dwords - 1;
   uint32_t offset_bytes = 4 * offset_dwords;
   size_t size_dwords = buffer_obj->Size / 4;
   uint32_t buffer_size_minus_1, width, height, depth, surface_format;

   /* FIXME: can we rely on core Mesa to ensure that the buffer isn't
    * too big to map using a single binding table entry?
    */
   assert((size_dwords - offset_dwords) / stride_dwords
          <= BRW_MAX_NUM_BUFFER_ENTRIES);

   if (size_dwords > offset_dwords + num_vector_components) {
      /* There is room for at least 1 transform feedback output in the buffer.
       * Compute the number of additional transform feedback outputs the
       * buffer has room for.
       */
      buffer_size_minus_1 =
         (size_dwords - offset_dwords - num_vector_components) / stride_dwords;
   } else {
      /* There isn't even room for a single transform feedback output in the
       * buffer.  We can't configure the binding table entry to prevent output
       * entirely; we'll have to rely on the geometry shader to detect
       * overflow.  But to minimize the damage in case of a bug, set up the
       * binding table entry to just allow a single output.
       */
      buffer_size_minus_1 = 0;
   }
   width = buffer_size_minus_1 & 0x7f;
   height = (buffer_size_minus_1 & 0xfff80) >> 7;
   depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
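   /* For example, buffer_size_minus_1 = 999 (0x3e7) packs as
    * width = 0x67 (103), height = 7, depth = 0.
    */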

   switch (num_vector_components) {
   case 1:
      surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
      break;
   case 2:
      surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
      break;
   case 3:
      surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
      break;
   case 4:
      surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
      break;
   default:
      assert(!"Invalid vector size for transform feedback output");
      surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
      break;
   }

   surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT |
      BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT |
      surface_format << BRW_SURFACE_FORMAT_SHIFT |
      BRW_SURFACE_RC_READ_WRITE;
   surf[1] = bo->offset + offset_bytes; /* reloc */
   surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT |
              height << BRW_SURFACE_HEIGHT_SHIFT);
   surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT |
              pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
   surf[4] = 0;
   surf[5] = 0;

   /* Emit relocation to surface contents. */
   drm_intel_bo_emit_reloc(brw->batch.bo,
                           *out_offset + 4,
                           bo, offset_bytes,
                           I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
}

/* Creates a new WM constant buffer reflecting the current fragment program's
 * constants, if needed by the fragment program.
 *
 * Otherwise, constants go through the CURBEs using the brw_constant_buffer
 * state atom.
 */
static void
brw_upload_wm_pull_constants(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* BRW_NEW_FRAGMENT_PROGRAM */
   struct brw_fragment_program *fp =
      (struct brw_fragment_program *) brw->fragment_program;
   struct gl_program_parameter_list *params = fp->program.Base.Parameters;
   const int size = brw->wm.prog_data->nr_pull_params * sizeof(float);
   const int surf_index = SURF_INDEX_FRAG_CONST_BUFFER;
   float *constants;
   unsigned int i;

   _mesa_load_state_parameters(ctx, params);

   /* CACHE_NEW_WM_PROG */
   if (brw->wm.prog_data->nr_pull_params == 0) {
      if (brw->wm.const_bo) {
         drm_intel_bo_unreference(brw->wm.const_bo);
         brw->wm.const_bo = NULL;
         brw->wm.surf_offset[surf_index] = 0;
         brw->state.dirty.brw |= BRW_NEW_SURFACES;
      }
      return;
   }

   drm_intel_bo_unreference(brw->wm.const_bo);
   brw->wm.const_bo = drm_intel_bo_alloc(brw->bufmgr, "WM const bo",
                                         size, 64);

   /* _NEW_PROGRAM_CONSTANTS */
   drm_intel_gem_bo_map_gtt(brw->wm.const_bo);
   constants = brw->wm.const_bo->virtual;
   for (i = 0; i < brw->wm.prog_data->nr_pull_params; i++) {
      constants[i] = *brw->wm.prog_data->pull_param[i];
   }
   drm_intel_gem_bo_unmap_gtt(brw->wm.const_bo);

   brw->vtbl.create_constant_surface(brw, brw->wm.const_bo, 0, size,
                                     &brw->wm.surf_offset[surf_index],
                                     true);

   brw->state.dirty.brw |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_wm_pull_constants = {
   .dirty = {
      .mesa = (_NEW_PROGRAM_CONSTANTS),
      .brw = (BRW_NEW_BATCH | BRW_NEW_FRAGMENT_PROGRAM),
      .cache = CACHE_NEW_WM_PROG,
   },
   .emit = brw_upload_wm_pull_constants,
};

static void
brw_update_null_renderbuffer_surface(struct brw_context *brw, unsigned int unit)
{
   /* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
    * Notes):
    *
    *     A null surface will be used in instances where an actual surface is
    *     not bound. When a write message is generated to a null surface, no
    *     actual surface is written to. When a read message (including any
    *     sampling engine message) is generated to a null surface, the result
    *     is all zeros. Note that a null surface type is allowed to be used
    *     with all messages, even if it is not specifically indicated as
    *     supported. All of the remaining fields in surface state are ignored
    *     for null surfaces, with the following exceptions:
    *
    *     - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
    *       depth buffer's corresponding state for all render target surfaces,
    *       including null.
    *
    *     - Surface Format must be R8G8B8A8_UNORM.
    */
   struct gl_context *ctx = &brw->ctx;
   uint32_t *surf;
   unsigned surface_type = BRW_SURFACE_NULL;
   drm_intel_bo *bo = NULL;
   unsigned pitch_minus_1 = 0;
   uint32_t multisampling_state = 0;

   /* _NEW_BUFFERS */
   const struct gl_framebuffer *fb = ctx->DrawBuffer;

   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
                          6 * 4, 32, &brw->wm.surf_offset[unit]);

   if (fb->Visual.samples > 1) {
      /* On Gen6, null render targets seem to cause GPU hangs when
       * multisampling.  So work around this problem by rendering into a
       * dummy color buffer.
       *
       * To decrease the amount of memory needed by the workaround buffer, we
       * set its pitch to 128 bytes (the width of a Y tile).  This means that
       * the amount of memory needed for the workaround buffer is
       * (width_in_tiles + height_in_tiles - 1) tiles.
       *
       * Note that since the workaround buffer will be interpreted by the
       * hardware as an interleaved multisampled buffer, we need to compute
       * width_in_tiles and height_in_tiles by dividing the width and height
       * by 16 rather than the normal Y-tile size of 32.
       */
      unsigned width_in_tiles = ALIGN(fb->Width, 16) / 16;
      unsigned height_in_tiles = ALIGN(fb->Height, 16) / 16;
      unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
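      /* For a 1920x1080 multisampled framebuffer this comes to
       * (120 + 68 - 1) * 4096 bytes, i.e. about 748 kB, rather than a
       * full-size dummy render target.
       */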
      brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
                         size_needed);
      bo = brw->wm.multisampled_null_render_target_bo;
      surface_type = BRW_SURFACE_2D;
      pitch_minus_1 = 127;
      multisampling_state =
         brw_get_surface_num_multisamples(fb->Visual.samples);
   }

   surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT |
              BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
   if (brw->gen < 6) {
      surf[0] |= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT |
                  1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT |
                  1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT |
                  1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
   }
   surf[1] = bo ? bo->offset : 0;
   surf[2] = ((fb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (fb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   /* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
    * Notes):
    *
    *     If Surface Type is SURFTYPE_NULL, this field must be TRUE
    */
   surf[3] = (BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y |
              pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
   surf[4] = multisampling_state;
   surf[5] = 0;

   if (bo) {
      drm_intel_bo_emit_reloc(brw->batch.bo,
                              brw->wm.surf_offset[unit] + 4,
                              bo, 0,
                              I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
   }
}

/**
 * Sets up a surface state structure to point at the given region.
 * While it is only used for the front/back buffer currently, it should be
 * usable for further buffers when doing ARB_draw_buffers support.
 */
static void
brw_update_renderbuffer_surface(struct brw_context *brw,
                                struct gl_renderbuffer *rb,
                                bool layered,
                                unsigned int unit)
{
   struct gl_context *ctx = &brw->ctx;
   struct intel_renderbuffer *irb = intel_renderbuffer(rb);
   struct intel_mipmap_tree *mt = irb->mt;
   struct intel_region *region;
   uint32_t *surf;
   uint32_t tile_x, tile_y;
   uint32_t format = 0;
   /* _NEW_BUFFERS */
   gl_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));

   assert(!layered);

   if (rb->TexImage && !brw->has_surface_tile_offset) {
      intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);

      if (tile_x != 0 || tile_y != 0) {
         /* Original gen4 hardware couldn't draw to a non-tile-aligned
          * destination in a miptree unless you actually set up your
          * renderbuffer as a miptree and used the fragile
          * lod/array_index/etc. controls to select the image.  So, instead,
          * we just make a new single-level miptree and render into that.
          */
         intel_renderbuffer_move_to_temp(brw, irb, false);
         mt = irb->mt;
      }
   }

   intel_miptree_used_for_rendering(irb->mt);

   region = irb->mt->region;

   surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
                          6 * 4, 32, &brw->wm.surf_offset[unit]);

   format = brw->render_target_format[rb_format];
   if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
      _mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
                    __FUNCTION__, _mesa_get_format_name(rb_format));
   }

   surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT |
              format << BRW_SURFACE_FORMAT_SHIFT);

   /* reloc */
   surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
              region->bo->offset);

   surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT |
              (rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);

   surf[3] = (brw_get_surface_tiling_bits(region->tiling) |
              (region->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);

   surf[4] = brw_get_surface_num_multisamples(mt->num_samples);

   assert(brw->has_surface_tile_offset || (tile_x == 0 && tile_y == 0));
   /* Note that the low bits of these fields are missing, so
    * there's the possibility of getting in trouble.
    */
   assert(tile_x % 4 == 0);
   assert(tile_y % 2 == 0);
   surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT |
              (tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT |
              (mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));

   if (brw->gen < 6) {
      /* _NEW_COLOR */
      if (!ctx->Color.ColorLogicOpEnabled &&
          (ctx->Color.BlendEnabled & (1 << unit)))
         surf[0] |= BRW_SURFACE_BLEND_ENABLED;

      if (!ctx->Color.ColorMask[unit][0])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
      if (!ctx->Color.ColorMask[unit][1])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
      if (!ctx->Color.ColorMask[unit][2])
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;

      /* Disable writes to the alpha component when the renderbuffer is
       * XRGB.
       */
      if (ctx->DrawBuffer->Visual.alphaBits == 0 ||
          !ctx->Color.ColorMask[unit][3]) {
         surf[0] |= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
      }
   }
#if 0
   printf("brw_update_renderbuffer_surface\n"
          "bind bo(handle=%d format=%d width=%d height=%d\n"
          "pitch=%d, tiling=%d\n"
          "ss[0] %x ss[1] %x ss[2] %x ss[3] %x ss[4] %x ss[5] %x\n",
          region->bo->handle, format, rb->Width, rb->Height,
          region->pitch, region->tiling,
          surf[0], surf[1], surf[2], surf[3], surf[4], surf[5]);
#endif

   drm_intel_bo_emit_reloc(brw->batch.bo,
                           brw->wm.surf_offset[unit] + 4,
                           region->bo,
                           surf[1] - region->bo->offset,
                           I915_GEM_DOMAIN_RENDER,
                           I915_GEM_DOMAIN_RENDER);
}

/**
 * Construct SURFACE_STATE objects for renderbuffers/draw buffers.
 */
static void
brw_update_renderbuffer_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   GLuint i;

   /* _NEW_BUFFERS | _NEW_COLOR */
   /* Update surfaces for drawing buffers */
   if (ctx->DrawBuffer->_NumColorDrawBuffers >= 1) {
      for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
         if (intel_renderbuffer(ctx->DrawBuffer->_ColorDrawBuffers[i])) {
            brw->vtbl.update_renderbuffer_surface(brw,
                                                  ctx->DrawBuffer->_ColorDrawBuffers[i],
                                                  ctx->DrawBuffer->Layered, i);
         } else {
            brw->vtbl.update_null_renderbuffer_surface(brw, i);
         }
      }
   } else {
      brw->vtbl.update_null_renderbuffer_surface(brw, 0);
   }
   brw->state.dirty.brw |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_renderbuffer_surfaces = {
   .dirty = {
      .mesa = (_NEW_COLOR |
               _NEW_BUFFERS),
      .brw = BRW_NEW_BATCH,
      .cache = 0
   },
   .emit = brw_update_renderbuffer_surfaces,
};

const struct brw_tracked_state gen6_renderbuffer_surfaces = {
   .dirty = {
      .mesa = _NEW_BUFFERS,
      .brw = BRW_NEW_BATCH,
      .cache = 0
   },
   .emit = brw_update_renderbuffer_surfaces,
};

/**
 * Construct SURFACE_STATE objects for enabled textures.
 */
static void
brw_update_texture_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;

   /* BRW_NEW_VERTEX_PROGRAM and BRW_NEW_FRAGMENT_PROGRAM:
    * Unfortunately, we're stuck using the gl_program structs until the
    * ARB_fragment_program front-end gets converted to GLSL IR.  These
    * have the downside that SamplerUnits is split and only contains the
    * mappings for samplers active in that stage.
    */
   struct gl_program *vs = (struct gl_program *) brw->vertex_program;
   struct gl_program *fs = (struct gl_program *) brw->fragment_program;

   unsigned num_samplers = _mesa_fls(vs->SamplersUsed | fs->SamplersUsed);

   for (unsigned s = 0; s < num_samplers; s++) {
      brw->vs.surf_offset[SURF_INDEX_VS_TEXTURE(s)] = 0;
      brw->wm.surf_offset[SURF_INDEX_TEXTURE(s)] = 0;

      if (vs->SamplersUsed & (1 << s)) {
         const unsigned unit = vs->SamplerUnits[s];

         /* _NEW_TEXTURE */
         if (ctx->Texture.Unit[unit]._ReallyEnabled) {
            brw->vtbl.update_texture_surface(ctx, unit,
                                             brw->vs.surf_offset,
                                             SURF_INDEX_VS_TEXTURE(s));
         }
      }

      if (fs->SamplersUsed & (1 << s)) {
         const unsigned unit = fs->SamplerUnits[s];

         /* _NEW_TEXTURE */
         if (ctx->Texture.Unit[unit]._ReallyEnabled) {
            brw->vtbl.update_texture_surface(ctx, unit,
                                             brw->wm.surf_offset,
                                             SURF_INDEX_TEXTURE(s));
         }
      }
   }

   brw->state.dirty.brw |= BRW_NEW_SURFACES;
}

const struct brw_tracked_state brw_texture_surfaces = {
   .dirty = {
      .mesa = _NEW_TEXTURE,
      .brw = BRW_NEW_BATCH |
             BRW_NEW_VERTEX_PROGRAM |
             BRW_NEW_FRAGMENT_PROGRAM,
      .cache = 0
   },
   .emit = brw_update_texture_surfaces,
};
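
/**
 * Create constant buffer surfaces for the uniform blocks referenced by a
 * linked shader stage, one binding table entry per block.
 */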
void
brw_upload_ubo_surfaces(struct brw_context *brw,
                        struct gl_shader *shader,
                        uint32_t *surf_offsets)
{
   struct gl_context *ctx = &brw->ctx;

   if (!shader)
      return;

   for (int i = 0; i < shader->NumUniformBlocks; i++) {
      struct gl_uniform_buffer_binding *binding;
      struct intel_buffer_object *intel_bo;

      binding = &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
      intel_bo = intel_buffer_object(binding->BufferObject);
      drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo, INTEL_READ);

      /* Because the behavior of references outside the binding's size is
       * undefined in the glBindBufferRange case, we can simply bind the
       * whole buffer (as glBindBufferBase wants) and still be a correct
       * implementation.
       */
      brw->vtbl.create_constant_surface(brw, bo, binding->Offset,
                                        bo->size - binding->Offset,
                                        &surf_offsets[i],
                                        shader->Type == GL_FRAGMENT_SHADER);
   }

   if (shader->NumUniformBlocks)
      brw->state.dirty.brw |= BRW_NEW_SURFACES;
}
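
/**
 * Upload UBO surfaces for the currently linked fragment program; this is
 * the fragment-stage wrapper around brw_upload_ubo_surfaces().
 */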
static void
brw_upload_wm_ubo_surfaces(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   /* _NEW_PROGRAM */
   struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;

   if (!prog)
      return;

   brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
                           &brw->wm.surf_offset[SURF_INDEX_WM_UBO(0)]);
}

const struct brw_tracked_state brw_wm_ubo_surfaces = {
   .dirty = {
      .mesa = _NEW_PROGRAM,
      .brw = BRW_NEW_BATCH | BRW_NEW_UNIFORM_BUFFER,
      .cache = 0,
   },
   .emit = brw_upload_wm_ubo_surfaces,
};

/**
 * Constructs the binding table for the WM surface state, which maps unit
 * numbers to surface state objects.
 */
static void
brw_upload_wm_binding_table(struct brw_context *brw)
{
   uint32_t *bind;
   int i;

   if (INTEL_DEBUG & DEBUG_SHADER_TIME) {
      gen7_create_shader_time_surface(brw,
                                      &brw->wm.surf_offset[SURF_INDEX_WM_SHADER_TIME]);
   }

   /* Might want to calculate nr_surfaces first, to avoid taking up so much
    * space for the binding table.
    */
   bind = brw_state_batch(brw, AUB_TRACE_BINDING_TABLE,
                          sizeof(uint32_t) * BRW_MAX_WM_SURFACES,
                          32, &brw->wm.bind_bo_offset);

   /* BRW_NEW_SURFACES */
   for (i = 0; i < BRW_MAX_WM_SURFACES; i++) {
      bind[i] = brw->wm.surf_offset[i];
   }

   brw->state.dirty.brw |= BRW_NEW_PS_BINDING_TABLE;
}

const struct brw_tracked_state brw_wm_binding_table = {
   .dirty = {
      .mesa = 0,
      .brw = (BRW_NEW_BATCH |
              BRW_NEW_SURFACES),
      .cache = 0
   },
   .emit = brw_upload_wm_binding_table,
};
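
/**
 * Hook up the gen4-6 surface state functions in the context vtable, so
 * shared state upload code can emit surfaces without checking the
 * hardware generation; gen7 code installs its own variants elsewhere.
 */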
void
gen4_init_vtable_surface_functions(struct brw_context *brw)
{
   brw->vtbl.update_texture_surface = brw_update_texture_surface;
   brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
   brw->vtbl.update_null_renderbuffer_surface =
      brw_update_null_renderbuffer_surface;
   brw->vtbl.create_constant_surface = brw_create_constant_surface;
}