WebSVN – Kolibri OS – Blame – /contrib/sdk/sources/Mesa/mesa-10.6.0/src/mesa/drivers/dri/i965/brw_wm_surface_state.c

Rev	Author	Line No.	Line
5564	serge	1	/*
		2	Copyright (C) Intel Corp. 2006. All Rights Reserved.
		3	Intel funded Tungsten Graphics to
		4	develop this 3D driver.
		5
		6	Permission is hereby granted, free of charge, to any person obtaining
		7	a copy of this software and associated documentation files (the
		8	"Software"), to deal in the Software without restriction, including
		9	without limitation the rights to use, copy, modify, merge, publish,
		10	distribute, sublicense, and/or sell copies of the Software, and to
		11	permit persons to whom the Software is furnished to do so, subject to
		12	the following conditions:
		13
		14	The above copyright notice and this permission notice (including the
		15	next paragraph) shall be included in all copies or substantial
		16	portions of the Software.
		17
		18	THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
		19	EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
		20	MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
		21	IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
		22	LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
		23	OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
		24	WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
		25
		26	**********************************************************************/
		27	/*
		28	* Authors:
		29	* Keith Whitwell
		30	*/
		31
		32
		33	#include "main/context.h"
		34	#include "main/blend.h"
		35	#include "main/mtypes.h"
		36	#include "main/samplerobj.h"
		37	#include "program/prog_parameter.h"
		38
		39	#include "intel_mipmap_tree.h"
		40	#include "intel_batchbuffer.h"
		41	#include "intel_tex.h"
		42	#include "intel_fbo.h"
		43	#include "intel_buffer_objects.h"
		44
		45	#include "brw_context.h"
		46	#include "brw_state.h"
		47	#include "brw_defines.h"
		48	#include "brw_wm.h"
		49
		50	GLuint
		51	translate_tex_target(GLenum target)
		52	{
		53	switch (target) {
		54	case GL_TEXTURE_1D:
		55	case GL_TEXTURE_1D_ARRAY_EXT:
		56	return BRW_SURFACE_1D;
		57
		58	case GL_TEXTURE_RECTANGLE_NV:
		59	return BRW_SURFACE_2D;
		60
		61	case GL_TEXTURE_2D:
		62	case GL_TEXTURE_2D_ARRAY_EXT:
		63	case GL_TEXTURE_EXTERNAL_OES:
		64	case GL_TEXTURE_2D_MULTISAMPLE:
		65	case GL_TEXTURE_2D_MULTISAMPLE_ARRAY:
		66	return BRW_SURFACE_2D;
		67
		68	case GL_TEXTURE_3D:
		69	return BRW_SURFACE_3D;
		70
		71	case GL_TEXTURE_CUBE_MAP:
		72	case GL_TEXTURE_CUBE_MAP_ARRAY:
		73	return BRW_SURFACE_CUBE;
		74
		75	default:
		76	unreachable("not reached");
		77	}
		78	}
		79
		80	uint32_t
		81	brw_get_surface_tiling_bits(uint32_t tiling)
		82	{
		83	switch (tiling) {
		84	case I915_TILING_X:
		85	return BRW_SURFACE_TILED;
		86	case I915_TILING_Y:
		87	return BRW_SURFACE_TILED \| BRW_SURFACE_TILED_Y;
		88	default:
		89	return 0;
		90	}
		91	}
		92
		93
		94	uint32_t
		95	brw_get_surface_num_multisamples(unsigned num_samples)
		96	{
		97	if (num_samples > 1)
		98	return BRW_SURFACE_MULTISAMPLECOUNT_4;
		99	else
		100	return BRW_SURFACE_MULTISAMPLECOUNT_1;
		101	}
		102
		103	void
		104	brw_configure_w_tiled(const struct intel_mipmap_tree *mt,
		105	bool is_render_target,
		106	unsigned width, unsigned height,
		107	unsigned pitch, uint32_t tiling, unsigned *format)
		108	{
		109	static const unsigned halign_stencil = 8;
		110
		111	/* In Y-tiling row is twice as wide as in W-tiling, and subsequently
		112	* there are half as many rows.
		113	* In addition, mip-levels are accessed manually by the program and
		114	* therefore the surface is setup to cover all the mip-levels for one slice.
		115	* (Hardware is still used to access individual slices).
		116	*/
		117	*tiling = I915_TILING_Y;
		118	pitch = mt->pitch 2;
		119	width = ALIGN(mt->total_width, halign_stencil) 2;
		120	*height = (mt->total_height / mt->physical_depth0) / 2;
		121
		122	if (is_render_target) {
		123	*format = BRW_SURFACEFORMAT_R8_UINT;
		124	}
		125	}
		126
		127
		128	/**
		129	* Compute the combination of DEPTH_TEXTURE_MODE and EXT_texture_swizzle
		130	* swizzling.
		131	*/
		132	int
		133	brw_get_texture_swizzle(const struct gl_context *ctx,
		134	const struct gl_texture_object *t)
		135	{
		136	const struct gl_texture_image *img = t->Image[0][t->BaseLevel];
		137
		138	int swizzles[SWIZZLE_NIL + 1] = {
		139	SWIZZLE_X,
		140	SWIZZLE_Y,
		141	SWIZZLE_Z,
		142	SWIZZLE_W,
		143	SWIZZLE_ZERO,
		144	SWIZZLE_ONE,
		145	SWIZZLE_NIL
		146	};
		147
		148	if (img->_BaseFormat == GL_DEPTH_COMPONENT \|\|
		149	img->_BaseFormat == GL_DEPTH_STENCIL) {
		150	GLenum depth_mode = t->DepthMode;
		151
		152	/* In ES 3.0, DEPTH_TEXTURE_MODE is expected to be GL_RED for textures
		153	* with depth component data specified with a sized internal format.
		154	* Otherwise, it's left at the old default, GL_LUMINANCE.
		155	*/
		156	if (_mesa_is_gles3(ctx) &&
		157	img->InternalFormat != GL_DEPTH_COMPONENT &&
		158	img->InternalFormat != GL_DEPTH_STENCIL) {
		159	depth_mode = GL_RED;
		160	}
		161
		162	switch (depth_mode) {
		163	case GL_ALPHA:
		164	swizzles[0] = SWIZZLE_ZERO;
		165	swizzles[1] = SWIZZLE_ZERO;
		166	swizzles[2] = SWIZZLE_ZERO;
		167	swizzles[3] = SWIZZLE_X;
		168	break;
		169	case GL_LUMINANCE:
		170	swizzles[0] = SWIZZLE_X;
		171	swizzles[1] = SWIZZLE_X;
		172	swizzles[2] = SWIZZLE_X;
		173	swizzles[3] = SWIZZLE_ONE;
		174	break;
		175	case GL_INTENSITY:
		176	swizzles[0] = SWIZZLE_X;
		177	swizzles[1] = SWIZZLE_X;
		178	swizzles[2] = SWIZZLE_X;
		179	swizzles[3] = SWIZZLE_X;
		180	break;
		181	case GL_RED:
		182	swizzles[0] = SWIZZLE_X;
		183	swizzles[1] = SWIZZLE_ZERO;
		184	swizzles[2] = SWIZZLE_ZERO;
		185	swizzles[3] = SWIZZLE_ONE;
		186	break;
		187	}
		188	}
		189
		190	GLenum datatype = _mesa_get_format_datatype(img->TexFormat);
		191
		192	/* If the texture's format is alpha-only, force R, G, and B to
		193	* 0.0. Similarly, if the texture's format has no alpha channel,
		194	* force the alpha value read to 1.0. This allows for the
		195	* implementation to use an RGBA texture for any of these formats
		196	* without leaking any unexpected values.
		197	*/
		198	switch (img->_BaseFormat) {
		199	case GL_ALPHA:
		200	swizzles[0] = SWIZZLE_ZERO;
		201	swizzles[1] = SWIZZLE_ZERO;
		202	swizzles[2] = SWIZZLE_ZERO;
		203	break;
		204	case GL_LUMINANCE:
		205	if (t->_IsIntegerFormat \|\| datatype == GL_SIGNED_NORMALIZED) {
		206	swizzles[0] = SWIZZLE_X;
		207	swizzles[1] = SWIZZLE_X;
		208	swizzles[2] = SWIZZLE_X;
		209	swizzles[3] = SWIZZLE_ONE;
		210	}
		211	break;
		212	case GL_LUMINANCE_ALPHA:
		213	if (datatype == GL_SIGNED_NORMALIZED) {
		214	swizzles[0] = SWIZZLE_X;
		215	swizzles[1] = SWIZZLE_X;
		216	swizzles[2] = SWIZZLE_X;
		217	swizzles[3] = SWIZZLE_W;
		218	}
		219	break;
		220	case GL_INTENSITY:
		221	if (datatype == GL_SIGNED_NORMALIZED) {
		222	swizzles[0] = SWIZZLE_X;
		223	swizzles[1] = SWIZZLE_X;
		224	swizzles[2] = SWIZZLE_X;
		225	swizzles[3] = SWIZZLE_X;
		226	}
		227	break;
		228	case GL_RED:
		229	case GL_RG:
		230	case GL_RGB:
		231	if (_mesa_get_format_bits(img->TexFormat, GL_ALPHA_BITS) > 0)
		232	swizzles[3] = SWIZZLE_ONE;
		233	break;
		234	}
		235
		236	return MAKE_SWIZZLE4(swizzles[GET_SWZ(t->_Swizzle, 0)],
		237	swizzles[GET_SWZ(t->_Swizzle, 1)],
		238	swizzles[GET_SWZ(t->_Swizzle, 2)],
		239	swizzles[GET_SWZ(t->_Swizzle, 3)]);
		240	}
		241
		242	static void
		243	gen4_emit_buffer_surface_state(struct brw_context *brw,
		244	uint32_t *out_offset,
		245	drm_intel_bo *bo,
		246	unsigned buffer_offset,
		247	unsigned surface_format,
		248	unsigned buffer_size,
		249	unsigned pitch,
		250	bool rw)
		251	{
		252	uint32_t *surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
		253	6 * 4, 32, out_offset);
		254	memset(surf, 0, 6 * 4);
		255
		256	surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT \|
		257	surface_format << BRW_SURFACE_FORMAT_SHIFT \|
		258	(brw->gen >= 6 ? BRW_SURFACE_RC_READ_WRITE : 0);
		259	surf[1] = (bo ? bo->offset64 : 0) + buffer_offset; /* reloc */
		260	surf[2] = (buffer_size & 0x7f) << BRW_SURFACE_WIDTH_SHIFT \|
		261	((buffer_size >> 7) & 0x1fff) << BRW_SURFACE_HEIGHT_SHIFT;
		262	surf[3] = ((buffer_size >> 20) & 0x7f) << BRW_SURFACE_DEPTH_SHIFT \|
		263	(pitch - 1) << BRW_SURFACE_PITCH_SHIFT;
		264
		265	/* Emit relocation to surface contents. The 965 PRM, Volume 4, section
		266	* 5.1.2 "Data Cache" says: "the data cache does not exist as a separate
		267	* physical cache. It is mapped in hardware to the sampler cache."
		268	*/
		269	if (bo) {
		270	drm_intel_bo_emit_reloc(brw->batch.bo, *out_offset + 4,
		271	bo, buffer_offset,
		272	I915_GEM_DOMAIN_SAMPLER,
		273	(rw ? I915_GEM_DOMAIN_SAMPLER : 0));
		274	}
		275	}
		276
		277	void
		278	brw_update_buffer_texture_surface(struct gl_context *ctx,
		279	unsigned unit,
		280	uint32_t *surf_offset)
		281	{
		282	struct brw_context *brw = brw_context(ctx);
		283	struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
		284	struct intel_buffer_object *intel_obj =
		285	intel_buffer_object(tObj->BufferObject);
		286	uint32_t size = tObj->BufferSize;
		287	drm_intel_bo *bo = NULL;
		288	mesa_format format = tObj->_BufferObjectFormat;
		289	uint32_t brw_format = brw_format_for_mesa_format(format);
		290	int texel_size = _mesa_get_format_bytes(format);
		291
		292	if (intel_obj) {
		293	size = MIN2(size, intel_obj->Base.Size);
		294	bo = intel_bufferobj_buffer(brw, intel_obj, tObj->BufferOffset, size);
		295	}
		296
		297	if (brw_format == 0 && format != MESA_FORMAT_RGBA_FLOAT32) {
		298	_mesa_problem(NULL, "bad format %s for texture buffer\n",
		299	_mesa_get_format_name(format));
		300	}
		301
		302	brw->vtbl.emit_buffer_surface_state(brw, surf_offset, bo,
		303	tObj->BufferOffset,
		304	brw_format,
		305	size / texel_size,
		306	texel_size,
		307	false /* rw */);
		308	}
		309
		310	static void
		311	brw_update_texture_surface(struct gl_context *ctx,
		312	unsigned unit,
		313	uint32_t *surf_offset,
		314	bool for_gather)
		315	{
		316	struct brw_context *brw = brw_context(ctx);
		317	struct gl_texture_object *tObj = ctx->Texture.Unit[unit]._Current;
		318	struct intel_texture_object *intelObj = intel_texture_object(tObj);
		319	struct intel_mipmap_tree *mt = intelObj->mt;
		320	struct gl_sampler_object *sampler = _mesa_get_samplerobj(ctx, unit);
		321	uint32_t *surf;
		322
		323	/* BRW_NEW_TEXTURE_BUFFER */
		324	if (tObj->Target == GL_TEXTURE_BUFFER) {
		325	brw_update_buffer_texture_surface(ctx, unit, surf_offset);
		326	return;
		327	}
		328
		329	surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE,
		330	6 * 4, 32, surf_offset);
		331
		332	uint32_t tex_format = translate_tex_format(brw, mt->format,
		333	sampler->sRGBDecode);
		334
		335	if (for_gather) {
		336	/* Sandybridge's gather4 message is broken for integer formats.
		337	* To work around this, we pretend the surface is UNORM for
		338	* 8 or 16-bit formats, and emit shader instructions to recover
		339	* the real INT/UINT value. For 32-bit formats, we pretend
		340	* the surface is FLOAT, and simply reinterpret the resulting
		341	* bits.
		342	*/
		343	switch (tex_format) {
		344	case BRW_SURFACEFORMAT_R8_SINT:
		345	case BRW_SURFACEFORMAT_R8_UINT:
		346	tex_format = BRW_SURFACEFORMAT_R8_UNORM;
		347	break;
		348
		349	case BRW_SURFACEFORMAT_R16_SINT:
		350	case BRW_SURFACEFORMAT_R16_UINT:
		351	tex_format = BRW_SURFACEFORMAT_R16_UNORM;
		352	break;
		353
		354	case BRW_SURFACEFORMAT_R32_SINT:
		355	case BRW_SURFACEFORMAT_R32_UINT:
		356	tex_format = BRW_SURFACEFORMAT_R32_FLOAT;
		357	break;
		358
		359	default:
		360	break;
		361	}
		362	}
		363
		364	surf[0] = (translate_tex_target(tObj->Target) << BRW_SURFACE_TYPE_SHIFT \|
		365	BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT \|
		366	BRW_SURFACE_CUBEFACE_ENABLES \|
		367	tex_format << BRW_SURFACE_FORMAT_SHIFT);
		368
		369	surf[1] = mt->bo->offset64 + mt->offset; /* reloc */
		370
		371	surf[2] = ((intelObj->_MaxLevel - tObj->BaseLevel) << BRW_SURFACE_LOD_SHIFT \|
		372	(mt->logical_width0 - 1) << BRW_SURFACE_WIDTH_SHIFT \|
		373	(mt->logical_height0 - 1) << BRW_SURFACE_HEIGHT_SHIFT);
		374
		375	surf[3] = (brw_get_surface_tiling_bits(mt->tiling) \|
		376	(mt->logical_depth0 - 1) << BRW_SURFACE_DEPTH_SHIFT \|
		377	(mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
		378
		379	surf[4] = (brw_get_surface_num_multisamples(mt->num_samples) \|
		380	SET_FIELD(tObj->BaseLevel - mt->first_level, BRW_SURFACE_MIN_LOD));
		381
		382	surf[5] = mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0;
		383
		384	/* Emit relocation to surface contents */
		385	drm_intel_bo_emit_reloc(brw->batch.bo,
		386	*surf_offset + 4,
		387	mt->bo,
		388	surf[1] - mt->bo->offset64,
		389	I915_GEM_DOMAIN_SAMPLER, 0);
		390	}
		391
		392	/**
		393	* Create the constant buffer surface. Vertex/fragment shader constants will be
		394	* read from this buffer with Data Port Read instructions/messages.
		395	*/
		396	void
		397	brw_create_constant_surface(struct brw_context *brw,
		398	drm_intel_bo *bo,
		399	uint32_t offset,
		400	uint32_t size,
		401	uint32_t *out_offset,
		402	bool dword_pitch)
		403	{
		404	uint32_t stride = dword_pitch ? 4 : 16;
		405	uint32_t elements = ALIGN(size, stride) / stride;
		406
		407	brw->vtbl.emit_buffer_surface_state(brw, out_offset, bo, offset,
		408	BRW_SURFACEFORMAT_R32G32B32A32_FLOAT,
		409	elements, stride, false);
		410	}
		411
		412	/**
		413	* Set up a binding table entry for use by stream output logic (transform
		414	* feedback).
		415	*
		416	* buffer_size_minus_1 must be less than BRW_MAX_NUM_BUFFER_ENTRIES.
		417	*/
		418	void
		419	brw_update_sol_surface(struct brw_context *brw,
		420	struct gl_buffer_object *buffer_obj,
		421	uint32_t *out_offset, unsigned num_vector_components,
		422	unsigned stride_dwords, unsigned offset_dwords)
		423	{
		424	struct intel_buffer_object *intel_bo = intel_buffer_object(buffer_obj);
		425	uint32_t offset_bytes = 4 * offset_dwords;
		426	drm_intel_bo *bo = intel_bufferobj_buffer(brw, intel_bo,
		427	offset_bytes,
		428	buffer_obj->Size - offset_bytes);
		429	uint32_t surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 4, 32,
		430	out_offset);
		431	uint32_t pitch_minus_1 = 4*stride_dwords - 1;
		432	size_t size_dwords = buffer_obj->Size / 4;
		433	uint32_t buffer_size_minus_1, width, height, depth, surface_format;
		434
		435	/* FIXME: can we rely on core Mesa to ensure that the buffer isn't
		436	* too big to map using a single binding table entry?
		437	*/
		438	assert((size_dwords - offset_dwords) / stride_dwords
		439	<= BRW_MAX_NUM_BUFFER_ENTRIES);
		440
		441	if (size_dwords > offset_dwords + num_vector_components) {
		442	/* There is room for at least 1 transform feedback output in the buffer.
		443	* Compute the number of additional transform feedback outputs the
		444	* buffer has room for.
		445	*/
		446	buffer_size_minus_1 =
		447	(size_dwords - offset_dwords - num_vector_components) / stride_dwords;
		448	} else {
		449	/* There isn't even room for a single transform feedback output in the
		450	* buffer. We can't configure the binding table entry to prevent output
		451	* entirely; we'll have to rely on the geometry shader to detect
		452	* overflow. But to minimize the damage in case of a bug, set up the
		453	* binding table entry to just allow a single output.
		454	*/
		455	buffer_size_minus_1 = 0;
		456	}
		457	width = buffer_size_minus_1 & 0x7f;
		458	height = (buffer_size_minus_1 & 0xfff80) >> 7;
		459	depth = (buffer_size_minus_1 & 0x7f00000) >> 20;
		460
		461	switch (num_vector_components) {
		462	case 1:
		463	surface_format = BRW_SURFACEFORMAT_R32_FLOAT;
		464	break;
		465	case 2:
		466	surface_format = BRW_SURFACEFORMAT_R32G32_FLOAT;
		467	break;
		468	case 3:
		469	surface_format = BRW_SURFACEFORMAT_R32G32B32_FLOAT;
		470	break;
		471	case 4:
		472	surface_format = BRW_SURFACEFORMAT_R32G32B32A32_FLOAT;
		473	break;
		474	default:
		475	unreachable("Invalid vector size for transform feedback output");
		476	}
		477
		478	surf[0] = BRW_SURFACE_BUFFER << BRW_SURFACE_TYPE_SHIFT \|
		479	BRW_SURFACE_MIPMAPLAYOUT_BELOW << BRW_SURFACE_MIPLAYOUT_SHIFT \|
		480	surface_format << BRW_SURFACE_FORMAT_SHIFT \|
		481	BRW_SURFACE_RC_READ_WRITE;
		482	surf[1] = bo->offset64 + offset_bytes; /* reloc */
		483	surf[2] = (width << BRW_SURFACE_WIDTH_SHIFT \|
		484	height << BRW_SURFACE_HEIGHT_SHIFT);
		485	surf[3] = (depth << BRW_SURFACE_DEPTH_SHIFT \|
		486	pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
		487	surf[4] = 0;
		488	surf[5] = 0;
		489
		490	/* Emit relocation to surface contents. */
		491	drm_intel_bo_emit_reloc(brw->batch.bo,
		492	*out_offset + 4,
		493	bo, offset_bytes,
		494	I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
		495	}
		496
		497	/* Creates a new WM constant buffer reflecting the current fragment program's
		498	* constants, if needed by the fragment program.
		499	*
		500	* Otherwise, constants go through the CURBEs using the brw_constant_buffer
		501	* state atom.
		502	*/
		503	static void
		504	brw_upload_wm_pull_constants(struct brw_context *brw)
		505	{
		506	struct brw_stage_state *stage_state = &brw->wm.base;
		507	/* BRW_NEW_FRAGMENT_PROGRAM */
		508	struct brw_fragment_program *fp =
		509	(struct brw_fragment_program *) brw->fragment_program;
		510	/* BRW_NEW_FS_PROG_DATA */
		511	struct brw_stage_prog_data *prog_data = &brw->wm.prog_data->base;
		512
		513	/* _NEW_PROGRAM_CONSTANTS */
		514	brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &fp->program.Base,
		515	stage_state, prog_data, true);
		516	}
		517
		518	const struct brw_tracked_state brw_wm_pull_constants = {
		519	.dirty = {
		520	.mesa = _NEW_PROGRAM_CONSTANTS,
		521	.brw = BRW_NEW_BATCH \|
		522	BRW_NEW_FRAGMENT_PROGRAM \|
		523	BRW_NEW_FS_PROG_DATA,
		524	},
		525	.emit = brw_upload_wm_pull_constants,
		526	};
		527
		528	/**
		529	* Creates a null renderbuffer surface.
		530	*
		531	* This is used when the shader doesn't write to any color output. An FB
		532	* write to target 0 will still be emitted, because that's how the thread is
		533	* terminated (and computed depth is returned), so we need to have the
		534	* hardware discard the target 0 color output..
		535	*/
		536	static void
		537	brw_emit_null_surface_state(struct brw_context *brw,
		538	unsigned width,
		539	unsigned height,
		540	unsigned samples,
		541	uint32_t *out_offset)
		542	{
		543	/* From the Sandy bridge PRM, Vol4 Part1 p71 (Surface Type: Programming
		544	* Notes):
		545	*
		546	* A null surface will be used in instances where an actual surface is
		547	* not bound. When a write message is generated to a null surface, no
		548	* actual surface is written to. When a read message (including any
		549	* sampling engine message) is generated to a null surface, the result
		550	* is all zeros. Note that a null surface type is allowed to be used
		551	* with all messages, even if it is not specificially indicated as
		552	* supported. All of the remaining fields in surface state are ignored
		553	* for null surfaces, with the following exceptions:
		554	*
		555	* - [DevSNB+]: Width, Height, Depth, and LOD fields must match the
		556	* depth buffer’s corresponding state for all render target surfaces,
		557	* including null.
		558	*
		559	* - Surface Format must be R8G8B8A8_UNORM.
		560	*/
		561	unsigned surface_type = BRW_SURFACE_NULL;
		562	drm_intel_bo *bo = NULL;
		563	unsigned pitch_minus_1 = 0;
		564	uint32_t multisampling_state = 0;
		565	uint32_t surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 4, 32,
		566	out_offset);
		567
		568	if (samples > 1) {
		569	/* On Gen6, null render targets seem to cause GPU hangs when
		570	* multisampling. So work around this problem by rendering into dummy
		571	* color buffer.
		572	*
		573	* To decrease the amount of memory needed by the workaround buffer, we
		574	* set its pitch to 128 bytes (the width of a Y tile). This means that
		575	* the amount of memory needed for the workaround buffer is
		576	* (width_in_tiles + height_in_tiles - 1) tiles.
		577	*
		578	* Note that since the workaround buffer will be interpreted by the
		579	* hardware as an interleaved multisampled buffer, we need to compute
		580	* width_in_tiles and height_in_tiles by dividing the width and height
		581	* by 16 rather than the normal Y-tile size of 32.
		582	*/
		583	unsigned width_in_tiles = ALIGN(width, 16) / 16;
		584	unsigned height_in_tiles = ALIGN(height, 16) / 16;
		585	unsigned size_needed = (width_in_tiles + height_in_tiles - 1) * 4096;
		586	brw_get_scratch_bo(brw, &brw->wm.multisampled_null_render_target_bo,
		587	size_needed);
		588	bo = brw->wm.multisampled_null_render_target_bo;
		589	surface_type = BRW_SURFACE_2D;
		590	pitch_minus_1 = 127;
		591	multisampling_state = brw_get_surface_num_multisamples(samples);
		592	}
		593
		594	surf[0] = (surface_type << BRW_SURFACE_TYPE_SHIFT \|
		595	BRW_SURFACEFORMAT_B8G8R8A8_UNORM << BRW_SURFACE_FORMAT_SHIFT);
		596	if (brw->gen < 6) {
		597	surf[0] \|= (1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT \|
		598	1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT \|
		599	1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT \|
		600	1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT);
		601	}
		602	surf[1] = bo ? bo->offset64 : 0;
		603	surf[2] = ((width - 1) << BRW_SURFACE_WIDTH_SHIFT \|
		604	(height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
		605
		606	/* From Sandy bridge PRM, Vol4 Part1 p82 (Tiled Surface: Programming
		607	* Notes):
		608	*
		609	* If Surface Type is SURFTYPE_NULL, this field must be TRUE
		610	*/
		611	surf[3] = (BRW_SURFACE_TILED \| BRW_SURFACE_TILED_Y \|
		612	pitch_minus_1 << BRW_SURFACE_PITCH_SHIFT);
		613	surf[4] = multisampling_state;
		614	surf[5] = 0;
		615
		616	if (bo) {
		617	drm_intel_bo_emit_reloc(brw->batch.bo,
		618	*out_offset + 4,
		619	bo, 0,
		620	I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER);
		621	}
		622	}
		623
		624	/**
		625	* Sets up a surface state structure to point at the given region.
		626	* While it is only used for the front/back buffer currently, it should be
		627	* usable for further buffers when doing ARB_draw_buffer support.
		628	*/
		629	static uint32_t
		630	brw_update_renderbuffer_surface(struct brw_context *brw,
		631	struct gl_renderbuffer *rb,
		632	bool layered, unsigned unit,
		633	uint32_t surf_index)
		634	{
		635	struct gl_context *ctx = &brw->ctx;
		636	struct intel_renderbuffer *irb = intel_renderbuffer(rb);
		637	struct intel_mipmap_tree *mt = irb->mt;
		638	uint32_t *surf;
		639	uint32_t tile_x, tile_y;
		640	uint32_t format = 0;
		641	uint32_t offset;
		642	/* _NEW_BUFFERS */
		643	mesa_format rb_format = _mesa_get_render_format(ctx, intel_rb_format(irb));
		644	/* BRW_NEW_FS_PROG_DATA */
		645
		646	assert(!layered);
		647
		648	if (rb->TexImage && !brw->has_surface_tile_offset) {
		649	intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y);
		650
		651	if (tile_x != 0 \|\| tile_y != 0) {
		652	/* Original gen4 hardware couldn't draw to a non-tile-aligned
		653	* destination in a miptree unless you actually setup your renderbuffer
		654	* as a miptree and used the fragile lod/array_index/etc. controls to
		655	* select the image. So, instead, we just make a new single-level
		656	* miptree and render into that.
		657	*/
		658	intel_renderbuffer_move_to_temp(brw, irb, false);
		659	mt = irb->mt;
		660	}
		661	}
		662
		663	intel_miptree_used_for_rendering(irb->mt);
		664
		665	surf = brw_state_batch(brw, AUB_TRACE_SURFACE_STATE, 6 * 4, 32, &offset);
		666
		667	format = brw->render_target_format[rb_format];
		668	if (unlikely(!brw->format_supported_as_render_target[rb_format])) {
		669	_mesa_problem(ctx, "%s: renderbuffer format %s unsupported\n",
		670	__func__, _mesa_get_format_name(rb_format));
		671	}
		672
		673	surf[0] = (BRW_SURFACE_2D << BRW_SURFACE_TYPE_SHIFT \|
		674	format << BRW_SURFACE_FORMAT_SHIFT);
		675
		676	/* reloc */
		677	assert(mt->offset % mt->cpp == 0);
		678	surf[1] = (intel_renderbuffer_get_tile_offsets(irb, &tile_x, &tile_y) +
		679	mt->bo->offset64 + mt->offset);
		680
		681	surf[2] = ((rb->Width - 1) << BRW_SURFACE_WIDTH_SHIFT \|
		682	(rb->Height - 1) << BRW_SURFACE_HEIGHT_SHIFT);
		683
		684	surf[3] = (brw_get_surface_tiling_bits(mt->tiling) \|
		685	(mt->pitch - 1) << BRW_SURFACE_PITCH_SHIFT);
		686
		687	surf[4] = brw_get_surface_num_multisamples(mt->num_samples);
		688
		689	assert(brw->has_surface_tile_offset \|\| (tile_x == 0 && tile_y == 0));
		690	/* Note that the low bits of these fields are missing, so
		691	* there's the possibility of getting in trouble.
		692	*/
		693	assert(tile_x % 4 == 0);
		694	assert(tile_y % 2 == 0);
		695	surf[5] = ((tile_x / 4) << BRW_SURFACE_X_OFFSET_SHIFT \|
		696	(tile_y / 2) << BRW_SURFACE_Y_OFFSET_SHIFT \|
		697	(mt->align_h == 4 ? BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
		698
		699	if (brw->gen < 6) {
		700	/* _NEW_COLOR */
		701	if (!ctx->Color.ColorLogicOpEnabled &&
		702	(ctx->Color.BlendEnabled & (1 << unit)))
		703	surf[0] \|= BRW_SURFACE_BLEND_ENABLED;
		704
		705	if (!ctx->Color.ColorMask[unit][0])
		706	surf[0] \|= 1 << BRW_SURFACE_WRITEDISABLE_R_SHIFT;
		707	if (!ctx->Color.ColorMask[unit][1])
		708	surf[0] \|= 1 << BRW_SURFACE_WRITEDISABLE_G_SHIFT;
		709	if (!ctx->Color.ColorMask[unit][2])
		710	surf[0] \|= 1 << BRW_SURFACE_WRITEDISABLE_B_SHIFT;
		711
		712	/* As mentioned above, disable writes to the alpha component when the
		713	* renderbuffer is XRGB.
		714	*/
		715	if (ctx->DrawBuffer->Visual.alphaBits == 0 \|\|
		716	!ctx->Color.ColorMask[unit][3]) {
		717	surf[0] \|= 1 << BRW_SURFACE_WRITEDISABLE_A_SHIFT;
		718	}
		719	}
		720
		721	drm_intel_bo_emit_reloc(brw->batch.bo,
		722	offset + 4,
		723	mt->bo,
		724	surf[1] - mt->bo->offset64,
		725	I915_GEM_DOMAIN_RENDER,
		726	I915_GEM_DOMAIN_RENDER);
		727
		728	return offset;
		729	}
		730
		731	/**
		732	* Construct SURFACE_STATE objects for renderbuffers/draw buffers.
		733	*/
		734	void
		735	brw_update_renderbuffer_surfaces(struct brw_context *brw,
		736	const struct gl_framebuffer *fb,
		737	uint32_t render_target_start,
		738	uint32_t *surf_offset)
		739	{
		740	GLuint i;
		741
		742	/* Update surfaces for drawing buffers */
		743	if (fb->_NumColorDrawBuffers >= 1) {
		744	for (i = 0; i < fb->_NumColorDrawBuffers; i++) {
		745	const uint32_t surf_index = render_target_start + i;
		746
		747	if (intel_renderbuffer(fb->_ColorDrawBuffers[i])) {
		748	surf_offset[surf_index] =
		749	brw->vtbl.update_renderbuffer_surface(
		750	brw, fb->_ColorDrawBuffers[i],
		751	fb->MaxNumLayers > 0, i, surf_index);
		752	} else {
		753	brw->vtbl.emit_null_surface_state(
		754	brw, fb->Width, fb->Height, fb->Visual.samples,
		755	&surf_offset[surf_index]);
		756	}
		757	}
		758	} else {
		759	const uint32_t surf_index = render_target_start;
		760	brw->vtbl.emit_null_surface_state(
		761	brw, fb->Width, fb->Height, fb->Visual.samples,
		762	&surf_offset[surf_index]);
		763	}
		764	}
		765
		766	static void
		767	update_renderbuffer_surfaces(struct brw_context *brw)
		768	{
		769	const struct gl_context *ctx = &brw->ctx;
		770
		771	/* _NEW_BUFFERS \| _NEW_COLOR */
		772	const struct gl_framebuffer *fb = ctx->DrawBuffer;
		773	brw_update_renderbuffer_surfaces(
		774	brw, fb,
		775	brw->wm.prog_data->binding_table.render_target_start,
		776	brw->wm.base.surf_offset);
		777	brw->ctx.NewDriverState \|= BRW_NEW_SURFACES;
		778	}
		779
		780	const struct brw_tracked_state brw_renderbuffer_surfaces = {
		781	.dirty = {
		782	.mesa = _NEW_BUFFERS \|
		783	_NEW_COLOR,
		784	.brw = BRW_NEW_BATCH \|
		785	BRW_NEW_FS_PROG_DATA,
		786	},
		787	.emit = update_renderbuffer_surfaces,
		788	};
		789
		790	const struct brw_tracked_state gen6_renderbuffer_surfaces = {
		791	.dirty = {
		792	.mesa = _NEW_BUFFERS,
		793	.brw = BRW_NEW_BATCH,
		794	},
		795	.emit = update_renderbuffer_surfaces,
		796	};
		797
		798
		799	static void
		800	update_stage_texture_surfaces(struct brw_context *brw,
		801	const struct gl_program *prog,
		802	struct brw_stage_state *stage_state,
		803	bool for_gather)
		804	{
		805	if (!prog)
		806	return;
		807
		808	struct gl_context *ctx = &brw->ctx;
		809
		810	uint32_t *surf_offset = stage_state->surf_offset;
		811
		812	/* BRW_NEW__PROG_DATA /
		813	if (for_gather)
		814	surf_offset += stage_state->prog_data->binding_table.gather_texture_start;
		815	else
		816	surf_offset += stage_state->prog_data->binding_table.texture_start;
		817
		818	unsigned num_samplers = _mesa_fls(prog->SamplersUsed);
		819	for (unsigned s = 0; s < num_samplers; s++) {
		820	surf_offset[s] = 0;
		821
		822	if (prog->SamplersUsed & (1 << s)) {
		823	const unsigned unit = prog->SamplerUnits[s];
		824
		825	/* _NEW_TEXTURE */
		826	if (ctx->Texture.Unit[unit]._Current) {
		827	brw->vtbl.update_texture_surface(ctx, unit, surf_offset + s, for_gather);
		828	}
		829	}
		830	}
		831	}
		832
		833
		834	/**
		835	* Construct SURFACE_STATE objects for enabled textures.
		836	*/
		837	static void
		838	brw_update_texture_surfaces(struct brw_context *brw)
		839	{
		840	/* BRW_NEW_VERTEX_PROGRAM */
		841	struct gl_program vs = (struct gl_program ) brw->vertex_program;
		842
		843	/* BRW_NEW_GEOMETRY_PROGRAM */
		844	struct gl_program gs = (struct gl_program ) brw->geometry_program;
		845
		846	/* BRW_NEW_FRAGMENT_PROGRAM */
		847	struct gl_program fs = (struct gl_program ) brw->fragment_program;
		848
		849	/* _NEW_TEXTURE */
		850	update_stage_texture_surfaces(brw, vs, &brw->vs.base, false);
		851	update_stage_texture_surfaces(brw, gs, &brw->gs.base, false);
		852	update_stage_texture_surfaces(brw, fs, &brw->wm.base, false);
		853
		854	/* emit alternate set of surface state for gather. this
		855	* allows the surface format to be overriden for only the
		856	* gather4 messages. */
		857	if (brw->gen < 8) {
		858	if (vs && vs->UsesGather)
		859	update_stage_texture_surfaces(brw, vs, &brw->vs.base, true);
		860	if (gs && gs->UsesGather)
		861	update_stage_texture_surfaces(brw, gs, &brw->gs.base, true);
		862	if (fs && fs->UsesGather)
		863	update_stage_texture_surfaces(brw, fs, &brw->wm.base, true);
		864	}
		865
		866	brw->ctx.NewDriverState \|= BRW_NEW_SURFACES;
		867	}
		868
		869	const struct brw_tracked_state brw_texture_surfaces = {
		870	.dirty = {
		871	.mesa = _NEW_TEXTURE,
		872	.brw = BRW_NEW_BATCH \|
		873	BRW_NEW_FRAGMENT_PROGRAM \|
		874	BRW_NEW_FS_PROG_DATA \|
		875	BRW_NEW_GEOMETRY_PROGRAM \|
		876	BRW_NEW_GS_PROG_DATA \|
		877	BRW_NEW_TEXTURE_BUFFER \|
		878	BRW_NEW_VERTEX_PROGRAM \|
		879	BRW_NEW_VS_PROG_DATA,
		880	},
		881	.emit = brw_update_texture_surfaces,
		882	};
		883
		884	void
		885	brw_upload_ubo_surfaces(struct brw_context *brw,
		886	struct gl_shader *shader,
		887	struct brw_stage_state *stage_state,
		888	struct brw_stage_prog_data *prog_data,
		889	bool dword_pitch)
		890	{
		891	struct gl_context *ctx = &brw->ctx;
		892
		893	if (!shader)
		894	return;
		895
		896	uint32_t *surf_offsets =
		897	&stage_state->surf_offset[prog_data->binding_table.ubo_start];
		898
		899	for (int i = 0; i < shader->NumUniformBlocks; i++) {
		900	struct gl_uniform_buffer_binding *binding;
		901	struct intel_buffer_object *intel_bo;
		902
		903	binding = &ctx->UniformBufferBindings[shader->UniformBlocks[i].Binding];
		904	intel_bo = intel_buffer_object(binding->BufferObject);
		905	drm_intel_bo *bo =
		906	intel_bufferobj_buffer(brw, intel_bo,
		907	binding->Offset,
		908	binding->BufferObject->Size - binding->Offset);
		909
		910	/* Because behavior for referencing outside of the binding's size in the
		911	* glBindBufferRange case is undefined, we can just bind the whole buffer
		912	* glBindBufferBase wants and be a correct implementation.
		913	*/
		914	brw_create_constant_surface(brw, bo, binding->Offset,
		915	bo->size - binding->Offset,
		916	&surf_offsets[i],
		917	dword_pitch);
		918	}
		919
		920	if (shader->NumUniformBlocks)
		921	brw->ctx.NewDriverState \|= BRW_NEW_SURFACES;
		922	}
		923
		924	static void
		925	brw_upload_wm_ubo_surfaces(struct brw_context *brw)
		926	{
		927	struct gl_context *ctx = &brw->ctx;
		928	/* _NEW_PROGRAM */
		929	struct gl_shader_program *prog = ctx->_Shader->_CurrentFragmentProgram;
		930
		931	if (!prog)
		932	return;
		933
		934	/* BRW_NEW_FS_PROG_DATA */
		935	brw_upload_ubo_surfaces(brw, prog->_LinkedShaders[MESA_SHADER_FRAGMENT],
		936	&brw->wm.base, &brw->wm.prog_data->base, true);
		937	}
		938
		939	const struct brw_tracked_state brw_wm_ubo_surfaces = {
		940	.dirty = {
		941	.mesa = _NEW_PROGRAM,
		942	.brw = BRW_NEW_BATCH \|
		943	BRW_NEW_FS_PROG_DATA \|
		944	BRW_NEW_UNIFORM_BUFFER,
		945	},
		946	.emit = brw_upload_wm_ubo_surfaces,
		947	};
		948
		949	void
		950	brw_upload_abo_surfaces(struct brw_context *brw,
		951	struct gl_shader_program *prog,
		952	struct brw_stage_state *stage_state,
		953	struct brw_stage_prog_data *prog_data)
		954	{
		955	struct gl_context *ctx = &brw->ctx;
		956	uint32_t *surf_offsets =
		957	&stage_state->surf_offset[prog_data->binding_table.abo_start];
		958
		959	for (int i = 0; i < prog->NumAtomicBuffers; i++) {
		960	struct gl_atomic_buffer_binding *binding =
		961	&ctx->AtomicBufferBindings[prog->AtomicBuffers[i].Binding];
		962	struct intel_buffer_object *intel_bo =
		963	intel_buffer_object(binding->BufferObject);
		964	drm_intel_bo *bo = intel_bufferobj_buffer(
		965	brw, intel_bo, binding->Offset, intel_bo->Base.Size - binding->Offset);
		966
		967	brw->vtbl.emit_buffer_surface_state(brw, &surf_offsets[i], bo,
		968	binding->Offset, BRW_SURFACEFORMAT_RAW,
		969	bo->size - binding->Offset, 1, true);
		970	}
		971
		972	if (prog->NumAtomicBuffers)
		973	brw->ctx.NewDriverState \|= BRW_NEW_SURFACES;
		974	}
		975
		976	static void
		977	brw_upload_wm_abo_surfaces(struct brw_context *brw)
		978	{
		979	struct gl_context *ctx = &brw->ctx;
		980	/* _NEW_PROGRAM */
		981	struct gl_shader_program *prog = ctx->Shader._CurrentFragmentProgram;
		982
		983	if (prog) {
		984	/* BRW_NEW_FS_PROG_DATA */
		985	brw_upload_abo_surfaces(brw, prog, &brw->wm.base,
		986	&brw->wm.prog_data->base);
		987	}
		988	}
		989
		990	const struct brw_tracked_state brw_wm_abo_surfaces = {
		991	.dirty = {
		992	.mesa = _NEW_PROGRAM,
		993	.brw = BRW_NEW_ATOMIC_BUFFER \|
		994	BRW_NEW_BATCH \|
		995	BRW_NEW_FS_PROG_DATA,
		996	},
		997	.emit = brw_upload_wm_abo_surfaces,
		998	};
		999
		1000	static void
		1001	brw_upload_cs_abo_surfaces(struct brw_context *brw)
		1002	{
		1003	struct gl_context *ctx = &brw->ctx;
		1004	/* _NEW_PROGRAM */
		1005	struct gl_shader_program *prog =
		1006	ctx->_Shader->CurrentProgram[MESA_SHADER_COMPUTE];
		1007
		1008	if (prog) {
		1009	/* BRW_NEW_CS_PROG_DATA */
		1010	brw_upload_abo_surfaces(brw, prog, &brw->cs.base,
		1011	&brw->cs.prog_data->base);
		1012	}
		1013	}
		1014
		1015	const struct brw_tracked_state brw_cs_abo_surfaces = {
		1016	.dirty = {
		1017	.mesa = _NEW_PROGRAM,
		1018	.brw = BRW_NEW_ATOMIC_BUFFER \|
		1019	BRW_NEW_BATCH \|
		1020	BRW_NEW_CS_PROG_DATA,
		1021	},
		1022	.emit = brw_upload_cs_abo_surfaces,
		1023	};
		1024
		1025	void
		1026	gen4_init_vtable_surface_functions(struct brw_context *brw)
		1027	{
		1028	brw->vtbl.update_texture_surface = brw_update_texture_surface;
		1029	brw->vtbl.update_renderbuffer_surface = brw_update_renderbuffer_surface;
		1030	brw->vtbl.emit_null_surface_state = brw_emit_null_surface_state;
		1031	brw->vtbl.emit_buffer_surface_state = gen4_emit_buffer_surface_state;
		1032	}

Subversion Repositories Kolibri OS

(root)/contrib/sdk/sources/Mesa/mesa-10.6.0/src/mesa/drivers/dri/i965/brw_wm_surface_state.c – Rev 5564