WebSVN – Kolibri OS – Blame – /contrib/sdk/sources/Mesa/mesa-10.6.0/src/mesa/drivers/dri/i965/gen8_ps_state.c

Rev	Author	Line No.	Line
5564	serge	1	/*
		2	* Copyright © 2012 Intel Corporation
		3	*
		4	* Permission is hereby granted, free of charge, to any person obtaining a
		5	* copy of this software and associated documentation files (the "Software"),
		6	* to deal in the Software without restriction, including without limitation
		7	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
		8	* and/or sell copies of the Software, and to permit persons to whom the
		9	* Software is furnished to do so, subject to the following conditions:
		10	*
		11	* The above copyright notice and this permission notice (including the next
		12	* paragraph) shall be included in all copies or substantial portions of the
		13	* Software.
		14	*
		15	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
		16	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
		17	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
		18	* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
		19	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
		20	* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
		21	* IN THE SOFTWARE.
		22	*/
		23
		24	#include <stdbool.h>
		25	#include "program/program.h"
		26	#include "brw_state.h"
		27	#include "brw_defines.h"
		28	#include "intel_batchbuffer.h"
		29
		30	void
		31	gen8_upload_ps_extra(struct brw_context *brw,
		32	const struct gl_fragment_program *fp,
		33	const struct brw_wm_prog_data *prog_data,
		34	bool multisampled_fbo)
		35	{
		36	struct gl_context *ctx = &brw->ctx;
		37	uint32_t dw1 = 0;
		38
		39	dw1 \|= GEN8_PSX_PIXEL_SHADER_VALID;
		40	dw1 \|= prog_data->computed_depth_mode << GEN8_PSX_COMPUTED_DEPTH_MODE_SHIFT;
		41
		42	if (prog_data->uses_kill)
		43	dw1 \|= GEN8_PSX_KILL_ENABLE;
		44
		45	if (prog_data->num_varying_inputs != 0)
		46	dw1 \|= GEN8_PSX_ATTRIBUTE_ENABLE;
		47
		48	if (fp->Base.InputsRead & VARYING_BIT_POS)
		49	dw1 \|= GEN8_PSX_USES_SOURCE_DEPTH \| GEN8_PSX_USES_SOURCE_W;
		50
		51	if (multisampled_fbo &&
		52	_mesa_get_min_invocations_per_fragment(ctx, fp, false) > 1)
		53	dw1 \|= GEN8_PSX_SHADER_IS_PER_SAMPLE;
		54
		55	if (fp->Base.SystemValuesRead & SYSTEM_BIT_SAMPLE_MASK_IN)
		56	dw1 \|= GEN8_PSX_SHADER_USES_INPUT_COVERAGE_MASK;
		57
		58	if (prog_data->uses_omask)
		59	dw1 \|= GEN8_PSX_OMASK_TO_RENDER_TARGET;
		60
		61	BEGIN_BATCH(2);
		62	OUT_BATCH(_3DSTATE_PS_EXTRA << 16 \| (2 - 2));
		63	OUT_BATCH(dw1);
		64	ADVANCE_BATCH();
		65	}
		66
		67	static void
		68	upload_ps_extra(struct brw_context *brw)
		69	{
		70	/* BRW_NEW_FRAGMENT_PROGRAM */
		71	const struct brw_fragment_program *fp =
		72	brw_fragment_program_const(brw->fragment_program);
		73	/* BRW_NEW_FS_PROG_DATA */
		74	const struct brw_wm_prog_data *prog_data = brw->wm.prog_data;
		75	/* BRW_NEW_NUM_SAMPLES \| _NEW_MULTISAMPLE */
		76	const bool multisampled_fbo = brw->num_samples > 1;
		77
		78	gen8_upload_ps_extra(brw, &fp->program, prog_data, multisampled_fbo);
		79	}
		80
		81	const struct brw_tracked_state gen8_ps_extra = {
		82	.dirty = {
		83	.mesa = _NEW_MULTISAMPLE,
		84	.brw = BRW_NEW_CONTEXT \|
		85	BRW_NEW_FRAGMENT_PROGRAM \|
		86	BRW_NEW_FS_PROG_DATA \|
		87	BRW_NEW_NUM_SAMPLES,
		88	},
		89	.emit = upload_ps_extra,
		90	};
		91
		92	static void
		93	upload_wm_state(struct brw_context *brw)
		94	{
		95	struct gl_context *ctx = &brw->ctx;
		96	uint32_t dw1 = 0;
		97
		98	dw1 \|= GEN7_WM_STATISTICS_ENABLE;
		99	dw1 \|= GEN7_WM_LINE_AA_WIDTH_1_0;
		100	dw1 \|= GEN7_WM_LINE_END_CAP_AA_WIDTH_0_5;
		101	dw1 \|= GEN7_WM_POINT_RASTRULE_UPPER_RIGHT;
		102
		103	/* _NEW_LINE */
		104	if (ctx->Line.StippleFlag)
		105	dw1 \|= GEN7_WM_LINE_STIPPLE_ENABLE;
		106
		107	/* _NEW_POLYGON */
		108	if (ctx->Polygon.StippleFlag)
		109	dw1 \|= GEN7_WM_POLYGON_STIPPLE_ENABLE;
		110
		111	/* BRW_NEW_FS_PROG_DATA */
		112	dw1 \|= brw->wm.prog_data->barycentric_interp_modes <<
		113	GEN7_WM_BARYCENTRIC_INTERPOLATION_MODE_SHIFT;
		114
		115	BEGIN_BATCH(2);
		116	OUT_BATCH(_3DSTATE_WM << 16 \| (2 - 2));
		117	OUT_BATCH(dw1);
		118	ADVANCE_BATCH();
		119	}
		120
		121	const struct brw_tracked_state gen8_wm_state = {
		122	.dirty = {
		123	.mesa = _NEW_LINE \|
		124	_NEW_POLYGON,
		125	.brw = BRW_NEW_CONTEXT \|
		126	BRW_NEW_FS_PROG_DATA,
		127	},
		128	.emit = upload_wm_state,
		129	};
		130
		131	void
		132	gen8_upload_ps_state(struct brw_context *brw,
		133	const struct gl_fragment_program *fp,
		134	const struct brw_stage_state *stage_state,
		135	const struct brw_wm_prog_data *prog_data,
		136	uint32_t fast_clear_op)
		137	{
		138	struct gl_context *ctx = &brw->ctx;
		139	uint32_t dw3 = 0, dw6 = 0, dw7 = 0, ksp0, ksp2 = 0;
		140
		141	/* Initialize the execution mask with VMask. Otherwise, derivatives are
		142	* incorrect for subspans where some of the pixels are unlit. We believe
		143	* the bit just didn't take effect in previous generations.
		144	*/
		145	dw3 \|= GEN7_PS_VECTOR_MASK_ENABLE;
		146
		147	const unsigned sampler_count =
		148	DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4);
		149	dw3 \|= SET_FIELD(sampler_count, GEN7_PS_SAMPLER_COUNT);
		150
		151	/* BRW_NEW_FS_PROG_DATA */
		152	dw3 \|=
		153	((prog_data->base.binding_table.size_bytes / 4) <<
		154	GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT);
		155
		156	if (prog_data->base.use_alt_mode)
		157	dw3 \|= GEN7_PS_FLOATING_POINT_MODE_ALT;
		158
		159	/* 3DSTATE_PS expects the number of threads per PSD, which is always 64;
		160	* it implicitly scales for different GT levels (which have some # of PSDs).
		161	*
		162	* In Gen8 the format is U8-2 whereas in Gen9 it is U8-1.
		163	*/
		164	if (brw->gen >= 9)
		165	dw6 \|= (64 - 1) << HSW_PS_MAX_THREADS_SHIFT;
		166	else
		167	dw6 \|= (64 - 2) << HSW_PS_MAX_THREADS_SHIFT;
		168
		169	if (prog_data->base.nr_params > 0)
		170	dw6 \|= GEN7_PS_PUSH_CONSTANT_ENABLE;
		171
		172	/* From the documentation for this packet:
		173	* "If the PS kernel does not need the Position XY Offsets to
		174	* compute a Position Value, then this field should be programmed
		175	* to POSOFFSET_NONE."
		176	*
		177	* "SW Recommendation: If the PS kernel needs the Position Offsets
		178	* to compute a Position XY value, this field should match Position
		179	* ZW Interpolation Mode to ensure a consistent position.xyzw
		180	* computation."
		181	*
		182	* We only require XY sample offsets. So, this recommendation doesn't
		183	* look useful at the moment. We might need this in future.
		184	*/
		185	if (prog_data->uses_pos_offset)
		186	dw6 \|= GEN7_PS_POSOFFSET_SAMPLE;
		187	else
		188	dw6 \|= GEN7_PS_POSOFFSET_NONE;
		189
		190	dw6 \|= fast_clear_op;
		191
		192	/* _NEW_MULTISAMPLE
		193	* In case of non 1x per sample shading, only one of SIMD8 and SIMD16
		194	* should be enabled. We do 'SIMD16 only' dispatch if a SIMD16 shader
		195	* is successfully compiled. In majority of the cases that bring us
		196	* better performance than 'SIMD8 only' dispatch.
		197	*/
		198	int min_invocations_per_fragment =
		199	_mesa_get_min_invocations_per_fragment(ctx, fp, false);
		200	assert(min_invocations_per_fragment >= 1);
		201
		202	if (prog_data->prog_offset_16 \|\| prog_data->no_8) {
		203	dw6 \|= GEN7_PS_16_DISPATCH_ENABLE;
		204	if (!prog_data->no_8 && min_invocations_per_fragment == 1) {
		205	dw6 \|= GEN7_PS_8_DISPATCH_ENABLE;
		206	dw7 \|= (prog_data->base.dispatch_grf_start_reg <<
		207	GEN7_PS_DISPATCH_START_GRF_SHIFT_0);
		208	dw7 \|= (prog_data->dispatch_grf_start_reg_16 <<
		209	GEN7_PS_DISPATCH_START_GRF_SHIFT_2);
		210	ksp0 = stage_state->prog_offset;
		211	ksp2 = stage_state->prog_offset + prog_data->prog_offset_16;
		212	} else {
		213	dw7 \|= (prog_data->dispatch_grf_start_reg_16 <<
		214	GEN7_PS_DISPATCH_START_GRF_SHIFT_0);
		215
		216	ksp0 = stage_state->prog_offset + prog_data->prog_offset_16;
		217	}
		218	} else {
		219	dw6 \|= GEN7_PS_8_DISPATCH_ENABLE;
		220	dw7 \|= (prog_data->base.dispatch_grf_start_reg <<
		221	GEN7_PS_DISPATCH_START_GRF_SHIFT_0);
		222	ksp0 = stage_state->prog_offset;
		223	}
		224
		225	BEGIN_BATCH(12);
		226	OUT_BATCH(_3DSTATE_PS << 16 \| (12 - 2));
		227	OUT_BATCH(ksp0);
		228	OUT_BATCH(0);
		229	OUT_BATCH(dw3);
		230	if (prog_data->base.total_scratch) {
		231	OUT_RELOC64(stage_state->scratch_bo,
		232	I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
		233	ffs(prog_data->base.total_scratch) - 11);
		234	} else {
		235	OUT_BATCH(0);
		236	OUT_BATCH(0);
		237	}
		238	OUT_BATCH(dw6);
		239	OUT_BATCH(dw7);
		240	OUT_BATCH(0); /* kernel 1 pointer */
		241	OUT_BATCH(0);
		242	OUT_BATCH(ksp2);
		243	OUT_BATCH(0);
		244	ADVANCE_BATCH();
		245	}
		246
		247	static void
		248	upload_ps_state(struct brw_context *brw)
		249	{
		250	/* BRW_NEW_FS_PROG_DATA */
		251	const struct brw_wm_prog_data *prog_data = brw->wm.prog_data;
		252	gen8_upload_ps_state(brw, brw->fragment_program, &brw->wm.base, prog_data,
		253	brw->wm.fast_clear_op);
		254	}
		255
		256	const struct brw_tracked_state gen8_ps_state = {
		257	.dirty = {
		258	.mesa = _NEW_MULTISAMPLE,
		259	.brw = BRW_NEW_BATCH \|
		260	BRW_NEW_FRAGMENT_PROGRAM \|
		261	BRW_NEW_FS_PROG_DATA,
		262	},
		263	.emit = upload_ps_state,
		264	};

Subversion Repositories Kolibri OS

(root)/contrib/sdk/sources/Mesa/mesa-10.6.0/src/mesa/drivers/dri/i965/gen8_ps_state.c – Rev 5568