WebSVN – Kolibri OS – Blame – /contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/ilo/shader/ilo_shader_vs.c

Rev	Author	Line No.	Line
5564	serge	1	/*
		2	* Mesa 3-D graphics library
		3	*
		4	* Copyright (C) 2012-2013 LunarG, Inc.
		5	*
		6	* Permission is hereby granted, free of charge, to any person obtaining a
		7	* copy of this software and associated documentation files (the "Software"),
		8	* to deal in the Software without restriction, including without limitation
		9	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
		10	* and/or sell copies of the Software, and to permit persons to whom the
		11	* Software is furnished to do so, subject to the following conditions:
		12	*
		13	* The above copyright notice and this permission notice shall be included
		14	* in all copies or substantial portions of the Software.
		15	*
		16	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
		17	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
		18	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
		19	* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
		20	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
		21	* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
		22	* DEALINGS IN THE SOFTWARE.
		23	*
		24	* Authors:
		25	* Chia-I Wu
		26	*/
		27
		28	#include "tgsi/tgsi_dump.h"
		29	#include "tgsi/tgsi_util.h"
		30	#include "toy_compiler.h"
		31	#include "toy_tgsi.h"
		32	#include "toy_legalize.h"
		33	#include "toy_optimize.h"
		34	#include "toy_helpers.h"
		35	#include "ilo_shader_internal.h"
		36
		37	struct vs_compile_context {
		38	struct ilo_shader *shader;
		39	const struct ilo_shader_variant *variant;
		40
		41	struct toy_compiler tc;
		42	struct toy_tgsi tgsi;
		43	int const_cache;
		44
		45	int output_map[PIPE_MAX_SHADER_OUTPUTS];
		46
		47	int num_grf_per_vrf;
		48	int first_const_grf;
		49	int first_ucp_grf;
		50	int first_vue_grf;
		51	int first_free_grf;
		52	int last_free_grf;
		53
		54	int first_free_mrf;
		55	int last_free_mrf;
		56	};
		57
		58	static void
		59	vs_lower_opcode_tgsi_in(struct vs_compile_context *vcc,
		60	struct toy_dst dst, int dim, int idx)
		61	{
		62	struct toy_compiler *tc = &vcc->tc;
		63	int slot;
		64
		65	assert(!dim);
		66
		67	slot = toy_tgsi_find_input(&vcc->tgsi, idx);
		68	if (slot >= 0) {
		69	const int first_in_grf = vcc->first_vue_grf +
		70	(vcc->shader->in.count - vcc->tgsi.num_inputs);
		71	const int grf = first_in_grf + vcc->tgsi.inputs[slot].semantic_index;
		72	const struct toy_src src = tsrc(TOY_FILE_GRF, grf, 0);
		73
		74	tc_MOV(tc, dst, src);
		75	}
		76	else {
		77	/* undeclared input */
		78	tc_MOV(tc, dst, tsrc_imm_f(0.0f));
		79	}
		80	}
		81
		82	static bool
		83	vs_lower_opcode_tgsi_const_pcb(struct vs_compile_context *vcc,
		84	struct toy_dst dst, int dim,
		85	struct toy_src idx)
		86	{
		87	const int i = idx.val32;
		88	const int grf = vcc->first_const_grf + i / 2;
		89	const int grf_subreg = (i & 1) * 16;
		90	struct toy_src src;
		91
		92	if (!vcc->variant->use_pcb \|\| dim != 0 \|\| idx.file != TOY_FILE_IMM \|\|
		93	grf >= vcc->first_ucp_grf)
		94	return false;
		95
		96
		97	src = tsrc_rect(tsrc(TOY_FILE_GRF, grf, grf_subreg), TOY_RECT_041);
		98	tc_MOV(&vcc->tc, dst, src);
		99
		100	return true;
		101	}
		102
		103	static void
		104	vs_lower_opcode_tgsi_const_gen6(struct vs_compile_context *vcc,
		105	struct toy_dst dst, int dim,
		106	struct toy_src idx)
		107	{
		108	const struct toy_dst header =
		109	tdst_ud(tdst(TOY_FILE_MRF, vcc->first_free_mrf, 0));
		110	const struct toy_dst block_offsets =
		111	tdst_ud(tdst(TOY_FILE_MRF, vcc->first_free_mrf + 1, 0));
		112	const struct toy_src r0 = tsrc_ud(tsrc(TOY_FILE_GRF, 0, 0));
		113	struct toy_compiler *tc = &vcc->tc;
		114	unsigned msg_type, msg_ctrl, msg_len;
		115	struct toy_inst *inst;
		116	struct toy_src desc;
		117
		118	if (vs_lower_opcode_tgsi_const_pcb(vcc, dst, dim, idx))
		119	return;
		120
		121	/* set message header */
		122	inst = tc_MOV(tc, header, r0);
		123	inst->mask_ctrl = GEN6_MASKCTRL_NOMASK;
		124
		125	/* set block offsets */
		126	tc_MOV(tc, block_offsets, idx);
		127
		128	msg_type = GEN6_MSG_DP_OWORD_DUAL_BLOCK_READ;
		129	msg_ctrl = GEN6_MSG_DP_OWORD_DUAL_BLOCK_SIZE_1;;
		130	msg_len = 2;
		131
		132	desc = tsrc_imm_mdesc_data_port(tc, false, msg_len, 1, true, false,
		133	msg_type, msg_ctrl, vcc->shader->bt.const_base + dim);
		134
		135	tc_SEND(tc, dst, tsrc_from(header), desc, vcc->const_cache);
		136	}
		137
		138	static void
		139	vs_lower_opcode_tgsi_const_gen7(struct vs_compile_context *vcc,
		140	struct toy_dst dst, int dim,
		141	struct toy_src idx)
		142	{
		143	struct toy_compiler *tc = &vcc->tc;
		144	const struct toy_dst offset =
		145	tdst_ud(tdst(TOY_FILE_MRF, vcc->first_free_mrf, 0));
		146	struct toy_src desc;
		147
		148	if (vs_lower_opcode_tgsi_const_pcb(vcc, dst, dim, idx))
		149	return;
		150
		151	/*
		152	* In 259b65e2e7938de4aab323033cfe2b33369ddb07, pull constant load was
		153	* changed from OWord Dual Block Read to ld to increase performance in the
		154	* classic driver. Since we use the constant cache instead of the data
		155	* cache, I wonder if we still want to follow the classic driver.
		156	*/
		157
		158	/* set offset */
		159	tc_MOV(tc, offset, idx);
		160
		161	desc = tsrc_imm_mdesc_sampler(tc, 1, 1, false,
		162	GEN6_MSG_SAMPLER_SIMD4X2,
		163	GEN6_MSG_SAMPLER_LD,
		164	0,
		165	vcc->shader->bt.const_base + dim);
		166
		167	tc_SEND(tc, dst, tsrc_from(offset), desc, GEN6_SFID_SAMPLER);
		168	}
		169
		170	static void
		171	vs_lower_opcode_tgsi_imm(struct vs_compile_context *vcc,
		172	struct toy_dst dst, int idx)
		173	{
		174	const uint32_t *imm;
		175	int ch;
		176
		177	imm = toy_tgsi_get_imm(&vcc->tgsi, idx, NULL);
		178
		179	for (ch = 0; ch < 4; ch++) {
		180	/* raw moves */
		181	tc_MOV(&vcc->tc,
		182	tdst_writemask(tdst_ud(dst), 1 << ch),
		183	tsrc_imm_ud(imm[ch]));
		184	}
		185	}
		186
		187
		188	static void
		189	vs_lower_opcode_tgsi_sv(struct vs_compile_context *vcc,
		190	struct toy_dst dst, int dim, int idx)
		191	{
		192	struct toy_compiler *tc = &vcc->tc;
		193	const struct toy_tgsi *tgsi = &vcc->tgsi;
		194	int slot;
		195
		196	assert(!dim);
		197
		198	slot = toy_tgsi_find_system_value(tgsi, idx);
		199	if (slot < 0)
		200	return;
		201
		202	switch (tgsi->system_values[slot].semantic_name) {
		203	case TGSI_SEMANTIC_INSTANCEID:
		204	case TGSI_SEMANTIC_VERTEXID:
		205	/*
		206	* In 3DSTATE_VERTEX_ELEMENTS, we prepend an extra vertex element for
		207	* the generated IDs, with VID in the X channel and IID in the Y
		208	* channel.
		209	*/
		210	{
		211	const int grf = vcc->first_vue_grf;
		212	const struct toy_src src = tsrc(TOY_FILE_GRF, grf, 0);
		213	const enum toy_swizzle swizzle =
		214	(tgsi->system_values[slot].semantic_name ==
		215	TGSI_SEMANTIC_INSTANCEID) ? TOY_SWIZZLE_Y : TOY_SWIZZLE_X;
		216
		217	tc_MOV(tc, tdst_d(dst), tsrc_d(tsrc_swizzle1(src, swizzle)));
		218	}
		219	break;
		220	case TGSI_SEMANTIC_PRIMID:
		221	default:
		222	tc_fail(tc, "unhandled system value");
		223	tc_MOV(tc, dst, tsrc_imm_d(0));
		224	break;
		225	}
		226	}
		227
		228	static void
		229	vs_lower_opcode_tgsi_direct(struct vs_compile_context *vcc,
		230	struct toy_inst *inst)
		231	{
		232	struct toy_compiler *tc = &vcc->tc;
		233	int dim, idx;
		234
		235	assert(inst->src[0].file == TOY_FILE_IMM);
		236	dim = inst->src[0].val32;
		237
		238	assert(inst->src[1].file == TOY_FILE_IMM);
		239	idx = inst->src[1].val32;
		240
		241	switch (inst->opcode) {
		242	case TOY_OPCODE_TGSI_IN:
		243	vs_lower_opcode_tgsi_in(vcc, inst->dst, dim, idx);
		244	break;
		245	case TOY_OPCODE_TGSI_CONST:
		246	if (ilo_dev_gen(tc->dev) >= ILO_GEN(7))
		247	vs_lower_opcode_tgsi_const_gen7(vcc, inst->dst, dim, inst->src[1]);
		248	else
		249	vs_lower_opcode_tgsi_const_gen6(vcc, inst->dst, dim, inst->src[1]);
		250	break;
		251	case TOY_OPCODE_TGSI_SV:
		252	vs_lower_opcode_tgsi_sv(vcc, inst->dst, dim, idx);
		253	break;
		254	case TOY_OPCODE_TGSI_IMM:
		255	assert(!dim);
		256	vs_lower_opcode_tgsi_imm(vcc, inst->dst, idx);
		257	break;
		258	default:
		259	tc_fail(tc, "unhandled TGSI fetch");
		260	break;
		261	}
		262
		263	tc_discard_inst(tc, inst);
		264	}
		265
		266	static void
		267	vs_lower_opcode_tgsi_indirect(struct vs_compile_context *vcc,
		268	struct toy_inst *inst)
		269	{
		270	struct toy_compiler *tc = &vcc->tc;
		271	enum tgsi_file_type file;
		272	int dim, idx;
		273	struct toy_src indirect_dim, indirect_idx;
		274
		275	assert(inst->src[0].file == TOY_FILE_IMM);
		276	file = inst->src[0].val32;
		277
		278	assert(inst->src[1].file == TOY_FILE_IMM);
		279	dim = inst->src[1].val32;
		280	indirect_dim = inst->src[2];
		281
		282	assert(inst->src[3].file == TOY_FILE_IMM);
		283	idx = inst->src[3].val32;
		284	indirect_idx = inst->src[4];
		285
		286	/* no dimension indirection */
		287	assert(indirect_dim.file == TOY_FILE_IMM);
		288	dim += indirect_dim.val32;
		289
		290	switch (inst->opcode) {
		291	case TOY_OPCODE_TGSI_INDIRECT_FETCH:
		292	if (file == TGSI_FILE_CONSTANT) {
		293	if (idx) {
		294	struct toy_dst tmp = tc_alloc_tmp(tc);
		295
		296	tc_ADD(tc, tmp, indirect_idx, tsrc_imm_d(idx));
		297	indirect_idx = tsrc_from(tmp);
		298	}
		299
		300	if (ilo_dev_gen(tc->dev) >= ILO_GEN(7))
		301	vs_lower_opcode_tgsi_const_gen7(vcc, inst->dst, dim, indirect_idx);
		302	else
		303	vs_lower_opcode_tgsi_const_gen6(vcc, inst->dst, dim, indirect_idx);
		304	break;
		305	}
		306	/* fall through */
		307	case TOY_OPCODE_TGSI_INDIRECT_STORE:
		308	default:
		309	tc_fail(tc, "unhandled TGSI indirection");
		310	break;
		311	}
		312
		313	tc_discard_inst(tc, inst);
		314	}
		315
		316	/**
		317	* Emit instructions to move sampling parameters to the message registers.
		318	*/
		319	static int
		320	vs_add_sampler_params(struct toy_compiler *tc, int msg_type, int base_mrf,
		321	struct toy_src coords, int num_coords,
		322	struct toy_src bias_or_lod, struct toy_src ref_or_si,
		323	struct toy_src ddx, struct toy_src ddy, int num_derivs)
		324	{
		325	const unsigned coords_writemask = (1 << num_coords) - 1;
		326	struct toy_dst m[3];
		327	int num_params, i;
		328
		329	assert(num_coords <= 4);
		330	assert(num_derivs <= 3 && num_derivs <= num_coords);
		331
		332	for (i = 0; i < Elements(m); i++)
		333	m[i] = tdst(TOY_FILE_MRF, base_mrf + i, 0);
		334
		335	switch (msg_type) {
		336	case GEN6_MSG_SAMPLER_SAMPLE_L:
		337	tc_MOV(tc, tdst_writemask(m[0], coords_writemask), coords);
		338	tc_MOV(tc, tdst_writemask(m[1], TOY_WRITEMASK_X), bias_or_lod);
		339	num_params = 5;
		340	break;
		341	case GEN6_MSG_SAMPLER_SAMPLE_D:
		342	tc_MOV(tc, tdst_writemask(m[0], coords_writemask), coords);
		343	tc_MOV(tc, tdst_writemask(m[1], TOY_WRITEMASK_XZ),
		344	tsrc_swizzle(ddx, 0, 0, 1, 1));
		345	tc_MOV(tc, tdst_writemask(m[1], TOY_WRITEMASK_YW),
		346	tsrc_swizzle(ddy, 0, 0, 1, 1));
		347	if (num_derivs > 2) {
		348	tc_MOV(tc, tdst_writemask(m[2], TOY_WRITEMASK_X),
		349	tsrc_swizzle1(ddx, 2));
		350	tc_MOV(tc, tdst_writemask(m[2], TOY_WRITEMASK_Y),
		351	tsrc_swizzle1(ddy, 2));
		352	}
		353	num_params = 4 + num_derivs * 2;
		354	break;
		355	case GEN6_MSG_SAMPLER_SAMPLE_L_C:
		356	tc_MOV(tc, tdst_writemask(m[0], coords_writemask), coords);
		357	tc_MOV(tc, tdst_writemask(m[1], TOY_WRITEMASK_X), ref_or_si);
		358	tc_MOV(tc, tdst_writemask(m[1], TOY_WRITEMASK_Y), bias_or_lod);
		359	num_params = 6;
		360	break;
		361	case GEN6_MSG_SAMPLER_LD:
		362	assert(num_coords <= 3);
		363	tc_MOV(tc, tdst_writemask(tdst_d(m[0]), coords_writemask), coords);
		364	tc_MOV(tc, tdst_writemask(tdst_d(m[0]), TOY_WRITEMASK_W), bias_or_lod);
		365	if (ilo_dev_gen(tc->dev) >= ILO_GEN(7)) {
		366	num_params = 4;
		367	}
		368	else {
		369	tc_MOV(tc, tdst_writemask(tdst_d(m[1]), TOY_WRITEMASK_X), ref_or_si);
		370	num_params = 5;
		371	}
		372	break;
		373	case GEN6_MSG_SAMPLER_RESINFO:
		374	tc_MOV(tc, tdst_writemask(tdst_d(m[0]), TOY_WRITEMASK_X), bias_or_lod);
		375	num_params = 1;
		376	break;
		377	default:
		378	tc_fail(tc, "unknown sampler opcode");
		379	num_params = 0;
		380	break;
		381	}
		382
		383	return (num_params + 3) / 4;
		384	}
		385
		386	/**
		387	* Set up message registers and return the message descriptor for sampling.
		388	*/
		389	static struct toy_src
		390	vs_prepare_tgsi_sampling(struct vs_compile_context *vcc,
		391	const struct toy_inst *inst,
		392	int base_mrf, unsigned *ret_sampler_index)
		393	{
		394	struct toy_compiler *tc = &vcc->tc;
		395	unsigned simd_mode, msg_type, msg_len, sampler_index, binding_table_index;
		396	struct toy_src coords, ddx, ddy, bias_or_lod, ref_or_si;
		397	int num_coords, ref_pos, num_derivs;
		398	int sampler_src;
		399
		400	simd_mode = GEN6_MSG_SAMPLER_SIMD4X2;
		401
		402	coords = inst->src[0];
		403	ddx = tsrc_null();
		404	ddy = tsrc_null();
		405	bias_or_lod = tsrc_null();
		406	ref_or_si = tsrc_null();
		407	num_derivs = 0;
		408	sampler_src = 1;
		409
		410	num_coords = tgsi_util_get_texture_coord_dim(inst->tex.target, &ref_pos);
		411
		412	/* extract the parameters */
		413	switch (inst->opcode) {
		414	case TOY_OPCODE_TGSI_TXD:
		415	if (ref_pos >= 0) {
		416	assert(ref_pos < 4);
		417
		418	msg_type = GEN7_MSG_SAMPLER_SAMPLE_D_C;
		419	ref_or_si = tsrc_swizzle1(coords, ref_pos);
		420
		421	if (ilo_dev_gen(tc->dev) < ILO_GEN(7.5))
		422	tc_fail(tc, "TXD with shadow sampler not supported");
		423	}
		424	else {
		425	msg_type = GEN6_MSG_SAMPLER_SAMPLE_D;
		426	}
		427
		428	ddx = inst->src[1];
		429	ddy = inst->src[2];
		430	num_derivs = num_coords;
		431	sampler_src = 3;
		432	break;
		433	case TOY_OPCODE_TGSI_TXL:
		434	if (ref_pos >= 0) {
		435	assert(ref_pos < 3);
		436
		437	msg_type = GEN6_MSG_SAMPLER_SAMPLE_L_C;
		438	ref_or_si = tsrc_swizzle1(coords, ref_pos);
		439	}
		440	else {
		441	msg_type = GEN6_MSG_SAMPLER_SAMPLE_L;
		442	}
		443
		444	bias_or_lod = tsrc_swizzle1(coords, TOY_SWIZZLE_W);
		445	break;
		446	case TOY_OPCODE_TGSI_TXF:
		447	msg_type = GEN6_MSG_SAMPLER_LD;
		448
		449	switch (inst->tex.target) {
		450	case TGSI_TEXTURE_2D_MSAA:
		451	case TGSI_TEXTURE_2D_ARRAY_MSAA:
		452	assert(ref_pos >= 0 && ref_pos < 4);
		453	/* lod is always 0 */
		454	bias_or_lod = tsrc_imm_d(0);
		455	ref_or_si = tsrc_swizzle1(coords, ref_pos);
		456	break;
		457	default:
		458	bias_or_lod = tsrc_swizzle1(coords, TOY_SWIZZLE_W);
		459	break;
		460	}
		461
		462	/* offset the coordinates */
		463	if (!tsrc_is_null(inst->tex.offsets[0])) {
		464	struct toy_dst tmp;
		465
		466	tmp = tc_alloc_tmp(tc);
		467	tc_ADD(tc, tmp, coords, inst->tex.offsets[0]);
		468	coords = tsrc_from(tmp);
		469	}
		470
		471	sampler_src = 1;
		472	break;
		473	case TOY_OPCODE_TGSI_TXQ:
		474	msg_type = GEN6_MSG_SAMPLER_RESINFO;
		475	num_coords = 0;
		476	bias_or_lod = tsrc_swizzle1(coords, TOY_SWIZZLE_X);
		477	break;
		478	case TOY_OPCODE_TGSI_TXQ_LZ:
		479	msg_type = GEN6_MSG_SAMPLER_RESINFO;
		480	num_coords = 0;
		481	sampler_src = 0;
		482	break;
		483	case TOY_OPCODE_TGSI_TXL2:
		484	if (ref_pos >= 0) {
		485	assert(ref_pos < 4);
		486
		487	msg_type = GEN6_MSG_SAMPLER_SAMPLE_L_C;
		488	ref_or_si = tsrc_swizzle1(coords, ref_pos);
		489	}
		490	else {
		491	msg_type = GEN6_MSG_SAMPLER_SAMPLE_L;
		492	}
		493
		494	bias_or_lod = tsrc_swizzle1(inst->src[1], TOY_SWIZZLE_X);
		495	sampler_src = 2;
		496	break;
		497	default:
		498	assert(!"unhandled sampling opcode");
		499	if (ret_sampler_index)
		500	*ret_sampler_index = 0;
		501	return tsrc_null();
		502	break;
		503	}
		504
		505	assert(inst->src[sampler_src].file == TOY_FILE_IMM);
		506	sampler_index = inst->src[sampler_src].val32;
		507	binding_table_index = vcc->shader->bt.tex_base + sampler_index;
		508
		509	/*
		510	* From the Sandy Bridge PRM, volume 4 part 1, page 18:
		511	*
		512	* "Note that the (cube map) coordinates delivered to the sampling
		513	* engine must already have been divided by the component with the
		514	* largest absolute value."
		515	*/
		516	switch (inst->tex.target) {
		517	case TGSI_TEXTURE_CUBE:
		518	case TGSI_TEXTURE_SHADOWCUBE:
		519	case TGSI_TEXTURE_CUBE_ARRAY:
		520	case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
		521	/* TXQ does not need coordinates */
		522	if (num_coords >= 3) {
		523	struct toy_dst tmp, max;
		524	struct toy_src abs_coords[3];
		525	int i;
		526
		527	tmp = tc_alloc_tmp(tc);
		528	max = tdst_writemask(tmp, TOY_WRITEMASK_W);
		529
		530	for (i = 0; i < 3; i++)
		531	abs_coords[i] = tsrc_absolute(tsrc_swizzle1(coords, i));
		532
		533	tc_SEL(tc, max, abs_coords[0], abs_coords[0], GEN6_COND_GE);
		534	tc_SEL(tc, max, tsrc_from(max), abs_coords[0], GEN6_COND_GE);
		535	tc_INV(tc, max, tsrc_from(max));
		536
		537	for (i = 0; i < 3; i++)
		538	tc_MUL(tc, tdst_writemask(tmp, 1 << i), coords, tsrc_from(max));
		539
		540	coords = tsrc_from(tmp);
		541	}
		542	break;
		543	}
		544
		545	/* set up sampler parameters */
		546	msg_len = vs_add_sampler_params(tc, msg_type, base_mrf,
		547	coords, num_coords, bias_or_lod, ref_or_si, ddx, ddy, num_derivs);
		548
		549	/*
		550	* From the Sandy Bridge PRM, volume 4 part 1, page 136:
		551	*
		552	* "The maximum message length allowed to the sampler is 11. This would
		553	* disallow sample_d, sample_b_c, and sample_l_c with a SIMD Mode of
		554	* SIMD16."
		555	*/
		556	if (msg_len > 11)
		557	tc_fail(tc, "maximum length for messages to the sampler is 11");
		558
		559	if (ret_sampler_index)
		560	*ret_sampler_index = sampler_index;
		561
		562	return tsrc_imm_mdesc_sampler(tc, msg_len, 1,
		563	false, simd_mode, msg_type, sampler_index, binding_table_index);
		564	}
		565
		566	static void
		567	vs_lower_opcode_tgsi_sampling(struct vs_compile_context *vcc,
		568	struct toy_inst *inst)
		569	{
		570	struct toy_compiler *tc = &vcc->tc;
		571	struct toy_src desc;
		572	struct toy_dst dst, tmp;
		573	unsigned sampler_index;
		574	int swizzles[4], i;
		575	unsigned swizzle_zero_mask, swizzle_one_mask, swizzle_normal_mask;
		576	bool need_filter;
		577
		578	desc = vs_prepare_tgsi_sampling(vcc, inst,
		579	vcc->first_free_mrf, &sampler_index);
		580
		581	switch (inst->opcode) {
		582	case TOY_OPCODE_TGSI_TXF:
		583	case TOY_OPCODE_TGSI_TXQ:
		584	case TOY_OPCODE_TGSI_TXQ_LZ:
		585	need_filter = false;
		586	break;
		587	default:
		588	need_filter = true;
		589	break;
		590	}
		591
		592	toy_compiler_lower_to_send(tc, inst, false, GEN6_SFID_SAMPLER);
		593	inst->src[0] = tsrc(TOY_FILE_MRF, vcc->first_free_mrf, 0);
		594	inst->src[1] = desc;
		595
		596	/* write to a temp first */
		597	tmp = tc_alloc_tmp(tc);
		598	tmp.type = inst->dst.type;
		599	dst = inst->dst;
		600	inst->dst = tmp;
		601
		602	tc_move_inst(tc, inst);
		603
		604	if (need_filter) {
		605	assert(sampler_index < vcc->variant->num_sampler_views);
		606	swizzles[0] = vcc->variant->sampler_view_swizzles[sampler_index].r;
		607	swizzles[1] = vcc->variant->sampler_view_swizzles[sampler_index].g;
		608	swizzles[2] = vcc->variant->sampler_view_swizzles[sampler_index].b;
		609	swizzles[3] = vcc->variant->sampler_view_swizzles[sampler_index].a;
		610	}
		611	else {
		612	swizzles[0] = PIPE_SWIZZLE_RED;
		613	swizzles[1] = PIPE_SWIZZLE_GREEN;
		614	swizzles[2] = PIPE_SWIZZLE_BLUE;
		615	swizzles[3] = PIPE_SWIZZLE_ALPHA;
		616	}
		617
		618	swizzle_zero_mask = 0;
		619	swizzle_one_mask = 0;
		620	swizzle_normal_mask = 0;
		621	for (i = 0; i < 4; i++) {
		622	switch (swizzles[i]) {
		623	case PIPE_SWIZZLE_ZERO:
		624	swizzle_zero_mask \|= 1 << i;
		625	swizzles[i] = i;
		626	break;
		627	case PIPE_SWIZZLE_ONE:
		628	swizzle_one_mask \|= 1 << i;
		629	swizzles[i] = i;
		630	break;
		631	default:
		632	swizzle_normal_mask \|= 1 << i;
		633	break;
		634	}
		635	}
		636
		637	/* swizzle the results */
		638	if (swizzle_normal_mask) {
		639	tc_MOV(tc, tdst_writemask(dst, swizzle_normal_mask),
		640	tsrc_swizzle(tsrc_from(tmp), swizzles[0],
		641	swizzles[1], swizzles[2], swizzles[3]));
		642	}
		643	if (swizzle_zero_mask)
		644	tc_MOV(tc, tdst_writemask(dst, swizzle_zero_mask), tsrc_imm_f(0.0f));
		645	if (swizzle_one_mask)
		646	tc_MOV(tc, tdst_writemask(dst, swizzle_one_mask), tsrc_imm_f(1.0f));
		647	}
		648
		649	static void
		650	vs_lower_opcode_urb_write(struct toy_compiler tc, struct toy_inst inst)
		651	{
		652	/* vs_write_vue() has set up the message registers */
		653	toy_compiler_lower_to_send(tc, inst, false, GEN6_SFID_URB);
		654	}
		655
		656	static void
		657	vs_lower_virtual_opcodes(struct vs_compile_context *vcc)
		658	{
		659	struct toy_compiler *tc = &vcc->tc;
		660	struct toy_inst *inst;
		661
		662	tc_head(tc);
		663	while ((inst = tc_next(tc)) != NULL) {
		664	switch (inst->opcode) {
		665	case TOY_OPCODE_TGSI_IN:
		666	case TOY_OPCODE_TGSI_CONST:
		667	case TOY_OPCODE_TGSI_SV:
		668	case TOY_OPCODE_TGSI_IMM:
		669	vs_lower_opcode_tgsi_direct(vcc, inst);
		670	break;
		671	case TOY_OPCODE_TGSI_INDIRECT_FETCH:
		672	case TOY_OPCODE_TGSI_INDIRECT_STORE:
		673	vs_lower_opcode_tgsi_indirect(vcc, inst);
		674	break;
		675	case TOY_OPCODE_TGSI_TEX:
		676	case TOY_OPCODE_TGSI_TXB:
		677	case TOY_OPCODE_TGSI_TXD:
		678	case TOY_OPCODE_TGSI_TXL:
		679	case TOY_OPCODE_TGSI_TXP:
		680	case TOY_OPCODE_TGSI_TXF:
		681	case TOY_OPCODE_TGSI_TXQ:
		682	case TOY_OPCODE_TGSI_TXQ_LZ:
		683	case TOY_OPCODE_TGSI_TEX2:
		684	case TOY_OPCODE_TGSI_TXB2:
		685	case TOY_OPCODE_TGSI_TXL2:
		686	case TOY_OPCODE_TGSI_SAMPLE:
		687	case TOY_OPCODE_TGSI_SAMPLE_I:
		688	case TOY_OPCODE_TGSI_SAMPLE_I_MS:
		689	case TOY_OPCODE_TGSI_SAMPLE_B:
		690	case TOY_OPCODE_TGSI_SAMPLE_C:
		691	case TOY_OPCODE_TGSI_SAMPLE_C_LZ:
		692	case TOY_OPCODE_TGSI_SAMPLE_D:
		693	case TOY_OPCODE_TGSI_SAMPLE_L:
		694	case TOY_OPCODE_TGSI_GATHER4:
		695	case TOY_OPCODE_TGSI_SVIEWINFO:
		696	case TOY_OPCODE_TGSI_SAMPLE_POS:
		697	case TOY_OPCODE_TGSI_SAMPLE_INFO:
		698	vs_lower_opcode_tgsi_sampling(vcc, inst);
		699	break;
		700	case TOY_OPCODE_INV:
		701	case TOY_OPCODE_LOG:
		702	case TOY_OPCODE_EXP:
		703	case TOY_OPCODE_SQRT:
		704	case TOY_OPCODE_RSQ:
		705	case TOY_OPCODE_SIN:
		706	case TOY_OPCODE_COS:
		707	case TOY_OPCODE_FDIV:
		708	case TOY_OPCODE_POW:
		709	case TOY_OPCODE_INT_DIV_QUOTIENT:
		710	case TOY_OPCODE_INT_DIV_REMAINDER:
		711	toy_compiler_lower_math(tc, inst);
		712	break;
		713	case TOY_OPCODE_URB_WRITE:
		714	vs_lower_opcode_urb_write(tc, inst);
		715	break;
		716	default:
		717	if (inst->opcode > 127)
		718	tc_fail(tc, "unhandled virtual opcode");
		719	break;
		720	}
		721	}
		722	}
		723
		724	/**
		725	* Compile the shader.
		726	*/
		727	static bool
		728	vs_compile(struct vs_compile_context *vcc)
		729	{
		730	struct toy_compiler *tc = &vcc->tc;
		731	struct ilo_shader *sh = vcc->shader;
		732
		733	vs_lower_virtual_opcodes(vcc);
		734	toy_compiler_legalize_for_ra(tc);
		735	toy_compiler_optimize(tc);
		736	toy_compiler_allocate_registers(tc,
		737	vcc->first_free_grf,
		738	vcc->last_free_grf,
		739	vcc->num_grf_per_vrf);
		740	toy_compiler_legalize_for_asm(tc);
		741
		742	if (tc->fail) {
		743	ilo_err("failed to legalize VS instructions: %s\n", tc->reason);
		744	return false;
		745	}
		746
		747	if (ilo_debug & ILO_DEBUG_VS) {
		748	ilo_printf("legalized instructions:\n");
		749	toy_compiler_dump(tc);
		750	ilo_printf("\n");
		751	}
		752
		753	if (true) {
		754	sh->kernel = toy_compiler_assemble(tc, &sh->kernel_size);
		755	}
		756	else {
		757	static const uint32_t microcode[] = {
		758	/* fill in the microcode here */
		759	0x0, 0x0, 0x0, 0x0,
		760	};
		761	const bool swap = true;
		762
		763	sh->kernel_size = sizeof(microcode);
		764	sh->kernel = MALLOC(sh->kernel_size);
		765
		766	if (sh->kernel) {
		767	const int num_dwords = sizeof(microcode) / 4;
		768	const uint32_t *src = microcode;
		769	uint32_t dst = (uint32_t ) sh->kernel;
		770	int i;
		771
		772	for (i = 0; i < num_dwords; i += 4) {
		773	if (swap) {
		774	dst[i + 0] = src[i + 3];
		775	dst[i + 1] = src[i + 2];
		776	dst[i + 2] = src[i + 1];
		777	dst[i + 3] = src[i + 0];
		778	}
		779	else {
		780	memcpy(dst, src, 16);
		781	}
		782	}
		783	}
		784	}
		785
		786	if (!sh->kernel) {
		787	ilo_err("failed to compile VS: %s\n", tc->reason);
		788	return false;
		789	}
		790
		791	if (ilo_debug & ILO_DEBUG_VS) {
		792	ilo_printf("disassembly:\n");
		793	toy_compiler_disassemble(tc->dev, sh->kernel, sh->kernel_size, false);
		794	ilo_printf("\n");
		795	}
		796
		797	return true;
		798	}
		799
		800	/**
		801	* Collect the toy registers to be written to the VUE.
		802	*/
		803	static int
		804	vs_collect_outputs(struct vs_compile_context vcc, struct toy_src outs)
		805	{
		806	const struct toy_tgsi *tgsi = &vcc->tgsi;
		807	int i;
		808
		809	for (i = 0; i < vcc->shader->out.count; i++) {
		810	const int slot = vcc->output_map[i];
		811	const int vrf = (slot >= 0) ? toy_tgsi_get_vrf(tgsi,
		812	TGSI_FILE_OUTPUT, 0, tgsi->outputs[slot].index) : -1;
		813	struct toy_src src;
		814
		815	if (vrf >= 0) {
		816	struct toy_dst dst;
		817
		818	dst = tdst(TOY_FILE_VRF, vrf, 0);
		819	src = tsrc_from(dst);
		820
		821	if (i == 0) {
		822	/* PSIZE is at channel W */
		823	tc_MOV(&vcc->tc, tdst_writemask(dst, TOY_WRITEMASK_W),
		824	tsrc_swizzle1(src, TOY_SWIZZLE_X));
		825
		826	/* the other channels are for the header */
		827	dst = tdst_d(dst);
		828	tc_MOV(&vcc->tc, tdst_writemask(dst, TOY_WRITEMASK_XYZ),
		829	tsrc_imm_d(0));
		830	}
		831	else {
		832	/* initialize unused channels to 0.0f */
		833	if (tgsi->outputs[slot].undefined_mask) {
		834	dst = tdst_writemask(dst, tgsi->outputs[slot].undefined_mask);
		835	tc_MOV(&vcc->tc, dst, tsrc_imm_f(0.0f));
		836	}
		837	}
		838	}
		839	else {
		840	/* XXX this is too ugly */
		841	if (vcc->shader->out.semantic_names[i] == TGSI_SEMANTIC_CLIPDIST &&
		842	slot < 0) {
		843	/* ok, we need to compute clip distance */
		844	int clipvert_slot = -1, clipvert_vrf, j;
		845
		846	for (j = 0; j < tgsi->num_outputs; j++) {
		847	if (tgsi->outputs[j].semantic_name ==
		848	TGSI_SEMANTIC_CLIPVERTEX) {
		849	clipvert_slot = j;
		850	break;
		851	}
		852	else if (tgsi->outputs[j].semantic_name ==
		853	TGSI_SEMANTIC_POSITION) {
		854	/* remember pos, but keep looking */
		855	clipvert_slot = j;
		856	}
		857	}
		858
		859	clipvert_vrf = (clipvert_slot >= 0) ? toy_tgsi_get_vrf(tgsi,
		860	TGSI_FILE_OUTPUT, 0, tgsi->outputs[clipvert_slot].index) : -1;
		861	if (clipvert_vrf >= 0) {
		862	struct toy_dst tmp = tc_alloc_tmp(&vcc->tc);
		863	struct toy_src clipvert = tsrc(TOY_FILE_VRF, clipvert_vrf, 0);
		864	int first_ucp, last_ucp;
		865
		866	if (vcc->shader->out.semantic_indices[i]) {
		867	first_ucp = 4;
		868	last_ucp = MIN2(7, vcc->variant->u.vs.num_ucps - 1);
		869	}
		870	else {
		871	first_ucp = 0;
		872	last_ucp = MIN2(3, vcc->variant->u.vs.num_ucps - 1);
		873	}
		874
		875	for (j = first_ucp; j <= last_ucp; j++) {
		876	const int plane_grf = vcc->first_ucp_grf + j / 2;
		877	const int plane_subreg = (j & 1) * 16;
		878	const struct toy_src plane = tsrc_rect(tsrc(TOY_FILE_GRF,
		879	plane_grf, plane_subreg), TOY_RECT_041);
		880	const unsigned writemask = 1 << ((j >= 4) ? j - 4 : j);
		881
		882	tc_DP4(&vcc->tc, tdst_writemask(tmp, writemask),
		883	clipvert, plane);
		884	}
		885
		886	src = tsrc_from(tmp);
		887	}
		888	else {
		889	src = tsrc_imm_f(0.0f);
		890	}
		891	}
		892	else {
		893	src = (i == 0) ? tsrc_imm_d(0) : tsrc_imm_f(0.0f);
		894	}
		895	}
		896
		897	outs[i] = src;
		898	}
		899
		900	return i;
		901	}
		902
		903	/**
		904	* Emit instructions to write the VUE.
		905	*/
		906	static void
		907	vs_write_vue(struct vs_compile_context *vcc)
		908	{
		909	struct toy_compiler *tc = &vcc->tc;
		910	struct toy_src outs[PIPE_MAX_SHADER_OUTPUTS];
		911	struct toy_dst header;
		912	struct toy_src r0;
		913	struct toy_inst *inst;
		914	int sent_attrs, total_attrs;
		915
		916	header = tdst_ud(tdst(TOY_FILE_MRF, vcc->first_free_mrf, 0));
		917	r0 = tsrc_ud(tsrc(TOY_FILE_GRF, 0, 0));
		918	inst = tc_MOV(tc, header, r0);
		919	inst->mask_ctrl = GEN6_MASKCTRL_NOMASK;
		920
		921	if (ilo_dev_gen(tc->dev) >= ILO_GEN(7)) {
		922	inst = tc_OR(tc, tdst_offset(header, 0, 5),
		923	tsrc_rect(tsrc_offset(r0, 0, 5), TOY_RECT_010),
		924	tsrc_rect(tsrc_imm_ud(0xff00), TOY_RECT_010));
		925	inst->exec_size = GEN6_EXECSIZE_1;
		926	inst->access_mode = GEN6_ALIGN_1;
		927	inst->mask_ctrl = GEN6_MASKCTRL_NOMASK;
		928	}
		929
		930	total_attrs = vs_collect_outputs(vcc, outs);
		931	sent_attrs = 0;
		932	while (sent_attrs < total_attrs) {
		933	struct toy_src desc;
		934	int mrf = vcc->first_free_mrf + 1, avail_mrf_for_attrs;
		935	int num_attrs, msg_len, i;
		936	bool eot;
		937
		938	num_attrs = total_attrs - sent_attrs;
		939	eot = true;
		940
		941	/* see if we need another message */
		942	avail_mrf_for_attrs = vcc->last_free_mrf - mrf + 1;
		943	if (num_attrs > avail_mrf_for_attrs) {
		944	/*
		945	* From the Sandy Bridge PRM, volume 4 part 2, page 22:
		946	*
		947	* "Offset. This field specifies a destination offset (in 256-bit
		948	* units) from the start of the URB entry(s), as referenced by
		949	* URB Return Handle n, at which the data (if any) will be
		950	* written."
		951	*
		952	* As we need to offset the following messages, we must make sure
		953	* this one writes an even number of attributes.
		954	*/
		955	num_attrs = avail_mrf_for_attrs & ~1;
		956	eot = false;
		957	}
		958
		959	if (ilo_dev_gen(tc->dev) >= ILO_GEN(7)) {
		960	/* do not forget about the header */
		961	msg_len = 1 + num_attrs;
		962	}
		963	else {
		964	/*
		965	* From the Sandy Bridge PRM, volume 4 part 2, page 26:
		966	*
		967	* "At least 256 bits per vertex (512 bits total, M1 & M2) must
		968	* be written. Writing only 128 bits per vertex (256 bits
		969	* total, M1 only) results in UNDEFINED operation."
		970	*
		971	* "[DevSNB] Interleave writes must be in multiples of 256 per
		972	* vertex."
		973	*
		974	* That is, we must write or appear to write an even number of
		975	* attributes, starting from two.
		976	*/
		977	if (num_attrs % 2 && num_attrs == avail_mrf_for_attrs) {
		978	num_attrs--;
		979	eot = false;
		980	}
		981
		982	msg_len = 1 + align(num_attrs, 2);
		983	}
		984
		985	for (i = 0; i < num_attrs; i++)
		986	tc_MOV(tc, tdst(TOY_FILE_MRF, mrf++, 0), outs[sent_attrs + i]);
		987
		988	assert(sent_attrs % 2 == 0);
		989	desc = tsrc_imm_mdesc_urb(tc, eot, msg_len, 0,
		990	eot, true, false, true, sent_attrs / 2, 0);
		991
		992	tc_add2(tc, TOY_OPCODE_URB_WRITE, tdst_null(), tsrc_from(header), desc);
		993
		994	sent_attrs += num_attrs;
		995	}
		996	}
		997
		998	/**
		999	* Set up shader inputs for fixed-function units.
		1000	*/
		1001	static void
		1002	vs_setup_shader_in(struct ilo_shader sh, const struct toy_tgsi tgsi)
		1003	{
		1004	int num_attrs, i;
		1005
		1006	/* vertex/instance id is the first VE if exists */
		1007	for (i = 0; i < tgsi->num_system_values; i++) {
		1008	bool found = false;
		1009
		1010	switch (tgsi->system_values[i].semantic_name) {
		1011	case TGSI_SEMANTIC_INSTANCEID:
		1012	case TGSI_SEMANTIC_VERTEXID:
		1013	found = true;
		1014	break;
		1015	default:
		1016	break;
		1017	}
		1018
		1019	if (found) {
		1020	sh->in.semantic_names[sh->in.count] =
		1021	tgsi->system_values[i].semantic_name;
		1022	sh->in.semantic_indices[sh->in.count] =
		1023	tgsi->system_values[i].semantic_index;
		1024	sh->in.interp[sh->in.count] = TGSI_INTERPOLATE_CONSTANT;
		1025	sh->in.centroid[sh->in.count] = false;
		1026
		1027	sh->in.count++;
		1028	break;
		1029	}
		1030	}
		1031
		1032	num_attrs = 0;
		1033	for (i = 0; i < tgsi->num_inputs; i++) {
		1034	assert(tgsi->inputs[i].semantic_name == TGSI_SEMANTIC_GENERIC);
		1035	if (tgsi->inputs[i].semantic_index >= num_attrs)
		1036	num_attrs = tgsi->inputs[i].semantic_index + 1;
		1037	}
		1038	assert(num_attrs <= PIPE_MAX_ATTRIBS);
		1039
		1040	/* VF cannot remap VEs. VE[i] must be used as GENERIC[i]. */
		1041	for (i = 0; i < num_attrs; i++) {
		1042	sh->in.semantic_names[sh->in.count + i] = TGSI_SEMANTIC_GENERIC;
		1043	sh->in.semantic_indices[sh->in.count + i] = i;
		1044	sh->in.interp[sh->in.count + i] = TGSI_INTERPOLATE_CONSTANT;
		1045	sh->in.centroid[sh->in.count + i] = false;
		1046	}
		1047
		1048	sh->in.count += num_attrs;
		1049
		1050	sh->in.has_pos = false;
		1051	sh->in.has_linear_interp = false;
		1052	sh->in.barycentric_interpolation_mode = 0;
		1053	}
		1054
		1055	/**
		1056	* Set up shader outputs for fixed-function units.
		1057	*/
		1058	static void
		1059	vs_setup_shader_out(struct ilo_shader sh, const struct toy_tgsi tgsi,
		1060	bool output_clipdist, int *output_map)
		1061	{
		1062	int psize_slot = -1, pos_slot = -1;
		1063	int clipdist_slot[2] = { -1, -1 };
		1064	int color_slot[4] = { -1, -1, -1, -1 };
		1065	int num_outs, i;
		1066
		1067	/* find out the slots of outputs that need special care */
		1068	for (i = 0; i < tgsi->num_outputs; i++) {
		1069	switch (tgsi->outputs[i].semantic_name) {
		1070	case TGSI_SEMANTIC_PSIZE:
		1071	psize_slot = i;
		1072	break;
		1073	case TGSI_SEMANTIC_POSITION:
		1074	pos_slot = i;
		1075	break;
		1076	case TGSI_SEMANTIC_CLIPDIST:
		1077	if (tgsi->outputs[i].semantic_index)
		1078	clipdist_slot[1] = i;
		1079	else
		1080	clipdist_slot[0] = i;
		1081	break;
		1082	case TGSI_SEMANTIC_COLOR:
		1083	if (tgsi->outputs[i].semantic_index)
		1084	color_slot[2] = i;
		1085	else
		1086	color_slot[0] = i;
		1087	break;
		1088	case TGSI_SEMANTIC_BCOLOR:
		1089	if (tgsi->outputs[i].semantic_index)
		1090	color_slot[3] = i;
		1091	else
		1092	color_slot[1] = i;
		1093	break;
		1094	default:
		1095	break;
		1096	}
		1097	}
		1098
		1099	/* the first two VUEs are always PSIZE and POSITION */
		1100	num_outs = 2;
		1101	output_map[0] = psize_slot;
		1102	output_map[1] = pos_slot;
		1103
		1104	sh->out.register_indices[0] =
		1105	(psize_slot >= 0) ? tgsi->outputs[psize_slot].index : -1;
		1106	sh->out.semantic_names[0] = TGSI_SEMANTIC_PSIZE;
		1107	sh->out.semantic_indices[0] = 0;
		1108
		1109	sh->out.register_indices[1] =
		1110	(pos_slot >= 0) ? tgsi->outputs[pos_slot].index : -1;
		1111	sh->out.semantic_names[1] = TGSI_SEMANTIC_POSITION;
		1112	sh->out.semantic_indices[1] = 0;
		1113
		1114	sh->out.has_pos = true;
		1115
		1116	/* followed by optional clip distances */
		1117	if (output_clipdist) {
		1118	sh->out.register_indices[num_outs] =
		1119	(clipdist_slot[0] >= 0) ? tgsi->outputs[clipdist_slot[0]].index : -1;
		1120	sh->out.semantic_names[num_outs] = TGSI_SEMANTIC_CLIPDIST;
		1121	sh->out.semantic_indices[num_outs] = 0;
		1122	output_map[num_outs++] = clipdist_slot[0];
		1123
		1124	sh->out.register_indices[num_outs] =
		1125	(clipdist_slot[1] >= 0) ? tgsi->outputs[clipdist_slot[1]].index : -1;
		1126	sh->out.semantic_names[num_outs] = TGSI_SEMANTIC_CLIPDIST;
		1127	sh->out.semantic_indices[num_outs] = 1;
		1128	output_map[num_outs++] = clipdist_slot[1];
		1129	}
		1130
		1131	/*
		1132	* make BCOLOR follow COLOR so that we can make use of
		1133	* ATTRIBUTE_SWIZZLE_INPUTATTR_FACING in 3DSTATE_SF
		1134	*/
		1135	for (i = 0; i < 4; i++) {
		1136	const int slot = color_slot[i];
		1137
		1138	if (slot < 0)
		1139	continue;
		1140
		1141	sh->out.register_indices[num_outs] = tgsi->outputs[slot].index;
		1142	sh->out.semantic_names[num_outs] = tgsi->outputs[slot].semantic_name;
		1143	sh->out.semantic_indices[num_outs] = tgsi->outputs[slot].semantic_index;
		1144
		1145	output_map[num_outs++] = slot;
		1146	}
		1147
		1148	/* add the rest of the outputs */
		1149	for (i = 0; i < tgsi->num_outputs; i++) {
		1150	switch (tgsi->outputs[i].semantic_name) {
		1151	case TGSI_SEMANTIC_PSIZE:
		1152	case TGSI_SEMANTIC_POSITION:
		1153	case TGSI_SEMANTIC_CLIPDIST:
		1154	case TGSI_SEMANTIC_COLOR:
		1155	case TGSI_SEMANTIC_BCOLOR:
		1156	break;
		1157	default:
		1158	sh->out.register_indices[num_outs] = tgsi->outputs[i].index;
		1159	sh->out.semantic_names[num_outs] = tgsi->outputs[i].semantic_name;
		1160	sh->out.semantic_indices[num_outs] = tgsi->outputs[i].semantic_index;
		1161	output_map[num_outs++] = i;
		1162	break;
		1163	}
		1164	}
		1165
		1166	sh->out.count = num_outs;
		1167	}
		1168
		1169	/**
		1170	* Translate the TGSI tokens.
		1171	*/
		1172	static bool
		1173	vs_setup_tgsi(struct toy_compiler tc, const struct tgsi_token tokens,
		1174	struct toy_tgsi *tgsi)
		1175	{
		1176	if (ilo_debug & ILO_DEBUG_VS) {
		1177	ilo_printf("dumping vertex shader\n");
		1178	ilo_printf("\n");
		1179
		1180	tgsi_dump(tokens, 0);
		1181	ilo_printf("\n");
		1182	}
		1183
		1184	toy_compiler_translate_tgsi(tc, tokens, true, tgsi);
		1185	if (tc->fail) {
		1186	ilo_err("failed to translate VS TGSI tokens: %s\n", tc->reason);
		1187	return false;
		1188	}
		1189
		1190	if (ilo_debug & ILO_DEBUG_VS) {
		1191	ilo_printf("TGSI translator:\n");
		1192	toy_tgsi_dump(tgsi);
		1193	ilo_printf("\n");
		1194	toy_compiler_dump(tc);
		1195	ilo_printf("\n");
		1196	}
		1197
		1198	return true;
		1199	}
		1200
		1201	/**
		1202	* Set up VS compile context. This includes translating the TGSI tokens.
		1203	*/
		1204	static bool
		1205	vs_setup(struct vs_compile_context *vcc,
		1206	const struct ilo_shader_state *state,
		1207	const struct ilo_shader_variant *variant)
		1208	{
		1209	int num_consts;
		1210
		1211	memset(vcc, 0, sizeof(*vcc));
		1212
		1213	vcc->shader = CALLOC_STRUCT(ilo_shader);
		1214	if (!vcc->shader)
		1215	return false;
		1216
		1217	vcc->variant = variant;
		1218
		1219	toy_compiler_init(&vcc->tc, state->info.dev);
		1220	vcc->tc.templ.access_mode = GEN6_ALIGN_16;
		1221	vcc->tc.templ.exec_size = GEN6_EXECSIZE_8;
		1222	vcc->tc.rect_linear_width = 4;
		1223
		1224	/*
		1225	* The classic driver uses the sampler cache (gen6) or the data cache
		1226	* (gen7). Why?
		1227	*/
		1228	vcc->const_cache = GEN6_SFID_DP_CC;
		1229
		1230	if (!vs_setup_tgsi(&vcc->tc, state->info.tokens, &vcc->tgsi)) {
		1231	toy_compiler_cleanup(&vcc->tc);
		1232	FREE(vcc->shader);
		1233	return false;
		1234	}
		1235
		1236	vs_setup_shader_in(vcc->shader, &vcc->tgsi);
		1237	vs_setup_shader_out(vcc->shader, &vcc->tgsi,
		1238	(vcc->variant->u.vs.num_ucps > 0), vcc->output_map);
		1239
		1240	if (vcc->variant->use_pcb && !vcc->tgsi.const_indirect) {
		1241	num_consts = (vcc->tgsi.const_count + 1) / 2;
		1242
		1243	/*
		1244	* From the Sandy Bridge PRM, volume 2 part 1, page 138:
		1245	*
		1246	* "The sum of all four read length fields (each incremented to
		1247	* represent the actual read length) must be less than or equal to
		1248	* 32"
		1249	*/
		1250	if (num_consts > 32)
		1251	num_consts = 0;
		1252	}
		1253	else {
		1254	num_consts = 0;
		1255	}
		1256
		1257	vcc->shader->skip_cbuf0_upload = (!vcc->tgsi.const_count \|\| num_consts);
		1258	vcc->shader->pcb.cbuf0_size = num_consts * (sizeof(float) * 8);
		1259
		1260	/* r0 is reserved for payload header */
		1261	vcc->first_const_grf = 1;
		1262	vcc->first_ucp_grf = vcc->first_const_grf + num_consts;
		1263
		1264	/* fit each pair of user clip planes into a register */
		1265	vcc->first_vue_grf = vcc->first_ucp_grf +
		1266	(vcc->variant->u.vs.num_ucps + 1) / 2;
		1267
		1268	vcc->first_free_grf = vcc->first_vue_grf + vcc->shader->in.count;
		1269	vcc->last_free_grf = 127;
		1270
		1271	/* m0 is reserved for system routines */
		1272	vcc->first_free_mrf = 1;
		1273	vcc->last_free_mrf = 15;
		1274
		1275	vcc->num_grf_per_vrf = 1;
		1276
		1277	if (ilo_dev_gen(vcc->tc.dev) >= ILO_GEN(7)) {
		1278	vcc->last_free_grf -= 15;
		1279	vcc->first_free_mrf = vcc->last_free_grf + 1;
		1280	vcc->last_free_mrf = vcc->first_free_mrf + 14;
		1281	}
		1282
		1283	vcc->shader->in.start_grf = vcc->first_const_grf;
		1284	vcc->shader->pcb.clip_state_size =
		1285	vcc->variant->u.vs.num_ucps * (sizeof(float) * 4);
		1286
		1287	vcc->shader->bt.tex_base = 0;
		1288	vcc->shader->bt.tex_count = vcc->variant->num_sampler_views;
		1289
		1290	vcc->shader->bt.const_base = vcc->shader->bt.tex_base +
		1291	vcc->shader->bt.tex_count;
		1292	vcc->shader->bt.const_count = state->info.constant_buffer_count;
		1293
		1294	vcc->shader->bt.total_count = vcc->shader->bt.const_base +
		1295	vcc->shader->bt.const_count;
		1296
		1297	return true;
		1298	}
		1299
		1300	/**
		1301	* Compile the vertex shader.
		1302	*/
		1303	struct ilo_shader *
		1304	ilo_shader_compile_vs(const struct ilo_shader_state *state,
		1305	const struct ilo_shader_variant *variant)
		1306	{
		1307	struct vs_compile_context vcc;
		1308	bool need_gs;
		1309
		1310	if (!vs_setup(&vcc, state, variant))
		1311	return NULL;
		1312
		1313	if (ilo_dev_gen(vcc.tc.dev) >= ILO_GEN(7)) {
		1314	need_gs = false;
		1315	}
		1316	else {
		1317	need_gs = variant->u.vs.rasterizer_discard \|\|
		1318	state->info.stream_output.num_outputs;
		1319	}
		1320
		1321	vs_write_vue(&vcc);
		1322
		1323	if (!vs_compile(&vcc)) {
		1324	FREE(vcc.shader);
		1325	vcc.shader = NULL;
		1326	}
		1327
		1328	toy_tgsi_cleanup(&vcc.tgsi);
		1329	toy_compiler_cleanup(&vcc.tc);
		1330
		1331	if (need_gs) {
		1332	int so_mapping[PIPE_MAX_SHADER_OUTPUTS];
		1333	int i, j;
		1334
		1335	for (i = 0; i < vcc.tgsi.num_outputs; i++) {
		1336	int attr = 0;
		1337
		1338	for (j = 0; j < vcc.shader->out.count; j++) {
		1339	if (vcc.tgsi.outputs[i].semantic_name ==
		1340	vcc.shader->out.semantic_names[j] &&
		1341	vcc.tgsi.outputs[i].semantic_index ==
		1342	vcc.shader->out.semantic_indices[j]) {
		1343	attr = j;
		1344	break;
		1345	}
		1346	}
		1347
		1348	so_mapping[i] = attr;
		1349	}
		1350
		1351	if (!ilo_shader_compile_gs_passthrough(state, variant,
		1352	so_mapping, vcc.shader)) {
		1353	ilo_shader_destroy_kernel(vcc.shader);
		1354	vcc.shader = NULL;
		1355	}
		1356	}
		1357
		1358	return vcc.shader;
		1359	}

Subversion Repositories Kolibri OS

(root)/contrib/sdk/sources/Mesa/mesa-10.6.0/src/gallium/drivers/ilo/shader/ilo_shader_vs.c – Rev 5568