Rev 4358 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
4358 | Serge | 1 | /* |
2 | * Copyright © 2010 Intel Corporation |
||
3 | * |
||
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
5 | * copy of this software and associated documentation files (the "Software"), |
||
6 | * to deal in the Software without restriction, including without limitation |
||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
||
8 | * and/or sell copies of the Software, and to permit persons to whom the |
||
9 | * Software is furnished to do so, subject to the following conditions: |
||
10 | * |
||
11 | * The above copyright notice and this permission notice (including the next |
||
12 | * paragraph) shall be included in all copies or substantial portions of the |
||
13 | * Software. |
||
14 | * |
||
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
||
18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
||
20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
||
21 | * IN THE SOFTWARE. |
||
22 | */ |
||
23 | |||
24 | /** @file brw_fs_visitor.cpp |
||
25 | * |
||
26 | * This file supports generating the FS LIR from the GLSL IR. The LIR |
||
27 | * makes it easier to do backend-specific optimizations than doing so |
||
28 | * in the GLSL IR or in the native code. |
||
29 | */ |
||
30 | extern "C" { |
||
31 | |||
32 | #include |
||
33 | |||
34 | #include "main/macros.h" |
||
35 | #include "main/shaderobj.h" |
||
36 | #include "main/uniforms.h" |
||
37 | #include "program/prog_parameter.h" |
||
38 | #include "program/prog_print.h" |
||
39 | #include "program/prog_optimize.h" |
||
40 | #include "program/register_allocate.h" |
||
41 | #include "program/sampler.h" |
||
42 | #include "program/hash_table.h" |
||
43 | #include "brw_context.h" |
||
44 | #include "brw_eu.h" |
||
45 | #include "brw_wm.h" |
||
46 | } |
||
47 | #include "brw_fs.h" |
||
48 | #include "glsl/glsl_types.h" |
||
49 | #include "glsl/ir_optimization.h" |
||
50 | |||
/**
 * Allocate (or wire up special) storage for a GLSL IR variable and record
 * it in variable_ht so later ir_dereference_variable visits can find it.
 *
 * Inputs get interpolation setup, outputs are bound to the color/depth
 * output slots, uniforms get UNIFORM-file registers; anything else falls
 * through to a plain GRF allocation at the bottom.
 */
void
fs_visitor::visit(ir_variable *ir)
{
   fs_reg *reg = NULL;

   /* Already processed on an earlier visit — nothing to do. */
   if (variable_storage(ir))
      return;

   if (ir->mode == ir_var_shader_in) {
      /* Built-in FS inputs have dedicated setup paths; everything else is
       * interpolated generically.
       */
      if (!strcmp(ir->name, "gl_FragCoord")) {
         reg = emit_fragcoord_interpolation(ir);
      } else if (!strcmp(ir->name, "gl_FrontFacing")) {
         reg = emit_frontfacing_interpolation(ir);
      } else {
         reg = emit_general_interpolation(ir);
      }
      assert(reg);
      hash_table_insert(this->variable_ht, reg, ir);
      return;
   } else if (ir->mode == ir_var_shader_out) {
      reg = new(this->mem_ctx) fs_reg(this, ir->type);

      if (ir->index > 0) {
         /* index==1 is the second source of dual-source blending. */
         assert(ir->location == FRAG_RESULT_DATA0);
         assert(ir->index == 1);
         this->dual_src_output = *reg;
      } else if (ir->location == FRAG_RESULT_COLOR) {
         /* Writing gl_FragColor outputs to all color regions. */
         for (unsigned int i = 0; i < MAX2(c->key.nr_color_regions, 1); i++) {
            this->outputs[i] = *reg;
            this->output_components[i] = 4;
         }
      } else if (ir->location == FRAG_RESULT_DEPTH) {
         this->frag_depth = *reg;
      } else {
         /* gl_FragData or a user-defined FS output */
         assert(ir->location >= FRAG_RESULT_DATA0 &&
                ir->location < FRAG_RESULT_DATA0 + BRW_MAX_DRAW_BUFFERS);

         /* For an array output, each element's component count comes from
          * the element type rather than the array type itself.
          */
         int vector_elements =
            ir->type->is_array() ? ir->type->fields.array->vector_elements
                                 : ir->type->vector_elements;

         /* General color output.  Arrays map to consecutive output slots,
          * each offset into the same backing register allocation.
          */
         for (unsigned int i = 0; i < MAX2(1, ir->type->length); i++) {
            int output = ir->location - FRAG_RESULT_DATA0 + i;
            this->outputs[output] = *reg;
            this->outputs[output].reg_offset += vector_elements * i;
            this->output_components[output] = vector_elements;
         }
      }
   } else if (ir->mode == ir_var_uniform) {
      int param_index = c->prog_data.nr_params;

      /* Thanks to the lower_ubo_reference pass, we will see only
       * ir_binop_ubo_load expressions and not ir_dereference_variable for UBO
       * variables, so no need for them to be in variable_ht.
       */
      if (ir->is_in_uniform_block())
         return;

      if (dispatch_width == 16) {
         /* In 16-wide mode uniforms were already laid out by the 8-wide
          * pass; we only verify the storage exists.
          */
         if (!variable_storage(ir)) {
            fail("Failed to find uniform '%s' in 16-wide\n", ir->name);
         }
         return;
      }

      param_size[param_index] = type_size(ir->type);
      /* "gl_" prefix identifies built-in uniforms (state variables). */
      if (!strncmp(ir->name, "gl_", 3)) {
         setup_builtin_uniform_values(ir);
      } else {
         setup_uniform_values(ir);
      }

      reg = new(this->mem_ctx) fs_reg(UNIFORM, param_index);
      reg->type = brw_type_for_base_type(ir->type);
   }

   /* Locals and anything not handled above get a fresh GRF allocation. */
   if (!reg)
      reg = new(this->mem_ctx) fs_reg(this, ir->type);

   hash_table_insert(this->variable_ht, reg, ir);
}
||
135 | |||
136 | void |
||
137 | fs_visitor::visit(ir_dereference_variable *ir) |
||
138 | { |
||
139 | fs_reg *reg = variable_storage(ir->var); |
||
140 | this->result = *reg; |
||
141 | } |
||
142 | |||
143 | void |
||
144 | fs_visitor::visit(ir_dereference_record *ir) |
||
145 | { |
||
146 | const glsl_type *struct_type = ir->record->type; |
||
147 | |||
148 | ir->record->accept(this); |
||
149 | |||
150 | unsigned int offset = 0; |
||
151 | for (unsigned int i = 0; i < struct_type->length; i++) { |
||
152 | if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0) |
||
153 | break; |
||
154 | offset += type_size(struct_type->fields.structure[i].type); |
||
155 | } |
||
156 | this->result.reg_offset += offset; |
||
157 | this->result.type = brw_type_for_base_type(ir->type); |
||
158 | } |
||
159 | |||
/**
 * Resolve an array dereference.  A constant index is folded directly into
 * reg_offset; a variable index is attached to the register via reladdr,
 * to be resolved by a later lowering pass.
 */
void
fs_visitor::visit(ir_dereference_array *ir)
{
   ir_constant *constant_index;
   fs_reg src;
   int element_size = type_size(ir->type);

   constant_index = ir->array_index->as_constant();

   ir->array->accept(this);
   src = this->result;
   src.type = brw_type_for_base_type(ir->type);

   if (constant_index) {
      assert(src.file == UNIFORM || src.file == GRF);
      src.reg_offset += constant_index->value.i[0] * element_size;
   } else {
      /* Variable index array dereference.  We attach the variable index
       * component to the reg as a pointer to a register containing the
       * offset.  Currently only uniform arrays are supported in this patch,
       * and that reladdr pointer is resolved by
       * move_uniform_array_access_to_pull_constants().  All other array types
       * are lowered by lower_variable_index_to_cond_assign().
       */
      ir->array_index->accept(this);

      /* Scale the element index into a register-offset value. */
      fs_reg index_reg;
      index_reg = fs_reg(this, glsl_type::int_type);
      emit(BRW_OPCODE_MUL, index_reg, this->result, fs_reg(element_size));

      /* Nested variable indexing: fold the outer reladdr into this one. */
      if (src.reladdr) {
         emit(BRW_OPCODE_ADD, index_reg, *src.reladdr, index_reg);
      }

      /* Heap-copy the index reg so reladdr survives past this frame. */
      src.reladdr = ralloc(mem_ctx, fs_reg);
      memcpy(src.reladdr, &index_reg, sizeof(index_reg));
   }
   this->result = src;
}
||
199 | |||
200 | void |
||
201 | fs_visitor::emit_lrp(fs_reg dst, fs_reg x, fs_reg y, fs_reg a) |
||
202 | { |
||
203 | if (brw->gen < 6 || |
||
204 | !x.is_valid_3src() || |
||
205 | !y.is_valid_3src() || |
||
206 | !a.is_valid_3src()) { |
||
207 | /* We can't use the LRP instruction. Emit x*(1-a) + y*a. */ |
||
208 | fs_reg y_times_a = fs_reg(this, glsl_type::float_type); |
||
209 | fs_reg one_minus_a = fs_reg(this, glsl_type::float_type); |
||
210 | fs_reg x_times_one_minus_a = fs_reg(this, glsl_type::float_type); |
||
211 | |||
212 | emit(MUL(y_times_a, y, a)); |
||
213 | |||
214 | a.negate = !a.negate; |
||
215 | emit(ADD(one_minus_a, a, fs_reg(1.0f))); |
||
216 | emit(MUL(x_times_one_minus_a, x, one_minus_a)); |
||
217 | |||
218 | emit(ADD(dst, x_times_one_minus_a, y_times_a)); |
||
219 | } else { |
||
220 | /* The LRP instruction actually does op1 * op0 + op2 * (1 - op0), so |
||
221 | * we need to reorder the operands. |
||
222 | */ |
||
223 | emit(LRP(dst, a, y, x)); |
||
224 | } |
||
225 | } |
||
226 | |||
227 | void |
||
228 | fs_visitor::emit_minmax(uint32_t conditionalmod, fs_reg dst, |
||
229 | fs_reg src0, fs_reg src1) |
||
230 | { |
||
231 | fs_inst *inst; |
||
232 | |||
233 | if (brw->gen >= 6) { |
||
234 | inst = emit(BRW_OPCODE_SEL, dst, src0, src1); |
||
235 | inst->conditional_mod = conditionalmod; |
||
236 | } else { |
||
237 | emit(CMP(reg_null_d, src0, src1, conditionalmod)); |
||
238 | |||
239 | inst = emit(BRW_OPCODE_SEL, dst, src0, src1); |
||
240 | inst->predicate = BRW_PREDICATE_NORMAL; |
||
241 | } |
||
242 | } |
||
243 | |||
/* Instruction selection: Produce a MOV.sat instead of
 * MIN(MAX(val, 0), 1) when possible.
 *
 * Returns true (with this->result set) when the expression was a
 * saturate pattern and has been fully emitted here; false means the
 * caller should emit it normally.
 */
bool
fs_visitor::try_emit_saturate(ir_expression *ir)
{
   ir_rvalue *sat_val = ir->as_rvalue_to_saturate();

   if (!sat_val)
      return false;

   /* Remember the instruction-stream tail so we can tell which
    * instructions the accept() below generated.
    */
   fs_inst *pre_inst = (fs_inst *) this->instructions.get_tail();

   sat_val->accept(this);
   fs_reg src = this->result;

   fs_inst *last_inst = (fs_inst *) this->instructions.get_tail();

   /* If the last instruction from our accept() didn't generate our
    * src, generate a saturated MOV
    */
   fs_inst *modify = get_instruction_generating_reg(pre_inst, last_inst, src);
   if (!modify || modify->regs_written != 1) {
      this->result = fs_reg(this, ir->type);
      fs_inst *inst = emit(MOV(this->result, src));
      inst->saturate = true;
   } else {
      /* Otherwise fold the saturate into the producing instruction. */
      modify->saturate = true;
      this->result = src;
   }

   return true;
}
||
278 | |||
279 | bool |
||
280 | fs_visitor::try_emit_mad(ir_expression *ir, int mul_arg) |
||
281 | { |
||
282 | /* 3-src instructions were introduced in gen6. */ |
||
283 | if (brw->gen < 6) |
||
284 | return false; |
||
285 | |||
286 | /* MAD can only handle floating-point data. */ |
||
287 | if (ir->type != glsl_type::float_type) |
||
288 | return false; |
||
289 | |||
290 | ir_rvalue *nonmul = ir->operands[1 - mul_arg]; |
||
291 | ir_expression *mul = ir->operands[mul_arg]->as_expression(); |
||
292 | |||
293 | if (!mul || mul->operation != ir_binop_mul) |
||
294 | return false; |
||
295 | |||
296 | if (nonmul->as_constant() || |
||
297 | mul->operands[0]->as_constant() || |
||
298 | mul->operands[1]->as_constant()) |
||
299 | return false; |
||
300 | |||
301 | nonmul->accept(this); |
||
302 | fs_reg src0 = this->result; |
||
303 | |||
304 | mul->operands[0]->accept(this); |
||
305 | fs_reg src1 = this->result; |
||
306 | |||
307 | mul->operands[1]->accept(this); |
||
308 | fs_reg src2 = this->result; |
||
309 | |||
310 | this->result = fs_reg(this, ir->type); |
||
311 | emit(BRW_OPCODE_MAD, this->result, src0, src1, src2); |
||
312 | |||
313 | return true; |
||
314 | } |
||
315 | |||
/**
 * Emit LIR for a (scalarized) GLSL IR expression.
 *
 * Operands are evaluated into op[]; the result register is allocated up
 * front and written by the per-opcode cases below.  Many opcodes assert
 * unreachable because earlier lowering passes are expected to have
 * removed them.
 */
void
fs_visitor::visit(ir_expression *ir)
{
   unsigned int operand;
   fs_reg op[3], temp;
   fs_inst *inst;

   assert(ir->get_num_operands() <= 3);

   /* Peephole patterns that consume the whole expression. */
   if (try_emit_saturate(ir))
      return;
   if (ir->operation == ir_binop_add) {
      if (try_emit_mad(ir, 0) || try_emit_mad(ir, 1))
         return;
   }

   for (operand = 0; operand < ir->get_num_operands(); operand++) {
      ir->operands[operand]->accept(this);
      if (this->result.file == BAD_FILE) {
         fail("Failed to get tree for expression operand:\n");
         ir->operands[operand]->print();
         printf("\n");
      }
      op[operand] = this->result;

      /* Matrix expression operands should have been broken down to vector
       * operations already.
       */
      assert(!ir->operands[operand]->type->is_matrix());
      /* And then those vector operands should have been broken down to scalar.
       */
      assert(!ir->operands[operand]->type->is_vector());
   }

   /* Storage for our result.  If our result goes into an assignment, it will
    * just get copy-propagated out, so no worries.
    */
   this->result = fs_reg(this, ir->type);

   switch (ir->operation) {
   case ir_unop_logic_not:
      /* Note that BRW_OPCODE_NOT is not appropriate here, since it is
       * ones complement of the whole register, not just bit 0.
       */
      emit(XOR(this->result, op[0], fs_reg(1)));
      break;
   case ir_unop_neg:
      op[0].negate = !op[0].negate;
      emit(MOV(this->result, op[0]));
      break;
   case ir_unop_abs:
      op[0].abs = true;
      op[0].negate = false;
      emit(MOV(this->result, op[0]));
      break;
   case ir_unop_sign:
      /* sign(x): start at 0, predicated-overwrite with +1 or -1. */
      temp = fs_reg(this, ir->type);

      emit(MOV(this->result, fs_reg(0.0f)));

      emit(CMP(reg_null_f, op[0], fs_reg(0.0f), BRW_CONDITIONAL_G));
      inst = emit(MOV(this->result, fs_reg(1.0f)));
      inst->predicate = BRW_PREDICATE_NORMAL;

      emit(CMP(reg_null_f, op[0], fs_reg(0.0f), BRW_CONDITIONAL_L));
      inst = emit(MOV(this->result, fs_reg(-1.0f)));
      inst->predicate = BRW_PREDICATE_NORMAL;

      break;
   case ir_unop_rcp:
      emit_math(SHADER_OPCODE_RCP, this->result, op[0]);
      break;

   case ir_unop_exp2:
      emit_math(SHADER_OPCODE_EXP2, this->result, op[0]);
      break;
   case ir_unop_log2:
      emit_math(SHADER_OPCODE_LOG2, this->result, op[0]);
      break;
   case ir_unop_exp:
   case ir_unop_log:
      assert(!"not reached: should be handled by ir_explog_to_explog2");
      break;
   case ir_unop_sin:
   case ir_unop_sin_reduced:
      emit_math(SHADER_OPCODE_SIN, this->result, op[0]);
      break;
   case ir_unop_cos:
   case ir_unop_cos_reduced:
      emit_math(SHADER_OPCODE_COS, this->result, op[0]);
      break;

   case ir_unop_dFdx:
      emit(FS_OPCODE_DDX, this->result, op[0]);
      break;
   case ir_unop_dFdy:
      emit(FS_OPCODE_DDY, this->result, op[0]);
      break;

   case ir_binop_add:
      emit(ADD(this->result, op[0], op[1]));
      break;
   case ir_binop_sub:
      assert(!"not reached: should be handled by ir_sub_to_add_neg");
      break;

   case ir_binop_mul:
      if (ir->type->is_integer()) {
         /* For integer multiplication, the MUL uses the low 16 bits
          * of one of the operands (src0 on gen6, src1 on gen7).  The
          * MACH accumulates in the contribution of the upper 16 bits
          * of that operand.
          *
          * FINISHME: Emit just the MUL if we know an operand is small
          * enough.
          */
         if (brw->gen >= 7 && dispatch_width == 16)
            fail("16-wide explicit accumulator operands unsupported\n");

         struct brw_reg acc = retype(brw_acc_reg(), BRW_REGISTER_TYPE_D);

         emit(MUL(acc, op[0], op[1]));
         emit(MACH(reg_null_d, op[0], op[1]));
         emit(MOV(this->result, fs_reg(acc)));
      } else {
         emit(MUL(this->result, op[0], op[1]));
      }
      break;
   case ir_binop_div:
      /* Floating point should be lowered by DIV_TO_MUL_RCP in the compiler. */
      assert(ir->type->is_integer());
      emit_math(SHADER_OPCODE_INT_QUOTIENT, this->result, op[0], op[1]);
      break;
   case ir_binop_mod:
      /* Floating point should be lowered by MOD_TO_FRACT in the compiler. */
      assert(ir->type->is_integer());
      emit_math(SHADER_OPCODE_INT_REMAINDER, this->result, op[0], op[1]);
      break;

   case ir_binop_less:
   case ir_binop_greater:
   case ir_binop_lequal:
   case ir_binop_gequal:
   case ir_binop_equal:
   case ir_binop_all_equal:
   case ir_binop_nequal:
   case ir_binop_any_nequal:
      resolve_bool_comparison(ir->operands[0], &op[0]);
      resolve_bool_comparison(ir->operands[1], &op[1]);

      emit(CMP(this->result, op[0], op[1],
               brw_conditional_for_comparison(ir->operation)));
      break;

   case ir_binop_logic_xor:
      emit(XOR(this->result, op[0], op[1]));
      break;

   case ir_binop_logic_or:
      emit(OR(this->result, op[0], op[1]));
      break;

   case ir_binop_logic_and:
      emit(AND(this->result, op[0], op[1]));
      break;

   case ir_binop_dot:
   case ir_unop_any:
      assert(!"not reached: should be handled by brw_fs_channel_expressions");
      break;

   case ir_unop_noise:
      assert(!"not reached: should be handled by lower_noise");
      break;

   case ir_quadop_vector:
      assert(!"not reached: should be handled by lower_quadop_vector");
      break;

   case ir_binop_vector_extract:
      assert(!"not reached: should be handled by lower_vec_index_to_cond_assign()");
      break;

   case ir_triop_vector_insert:
      assert(!"not reached: should be handled by lower_vector_insert()");
      break;

   case ir_unop_sqrt:
      emit_math(SHADER_OPCODE_SQRT, this->result, op[0]);
      break;

   case ir_unop_rsq:
      emit_math(SHADER_OPCODE_RSQ, this->result, op[0]);
      break;

   /* Bitcasts are free: just reinterpret the register's type. */
   case ir_unop_bitcast_i2f:
   case ir_unop_bitcast_u2f:
      op[0].type = BRW_REGISTER_TYPE_F;
      this->result = op[0];
      break;
   case ir_unop_i2u:
   case ir_unop_bitcast_f2u:
      op[0].type = BRW_REGISTER_TYPE_UD;
      this->result = op[0];
      break;
   case ir_unop_u2i:
   case ir_unop_bitcast_f2i:
      op[0].type = BRW_REGISTER_TYPE_D;
      this->result = op[0];
      break;
   case ir_unop_i2f:
   case ir_unop_u2f:
   case ir_unop_f2i:
   case ir_unop_f2u:
      /* Value-converting casts: MOV performs the conversion because the
       * source and destination register types differ.
       */
      emit(MOV(this->result, op[0]));
      break;

   case ir_unop_b2i:
      emit(AND(this->result, op[0], fs_reg(1)));
      break;
   case ir_unop_b2f:
      /* Mask to 0/1 first, then int->float convert via MOV. */
      temp = fs_reg(this, glsl_type::int_type);
      emit(AND(temp, op[0], fs_reg(1)));
      emit(MOV(this->result, temp));
      break;

   case ir_unop_f2b:
      emit(CMP(this->result, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ));
      break;
   case ir_unop_i2b:
      emit(CMP(this->result, op[0], fs_reg(0), BRW_CONDITIONAL_NZ));
      break;

   case ir_unop_trunc:
      emit(RNDZ(this->result, op[0]));
      break;
   case ir_unop_ceil:
      /* ceil(x) = -floor(-x) */
      op[0].negate = !op[0].negate;
      emit(RNDD(this->result, op[0]));
      this->result.negate = true;
      break;
   case ir_unop_floor:
      emit(RNDD(this->result, op[0]));
      break;
   case ir_unop_fract:
      emit(FRC(this->result, op[0]));
      break;
   case ir_unop_round_even:
      emit(RNDE(this->result, op[0]));
      break;

   case ir_binop_min:
   case ir_binop_max:
      resolve_ud_negate(&op[0]);
      resolve_ud_negate(&op[1]);
      emit_minmax(ir->operation == ir_binop_min ?
                  BRW_CONDITIONAL_L : BRW_CONDITIONAL_GE,
                  this->result, op[0], op[1]);
      break;
   case ir_unop_pack_snorm_2x16:
   case ir_unop_pack_snorm_4x8:
   case ir_unop_pack_unorm_2x16:
   case ir_unop_pack_unorm_4x8:
   case ir_unop_unpack_snorm_2x16:
   case ir_unop_unpack_snorm_4x8:
   case ir_unop_unpack_unorm_2x16:
   case ir_unop_unpack_unorm_4x8:
   case ir_unop_unpack_half_2x16:
   case ir_unop_pack_half_2x16:
      assert(!"not reached: should be handled by lower_packing_builtins");
      break;
   case ir_unop_unpack_half_2x16_split_x:
      emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X, this->result, op[0]);
      break;
   case ir_unop_unpack_half_2x16_split_y:
      emit(FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y, this->result, op[0]);
      break;
   case ir_binop_pow:
      emit_math(SHADER_OPCODE_POW, this->result, op[0], op[1]);
      break;

   case ir_unop_bitfield_reverse:
      emit(BFREV(this->result, op[0]));
      break;
   case ir_unop_bit_count:
      emit(CBIT(this->result, op[0]));
      break;
   case ir_unop_find_msb:
      temp = fs_reg(this, glsl_type::uint_type);
      emit(FBH(temp, op[0]));

      /* FBH counts from the MSB side, while GLSL's findMSB() wants the count
       * from the LSB side.  If FBH didn't return an error (0xFFFFFFFF), then
       * subtract the result from 31 to convert the MSB count into an LSB count.
       */

      /* FBH only supports UD type for dst, so use a MOV to convert UD to D. */
      emit(MOV(this->result, temp));
      emit(CMP(reg_null_d, this->result, fs_reg(-1), BRW_CONDITIONAL_NZ));

      temp.negate = true;
      inst = emit(ADD(this->result, temp, fs_reg(31)));
      inst->predicate = BRW_PREDICATE_NORMAL;
      break;
   case ir_unop_find_lsb:
      emit(FBL(this->result, op[0]));
      break;
   case ir_triop_bitfield_extract:
      /* Note that the instruction's argument order is reversed from GLSL
       * and the IR.
       */
      emit(BFE(this->result, op[2], op[1], op[0]));
      break;
   case ir_binop_bfm:
      emit(BFI1(this->result, op[0], op[1]));
      break;
   case ir_triop_bfi:
      emit(BFI2(this->result, op[0], op[1], op[2]));
      break;
   case ir_quadop_bitfield_insert:
      assert(!"not reached: should be handled by "
             "lower_instructions::bitfield_insert_to_bfm_bfi");
      break;

   case ir_unop_bit_not:
      emit(NOT(this->result, op[0]));
      break;
   case ir_binop_bit_and:
      emit(AND(this->result, op[0], op[1]));
      break;
   case ir_binop_bit_xor:
      emit(XOR(this->result, op[0], op[1]));
      break;
   case ir_binop_bit_or:
      emit(OR(this->result, op[0], op[1]));
      break;

   case ir_binop_lshift:
      emit(SHL(this->result, op[0], op[1]));
      break;

   case ir_binop_rshift:
      /* Arithmetic shift for signed, logical shift for unsigned. */
      if (ir->type->base_type == GLSL_TYPE_INT)
         emit(ASR(this->result, op[0], op[1]));
      else
         emit(SHR(this->result, op[0], op[1]));
      break;
   case ir_binop_pack_half_2x16_split:
      emit(FS_OPCODE_PACK_HALF_2x16_SPLIT, this->result, op[0], op[1]);
      break;
   case ir_binop_ubo_load: {
      /* This IR node takes a constant uniform block and a constant or
       * variable byte offset within the block and loads a vector from that.
       */
      ir_constant *uniform_block = ir->operands[0]->as_constant();
      ir_constant *const_offset = ir->operands[1]->as_constant();
      fs_reg surf_index = fs_reg((unsigned)SURF_INDEX_WM_UBO(uniform_block->value.u[0]));
      if (const_offset) {
         fs_reg packed_consts = fs_reg(this, glsl_type::float_type);
         packed_consts.type = result.type;

         /* Load the 16-byte-aligned chunk containing the offset. */
         fs_reg const_offset_reg = fs_reg(const_offset->value.u[0] & ~15);
         emit(fs_inst(FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD,
                      packed_consts, surf_index, const_offset_reg));

         /* smear selects the dword within the loaded chunk. */
         packed_consts.smear = const_offset->value.u[0] % 16 / 4;
         for (int i = 0; i < ir->type->vector_elements; i++) {
            /* UBO bools are any nonzero value.  We consider bools to be
             * values with the low bit set to 1.  Convert them using CMP.
             */
            if (ir->type->base_type == GLSL_TYPE_BOOL) {
               emit(CMP(result, packed_consts, fs_reg(0u), BRW_CONDITIONAL_NZ));
            } else {
               emit(MOV(result, packed_consts));
            }

            packed_consts.smear++;
            result.reg_offset++;

            /* The std140 packing rules don't allow vectors to cross 16-byte
             * boundaries, and a reg is 32 bytes.
             */
            assert(packed_consts.smear < 8);
         }
      } else {
         /* Turn the byte offset into a dword offset. */
         fs_reg base_offset = fs_reg(this, glsl_type::int_type);
         emit(SHR(base_offset, op[1], fs_reg(2)));

         for (int i = 0; i < ir->type->vector_elements; i++) {
            emit(VARYING_PULL_CONSTANT_LOAD(result, surf_index,
                                            base_offset, i));

            if (ir->type->base_type == GLSL_TYPE_BOOL)
               emit(CMP(result, result, fs_reg(0), BRW_CONDITIONAL_NZ));

            result.reg_offset++;
         }
      }

      result.reg_offset = 0;
      break;
   }

   case ir_triop_lrp:
      emit_lrp(this->result, op[0], op[1], op[2]);
      break;
   }
}
||
725 | |||
726 | void |
||
727 | fs_visitor::emit_assignment_writes(fs_reg &l, fs_reg &r, |
||
728 | const glsl_type *type, bool predicated) |
||
729 | { |
||
730 | switch (type->base_type) { |
||
731 | case GLSL_TYPE_FLOAT: |
||
732 | case GLSL_TYPE_UINT: |
||
733 | case GLSL_TYPE_INT: |
||
734 | case GLSL_TYPE_BOOL: |
||
735 | for (unsigned int i = 0; i < type->components(); i++) { |
||
736 | l.type = brw_type_for_base_type(type); |
||
737 | r.type = brw_type_for_base_type(type); |
||
738 | |||
739 | if (predicated || !l.equals(r)) { |
||
740 | fs_inst *inst = emit(MOV(l, r)); |
||
741 | inst->predicate = predicated ? BRW_PREDICATE_NORMAL : BRW_PREDICATE_NONE; |
||
742 | } |
||
743 | |||
744 | l.reg_offset++; |
||
745 | r.reg_offset++; |
||
746 | } |
||
747 | break; |
||
748 | case GLSL_TYPE_ARRAY: |
||
749 | for (unsigned int i = 0; i < type->length; i++) { |
||
750 | emit_assignment_writes(l, r, type->fields.array, predicated); |
||
751 | } |
||
752 | break; |
||
753 | |||
754 | case GLSL_TYPE_STRUCT: |
||
755 | for (unsigned int i = 0; i < type->length; i++) { |
||
756 | emit_assignment_writes(l, r, type->fields.structure[i].type, |
||
757 | predicated); |
||
758 | } |
||
759 | break; |
||
760 | |||
761 | case GLSL_TYPE_SAMPLER: |
||
762 | break; |
||
763 | |||
764 | case GLSL_TYPE_VOID: |
||
765 | case GLSL_TYPE_ERROR: |
||
766 | case GLSL_TYPE_INTERFACE: |
||
767 | assert(!"not reached"); |
||
768 | break; |
||
769 | } |
||
770 | } |
||
771 | |||
772 | /* If the RHS processing resulted in an instruction generating a |
||
773 | * temporary value, and it would be easy to rewrite the instruction to |
||
774 | * generate its result right into the LHS instead, do so. This ends |
||
775 | * up reliably removing instructions where it can be tricky to do so |
||
776 | * later without real UD chain information. |
||
777 | */ |
||
778 | bool |
||
779 | fs_visitor::try_rewrite_rhs_to_dst(ir_assignment *ir, |
||
780 | fs_reg dst, |
||
781 | fs_reg src, |
||
782 | fs_inst *pre_rhs_inst, |
||
783 | fs_inst *last_rhs_inst) |
||
784 | { |
||
785 | /* Only attempt if we're doing a direct assignment. */ |
||
786 | if (ir->condition || |
||
787 | !(ir->lhs->type->is_scalar() || |
||
788 | (ir->lhs->type->is_vector() && |
||
789 | ir->write_mask == (1 << ir->lhs->type->vector_elements) - 1))) |
||
790 | return false; |
||
791 | |||
792 | /* Make sure the last instruction generated our source reg. */ |
||
793 | fs_inst *modify = get_instruction_generating_reg(pre_rhs_inst, |
||
794 | last_rhs_inst, |
||
795 | src); |
||
796 | if (!modify) |
||
797 | return false; |
||
798 | |||
799 | /* If last_rhs_inst wrote a different number of components than our LHS, |
||
800 | * we can't safely rewrite it. |
||
801 | */ |
||
802 | if (virtual_grf_sizes[dst.reg] != modify->regs_written) |
||
803 | return false; |
||
804 | |||
805 | /* Success! Rewrite the instruction. */ |
||
806 | modify->dst = dst; |
||
807 | |||
808 | return true; |
||
809 | } |
||
810 | |||
811 | void |
||
812 | fs_visitor::visit(ir_assignment *ir) |
||
813 | { |
||
814 | fs_reg l, r; |
||
815 | fs_inst *inst; |
||
816 | |||
817 | /* FINISHME: arrays on the lhs */ |
||
818 | ir->lhs->accept(this); |
||
819 | l = this->result; |
||
820 | |||
821 | fs_inst *pre_rhs_inst = (fs_inst *) this->instructions.get_tail(); |
||
822 | |||
823 | ir->rhs->accept(this); |
||
824 | r = this->result; |
||
825 | |||
826 | fs_inst *last_rhs_inst = (fs_inst *) this->instructions.get_tail(); |
||
827 | |||
828 | assert(l.file != BAD_FILE); |
||
829 | assert(r.file != BAD_FILE); |
||
830 | |||
831 | if (try_rewrite_rhs_to_dst(ir, l, r, pre_rhs_inst, last_rhs_inst)) |
||
832 | return; |
||
833 | |||
834 | if (ir->condition) { |
||
835 | emit_bool_to_cond_code(ir->condition); |
||
836 | } |
||
837 | |||
838 | if (ir->lhs->type->is_scalar() || |
||
839 | ir->lhs->type->is_vector()) { |
||
840 | for (int i = 0; i < ir->lhs->type->vector_elements; i++) { |
||
841 | if (ir->write_mask & (1 << i)) { |
||
842 | inst = emit(MOV(l, r)); |
||
843 | if (ir->condition) |
||
844 | inst->predicate = BRW_PREDICATE_NORMAL; |
||
845 | r.reg_offset++; |
||
846 | } |
||
847 | l.reg_offset++; |
||
848 | } |
||
849 | } else { |
||
850 | emit_assignment_writes(l, r, ir->lhs->type, ir->condition != NULL); |
||
851 | } |
||
852 | } |
||
853 | |||
/**
 * Emit the MRF payload setup and SEND instruction for a texture operation
 * on Gen4 hardware.
 *
 * Gen4's SIMD8 sampler messages always carry all three u/v/r coordinate
 * slots, and several operations (txb/txl/txf without shadow compare, and
 * txs) have no SIMD8 message at all, forcing a SIMD16 message whose
 * interleaved return payload must be repacked afterwards.
 *
 * \param dst        destination for the 4-component sample result
 * \param coordinate texture coordinate; consumed component-by-component
 *                   via reg_offset increments
 * \param shadow_c   shadow comparison reference value (if any)
 * \param lod        LOD/bias for txb/txl/txs/txf; doubles as dPdx for txd
 * \param dPdy       y-derivatives for txd
 *
 * Returns the SEND instruction so the caller (visit(ir_texture*)) can fill
 * in sampler index, texture offsets, and shadow_compare.
 */
fs_inst *
fs_visitor::emit_texture_gen4(ir_texture *ir, fs_reg dst, fs_reg coordinate,
                              fs_reg shadow_c, fs_reg lod, fs_reg dPdy)
{
   int mlen;
   int base_mrf = 1;
   bool simd16 = false;
   fs_reg orig_dst;

   /* g0 header. */
   mlen = 1;

   if (ir->shadow_comparitor) {
      for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
         emit(MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate));
         coordinate.reg_offset++;
      }

      /* gen4's SIMD8 sampler always has the slots for u,v,r present.
       * the unused slots must be zeroed.
       */
      for (int i = ir->coordinate->type->vector_elements; i < 3; i++) {
         emit(MOV(fs_reg(MRF, base_mrf + mlen + i), fs_reg(0.0f)));
      }
      mlen += 3;

      if (ir->op == ir_tex) {
         /* There's no plain shadow compare message, so we use shadow
          * compare with a bias of 0.0.
          */
         emit(MOV(fs_reg(MRF, base_mrf + mlen), fs_reg(0.0f)));
         mlen++;
      } else if (ir->op == ir_txb || ir->op == ir_txl) {
         /* Bias or explicit LOD occupies the slot before the reference. */
         emit(MOV(fs_reg(MRF, base_mrf + mlen), lod));
         mlen++;
      } else {
         /* No other op combines with a shadow comparitor in this path. */
         assert(!"Should not get here.");
      }

      /* The shadow reference value goes last in the payload. */
      emit(MOV(fs_reg(MRF, base_mrf + mlen), shadow_c));
      mlen++;
   } else if (ir->op == ir_tex) {
      for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
         emit(MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate));
         coordinate.reg_offset++;
      }
      /* zero the others. */
      for (int i = ir->coordinate->type->vector_elements; i < 3; i++) {
         emit(MOV(fs_reg(MRF, base_mrf + mlen + i), fs_reg(0.0f)));
      }
      /* gen4's SIMD8 sampler always has the slots for u,v,r present. */
      mlen += 3;
   } else if (ir->op == ir_txd) {
      /* For txd, the 'lod' argument actually carries dPdx. */
      fs_reg &dPdx = lod;

      for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
         emit(MOV(fs_reg(MRF, base_mrf + mlen + i), coordinate));
         coordinate.reg_offset++;
      }
      /* the slots for u and v are always present, but r is optional */
      mlen += MAX2(ir->coordinate->type->vector_elements, 2);

      /* P = u, v, r
       * dPdx = dudx, dvdx, drdx
       * dPdy = dudy, dvdy, drdy
       *
       * 1-arg: Does not exist.
       *
       * 2-arg: dudx   dvdx   dudy   dvdy
       *        dPdx.x dPdx.y dPdy.x dPdy.y
       *        m4     m5     m6     m7
       *
       * 3-arg: dudx   dvdx   drdx   dudy   dvdy   drdy
       *        dPdx.x dPdx.y dPdx.z dPdy.x dPdy.y dPdy.z
       *        m5     m6     m7     m8     m9     m10
       */
      for (int i = 0; i < ir->lod_info.grad.dPdx->type->vector_elements; i++) {
         emit(MOV(fs_reg(MRF, base_mrf + mlen), dPdx));
         dPdx.reg_offset++;
      }
      mlen += MAX2(ir->lod_info.grad.dPdx->type->vector_elements, 2);

      for (int i = 0; i < ir->lod_info.grad.dPdy->type->vector_elements; i++) {
         emit(MOV(fs_reg(MRF, base_mrf + mlen), dPdy));
         dPdy.reg_offset++;
      }
      mlen += MAX2(ir->lod_info.grad.dPdy->type->vector_elements, 2);
   } else if (ir->op == ir_txs) {
      /* There's no SIMD8 resinfo message on Gen4.  Use SIMD16 instead. */
      simd16 = true;
      emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), lod));
      mlen += 2;
   } else {
      /* Oh joy.  gen4 doesn't have SIMD8 non-shadow-compare bias/lod
       * instructions.  We'll need to do SIMD16 here.
       */
      simd16 = true;
      assert(ir->op == ir_txb || ir->op == ir_txl || ir->op == ir_txf);

      /* SIMD16 payloads use two registers per coordinate component,
       * hence the i * 2 stride.
       */
      for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
         emit(MOV(fs_reg(MRF, base_mrf + mlen + i * 2, coordinate.type),
                  coordinate));
         coordinate.reg_offset++;
      }

      /* Initialize the rest of u/v/r with 0.0.  Empirically, this seems to
       * be necessary for TXF (ld), but seems wise to do for all messages.
       */
      for (int i = ir->coordinate->type->vector_elements; i < 3; i++) {
         emit(MOV(fs_reg(MRF, base_mrf + mlen + i * 2), fs_reg(0.0f)));
      }

      /* lod/bias appears after u/v/r. */
      mlen += 6;

      emit(MOV(fs_reg(MRF, base_mrf + mlen, lod.type), lod));
      mlen++;

      /* The unused upper half. */
      mlen++;
   }

   if (simd16) {
      /* Now, since we're doing simd16, the return is 2 interleaved
       * vec4s where the odd-indexed ones are junk. We'll need to move
       * this weirdness around to the expected layout.
       */
      orig_dst = dst;
      dst = fs_reg(GRF, virtual_grf_alloc(8),
                   (brw->is_g4x ?
                    brw_type_for_base_type(ir->type) :
                    BRW_REGISTER_TYPE_F));
   }

   fs_inst *inst = NULL;
   switch (ir->op) {
   case ir_tex:
      inst = emit(SHADER_OPCODE_TEX, dst);
      break;
   case ir_txb:
      inst = emit(FS_OPCODE_TXB, dst);
      break;
   case ir_txl:
      inst = emit(SHADER_OPCODE_TXL, dst);
      break;
   case ir_txd:
      inst = emit(SHADER_OPCODE_TXD, dst);
      break;
   case ir_txs:
      inst = emit(SHADER_OPCODE_TXS, dst);
      break;
   case ir_txf:
      inst = emit(SHADER_OPCODE_TXF, dst);
      break;
   default:
      fail("unrecognized texture opcode");
   }
   inst->base_mrf = base_mrf;
   inst->mlen = mlen;
   inst->header_present = true;
   inst->regs_written = simd16 ? 8 : 4;

   if (simd16) {
      /* De-interleave the SIMD16 return: keep the even-indexed vec4
       * components, skipping the junk odd-indexed ones.
       */
      for (int i = 0; i < 4; i++) {
         emit(MOV(orig_dst, dst));
         orig_dst.reg_offset++;
         dst.reg_offset += 2;
      }
   }

   return inst;
}
||
1026 | |||
/* gen5's sampler has slots for u, v, r, array index, then optional
 * parameters like shadow comparitor or LOD bias.  If optional
 * parameters aren't present, those base slots are optional and don't
 * need to be included in the message.
 *
 * We don't fill in the unnecessary slots regardless, which may look
 * surprising in the disassembly.
 */
/**
 * Emit the MRF payload setup and SEND instruction for a texture operation
 * on Gen5/Gen6 hardware.
 *
 * \param dst          destination for the 4-component sample result
 * \param coordinate   texture coordinate; consumed per-component via
 *                     reg_offset increments
 * \param shadow_c     shadow comparison reference value (if any)
 * \param lod          LOD/bias for txb/txl/txs/txf; dPdx for txd
 * \param lod2         dPdy for txd
 * \param sample_index sample index for txf_ms
 *
 * Returns the SEND instruction for the caller to finish (sampler index,
 * offsets, shadow_compare).
 */
fs_inst *
fs_visitor::emit_texture_gen5(ir_texture *ir, fs_reg dst, fs_reg coordinate,
                              fs_reg shadow_c, fs_reg lod, fs_reg lod2,
                              fs_reg sample_index)
{
   int mlen = 0;
   int base_mrf = 2;
   int reg_width = dispatch_width / 8;  /* MRF regs per logical component */
   bool header_present = false;
   const int vector_elements =
      ir->coordinate ? ir->coordinate->type->vector_elements : 0;

   if (ir->offset != NULL && ir->op == ir_txf) {
      /* It appears that the ld instruction used for txf does its
       * address bounds check before adding in the offset.  To work
       * around this, just add the integer offset to the integer texel
       * coordinate, and don't put the offset in the header.
       */
      ir_constant *offset = ir->offset->as_constant();
      for (int i = 0; i < vector_elements; i++) {
         emit(ADD(fs_reg(MRF, base_mrf + mlen + i * reg_width, coordinate.type),
                  coordinate,
                  offset->value.i[i]));
         coordinate.reg_offset++;
      }
   } else {
      if (ir->offset) {
         /* The offsets set up by the ir_texture visitor are in the
          * m1 header, so we can't go headerless.
          */
         header_present = true;
         mlen++;
         base_mrf--;
      }

      for (int i = 0; i < vector_elements; i++) {
         emit(MOV(fs_reg(MRF, base_mrf + mlen + i * reg_width, coordinate.type),
                  coordinate));
         coordinate.reg_offset++;
      }
   }
   mlen += vector_elements * reg_width;

   if (ir->shadow_comparitor) {
      /* Skip over unfilled u/v/r/ai slots; the reference value has a
       * fixed position after the four coordinate slots.
       */
      mlen = MAX2(mlen, header_present + 4 * reg_width);

      emit(MOV(fs_reg(MRF, base_mrf + mlen), shadow_c));
      mlen += reg_width;
   }

   fs_inst *inst = NULL;
   switch (ir->op) {
   case ir_tex:
      inst = emit(SHADER_OPCODE_TEX, dst);
      break;
   case ir_txb:
      /* Bias goes after the four coordinate slots. */
      mlen = MAX2(mlen, header_present + 4 * reg_width);
      emit(MOV(fs_reg(MRF, base_mrf + mlen), lod));
      mlen += reg_width;

      inst = emit(FS_OPCODE_TXB, dst);
      break;
   case ir_txl:
      /* Explicit LOD goes after the four coordinate slots. */
      mlen = MAX2(mlen, header_present + 4 * reg_width);
      emit(MOV(fs_reg(MRF, base_mrf + mlen), lod));
      mlen += reg_width;

      inst = emit(SHADER_OPCODE_TXL, dst);
      break;
   case ir_txd: {
      mlen = MAX2(mlen, header_present + 4 * reg_width); /* skip over 'ai' */

      /**
       * P = u, v, r
       * dPdx = dudx, dvdx, drdx
       * dPdy = dudy, dvdy, drdy
       *
       * Load up these values:
       * - dudx   dudy   dvdx   dvdy   drdx   drdy
       * - dPdx.x dPdy.x dPdx.y dPdy.y dPdx.z dPdy.z
       */
      for (int i = 0; i < ir->lod_info.grad.dPdx->type->vector_elements; i++) {
         emit(MOV(fs_reg(MRF, base_mrf + mlen), lod));
         lod.reg_offset++;
         mlen += reg_width;

         emit(MOV(fs_reg(MRF, base_mrf + mlen), lod2));
         lod2.reg_offset++;
         mlen += reg_width;
      }

      inst = emit(SHADER_OPCODE_TXD, dst);
      break;
   }
   case ir_txs:
      /* resinfo takes only the (integer) LOD. */
      emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), lod));
      mlen += reg_width;
      inst = emit(SHADER_OPCODE_TXS, dst);
      break;
   case ir_txf:
      /* The ld message wants the LOD in the slot before position 4;
       * coordinates were already written above.
       */
      mlen = header_present + 4 * reg_width;
      emit(MOV(fs_reg(MRF, base_mrf + mlen - reg_width, BRW_REGISTER_TYPE_UD), lod));
      inst = emit(SHADER_OPCODE_TXF, dst);
      break;
   case ir_txf_ms:
      mlen = header_present + 4 * reg_width;

      /* lod */
      emit(MOV(fs_reg(MRF, base_mrf + mlen - reg_width, BRW_REGISTER_TYPE_UD), fs_reg(0)));
      /* sample index */
      emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), sample_index));
      mlen += reg_width;
      inst = emit(SHADER_OPCODE_TXF_MS, dst);
      break;
   case ir_lod:
      inst = emit(SHADER_OPCODE_LOD, dst);
      break;
   }
   inst->base_mrf = base_mrf;
   inst->mlen = mlen;
   inst->header_present = header_present;
   inst->regs_written = 4;

   if (mlen > 11) {
      fail("Message length >11 disallowed by hardware\n");
   }

   return inst;
}
||
1164 | |||
/**
 * Emit the MRF payload setup and SEND instruction for a texture operation
 * on Gen7 hardware.
 *
 * Gen7 sampler payloads interleave parameters with the coordinate for
 * some ops (txd, txf), so coordinate setup is split between the per-op
 * switch below and the common loop after it.
 *
 * \param dst          destination for the 4-component sample result
 * \param coordinate   texture coordinate; consumed per-component via
 *                     reg_offset increments
 * \param shadow_c     shadow comparison reference value (if any)
 * \param lod          LOD/bias for txb/txl/txs/txf; dPdx for txd
 * \param lod2         dPdy for txd
 * \param sample_index sample index for txf_ms
 */
fs_inst *
fs_visitor::emit_texture_gen7(ir_texture *ir, fs_reg dst, fs_reg coordinate,
                              fs_reg shadow_c, fs_reg lod, fs_reg lod2,
                              fs_reg sample_index)
{
   int mlen = 0;
   int base_mrf = 2;
   int reg_width = dispatch_width / 8;  /* MRF regs per logical component */
   bool header_present = false;
   int offsets[3];

   if (ir->offset && ir->op != ir_txf) {
      /* The offsets set up by the ir_texture visitor are in the
       * m1 header, so we can't go headerless.
       */
      header_present = true;
      mlen++;
      base_mrf--;
   }

   if (ir->shadow_comparitor) {
      /* The shadow reference value leads the payload. */
      emit(MOV(fs_reg(MRF, base_mrf + mlen), shadow_c));
      mlen += reg_width;
   }

   /* Set up the LOD info */
   switch (ir->op) {
   case ir_tex:
   case ir_lod:
      break;
   case ir_txb:
      emit(MOV(fs_reg(MRF, base_mrf + mlen), lod));
      mlen += reg_width;
      break;
   case ir_txl:
      emit(MOV(fs_reg(MRF, base_mrf + mlen), lod));
      mlen += reg_width;
      break;
   case ir_txd: {
      if (dispatch_width == 16)
         fail("Gen7 does not support sample_d/sample_d_c in SIMD16 mode.");

      /* Load dPdx and the coordinate together:
       * [hdr], [ref], x, dPdx.x, dPdy.x, y, dPdx.y, dPdy.y, z, dPdx.z, dPdy.z
       */
      for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
         emit(MOV(fs_reg(MRF, base_mrf + mlen), coordinate));
         coordinate.reg_offset++;
         mlen += reg_width;

         /* For cube map array, the coordinate is (u,v,r,ai) but there are
          * only derivatives for (u, v, r).
          */
         if (i < ir->lod_info.grad.dPdx->type->vector_elements) {
            emit(MOV(fs_reg(MRF, base_mrf + mlen), lod));
            lod.reg_offset++;
            mlen += reg_width;

            emit(MOV(fs_reg(MRF, base_mrf + mlen), lod2));
            lod2.reg_offset++;
            mlen += reg_width;
         }
      }
      break;
   }
   case ir_txs:
      emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), lod));
      mlen += reg_width;
      break;
   case ir_txf:
      /* It appears that the ld instruction used for txf does its
       * address bounds check before adding in the offset.  To work
       * around this, just add the integer offset to the integer texel
       * coordinate, and don't put the offset in the header.
       */
      if (ir->offset) {
         ir_constant *offset = ir->offset->as_constant();
         /* NOTE(review): all three components are read regardless of the
          * offset's vector width — presumably safe because ir_constant's
          * value storage is fixed-size; only offsets[i] for valid i are
          * actually used below.
          */
         offsets[0] = offset->value.i[0];
         offsets[1] = offset->value.i[1];
         offsets[2] = offset->value.i[2];
      } else {
         memset(offsets, 0, sizeof(offsets));
      }

      /* Unfortunately, the parameters for LD are intermixed: u, lod, v, r. */
      emit(ADD(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D),
               coordinate, offsets[0]));
      coordinate.reg_offset++;
      mlen += reg_width;

      emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D), lod));
      mlen += reg_width;

      for (int i = 1; i < ir->coordinate->type->vector_elements; i++) {
         emit(ADD(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D),
                  coordinate, offsets[i]));
         coordinate.reg_offset++;
         mlen += reg_width;
      }
      break;
   case ir_txf_ms:
      emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), sample_index));
      mlen += reg_width;

      /* constant zero MCS; we arrange to never actually have a compressed
       * multisample surface here for now. TODO: issue ld_mcs to get this first,
       * if we ever support texturing from compressed multisample surfaces
       */
      emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_UD), fs_reg(0u)));
      mlen += reg_width;

      /* there is no offsetting for this message; just copy in the integer
       * texture coordinates
       */
      for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
         emit(MOV(fs_reg(MRF, base_mrf + mlen, BRW_REGISTER_TYPE_D),
                  coordinate));
         coordinate.reg_offset++;
         mlen += reg_width;
      }
      break;
   }

   /* Set up the coordinate (except for cases where it was done above) */
   if (ir->op != ir_txd && ir->op != ir_txs && ir->op != ir_txf && ir->op != ir_txf_ms) {
      for (int i = 0; i < ir->coordinate->type->vector_elements; i++) {
         emit(MOV(fs_reg(MRF, base_mrf + mlen), coordinate));
         coordinate.reg_offset++;
         mlen += reg_width;
      }
   }

   /* Generate the SEND */
   fs_inst *inst = NULL;
   switch (ir->op) {
   case ir_tex: inst = emit(SHADER_OPCODE_TEX, dst); break;
   case ir_txb: inst = emit(FS_OPCODE_TXB, dst); break;
   case ir_txl: inst = emit(SHADER_OPCODE_TXL, dst); break;
   case ir_txd: inst = emit(SHADER_OPCODE_TXD, dst); break;
   case ir_txf: inst = emit(SHADER_OPCODE_TXF, dst); break;
   case ir_txf_ms: inst = emit(SHADER_OPCODE_TXF_MS, dst); break;
   case ir_txs: inst = emit(SHADER_OPCODE_TXS, dst); break;
   case ir_lod: inst = emit(SHADER_OPCODE_LOD, dst); break;
   }
   inst->base_mrf = base_mrf;
   inst->mlen = mlen;
   inst->header_present = header_present;
   inst->regs_written = 4;

   if (mlen > 11) {
      fail("Message length >11 disallowed by hardware\n");
   }

   return inst;
}
||
1320 | |||
/**
 * Fix up a texture coordinate for GL_TEXTURE_RECTANGLE sampling and for
 * GL_CLAMP wrap modes, returning the (possibly replaced) coordinate.
 *
 * \param coordinate the evaluated texture coordinate
 * \param is_rect    true for rectangle textures
 * \param sampler    sampler index (used against the program key's clamp
 *                   masks)
 * \param texunit    texture unit, used to reference the TEXRECT scale
 *                   state parameters
 */
fs_reg
fs_visitor::rescale_texcoord(ir_texture *ir, fs_reg coordinate,
                             bool is_rect, int sampler, int texunit)
{
   fs_inst *inst = NULL;
   bool needs_gl_clamp = true;
   fs_reg scale_x, scale_y;

   /* The 965 requires the EU to do the normalization of GL rectangle
    * texture coordinates.  We use the program parameter state
    * tracking to get the scaling factor.
    */
   if (is_rect &&
       (brw->gen < 6 ||
        (brw->gen >= 6 && (c->key.tex.gl_clamp_mask[0] & (1 << sampler) ||
                           c->key.tex.gl_clamp_mask[1] & (1 << sampler))))) {
      struct gl_program_parameter_list *params = fp->Base.Parameters;
      /* State token identifying the 1/width, 1/height scale parameter
       * for this texture unit.
       */
      int tokens[STATE_LENGTH] = {
         STATE_INTERNAL,
         STATE_TEXRECT_SCALE,
         texunit,
         0,
      };

      if (dispatch_width == 16) {
         fail("rectangle scale uniform setup not supported on 16-wide\n");
         return coordinate;
      }

      /* Reserve the next two uniform slots for the scale factors; the
       * parameter pointers are appended immediately below.
       */
      scale_x = fs_reg(UNIFORM, c->prog_data.nr_params);
      scale_y = fs_reg(UNIFORM, c->prog_data.nr_params + 1);

      GLuint index = _mesa_add_state_reference(params,
                                               (gl_state_index *)tokens);
      c->prog_data.param[c->prog_data.nr_params++] =
         &fp->Base.Parameters->ParameterValues[index][0].f;
      c->prog_data.param[c->prog_data.nr_params++] =
         &fp->Base.Parameters->ParameterValues[index][1].f;
   }

   /* The 965 requires the EU to do the normalization of GL rectangle
    * texture coordinates.  We use the program parameter state
    * tracking to get the scaling factor.
    */
   if (brw->gen < 6 && is_rect) {
      /* Scale (u, v) by (1/width, 1/height) into a fresh register. */
      fs_reg dst = fs_reg(this, ir->coordinate->type);
      fs_reg src = coordinate;
      coordinate = dst;

      emit(MUL(dst, src, scale_x));
      dst.reg_offset++;
      src.reg_offset++;
      emit(MUL(dst, src, scale_y));
   } else if (is_rect) {
      /* On gen6+, the sampler handles the rectangle coordinates
       * natively, without needing rescaling.  But that means we have
       * to do GL_CLAMP clamping at the [0, width], [0, height] scale,
       * not [0, 1] like the default case below.
       */
      needs_gl_clamp = false;

      for (int i = 0; i < 2; i++) {
         if (c->key.tex.gl_clamp_mask[i] & (1 << sampler)) {
            fs_reg chan = coordinate;
            chan.reg_offset += i;

            /* chan = max(chan, 0.0) */
            inst = emit(BRW_OPCODE_SEL, chan, chan, brw_imm_f(0.0));
            inst->conditional_mod = BRW_CONDITIONAL_G;

            /* Our parameter comes in as 1.0/width or 1.0/height,
             * because that's what people normally want for doing
             * texture rectangle handling.  We need width or height
             * for clamping, but we don't care enough to make a new
             * parameter type, so just invert back.
             */
            fs_reg limit = fs_reg(this, glsl_type::float_type);
            emit(MOV(limit, i == 0 ? scale_x : scale_y));
            emit(SHADER_OPCODE_RCP, limit, limit);

            /* chan = min(chan, width-or-height) */
            inst = emit(BRW_OPCODE_SEL, chan, chan, limit);
            inst->conditional_mod = BRW_CONDITIONAL_L;
         }
      }
   }

   if (ir->coordinate && needs_gl_clamp) {
      /* GL_CLAMP on normalized coordinates is just a saturate; only the
       * first three (u, v, r) components can be clamped.
       */
      for (unsigned int i = 0;
           i < MIN2(ir->coordinate->type->vector_elements, 3); i++) {
         if (c->key.tex.gl_clamp_mask[i] & (1 << sampler)) {
            fs_reg chan = coordinate;
            chan.reg_offset += i;

            fs_inst *inst = emit(MOV(chan, chan));
            inst->saturate = true;
         }
      }
   }
   return coordinate;
}
||
1421 | |||
/**
 * Generate code for an ir_texture operation: evaluate all operands,
 * dispatch to the per-generation payload emitter, then apply the
 * post-sample fixups (texture offsets, cube-array txs depth, swizzle).
 */
void
fs_visitor::visit(ir_texture *ir)
{
   fs_inst *inst = NULL;

   int sampler =
      _mesa_get_sampler_uniform_value(ir->sampler, shader_prog, &fp->Base);
   /* FINISHME: We're failing to recompile our programs when the sampler is
    * updated.  This only matters for the texture rectangle scale parameters
    * (pre-gen6, or gen6+ with GL_CLAMP).
    */
   int texunit = fp->Base.SamplerUnits[sampler];

   /* Should be lowered by do_lower_texture_projection */
   assert(!ir->projector);

   /* Generate code to compute all the subexpression trees.  This has to be
    * done before loading any values into MRFs for the sampler message since
    * generating these values may involve SEND messages that need the MRFs.
    */
   fs_reg coordinate;
   if (ir->coordinate) {
      ir->coordinate->accept(this);

      coordinate = rescale_texcoord(ir, this->result,
                                    ir->sampler->type->sampler_dimensionality ==
                                    GLSL_SAMPLER_DIM_RECT,
                                    sampler, texunit);
   }

   fs_reg shadow_comparitor;
   if (ir->shadow_comparitor) {
      ir->shadow_comparitor->accept(this);
      shadow_comparitor = this->result;
   }

   /* Evaluate the op-specific LOD/gradient/sample-index operand(s). */
   fs_reg lod, lod2, sample_index;
   switch (ir->op) {
   case ir_tex:
   case ir_lod:
      break;
   case ir_txb:
      ir->lod_info.bias->accept(this);
      lod = this->result;
      break;
   case ir_txd:
      ir->lod_info.grad.dPdx->accept(this);
      lod = this->result;

      ir->lod_info.grad.dPdy->accept(this);
      lod2 = this->result;
      break;
   case ir_txf:
   case ir_txl:
   case ir_txs:
      ir->lod_info.lod->accept(this);
      lod = this->result;
      break;
   case ir_txf_ms:
      ir->lod_info.sample_index->accept(this);
      sample_index = this->result;
      break;
   };

   /* Writemasking doesn't eliminate channels on SIMD8 texture
    * samples, so don't worry about them.
    */
   fs_reg dst = fs_reg(this, glsl_type::get_instance(ir->type->base_type, 4, 1));

   if (brw->gen >= 7) {
      inst = emit_texture_gen7(ir, dst, coordinate, shadow_comparitor,
                               lod, lod2, sample_index);
   } else if (brw->gen >= 5) {
      inst = emit_texture_gen5(ir, dst, coordinate, shadow_comparitor,
                               lod, lod2, sample_index);
   } else {
      inst = emit_texture_gen4(ir, dst, coordinate, shadow_comparitor,
                               lod, lod2);
   }

   /* The header is set up by generate_tex() when necessary. */
   inst->src[0] = reg_undef;

   /* txf folds its offset into the coordinate in the gen-specific
    * emitters instead of using the message header.
    */
   if (ir->offset != NULL && ir->op != ir_txf)
      inst->texture_offset = brw_texture_offset(ir->offset->as_constant());

   inst->sampler = sampler;

   if (ir->shadow_comparitor)
      inst->shadow_compare = true;

   /* fixup #layers for cube map arrays */
   if (ir->op == ir_txs) {
      glsl_type const *type = ir->sampler->type;
      if (type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE &&
          type->sampler_array) {
         /* The hardware reports layer-faces; divide by 6 to get layers. */
         fs_reg depth = dst;
         depth.reg_offset = 2;
         emit_math(SHADER_OPCODE_INT_QUOTIENT, depth, depth, fs_reg(6));
      }
   }

   swizzle_result(ir, dst, sampler);
}
||
1526 | |||
1527 | /** |
||
1528 | * Swizzle the result of a texture result. This is necessary for |
||
1529 | * EXT_texture_swizzle as well as DEPTH_TEXTURE_MODE for shadow comparisons. |
||
1530 | */ |
||
1531 | void |
||
1532 | fs_visitor::swizzle_result(ir_texture *ir, fs_reg orig_val, int sampler) |
||
1533 | { |
||
1534 | this->result = orig_val; |
||
1535 | |||
1536 | if (ir->op == ir_txs || ir->op == ir_lod) |
||
1537 | return; |
||
1538 | |||
1539 | if (ir->type == glsl_type::float_type) { |
||
1540 | /* Ignore DEPTH_TEXTURE_MODE swizzling. */ |
||
1541 | assert(ir->sampler->type->sampler_shadow); |
||
1542 | } else if (c->key.tex.swizzles[sampler] != SWIZZLE_NOOP) { |
||
1543 | fs_reg swizzled_result = fs_reg(this, glsl_type::vec4_type); |
||
1544 | |||
1545 | for (int i = 0; i < 4; i++) { |
||
1546 | int swiz = GET_SWZ(c->key.tex.swizzles[sampler], i); |
||
1547 | fs_reg l = swizzled_result; |
||
1548 | l.reg_offset += i; |
||
1549 | |||
1550 | if (swiz == SWIZZLE_ZERO) { |
||
1551 | emit(MOV(l, fs_reg(0.0f))); |
||
1552 | } else if (swiz == SWIZZLE_ONE) { |
||
1553 | emit(MOV(l, fs_reg(1.0f))); |
||
1554 | } else { |
||
1555 | fs_reg r = orig_val; |
||
1556 | r.reg_offset += GET_SWZ(c->key.tex.swizzles[sampler], i); |
||
1557 | emit(MOV(l, r)); |
||
1558 | } |
||
1559 | } |
||
1560 | this->result = swizzled_result; |
||
1561 | } |
||
1562 | } |
||
1563 | |||
1564 | void |
||
1565 | fs_visitor::visit(ir_swizzle *ir) |
||
1566 | { |
||
1567 | ir->val->accept(this); |
||
1568 | fs_reg val = this->result; |
||
1569 | |||
1570 | if (ir->type->vector_elements == 1) { |
||
1571 | this->result.reg_offset += ir->mask.x; |
||
1572 | return; |
||
1573 | } |
||
1574 | |||
1575 | fs_reg result = fs_reg(this, ir->type); |
||
1576 | this->result = result; |
||
1577 | |||
1578 | for (unsigned int i = 0; i < ir->type->vector_elements; i++) { |
||
1579 | fs_reg channel = val; |
||
1580 | int swiz = 0; |
||
1581 | |||
1582 | switch (i) { |
||
1583 | case 0: |
||
1584 | swiz = ir->mask.x; |
||
1585 | break; |
||
1586 | case 1: |
||
1587 | swiz = ir->mask.y; |
||
1588 | break; |
||
1589 | case 2: |
||
1590 | swiz = ir->mask.z; |
||
1591 | break; |
||
1592 | case 3: |
||
1593 | swiz = ir->mask.w; |
||
1594 | break; |
||
1595 | } |
||
1596 | |||
1597 | channel.reg_offset += swiz; |
||
1598 | emit(MOV(result, channel)); |
||
1599 | result.reg_offset++; |
||
1600 | } |
||
1601 | } |
||
1602 | |||
/**
 * Generate code for an ir_discard: kill the currently-live channels by
 * clearing their bits in the discard flag register, and on gen6+ jump to
 * the end of the shader when a whole subspan has been discarded.
 */
void
fs_visitor::visit(ir_discard *ir)
{
   assert(ir->condition == NULL); /* FINISHME */

   /* We track our discarded pixels in f0.1.  By predicating on it, we can
    * update just the flag bits that aren't yet discarded.  By emitting a
    * CMP of g0 != g0, all our currently executing channels will get turned
    * off.
    */
   fs_reg some_reg = fs_reg(retype(brw_vec8_grf(0, 0),
                                   BRW_REGISTER_TYPE_UW));
   fs_inst *cmp = emit(CMP(reg_null_f, some_reg, some_reg,
                           BRW_CONDITIONAL_NZ));
   cmp->predicate = BRW_PREDICATE_NORMAL;
   /* Use f0.1, not f0.0, so ordinary predication is untouched. */
   cmp->flag_subreg = 1;

   if (brw->gen >= 6) {
      /* For performance, after a discard, jump to the end of the shader.
       * However, many people will do foliage by discarding based on a
       * texture's alpha mask, and then continue on to texture with the
       * remaining pixels.  To avoid trashing the derivatives for those
       * texture samples, we'll only jump if all of the pixels in the subspan
       * have been discarded.
       */
      fs_inst *discard_jump = emit(FS_OPCODE_DISCARD_JUMP);
      discard_jump->flag_subreg = 1;
      /* "not any live in each group of 4" == whole subspan discarded. */
      discard_jump->predicate = BRW_PREDICATE_ALIGN1_ANY4H;
      discard_jump->predicate_inverse = true;
   }
}
||
1634 | |||
1635 | void |
||
1636 | fs_visitor::visit(ir_constant *ir) |
||
1637 | { |
||
1638 | /* Set this->result to reg at the bottom of the function because some code |
||
1639 | * paths will cause this visitor to be applied to other fields. This will |
||
1640 | * cause the value stored in this->result to be modified. |
||
1641 | * |
||
1642 | * Make reg constant so that it doesn't get accidentally modified along the |
||
1643 | * way. Yes, I actually had this problem. :( |
||
1644 | */ |
||
1645 | const fs_reg reg(this, ir->type); |
||
1646 | fs_reg dst_reg = reg; |
||
1647 | |||
1648 | if (ir->type->is_array()) { |
||
1649 | const unsigned size = type_size(ir->type->fields.array); |
||
1650 | |||
1651 | for (unsigned i = 0; i < ir->type->length; i++) { |
||
1652 | ir->array_elements[i]->accept(this); |
||
1653 | fs_reg src_reg = this->result; |
||
1654 | |||
1655 | dst_reg.type = src_reg.type; |
||
1656 | for (unsigned j = 0; j < size; j++) { |
||
1657 | emit(MOV(dst_reg, src_reg)); |
||
1658 | src_reg.reg_offset++; |
||
1659 | dst_reg.reg_offset++; |
||
1660 | } |
||
1661 | } |
||
1662 | } else if (ir->type->is_record()) { |
||
1663 | foreach_list(node, &ir->components) { |
||
1664 | ir_constant *const field = (ir_constant *) node; |
||
1665 | const unsigned size = type_size(field->type); |
||
1666 | |||
1667 | field->accept(this); |
||
1668 | fs_reg src_reg = this->result; |
||
1669 | |||
1670 | dst_reg.type = src_reg.type; |
||
1671 | for (unsigned j = 0; j < size; j++) { |
||
1672 | emit(MOV(dst_reg, src_reg)); |
||
1673 | src_reg.reg_offset++; |
||
1674 | dst_reg.reg_offset++; |
||
1675 | } |
||
1676 | } |
||
1677 | } else { |
||
1678 | const unsigned size = type_size(ir->type); |
||
1679 | |||
1680 | for (unsigned i = 0; i < size; i++) { |
||
1681 | switch (ir->type->base_type) { |
||
1682 | case GLSL_TYPE_FLOAT: |
||
1683 | emit(MOV(dst_reg, fs_reg(ir->value.f[i]))); |
||
1684 | break; |
||
1685 | case GLSL_TYPE_UINT: |
||
1686 | emit(MOV(dst_reg, fs_reg(ir->value.u[i]))); |
||
1687 | break; |
||
1688 | case GLSL_TYPE_INT: |
||
1689 | emit(MOV(dst_reg, fs_reg(ir->value.i[i]))); |
||
1690 | break; |
||
1691 | case GLSL_TYPE_BOOL: |
||
1692 | emit(MOV(dst_reg, fs_reg((int)ir->value.b[i]))); |
||
1693 | break; |
||
1694 | default: |
||
1695 | assert(!"Non-float/uint/int/bool constant"); |
||
1696 | } |
||
1697 | dst_reg.reg_offset++; |
||
1698 | } |
||
1699 | } |
||
1700 | |||
1701 | this->result = reg; |
||
1702 | } |
||
1703 | |||
/**
 * Emit an instruction whose conditional_mod leaves the flag register set
 * according to the boolean rvalue \p ir, so a following predicated
 * instruction (e.g. a conditional assignment) can use it.
 *
 * Comparisons are folded directly into a CMP where possible; anything
 * else falls back to evaluating the expression to 0/1 and testing bit 0.
 */
void
fs_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
{
   ir_expression *expr = ir->as_expression();

   if (expr) {
      fs_reg op[2];
      fs_inst *inst;

      assert(expr->get_num_operands() <= 2);
      for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
         assert(expr->operands[i]->type->is_scalar());

         expr->operands[i]->accept(this);
         op[i] = this->result;

         resolve_ud_negate(&op[i]);
      }

      switch (expr->operation) {
      case ir_unop_logic_not:
         /* Booleans are 0/1; test bit 0 and flip with the Z condition. */
         inst = emit(AND(reg_null_d, op[0], fs_reg(1)));
         inst->conditional_mod = BRW_CONDITIONAL_Z;
         break;

      case ir_binop_logic_xor:
      case ir_binop_logic_or:
      case ir_binop_logic_and:
         /* No single flag-setting instruction for these; evaluate the
          * whole expression and test its 0/1 result below.
          */
         goto out;

      case ir_unop_f2b:
         if (brw->gen >= 6) {
            emit(CMP(reg_null_d, op[0], fs_reg(0.0f), BRW_CONDITIONAL_NZ));
         } else {
            /* Pre-gen6 sets the flag from a float MOV's result instead. */
            inst = emit(MOV(reg_null_f, op[0]));
            inst->conditional_mod = BRW_CONDITIONAL_NZ;
         }
         break;

      case ir_unop_i2b:
         if (brw->gen >= 6) {
            emit(CMP(reg_null_d, op[0], fs_reg(0), BRW_CONDITIONAL_NZ));
         } else {
            inst = emit(MOV(reg_null_d, op[0]));
            inst->conditional_mod = BRW_CONDITIONAL_NZ;
         }
         break;

      case ir_binop_greater:
      case ir_binop_gequal:
      case ir_binop_less:
      case ir_binop_lequal:
      case ir_binop_equal:
      case ir_binop_all_equal:
      case ir_binop_nequal:
      case ir_binop_any_nequal:
         /* Fold the comparison straight into a flag-setting CMP. */
         resolve_bool_comparison(expr->operands[0], &op[0]);
         resolve_bool_comparison(expr->operands[1], &op[1]);

         emit(CMP(reg_null_d, op[0], op[1],
                  brw_conditional_for_comparison(expr->operation)));
         break;

      default:
         assert(!"not reached");
         fail("bad cond code\n");
         break;
      }
      return;
   }

out:
   /* Fallback: evaluate the boolean to 0/1 and set the flag from bit 0. */
   ir->accept(this);

   fs_inst *inst = emit(AND(reg_null_d, this->result, fs_reg(1)));
   inst->conditional_mod = BRW_CONDITIONAL_NZ;
}
||
1781 | |||
1782 | /** |
||
1783 | * Emit a gen6 IF statement with the comparison folded into the IF |
||
1784 | * instruction. |
||
1785 | */ |
||
1786 | void |
||
1787 | fs_visitor::emit_if_gen6(ir_if *ir) |
||
1788 | { |
||
1789 | ir_expression *expr = ir->condition->as_expression(); |
||
1790 | |||
1791 | if (expr) { |
||
1792 | fs_reg op[2]; |
||
1793 | fs_inst *inst; |
||
1794 | fs_reg temp; |
||
1795 | |||
1796 | assert(expr->get_num_operands() <= 2); |
||
1797 | for (unsigned int i = 0; i < expr->get_num_operands(); i++) { |
||
1798 | assert(expr->operands[i]->type->is_scalar()); |
||
1799 | |||
1800 | expr->operands[i]->accept(this); |
||
1801 | op[i] = this->result; |
||
1802 | } |
||
1803 | |||
1804 | switch (expr->operation) { |
||
1805 | case ir_unop_logic_not: |
||
1806 | case ir_binop_logic_xor: |
||
1807 | case ir_binop_logic_or: |
||
1808 | case ir_binop_logic_and: |
||
1809 | /* For operations on bool arguments, only the low bit of the bool is |
||
1810 | * valid, and the others are undefined. Fall back to the condition |
||
1811 | * code path. |
||
1812 | */ |
||
1813 | break; |
||
1814 | |||
1815 | case ir_unop_f2b: |
||
1816 | inst = emit(BRW_OPCODE_IF, reg_null_f, op[0], fs_reg(0)); |
||
1817 | inst->conditional_mod = BRW_CONDITIONAL_NZ; |
||
1818 | return; |
||
1819 | |||
1820 | case ir_unop_i2b: |
||
1821 | emit(IF(op[0], fs_reg(0), BRW_CONDITIONAL_NZ)); |
||
1822 | return; |
||
1823 | |||
1824 | case ir_binop_greater: |
||
1825 | case ir_binop_gequal: |
||
1826 | case ir_binop_less: |
||
1827 | case ir_binop_lequal: |
||
1828 | case ir_binop_equal: |
||
1829 | case ir_binop_all_equal: |
||
1830 | case ir_binop_nequal: |
||
1831 | case ir_binop_any_nequal: |
||
1832 | resolve_bool_comparison(expr->operands[0], &op[0]); |
||
1833 | resolve_bool_comparison(expr->operands[1], &op[1]); |
||
1834 | |||
1835 | emit(IF(op[0], op[1], |
||
1836 | brw_conditional_for_comparison(expr->operation))); |
||
1837 | return; |
||
1838 | default: |
||
1839 | assert(!"not reached"); |
||
1840 | emit(IF(op[0], fs_reg(0), BRW_CONDITIONAL_NZ)); |
||
1841 | fail("bad condition\n"); |
||
1842 | return; |
||
1843 | } |
||
1844 | } |
||
1845 | |||
1846 | emit_bool_to_cond_code(ir->condition); |
||
1847 | fs_inst *inst = emit(BRW_OPCODE_IF); |
||
1848 | inst->predicate = BRW_PREDICATE_NORMAL; |
||
1849 | } |
||
1850 | |||
1851 | void |
||
1852 | fs_visitor::visit(ir_if *ir) |
||
1853 | { |
||
1854 | if (brw->gen < 6 && dispatch_width == 16) { |
||
1855 | fail("Can't support (non-uniform) control flow on 16-wide\n"); |
||
1856 | } |
||
1857 | |||
1858 | /* Don't point the annotation at the if statement, because then it plus |
||
1859 | * the then and else blocks get printed. |
||
1860 | */ |
||
1861 | this->base_ir = ir->condition; |
||
1862 | |||
1863 | if (brw->gen == 6) { |
||
1864 | emit_if_gen6(ir); |
||
1865 | } else { |
||
1866 | emit_bool_to_cond_code(ir->condition); |
||
1867 | |||
1868 | emit(IF(BRW_PREDICATE_NORMAL)); |
||
1869 | } |
||
1870 | |||
1871 | foreach_list(node, &ir->then_instructions) { |
||
1872 | ir_instruction *ir = (ir_instruction *)node; |
||
1873 | this->base_ir = ir; |
||
1874 | |||
1875 | ir->accept(this); |
||
1876 | } |
||
1877 | |||
1878 | if (!ir->else_instructions.is_empty()) { |
||
1879 | emit(BRW_OPCODE_ELSE); |
||
1880 | |||
1881 | foreach_list(node, &ir->else_instructions) { |
||
1882 | ir_instruction *ir = (ir_instruction *)node; |
||
1883 | this->base_ir = ir; |
||
1884 | |||
1885 | ir->accept(this); |
||
1886 | } |
||
1887 | } |
||
1888 | |||
1889 | emit(BRW_OPCODE_ENDIF); |
||
1890 | } |
||
1891 | |||
/**
 * Emit a DO...WHILE loop for a GLSL loop, including the optional
 * counter initialization, bound check (emitted as a predicated BREAK at
 * the top of the body), body, and increment.
 */
void
fs_visitor::visit(ir_loop *ir)
{
   fs_reg counter = reg_undef;

   if (brw->gen < 6 && dispatch_width == 16) {
      fail("Can't support (non-uniform) control flow on 16-wide\n");
   }

   if (ir->counter) {
      /* Declare the counter variable and, if a starting value was given,
       * initialize it before entering the loop.
       */
      this->base_ir = ir->counter;
      ir->counter->accept(this);
      counter = *(variable_storage(ir->counter));

      if (ir->from) {
         this->base_ir = ir->from;
         ir->from->accept(this);

         emit(MOV(counter, this->result));
      }
   }

   this->base_ir = NULL;
   emit(BRW_OPCODE_DO);

   if (ir->to) {
      /* Re-evaluate the bound each iteration and break when the
       * counter-vs-bound comparison ir->cmp is satisfied.
       */
      this->base_ir = ir->to;
      ir->to->accept(this);

      emit(CMP(reg_null_d, counter, this->result,
               brw_conditional_for_comparison(ir->cmp)));

      fs_inst *inst = emit(BRW_OPCODE_BREAK);
      inst->predicate = BRW_PREDICATE_NORMAL;
   }

   foreach_list(node, &ir->body_instructions) {
      ir_instruction *ir = (ir_instruction *)node;

      this->base_ir = ir;
      ir->accept(this);
   }

   if (ir->increment) {
      /* Step the counter at the bottom of the body, before WHILE. */
      this->base_ir = ir->increment;
      ir->increment->accept(this);
      emit(ADD(counter, counter, this->result));
   }

   this->base_ir = NULL;
   emit(BRW_OPCODE_WHILE);
}
||
1944 | |||
1945 | void |
||
1946 | fs_visitor::visit(ir_loop_jump *ir) |
||
1947 | { |
||
1948 | switch (ir->mode) { |
||
1949 | case ir_loop_jump::jump_break: |
||
1950 | emit(BRW_OPCODE_BREAK); |
||
1951 | break; |
||
1952 | case ir_loop_jump::jump_continue: |
||
1953 | emit(BRW_OPCODE_CONTINUE); |
||
1954 | break; |
||
1955 | } |
||
1956 | } |
||
1957 | |||
1958 | void |
||
1959 | fs_visitor::visit(ir_call *ir) |
||
1960 | { |
||
1961 | assert(!"FINISHME"); |
||
1962 | } |
||
1963 | |||
1964 | void |
||
1965 | fs_visitor::visit(ir_return *ir) |
||
1966 | { |
||
1967 | assert(!"FINISHME"); |
||
1968 | } |
||
1969 | |||
1970 | void |
||
1971 | fs_visitor::visit(ir_function *ir) |
||
1972 | { |
||
1973 | /* Ignore function bodies other than main() -- we shouldn't see calls to |
||
1974 | * them since they should all be inlined before we get to ir_to_mesa. |
||
1975 | */ |
||
1976 | if (strcmp(ir->name, "main") == 0) { |
||
1977 | const ir_function_signature *sig; |
||
1978 | exec_list empty; |
||
1979 | |||
1980 | sig = ir->matching_signature(&empty); |
||
1981 | |||
1982 | assert(sig); |
||
1983 | |||
1984 | foreach_list(node, &sig->body) { |
||
1985 | ir_instruction *ir = (ir_instruction *)node; |
||
1986 | this->base_ir = ir; |
||
1987 | |||
1988 | ir->accept(this); |
||
1989 | } |
||
1990 | } |
||
1991 | } |
||
1992 | |||
1993 | void |
||
1994 | fs_visitor::visit(ir_function_signature *ir) |
||
1995 | { |
||
1996 | assert(!"not reached"); |
||
1997 | (void)ir; |
||
1998 | } |
||
1999 | |||
2000 | fs_inst * |
||
2001 | fs_visitor::emit(fs_inst inst) |
||
2002 | { |
||
2003 | fs_inst *list_inst = new(mem_ctx) fs_inst; |
||
2004 | *list_inst = inst; |
||
2005 | emit(list_inst); |
||
2006 | return list_inst; |
||
2007 | } |
||
2008 | |||
2009 | fs_inst * |
||
2010 | fs_visitor::emit(fs_inst *inst) |
||
2011 | { |
||
2012 | if (force_uncompressed_stack > 0) |
||
2013 | inst->force_uncompressed = true; |
||
2014 | else if (force_sechalf_stack > 0) |
||
2015 | inst->force_sechalf = true; |
||
2016 | |||
2017 | inst->annotation = this->current_annotation; |
||
2018 | inst->ir = this->base_ir; |
||
2019 | |||
2020 | this->instructions.push_tail(inst); |
||
2021 | |||
2022 | return inst; |
||
2023 | } |
||
2024 | |||
/**
 * Move every instruction on \p list onto the end of the visitor's main
 * instruction stream.
 *
 * NOTE(review): \p list is taken by value, not by reference — whether the
 * copied exec_list's sentinel nodes still reach the caller's elements
 * depends on exec_list's copy semantics; confirm this is intentional.
 */
void
fs_visitor::emit(exec_list list)
{
   /* _safe variant: each node is unlinked inside the loop. */
   foreach_list_safe(node, &list) {
      fs_inst *inst = (fs_inst *)node;
      /* A node can only live on one exec_list; detach before emit()
       * does push_tail onto this->instructions.
       */
      inst->remove();
      emit(inst);
   }
}
||
2034 | |||
2035 | /** Emits a dummy fragment shader consisting of magenta for bringup purposes. */ |
||
2036 | void |
||
2037 | fs_visitor::emit_dummy_fs() |
||
2038 | { |
||
2039 | int reg_width = dispatch_width / 8; |
||
2040 | |||
2041 | /* Everyone's favorite color. */ |
||
2042 | emit(MOV(fs_reg(MRF, 2 + 0 * reg_width), fs_reg(1.0f))); |
||
2043 | emit(MOV(fs_reg(MRF, 2 + 1 * reg_width), fs_reg(0.0f))); |
||
2044 | emit(MOV(fs_reg(MRF, 2 + 2 * reg_width), fs_reg(1.0f))); |
||
2045 | emit(MOV(fs_reg(MRF, 2 + 3 * reg_width), fs_reg(0.0f))); |
||
2046 | |||
2047 | fs_inst *write; |
||
2048 | write = emit(FS_OPCODE_FB_WRITE, fs_reg(0), fs_reg(0)); |
||
2049 | write->base_mrf = 2; |
||
2050 | write->mlen = 4 * reg_width; |
||
2051 | write->eot = true; |
||
2052 | } |
||
2053 | |||
2054 | /* The register location here is relative to the start of the URB |
||
2055 | * data. It will get adjusted to be a real location before |
||
2056 | * generate_code() time. |
||
2057 | */ |
||
2058 | struct brw_reg |
||
2059 | fs_visitor::interp_reg(int location, int channel) |
||
2060 | { |
||
2061 | int regnr = urb_setup[location] * 2 + channel / 2; |
||
2062 | int stride = (channel & 1) * 4; |
||
2063 | |||
2064 | assert(urb_setup[location] != -1); |
||
2065 | |||
2066 | return brw_vec1_grf(regnr, stride); |
||
2067 | } |
||
2068 | |||
/** Emits the interpolation for the varying inputs. */
void
fs_visitor::emit_interpolation_setup_gen4()
{
   /* Integer pixel centers come out of the PIXEL_X/PIXEL_Y opcodes as
    * unsigned words.
    */
   this->current_annotation = "compute pixel centers";
   this->pixel_x = fs_reg(this, glsl_type::uint_type);
   this->pixel_y = fs_reg(this, glsl_type::uint_type);
   this->pixel_x.type = BRW_REGISTER_TYPE_UW;
   this->pixel_y.type = BRW_REGISTER_TYPE_UW;

   emit(FS_OPCODE_PIXEL_X, this->pixel_x);
   emit(FS_OPCODE_PIXEL_Y, this->pixel_y);

   this->current_annotation = "compute pixel deltas from v0";
   if (brw->has_pln) {
      /* Allocate delta_x/delta_y as one vec2 so the pair lands in
       * consecutive registers (delta_y is delta_x with reg_offset + 1),
       * the layout the PLN instruction path consumes.
       */
      this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] =
         fs_reg(this, glsl_type::vec2_type);
      this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] =
         this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC];
      this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC].reg_offset++;
   } else {
      /* Without PLN the two deltas can live anywhere. */
      this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] =
         fs_reg(this, glsl_type::float_type);
      this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC] =
         fs_reg(this, glsl_type::float_type);
   }
   /* delta = pixel center - v0 position (g1.0/g1.1 hold the vertex 0
    * screen position in the gen4 payload — TODO confirm against payload
    * layout docs).
    */
   emit(ADD(this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
            this->pixel_x, fs_reg(negate(brw_vec1_grf(1, 0)))));
   emit(ADD(this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
            this->pixel_y, fs_reg(negate(brw_vec1_grf(1, 1)))));

   this->current_annotation = "compute pos.w and 1/pos.w";
   /* Compute wpos.w.  It's always in our setup, since it's needed to
    * interpolate the other attributes.
    */
   this->wpos_w = fs_reg(this, glsl_type::float_type);
   emit(FS_OPCODE_LINTERP, wpos_w,
        this->delta_x[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
        this->delta_y[BRW_WM_PERSPECTIVE_PIXEL_BARYCENTRIC],
        interp_reg(VARYING_SLOT_POS, 3));
   /* Compute the pixel 1/W value from wpos.w. */
   this->pixel_w = fs_reg(this, glsl_type::float_type);
   emit_math(SHADER_OPCODE_RCP, this->pixel_w, wpos_w);
   this->current_annotation = NULL;
}
||
2114 | |||
/** Emits the interpolation for the varying inputs. */
void
fs_visitor::emit_interpolation_setup_gen6()
{
   struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW);

   /* If the pixel centers end up used, the setup is the same as for gen4. */
   this->current_annotation = "compute pixel centers";
   fs_reg int_pixel_x = fs_reg(this, glsl_type::uint_type);
   fs_reg int_pixel_y = fs_reg(this, glsl_type::uint_type);
   int_pixel_x.type = BRW_REGISTER_TYPE_UW;
   int_pixel_y.type = BRW_REGISTER_TYPE_UW;
   /* Expand the per-subspan X/Y origins in g1 into per-pixel centers by
    * adding the immediate-vector pixel offsets (0x10101010 / 0x11001100
    * encode the 2x2 subspan layout — TODO confirm against the PRM's
    * imm-vector encoding).
    */
   emit(ADD(int_pixel_x,
            fs_reg(stride(suboffset(g1_uw, 4), 2, 4, 0)),
            fs_reg(brw_imm_v(0x10101010))));
   emit(ADD(int_pixel_y,
            fs_reg(stride(suboffset(g1_uw, 5), 2, 4, 0)),
            fs_reg(brw_imm_v(0x11001100))));

   /* As of gen6, we can no longer mix float and int sources.  We have
    * to turn the integer pixel centers into floats for their actual
    * use.
    */
   this->pixel_x = fs_reg(this, glsl_type::float_type);
   this->pixel_y = fs_reg(this, glsl_type::float_type);
   emit(MOV(this->pixel_x, int_pixel_x));
   emit(MOV(this->pixel_y, int_pixel_y));

   this->current_annotation = "compute pos.w";
   /* On gen6 the thread payload already delivers W; only 1/W needs
    * computing.
    */
   this->pixel_w = fs_reg(brw_vec8_grf(c->source_w_reg, 0));
   this->wpos_w = fs_reg(this, glsl_type::float_type);
   emit_math(SHADER_OPCODE_RCP, this->wpos_w, this->pixel_w);

   /* Barycentric coordinates are likewise delivered in the payload, two
    * registers (x then y) per interpolation mode.
    */
   for (int i = 0; i < BRW_WM_BARYCENTRIC_INTERP_MODE_COUNT; ++i) {
      uint8_t reg = c->barycentric_coord_reg[i];
      this->delta_x[i] = fs_reg(brw_vec8_grf(reg, 0));
      this->delta_y[i] = fs_reg(brw_vec8_grf(reg + 1, 0));
   }

   this->current_annotation = NULL;
}
||
2156 | |||
/**
 * Move one color component (\p index of output \p target) into its MRF
 * slot for an FB write message starting at \p first_color_mrf.
 *
 * Handles the three hardware layouts: SIMD8 / gen6+ SIMD16 (two regs
 * per component), pre-gen6 SIMD16 with COMPR4, and pre-gen6 SIMD16
 * emitted as two forced SIMD8 halves.
 */
void
fs_visitor::emit_color_write(int target, int index, int first_color_mrf)
{
   int reg_width = dispatch_width / 8;
   fs_inst *inst;
   fs_reg color = outputs[target];
   fs_reg mrf;

   /* If there's no color data to be written, skip it. */
   if (color.file == BAD_FILE)
      return;

   /* Step to the requested component within the output. */
   color.reg_offset += index;

   if (dispatch_width == 8 || brw->gen >= 6) {
      /* SIMD8 write looks like:
       * m + 0: r0
       * m + 1: r1
       * m + 2: g0
       * m + 3: g1
       *
       * gen6 SIMD16 DP write looks like:
       * m + 0: r0
       * m + 1: r1
       * m + 2: g0
       * m + 3: g1
       * m + 4: b0
       * m + 5: b1
       * m + 6: a0
       * m + 7: a1
       */
      inst = emit(MOV(fs_reg(MRF, first_color_mrf + index * reg_width,
                             color.type),
                      color));
      /* GL clamp-fragment-color state is applied via MOV.sat. */
      inst->saturate = c->key.clamp_fragment_color;
   } else {
      /* pre-gen6 SIMD16 single source DP write looks like:
       * m + 0: r0
       * m + 1: g0
       * m + 2: b0
       * m + 3: a0
       * m + 4: r1
       * m + 5: g1
       * m + 6: b1
       * m + 7: a1
       */
      if (brw->has_compr4) {
         /* By setting the high bit of the MRF register number, we
          * indicate that we want COMPR4 mode - instead of doing the
          * usual destination + 1 for the second half we get
          * destination + 4.
          */
         inst = emit(MOV(fs_reg(MRF, BRW_MRF_COMPR4 + first_color_mrf + index,
                                color.type),
                         color));
         inst->saturate = c->key.clamp_fragment_color;
      } else {
         /* No COMPR4: emit the two SIMD8 halves separately — first half
          * uncompressed to m + index, second half to m + index + 4.
          */
         push_force_uncompressed();
         inst = emit(MOV(fs_reg(MRF, first_color_mrf + index, color.type),
                         color));
         inst->saturate = c->key.clamp_fragment_color;
         pop_force_uncompressed();

         push_force_sechalf();
         color.sechalf = true;
         inst = emit(MOV(fs_reg(MRF, first_color_mrf + index + 4, color.type),
                         color));
         inst->saturate = c->key.clamp_fragment_color;
         pop_force_sechalf();
         color.sechalf = false;
      }
   }
}
||
2230 | |||
4401 | Serge | 2231 | static int |
2232 | cond_for_alpha_func(GLenum func) |
||
2233 | { |
||
2234 | switch(func) { |
||
2235 | case GL_GREATER: |
||
2236 | return BRW_CONDITIONAL_G; |
||
2237 | case GL_GEQUAL: |
||
2238 | return BRW_CONDITIONAL_GE; |
||
2239 | case GL_LESS: |
||
2240 | return BRW_CONDITIONAL_L; |
||
2241 | case GL_LEQUAL: |
||
2242 | return BRW_CONDITIONAL_LE; |
||
2243 | case GL_EQUAL: |
||
2244 | return BRW_CONDITIONAL_EQ; |
||
2245 | case GL_NOTEQUAL: |
||
2246 | return BRW_CONDITIONAL_NEQ; |
||
2247 | default: |
||
2248 | assert(!"Not reached"); |
||
2249 | return 0; |
||
2250 | } |
||
2251 | } |
||
2252 | |||
/**
 * Alpha test support for when we compile it into the shader instead
 * of using the normal fixed-function alpha test.
 */
void
fs_visitor::emit_alpha_test()
{
   this->current_annotation = "Alpha test";

   fs_inst *cmp;
   /* GL_ALWAYS: every fragment passes; nothing to emit. */
   if (c->key.alpha_test_func == GL_ALWAYS)
      return;

   if (c->key.alpha_test_func == GL_NEVER) {
      /* f0.1 = 0 — compare a register against itself with NEQ, which
       * can never hold.
       */
      fs_reg some_reg = fs_reg(retype(brw_vec8_grf(0, 0),
                                      BRW_REGISTER_TYPE_UW));
      cmp = emit(CMP(reg_null_f, some_reg, some_reg,
                     BRW_CONDITIONAL_NEQ));
   } else {
      /* RT0 alpha (component 3 of output 0). */
      fs_reg color = outputs[0];
      color.reg_offset += 3;

      /* f0.1 &= func(color, ref) */
      cmp = emit(CMP(reg_null_f, color, fs_reg(c->key.alpha_test_ref),
                     cond_for_alpha_func(c->key.alpha_test_func)));
   }
   /* Predicating the CMP and writing flag subregister 1 accumulates the
    * result into f0.1 (the pixel-enable mask used by the FB write)
    * rather than clobbering f0.0 — see the "&=" comments above.
    */
   cmp->predicate = BRW_PREDICATE_NORMAL;
   cmp->flag_subreg = 1;
}
||
2284 | |||
2285 | void |
||
4358 | Serge | 2286 | fs_visitor::emit_fb_writes() |
2287 | { |
||
2288 | this->current_annotation = "FB write header"; |
||
2289 | bool header_present = true; |
||
2290 | /* We can potentially have a message length of up to 15, so we have to set |
||
2291 | * base_mrf to either 0 or 1 in order to fit in m0..m15. |
||
2292 | */ |
||
2293 | int base_mrf = 1; |
||
2294 | int nr = base_mrf; |
||
2295 | int reg_width = dispatch_width / 8; |
||
2296 | bool do_dual_src = this->dual_src_output.file != BAD_FILE; |
||
2297 | bool src0_alpha_to_render_target = false; |
||
2298 | |||
2299 | if (dispatch_width == 16 && do_dual_src) { |
||
2300 | fail("GL_ARB_blend_func_extended not yet supported in 16-wide."); |
||
2301 | do_dual_src = false; |
||
2302 | } |
||
2303 | |||
2304 | /* From the Sandy Bridge PRM, volume 4, page 198: |
||
2305 | * |
||
2306 | * "Dispatched Pixel Enables. One bit per pixel indicating |
||
2307 | * which pixels were originally enabled when the thread was |
||
2308 | * dispatched. This field is only required for the end-of- |
||
2309 | * thread message and on all dual-source messages." |
||
2310 | */ |
||
2311 | if (brw->gen >= 6 && |
||
2312 | !this->fp->UsesKill && |
||
2313 | !do_dual_src && |
||
2314 | c->key.nr_color_regions == 1) { |
||
2315 | header_present = false; |
||
2316 | } |
||
2317 | |||
2318 | if (header_present) { |
||
2319 | src0_alpha_to_render_target = brw->gen >= 6 && |
||
2320 | !do_dual_src && |
||
2321 | c->key.replicate_alpha; |
||
2322 | /* m2, m3 header */ |
||
2323 | nr += 2; |
||
2324 | } |
||
2325 | |||
2326 | if (c->aa_dest_stencil_reg) { |
||
2327 | push_force_uncompressed(); |
||
2328 | emit(MOV(fs_reg(MRF, nr++), |
||
2329 | fs_reg(brw_vec8_grf(c->aa_dest_stencil_reg, 0)))); |
||
2330 | pop_force_uncompressed(); |
||
2331 | } |
||
2332 | |||
2333 | /* Reserve space for color. It'll be filled in per MRT below. */ |
||
2334 | int color_mrf = nr; |
||
2335 | nr += 4 * reg_width; |
||
2336 | if (do_dual_src) |
||
2337 | nr += 4; |
||
2338 | if (src0_alpha_to_render_target) |
||
2339 | nr += reg_width; |
||
2340 | |||
2341 | if (c->source_depth_to_render_target) { |
||
2342 | if (brw->gen == 6 && dispatch_width == 16) { |
||
2343 | /* For outputting oDepth on gen6, SIMD8 writes have to be |
||
2344 | * used. This would require 8-wide moves of each half to |
||
2345 | * message regs, kind of like pre-gen5 SIMD16 FB writes. |
||
2346 | * Just bail on doing so for now. |
||
2347 | */ |
||
2348 | fail("Missing support for simd16 depth writes on gen6\n"); |
||
2349 | } |
||
2350 | |||
2351 | if (fp->Base.OutputsWritten & BITFIELD64_BIT(FRAG_RESULT_DEPTH)) { |
||
2352 | /* Hand over gl_FragDepth. */ |
||
2353 | assert(this->frag_depth.file != BAD_FILE); |
||
2354 | emit(MOV(fs_reg(MRF, nr), this->frag_depth)); |
||
2355 | } else { |
||
2356 | /* Pass through the payload depth. */ |
||
2357 | emit(MOV(fs_reg(MRF, nr), |
||
2358 | fs_reg(brw_vec8_grf(c->source_depth_reg, 0)))); |
||
2359 | } |
||
2360 | nr += reg_width; |
||
2361 | } |
||
2362 | |||
2363 | if (c->dest_depth_reg) { |
||
2364 | emit(MOV(fs_reg(MRF, nr), |
||
2365 | fs_reg(brw_vec8_grf(c->dest_depth_reg, 0)))); |
||
2366 | nr += reg_width; |
||
2367 | } |
||
2368 | |||
2369 | if (do_dual_src) { |
||
2370 | fs_reg src0 = this->outputs[0]; |
||
2371 | fs_reg src1 = this->dual_src_output; |
||
2372 | |||
2373 | this->current_annotation = ralloc_asprintf(this->mem_ctx, |
||
2374 | "FB write src0"); |
||
2375 | for (int i = 0; i < 4; i++) { |
||
2376 | fs_inst *inst = emit(MOV(fs_reg(MRF, color_mrf + i, src0.type), src0)); |
||
2377 | src0.reg_offset++; |
||
2378 | inst->saturate = c->key.clamp_fragment_color; |
||
2379 | } |
||
2380 | |||
2381 | this->current_annotation = ralloc_asprintf(this->mem_ctx, |
||
2382 | "FB write src1"); |
||
2383 | for (int i = 0; i < 4; i++) { |
||
2384 | fs_inst *inst = emit(MOV(fs_reg(MRF, color_mrf + 4 + i, src1.type), |
||
2385 | src1)); |
||
2386 | src1.reg_offset++; |
||
2387 | inst->saturate = c->key.clamp_fragment_color; |
||
2388 | } |
||
2389 | |||
2390 | if (INTEL_DEBUG & DEBUG_SHADER_TIME) |
||
2391 | emit_shader_time_end(); |
||
2392 | |||
2393 | fs_inst *inst = emit(FS_OPCODE_FB_WRITE); |
||
2394 | inst->target = 0; |
||
2395 | inst->base_mrf = base_mrf; |
||
2396 | inst->mlen = nr - base_mrf; |
||
2397 | inst->eot = true; |
||
2398 | inst->header_present = header_present; |
||
2399 | |||
2400 | c->prog_data.dual_src_blend = true; |
||
2401 | this->current_annotation = NULL; |
||
2402 | return; |
||
2403 | } |
||
2404 | |||
2405 | for (int target = 0; target < c->key.nr_color_regions; target++) { |
||
2406 | this->current_annotation = ralloc_asprintf(this->mem_ctx, |
||
2407 | "FB write target %d", |
||
2408 | target); |
||
2409 | /* If src0_alpha_to_render_target is true, include source zero alpha |
||
2410 | * data in RenderTargetWrite message for targets > 0. |
||
2411 | */ |
||
2412 | int write_color_mrf = color_mrf; |
||
2413 | if (src0_alpha_to_render_target && target != 0) { |
||
2414 | fs_inst *inst; |
||
2415 | fs_reg color = outputs[0]; |
||
2416 | color.reg_offset += 3; |
||
2417 | |||
2418 | inst = emit(MOV(fs_reg(MRF, write_color_mrf, color.type), |
||
2419 | color)); |
||
2420 | inst->saturate = c->key.clamp_fragment_color; |
||
2421 | write_color_mrf = color_mrf + reg_width; |
||
2422 | } |
||
2423 | |||
2424 | for (unsigned i = 0; i < this->output_components[target]; i++) |
||
2425 | emit_color_write(target, i, write_color_mrf); |
||
2426 | |||
2427 | bool eot = false; |
||
2428 | if (target == c->key.nr_color_regions - 1) { |
||
2429 | eot = true; |
||
2430 | |||
2431 | if (INTEL_DEBUG & DEBUG_SHADER_TIME) |
||
2432 | emit_shader_time_end(); |
||
2433 | } |
||
2434 | |||
2435 | fs_inst *inst = emit(FS_OPCODE_FB_WRITE); |
||
2436 | inst->target = target; |
||
2437 | inst->base_mrf = base_mrf; |
||
2438 | if (src0_alpha_to_render_target && target == 0) |
||
2439 | inst->mlen = nr - base_mrf - reg_width; |
||
2440 | else |
||
2441 | inst->mlen = nr - base_mrf; |
||
2442 | inst->eot = eot; |
||
2443 | inst->header_present = header_present; |
||
2444 | } |
||
2445 | |||
2446 | if (c->key.nr_color_regions == 0) { |
||
2447 | /* Even if there's no color buffers enabled, we still need to send |
||
2448 | * alpha out the pipeline to our null renderbuffer to support |
||
2449 | * alpha-testing, alpha-to-coverage, and so on. |
||
2450 | */ |
||
2451 | emit_color_write(0, 3, color_mrf); |
||
2452 | |||
2453 | if (INTEL_DEBUG & DEBUG_SHADER_TIME) |
||
2454 | emit_shader_time_end(); |
||
2455 | |||
2456 | fs_inst *inst = emit(FS_OPCODE_FB_WRITE); |
||
2457 | inst->base_mrf = base_mrf; |
||
2458 | inst->mlen = nr - base_mrf; |
||
2459 | inst->eot = true; |
||
2460 | inst->header_present = header_present; |
||
2461 | } |
||
2462 | |||
2463 | this->current_annotation = NULL; |
||
2464 | } |
||
2465 | |||
2466 | void |
||
2467 | fs_visitor::resolve_ud_negate(fs_reg *reg) |
||
2468 | { |
||
2469 | if (reg->type != BRW_REGISTER_TYPE_UD || |
||
2470 | !reg->negate) |
||
2471 | return; |
||
2472 | |||
2473 | fs_reg temp = fs_reg(this, glsl_type::uint_type); |
||
2474 | emit(MOV(temp, *reg)); |
||
2475 | *reg = temp; |
||
2476 | } |
||
2477 | |||
2478 | void |
||
2479 | fs_visitor::resolve_bool_comparison(ir_rvalue *rvalue, fs_reg *reg) |
||
2480 | { |
||
2481 | if (rvalue->type != glsl_type::bool_type) |
||
2482 | return; |
||
2483 | |||
2484 | fs_reg temp = fs_reg(this, glsl_type::bool_type); |
||
2485 | emit(AND(temp, *reg, fs_reg(1))); |
||
2486 | *reg = temp; |
||
2487 | } |
||
2488 | |||
/**
 * Construct an FS visitor for one shader at one dispatch width
 * (8 or 16 channels).
 */
fs_visitor::fs_visitor(struct brw_context *brw,
                       struct brw_wm_compile *c,
                       struct gl_shader_program *shader_prog,
                       struct gl_fragment_program *fp,
                       unsigned dispatch_width)
   : dispatch_width(dispatch_width)
{
   this->c = c;
   this->brw = brw;
   this->fp = fp;
   this->shader_prog = shader_prog;
   this->ctx = &brw->ctx;
   /* All per-compile allocations hang off this context and are freed in
    * one shot by the destructor.
    */
   this->mem_ctx = ralloc_context(NULL);
   /* shader_prog is NULL for fixed-function / ARB programs. */
   if (shader_prog)
      shader = (struct brw_shader *)
         shader_prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
   else
      shader = NULL;
   this->failed = false;
   /* Maps ir_variable * -> fs_reg storage. */
   this->variable_ht = hash_table_ctor(0,
                                       hash_table_pointer_hash,
                                       hash_table_pointer_compare);

   memset(this->outputs, 0, sizeof(this->outputs));
   memset(this->output_components, 0, sizeof(this->output_components));
   this->first_non_payload_grf = 0;
   /* On gen7+ the top of the GRF space is reserved to stand in for the
    * missing MRF file.
    */
   this->max_grf = brw->gen >= 7 ? GEN7_MRF_HACK_START : BRW_MAX_GRF;

   this->current_annotation = NULL;
   this->base_ir = NULL;

   /* Virtual-GRF bookkeeping starts empty and grows on demand. */
   this->virtual_grf_sizes = NULL;
   this->virtual_grf_count = 0;
   this->virtual_grf_array_size = 0;
   this->virtual_grf_start = NULL;
   this->virtual_grf_end = NULL;
   this->live_intervals_valid = false;

   this->params_remap = NULL;
   this->nr_params_remap = 0;

   this->force_uncompressed_stack = 0;
   this->force_sechalf_stack = 0;

   memset(&this->param_size, 0, sizeof(this->param_size));
}
||
2535 | |||
2536 | fs_visitor::~fs_visitor() |
||
2537 | { |
||
2538 | ralloc_free(this->mem_ctx); |
||
2539 | hash_table_dtor(this->variable_ht); |
||
2540 | }>>>>>>>>=>>=>>>>>>>><>>><>>>><>><>>>>>>>>>>>>>>>>3;>>>>><>>><>>>>>>>>=>>>>>>> |