WebSVN – Kolibri OS – Blame – /contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp

Rev	Author	Line No.	Line
4358	Serge	1	/*
		2	* Copyright © 2010 Intel Corporation
		3	*
		4	* Permission is hereby granted, free of charge, to any person obtaining a
		5	* copy of this software and associated documentation files (the "Software"),
		6	* to deal in the Software without restriction, including without limitation
		7	* the rights to use, copy, modify, merge, publish, distribute, sublicense,
		8	* and/or sell copies of the Software, and to permit persons to whom the
		9	* Software is furnished to do so, subject to the following conditions:
		10	*
		11	* The above copyright notice and this permission notice (including the next
		12	* paragraph) shall be included in all copies or substantial portions of the
		13	* Software.
		14	*
		15	* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
		16	* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
		17	* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
		18	* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
		19	* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
		20	* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
		21	* DEALINGS IN THE SOFTWARE.
		22	*/
		23
		24	/**
		25	* \file brw_fs_channel_expressions.cpp
		26	*
		27	* Breaks vector operations down into operations on each component.
		28	*
		29	* The 965 fragment shader receives 8 or 16 pixels at a time, so each
		30	* channel of a vector is laid out as 1 or 2 8-float registers. Each
		31	* ALU operation operates on one of those channel registers. As a
		32	* result, there is no value to the 965 fragment shader in tracking
		33	* "vector" expressions in the sense of GLSL fragment shaders, when
		34	* doing a channel at a time may help in constant folding, algebraic
		35	* simplification, and reducing the liveness of channel registers.
		36	*
		37	* The exception to the desire to break everything down to floats is
		38	* texturing. The texture sampler returns a writemasked
		39	* 4/8-register sequence containing the texture values. We don't want
		40	* to dispatch to the sampler separately for each channel we need, so
		41	* we do retain the vector types in that case.
		42	*/
		43
		44	extern "C" {
		45	#include "main/core.h"
		46	#include "brw_wm.h"
		47	}
		48	#include "glsl/ir.h"
		49	#include "glsl/ir_expression_flattening.h"
		50	#include "glsl/glsl_types.h"
		51
		52	class ir_channel_expressions_visitor : public ir_hierarchical_visitor {
		53	public:
		54	ir_channel_expressions_visitor()
		55	{
		56	this->progress = false;
		57	this->mem_ctx = NULL;
		58	}
		59
		60	ir_visitor_status visit_leave(ir_assignment *);
		61
		62	ir_rvalue get_element(ir_variable var, unsigned int element);
		63	void assign(ir_assignment ir, int elem, ir_rvalue val);
		64
		65	bool progress;
		66	void *mem_ctx;
		67	};
		68
		69	static bool
		70	channel_expressions_predicate(ir_instruction *ir)
		71	{
		72	ir_expression *expr = ir->as_expression();
		73	unsigned int i;
		74
		75	if (!expr)
		76	return false;
		77
		78	for (i = 0; i < expr->get_num_operands(); i++) {
		79	if (expr->operands[i]->type->is_vector())
		80	return true;
		81	}
		82
		83	return false;
		84	}
		85
		86	bool
		87	brw_do_channel_expressions(exec_list *instructions)
		88	{
		89	ir_channel_expressions_visitor v;
		90
		91	/* Pull out any matrix expression to a separate assignment to a
		92	* temp. This will make our handling of the breakdown to
		93	* operations on the matrix's vector components much easier.
		94	*/
		95	do_expression_flattening(instructions, channel_expressions_predicate);
		96
		97	visit_list_elements(&v, instructions);
		98
		99	return v.progress;
		100	}
		101
		102	ir_rvalue *
		103	ir_channel_expressions_visitor::get_element(ir_variable *var, unsigned int elem)
		104	{
		105	ir_dereference *deref;
		106
		107	if (var->type->is_scalar())
		108	return new(mem_ctx) ir_dereference_variable(var);
		109
		110	assert(elem < var->type->components());
		111	deref = new(mem_ctx) ir_dereference_variable(var);
		112	return new(mem_ctx) ir_swizzle(deref, elem, 0, 0, 0, 1);
		113	}
		114
		115	void
		116	ir_channel_expressions_visitor::assign(ir_assignment ir, int elem, ir_rvalue val)
		117	{
		118	ir_dereference *lhs = ir->lhs->clone(mem_ctx, NULL);
		119	ir_assignment *assign;
		120
		121	/* This assign-of-expression should have been generated by the
		122	* expression flattening visitor (since we never short circit to
		123	* not flatten, even for plain assignments of variables), so the
		124	* writemask is always full.
		125	*/
		126	assert(ir->write_mask == (1 << ir->lhs->type->components()) - 1);
		127
		128	assign = new(mem_ctx) ir_assignment(lhs, val, NULL, (1 << elem));
		129	ir->insert_before(assign);
		130	}
		131
		132	ir_visitor_status
		133	ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
		134	{
		135	ir_expression *expr = ir->rhs->as_expression();
		136	bool found_vector = false;
		137	unsigned int i, vector_elements = 1;
		138	ir_variable *op_var[3];
		139
		140	if (!expr)
		141	return visit_continue;
		142
		143	if (!this->mem_ctx)
		144	this->mem_ctx = ralloc_parent(ir);
		145
		146	for (i = 0; i < expr->get_num_operands(); i++) {
		147	if (expr->operands[i]->type->is_vector()) {
		148	found_vector = true;
		149	vector_elements = expr->operands[i]->type->vector_elements;
		150	break;
		151	}
		152	}
		153	if (!found_vector)
		154	return visit_continue;
		155
		156	/* Store the expression operands in temps so we can use them
		157	* multiple times.
		158	*/
		159	for (i = 0; i < expr->get_num_operands(); i++) {
		160	ir_assignment *assign;
		161	ir_dereference *deref;
		162
		163	assert(!expr->operands[i]->type->is_matrix());
		164
		165	op_var[i] = new(mem_ctx) ir_variable(expr->operands[i]->type,
		166	"channel_expressions",
		167	ir_var_temporary);
		168	ir->insert_before(op_var[i]);
		169
		170	deref = new(mem_ctx) ir_dereference_variable(op_var[i]);
		171	assign = new(mem_ctx) ir_assignment(deref,
		172	expr->operands[i],
		173	NULL);
		174	ir->insert_before(assign);
		175	}
		176
		177	const glsl_type *element_type = glsl_type::get_instance(ir->lhs->type->base_type,
		178	1, 1);
		179
		180	/* OK, time to break down this vector operation. */
		181	switch (expr->operation) {
		182	case ir_unop_bit_not:
		183	case ir_unop_logic_not:
		184	case ir_unop_neg:
		185	case ir_unop_abs:
		186	case ir_unop_sign:
		187	case ir_unop_rcp:
		188	case ir_unop_rsq:
		189	case ir_unop_sqrt:
		190	case ir_unop_exp:
		191	case ir_unop_log:
		192	case ir_unop_exp2:
		193	case ir_unop_log2:
		194	case ir_unop_bitcast_i2f:
		195	case ir_unop_bitcast_f2i:
		196	case ir_unop_bitcast_f2u:
		197	case ir_unop_bitcast_u2f:
		198	case ir_unop_i2u:
		199	case ir_unop_u2i:
		200	case ir_unop_f2i:
		201	case ir_unop_f2u:
		202	case ir_unop_i2f:
		203	case ir_unop_f2b:
		204	case ir_unop_b2f:
		205	case ir_unop_i2b:
		206	case ir_unop_b2i:
		207	case ir_unop_u2f:
		208	case ir_unop_trunc:
		209	case ir_unop_ceil:
		210	case ir_unop_floor:
		211	case ir_unop_fract:
		212	case ir_unop_round_even:
		213	case ir_unop_sin:
		214	case ir_unop_cos:
		215	case ir_unop_sin_reduced:
		216	case ir_unop_cos_reduced:
		217	case ir_unop_dFdx:
		218	case ir_unop_dFdy:
		219	case ir_unop_bitfield_reverse:
		220	case ir_unop_bit_count:
		221	case ir_unop_find_msb:
		222	case ir_unop_find_lsb:
		223	for (i = 0; i < vector_elements; i++) {
		224	ir_rvalue *op0 = get_element(op_var[0], i);
		225
		226	assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
		227	element_type,
		228	op0,
		229	NULL));
		230	}
		231	break;
		232
		233	case ir_binop_add:
		234	case ir_binop_sub:
		235	case ir_binop_mul:
		236	case ir_binop_div:
		237	case ir_binop_mod:
		238	case ir_binop_min:
		239	case ir_binop_max:
		240	case ir_binop_pow:
		241	case ir_binop_lshift:
		242	case ir_binop_rshift:
		243	case ir_binop_bit_and:
		244	case ir_binop_bit_xor:
		245	case ir_binop_bit_or:
		246	case ir_binop_less:
		247	case ir_binop_greater:
		248	case ir_binop_lequal:
		249	case ir_binop_gequal:
		250	case ir_binop_equal:
		251	case ir_binop_nequal:
		252	for (i = 0; i < vector_elements; i++) {
		253	ir_rvalue *op0 = get_element(op_var[0], i);
		254	ir_rvalue *op1 = get_element(op_var[1], i);
		255
		256	assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
		257	element_type,
		258	op0,
		259	op1));
		260	}
		261	break;
		262
		263	case ir_unop_any: {
		264	ir_expression *temp;
		265	temp = new(mem_ctx) ir_expression(ir_binop_logic_or,
		266	element_type,
		267	get_element(op_var[0], 0),
		268	get_element(op_var[0], 1));
		269
		270	for (i = 2; i < vector_elements; i++) {
		271	temp = new(mem_ctx) ir_expression(ir_binop_logic_or,
		272	element_type,
		273	get_element(op_var[0], i),
		274	temp);
		275	}
		276	assign(ir, 0, temp);
		277	break;
		278	}
		279
		280	case ir_binop_dot: {
		281	ir_expression *last = NULL;
		282	for (i = 0; i < vector_elements; i++) {
		283	ir_rvalue *op0 = get_element(op_var[0], i);
		284	ir_rvalue *op1 = get_element(op_var[1], i);
		285	ir_expression *temp;
		286
		287	temp = new(mem_ctx) ir_expression(ir_binop_mul,
		288	element_type,
		289	op0,
		290	op1);
		291	if (last) {
		292	last = new(mem_ctx) ir_expression(ir_binop_add,
		293	element_type,
		294	temp,
		295	last);
		296	} else {
		297	last = temp;
		298	}
		299	}
		300	assign(ir, 0, last);
		301	break;
		302	}
		303
		304	case ir_binop_logic_and:
		305	case ir_binop_logic_xor:
		306	case ir_binop_logic_or:
		307	ir->print();
		308	printf("\n");
		309	assert(!"not reached: expression operates on scalars only");
		310	break;
		311	case ir_binop_all_equal:
		312	case ir_binop_any_nequal: {
		313	ir_expression *last = NULL;
		314	for (i = 0; i < vector_elements; i++) {
		315	ir_rvalue *op0 = get_element(op_var[0], i);
		316	ir_rvalue *op1 = get_element(op_var[1], i);
		317	ir_expression *temp;
		318	ir_expression_operation join;
		319
		320	if (expr->operation == ir_binop_all_equal)
		321	join = ir_binop_logic_and;
		322	else
		323	join = ir_binop_logic_or;
		324
		325	temp = new(mem_ctx) ir_expression(expr->operation,
		326	element_type,
		327	op0,
		328	op1);
		329	if (last) {
		330	last = new(mem_ctx) ir_expression(join,
		331	element_type,
		332	temp,
		333	last);
		334	} else {
		335	last = temp;
		336	}
		337	}
		338	assign(ir, 0, last);
		339	break;
		340	}
		341	case ir_unop_noise:
		342	assert(!"noise should have been broken down to function call");
		343	break;
		344
		345	case ir_binop_bfm: {
		346	/* Does not need to be scalarized, since its result will be identical
		347	* for all channels.
		348	*/
		349	ir_rvalue *op0 = get_element(op_var[0], 0);
		350	ir_rvalue *op1 = get_element(op_var[1], 0);
		351
		352	assign(ir, 0, new(mem_ctx) ir_expression(expr->operation,
		353	element_type,
		354	op0,
		355	op1));
		356	break;
		357	}
		358
		359	case ir_binop_ubo_load:
		360	assert(!"not yet supported");
		361	break;
		362
		363	case ir_triop_lrp:
		364	case ir_triop_bitfield_extract:
		365	for (i = 0; i < vector_elements; i++) {
		366	ir_rvalue *op0 = get_element(op_var[0], i);
		367	ir_rvalue *op1 = get_element(op_var[1], i);
		368	ir_rvalue *op2 = get_element(op_var[2], i);
		369
		370	assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
		371	element_type,
		372	op0,
		373	op1,
		374	op2));
		375	}
		376	break;
		377
		378	case ir_triop_bfi: {
		379	/* Only a single BFM is needed for multiple BFIs. */
		380	ir_rvalue *op0 = get_element(op_var[0], 0);
		381
		382	for (i = 0; i < vector_elements; i++) {
		383	ir_rvalue *op1 = get_element(op_var[1], i);
		384	ir_rvalue *op2 = get_element(op_var[2], i);
		385
		386	assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
		387	element_type,
		388	op0->clone(mem_ctx, NULL),
		389	op1,
		390	op2));
		391	}
		392	break;
		393	}
		394
		395	case ir_unop_pack_snorm_2x16:
		396	case ir_unop_pack_snorm_4x8:
		397	case ir_unop_pack_unorm_2x16:
		398	case ir_unop_pack_unorm_4x8:
		399	case ir_unop_pack_half_2x16:
		400	case ir_unop_unpack_snorm_2x16:
		401	case ir_unop_unpack_snorm_4x8:
		402	case ir_unop_unpack_unorm_2x16:
		403	case ir_unop_unpack_unorm_4x8:
		404	case ir_unop_unpack_half_2x16:
		405	case ir_binop_vector_extract:
		406	case ir_triop_vector_insert:
		407	case ir_quadop_bitfield_insert:
		408	case ir_quadop_vector:
		409	assert(!"should have been lowered");
		410	break;
		411
		412	case ir_unop_unpack_half_2x16_split_x:
		413	case ir_unop_unpack_half_2x16_split_y:
		414	case ir_binop_pack_half_2x16_split:
		415	assert("!not reached: expression operates on scalars only");
		416	break;
		417	}
		418
		419	ir->remove();
		420	this->progress = true;
		421
		422	return visit_continue;
		423	}

Subversion Repositories Kolibri OS

(root)/contrib/sdk/sources/Mesa/src/mesa/drivers/dri/i965/brw_fs_channel_expressions.cpp – Rev 4401