Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
4358 Serge 1
/*
2
 * Copyright © 2010 Intel Corporation
3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining a
5
 * copy of this software and associated documentation files (the "Software"),
6
 * to deal in the Software without restriction, including without limitation
7
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
 * and/or sell copies of the Software, and to permit persons to whom the
9
 * Software is furnished to do so, subject to the following conditions:
10
 *
11
 * The above copyright notice and this permission notice (including the next
12
 * paragraph) shall be included in all copies or substantial portions of the
13
 * Software.
14
 *
15
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21
 * DEALINGS IN THE SOFTWARE.
22
 */
23
 
24
/**
25
 * \file brw_wm_channel_expressions.cpp
26
 *
27
 * Breaks vector operations down into operations on each component.
28
 *
29
 * The 965 fragment shader receives 8 or 16 pixels at a time, so each
30
 * channel of a vector is laid out as 1 or 2 8-float registers.  Each
31
 * ALU operation operates on one of those channel registers.  As a
32
 * result, there is no value to the 965 fragment shader in tracking
33
 * "vector" expressions in the sense of GLSL fragment shaders, when
34
 * doing a channel at a time may help in constant folding, algebraic
35
 * simplification, and reducing the liveness of channel registers.
36
 *
37
 * The exception to the desire to break everything down to floats is
38
 * texturing.  The texture sampler returns a writemasked
39
 * 4/8-register sequence containing the texture values.  We don't want
40
 * to dispatch to the sampler separately for each channel we need, so
41
 * we do retain the vector types in that case.
42
 */
43
 
44
extern "C" {
45
#include "main/core.h"
46
#include "brw_wm.h"
47
}
48
#include "glsl/ir.h"
49
#include "glsl/ir_expression_flattening.h"
50
#include "glsl/glsl_types.h"
51
 
52
class ir_channel_expressions_visitor : public ir_hierarchical_visitor {
53
public:
54
   ir_channel_expressions_visitor()
55
   {
56
      this->progress = false;
57
      this->mem_ctx = NULL;
58
   }
59
 
60
   ir_visitor_status visit_leave(ir_assignment *);
61
 
62
   ir_rvalue *get_element(ir_variable *var, unsigned int element);
63
   void assign(ir_assignment *ir, int elem, ir_rvalue *val);
64
 
65
   bool progress;
66
   void *mem_ctx;
67
};
68
 
69
static bool
70
channel_expressions_predicate(ir_instruction *ir)
71
{
72
   ir_expression *expr = ir->as_expression();
73
   unsigned int i;
74
 
75
   if (!expr)
76
      return false;
77
 
78
   for (i = 0; i < expr->get_num_operands(); i++) {
79
      if (expr->operands[i]->type->is_vector())
80
	 return true;
81
   }
82
 
83
   return false;
84
}
85
 
86
bool
87
brw_do_channel_expressions(exec_list *instructions)
88
{
89
   ir_channel_expressions_visitor v;
90
 
91
   /* Pull out any matrix expression to a separate assignment to a
92
    * temp.  This will make our handling of the breakdown to
93
    * operations on the matrix's vector components much easier.
94
    */
95
   do_expression_flattening(instructions, channel_expressions_predicate);
96
 
97
   visit_list_elements(&v, instructions);
98
 
99
   return v.progress;
100
}
101
 
102
/**
 * Returns an rvalue that reads channel \p elem of \p var.
 *
 * A scalar variable has only one channel, so it is dereferenced directly;
 * vectors get a single-component swizzle selecting the channel.
 */
ir_rvalue *
ir_channel_expressions_visitor::get_element(ir_variable *var, unsigned int elem)
{
   if (var->type->is_scalar())
      return new(mem_ctx) ir_dereference_variable(var);

   assert(elem < var->type->components());

   ir_dereference *deref = new(mem_ctx) ir_dereference_variable(var);
   return new(mem_ctx) ir_swizzle(deref, elem, 0, 0, 0, 1);
}
114
 
115
void
116
ir_channel_expressions_visitor::assign(ir_assignment *ir, int elem, ir_rvalue *val)
117
{
118
   ir_dereference *lhs = ir->lhs->clone(mem_ctx, NULL);
119
   ir_assignment *assign;
120
 
121
   /* This assign-of-expression should have been generated by the
122
    * expression flattening visitor (since we never short circit to
123
    * not flatten, even for plain assignments of variables), so the
124
    * writemask is always full.
125
    */
126
   assert(ir->write_mask == (1 << ir->lhs->type->components()) - 1);
127
 
128
   assign = new(mem_ctx) ir_assignment(lhs, val, NULL, (1 << elem));
129
   ir->insert_before(assign);
130
}
131
 
132
/**
 * Rewrites a vector-typed assignment of an expression into a series of
 * per-channel scalar assignments, then removes the original assignment.
 *
 * Operands are first copied into temporaries so each channel extraction
 * reads the operand exactly once.  Returns visit_continue in all cases.
 */
ir_visitor_status
ir_channel_expressions_visitor::visit_leave(ir_assignment *ir)
{
   ir_expression *expr = ir->rhs->as_expression();
   bool found_vector = false;
   unsigned int i, vector_elements = 1;
   ir_variable *op_var[3];

   if (!expr)
      return visit_continue;

   /* Lazily pick up the ralloc context from the first IR node we see. */
   if (!this->mem_ctx)
      this->mem_ctx = ralloc_parent(ir);

   for (i = 0; i < expr->get_num_operands(); i++) {
      if (expr->operands[i]->type->is_vector()) {
	 found_vector = true;
	 vector_elements = expr->operands[i]->type->vector_elements;
	 break;
      }
   }
   if (!found_vector)
      return visit_continue;

   /* Store the expression operands in temps so we can use them
    * multiple times.
    */
   for (i = 0; i < expr->get_num_operands(); i++) {
      ir_assignment *assign;
      ir_dereference *deref;

      assert(!expr->operands[i]->type->is_matrix());

      op_var[i] = new(mem_ctx) ir_variable(expr->operands[i]->type,
					   "channel_expressions",
					   ir_var_temporary);
      ir->insert_before(op_var[i]);

      deref = new(mem_ctx) ir_dereference_variable(op_var[i]);
      assign = new(mem_ctx) ir_assignment(deref,
					  expr->operands[i],
					  NULL);
      ir->insert_before(assign);
   }

   const glsl_type *element_type = glsl_type::get_instance(ir->lhs->type->base_type,
							   1, 1);

   /* OK, time to break down this vector operation. */
   switch (expr->operation) {
   case ir_unop_bit_not:
   case ir_unop_logic_not:
   case ir_unop_neg:
   case ir_unop_abs:
   case ir_unop_sign:
   case ir_unop_rcp:
   case ir_unop_rsq:
   case ir_unop_sqrt:
   case ir_unop_exp:
   case ir_unop_log:
   case ir_unop_exp2:
   case ir_unop_log2:
   case ir_unop_bitcast_i2f:
   case ir_unop_bitcast_f2i:
   case ir_unop_bitcast_f2u:
   case ir_unop_bitcast_u2f:
   case ir_unop_i2u:
   case ir_unop_u2i:
   case ir_unop_f2i:
   case ir_unop_f2u:
   case ir_unop_i2f:
   case ir_unop_f2b:
   case ir_unop_b2f:
   case ir_unop_i2b:
   case ir_unop_b2i:
   case ir_unop_u2f:
   case ir_unop_trunc:
   case ir_unop_ceil:
   case ir_unop_floor:
   case ir_unop_fract:
   case ir_unop_round_even:
   case ir_unop_sin:
   case ir_unop_cos:
   case ir_unop_sin_reduced:
   case ir_unop_cos_reduced:
   case ir_unop_dFdx:
   case ir_unop_dFdy:
   case ir_unop_bitfield_reverse:
   case ir_unop_bit_count:
   case ir_unop_find_msb:
   case ir_unop_find_lsb:
      /* Componentwise unary ops: one scalar expression per channel. */
      for (i = 0; i < vector_elements; i++) {
	 ir_rvalue *op0 = get_element(op_var[0], i);

	 assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
						  element_type,
						  op0,
						  NULL));
      }
      break;

   case ir_binop_add:
   case ir_binop_sub:
   case ir_binop_mul:
   case ir_binop_div:
   case ir_binop_mod:
   case ir_binop_min:
   case ir_binop_max:
   case ir_binop_pow:
   case ir_binop_lshift:
   case ir_binop_rshift:
   case ir_binop_bit_and:
   case ir_binop_bit_xor:
   case ir_binop_bit_or:
   case ir_binop_less:
   case ir_binop_greater:
   case ir_binop_lequal:
   case ir_binop_gequal:
   case ir_binop_equal:
   case ir_binop_nequal:
      /* Componentwise binary ops: one scalar expression per channel. */
      for (i = 0; i < vector_elements; i++) {
	 ir_rvalue *op0 = get_element(op_var[0], i);
	 ir_rvalue *op1 = get_element(op_var[1], i);

	 assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
						  element_type,
						  op0,
						  op1));
      }
      break;

   case ir_unop_any: {
      /* any(v) becomes a left fold of logical OR over the channels. */
      ir_expression *temp;
      temp = new(mem_ctx) ir_expression(ir_binop_logic_or,
					element_type,
					get_element(op_var[0], 0),
					get_element(op_var[0], 1));

      for (i = 2; i < vector_elements; i++) {
	 temp = new(mem_ctx) ir_expression(ir_binop_logic_or,
					   element_type,
					   get_element(op_var[0], i),
					   temp);
      }
      assign(ir, 0, temp);
      break;
   }

   case ir_binop_dot: {
      /* dot(a, b) becomes per-channel multiplies summed into channel 0. */
      ir_expression *last = NULL;
      for (i = 0; i < vector_elements; i++) {
	 ir_rvalue *op0 = get_element(op_var[0], i);
	 ir_rvalue *op1 = get_element(op_var[1], i);
	 ir_expression *temp;

	 temp = new(mem_ctx) ir_expression(ir_binop_mul,
					   element_type,
					   op0,
					   op1);
	 if (last) {
	    last = new(mem_ctx) ir_expression(ir_binop_add,
					      element_type,
					      temp,
					      last);
	 } else {
	    last = temp;
	 }
      }
      assign(ir, 0, last);
      break;
   }

   case ir_binop_logic_and:
   case ir_binop_logic_xor:
   case ir_binop_logic_or:
      ir->print();
      printf("\n");
      assert(!"not reached: expression operates on scalars only");
      break;
   case ir_binop_all_equal:
   case ir_binop_any_nequal: {
      /* Per-channel comparisons joined with AND (all_equal) or OR
       * (any_nequal) into a single boolean in channel 0.
       */
      ir_expression *last = NULL;
      for (i = 0; i < vector_elements; i++) {
	 ir_rvalue *op0 = get_element(op_var[0], i);
	 ir_rvalue *op1 = get_element(op_var[1], i);
	 ir_expression *temp;
	 ir_expression_operation join;

	 if (expr->operation == ir_binop_all_equal)
	    join = ir_binop_logic_and;
	 else
	    join = ir_binop_logic_or;

	 temp = new(mem_ctx) ir_expression(expr->operation,
					   element_type,
					   op0,
					   op1);
	 if (last) {
	    last = new(mem_ctx) ir_expression(join,
					      element_type,
					      temp,
					      last);
	 } else {
	    last = temp;
	 }
      }
      assign(ir, 0, last);
      break;
   }
   case ir_unop_noise:
      assert(!"noise should have been broken down to function call");
      break;

   case ir_binop_bfm: {
      /* Does not need to be scalarized, since its result will be identical
       * for all channels.
       */
      ir_rvalue *op0 = get_element(op_var[0], 0);
      ir_rvalue *op1 = get_element(op_var[1], 0);

      assign(ir, 0, new(mem_ctx) ir_expression(expr->operation,
                                               element_type,
                                               op0,
                                               op1));
      break;
   }

   case ir_binop_ubo_load:
      assert(!"not yet supported");
      break;

   case ir_triop_lrp:
   case ir_triop_bitfield_extract:
      /* Componentwise ternary ops: one scalar expression per channel. */
      for (i = 0; i < vector_elements; i++) {
	 ir_rvalue *op0 = get_element(op_var[0], i);
	 ir_rvalue *op1 = get_element(op_var[1], i);
	 ir_rvalue *op2 = get_element(op_var[2], i);

	 assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
						  element_type,
						  op0,
						  op1,
						  op2));
      }
      break;

   case ir_triop_bfi: {
      /* Only a single BFM is needed for multiple BFIs. */
      ir_rvalue *op0 = get_element(op_var[0], 0);

      for (i = 0; i < vector_elements; i++) {
         ir_rvalue *op1 = get_element(op_var[1], i);
         ir_rvalue *op2 = get_element(op_var[2], i);

         assign(ir, i, new(mem_ctx) ir_expression(expr->operation,
                                                  element_type,
                                                  op0->clone(mem_ctx, NULL),
                                                  op1,
                                                  op2));
      }
      break;
   }

   case ir_unop_pack_snorm_2x16:
   case ir_unop_pack_snorm_4x8:
   case ir_unop_pack_unorm_2x16:
   case ir_unop_pack_unorm_4x8:
   case ir_unop_pack_half_2x16:
   case ir_unop_unpack_snorm_2x16:
   case ir_unop_unpack_snorm_4x8:
   case ir_unop_unpack_unorm_2x16:
   case ir_unop_unpack_unorm_4x8:
   case ir_unop_unpack_half_2x16:
   case ir_binop_vector_extract:
   case ir_triop_vector_insert:
   case ir_quadop_bitfield_insert:
   case ir_quadop_vector:
      assert(!"should have been lowered");
      break;

   case ir_unop_unpack_half_2x16_split_x:
   case ir_unop_unpack_half_2x16_split_y:
   case ir_binop_pack_half_2x16_split:
      /* Fixed: the `!` was inside the string literal, so the assert tested
       * a non-null string pointer and could never fire.
       */
      assert(!"not reached: expression operates on scalars only");
      break;
   }

   ir->remove();
   this->progress = true;

   return visit_continue;
}