Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
4358 Serge 1
/*
2
 * Copyright © 2010 Intel Corporation
3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining a
5
 * copy of this software and associated documentation files (the "Software"),
6
 * to deal in the Software without restriction, including without limitation
7
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
 * and/or sell copies of the Software, and to permit persons to whom the
9
 * Software is furnished to do so, subject to the following conditions:
10
 *
11
 * The above copyright notice and this permission notice (including the next
12
 * paragraph) shall be included in all copies or substantial portions of the
13
 * Software.
14
 *
15
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21
 * DEALINGS IN THE SOFTWARE.
22
 */
23
 
24
#include "main/glheader.h"
25
#include "main/context.h"
26
#include "main/macros.h"
27
#include "program.h"
28
#include "prog_instruction.h"
29
#include "prog_optimize.h"
30
#include "prog_parameter.h"
31
#include <stdbool.h>
32
 
33
static bool
34
src_regs_are_constant(const struct prog_instruction *inst, unsigned num_srcs)
35
{
36
   unsigned i;
37
 
38
   for (i = 0; i < num_srcs; i++) {
39
      if (inst->SrcReg[i].File != PROGRAM_CONSTANT)
40
	 return false;
41
   }
42
 
43
   return true;
44
}
45
 
46
static struct prog_src_register
47
src_reg_for_float(struct gl_program *prog, float val)
48
{
49
   struct prog_src_register src;
50
   unsigned swiz;
51
 
52
   memset(&src, 0, sizeof(src));
53
 
54
   src.File = PROGRAM_CONSTANT;
55
   src.Index = _mesa_add_unnamed_constant(prog->Parameters,
56
					  (gl_constant_value *) &val, 1, &swiz);
57
   src.Swizzle = swiz;
58
   return src;
59
}
60
 
61
static struct prog_src_register
62
src_reg_for_vec4(struct gl_program *prog, const float *val)
63
{
64
   struct prog_src_register src;
65
   unsigned swiz;
66
 
67
   memset(&src, 0, sizeof(src));
68
 
69
   src.File = PROGRAM_CONSTANT;
70
   src.Index = _mesa_add_unnamed_constant(prog->Parameters,
71
					  (gl_constant_value *) val, 4, &swiz);
72
   src.Swizzle = swiz;
73
   return src;
74
}
75
 
76
static bool
77
src_regs_are_same(const struct prog_src_register *a,
78
		  const struct prog_src_register *b)
79
{
80
   return (a->File == b->File)
81
      && (a->Index == b->Index)
82
      && (a->Swizzle == b->Swizzle)
83
      && (a->Abs == b->Abs)
84
      && (a->Negate == b->Negate)
85
      && (a->RelAddr == 0)
86
      && (b->RelAddr == 0);
87
}
88
 
89
static void
90
get_value(struct gl_program *prog, struct prog_src_register *r, float *data)
91
{
92
   const gl_constant_value *const value =
93
      prog->Parameters->ParameterValues[r->Index];
94
 
95
   data[0] = value[GET_SWZ(r->Swizzle, 0)].f;
96
   data[1] = value[GET_SWZ(r->Swizzle, 1)].f;
97
   data[2] = value[GET_SWZ(r->Swizzle, 2)].f;
98
   data[3] = value[GET_SWZ(r->Swizzle, 3)].f;
99
 
100
   if (r->Abs) {
101
      data[0] = fabsf(data[0]);
102
      data[1] = fabsf(data[1]);
103
      data[2] = fabsf(data[2]);
104
      data[3] = fabsf(data[3]);
105
   }
106
 
107
   if (r->Negate & 0x01) {
108
      data[0] = -data[0];
109
   }
110
 
111
   if (r->Negate & 0x02) {
112
      data[1] = -data[1];
113
   }
114
 
115
   if (r->Negate & 0x04) {
116
      data[2] = -data[2];
117
   }
118
 
119
   if (r->Negate & 0x08) {
120
      data[3] = -data[3];
121
   }
122
}
123
 
124
/**
125
 * Try to replace instructions that produce a constant result with simple moves
126
 *
127
 * The hope is that a following copy propagation pass will eliminate the
128
 * unnecessary move instructions.
129
 */
130
GLboolean
131
_mesa_constant_fold(struct gl_program *prog)
132
{
133
   bool progress = false;
134
   unsigned i;
135
 
136
   for (i = 0; i < prog->NumInstructions; i++) {
137
      struct prog_instruction *const inst = &prog->Instructions[i];
138
 
139
      switch (inst->Opcode) {
140
      case OPCODE_ADD:
141
	 if (src_regs_are_constant(inst, 2)) {
142
	    float a[4];
143
	    float b[4];
144
	    float result[4];
145
 
146
	    get_value(prog, &inst->SrcReg[0], a);
147
	    get_value(prog, &inst->SrcReg[1], b);
148
 
149
	    result[0] = a[0] + b[0];
150
	    result[1] = a[1] + b[1];
151
	    result[2] = a[2] + b[2];
152
	    result[3] = a[3] + b[3];
153
 
154
	    inst->Opcode = OPCODE_MOV;
155
	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
156
 
157
	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
158
	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
159
 
160
	    progress = true;
161
	 }
162
	 break;
163
 
164
      case OPCODE_CMP:
165
	 /* FINISHME: We could also optimize CMP instructions where the first
166
	  * FINISHME: source is a constant that is either all < 0.0 or all
167
	  * FINISHME: >= 0.0.
168
	  */
169
	 if (src_regs_are_constant(inst, 3)) {
170
	    float a[4];
171
	    float b[4];
172
	    float c[4];
173
	    float result[4];
174
 
175
	    get_value(prog, &inst->SrcReg[0], a);
176
	    get_value(prog, &inst->SrcReg[1], b);
177
	    get_value(prog, &inst->SrcReg[2], c);
178
 
179
            result[0] = a[0] < 0.0f ? b[0] : c[0];
180
            result[1] = a[1] < 0.0f ? b[1] : c[1];
181
            result[2] = a[2] < 0.0f ? b[2] : c[2];
182
            result[3] = a[3] < 0.0f ? b[3] : c[3];
183
 
184
	    inst->Opcode = OPCODE_MOV;
185
	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
186
 
187
	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
188
	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
189
	    inst->SrcReg[2].File = PROGRAM_UNDEFINED;
190
	    inst->SrcReg[2].Swizzle = SWIZZLE_NOOP;
191
 
192
	    progress = true;
193
	 }
194
	 break;
195
 
196
      case OPCODE_DP2:
197
      case OPCODE_DP3:
198
      case OPCODE_DP4:
199
	 if (src_regs_are_constant(inst, 2)) {
200
	    float a[4];
201
	    float b[4];
202
	    float result;
203
 
204
	    get_value(prog, &inst->SrcReg[0], a);
205
	    get_value(prog, &inst->SrcReg[1], b);
206
 
207
	    result = (a[0] * b[0]) + (a[1] * b[1]);
208
 
209
	    if (inst->Opcode >= OPCODE_DP3)
210
	       result += a[2] * b[2];
211
 
212
	    if (inst->Opcode == OPCODE_DP4)
213
	       result += a[3] * b[3];
214
 
215
	    inst->Opcode = OPCODE_MOV;
216
	    inst->SrcReg[0] = src_reg_for_float(prog, result);
217
 
218
	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
219
	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
220
 
221
	    progress = true;
222
	 }
223
	 break;
224
 
225
      case OPCODE_MUL:
226
	 if (src_regs_are_constant(inst, 2)) {
227
	    float a[4];
228
	    float b[4];
229
	    float result[4];
230
 
231
	    get_value(prog, &inst->SrcReg[0], a);
232
	    get_value(prog, &inst->SrcReg[1], b);
233
 
234
	    result[0] = a[0] * b[0];
235
	    result[1] = a[1] * b[1];
236
	    result[2] = a[2] * b[2];
237
	    result[3] = a[3] * b[3];
238
 
239
	    inst->Opcode = OPCODE_MOV;
240
	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
241
 
242
	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
243
	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
244
 
245
	    progress = true;
246
	 }
247
	 break;
248
 
249
      case OPCODE_SEQ:
250
	 if (src_regs_are_constant(inst, 2)) {
251
	    float a[4];
252
	    float b[4];
253
	    float result[4];
254
 
255
	    get_value(prog, &inst->SrcReg[0], a);
256
	    get_value(prog, &inst->SrcReg[1], b);
257
 
258
	    result[0] = (a[0] == b[0]) ? 1.0f : 0.0f;
259
	    result[1] = (a[1] == b[1]) ? 1.0f : 0.0f;
260
	    result[2] = (a[2] == b[2]) ? 1.0f : 0.0f;
261
	    result[3] = (a[3] == b[3]) ? 1.0f : 0.0f;
262
 
263
	    inst->Opcode = OPCODE_MOV;
264
	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
265
 
266
	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
267
	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
268
 
269
	    progress = true;
270
	 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
271
	    inst->Opcode = OPCODE_MOV;
272
	    inst->SrcReg[0] = src_reg_for_float(prog, 1.0f);
273
 
274
	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
275
	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
276
 
277
	    progress = true;
278
	 }
279
	 break;
280
 
281
      case OPCODE_SGE:
282
	 if (src_regs_are_constant(inst, 2)) {
283
	    float a[4];
284
	    float b[4];
285
	    float result[4];
286
 
287
	    get_value(prog, &inst->SrcReg[0], a);
288
	    get_value(prog, &inst->SrcReg[1], b);
289
 
290
	    result[0] = (a[0] >= b[0]) ? 1.0f : 0.0f;
291
	    result[1] = (a[1] >= b[1]) ? 1.0f : 0.0f;
292
	    result[2] = (a[2] >= b[2]) ? 1.0f : 0.0f;
293
	    result[3] = (a[3] >= b[3]) ? 1.0f : 0.0f;
294
 
295
	    inst->Opcode = OPCODE_MOV;
296
	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
297
 
298
	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
299
	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
300
 
301
	    progress = true;
302
	 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
303
	    inst->Opcode = OPCODE_MOV;
304
	    inst->SrcReg[0] = src_reg_for_float(prog, 1.0f);
305
 
306
	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
307
	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
308
 
309
	    progress = true;
310
	 }
311
	 break;
312
 
313
      case OPCODE_SGT:
314
	 if (src_regs_are_constant(inst, 2)) {
315
	    float a[4];
316
	    float b[4];
317
	    float result[4];
318
 
319
	    get_value(prog, &inst->SrcReg[0], a);
320
	    get_value(prog, &inst->SrcReg[1], b);
321
 
322
	    result[0] = (a[0] > b[0]) ? 1.0f : 0.0f;
323
	    result[1] = (a[1] > b[1]) ? 1.0f : 0.0f;
324
	    result[2] = (a[2] > b[2]) ? 1.0f : 0.0f;
325
	    result[3] = (a[3] > b[3]) ? 1.0f : 0.0f;
326
 
327
	    inst->Opcode = OPCODE_MOV;
328
	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
329
 
330
	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
331
	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
332
 
333
	    progress = true;
334
	 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
335
	    inst->Opcode = OPCODE_MOV;
336
	    inst->SrcReg[0] = src_reg_for_float(prog, 0.0f);
337
 
338
	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
339
	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
340
 
341
	    progress = true;
342
	 }
343
	 break;
344
 
345
      case OPCODE_SLE:
346
	 if (src_regs_are_constant(inst, 2)) {
347
	    float a[4];
348
	    float b[4];
349
	    float result[4];
350
 
351
	    get_value(prog, &inst->SrcReg[0], a);
352
	    get_value(prog, &inst->SrcReg[1], b);
353
 
354
	    result[0] = (a[0] <= b[0]) ? 1.0f : 0.0f;
355
	    result[1] = (a[1] <= b[1]) ? 1.0f : 0.0f;
356
	    result[2] = (a[2] <= b[2]) ? 1.0f : 0.0f;
357
	    result[3] = (a[3] <= b[3]) ? 1.0f : 0.0f;
358
 
359
	    inst->Opcode = OPCODE_MOV;
360
	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
361
 
362
	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
363
	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
364
 
365
	    progress = true;
366
	 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
367
	    inst->Opcode = OPCODE_MOV;
368
	    inst->SrcReg[0] = src_reg_for_float(prog, 1.0f);
369
 
370
	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
371
	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
372
 
373
	    progress = true;
374
	 }
375
	 break;
376
 
377
      case OPCODE_SLT:
378
	 if (src_regs_are_constant(inst, 2)) {
379
	    float a[4];
380
	    float b[4];
381
	    float result[4];
382
 
383
	    get_value(prog, &inst->SrcReg[0], a);
384
	    get_value(prog, &inst->SrcReg[1], b);
385
 
386
	    result[0] = (a[0] < b[0]) ? 1.0f : 0.0f;
387
	    result[1] = (a[1] < b[1]) ? 1.0f : 0.0f;
388
	    result[2] = (a[2] < b[2]) ? 1.0f : 0.0f;
389
	    result[3] = (a[3] < b[3]) ? 1.0f : 0.0f;
390
 
391
	    inst->Opcode = OPCODE_MOV;
392
	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
393
 
394
	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
395
	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
396
 
397
	    progress = true;
398
	 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
399
	    inst->Opcode = OPCODE_MOV;
400
	    inst->SrcReg[0] = src_reg_for_float(prog, 0.0f);
401
 
402
	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
403
	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
404
 
405
	    progress = true;
406
	 }
407
	 break;
408
 
409
      case OPCODE_SNE:
410
	 if (src_regs_are_constant(inst, 2)) {
411
	    float a[4];
412
	    float b[4];
413
	    float result[4];
414
 
415
	    get_value(prog, &inst->SrcReg[0], a);
416
	    get_value(prog, &inst->SrcReg[1], b);
417
 
418
	    result[0] = (a[0] != b[0]) ? 1.0f : 0.0f;
419
	    result[1] = (a[1] != b[1]) ? 1.0f : 0.0f;
420
	    result[2] = (a[2] != b[2]) ? 1.0f : 0.0f;
421
	    result[3] = (a[3] != b[3]) ? 1.0f : 0.0f;
422
 
423
	    inst->Opcode = OPCODE_MOV;
424
	    inst->SrcReg[0] = src_reg_for_vec4(prog, result);
425
 
426
	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
427
	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
428
 
429
	    progress = true;
430
	 } else if (src_regs_are_same(&inst->SrcReg[0], &inst->SrcReg[1])) {
431
	    inst->Opcode = OPCODE_MOV;
432
	    inst->SrcReg[0] = src_reg_for_float(prog, 0.0f);
433
 
434
	    inst->SrcReg[1].File = PROGRAM_UNDEFINED;
435
	    inst->SrcReg[1].Swizzle = SWIZZLE_NOOP;
436
 
437
	    progress = true;
438
	 }
439
	 break;
440
 
441
      default:
442
	 break;
443
      }
444
   }
445
 
446
   return progress;
447
}