Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
1901 serge 1
/*
2
 * Copyright (C) 2004  David Airlie   All Rights Reserved.
3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining a
5
 * copy of this software and associated documentation files (the "Software"),
6
 * to deal in the Software without restriction, including without limitation
7
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
 * and/or sell copies of the Software, and to permit persons to whom the
9
 * Software is furnished to do so, subject to the following conditions:
10
 *
11
 * The above copyright notice and this permission notice shall be included
12
 * in all copies or substantial portions of the Software.
13
 *
14
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17
 * DAVID AIRLIE BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
18
 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
19
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
20
 */
21
 
22
#include "main/glheader.h"
23
#include "main/colormac.h"
24
#include "main/macros.h"
25
#include "main/atifragshader.h"
26
#include "swrast/s_atifragshader.h"
27
#include "swrast/s_context.h"
28
 
29
 
30
/**
31
 * State for executing ATI fragment shader.
32
 */
33
struct atifs_machine
34
{
35
   GLfloat Registers[6][4];         /** six temporary registers */
36
   GLfloat PrevPassRegisters[6][4];
37
   GLfloat Inputs[2][4];   /** Primary, secondary input colors */
38
};
39
 
40
 
41
 
42
/**
43
 * Fetch a texel.
44
 */
45
static void
46
fetch_texel(struct gl_context * ctx, const GLfloat texcoord[4], GLfloat lambda,
47
	    GLuint unit, GLfloat color[4])
48
{
49
   SWcontext *swrast = SWRAST_CONTEXT(ctx);
50
 
51
   /* XXX use a float-valued TextureSample routine here!!! */
52
   swrast->TextureSample[unit](ctx, ctx->Texture.Unit[unit]._Current,
53
                               1, (const GLfloat(*)[4]) texcoord,
54
                               &lambda, (GLfloat (*)[4]) color);
55
}
56
 
57
static void
58
apply_swizzle(GLfloat values[4], GLuint swizzle)
59
{
60
   GLfloat s, t, r, q;
61
 
62
   s = values[0];
63
   t = values[1];
64
   r = values[2];
65
   q = values[3];
66
 
67
   switch (swizzle) {
68
   case GL_SWIZZLE_STR_ATI:
69
      values[0] = s;
70
      values[1] = t;
71
      values[2] = r;
72
      break;
73
   case GL_SWIZZLE_STQ_ATI:
74
      values[0] = s;
75
      values[1] = t;
76
      values[2] = q;
77
      break;
78
   case GL_SWIZZLE_STR_DR_ATI:
79
      values[0] = s / r;
80
      values[1] = t / r;
81
      values[2] = 1 / r;
82
      break;
83
   case GL_SWIZZLE_STQ_DQ_ATI:
84
/* make sure q is not 0 to avoid problems later with infinite values (texture lookup)? */
85
      if (q == 0.0F)
86
         q = 0.000000001F;
87
      values[0] = s / q;
88
      values[1] = t / q;
89
      values[2] = 1.0F / q;
90
      break;
91
   }
92
   values[3] = 0.0;
93
}
94
 
95
static void
96
apply_src_rep(GLint optype, GLuint rep, GLfloat * val)
97
{
98
   GLint i;
99
   GLint start, end;
100
   if (!rep)
101
      return;
102
 
103
   start = optype ? 3 : 0;
104
   end = 4;
105
 
106
   for (i = start; i < end; i++) {
107
      switch (rep) {
108
      case GL_RED:
109
	 val[i] = val[0];
110
	 break;
111
      case GL_GREEN:
112
	 val[i] = val[1];
113
	 break;
114
      case GL_BLUE:
115
	 val[i] = val[2];
116
	 break;
117
      case GL_ALPHA:
118
	 val[i] = val[3];
119
	 break;
120
      }
121
   }
122
}
123
 
124
static void
125
apply_src_mod(GLint optype, GLuint mod, GLfloat * val)
126
{
127
   GLint i;
128
   GLint start, end;
129
 
130
   if (!mod)
131
      return;
132
 
133
   start = optype ? 3 : 0;
134
   end = 4;
135
 
136
   for (i = start; i < end; i++) {
137
      if (mod & GL_COMP_BIT_ATI)
138
	 val[i] = 1 - val[i];
139
 
140
      if (mod & GL_BIAS_BIT_ATI)
141
	 val[i] = val[i] - 0.5F;
142
 
143
      if (mod & GL_2X_BIT_ATI)
144
	 val[i] = 2 * val[i];
145
 
146
      if (mod & GL_NEGATE_BIT_ATI)
147
	 val[i] = -val[i];
148
   }
149
}
150
 
151
static void
152
apply_dst_mod(GLuint optype, GLuint mod, GLfloat * val)
153
{
154
   GLint i;
155
   GLint has_sat = mod & GL_SATURATE_BIT_ATI;
156
   GLint start, end;
157
 
158
   mod &= ~GL_SATURATE_BIT_ATI;
159
 
160
   start = optype ? 3 : 0;
161
   end = optype ? 4 : 3;
162
 
163
   for (i = start; i < end; i++) {
164
      switch (mod) {
165
      case GL_2X_BIT_ATI:
166
	 val[i] = 2 * val[i];
167
	 break;
168
      case GL_4X_BIT_ATI:
169
	 val[i] = 4 * val[i];
170
	 break;
171
      case GL_8X_BIT_ATI:
172
	 val[i] = 8 * val[i];
173
	 break;
174
      case GL_HALF_BIT_ATI:
175
	 val[i] = val[i] * 0.5F;
176
	 break;
177
      case GL_QUARTER_BIT_ATI:
178
	 val[i] = val[i] * 0.25F;
179
	 break;
180
      case GL_EIGHTH_BIT_ATI:
181
	 val[i] = val[i] * 0.125F;
182
	 break;
183
      }
184
 
185
      if (has_sat) {
186
	 if (val[i] < 0.0F)
187
	    val[i] = 0.0F;
188
	 else if (val[i] > 1.0F)
189
	    val[i] = 1.0F;
190
      }
191
      else {
192
	 if (val[i] < -8.0F)
193
	    val[i] = -8.0F;
194
	 else if (val[i] > 8.0F)
195
	    val[i] = 8.0F;
196
      }
197
   }
198
}
199
 
200
 
201
static void
202
write_dst_addr(GLuint optype, GLuint mod, GLuint mask, GLfloat * src,
203
	       GLfloat * dst)
204
{
205
   GLint i;
206
   apply_dst_mod(optype, mod, src);
207
 
208
   if (optype == ATI_FRAGMENT_SHADER_COLOR_OP) {
209
      if (mask) {
210
	 if (mask & GL_RED_BIT_ATI)
211
	    dst[0] = src[0];
212
 
213
	 if (mask & GL_GREEN_BIT_ATI)
214
	    dst[1] = src[1];
215
 
216
	 if (mask & GL_BLUE_BIT_ATI)
217
	    dst[2] = src[2];
218
      }
219
      else {
220
	 for (i = 0; i < 3; i++)
221
	    dst[i] = src[i];
222
      }
223
   }
224
   else
225
      dst[3] = src[3];
226
}
227
 
228
static void
229
finish_pass(struct atifs_machine *machine)
230
{
231
   GLint i;
232
 
233
   for (i = 0; i < 6; i++) {
234
      COPY_4V(machine->PrevPassRegisters[i], machine->Registers[i]);
235
   }
236
}
237
 
238
struct ati_fs_opcode_st ati_fs_opcodes[] = {
239
   {GL_ADD_ATI, 2},
240
   {GL_SUB_ATI, 2},
241
   {GL_MUL_ATI, 2},
242
   {GL_MAD_ATI, 3},
243
   {GL_LERP_ATI, 3},
244
   {GL_MOV_ATI, 1},
245
   {GL_CND_ATI, 3},
246
   {GL_CND0_ATI, 3},
247
   {GL_DOT2_ADD_ATI, 3},
248
   {GL_DOT3_ATI, 2},
249
   {GL_DOT4_ATI, 2}
250
};
251
 
252
 
253
 
254
static void
255
handle_pass_op(struct atifs_machine *machine, struct atifs_setupinst *texinst,
256
	       const SWspan *span, GLuint column, GLuint idx)
257
{
258
   GLuint swizzle = texinst->swizzle;
259
   GLuint pass_tex = texinst->src;
260
 
261
   if (pass_tex >= GL_TEXTURE0_ARB && pass_tex <= GL_TEXTURE7_ARB) {
262
      pass_tex -= GL_TEXTURE0_ARB;
263
      COPY_4V(machine->Registers[idx],
264
	      span->array->attribs[FRAG_ATTRIB_TEX0 + pass_tex][column]);
265
   }
266
   else if (pass_tex >= GL_REG_0_ATI && pass_tex <= GL_REG_5_ATI) {
267
      pass_tex -= GL_REG_0_ATI;
268
      COPY_4V(machine->Registers[idx], machine->PrevPassRegisters[pass_tex]);
269
   }
270
   apply_swizzle(machine->Registers[idx], swizzle);
271
 
272
}
273
 
274
static void
275
handle_sample_op(struct gl_context * ctx, struct atifs_machine *machine,
276
		 struct atifs_setupinst *texinst, const SWspan *span,
277
		 GLuint column, GLuint idx)
278
{
279
/* sample from unit idx using texinst->src as coords */
280
   GLuint swizzle = texinst->swizzle;
281
   GLuint coord_source = texinst->src;
282
   GLfloat tex_coords[4] = { 0 };
283
 
284
   if (coord_source >= GL_TEXTURE0_ARB && coord_source <= GL_TEXTURE7_ARB) {
285
      coord_source -= GL_TEXTURE0_ARB;
286
      COPY_4V(tex_coords,
287
              span->array->attribs[FRAG_ATTRIB_TEX0 + coord_source][column]);
288
   }
289
   else if (coord_source >= GL_REG_0_ATI && coord_source <= GL_REG_5_ATI) {
290
      coord_source -= GL_REG_0_ATI;
291
      COPY_4V(tex_coords, machine->PrevPassRegisters[coord_source]);
292
   }
293
   apply_swizzle(tex_coords, swizzle);
294
   fetch_texel(ctx, tex_coords, 0.0F, idx, machine->Registers[idx]);
295
}
296
 
297
/*
 * Copy the four-component value x into source slot i of the color
 * (optype 0) or alpha (optype 1) operation.  Relies on an array named
 * `src` being in scope at the point of use.
 */
#define SETUP_SRC_REG(optype, i, x) \
   do { \
      COPY_4V(src[optype][i], x); \
   } while (0)
301
 
302
 
303
 
304
/**
305
 * Execute the given fragment shader.
306
 * NOTE: we do everything in single-precision floating point
307
 * \param ctx - rendering context
308
 * \param shader - the shader to execute
309
 * \param machine - virtual machine state
310
 * \param span - the SWspan we're operating on
311
 * \param column - which pixel [i] we're operating on in the span
312
 */
313
static void
314
execute_shader(struct gl_context *ctx, const struct ati_fragment_shader *shader,
315
	       struct atifs_machine *machine, const SWspan *span,
316
               GLuint column)
317
{
318
   GLuint pc;
319
   struct atifs_instruction *inst;
320
   struct atifs_setupinst *texinst;
321
   GLint optype;
322
   GLuint i;
323
   GLint j, pass;
324
   GLint dstreg;
325
   GLfloat src[2][3][4];
326
   GLfloat zeros[4] = { 0.0, 0.0, 0.0, 0.0 };
327
   GLfloat ones[4] = { 1.0, 1.0, 1.0, 1.0 };
328
   GLfloat dst[2][4], *dstp;
329
 
330
   for (pass = 0; pass < shader->NumPasses; pass++) {
331
      if (pass > 0)
332
	 finish_pass(machine);
333
      for (j = 0; j < MAX_NUM_FRAGMENT_REGISTERS_ATI; j++) {
334
	 texinst = &shader->SetupInst[pass][j];
335
	 if (texinst->Opcode == ATI_FRAGMENT_SHADER_PASS_OP)
336
	    handle_pass_op(machine, texinst, span, column, j);
337
	 else if (texinst->Opcode == ATI_FRAGMENT_SHADER_SAMPLE_OP)
338
	    handle_sample_op(ctx, machine, texinst, span, column, j);
339
      }
340
 
341
      for (pc = 0; pc < shader->numArithInstr[pass]; pc++) {
342
	 inst = &shader->Instructions[pass][pc];
343
 
344
	 /* setup the source registers for color and alpha ops */
345
	 for (optype = 0; optype < 2; optype++) {
346
 	    for (i = 0; i < inst->ArgCount[optype]; i++) {
347
	       GLint index = inst->SrcReg[optype][i].Index;
348
 
349
	       if (index >= GL_REG_0_ATI && index <= GL_REG_5_ATI)
350
		  SETUP_SRC_REG(optype, i,
351
				machine->Registers[index - GL_REG_0_ATI]);
352
	       else if (index >= GL_CON_0_ATI && index <= GL_CON_7_ATI) {
353
		  if (shader->LocalConstDef & (1 << (index - GL_CON_0_ATI))) {
354
		     SETUP_SRC_REG(optype, i,
355
				shader->Constants[index - GL_CON_0_ATI]);
356
		  } else {
357
		     SETUP_SRC_REG(optype, i,
358
				ctx->ATIFragmentShader.GlobalConstants[index - GL_CON_0_ATI]);
359
		  }
360
	       }
361
	       else if (index == GL_ONE)
362
		  SETUP_SRC_REG(optype, i, ones);
363
	       else if (index == GL_ZERO)
364
		  SETUP_SRC_REG(optype, i, zeros);
365
	       else if (index == GL_PRIMARY_COLOR_EXT)
366
		  SETUP_SRC_REG(optype, i,
367
				machine->Inputs[ATI_FS_INPUT_PRIMARY]);
368
	       else if (index == GL_SECONDARY_INTERPOLATOR_ATI)
369
		  SETUP_SRC_REG(optype, i,
370
				machine->Inputs[ATI_FS_INPUT_SECONDARY]);
371
 
372
	       apply_src_rep(optype, inst->SrcReg[optype][i].argRep,
373
			     src[optype][i]);
374
	       apply_src_mod(optype, inst->SrcReg[optype][i].argMod,
375
			     src[optype][i]);
376
	    }
377
	 }
378
 
379
	 /* Execute the operations - color then alpha */
380
	 for (optype = 0; optype < 2; optype++) {
381
	    if (inst->Opcode[optype]) {
382
	       switch (inst->Opcode[optype]) {
383
	       case GL_ADD_ATI:
384
		  if (!optype)
385
		     for (i = 0; i < 3; i++) {
386
			dst[optype][i] =
387
			   src[optype][0][i] + src[optype][1][i];
388
		     }
389
		  else
390
		     dst[optype][3] = src[optype][0][3] + src[optype][1][3];
391
		  break;
392
	       case GL_SUB_ATI:
393
		  if (!optype)
394
		     for (i = 0; i < 3; i++) {
395
			dst[optype][i] =
396
			   src[optype][0][i] - src[optype][1][i];
397
		     }
398
		  else
399
		     dst[optype][3] = src[optype][0][3] - src[optype][1][3];
400
		  break;
401
	       case GL_MUL_ATI:
402
		  if (!optype)
403
		     for (i = 0; i < 3; i++) {
404
			dst[optype][i] =
405
			   src[optype][0][i] * src[optype][1][i];
406
		     }
407
		  else
408
		     dst[optype][3] = src[optype][0][3] * src[optype][1][3];
409
		  break;
410
	       case GL_MAD_ATI:
411
		  if (!optype)
412
		     for (i = 0; i < 3; i++) {
413
			dst[optype][i] =
414
			   src[optype][0][i] * src[optype][1][i] +
415
			   src[optype][2][i];
416
		     }
417
		  else
418
		     dst[optype][3] =
419
			src[optype][0][3] * src[optype][1][3] +
420
			src[optype][2][3];
421
		  break;
422
	       case GL_LERP_ATI:
423
		  if (!optype)
424
		     for (i = 0; i < 3; i++) {
425
			dst[optype][i] =
426
			   src[optype][0][i] * src[optype][1][i] + (1 -
427
								    src
428
								    [optype]
429
								    [0][i]) *
430
			   src[optype][2][i];
431
		     }
432
		  else
433
		     dst[optype][3] =
434
			src[optype][0][3] * src[optype][1][3] + (1 -
435
								 src[optype]
436
								 [0][3]) *
437
			src[optype][2][3];
438
		  break;
439
 
440
	       case GL_MOV_ATI:
441
		  if (!optype)
442
		     for (i = 0; i < 3; i++) {
443
			dst[optype][i] = src[optype][0][i];
444
		     }
445
		  else
446
		     dst[optype][3] = src[optype][0][3];
447
		  break;
448
	       case GL_CND_ATI:
449
		  if (!optype) {
450
		     for (i = 0; i < 3; i++) {
451
			dst[optype][i] =
452
			   (src[optype][2][i] >
453
			    0.5) ? src[optype][0][i] : src[optype][1][i];
454
		     }
455
		  }
456
		  else {
457
		     dst[optype][3] =
458
			(src[optype][2][3] >
459
			 0.5) ? src[optype][0][3] : src[optype][1][3];
460
		  }
461
		  break;
462
 
463
	       case GL_CND0_ATI:
464
		  if (!optype)
465
		     for (i = 0; i < 3; i++) {
466
			dst[optype][i] =
467
			   (src[optype][2][i] >=
468
			    0) ? src[optype][0][i] : src[optype][1][i];
469
		     }
470
		  else {
471
		     dst[optype][3] =
472
			(src[optype][2][3] >=
473
			 0) ? src[optype][0][3] : src[optype][1][3];
474
		  }
475
		  break;
476
	       case GL_DOT2_ADD_ATI:
477
		  {
478
		     GLfloat result;
479
 
480
		     /* DOT 2 always uses the source from the color op */
481
		     /* could save recalculation of dot products for alpha inst */
482
		     result = src[0][0][0] * src[0][1][0] +
483
			src[0][0][1] * src[0][1][1] + src[0][2][2];
484
		     if (!optype) {
485
			for (i = 0; i < 3; i++) {
486
			   dst[optype][i] = result;
487
			}
488
		     }
489
		     else
490
			dst[optype][3] = result;
491
		  }
492
		  break;
493
	       case GL_DOT3_ATI:
494
		  {
495
		     GLfloat result;
496
 
497
		     /* DOT 3 always uses the source from the color op */
498
		     result = src[0][0][0] * src[0][1][0] +
499
			src[0][0][1] * src[0][1][1] +
500
			src[0][0][2] * src[0][1][2];
501
 
502
		     if (!optype) {
503
			for (i = 0; i < 3; i++) {
504
			   dst[optype][i] = result;
505
			}
506
		     }
507
		     else
508
			dst[optype][3] = result;
509
		  }
510
		  break;
511
	       case GL_DOT4_ATI:
512
		  {
513
		     GLfloat result;
514
 
515
		     /* DOT 4 always uses the source from the color op */
516
		     result = src[0][0][0] * src[0][1][0] +
517
			src[0][0][1] * src[0][1][1] +
518
			src[0][0][2] * src[0][1][2] +
519
			src[0][0][3] * src[0][1][3];
520
		     if (!optype) {
521
			for (i = 0; i < 3; i++) {
522
			   dst[optype][i] = result;
523
			}
524
		     }
525
		     else
526
			dst[optype][3] = result;
527
		  }
528
		  break;
529
 
530
	       }
531
	    }
532
	 }
533
 
534
	 /* write out the destination registers */
535
	 for (optype = 0; optype < 2; optype++) {
536
	    if (inst->Opcode[optype]) {
537
	       dstreg = inst->DstReg[optype].Index;
538
	       dstp = machine->Registers[dstreg - GL_REG_0_ATI];
539
 
540
	       if ((optype == 0) || ((inst->Opcode[1] != GL_DOT2_ADD_ATI) &&
541
		  (inst->Opcode[1] != GL_DOT3_ATI) && (inst->Opcode[1] != GL_DOT4_ATI)))
542
	          write_dst_addr(optype, inst->DstReg[optype].dstMod,
543
			      inst->DstReg[optype].dstMask, dst[optype],
544
			      dstp);
545
	       else
546
		  write_dst_addr(1, inst->DstReg[0].dstMod, 0, dst[1], dstp);
547
	    }
548
	 }
549
      }
550
   }
551
}
552
 
553
 
554
/**
555
 * Init fragment shader virtual machine state.
556
 */
557
static void
558
init_machine(struct gl_context * ctx, struct atifs_machine *machine,
559
	     const struct ati_fragment_shader *shader,
560
	     const SWspan *span, GLuint col)
561
{
562
   GLfloat (*inputs)[4] = machine->Inputs;
563
   GLint i, j;
564
 
565
   for (i = 0; i < 6; i++) {
566
      for (j = 0; j < 4; j++)
567
	 machine->Registers[i][j] = 0.0;
568
   }
569
 
570
   COPY_4V(inputs[ATI_FS_INPUT_PRIMARY], span->array->attribs[FRAG_ATTRIB_COL0][col]);
571
   COPY_4V(inputs[ATI_FS_INPUT_SECONDARY], span->array->attribs[FRAG_ATTRIB_COL1][col]);
572
}
573
 
574
 
575
 
576
/**
577
 * Execute the current ATI shader program, operating on the given span.
578
 */
579
void
580
_swrast_exec_fragment_shader(struct gl_context * ctx, SWspan *span)
581
{
582
   const struct ati_fragment_shader *shader = ctx->ATIFragmentShader.Current;
583
   struct atifs_machine machine;
584
   GLuint i;
585
 
586
   /* incoming colors should be floats */
587
   ASSERT(span->array->ChanType == GL_FLOAT);
588
 
589
   for (i = 0; i < span->end; i++) {
590
      if (span->array->mask[i]) {
591
	 init_machine(ctx, &machine, shader, span, i);
592
 
593
	 execute_shader(ctx, shader, &machine, span, i);
594
 
595
         /* store result color */
596
	 {
597
	    const GLfloat *colOut = machine.Registers[0];
598
            /*fprintf(stderr,"outputs %f %f %f %f\n",
599
              colOut[0], colOut[1], colOut[2], colOut[3]); */
600
            COPY_4V(span->array->attribs[FRAG_ATTRIB_COL0][i], colOut);
601
	 }
602
      }
603
   }
604
}