Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
4358 Serge 1
/**************************************************************************
2
 *
3
 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
4
 * All Rights Reserved.
5
 *
6
 * Permission is hereby granted, free of charge, to any person obtaining a
7
 * copy of this software and associated documentation files (the
8
 * "Software"), to deal in the Software without restriction, including
9
 * without limitation the rights to use, copy, modify, merge, publish,
10
 * distribute, sub license, and/or sell copies of the Software, and to
11
 * permit persons to whom the Software is furnished to do so, subject to
12
 * the following conditions:
13
 *
14
 * The above copyright notice and this permission notice (including the
15
 * next paragraph) shall be included in all copies or substantial portions
16
 * of the Software.
17
 *
18
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21
 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
 *
26
 **************************************************************************/
27
 
28
/**
29
 * \file ffvertex_prog.c
30
 *
31
 * Create a vertex program to execute the current fixed function T&L pipeline.
32
 * \author Keith Whitwell
33
 */
34
 
35
 
36
#include "main/glheader.h"
37
#include "main/mtypes.h"
38
#include "main/macros.h"
39
#include "main/enums.h"
40
#include "main/ffvertex_prog.h"
41
#include "program/program.h"
42
#include "program/prog_cache.h"
43
#include "program/prog_instruction.h"
44
#include "program/prog_parameter.h"
45
#include "program/prog_print.h"
46
#include "program/prog_statevars.h"
47
 
48
 
49
/** Max of number of lights and texture coord units */
50
#define NUM_UNITS MAX2(MAX_TEXTURE_COORD_UNITS, MAX_LIGHTS)
51
 
52
struct state_key {
53
   unsigned light_color_material_mask:12;
54
   unsigned light_global_enabled:1;
55
   unsigned light_local_viewer:1;
56
   unsigned light_twoside:1;
57
   unsigned material_shininess_is_zero:1;
58
   unsigned need_eye_coords:1;
59
   unsigned normalize:1;
60
   unsigned rescale_normals:1;
61
 
62
   unsigned fog_source_is_depth:1;
63
   unsigned fog_distance_mode:2;
64
   unsigned separate_specular:1;
65
   unsigned point_attenuated:1;
66
   unsigned point_array:1;
67
   unsigned texture_enabled_global:1;
68
   unsigned fragprog_inputs_read:12;
69
 
70
   GLbitfield64 varying_vp_inputs;
71
 
72
   struct {
73
      unsigned light_enabled:1;
74
      unsigned light_eyepos3_is_zero:1;
75
      unsigned light_spotcutoff_is_180:1;
76
      unsigned light_attenuated:1;
77
      unsigned texunit_really_enabled:1;
78
      unsigned texmat_enabled:1;
79
      unsigned coord_replace:1;
80
      unsigned texgen_enabled:4;
81
      unsigned texgen_mode0:4;
82
      unsigned texgen_mode1:4;
83
      unsigned texgen_mode2:4;
84
      unsigned texgen_mode3:4;
85
   } unit[NUM_UNITS];
86
};
87
 
88
 
89
#define TXG_NONE           0
90
#define TXG_OBJ_LINEAR     1
91
#define TXG_EYE_LINEAR     2
92
#define TXG_SPHERE_MAP     3
93
#define TXG_REFLECTION_MAP 4
94
#define TXG_NORMAL_MAP     5
95
 
96
static GLuint translate_texgen( GLboolean enabled, GLenum mode )
97
{
98
   if (!enabled)
99
      return TXG_NONE;
100
 
101
   switch (mode) {
102
   case GL_OBJECT_LINEAR: return TXG_OBJ_LINEAR;
103
   case GL_EYE_LINEAR: return TXG_EYE_LINEAR;
104
   case GL_SPHERE_MAP: return TXG_SPHERE_MAP;
105
   case GL_REFLECTION_MAP_NV: return TXG_REFLECTION_MAP;
106
   case GL_NORMAL_MAP_NV: return TXG_NORMAL_MAP;
107
   default: return TXG_NONE;
108
   }
109
}
110
 
111
#define FDM_EYE_RADIAL    0
112
#define FDM_EYE_PLANE     1
113
#define FDM_EYE_PLANE_ABS 2
114
 
115
static GLuint translate_fog_distance_mode( GLenum mode )
116
{
117
   switch (mode) {
118
   case GL_EYE_RADIAL_NV:
119
      return FDM_EYE_RADIAL;
120
   case GL_EYE_PLANE:
121
      return FDM_EYE_PLANE;
122
   default: /* shouldn't happen; fall through to a sensible default */
123
   case GL_EYE_PLANE_ABSOLUTE_NV:
124
      return FDM_EYE_PLANE_ABS;
125
   }
126
}
127
 
128
static GLboolean check_active_shininess( struct gl_context *ctx,
129
                                         const struct state_key *key,
130
                                         GLuint side )
131
{
132
   GLuint attr = MAT_ATTRIB_FRONT_SHININESS + side;
133
 
134
   if ((key->varying_vp_inputs & VERT_BIT_COLOR0) &&
135
       (key->light_color_material_mask & (1 << attr)))
136
      return GL_TRUE;
137
 
138
   if (key->varying_vp_inputs & VERT_ATTRIB_GENERIC(attr))
139
      return GL_TRUE;
140
 
141
   if (ctx->Light.Material.Attrib[attr][0] != 0.0F)
142
      return GL_TRUE;
143
 
144
   return GL_FALSE;
145
}
146
 
147
 
148
static void make_state_key( struct gl_context *ctx, struct state_key *key )
149
{
150
   const struct gl_fragment_program *fp;
151
   GLuint i;
152
 
153
   memset(key, 0, sizeof(struct state_key));
154
   fp = ctx->FragmentProgram._Current;
155
 
156
   /* This now relies on texenvprogram.c being active:
157
    */
158
   assert(fp);
159
 
160
   key->need_eye_coords = ctx->_NeedEyeCoords;
161
 
162
   key->fragprog_inputs_read = fp->Base.InputsRead;
163
   key->varying_vp_inputs = ctx->varying_vp_inputs;
164
 
165
   if (ctx->RenderMode == GL_FEEDBACK) {
166
      /* make sure the vertprog emits color and tex0 */
167
      key->fragprog_inputs_read |= (VARYING_BIT_COL0 | VARYING_BIT_TEX0);
168
   }
169
 
170
   key->separate_specular = (ctx->Light.Model.ColorControl ==
171
			     GL_SEPARATE_SPECULAR_COLOR);
172
 
173
   if (ctx->Light.Enabled) {
174
      key->light_global_enabled = 1;
175
 
176
      if (ctx->Light.Model.LocalViewer)
177
	 key->light_local_viewer = 1;
178
 
179
      if (ctx->Light.Model.TwoSide)
180
	 key->light_twoside = 1;
181
 
182
      if (ctx->Light.ColorMaterialEnabled) {
183
	 key->light_color_material_mask = ctx->Light._ColorMaterialBitmask;
184
      }
185
 
186
      for (i = 0; i < MAX_LIGHTS; i++) {
187
	 struct gl_light *light = &ctx->Light.Light[i];
188
 
189
	 if (light->Enabled) {
190
	    key->unit[i].light_enabled = 1;
191
 
192
	    if (light->EyePosition[3] == 0.0)
193
	       key->unit[i].light_eyepos3_is_zero = 1;
194
 
195
	    if (light->SpotCutoff == 180.0)
196
	       key->unit[i].light_spotcutoff_is_180 = 1;
197
 
198
	    if (light->ConstantAttenuation != 1.0 ||
199
		light->LinearAttenuation != 0.0 ||
200
		light->QuadraticAttenuation != 0.0)
201
	       key->unit[i].light_attenuated = 1;
202
	 }
203
      }
204
 
205
      if (check_active_shininess(ctx, key, 0)) {
206
         key->material_shininess_is_zero = 0;
207
      }
208
      else if (key->light_twoside &&
209
               check_active_shininess(ctx, key, 1)) {
210
         key->material_shininess_is_zero = 0;
211
      }
212
      else {
213
         key->material_shininess_is_zero = 1;
214
      }
215
   }
216
 
217
   if (ctx->Transform.Normalize)
218
      key->normalize = 1;
219
 
220
   if (ctx->Transform.RescaleNormals)
221
      key->rescale_normals = 1;
222
 
223
   if (ctx->Fog.FogCoordinateSource == GL_FRAGMENT_DEPTH_EXT) {
224
      key->fog_source_is_depth = 1;
225
      key->fog_distance_mode = translate_fog_distance_mode(ctx->Fog.FogDistanceMode);
226
   }
227
 
228
   if (ctx->Point._Attenuated)
229
      key->point_attenuated = 1;
230
 
231
   if (ctx->Array.ArrayObj->VertexAttrib[VERT_ATTRIB_POINT_SIZE].Enabled)
232
      key->point_array = 1;
233
 
234
   if (ctx->Texture._TexGenEnabled ||
235
       ctx->Texture._TexMatEnabled ||
236
       ctx->Texture._EnabledUnits)
237
      key->texture_enabled_global = 1;
238
 
239
   for (i = 0; i < MAX_TEXTURE_COORD_UNITS; i++) {
240
      struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i];
241
 
242
      if (texUnit->_ReallyEnabled)
243
	 key->unit[i].texunit_really_enabled = 1;
244
 
245
      if (ctx->Point.PointSprite)
246
	 if (ctx->Point.CoordReplace[i])
247
	    key->unit[i].coord_replace = 1;
248
 
249
      if (ctx->Texture._TexMatEnabled & ENABLE_TEXMAT(i))
250
	 key->unit[i].texmat_enabled = 1;
251
 
252
      if (texUnit->TexGenEnabled) {
253
	 key->unit[i].texgen_enabled = 1;
254
 
255
	 key->unit[i].texgen_mode0 =
256
	    translate_texgen( texUnit->TexGenEnabled & (1<<0),
257
			      texUnit->GenS.Mode );
258
	 key->unit[i].texgen_mode1 =
259
	    translate_texgen( texUnit->TexGenEnabled & (1<<1),
260
			      texUnit->GenT.Mode );
261
	 key->unit[i].texgen_mode2 =
262
	    translate_texgen( texUnit->TexGenEnabled & (1<<2),
263
			      texUnit->GenR.Mode );
264
	 key->unit[i].texgen_mode3 =
265
	    translate_texgen( texUnit->TexGenEnabled & (1<<3),
266
			      texUnit->GenQ.Mode );
267
      }
268
   }
269
}
270
 
271
 
272
 
273
/* Very useful debugging tool - produces annotated listing of
274
 * generated program with line/function references for each
275
 * instruction back into this file:
276
 */
277
#define DISASSEM 0
278
 
279
 
280
/* Use uregs to represent registers internally, translate to Mesa's
281
 * expected formats on emit.
282
 *
283
 * NOTE: These are passed by value extensively in this file rather
284
 * than as usual by pointer reference.  If this disturbs you, try
285
 * remembering they are just 32bits in size.
286
 *
287
 * GCC is smart enough to deal with these dword-sized structures in
288
 * much the same way as if I had defined them as dwords and was using
289
 * macros to access and set the fields.  This is much nicer and easier
290
 * to evolve.
291
 */
292
struct ureg {
293
   GLuint file:4;
294
   GLint idx:9;      /* relative addressing may be negative */
295
                     /* sizeof(idx) should == sizeof(prog_src_reg::Index) */
296
   GLuint negate:1;
297
   GLuint swz:12;
298
   GLuint pad:6;
299
};
300
 
301
 
302
struct tnl_program {
303
   const struct state_key *state;
304
   struct gl_vertex_program *program;
305
   GLint max_inst;  /** number of instructions allocated for program */
306
   GLboolean mvp_with_dp4;
307
 
308
   GLuint temp_in_use;
309
   GLuint temp_reserved;
310
 
311
   struct ureg eye_position;
312
   struct ureg eye_position_z;
313
   struct ureg eye_position_normalized;
314
   struct ureg transformed_normal;
315
   struct ureg identity;
316
 
317
   GLuint materials;
318
   GLuint color_materials;
319
};
320
 
321
 
322
static const struct ureg undef = {
323
   PROGRAM_UNDEFINED,
324
   0,
325
   0,
326
   0,
327
 
328
};
329
 
330
/* Local shorthand for the four swizzle components:
 */
#define X    SWIZZLE_X
#define Y    SWIZZLE_Y
#define Z    SWIZZLE_Z
#define W    SWIZZLE_W
336
 
337
 
338
/* Construct a ureg:
339
 */
340
static struct ureg make_ureg(GLuint file, GLint idx)
341
{
342
   struct ureg reg;
343
   reg.file = file;
344
   reg.idx = idx;
345
   reg.negate = 0;
346
   reg.swz = SWIZZLE_NOOP;
347
   reg.pad = 0;
348
   return reg;
349
}
350
 
351
 
352
 
353
/**
 * Return a copy of \p reg with its negate flag toggled.
 */
static struct ureg negate( struct ureg reg )
{
   reg.negate = !reg.negate;
   return reg;
}
358
 
359
 
360
static struct ureg swizzle( struct ureg reg, int x, int y, int z, int w )
361
{
362
   reg.swz = MAKE_SWIZZLE4(GET_SWZ(reg.swz, x),
363
			   GET_SWZ(reg.swz, y),
364
			   GET_SWZ(reg.swz, z),
365
			   GET_SWZ(reg.swz, w));
366
   return reg;
367
}
368
 
369
 
370
/**
 * Return a copy of \p reg with component \p x replicated to all four
 * channels.
 */
static struct ureg swizzle1( struct ureg reg, int x )
{
   const int comp = x;

   return swizzle(reg, comp, comp, comp, comp);
}
374
 
375
 
376
static struct ureg get_temp( struct tnl_program *p )
377
{
378
   int bit = ffs( ~p->temp_in_use );
379
   if (!bit) {
380
      _mesa_problem(NULL, "%s: out of temporaries\n", __FILE__);
381
      exit(1);
382
   }
383
 
384
   if ((GLuint) bit > p->program->Base.NumTemporaries)
385
      p->program->Base.NumTemporaries = bit;
386
 
387
   p->temp_in_use |= 1<<(bit-1);
388
   return make_ureg(PROGRAM_TEMPORARY, bit-1);
389
}
390
 
391
 
392
static struct ureg reserve_temp( struct tnl_program *p )
393
{
394
   struct ureg temp = get_temp( p );
395
   p->temp_reserved |= 1<
396
   return temp;
397
}
398
 
399
 
400
static void release_temp( struct tnl_program *p, struct ureg reg )
401
{
402
   if (reg.file == PROGRAM_TEMPORARY) {
403
      p->temp_in_use &= ~(1<
404
      p->temp_in_use |= p->temp_reserved; /* can't release reserved temps */
405
   }
406
}
407
 
408
static void release_temps( struct tnl_program *p )
409
{
410
   p->temp_in_use = p->temp_reserved;
411
}
412
 
413
 
414
static struct ureg register_param5(struct tnl_program *p,
415
				   GLint s0,
416
				   GLint s1,
417
				   GLint s2,
418
				   GLint s3,
419
                                   GLint s4)
420
{
421
   gl_state_index tokens[STATE_LENGTH];
422
   GLint idx;
423
   tokens[0] = s0;
424
   tokens[1] = s1;
425
   tokens[2] = s2;
426
   tokens[3] = s3;
427
   tokens[4] = s4;
428
   idx = _mesa_add_state_reference( p->program->Base.Parameters, tokens );
429
   return make_ureg(PROGRAM_STATE_VAR, idx);
430
}
431
 
432
 
433
/* Shorthand for register_param5() with the trailing tokens set to 0. */
#define register_param1(p,s0)          register_param5(p,s0,0,0,0,0)
#define register_param2(p,s0,s1)       register_param5(p,s0,s1,0,0,0)
#define register_param3(p,s0,s1,s2)    register_param5(p,s0,s1,s2,0,0)
#define register_param4(p,s0,s1,s2,s3) register_param5(p,s0,s1,s2,s3,0)
437
 
438
 
439
 
440
/**
441
 * \param input  one of VERT_ATTRIB_x tokens.
442
 */
443
static struct ureg register_input( struct tnl_program *p, GLuint input )
444
{
445
   assert(input < VERT_ATTRIB_MAX);
446
 
447
   if (p->state->varying_vp_inputs & VERT_BIT(input)) {
448
      p->program->Base.InputsRead |= VERT_BIT(input);
449
      return make_ureg(PROGRAM_INPUT, input);
450
   }
451
   else {
452
      return register_param3( p, STATE_INTERNAL, STATE_CURRENT_ATTRIB, input );
453
   }
454
}
455
 
456
 
457
/**
458
 * \param input  one of VARYING_SLOT_x tokens.
459
 */
460
static struct ureg register_output( struct tnl_program *p, GLuint output )
461
{
462
   p->program->Base.OutputsWritten |= BITFIELD64_BIT(output);
463
   return make_ureg(PROGRAM_OUTPUT, output);
464
}
465
 
466
 
467
static struct ureg register_const4f( struct tnl_program *p,
468
			      GLfloat s0,
469
			      GLfloat s1,
470
			      GLfloat s2,
471
			      GLfloat s3)
472
{
473
   gl_constant_value values[4];
474
   GLint idx;
475
   GLuint swizzle;
476
   values[0].f = s0;
477
   values[1].f = s1;
478
   values[2].f = s2;
479
   values[3].f = s3;
480
   idx = _mesa_add_unnamed_constant( p->program->Base.Parameters, values, 4,
481
                                     &swizzle );
482
   ASSERT(swizzle == SWIZZLE_NOOP);
483
   return make_ureg(PROGRAM_CONSTANT, idx);
484
}
485
 
486
/* Shorthand for register_const4f(): missing y/z default to 0 and w to 1,
 * except register_scalar_const(), which replicates s0 to all four slots.
 */
#define register_const1f(p, s0)         register_const4f(p, s0, 0, 0, 1)
#define register_scalar_const(p, s0)    register_const4f(p, s0, s0, s0, s0)
#define register_const2f(p, s0, s1)     register_const4f(p, s0, s1, 0, 1)
#define register_const3f(p, s0, s1, s2) register_const4f(p, s0, s1, s2, 1)
490
 
491
static GLboolean is_undef( struct ureg reg )
492
{
493
   return reg.file == PROGRAM_UNDEFINED;
494
}
495
 
496
 
497
static struct ureg get_identity_param( struct tnl_program *p )
498
{
499
   if (is_undef(p->identity))
500
      p->identity = register_const4f(p, 0,0,0,1);
501
 
502
   return p->identity;
503
}
504
 
505
static void register_matrix_param5( struct tnl_program *p,
506
				    GLint s0, /* modelview, projection, etc */
507
				    GLint s1, /* texture matrix number */
508
				    GLint s2, /* first row */
509
				    GLint s3, /* last row */
510
				    GLint s4, /* inverse, transpose, etc */
511
				    struct ureg *matrix )
512
{
513
   GLint i;
514
 
515
   /* This is a bit sad as the support is there to pull the whole
516
    * matrix out in one go:
517
    */
518
   for (i = 0; i <= s3 - s2; i++)
519
      matrix[i] = register_param5( p, s0, s1, i, i, s4 );
520
}
521
 
522
 
523
static void emit_arg( struct prog_src_register *src,
524
		      struct ureg reg )
525
{
526
   src->File = reg.file;
527
   src->Index = reg.idx;
528
   src->Swizzle = reg.swz;
529
   src->Negate = reg.negate ? NEGATE_XYZW : NEGATE_NONE;
530
   src->Abs = 0;
531
   src->RelAddr = 0;
532
   /* Check that bitfield sizes aren't exceeded */
533
   ASSERT(src->Index == reg.idx);
534
}
535
 
536
 
537
/**
 * Translate ureg \p reg into the Mesa destination-register description
 * \p dst.
 *
 * \param mask  write mask; 0 is accepted as shorthand for xyzw
 */
static void emit_dst( struct prog_dst_register *dst,
                      struct ureg reg, GLuint mask )
{
   /* allow zero as a shorthand for xyzw */
   const GLuint write_mask = mask ? mask : WRITEMASK_XYZW;

   dst->File = reg.file;
   dst->Index = reg.idx;
   dst->WriteMask = write_mask;
   dst->CondMask = COND_TR;  /* always pass cond test */
   dst->CondSwizzle = SWIZZLE_NOOP;

   /* Check that bitfield sizes aren't exceeded */
   ASSERT(dst->Index == reg.idx);
}
549
 
550
 
551
static void debug_insn( struct prog_instruction *inst, const char *fn,
552
			GLuint line )
553
{
554
   if (DISASSEM) {
555
      static const char *last_fn;
556
 
557
      if (fn != last_fn) {
558
	 last_fn = fn;
559
	 printf("%s:\n", fn);
560
      }
561
 
562
      printf("%d:\t", line);
563
      _mesa_print_instruction(inst);
564
   }
565
}
566
 
567
 
568
static void emit_op3fn(struct tnl_program *p,
569
                       enum prog_opcode op,
570
		       struct ureg dest,
571
		       GLuint mask,
572
		       struct ureg src0,
573
		       struct ureg src1,
574
		       struct ureg src2,
575
		       const char *fn,
576
		       GLuint line)
577
{
578
   GLuint nr;
579
   struct prog_instruction *inst;
580
 
581
   assert((GLint) p->program->Base.NumInstructions <= p->max_inst);
582
 
583
   if (p->program->Base.NumInstructions == p->max_inst) {
584
      /* need to extend the program's instruction array */
585
      struct prog_instruction *newInst;
586
 
587
      /* double the size */
588
      p->max_inst *= 2;
589
 
590
      newInst = _mesa_alloc_instructions(p->max_inst);
591
      if (!newInst) {
592
         _mesa_error(NULL, GL_OUT_OF_MEMORY, "vertex program build");
593
         return;
594
      }
595
 
596
      _mesa_copy_instructions(newInst,
597
                              p->program->Base.Instructions,
598
                              p->program->Base.NumInstructions);
599
 
600
      _mesa_free_instructions(p->program->Base.Instructions,
601
                              p->program->Base.NumInstructions);
602
 
603
      p->program->Base.Instructions = newInst;
604
   }
605
 
606
   nr = p->program->Base.NumInstructions++;
607
 
608
   inst = &p->program->Base.Instructions[nr];
609
   inst->Opcode = (enum prog_opcode) op;
610
 
611
   emit_arg( &inst->SrcReg[0], src0 );
612
   emit_arg( &inst->SrcReg[1], src1 );
613
   emit_arg( &inst->SrcReg[2], src2 );
614
 
615
   emit_dst( &inst->DstReg, dest, mask );
616
 
617
   debug_insn(inst, fn, line);
618
}
619
 
620
 
621
/* Wrappers around emit_op3fn() that pass the calling function and line
 * for debug_insn() and fill unused source operands with undef.
 */
#define emit_op3(p, op, dst, mask, src0, src1, src2) \
   emit_op3fn(p, op, dst, mask, src0, src1, src2, __FUNCTION__, __LINE__)

#define emit_op2(p, op, dst, mask, src0, src1) \
   emit_op3fn(p, op, dst, mask, src0, src1, undef, __FUNCTION__, __LINE__)

#define emit_op1(p, op, dst, mask, src0) \
   emit_op3fn(p, op, dst, mask, src0, undef, undef, __FUNCTION__, __LINE__)
629
 
630
 
631
static struct ureg make_temp( struct tnl_program *p, struct ureg reg )
632
{
633
   if (reg.file == PROGRAM_TEMPORARY &&
634
       !(p->temp_reserved & (1<
635
      return reg;
636
   else {
637
      struct ureg temp = get_temp(p);
638
      emit_op1(p, OPCODE_MOV, temp, 0, reg);
639
      return temp;
640
   }
641
}
642
 
643
 
644
/* Currently no tracking performed of input/output/register size or
645
 * active elements.  Could be used to reduce these operations, as
646
 * could the matrix type.
647
 */
648
static void emit_matrix_transform_vec4( struct tnl_program *p,
649
					struct ureg dest,
650
					const struct ureg *mat,
651
					struct ureg src)
652
{
653
   emit_op2(p, OPCODE_DP4, dest, WRITEMASK_X, src, mat[0]);
654
   emit_op2(p, OPCODE_DP4, dest, WRITEMASK_Y, src, mat[1]);
655
   emit_op2(p, OPCODE_DP4, dest, WRITEMASK_Z, src, mat[2]);
656
   emit_op2(p, OPCODE_DP4, dest, WRITEMASK_W, src, mat[3]);
657
}
658
 
659
 
660
/* This version is much easier to implement if writemasks are not
661
 * supported natively on the target or (like SSE), the target doesn't
662
 * have a clean/obvious dotproduct implementation.
663
 */
664
static void emit_transpose_matrix_transform_vec4( struct tnl_program *p,
665
						  struct ureg dest,
666
						  const struct ureg *mat,
667
						  struct ureg src)
668
{
669
   struct ureg tmp;
670
 
671
   if (dest.file != PROGRAM_TEMPORARY)
672
      tmp = get_temp(p);
673
   else
674
      tmp = dest;
675
 
676
   emit_op2(p, OPCODE_MUL, tmp, 0, swizzle1(src,X), mat[0]);
677
   emit_op3(p, OPCODE_MAD, tmp, 0, swizzle1(src,Y), mat[1], tmp);
678
   emit_op3(p, OPCODE_MAD, tmp, 0, swizzle1(src,Z), mat[2], tmp);
679
   emit_op3(p, OPCODE_MAD, dest, 0, swizzle1(src,W), mat[3], tmp);
680
 
681
   if (dest.file != PROGRAM_TEMPORARY)
682
      release_temp(p, tmp);
683
}
684
 
685
 
686
static void emit_matrix_transform_vec3( struct tnl_program *p,
687
					struct ureg dest,
688
					const struct ureg *mat,
689
					struct ureg src)
690
{
691
   emit_op2(p, OPCODE_DP3, dest, WRITEMASK_X, src, mat[0]);
692
   emit_op2(p, OPCODE_DP3, dest, WRITEMASK_Y, src, mat[1]);
693
   emit_op2(p, OPCODE_DP3, dest, WRITEMASK_Z, src, mat[2]);
694
}
695
 
696
 
697
static void emit_normalize_vec3( struct tnl_program *p,
698
				 struct ureg dest,
699
				 struct ureg src )
700
{
701
   struct ureg tmp = get_temp(p);
702
   emit_op2(p, OPCODE_DP3, tmp, WRITEMASK_X, src, src);
703
   emit_op1(p, OPCODE_RSQ, tmp, WRITEMASK_X, tmp);
704
   emit_op2(p, OPCODE_MUL, dest, 0, src, swizzle1(tmp, X));
705
   release_temp(p, tmp);
706
}
707
 
708
 
709
static void emit_passthrough( struct tnl_program *p,
710
			      GLuint input,
711
			      GLuint output )
712
{
713
   struct ureg out = register_output(p, output);
714
   emit_op1(p, OPCODE_MOV, out, 0, register_input(p, input));
715
}
716
 
717
 
718
static struct ureg get_eye_position( struct tnl_program *p )
719
{
720
   if (is_undef(p->eye_position)) {
721
      struct ureg pos = register_input( p, VERT_ATTRIB_POS );
722
      struct ureg modelview[4];
723
 
724
      p->eye_position = reserve_temp(p);
725
 
726
      if (p->mvp_with_dp4) {
727
	 register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3,
728
                                 0, modelview );
729
 
730
	 emit_matrix_transform_vec4(p, p->eye_position, modelview, pos);
731
      }
732
      else {
733
	 register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3,
734
				 STATE_MATRIX_TRANSPOSE, modelview );
735
 
736
	 emit_transpose_matrix_transform_vec4(p, p->eye_position, modelview, pos);
737
      }
738
   }
739
 
740
   return p->eye_position;
741
}
742
 
743
 
744
static struct ureg get_eye_position_z( struct tnl_program *p )
745
{
746
   if (!is_undef(p->eye_position))
747
      return swizzle1(p->eye_position, Z);
748
 
749
   if (is_undef(p->eye_position_z)) {
750
      struct ureg pos = register_input( p, VERT_ATTRIB_POS );
751
      struct ureg modelview[4];
752
 
753
      p->eye_position_z = reserve_temp(p);
754
 
755
      register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3,
756
                              0, modelview );
757
 
758
      emit_op2(p, OPCODE_DP4, p->eye_position_z, 0, pos, modelview[2]);
759
   }
760
 
761
   return p->eye_position_z;
762
}
763
 
764
 
765
static struct ureg get_eye_position_normalized( struct tnl_program *p )
766
{
767
   if (is_undef(p->eye_position_normalized)) {
768
      struct ureg eye = get_eye_position(p);
769
      p->eye_position_normalized = reserve_temp(p);
770
      emit_normalize_vec3(p, p->eye_position_normalized, eye);
771
   }
772
 
773
   return p->eye_position_normalized;
774
}
775
 
776
 
777
static struct ureg get_transformed_normal( struct tnl_program *p )
778
{
779
   if (is_undef(p->transformed_normal) &&
780
       !p->state->need_eye_coords &&
781
       !p->state->normalize &&
782
       !(p->state->need_eye_coords == p->state->rescale_normals))
783
   {
784
      p->transformed_normal = register_input(p, VERT_ATTRIB_NORMAL );
785
   }
786
   else if (is_undef(p->transformed_normal))
787
   {
788
      struct ureg normal = register_input(p, VERT_ATTRIB_NORMAL );
789
      struct ureg mvinv[3];
790
      struct ureg transformed_normal = reserve_temp(p);
791
 
792
      if (p->state->need_eye_coords) {
793
         register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 2,
794
                                 STATE_MATRIX_INVTRANS, mvinv );
795
 
796
         /* Transform to eye space:
797
          */
798
         emit_matrix_transform_vec3( p, transformed_normal, mvinv, normal );
799
         normal = transformed_normal;
800
      }
801
 
802
      /* Normalize/Rescale:
803
       */
804
      if (p->state->normalize) {
805
	 emit_normalize_vec3( p, transformed_normal, normal );
806
         normal = transformed_normal;
807
      }
808
      else if (p->state->need_eye_coords == p->state->rescale_normals) {
809
         /* This is already adjusted for eye/non-eye rendering:
810
          */
811
	 struct ureg rescale = register_param2(p, STATE_INTERNAL,
812
                                               STATE_NORMAL_SCALE);
813
 
814
	 emit_op2( p, OPCODE_MUL, transformed_normal, 0, normal, rescale );
815
         normal = transformed_normal;
816
      }
817
 
818
      assert(normal.file == PROGRAM_TEMPORARY);
819
      p->transformed_normal = normal;
820
   }
821
 
822
   return p->transformed_normal;
823
}
824
 
825
 
826
static void build_hpos( struct tnl_program *p )
827
{
828
   struct ureg pos = register_input( p, VERT_ATTRIB_POS );
829
   struct ureg hpos = register_output( p, VARYING_SLOT_POS );
830
   struct ureg mvp[4];
831
 
832
   if (p->mvp_with_dp4) {
833
      register_matrix_param5( p, STATE_MVP_MATRIX, 0, 0, 3,
834
			      0, mvp );
835
      emit_matrix_transform_vec4( p, hpos, mvp, pos );
836
   }
837
   else {
838
      register_matrix_param5( p, STATE_MVP_MATRIX, 0, 0, 3,
839
			      STATE_MATRIX_TRANSPOSE, mvp );
840
      emit_transpose_matrix_transform_vec4( p, hpos, mvp, pos );
841
   }
842
}
843
 
844
 
845
/**
 * Compute the material attribute index for a STATE_x material property
 * and side: properties are counted from STATE_AMBIENT, two slots per
 * property, with \p side selecting the slot within the pair.
 */
static GLuint material_attrib( GLuint side, GLuint property )
{
   const GLuint pair = property - STATE_AMBIENT;

   return pair * 2 + side;
}
849
 
850
 
851
/**
852
 * Get a bitmask of which material values vary on a per-vertex basis.
853
 */
854
static void set_material_flags( struct tnl_program *p )
855
{
856
   p->color_materials = 0;
857
   p->materials = 0;
858
 
859
   if (p->state->varying_vp_inputs & VERT_BIT_COLOR0) {
860
      p->materials =
861
	 p->color_materials = p->state->light_color_material_mask;
862
   }
863
 
864
   p->materials |= (p->state->varying_vp_inputs >> VERT_ATTRIB_GENERIC0);
865
}
866
 
867
 
868
/**
 * Return the register supplying material \p property for \p side.
 *
 * The source is chosen from the masks computed by set_material_flags():
 * the COLOR0 input if the attribute tracks the vertex color, the matching
 * generic input if the attribute varies per vertex, or the STATE_MATERIAL
 * state parameter otherwise.
 */
static struct ureg get_material( struct tnl_program *p, GLuint side,
                                 GLuint property )
{
   GLuint attrib = material_attrib(side, property);

   if (p->color_materials & (1u << attrib))
      return register_input(p, VERT_ATTRIB_COLOR0);
   else if (p->materials & (1u << attrib)) {
      /* Put material values in the GENERIC slots -- they are not used
       * for anything in fixed function mode.
       */
      return register_input( p, attrib + VERT_ATTRIB_GENERIC0 );
   }
   else
      return register_param3( p, STATE_MATERIAL, side, property );
}
884
 
885
/* Material bits (emission, ambient, diffuse) that feed the scene color of
 * the given side; the front-side bits are shifted left by `side`.
 */
#define SCENE_COLOR_BITS(side) (( MAT_BIT_FRONT_EMISSION | \
                                   MAT_BIT_FRONT_AMBIENT | \
                                   MAT_BIT_FRONT_DIFFUSE) << (side))
888
 
889
 
890
/**
891
 * Either return a precalculated constant value or emit code to
892
 * calculate these values dynamically in the case where material calls
893
 * are present between begin/end pairs.
894
 *
895
 * Probably want to shift this to the program compilation phase - if
896
 * we always emitted the calculation here, a smart compiler could
897
 * detect that it was constant (given a certain set of inputs), and
898
 * lift it out of the main loop.  That way the programs created here
899
 * would be independent of the vertex_buffer details.
900
 */
901
static struct ureg get_scenecolor( struct tnl_program *p, GLuint side )
902
{
903
   if (p->materials & SCENE_COLOR_BITS(side)) {
904
      struct ureg lm_ambient = register_param1(p, STATE_LIGHTMODEL_AMBIENT);
905
      struct ureg material_emission = get_material(p, side, STATE_EMISSION);
906
      struct ureg material_ambient = get_material(p, side, STATE_AMBIENT);
907
      struct ureg material_diffuse = get_material(p, side, STATE_DIFFUSE);
908
      struct ureg tmp = make_temp(p, material_diffuse);
909
      emit_op3(p, OPCODE_MAD, tmp, WRITEMASK_XYZ, lm_ambient,
910
	       material_ambient, material_emission);
911
      return tmp;
912
   }
913
   else
914
      return register_param2( p, STATE_LIGHTMODEL_SCENECOLOR, side );
915
}
916
 
917
 
918
/**
 * Return the product of a light's color and a material's color for the
 * given \p property and \p side.
 *
 * When the material attribute varies per vertex the product is emitted as
 * a MUL into a new temporary; otherwise the STATE_LIGHTPROD state
 * parameter is referenced.
 */
static struct ureg get_lightprod( struct tnl_program *p, GLuint light,
                                  GLuint side, GLuint property )
{
   GLuint attrib = material_attrib(side, property);
   if (p->materials & (1u << attrib)) {
      struct ureg light_value =
         register_param3(p, STATE_LIGHT, light, property);
      struct ureg material_value = get_material(p, side, property);
      struct ureg tmp = get_temp(p);
      emit_op2(p, OPCODE_MUL, tmp, 0, light_value, material_value);
      return tmp;
   }
   else
      return register_param4(p, STATE_LIGHTPROD, light, side, property);
}
933
 
934
 
935
static struct ureg calculate_light_attenuation( struct tnl_program *p,
936
						GLuint i,
937
						struct ureg VPpli,
938
						struct ureg dist )
939
{
940
   struct ureg attenuation = register_param3(p, STATE_LIGHT, i,
941
					     STATE_ATTENUATION);
942
   struct ureg att = undef;
943
 
944
   /* Calculate spot attenuation:
945
    */
946
   if (!p->state->unit[i].light_spotcutoff_is_180) {
947
      struct ureg spot_dir_norm = register_param3(p, STATE_INTERNAL,
948
						  STATE_LIGHT_SPOT_DIR_NORMALIZED, i);
949
      struct ureg spot = get_temp(p);
950
      struct ureg slt = get_temp(p);
951
 
952
      att = get_temp(p);
953
 
954
      emit_op2(p, OPCODE_DP3, spot, 0, negate(VPpli), spot_dir_norm);
955
      emit_op2(p, OPCODE_SLT, slt, 0, swizzle1(spot_dir_norm,W), spot);
956
      emit_op2(p, OPCODE_POW, spot, 0, spot, swizzle1(attenuation, W));
957
      emit_op2(p, OPCODE_MUL, att, 0, slt, spot);
958
 
959
      release_temp(p, spot);
960
      release_temp(p, slt);
961
   }
962
 
963
   /* Calculate distance attenuation(See formula (2.4) at glspec 2.1 page 62):
964
    *
965
    * Skip the calucation when _dist_ is undefined(light_eyepos3_is_zero)
966
    */
967
   if (p->state->unit[i].light_attenuated && !is_undef(dist)) {
968
      if (is_undef(att))
969
         att = get_temp(p);
970
      /* 1/d,d,d,1/d */
971
      emit_op1(p, OPCODE_RCP, dist, WRITEMASK_YZ, dist);
972
      /* 1,d,d*d,1/d */
973
      emit_op2(p, OPCODE_MUL, dist, WRITEMASK_XZ, dist, swizzle1(dist,Y));
974
      /* 1/dist-atten */
975
      emit_op2(p, OPCODE_DP3, dist, 0, attenuation, dist);
976
 
977
      if (!p->state->unit[i].light_spotcutoff_is_180) {
978
	 /* dist-atten */
979
	 emit_op1(p, OPCODE_RCP, dist, 0, dist);
980
	 /* spot-atten * dist-atten */
981
	 emit_op2(p, OPCODE_MUL, att, 0, dist, att);
982
      }
983
      else {
984
	 /* dist-atten */
985
	 emit_op1(p, OPCODE_RCP, att, 0, dist);
986
      }
987
   }
988
 
989
   return att;
990
}
991
 
992
 
993
/**
994
 * Compute:
995
 *   lit.y = MAX(0, dots.x)
996
 *   lit.z = SLT(0, dots.x)
997
 */
998
static void emit_degenerate_lit( struct tnl_program *p,
999
                                 struct ureg lit,
1000
                                 struct ureg dots )
1001
{
1002
   struct ureg id = get_identity_param(p);  /* id = {0,0,0,1} */
1003
 
1004
   /* Note that lit.x & lit.w will not be examined.  Note also that
1005
    * dots.xyzw == dots.xxxx.
1006
    */
1007
 
1008
   /* MAX lit, id, dots;
1009
    */
1010
   emit_op2(p, OPCODE_MAX, lit, WRITEMASK_XYZW, id, dots);
1011
 
1012
   /* result[2] = (in > 0 ? 1 : 0)
1013
    * SLT lit.z, id.z, dots;   # lit.z = (0 < dots.z) ? 1 : 0
1014
    */
1015
   emit_op2(p, OPCODE_SLT, lit, WRITEMASK_Z, swizzle1(id,Z), dots);
1016
}
1017
 
1018
 
1019
/* Need to add some addtional parameters to allow lighting in object
1020
 * space - STATE_SPOT_DIRECTION and STATE_HALF_VECTOR implicitly assume eye
1021
 * space lighting.
1022
 */
1023
static void build_lighting( struct tnl_program *p )
1024
{
1025
   const GLboolean twoside = p->state->light_twoside;
1026
   const GLboolean separate = p->state->separate_specular;
1027
   GLuint nr_lights = 0, count = 0;
1028
   struct ureg normal = get_transformed_normal(p);
1029
   struct ureg lit = get_temp(p);
1030
   struct ureg dots = get_temp(p);
1031
   struct ureg _col0 = undef, _col1 = undef;
1032
   struct ureg _bfc0 = undef, _bfc1 = undef;
1033
   GLuint i;
1034
 
1035
   /*
1036
    * NOTE:
1037
    * dots.x = dot(normal, VPpli)
1038
    * dots.y = dot(normal, halfAngle)
1039
    * dots.z = back.shininess
1040
    * dots.w = front.shininess
1041
    */
1042
 
1043
   for (i = 0; i < MAX_LIGHTS; i++)
1044
      if (p->state->unit[i].light_enabled)
1045
	 nr_lights++;
1046
 
1047
   set_material_flags(p);
1048
 
1049
   {
1050
      if (!p->state->material_shininess_is_zero) {
1051
         struct ureg shininess = get_material(p, 0, STATE_SHININESS);
1052
         emit_op1(p, OPCODE_MOV, dots, WRITEMASK_W, swizzle1(shininess,X));
1053
         release_temp(p, shininess);
1054
      }
1055
 
1056
      _col0 = make_temp(p, get_scenecolor(p, 0));
1057
      if (separate)
1058
	 _col1 = make_temp(p, get_identity_param(p));
1059
      else
1060
	 _col1 = _col0;
1061
   }
1062
 
1063
   if (twoside) {
1064
      if (!p->state->material_shininess_is_zero) {
1065
         /* Note that we negate the back-face specular exponent here.
1066
          * The negation will be un-done later in the back-face code below.
1067
          */
1068
         struct ureg shininess = get_material(p, 1, STATE_SHININESS);
1069
         emit_op1(p, OPCODE_MOV, dots, WRITEMASK_Z,
1070
                  negate(swizzle1(shininess,X)));
1071
         release_temp(p, shininess);
1072
      }
1073
 
1074
      _bfc0 = make_temp(p, get_scenecolor(p, 1));
1075
      if (separate)
1076
	 _bfc1 = make_temp(p, get_identity_param(p));
1077
      else
1078
	 _bfc1 = _bfc0;
1079
   }
1080
 
1081
   /* If no lights, still need to emit the scenecolor.
1082
    */
1083
   {
1084
      struct ureg res0 = register_output( p, VARYING_SLOT_COL0 );
1085
      emit_op1(p, OPCODE_MOV, res0, 0, _col0);
1086
   }
1087
 
1088
   if (separate) {
1089
      struct ureg res1 = register_output( p, VARYING_SLOT_COL1 );
1090
      emit_op1(p, OPCODE_MOV, res1, 0, _col1);
1091
   }
1092
 
1093
   if (twoside) {
1094
      struct ureg res0 = register_output( p, VARYING_SLOT_BFC0 );
1095
      emit_op1(p, OPCODE_MOV, res0, 0, _bfc0);
1096
   }
1097
 
1098
   if (twoside && separate) {
1099
      struct ureg res1 = register_output( p, VARYING_SLOT_BFC1 );
1100
      emit_op1(p, OPCODE_MOV, res1, 0, _bfc1);
1101
   }
1102
 
1103
   if (nr_lights == 0) {
1104
      release_temps(p);
1105
      return;
1106
   }
1107
 
1108
   for (i = 0; i < MAX_LIGHTS; i++) {
1109
      if (p->state->unit[i].light_enabled) {
1110
	 struct ureg half = undef;
1111
	 struct ureg att = undef, VPpli = undef;
1112
	 struct ureg dist = undef;
1113
 
1114
	 count++;
1115
         if (p->state->unit[i].light_eyepos3_is_zero) {
1116
             VPpli = register_param3(p, STATE_INTERNAL,
1117
                                     STATE_LIGHT_POSITION_NORMALIZED, i);
1118
         } else {
1119
            struct ureg Ppli = register_param3(p, STATE_INTERNAL,
1120
                                               STATE_LIGHT_POSITION, i);
1121
            struct ureg V = get_eye_position(p);
1122
 
1123
            VPpli = get_temp(p);
1124
            dist = get_temp(p);
1125
 
1126
            /* Calculate VPpli vector
1127
             */
1128
            emit_op2(p, OPCODE_SUB, VPpli, 0, Ppli, V);
1129
 
1130
            /* Normalize VPpli.  The dist value also used in
1131
             * attenuation below.
1132
             */
1133
            emit_op2(p, OPCODE_DP3, dist, 0, VPpli, VPpli);
1134
            emit_op1(p, OPCODE_RSQ, dist, 0, dist);
1135
            emit_op2(p, OPCODE_MUL, VPpli, 0, VPpli, dist);
1136
         }
1137
 
1138
         /* Calculate attenuation:
1139
          */
1140
         att = calculate_light_attenuation(p, i, VPpli, dist);
1141
         release_temp(p, dist);
1142
 
1143
	 /* Calculate viewer direction, or use infinite viewer:
1144
	  */
1145
         if (!p->state->material_shininess_is_zero) {
1146
            if (p->state->light_local_viewer) {
1147
               struct ureg eye_hat = get_eye_position_normalized(p);
1148
               half = get_temp(p);
1149
               emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat);
1150
               emit_normalize_vec3(p, half, half);
1151
            } else if (p->state->unit[i].light_eyepos3_is_zero) {
1152
               half = register_param3(p, STATE_INTERNAL,
1153
                                      STATE_LIGHT_HALF_VECTOR, i);
1154
            } else {
1155
               struct ureg z_dir = swizzle(get_identity_param(p),X,Y,W,Z);
1156
               half = get_temp(p);
1157
               emit_op2(p, OPCODE_ADD, half, 0, VPpli, z_dir);
1158
               emit_normalize_vec3(p, half, half);
1159
            }
1160
	 }
1161
 
1162
	 /* Calculate dot products:
1163
	  */
1164
         if (p->state->material_shininess_is_zero) {
1165
            emit_op2(p, OPCODE_DP3, dots, 0, normal, VPpli);
1166
         }
1167
         else {
1168
            emit_op2(p, OPCODE_DP3, dots, WRITEMASK_X, normal, VPpli);
1169
            emit_op2(p, OPCODE_DP3, dots, WRITEMASK_Y, normal, half);
1170
         }
1171
 
1172
	 /* Front face lighting:
1173
	  */
1174
	 {
1175
	    struct ureg ambient = get_lightprod(p, i, 0, STATE_AMBIENT);
1176
	    struct ureg diffuse = get_lightprod(p, i, 0, STATE_DIFFUSE);
1177
	    struct ureg specular = get_lightprod(p, i, 0, STATE_SPECULAR);
1178
	    struct ureg res0, res1;
1179
	    GLuint mask0, mask1;
1180
 
1181
	    if (count == nr_lights) {
1182
	       if (separate) {
1183
		  mask0 = WRITEMASK_XYZ;
1184
		  mask1 = WRITEMASK_XYZ;
1185
		  res0 = register_output( p, VARYING_SLOT_COL0 );
1186
		  res1 = register_output( p, VARYING_SLOT_COL1 );
1187
	       }
1188
	       else {
1189
		  mask0 = 0;
1190
		  mask1 = WRITEMASK_XYZ;
1191
		  res0 = _col0;
1192
		  res1 = register_output( p, VARYING_SLOT_COL0 );
1193
	       }
1194
	    }
1195
            else {
1196
	       mask0 = 0;
1197
	       mask1 = 0;
1198
	       res0 = _col0;
1199
	       res1 = _col1;
1200
	    }
1201
 
1202
	    if (!is_undef(att)) {
1203
               /* light is attenuated by distance */
1204
               emit_op1(p, OPCODE_LIT, lit, 0, dots);
1205
               emit_op2(p, OPCODE_MUL, lit, 0, lit, att);
1206
               emit_op3(p, OPCODE_MAD, _col0, 0, swizzle1(lit,X), ambient, _col0);
1207
            }
1208
            else if (!p->state->material_shininess_is_zero) {
1209
               /* there's a non-zero specular term */
1210
               emit_op1(p, OPCODE_LIT, lit, 0, dots);
1211
               emit_op2(p, OPCODE_ADD, _col0, 0, ambient, _col0);
1212
            }
1213
            else {
1214
               /* no attenutation, no specular */
1215
               emit_degenerate_lit(p, lit, dots);
1216
               emit_op2(p, OPCODE_ADD, _col0, 0, ambient, _col0);
1217
            }
1218
 
1219
	    emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _col0);
1220
	    emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _col1);
1221
 
1222
	    release_temp(p, ambient);
1223
	    release_temp(p, diffuse);
1224
	    release_temp(p, specular);
1225
	 }
1226
 
1227
	 /* Back face lighting:
1228
	  */
1229
	 if (twoside) {
1230
	    struct ureg ambient = get_lightprod(p, i, 1, STATE_AMBIENT);
1231
	    struct ureg diffuse = get_lightprod(p, i, 1, STATE_DIFFUSE);
1232
	    struct ureg specular = get_lightprod(p, i, 1, STATE_SPECULAR);
1233
	    struct ureg res0, res1;
1234
	    GLuint mask0, mask1;
1235
 
1236
	    if (count == nr_lights) {
1237
	       if (separate) {
1238
		  mask0 = WRITEMASK_XYZ;
1239
		  mask1 = WRITEMASK_XYZ;
1240
		  res0 = register_output( p, VARYING_SLOT_BFC0 );
1241
		  res1 = register_output( p, VARYING_SLOT_BFC1 );
1242
	       }
1243
	       else {
1244
		  mask0 = 0;
1245
		  mask1 = WRITEMASK_XYZ;
1246
		  res0 = _bfc0;
1247
		  res1 = register_output( p, VARYING_SLOT_BFC0 );
1248
	       }
1249
	    }
1250
            else {
1251
	       res0 = _bfc0;
1252
	       res1 = _bfc1;
1253
	       mask0 = 0;
1254
	       mask1 = 0;
1255
	    }
1256
 
1257
            /* For the back face we need to negate the X and Y component
1258
             * dot products.  dots.Z has the negated back-face specular
1259
             * exponent.  We swizzle that into the W position.  This
1260
             * negation makes the back-face specular term positive again.
1261
             */
1262
            dots = negate(swizzle(dots,X,Y,W,Z));
1263
 
1264
	    if (!is_undef(att)) {
1265
               emit_op1(p, OPCODE_LIT, lit, 0, dots);
1266
	       emit_op2(p, OPCODE_MUL, lit, 0, lit, att);
1267
               emit_op3(p, OPCODE_MAD, _bfc0, 0, swizzle1(lit,X), ambient, _bfc0);
1268
            }
1269
            else if (!p->state->material_shininess_is_zero) {
1270
               emit_op1(p, OPCODE_LIT, lit, 0, dots);
1271
               emit_op2(p, OPCODE_ADD, _bfc0, 0, ambient, _bfc0); /**/
1272
            }
1273
            else {
1274
               emit_degenerate_lit(p, lit, dots);
1275
               emit_op2(p, OPCODE_ADD, _bfc0, 0, ambient, _bfc0);
1276
            }
1277
 
1278
	    emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _bfc0);
1279
	    emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _bfc1);
1280
            /* restore dots to its original state for subsequent lights
1281
             * by negating and swizzling again.
1282
             */
1283
            dots = negate(swizzle(dots,X,Y,W,Z));
1284
 
1285
	    release_temp(p, ambient);
1286
	    release_temp(p, diffuse);
1287
	    release_temp(p, specular);
1288
	 }
1289
 
1290
	 release_temp(p, half);
1291
	 release_temp(p, VPpli);
1292
	 release_temp(p, att);
1293
      }
1294
   }
1295
 
1296
   release_temps( p );
1297
}
1298
 
1299
 
1300
static void build_fog( struct tnl_program *p )
1301
{
1302
   struct ureg fog = register_output(p, VARYING_SLOT_FOGC);
1303
   struct ureg input;
1304
 
1305
   if (p->state->fog_source_is_depth) {
1306
 
1307
      switch (p->state->fog_distance_mode) {
1308
      case FDM_EYE_RADIAL: /* Z = sqrt(Xe*Xe + Ye*Ye + Ze*Ze) */
1309
	input = get_eye_position(p);
1310
	emit_op2(p, OPCODE_DP3, fog, WRITEMASK_X, input, input);
1311
	emit_op1(p, OPCODE_RSQ, fog, WRITEMASK_X, fog);
1312
	emit_op1(p, OPCODE_RCP, fog, WRITEMASK_X, fog);
1313
	break;
1314
      case FDM_EYE_PLANE: /* Z = Ze */
1315
	input = get_eye_position_z(p);
1316
	emit_op1(p, OPCODE_MOV, fog, WRITEMASK_X, input);
1317
	break;
1318
      case FDM_EYE_PLANE_ABS: /* Z = abs(Ze) */
1319
	input = get_eye_position_z(p);
1320
	emit_op1(p, OPCODE_ABS, fog, WRITEMASK_X, input);
1321
	break;
1322
      default: assert(0); break; /* can't happen */
1323
      }
1324
 
1325
   }
1326
   else {
1327
      input = swizzle1(register_input(p, VERT_ATTRIB_FOG), X);
1328
      emit_op1(p, OPCODE_ABS, fog, WRITEMASK_X, input);
1329
   }
1330
 
1331
   emit_op1(p, OPCODE_MOV, fog, WRITEMASK_YZW, get_identity_param(p));
1332
}
1333
 
1334
 
1335
static void build_reflect_texgen( struct tnl_program *p,
1336
				  struct ureg dest,
1337
				  GLuint writemask )
1338
{
1339
   struct ureg normal = get_transformed_normal(p);
1340
   struct ureg eye_hat = get_eye_position_normalized(p);
1341
   struct ureg tmp = get_temp(p);
1342
 
1343
   /* n.u */
1344
   emit_op2(p, OPCODE_DP3, tmp, 0, normal, eye_hat);
1345
   /* 2n.u */
1346
   emit_op2(p, OPCODE_ADD, tmp, 0, tmp, tmp);
1347
   /* (-2n.u)n + u */
1348
   emit_op3(p, OPCODE_MAD, dest, writemask, negate(tmp), normal, eye_hat);
1349
 
1350
   release_temp(p, tmp);
1351
}
1352
 
1353
 
1354
static void build_sphere_texgen( struct tnl_program *p,
1355
				 struct ureg dest,
1356
				 GLuint writemask )
1357
{
1358
   struct ureg normal = get_transformed_normal(p);
1359
   struct ureg eye_hat = get_eye_position_normalized(p);
1360
   struct ureg tmp = get_temp(p);
1361
   struct ureg half = register_scalar_const(p, .5);
1362
   struct ureg r = get_temp(p);
1363
   struct ureg inv_m = get_temp(p);
1364
   struct ureg id = get_identity_param(p);
1365
 
1366
   /* Could share the above calculations, but it would be
1367
    * a fairly odd state for someone to set (both sphere and
1368
    * reflection active for different texture coordinate
1369
    * components.  Of course - if two texture units enable
1370
    * reflect and/or sphere, things start to tilt in favour
1371
    * of seperating this out:
1372
    */
1373
 
1374
   /* n.u */
1375
   emit_op2(p, OPCODE_DP3, tmp, 0, normal, eye_hat);
1376
   /* 2n.u */
1377
   emit_op2(p, OPCODE_ADD, tmp, 0, tmp, tmp);
1378
   /* (-2n.u)n + u */
1379
   emit_op3(p, OPCODE_MAD, r, 0, negate(tmp), normal, eye_hat);
1380
   /* r + 0,0,1 */
1381
   emit_op2(p, OPCODE_ADD, tmp, 0, r, swizzle(id,X,Y,W,Z));
1382
   /* rx^2 + ry^2 + (rz+1)^2 */
1383
   emit_op2(p, OPCODE_DP3, tmp, 0, tmp, tmp);
1384
   /* 2/m */
1385
   emit_op1(p, OPCODE_RSQ, tmp, 0, tmp);
1386
   /* 1/m */
1387
   emit_op2(p, OPCODE_MUL, inv_m, 0, tmp, half);
1388
   /* r/m + 1/2 */
1389
   emit_op3(p, OPCODE_MAD, dest, writemask, r, inv_m, half);
1390
 
1391
   release_temp(p, tmp);
1392
   release_temp(p, r);
1393
   release_temp(p, inv_m);
1394
}
1395
 
1396
 
1397
static void build_texture_transform( struct tnl_program *p )
1398
{
1399
   GLuint i, j;
1400
 
1401
   for (i = 0; i < MAX_TEXTURE_COORD_UNITS; i++) {
1402
 
1403
      if (!(p->state->fragprog_inputs_read & VARYING_BIT_TEX(i)))
1404
	 continue;
1405
 
1406
      if (p->state->unit[i].coord_replace)
1407
  	 continue;
1408
 
1409
      if (p->state->unit[i].texgen_enabled ||
1410
	  p->state->unit[i].texmat_enabled) {
1411
 
1412
	 GLuint texmat_enabled = p->state->unit[i].texmat_enabled;
1413
	 struct ureg out = register_output(p, VARYING_SLOT_TEX0 + i);
1414
	 struct ureg out_texgen = undef;
1415
 
1416
	 if (p->state->unit[i].texgen_enabled) {
1417
	    GLuint copy_mask = 0;
1418
	    GLuint sphere_mask = 0;
1419
	    GLuint reflect_mask = 0;
1420
	    GLuint normal_mask = 0;
1421
	    GLuint modes[4];
1422
 
1423
	    if (texmat_enabled)
1424
	       out_texgen = get_temp(p);
1425
	    else
1426
	       out_texgen = out;
1427
 
1428
	    modes[0] = p->state->unit[i].texgen_mode0;
1429
	    modes[1] = p->state->unit[i].texgen_mode1;
1430
	    modes[2] = p->state->unit[i].texgen_mode2;
1431
	    modes[3] = p->state->unit[i].texgen_mode3;
1432
 
1433
	    for (j = 0; j < 4; j++) {
1434
	       switch (modes[j]) {
1435
	       case TXG_OBJ_LINEAR: {
1436
		  struct ureg obj = register_input(p, VERT_ATTRIB_POS);
1437
		  struct ureg plane =
1438
		     register_param3(p, STATE_TEXGEN, i,
1439
				     STATE_TEXGEN_OBJECT_S + j);
1440
 
1441
		  emit_op2(p, OPCODE_DP4, out_texgen, WRITEMASK_X << j,
1442
			   obj, plane );
1443
		  break;
1444
	       }
1445
	       case TXG_EYE_LINEAR: {
1446
		  struct ureg eye = get_eye_position(p);
1447
		  struct ureg plane =
1448
		     register_param3(p, STATE_TEXGEN, i,
1449
				     STATE_TEXGEN_EYE_S + j);
1450
 
1451
		  emit_op2(p, OPCODE_DP4, out_texgen, WRITEMASK_X << j,
1452
			   eye, plane );
1453
		  break;
1454
	       }
1455
	       case TXG_SPHERE_MAP:
1456
		  sphere_mask |= WRITEMASK_X << j;
1457
		  break;
1458
	       case TXG_REFLECTION_MAP:
1459
		  reflect_mask |= WRITEMASK_X << j;
1460
		  break;
1461
	       case TXG_NORMAL_MAP:
1462
		  normal_mask |= WRITEMASK_X << j;
1463
		  break;
1464
	       case TXG_NONE:
1465
		  copy_mask |= WRITEMASK_X << j;
1466
	       }
1467
	    }
1468
 
1469
	    if (sphere_mask) {
1470
	       build_sphere_texgen(p, out_texgen, sphere_mask);
1471
	    }
1472
 
1473
	    if (reflect_mask) {
1474
	       build_reflect_texgen(p, out_texgen, reflect_mask);
1475
	    }
1476
 
1477
	    if (normal_mask) {
1478
	       struct ureg normal = get_transformed_normal(p);
1479
	       emit_op1(p, OPCODE_MOV, out_texgen, normal_mask, normal );
1480
	    }
1481
 
1482
	    if (copy_mask) {
1483
	       struct ureg in = register_input(p, VERT_ATTRIB_TEX0+i);
1484
	       emit_op1(p, OPCODE_MOV, out_texgen, copy_mask, in );
1485
	    }
1486
	 }
1487
 
1488
	 if (texmat_enabled) {
1489
	    struct ureg texmat[4];
1490
	    struct ureg in = (!is_undef(out_texgen) ?
1491
			      out_texgen :
1492
			      register_input(p, VERT_ATTRIB_TEX0+i));
1493
	    if (p->mvp_with_dp4) {
1494
	       register_matrix_param5( p, STATE_TEXTURE_MATRIX, i, 0, 3,
1495
				       0, texmat );
1496
	       emit_matrix_transform_vec4( p, out, texmat, in );
1497
	    }
1498
	    else {
1499
	       register_matrix_param5( p, STATE_TEXTURE_MATRIX, i, 0, 3,
1500
				       STATE_MATRIX_TRANSPOSE, texmat );
1501
	       emit_transpose_matrix_transform_vec4( p, out, texmat, in );
1502
	    }
1503
	 }
1504
 
1505
	 release_temps(p);
1506
      }
1507
      else {
1508
	 emit_passthrough(p, VERT_ATTRIB_TEX0+i, VARYING_SLOT_TEX0+i);
1509
      }
1510
   }
1511
}
1512
 
1513
 
1514
/**
1515
 * Point size attenuation computation.
1516
 */
1517
static void build_atten_pointsize( struct tnl_program *p )
1518
{
1519
   struct ureg eye = get_eye_position_z(p);
1520
   struct ureg state_size = register_param2(p, STATE_INTERNAL, STATE_POINT_SIZE_CLAMPED);
1521
   struct ureg state_attenuation = register_param1(p, STATE_POINT_ATTENUATION);
1522
   struct ureg out = register_output(p, VARYING_SLOT_PSIZ);
1523
   struct ureg ut = get_temp(p);
1524
 
1525
   /* dist = |eyez| */
1526
   emit_op1(p, OPCODE_ABS, ut, WRITEMASK_Y, swizzle1(eye, Z));
1527
   /* p1 + dist * (p2 + dist * p3); */
1528
   emit_op3(p, OPCODE_MAD, ut, WRITEMASK_X, swizzle1(ut, Y),
1529
		swizzle1(state_attenuation, Z), swizzle1(state_attenuation, Y));
1530
   emit_op3(p, OPCODE_MAD, ut, WRITEMASK_X, swizzle1(ut, Y),
1531
		ut, swizzle1(state_attenuation, X));
1532
 
1533
   /* 1 / sqrt(factor) */
1534
   emit_op1(p, OPCODE_RSQ, ut, WRITEMASK_X, ut );
1535
 
1536
#if 0
1537
   /* out = pointSize / sqrt(factor) */
1538
   emit_op2(p, OPCODE_MUL, out, WRITEMASK_X, ut, state_size);
1539
#else
1540
   /* this is a good place to clamp the point size since there's likely
1541
    * no hardware registers to clamp point size at rasterization time.
1542
    */
1543
   emit_op2(p, OPCODE_MUL, ut, WRITEMASK_X, ut, state_size);
1544
   emit_op2(p, OPCODE_MAX, ut, WRITEMASK_X, ut, swizzle1(state_size, Y));
1545
   emit_op2(p, OPCODE_MIN, out, WRITEMASK_X, ut, swizzle1(state_size, Z));
1546
#endif
1547
 
1548
   release_temp(p, ut);
1549
}
1550
 
1551
 
1552
/**
1553
 * Pass-though per-vertex point size, from user's point size array.
1554
 */
1555
static void build_array_pointsize( struct tnl_program *p )
1556
{
1557
   struct ureg in = register_input(p, VERT_ATTRIB_POINT_SIZE);
1558
   struct ureg out = register_output(p, VARYING_SLOT_PSIZ);
1559
   emit_op1(p, OPCODE_MOV, out, WRITEMASK_X, in);
1560
}
1561
 
1562
 
1563
static void build_tnl_program( struct tnl_program *p )
1564
{
1565
   /* Emit the program, starting with the modelview, projection transforms:
1566
    */
1567
   build_hpos(p);
1568
 
1569
   /* Lighting calculations:
1570
    */
1571
   if (p->state->fragprog_inputs_read & (VARYING_BIT_COL0|VARYING_BIT_COL1)) {
1572
      if (p->state->light_global_enabled)
1573
	 build_lighting(p);
1574
      else {
1575
	 if (p->state->fragprog_inputs_read & VARYING_BIT_COL0)
1576
	    emit_passthrough(p, VERT_ATTRIB_COLOR0, VARYING_SLOT_COL0);
1577
 
1578
	 if (p->state->fragprog_inputs_read & VARYING_BIT_COL1)
1579
	    emit_passthrough(p, VERT_ATTRIB_COLOR1, VARYING_SLOT_COL1);
1580
      }
1581
   }
1582
 
1583
   if (p->state->fragprog_inputs_read & VARYING_BIT_FOGC)
1584
      build_fog(p);
1585
 
1586
   if (p->state->fragprog_inputs_read & VARYING_BITS_TEX_ANY)
1587
      build_texture_transform(p);
1588
 
1589
   if (p->state->point_attenuated)
1590
      build_atten_pointsize(p);
1591
   else if (p->state->point_array)
1592
      build_array_pointsize(p);
1593
 
1594
   /* Finish up:
1595
    */
1596
   emit_op1(p, OPCODE_END, undef, 0, undef);
1597
 
1598
   /* Disassemble:
1599
    */
1600
   if (DISASSEM) {
1601
      printf ("\n");
1602
   }
1603
}
1604
 
1605
 
1606
static void
1607
create_new_program( const struct state_key *key,
1608
                    struct gl_vertex_program *program,
1609
                    GLboolean mvp_with_dp4,
1610
                    GLuint max_temps)
1611
{
1612
   struct tnl_program p;
1613
 
1614
   memset(&p, 0, sizeof(p));
1615
   p.state = key;
1616
   p.program = program;
1617
   p.eye_position = undef;
1618
   p.eye_position_z = undef;
1619
   p.eye_position_normalized = undef;
1620
   p.transformed_normal = undef;
1621
   p.identity = undef;
1622
   p.temp_in_use = 0;
1623
   p.mvp_with_dp4 = mvp_with_dp4;
1624
 
1625
   if (max_temps >= sizeof(int) * 8)
1626
      p.temp_reserved = 0;
1627
   else
1628
      p.temp_reserved = ~((1<
1629
 
1630
   /* Start by allocating 32 instructions.
1631
    * If we need more, we'll grow the instruction array as needed.
1632
    */
1633
   p.max_inst = 32;
1634
   p.program->Base.Instructions = _mesa_alloc_instructions(p.max_inst);
1635
   p.program->Base.String = NULL;
1636
   p.program->Base.NumInstructions =
1637
   p.program->Base.NumTemporaries =
1638
   p.program->Base.NumParameters =
1639
   p.program->Base.NumAttributes = p.program->Base.NumAddressRegs = 0;
1640
   p.program->Base.Parameters = _mesa_new_parameter_list();
1641
   p.program->Base.InputsRead = 0;
1642
   p.program->Base.OutputsWritten = 0;
1643
 
1644
   build_tnl_program( &p );
1645
}
1646
 
1647
 
1648
/**
1649
 * Return a vertex program which implements the current fixed-function
1650
 * transform/lighting/texgen operations.
1651
 */
1652
struct gl_vertex_program *
1653
_mesa_get_fixed_func_vertex_program(struct gl_context *ctx)
1654
{
1655
   struct gl_vertex_program *prog;
1656
   struct state_key key;
1657
 
1658
   /* Grab all the relevent state and put it in a single structure:
1659
    */
1660
   make_state_key(ctx, &key);
1661
 
1662
   /* Look for an already-prepared program for this state:
1663
    */
1664
   prog = gl_vertex_program(
1665
      _mesa_search_program_cache(ctx->VertexProgram.Cache, &key, sizeof(key)));
1666
 
1667
   if (!prog) {
1668
      /* OK, we'll have to build a new one */
1669
      if (0)
1670
         printf("Build new TNL program\n");
1671
 
1672
      prog = gl_vertex_program(ctx->Driver.NewProgram(ctx, GL_VERTEX_PROGRAM_ARB, 0));
1673
      if (!prog)
1674
         return NULL;
1675
 
1676
      create_new_program( &key, prog,
1677
                          ctx->ShaderCompilerOptions[MESA_SHADER_VERTEX].PreferDP4,
1678
                          ctx->Const.VertexProgram.MaxTemps );
1679
 
1680
#if 0
1681
      if (ctx->Driver.ProgramStringNotify)
1682
         ctx->Driver.ProgramStringNotify( ctx, GL_VERTEX_PROGRAM_ARB,
1683
                                          &prog->Base );
1684
#endif
1685
      _mesa_program_cache_insert(ctx, ctx->VertexProgram.Cache,
1686
                                 &key, sizeof(key), &prog->Base);
1687
   }
1688
 
1689
   return prog;
1690
}