Subversion Repositories Kolibri OS

Rev

Rev 9237 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
9237 leency 1
; Bilinear filtering, real Phong shading and a glass-like effect in parallel.
2
; Thanks to authors of 3dica tutorial.
3
; Implemented in FASM by Maciej Guba.
4
; http://macgub.co.pl
5
 
6
; Number of fractional bits of the fixed-point edge x coordinates and x slopes:
; values are shifted left by ROUND2 before the slope division and shifted
; right by ROUND2 again before a scanline is drawn.
ROUND2 equ 10
7
 
8
glass_tex_tri:
;--------------------------------------------------------
; Renders a Phong shaded triangle with z coordinate
; interpolation (Catmull algorithm); each pixel is covered
; by texture using bilinear filtering. The normal vector is
; normalized in every pixel (done by glass_tex_line, which
; is called once per scanline).
;
; Syntax: FASM, 32-bit x86, SSE/SSE2/SSE3 instructions.
;
; in:  eax  - x1 shl 16 + y1
;      ebx  - x2 shl 16 + y2
;      ecx  - x3 shl 16 + y3
;      esi  - pointer to stencil buffer filled with dd
;             float variables
;      edi  - pointer to screen buffer
;      edx  - pointer to texture
;      xmm0 - 1st normal vector
;      xmm1 - 2nd normal vector
;      xmm2 - 3rd normal vector
;      xmm3 - normalized light vector
;      xmm4 - lo -> hi z1, z2, z3 coords as dword floats
;      xmm5 - lo -> hi y_min, y_max, x_min, x_max as dword
;             integers
;      xmm6 - lo -> hi tx1, ty1, tx2, ty2, tx3, ty3 as
;             words, x_res as dword integer
;      stack - no parameters
; out: nothing
;
; The procedure does not save registers!
;--------------------------------------------------------

        push    ebp
        mov     ebp,esp
        sub     esp,512
        sub     ebp,16
        and     ebp,0xfffffff0          ; 16-byte aligned frame base for movaps

  ; --- local frame (all offsets relative to the aligned ebp) ---
  .1_nv equ [ebp-16]
  .2_nv equ [ebp-32]
  .3_nv equ [ebp-48]
  .l_v  equ [ebp-64]
  .z3   equ [ebp-72]
  .z2   equ [ebp-76]
  .z1   equ [ebp-80]
  .x1   equ [ebp-82]
  .y1   equ [ebp-84]
  .x2   equ [ebp-86]
  .y2   equ [ebp-88]
  .x3   equ [ebp-90]
  .y3   equ [ebp-92]
  .Zbuf equ [ebp-96]
  .x_max equ  [ebp-100]
  .x_min equ  [ebp-104]
  .y_max equ  [ebp-108]
  .y_min equ  [ebp-112]
  .screen equ [ebp-116]
  .dx12   equ [ebp-120]
  .dx13   equ [ebp-124]
  .dx23   equ [ebp-128]
  .dn12   equ [ebp-144]
  .dn13   equ [ebp-160]
  .dn23   equ [ebp-176]

  .cnv1   equ [ebp-192]  ; current normal vector on edge 1-3
  .cnv2   equ [ebp-208]  ; current normal vector on edge 1-2 / 2-3
  .x_res  equ [ebp-212]
  .ty3    equ [ebp-214]
  .tx3    equ [ebp-216]
  .ty2    equ [ebp-218]
  .tx2    equ [ebp-220]
  .ty1    equ [ebp-222]
  .tx1    equ [ebp-224]
  .dz12   equ [ebp-232]
  .dty12  equ [ebp-236]
  .dtx12  equ [ebp-240]
  .dz13   equ [ebp-248]
  .dty13  equ [ebp-252]
  .dtx13  equ [ebp-256]
  .dz23   equ [ebp-264]
  .dty23  equ [ebp-268]
  .dtx23  equ [ebp-272]
  .cz1    equ [ebp-280]
  .cty1   equ [ebp-284]
  .ctx1   equ [ebp-288]
  .cz2    equ [ebp-296]
  .cty2   equ [ebp-300]
  .ctx2   equ [ebp-304]
  .tx_ptr equ [ebp-308]

        emms

  ; --- sort the vertices so that y1 <= y2 <= y3 ---
  ; z (xmm4), texture coords (xmm6) and normals (xmm0..xmm2) are
  ; permuted together with the packed x/y registers.
  .sort3:
        cmp     ax,bx
        jle     .sort1
        xchg    eax,ebx
        shufps  xmm4,xmm4,11100001b     ; swap dwords 0 and 1
        shufps  xmm6,xmm6,11100001b
        movaps  xmm7,xmm0
        movaps  xmm0,xmm1
        movaps  xmm1,xmm7

  .sort1:
        cmp     bx,cx
        jle     .sort2
        xchg    ebx,ecx
        shufps  xmm4,xmm4,11011000b     ; swap dwords 1 and 2
        shufps  xmm6,xmm6,11011000b
        movaps  xmm7,xmm1
        movaps  xmm1,xmm2
        movaps  xmm2,xmm7

        jmp     .sort3

  .sort2:
        movaps  .tx1,xmm6               ; tx1..ty3 words + x_res dword
        movaps  .z1,xmm4                ; z1, z2, z3
        mov     .y1,eax                 ; y1 (low word) + x1 (high word)
        mov     .y2,ebx
        mov     .y3,ecx

        movdqa  .y_min,xmm5
if 1                                    ; check if at least a fragment
        packssdw xmm5,xmm5              ; of the triangle is in the visible area
        pshuflw  xmm5,xmm5,11011000b    ; words: y_min, x_min, y_max, x_max
        movdqu   xmm7,.y3               ; words: y3,x3,y2,x2,y1,x1 + 2 words of .z1
        movdqa   xmm6,xmm5
        pshufd   xmm5,xmm5,0            ; xmm5 -> broadcast y_min, x_min
        pshufd   xmm6,xmm6,01010101b    ; xmm6 -> broadcast y_max, x_max
        movdqa   xmm4,xmm7
        pcmpgtw  xmm7,xmm5              ; coord > min
        pcmpgtw  xmm4,xmm6              ; coord > max
        pxor     xmm7,xmm4              ; set where min < coord <= max
        pmovmskb eax,xmm7
        ; Test only the six coordinate words (mask bits 0..11). The two
        ; upper words come from .z1 and must not influence the decision.
        and      eax,0x0aaa
        jz       .rpt_loop2_end         ; no coordinate inside -> nothing to draw
end if
        movaps   .1_nv,xmm0
        movaps   .2_nv,xmm1
        movaps   .3_nv,xmm2
        movaps   .l_v,xmm3
        mov      .Zbuf,esi
        mov      .screen,edi
        mov      .tx_ptr,edx

  ; --- per-scanline deltas along edge 1-2 ---
        mov      bx,.y2
        sub      bx,.y1
        jnz      .rpt_dx12_make

        xorps    xmm7,xmm7              ; flat edge: all deltas are zero
        mov      dword .dx12,0
        movaps   .dtx12,xmm7
        movaps   .dn12,xmm7
        jmp      .rpt_dx12_done

  .rpt_dx12_make:
        mov      ax,.x2
        sub      ax,.x1
        cwde
        movsx    ebx,bx
        shl      eax,ROUND2             ; fixed point x difference
        cdq
        idiv     ebx                    ; ebx = y2 - y1, non-zero here
        mov      .dx12,eax

        cvtsi2ss xmm6,ebx
        shufps   xmm6,xmm6,0            ; broadcast float (y2 - y1)
        movss    xmm5,.z2
        subss    xmm5,.z1
        divss    xmm5,xmm6
        movss    .dz12,xmm5

        movd     xmm0,.tx1
        movd     xmm2,.tx2
        pxor     xmm1,xmm1
        punpcklwd xmm0,xmm1             ; words -> dwords (zero extended)
        punpcklwd xmm2,xmm1
        psubd    xmm2,xmm0
        cvtdq2ps xmm2,xmm2
        divps    xmm2,xmm6
        movlps   .dtx12,xmm2            ; dtx12, dty12

        movaps   xmm0,.2_nv
        subps    xmm0,.1_nv
        divps    xmm0,xmm6
        movaps   .dn12,xmm0

  .rpt_dx12_done:

  ; --- per-scanline deltas along edge 1-3 ---
        mov      bx,.y3
        sub      bx,.y1
        jnz      .rpt_dx13_make

        xorps    xmm7,xmm7
        mov      dword .dx13,0
        movaps   .dtx13,xmm7
        movaps   .dn13,xmm7
        jmp      .rpt_dx13_done

  .rpt_dx13_make:
        mov      ax,.x3
        sub      ax,.x1
        cwde
        movsx    ebx,bx
        shl      eax,ROUND2
        cdq
        idiv     ebx                    ; ebx = y3 - y1, non-zero here
        mov      .dx13,eax

        cvtsi2ss xmm6,ebx
        shufps   xmm6,xmm6,0

        movss    xmm5,.z3
        subss    xmm5,.z1
        divss    xmm5,xmm6
        movss    .dz13,xmm5

        movd     xmm0,.tx1
        movd     xmm2,.tx3
        pxor     xmm1,xmm1
        punpcklwd xmm0,xmm1
        punpcklwd xmm2,xmm1
        psubd    xmm2,xmm0
        cvtdq2ps xmm2,xmm2
        divps    xmm2,xmm6
        movlps   .dtx13,xmm2            ; dtx13, dty13

        movaps   xmm0,.3_nv
        subps    xmm0,.1_nv
        divps    xmm0,xmm6
        movaps   .dn13,xmm0

  .rpt_dx13_done:

  ; --- per-scanline deltas along edge 2-3 ---
        mov      bx,.y3
        sub      bx,.y2
        jnz      .rpt_dx23_make

        xorps    xmm7,xmm7
        mov      dword .dx23,0
        movaps   .dtx23,xmm7
        movaps   .dn23,xmm7
        jmp      .rpt_dx23_done

  .rpt_dx23_make:
        mov      ax,.x3
        sub      ax,.x2
        cwde
        movsx    ebx,bx
        shl      eax,ROUND2
        cdq
        idiv     ebx                    ; ebx = y3 - y2, non-zero here
        mov      .dx23,eax

        cvtsi2ss xmm6,ebx
        shufps   xmm6,xmm6,0
        movss    xmm5,.z3
        subss    xmm5,.z2
        divss    xmm5,xmm6
        movss    .dz23,xmm5

        movd     xmm0,.tx2
        movd     xmm2,.tx3
        pxor     xmm1,xmm1
        punpcklwd xmm0,xmm1
        punpcklwd xmm2,xmm1
        psubd    xmm2,xmm0
        cvtdq2ps xmm2,xmm2
        divps    xmm2,xmm6
        movlps   .dtx23,xmm2            ; dtx23, dty23

        movaps   xmm0,.3_nv
        subps    xmm0,.2_nv
        divps    xmm0,xmm6
        movaps   .dn23,xmm0

  .rpt_dx23_done:

  ; --- both edge walkers start at vertex 1 ---
        movsx   eax,word .x1
        shl     eax,ROUND2              ; eax - current x on edge 1-3 (fixed point)
        mov     ebx,eax                 ; ebx - current x on edge 1-2 (fixed point)
        mov     edx,.z1
        movd    xmm1,.tx1
        pxor    xmm2,xmm2
        punpcklwd xmm1,xmm2
        cvtdq2ps  xmm1,xmm1             ; tx1, ty1 as floats

        mov     .cz1,edx
        mov     .cz2,edx
        movaps  xmm0,.1_nv
        movlps  .ctx1,xmm1
        movlps  .ctx2,xmm1
        movaps  .cnv1,xmm0
        movaps  .cnv2,xmm0

        movsx    ecx,word .y1           ; ecx - current scanline
        cmp      cx,.y2

        jge      .rpt_loop1_end

  ; --- upper part: scanlines y1 .. y2-1 between edges 1-3 and 1-2 ---
  .rpt_loop1:
        pushad                          ; glass_tex_line does not preserve registers

        movaps   xmm2,.y_min
        movaps   xmm0,.cnv1
        movaps   xmm1,.cnv2
        movaps   xmm3,.ctx1             ; tx, ty, z on edge 1-3
        movaps   xmm5,.ctx2             ; tx, ty, z on edge 1-2
        movaps   xmm4,.l_v
        movd     xmm6,.x_res
        sar      ebx,ROUND2             ; fixed point -> integer x
        sar      eax,ROUND2
        mov      edx,.tx_ptr
        mov      edi,.screen

        mov      esi,.Zbuf

        call     glass_tex_line

        popad
        movaps   xmm0,.cnv1             ; step all interpolated values one line down
        movaps   xmm1,.cnv2
        movaps   xmm2,.ctx1
        movaps   xmm3,.ctx2
        addps    xmm0,.dn13
        addps    xmm1,.dn12
        addps    xmm2,.dtx13
        addps    xmm3,.dtx12
        add      eax,.dx13
        add      ebx,.dx12

        movaps   .cnv1,xmm0
        movaps   .cnv2,xmm1
        movaps   .ctx1,xmm2
        movaps   .ctx2,xmm3
        add      ecx,1
        cmp      cx,.y2
        jl       .rpt_loop1

  .rpt_loop1_end:
        movsx    ecx,word .y2
        cmp      cx,.y3
        jge      .rpt_loop2_end

  ; --- restart the second walker at vertex 2 for edge 2-3 ---
        movsx    ebx,word .x2           ; eax - cur x1
        shl      ebx,ROUND2             ; ebx - cur x2
        push     dword .z2
        pop      dword .cz2
        movd     xmm1,.tx2
        pxor     xmm2,xmm2
        punpcklwd xmm1,xmm2
        cvtdq2ps xmm1,xmm1
        movlps   .ctx2,xmm1
        movaps   xmm0,.2_nv
        movaps   .cnv2,xmm0

  ; --- lower part: scanlines y2 .. y3-1 between edges 1-3 and 2-3 ---
  .rpt_loop2:
        pushad

        movaps   xmm2,.y_min
        movaps   xmm0,.cnv1
        movaps   xmm1,.cnv2
        movaps   xmm3,.ctx1
        movaps   xmm5,.ctx2
        movaps   xmm4,.l_v
        sar      ebx,ROUND2
        sar      eax,ROUND2
        mov      edx,.tx_ptr
        mov      edi,.screen
        mov      esi,.Zbuf
        movd     xmm6,.x_res
        call     glass_tex_line

        popad
        movaps   xmm0,.cnv1
        movaps   xmm1,.cnv2
        movaps   xmm2,.ctx1
        movaps   xmm3,.ctx2
        addps    xmm0,.dn13
        addps    xmm1,.dn23
        addps    xmm2,.dtx13
        addps    xmm3,.dtx23

        add      eax,.dx13
        add      ebx,.dx23

        movaps   .cnv1,xmm0
        movaps   .cnv2,xmm1
        movaps   .ctx1,xmm2
        movaps   .ctx2,xmm3

        add      ecx,1
        cmp      cx,.y3
        jl       .rpt_loop2

  .second_cause:                        ; label kept; no active jump targets it here

  .rpt_loop2_end:

        add     esp,512
        pop     ebp

ret
9740 macgub 455
 
9237 leency 456
glass_tex_line:
;--------------------------------------------------------
; Draws one horizontal span: per pixel the interpolated
; normal is normalized, lit by every entry of the global
; lights_aligned table, multiplied by a bilinearly filtered
; texel and added (with unsigned saturation) to the pixel
; already present in the screen buffer.
;
; Syntax: FASM, 32-bit x86, SSE/SSE2/SSE3 instructions.
;
; in:
;    xmm0 - normal vector 1
;    xmm1 - normal vector 2
;    xmm3 - lo -> hi tx1, ty1, z1 coords as dword floats
;    xmm5 - lo -> hi tx2, ty2, z2 coords as dword floats
;    xmm2 - lo -> hi y_min, y_max, x_min, x_max
;           as dword integers
;    xmm4 - normalized light vector
;    eax - x1
;    ebx - x2
;    ecx - y
;    edi - screen buffer
;    esi - stencil buffer filled with dd floats
;    edx - texture pointer (handle)
;    xmm6 - lowest dword x_res as integer
; out: nothing
;
; General purpose and xmm registers are not preserved
; (only ebp and esp are restored).
;--------------------------------------------------------

        push    ebp
        mov     ebp,esp
        sub     esp,352                 ; multiple of 4 keeps esp dword aligned
        sub     ebp,16
        and     ebp,0xfffffff0          ; 16-byte aligned frame base for movaps

  ; --- local frame (all offsets relative to the aligned ebp) ---
  .n1 equ [ebp-16]
  .n2 equ [ebp-32]
  .lv equ [ebp-48]
  .lx1 equ [ebp-52]
  .lx2 equ [ebp-56]
  .screen equ [ebp-68]
  .zbuff  equ [ebp-72]
  .x_max  equ [ebp-74]
  .x_min  equ [ebp-76]
  .y_max  equ [ebp-78]
  .y_min  equ [ebp-80]
  .dn     equ [ebp-96]
  .x_res  equ [ebp-100]
  .y      equ [ebp-104]
  .cnv    equ [ebp-128]
  .z1     equ [ebp-136]
  .ty1    equ [ebp-140]
  .tx1    equ [ebp-144]
  .z2     equ [ebp-152]
  .ty2    equ [ebp-156]
  .tx2    equ [ebp-160]
  .cz     equ [ebp-168]
  .cty    equ [ebp-172]
  .ctx    equ [ebp-176]
  .dz     equ [ebp-184]
  .dty    equ [ebp-188]
  .dtx    equ [ebp-192]
  .yd     equ [ebp-196]
  .xd     equ [ebp-200]
  .yf     equ [ebp-204]
  .xf     equ [ebp-208]
  .w4     equ [ebp-212]
  .w3     equ [ebp-216]
  .w2     equ [ebp-220]
  .w1     equ [ebp-224]
  .p4     equ [ebp-228]
  .p3     equ [ebp-232]
  .p2     equ [ebp-236]
  .p1     equ [ebp-240]

  .tx_ptr equ [ebp-244]

  ; --- vertical clipping ---
        mov     .y,ecx
        packssdw xmm2,xmm2              ; bounds as signed words
        movq    .y_min,xmm2             ; y_min, y_max, x_min, x_max
        cmp     cx,.y_min
        jl      .end_line
        cmp     cx,.y_max
        jge     .end_line

  ; --- make x1 < x2, swapping the attached attributes as well ---
        cmp     eax,ebx
        je      .end_line               ; empty span
        jl      @f
        xchg    eax,ebx
        movaps  xmm7,xmm0
        movaps  xmm0,xmm1
        movaps  xmm1,xmm7
        movaps  xmm7,xmm3
        movaps  xmm3,xmm5
        movaps  xmm5,xmm7
  @@:

  ; --- reject spans completely outside the horizontal bounds ---
        cmp     ax,.x_max
        jge     .end_line
        cmp     bx,.x_min
        jle     .end_line
        movaps  .lv,xmm4
        movaps  .n1,xmm0
        movaps  .n2,xmm1
        mov     .lx1,eax
        mov     .lx2,ebx
        movaps  .tx1,xmm3
        movaps  .tx2,xmm5
        movd    .x_res,xmm6
        mov     .tx_ptr,edx

  ; --- per-pixel deltas of the normal and of (tx, ty, z) ---
        sub     ebx,eax                 ; span length, > 0 here
        cvtsi2ss xmm7,ebx
        shufps  xmm7,xmm7,0
        subps   xmm1,xmm0
        divps   xmm1,xmm7
        movaps  .dn,xmm1
        subps   xmm5,xmm3
        divps   xmm5,xmm7
        movaps  .dtx,xmm5

  ; --- clip the left end to x_min, advancing the start attributes ---
        mov      ebx,.lx1
        cmp      bx,.x_min
        jge      @f
        movsx    eax,word .x_min        ; signed, like the compare above
        sub      eax,ebx                ; number of skipped pixels
        cvtsi2ss xmm7,eax
        shufps   xmm7,xmm7,0
        mulps    xmm5,xmm7
        mulps    xmm1,xmm7
        addps    xmm5,.tx1
        addps    xmm1,.n1
        movsx    eax,word .x_min
        movaps   .tx1,xmm5
        movaps   .n1,xmm1
        mov      dword .lx1,eax

  @@:
  ; --- clip the right end to x_max ---
        movsx   eax,word .x_max         ; signed, matches the signed jl below
        cmp     .lx2,eax
        jl      @f
        mov     .lx2,eax
  @@:
  ; --- address of the first pixel: (y * x_res + lx1) * 4 ---
        mov     eax,.x_res
        mul     dword .y                ; clobbers edx (texture ptr is saved in .tx_ptr)
        add     eax,.lx1
        shl     eax,2
        add     edi,eax
        add     esi,eax

        mov     ecx,.lx2
        sub     ecx,.lx1                ; ecx - pixel counter
        movaps  xmm2,.tx1               ; xmm2 - current tx, ty, z
align 16
  .ddraw:
        xorps    xmm5,xmm5              ; xmm5 - brightest light result so far
        movaps   xmm7,.n1
        mulps    xmm7,xmm7              ; normalize the current normal
        haddps   xmm7,xmm7
        haddps   xmm7,xmm7
        rsqrtps  xmm7,xmm7
        mulps    xmm7,.n1
        movaps   .cnv,xmm7

  ; split texture coords into integer part (xmm7) and fraction (.xf, .yf)
        movaps   xmm6,xmm2
        minps    xmm6,[tex_m2]          ; float  TEX_X-2,TEX_Y-2
        cvttps2dq xmm7,xmm6
        cvtdq2ps xmm4,xmm7
        subps    xmm6,xmm4
        movlps   .xf,xmm6
        mov      eax,lights_aligned     ; global table, 64 bytes per light
     align 16
  .again_col:
        movaps   xmm0,[eax]             ; calc multiple lights
        mulps    xmm0,.cnv              ; last dword should be zeroed
        haddps   xmm0,xmm0
        haddps   xmm0,xmm0              ; xmm0 - broadcast dot product
if 1
        ; stencil: the [eax+48] term below is added only when
        ; z - aprox < stencil value <= z + aprox
        movhlps   xmm6,xmm2
        movhlps   xmm4,xmm2
        addss     xmm6,[aprox]
        subss     xmm4,[aprox]
        cmpnltss  xmm6,dword[esi]
        cmpnltss  xmm4,dword[esi]
        xorps     xmm6,xmm4
        xorps     xmm4,xmm4
        movd      ebx,xmm6
        cmp       ebx,-1
        jne       .no_reflective
end if
        movaps    xmm4,xmm0
        mulps     xmm4,xmm4
        mulps     xmm4,xmm4
        mulps     xmm4,xmm4
        mulps     xmm4,xmm4             ; dot ^ 16
        mulps     xmm4,[eax+48]         ; presumably the specular colour - confirm

  .no_reflective:
        maxps    xmm0,[the_zero]
        mulps    xmm0,[eax+16]          ; presumably the diffuse colour - confirm
        addps    xmm4,xmm0
        addps    xmm4,[eax+32]          ; presumably the ambient colour - confirm
        maxps    xmm5,xmm4              ; keep the per-channel maximum over all lights
        add      eax,64
        cmp      eax,lights_aligned_end
        jnz      .again_col
        minps    xmm5,[mask_255f]

  ; texture coords work: fetch the 2x2 texel block (3 bytes per texel)
        movd     eax,xmm7
        psrldq   xmm7,4
        movd     ebx,xmm7
        shl      ebx,TEX_SHIFT
        add      eax,ebx
        lea      eax,[eax*3]
        add      eax,.tx_ptr
        mov      ebx,eax
        add      ebx,TEX_X*3            ; next texture row
        movd     xmm7,[eax]
        movd     xmm6,[eax+3]
        movd     xmm4,[ebx]
        movd     xmm3,[ebx+3]
        punpcklbw xmm7,xmm6             ; xmm7 r1 r2 g1 g2 b1 b2
        punpcklbw xmm4,xmm3             ; xmm4 r3 r4 g3 g4 b3 b4
        punpcklwd xmm7,xmm4             ; xmm7 r1 r2 r3 r4 g1 g2 g3 g4 ...
        movdqa    xmm6,xmm7
        movdqa    xmm4,xmm7
        psrldq    xmm6,4
        psrldq    xmm4,8

        punpcklbw xmm7,[the_zero]       ; broadcasted 0: bytes -> dwords
        punpcklbw xmm6,[the_zero]
        punpcklbw xmm4,[the_zero]
        punpcklwd xmm7,[the_zero]
        punpcklwd xmm6,[the_zero]
        punpcklwd xmm4,[the_zero]

  ; calc the four bilinear weights
        movlps   xmm3,[the_one]         ; broadcasted dword 1.0
        cvtdq2ps  xmm7,xmm7
        subps    xmm3,.xf
        cvtdq2ps  xmm6,xmm6
        movhps   xmm3,.xf
        cvtdq2ps  xmm4,xmm4
        movaps   xmm1,xmm3              ; 1-xf, 1-yf, xf, yf
        shufps   xmm3,xmm3,10001000b    ; 1-xf, xf, 1-xf, xf
        shufps   xmm1,xmm1,11110101b    ; 1-yf, 1-yf, yf, yf
        mulps    xmm3,xmm1

        mulps    xmm7,xmm3
        mulps    xmm6,xmm3
        mulps    xmm4,xmm3
        haddps   xmm7,xmm7              ; r
        haddps   xmm6,xmm6              ; g
        haddps   xmm4,xmm4              ; b
        haddps   xmm7,xmm7              ; r
        haddps   xmm6,xmm6              ; g
        haddps   xmm4,xmm4              ; b
        movlhps  xmm7,xmm6
        shufps   xmm7,xmm7,11101000b
        movlhps  xmm7,xmm4              ; xmm7 - r, g, b, b

  ; light * texel, scale down by 256 and add to the existing pixel
        mulps    xmm5,xmm7
        cvtps2dq xmm5,xmm5
        psrld    xmm5,8
        movd     xmm6,[edi]
        packssdw xmm5,xmm5
        packuswb xmm5,xmm5
        paddusb  xmm5,xmm6              ; saturating add onto the screen pixel
        movd     [edi],xmm5
  .skip:                                ; label kept; no active jump targets it here
        add      edi,4
        add      esi,4
        movaps   xmm0,.n1               ; cur normal
        addps    xmm0,.dn
        addps    xmm2,.dtx
        movaps   .n1,xmm0
        sub      ecx,1
        jnz      .ddraw

  .end_line:
        add      esp,352                ; must match the sub esp in the prologue
        pop      ebp

ret