; Exported from the Kolibri OS Subversion repository (blame view, Rev 9512).
; Ray casted shadows
; by Maciej Guba.
; http://macgub.co.pl

ROUND2 equ 10                   ; number of fraction bits in the fixed-point edge x values

ray_shad:
;--- Procedure renders a triangle with a ray casted shadow ---
;--- effect. Intersections with all triangles are computed ---
;--- for every pixel, so it is not a real time process, ------
;--- especially when many triangles are processed. -----------
;------in - eax - x1 shl 16 + y1 ------------------------------
;---------- ebx - x2 shl 16 + y2 ------------------------------
;---------- ecx - x3 shl 16 + y3 ------------------------------
;---------- edx - ptr to fur coords struct (never read here, --
;--------------   overwritten by cdq in the delta setup) ------
;---------- esi - pointer to stencil / Z-buffer, filled -------
;--------------   with dword float variables, it masks --------
;--------------   'Z' position (coord) of every front ---------
;--------------   pixel. --------------------------------------
;---------- edi - pointer to screen buffer --------------------
;---------- xmm0 - 1st normal vector --------------------------
;---------- xmm1 - 2nd normal vector --------------------------
;---------- xmm2 - 3rd normal vector --------------------------
;---------- xmm3 - (unused) -----------------------------------
;---------- xmm4 - lo -> hi z1, z2, z3 coords -----------------
;---------------   as dword floats ----------------------------
;---------- xmm5 - lo -> hi y_min, y_max, x_min, x_max --------
;---------------   as dword integers --------------------------
;---------- mm7  - current triangle index ---------------------
;---------------------- stack - no parameters -----------------
;--------------------------------------------------------------
;----------------- procedure doesn't save registers !! --------
;
; Outline: the vertices are sorted by y, per-scanline deltas of x
; (fixed point, ROUND2 fraction bits), z and the normal vectors are
; computed for the three edges, then ray_shd_l is called once per
; scanline: first for rows y1..y2-1 (edges 1-3 and 1-2), then for
; rows y2..y3-1 (edges 1-3 and 2-3).

  push  ebp
  mov   ebp,esp
  sub   esp,1024
  sub   ebp,16
  and   ebp,0xfffffff0          ; 16-byte aligned frame base for movaps/movdqa

  .1_nv equ [ebp-16]
  .2_nv equ [ebp-32]
  .3_nv equ [ebp-48]
  .l_v  equ [ebp-64]
  .z3   equ [ebp-72]
  .z2   equ [ebp-76]
  .z1   equ [ebp-80]
  .x1   equ [ebp-82]
  .y1   equ [ebp-84]
  .x2   equ [ebp-86]
  .y2   equ [ebp-88]
  .x3   equ [ebp-90]
  .y3   equ [ebp-92]
  .Zbuf equ [ebp-96]
  .x_max  equ [ebp-100]
  .x_min  equ [ebp-104]
  .y_max  equ [ebp-108]
  .y_min  equ [ebp-112]
  .screen equ [ebp-116]
  .dx12   equ [ebp-120]
  .dx13   equ [ebp-124]
  .dx23   equ [ebp-128]
  .dn12   equ [ebp-144]
  .dn13   equ [ebp-160]
  .dn23   equ [ebp-176]
  .dz12   equ [ebp-180]
  .dz13   equ [ebp-184]
  .dz23   equ [ebp-188]
  .cnv1   equ [ebp-208] ; current normal vectors
  .cnv2   equ [ebp-240]
  .cz2    equ [ebp-244]
  .cz1    equ [ebp-248]
  .tri_no equ [ebp-252]


 .sort3:                  ; sort triangle vertices by ascending y (low word)
       cmp     ax,bx
       jle     .sort1
       xchg    eax,ebx                  ; swap vertex 1 <-> 2 ...
       shufps  xmm4,xmm4,11100001b      ; ... together with z1 <-> z2
       movaps  xmm6,xmm0                ; ... and normal 1 <-> normal 2
       movaps  xmm0,xmm1
       movaps  xmm1,xmm6

 .sort1:
       cmp      bx,cx
       jle      .sort2
       xchg     ebx,ecx                 ; swap vertex 2 <-> 3 ...
       shufps   xmm4,xmm4,11011000b     ; ... together with z2 <-> z3
       movaps   xmm6,xmm1               ; ... and normal 2 <-> normal 3
       movaps   xmm1,xmm2
       movaps   xmm2,xmm6

       jmp .sort3                       ; re-check the first pair after a swap

 .sort2:

   movaps .z1,xmm4              ; stores z1, z2, z3 (+ unused 4th dword)
   mov    .y1,eax               ; one dword store fills .y1 (low) and .x1 (high)
   mov    .y2,ebx
   mov    .y3,ecx

   movdqa   .y_min,xmm5         ; stores y_min, y_max, x_min, x_max
if 1                            ; check if at least a fragment
   packssdw xmm5,xmm5           ; of the triangle is in the visible area
   pshuflw  xmm5,xmm5,11011000b ; low words -> y_min, x_min, y_max, x_max
   movdqu   xmm7,.y3            ; words: y3,x3,y2,x2,y1,x1 + both halves of z1
   movdqa   xmm6,xmm5
   pshufd   xmm5,xmm5,0         ; xmm5 lo-hi -> broadcasted y_min, x_min
   pshufd   xmm6,xmm6,01010101b ; xmm6 -> broadcasted y_max, x_max
   movdqa   xmm4,xmm7
   pcmpgtw  xmm7,xmm5           ; coord > min
   pcmpgtw  xmm4,xmm6           ; coord > max
   pxor     xmm7,xmm4           ; set where min < coord <= max
   pmovmskb eax,xmm7
   ; Only words 0..5 are vertex coordinates; words 6..7 are the raw bits
   ; of the z1 float and must not take part in the test. The odd mask
   ; bits of words 0..5 are bits 1,3,5,7,9,11 -> 0x0aaa.
   and      eax,0x0aaa
   or       eax,eax
   jz       .rpt_loop2_end      ; no coordinate inside the clip range: skip
end if
   movd     .tri_no,mm7
   movaps   .1_nv,xmm0
   movaps   .2_nv,xmm1
   movaps   .3_nv,xmm2
 ;  movaps   .l_v,xmm3
   mov      .Zbuf,esi
   mov      .screen,edi


       ; ---- per-scanline deltas along edge 1-2 ----
       mov      bx,.y2
       sub      bx,.y1
       jnz      .rpt_dx12_make

       xorps    xmm7,xmm7               ; flat edge: all deltas are zero
       mov      dword .dx12,0
       mov      dword .dz12,0
       movaps   .dn12,xmm7
       jmp      .rpt_dx12_done

  .rpt_dx12_make:
       mov      ax,.x2
       sub      ax,.x1
       cwde
       movsx    ebx,bx
       shl      eax,ROUND2
       cdq
       idiv     ebx                     ; dx12 = ((x2-x1) shl ROUND2) / (y2-y1)
       mov      .dx12,eax

       cvtsi2ss xmm6,ebx
       movss    xmm5,.z2
       rcpss    xmm6,xmm6               ; approximate 1/(y2-y1)
       subss    xmm5,.z1
       mulss    xmm5,xmm6
       movss    .dz12,xmm5

       shufps   xmm6,xmm6,0
       movaps   xmm0,.2_nv
       subps    xmm0,.1_nv
       mulps    xmm0,xmm6
       movaps   .dn12,xmm0

   .rpt_dx12_done:
       ; ---- per-scanline deltas along edge 1-3 ----
       mov      bx,.y3
       sub      bx,.y1
       jnz      .rpt_dx13_make

       xorps    xmm7,xmm7
       mov      dword .dx13,0
       mov      dword .dz13,0
       movaps   .dn13,xmm7
       jmp      .rpt_dx13_done

  .rpt_dx13_make:
       mov      ax,.x3
       sub      ax,.x1
       cwde
       movsx    ebx,bx
       shl      eax,ROUND2
       cdq
       idiv     ebx                     ; dx13 = ((x3-x1) shl ROUND2) / (y3-y1)
       mov      .dx13,eax

       cvtsi2ss xmm6,ebx
       movss    xmm5,.z3
       rcpss    xmm6,xmm6               ; approximate 1/(y3-y1)
       subss    xmm5,.z1
       mulss    xmm5,xmm6
       movss    .dz13,xmm5

       movaps   xmm0,.3_nv
       subps    xmm0,.1_nv
       shufps   xmm6,xmm6,0
       mulps    xmm0,xmm6
       movaps   .dn13,xmm0

  .rpt_dx13_done:

       ; ---- per-scanline deltas along edge 2-3 ----
       mov      bx,.y3
       sub      bx,.y2
       jnz      .rpt_dx23_make

       xorps    xmm7,xmm7
       mov      dword .dx23,0
       mov      dword .dz23,0
       movaps   .dn23,xmm7

       jmp      .rpt_dx23_done

  .rpt_dx23_make:
       mov      ax,.x3
       sub      ax,.x2
       cwde
       movsx    ebx,bx
       shl      eax,ROUND2
       cdq
       idiv     ebx                     ; dx23 = ((x3-x2) shl ROUND2) / (y3-y2)
       mov      .dx23,eax

       cvtsi2ss xmm6,ebx
       movss    xmm5,.z3
       rcpss    xmm6,xmm6               ; approximate 1/(y3-y2)
       subss    xmm5,.z2
       mulss    xmm5,xmm6
       movss    .dz23,xmm5

       movaps   xmm0,.3_nv
       subps    xmm0,.2_nv
       shufps   xmm6,xmm6,0
       mulps    xmm0,xmm6
       movaps   .dn23,xmm0

   .rpt_dx23_done:

       ; ---- upper part: rows y1 .. y2-1, both edges start at vertex 1 ----
       movsx   eax,word .x1
       shl     eax,ROUND2               ; eax - cur x on edge 1-3 (fixed point)
       mov     ebx,eax                  ; ebx - cur x on edge 1-2 (fixed point)
       mov     ecx,.z1
       mov     .cz1,ecx
       mov     .cz2,ecx
       movaps  xmm0,.1_nv
       movaps  .cnv1,xmm0
       movaps  .cnv2,xmm0
       mov      edi,.screen
       mov      esi,.Zbuf
       movsx    ecx,word .y1            ; ecx - cur y
       cmp      cx,.y2

       jge      .rpt_loop1_end

    .rpt_loop1:
       pushad                           ; ray_shd_l may change every GPR

       movaps   xmm2,.y_min
       movaps   xmm0,.cnv1
       movaps   xmm1,.cnv2
       movlps   xmm3,.cz1               ; loads cz1 and cz2 (adjacent dwords)
    ;   movaps   xmm4,.l_v
       sar      ebx,ROUND2              ; fixed point -> integer x
       sar      eax,ROUND2
       movd     mm7,.tri_no

       call     ray_shd_l

       popad
       movaps   xmm0,.cnv1              ; step all interpolants one row down
       movaps   xmm1,.cnv2
       movss    xmm2,.cz1
       movss    xmm3,.cz2
       addps    xmm0,.dn13
       addps    xmm1,.dn12
       addss    xmm2,.dz13
       addss    xmm3,.dz12

       add      eax,.dx13
       add      ebx,.dx12

       movaps   .cnv1,xmm0
       movaps   .cnv2,xmm1
       movss    .cz1,xmm2
       movss    .cz2,xmm3

       add      ecx,1
       cmp      cx,.y2
       jl       .rpt_loop1


   .rpt_loop1_end:
       ; ---- lower part: rows y2 .. y3-1, second edge restarts at vertex 2 ----
       movsx    ecx,word .y2
       cmp      cx,.y3
       jge      .rpt_loop2_end

       movsx    ebx,word .x2               ; eax - cur x1
       shl      ebx,ROUND2                 ; ebx - cur x2
       push     dword .z2
       pop      dword .cz2
       movaps   xmm0,.2_nv
       movaps   .cnv2,xmm0

       mov      edi,.screen
       mov      esi,.Zbuf


     .rpt_loop2:
       pushad                           ; ray_shd_l may change every GPR
       movaps   xmm2,.y_min
       movaps   xmm0,.cnv1
       movaps   xmm1,.cnv2
       movlps   xmm3,.cz1               ; loads cz1 and cz2 (adjacent dwords)
   ;    movaps   xmm4,.l_v
       sar      ebx,ROUND2              ; fixed point -> integer x
       sar      eax,ROUND2
       movd     mm7,.tri_no

       call     ray_shd_l

       popad
       movaps   xmm0,.cnv1              ; step all interpolants one row down
       movaps   xmm1,.cnv2
       movss    xmm2,.cz1
       movss    xmm3,.cz2

       addps    xmm0,.dn13
       addps    xmm1,.dn23
       addss    xmm2,.dz13
       addss    xmm3,.dz23
       add      eax,.dx13
       add      ebx,.dx23

       movaps   .cnv1,xmm0
       movaps   .cnv2,xmm1
       movss    .cz1,xmm2
       movss    .cz2,xmm3

       add      ecx,1
       cmp      cx,.y3
       jl       .rpt_loop2

    .rpt_loop2_end:

      add   esp,1024
      pop   ebp

ret

ray_shd_l:
; Draws one horizontal span of a triangle. For every pixel of the span
; whose Z-buffer entry matches the interpolated z, a ray is cast from
; the point light to the pixel and tested against all triangles except
; the current one; the pixel receives the light colour only when no
; intersection is reported.
; in:
;    xmm0 - normal vector 1
;    xmm1 - normal vect 2
;    xmm3 - lo -> hi z1, z2 coords as dwords floats
;    xmm2 - lo -> hi y_min, y_max, x_min, x_max
;           as dword integers
;    xmm4 - ----
;    mm7  - current triangle index
;    eax  - x1
;    ebx  - x2
;    ecx  - y
;    edx  - -----
;    edi  - screen buffer
;    esi  - z buffer / stencil buffer filled with dd floats
; Registers are not preserved.

   push  ebp
   mov   ebp,esp
   ; Frame size: the deepest local is .aabb_mask at [ebp-244] and the
   ; alignment below moves ebp down by 16..28 bytes (assuming the caller's
   ; esp is dword aligned), so 272 bytes cover every local. Being a
   ; multiple of 4 it also keeps esp dword aligned for pushad/push/call.
   sub   esp,272
   sub   ebp,16
   and   ebp,0xfffffff0         ; 16-byte aligned frame base for movaps

 .n1  equ [ebp-16]
 .n2  equ [ebp-32]
 .lv  equ [ebp-48]
 .lx1 equ [ebp-52]
 .lx2 equ [ebp-56]
 .z2  equ [ebp-60]
 .z1  equ [ebp-64]
 .screen    equ [ebp-68]
 .zbuff     equ [ebp-72]
 .x_max     equ [ebp-74]
 .x_min     equ [ebp-76]
 .y_max     equ [ebp-78]
 .y_min     equ [ebp-80]
 .dn        equ [ebp-96]
 .dz        equ [ebp-100]
 .y         equ [ebp-104]
 .startx    equ [ebp-108]
 .cnv       equ [ebp-128]
 .Rlen      equ [ebp-128-16]
 .r1        equ [ebp-128-32]
 .vect_t    equ [ebp-128-48]
 .cur_tri    equ [ebp-128-64]
; .p3t       equ [ebp-128-80]
 .nray      equ [ebp-128-96]
 .final_col equ [ebp-128-112]
 .aabb_mask equ dword[ebp-128-112-4]

        mov     .y,ecx
        packssdw xmm2,xmm2
        movq    .y_min,xmm2           ; words: y_min, y_max, x_min, x_max
        cmp     cx,.y_min             ; reject rows outside the clip range
        jl      .end_rp_line
        cmp     cx,.y_max
        jge     .end_rp_line          ;
        cmp     eax,ebx
        je      .end_rp_line          ; empty span
        jl      @f
        xchg    eax,ebx               ; make eax the left end and swap the
        movaps  xmm7,xmm0             ; normals and z values accordingly
        movaps  xmm0,xmm1
        movaps  xmm1,xmm7
        shufps  xmm3,xmm3,11100001b
    @@:
        movd    .cur_tri,mm7
  ;      sub     .cur_tri,dword 1
        cmp     ax,.x_max             ; span entirely right of the clip range
        jge     .end_rp_line
        cmp     bx,.x_min             ; span entirely left of the clip range
        jle     .end_rp_line
     ;   movaps  .lv,xmm4
        andps    xmm0,[zero_hgst_dd]
        andps    xmm1,[zero_hgst_dd]
        movaps  .n1,xmm0
        movaps  .n2,xmm1
        mov     .lx1,eax
;        mov     .startx,eax
        mov     .lx2,ebx
        movlps  .z1,xmm3              ; stores .z1 and .z2 (adjacent dwords)

        sub     ebx,eax
        cvtsi2ss xmm7,ebx
        rcpss   xmm7,xmm7             ; approximate 1/(x2-x1)
        shufps  xmm7,xmm7,0
        subps   xmm1,xmm0
        mulps   xmm1,xmm7
        movaps  .dn,xmm1              ; per-pixel normal delta
        shufps  xmm3,xmm3,11111001b   ; lowest dword <- z2
        subss   xmm3,.z1
        mulss   xmm3,xmm7
        movss   .dz,xmm3              ; per-pixel z delta

        mov      ebx,.lx1
        cmp      bx,.x_min     ; clipping on function4
        jge      @f
        ; Left end is outside: advance z and the normal by (x_min - lx1)
        ; steps. x_min is a signed word (packssdw result, signed compare
        ; above), so it has to be sign-extended here as well.
        movsx    eax,word .x_min
        sub      eax,ebx
        cvtsi2ss xmm7,eax
        shufps   xmm7,xmm7,0
        mulss    xmm3,xmm7
        mulps    xmm1,xmm7
        addss    xmm3,.z1
        addps    xmm1,.n1
        movsx    eax,word .x_min
        movss    .z1,xmm3
        movaps   .n1,xmm1
        mov      dword .lx1,eax
      @@:

        movzx   eax,word .x_max
        cmp     .lx2,eax
        jl      @f
        mov     .lx2,eax              ; clamp the right end to x_max
      @@:
        movzx   eax,word[xres_var]
        mul     dword .y
        add     eax,.lx1
        mov     .zbuff,esi
        mov     .screen,edi
        shl     eax,2                 ; (y*xres + lx1) * 4 bytes per entry
        add     edi,eax
        add     esi,eax
        mov     ecx,.lx2
        sub     ecx,.lx1              ; ecx - number of pixels to process

        movd       xmm0,[vect_x]      ; two words starting at vect_x ...
        punpcklwd  xmm0,[the_zero]    ; ... widened to dwords ...
        cvtdq2ps   xmm0,xmm0          ; ... and converted to floats
        movaps     .vect_t,xmm0       ; translation added to triangle vertices


   .ddraw:

        xorps    xmm0,xmm0
        movss    xmm2,.z1
        movss    xmm5,.z1
        movaps   .final_col,xmm0
        addss    xmm2,[f1]
        subss    xmm5,[f1]
        cmpnltss xmm2,dword[esi]      ; z1 + [f1] >= zbuf
        cmpnltss xmm5,dword[esi]      ; z1 - [f1] >= zbuf
        pxor     xmm2,xmm5            ; set only when exactly one test holds
        movd     eax,xmm2
        or       eax,eax
        jz       .skips               ; pixel does not belong to this surface

        movaps   xmm7,.n1
        andps    xmm7,[zero_hgst_dd]
        mulps    xmm7,xmm7 ; normalize
        haddps   xmm7,xmm7
        haddps   xmm7,xmm7
        rsqrtps  xmm7,xmm7
        mulps    xmm7,.n1
        movaps   .cnv,xmm7            ; normalized normal of the current pixel
        mov      ebx,point_light_coords
        mov      edx,lights_aligned
        xor      eax,eax
  .nx_light:
        pushad
        cvtsi2ss xmm0,.lx1
        cvtsi2ss xmm1,.y
        movss    xmm2,.z1
        movlhps  xmm0,xmm1
        shufps   xmm0,xmm2,11001000b  ; xmm0 = x, y, z of the pixel
        subps    xmm0,[ebx] ; xmm0 - ray end, -> current vertex
        movaps   xmm3,[ebx]
        andps    xmm0,[zero_hgst_dd]
        movaps   xmm1,xmm0
        mulps    xmm0,xmm0
        haddps   xmm0,xmm0
        haddps   xmm0,xmm0
        sqrtps   xmm0,xmm0
        movss    .Rlen,xmm0           ; distance light -> pixel
        rcpps    xmm0,xmm0
        mulps    xmm0,xmm1    ; xmm0 - normalized ray vector
        andps    xmm0,[zero_hgst_dd]
        movaps   .nray,xmm0
        movaps   .r1,xmm3   ; ray orgin
 if 0
        movaps   xmm1,xmm3
        call     calc_bounding_box

        mov      .aabb_mask,eax
end if
        xor      ecx,ecx              ; ecx - index of the tested triangle
 .nx_tri:  ; next triangle
 ;  mov     eax,.lx1
 ;  cmp     eax,.startx
 ;  je      @f          ; prevent artifact borders on tri
 ;  cmp     eax,.lx2    ; NOT work as I want !!
;   je      @f

        cmp     ecx,.cur_tri ; prevent self shadowing
        je      .skipp
  @@:                         ; target of the disabled jumps above only
 if 0
        mov     edi,ecx
        imul    edi,[i12]
        add     edi,[triangles_ptr]
        mov     eax,[edi]
        mov     ebx,[edi+4]
        mov     edx,[edi+8]
        imul    eax,[i12]
        imul    ebx,[i12]
        imul    edx,[i12]
        add     eax,[points_ptr]
        add     ebx,[points_ptr]
        add     edx,[points_ptr]
        movups   xmm2,[eax]
        movups   xmm3,[ebx]
        movups   xmm4,[edx]
        andps    xmm2,[sign_mask]
        andps    xmm3,[sign_mask]
        andps    xmm4,[sign_mask]
        movmskps ebx,xmm4
        cmpeqps  xmm2,xmm3
        cmpeqps  xmm3,xmm4
        andps    xmm2,xmm3
        movmskps eax,xmm2
        and      eax,111b
        and      ebx,111b
        cmp      eax,111b
        jne      @f
        bt       .aabb_mask,ebx
        jnc      .skipp
  @@:
end if
        mov     edi,ecx               ; edi = triangles_ptr + index * [i12]
        imul    edi,[i12]
        add     edi,[triangles_ptr]
        mov     eax,[edi]             ; three vertex indices of the triangle
        mov     ebx,[edi+4]
        mov     edx,[edi+8]
        imul    eax,[i12]
        imul    ebx,[i12]
        imul    edx,[i12]
        add     eax,[points_rotated_ptr]
        add     ebx,[points_rotated_ptr]
        add     edx,[points_rotated_ptr]
        movups   xmm2,[eax]
        movups   xmm3,[ebx]
        movups   xmm4,[edx]
        addps    xmm2,.vect_t
        addps    xmm3,.vect_t
        addps    xmm4,.vect_t


;intersect_tri: procs header
; in:
;     xmm0 - ray direction  ; should be normalized
;     xmm1 - ray orgin
;     xmm2 - tri vert1
;     xmm3 - tri vert2
;     xmm4 - tri vert3
;     if  eax = 1 - intersction with edge
;        xmm6 - edge lenght
;     if  eax = 0 - intersect with ray (classic)
; out:
;     eax  = 1 - intersection occured
;     xmm0 - float lo -> hi = t, v, u, ...

        movss     xmm6,.Rlen
        movaps    xmm0,.nray
        movaps    xmm1,.r1
        subss     xmm6,[the_one]      ; edge length passed: .Rlen - [the_one]
        mov       eax,1
        push      ecx
        call      intersect_tri
        pop       ecx
        cmp       eax,1
        je        .inter              ; something blocks the light
 .skipp:
 .skp:
        inc       ecx
        cmp       ecx,[triangles_count_var]
        jnz       .nx_tri
;   jz        .do_process
;   comiss  xmm0,.Rlen
;   jl      .inter

        popad
 .do_process:                         ; pixel is lit: accumulate light colour
        movaps    xmm5,.nray  ;[edx]
        andps     xmm5,[zero_hgst_dd]  ; global
        mulps    xmm5,.cnv  ;.lv   ; last dword should be zeroed
 ;       andps     xmm5,[sign_z]   ; global
        haddps   xmm5,xmm5
        haddps   xmm5,xmm5
        andps     xmm5,[abs_mask]  ; global
        movaps   xmm7,xmm5
        mulps    xmm7,xmm7
        mulps    xmm7,xmm7
        mulps    xmm5,[edx+16]        ; presumably the diffuse colour - TODO confirm
        mulps    xmm7,xmm7
        mulps    xmm7,xmm7            ; xmm7 = dot product raised to the 16th power
        mulps    xmm7,[edx+48]        ; presumably the specular colour - TODO confirm
        addps    xmm5,xmm7
        minps    xmm5,[mask_255f]  ; global
        maxps    xmm5,.final_col     ;  addps  maxps
        movaps   .final_col,xmm5
        jmp     .nx_loop
  .inter:

        popad                         ; shadowed: .final_col stays unchanged
      .nx_loop:
      ;  add      edx,64    ; unncomment to achive 3 lights
      ;  add      ebx,16
      ;  cmp      edx,lights_aligned_end    ; global
      ;  jnz      .nx_light

        movaps    xmm1,.final_col
        cvtps2dq  xmm1,xmm1
        packssdw  xmm1,xmm1
        packuswb  xmm1,xmm1
        movd      [edi],xmm1          ; write the packed pixel
  .skips:
        movaps   xmm0,.n1             ; advance to the next pixel of the span
        movss    xmm2,.z1
        add      edi,4
        add      esi,4
        add      dword .lx1,1
        addps    xmm0,.dn
        addss    xmm2,.dz
        movaps   .n1,xmm0
        movss    .z1,xmm2
        dec      ecx
        jnz      .ddraw
  .end_rp_line:
        add      esp,272              ; must match the sub esp in the prologue
        pop      ebp

ret