Subversion Repositories Kolibri OS

Rev

Rev 9237 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
9237 leency 1
; Ray casted shadows
2
; by Maciej Guba.
3
; http://macgub.co.pl
4
 
5
 
6
ROUND2 equ 10
7
ray_shad:
;--- Render one triangle with a ray casted shadow effect. ----
;--- The triangle is split into scanlines; each scanline is --
;--- drawn by ray_shd_l, which tests every pixel against all -
;--- triangles. Not a real time process, especially when -----
;--- many triangles are computed. ----------------------------
;------in - eax - x1 shl 16 + y1 -----------------------------
;---------- ebx - x2 shl 16 + y2 -----------------------------
;---------- ecx - x3 shl 16 + y3 -----------------------------
;---------- edx - not read here (historically: ptr to fur ----
;--------------   coords struct) -----------------------------
;---------- esi - pointer to stencil / Z-buffer, filled with -
;--------------   dword floats holding the 'Z' coord of ------
;--------------   every front pixel --------------------------
;---------- edi - pointer to screen buffer -------------------
;---------- xmm0 - 1st normal vector -------------------------
;---------- xmm1 - 2nd normal vector -------------------------
;---------- xmm2 - 3rd normal vector -------------------------
;---------- xmm3 - not used (store to .l_v is disabled) ------
;---------- xmm4 - lo -> hi z1, z2, z3 coords as dword floats
;---------- xmm5 - lo -> hi y_min, y_max, x_min, x_max -------
;--------------   as dword integers --------------------------
;---------- mm7  - current triangle index --------------------
;---------- stack - no parameters ----------------------------
;--------------------------------------------------------------
;--- The procedure does NOT preserve registers (only ebp). ---

  push  ebp
  mov   ebp,esp
  sub   esp,1024
  sub   ebp,16
  and   ebp,0xfffffff0          ; 16-byte aligned base for movaps/movdqa locals

  .1_nv equ [ebp-16]
  .2_nv equ [ebp-32]
  .3_nv equ [ebp-48]
  .l_v  equ [ebp-64]            ; currently unused
  .z3   equ [ebp-72]
  .z2   equ [ebp-76]
  .z1   equ [ebp-80]
  .x1   equ [ebp-82]
  .y1   equ [ebp-84]
  .x2   equ [ebp-86]
  .y2   equ [ebp-88]
  .x3   equ [ebp-90]
  .y3   equ [ebp-92]
  .Zbuf equ [ebp-96]
  .x_max  equ [ebp-100]
  .x_min  equ [ebp-104]
  .y_max  equ [ebp-108]
  .y_min  equ [ebp-112]
  .screen equ [ebp-116]
  .dx12   equ [ebp-120]
  .dx13   equ [ebp-124]
  .dx23   equ [ebp-128]
  .dn12   equ [ebp-144]
  .dn13   equ [ebp-160]
  .dn23   equ [ebp-176]
  .dz12   equ [ebp-180]
  .dz13   equ [ebp-184]
  .dz23   equ [ebp-188]
  .cnv1   equ [ebp-208] ; current normal vectors
  .cnv2   equ [ebp-240]
  .cz2    equ [ebp-244]
  .cz1    equ [ebp-248]
  .tri_no equ [ebp-252]


 .sort3:                  ; sort vertices so that y1 <= y2 <= y3
       cmp     ax,bx                   ; low words hold the y coordinates
       jle     .sort1
       xchg    eax,ebx
       shufps  xmm4,xmm4,11100001b     ; swap z1 <-> z2
       movaps  xmm6,xmm0               ; swap normal 1 <-> normal 2
       movaps  xmm0,xmm1
       movaps  xmm1,xmm6

 .sort1:
       cmp      bx,cx
       jle      .sort2
       xchg     ebx,ecx
       shufps   xmm4,xmm4,11011000b    ; swap z2 <-> z3
       movaps   xmm6,xmm1              ; swap normal 2 <-> normal 3
       movaps   xmm1,xmm2
       movaps   xmm2,xmm6

       jmp .sort3                      ; vertex 2 changed - recheck 1 against 2

 .sort2:

   movaps .z1,xmm4                     ; stores z1, z2, z3 (and one spare dword)
   mov    .y1,eax                      ; dword store fills both .y1 and .x1
   mov    .y2,ebx
   mov    .y3,ecx

   movdqa   .y_min,xmm5                ; stores y_min, y_max, x_min, x_max
if 1                            ; check if at least a fragment
   packssdw xmm5,xmm5           ; of the triangle is in the visible area
   pshuflw  xmm5,xmm5,11011000b ; low words -> y_min, x_min, y_max, x_max
   movdqu   xmm7,.y3            ; words: y3,x3,y2,x2,y1,x1, then the z1 float
   movdqa   xmm6,xmm5
   pshufd   xmm5,xmm5,0         ; xmm5 lo-hi -> broadcasted y_min, x_min
   pshufd   xmm6,xmm6,01010101b ; xmm6 -> broadcasted y_max, x_max
   movdqa   xmm4,xmm7
   pcmpgtw  xmm7,xmm5           ; coord > min
   pcmpgtw  xmm4,xmm6           ; coord > max
   pxor     xmm7,xmm4           ; set where min < coord <= max
   pmovmskb eax,xmm7
   ; Only words 0..5 are coordinates, i.e. mask bits 1,3,5,7,9,11.
   ; Bits 13 and 15 come from the raw z1 float bits and must be ignored,
   ; otherwise the early-out is defeated by garbage.
   test     eax,0x0aaa
   jz       .rpt_loop2_end      ; no coordinate inside the bounds - skip triangle
end if
   movd     .tri_no,mm7
   movaps   .1_nv,xmm0
   movaps   .2_nv,xmm1
   movaps   .3_nv,xmm2
   mov      .Zbuf,esi
   mov      .screen,edi


       ; --- edge 1-2: per-scanline deltas of x, z and normal ---
       mov      bx,.y2
       sub      bx,.y1
       jnz      .rpt_dx12_make

       xorps    xmm7,xmm7              ; flat edge - all deltas are zero
       mov      dword .dx12,0
       mov      dword .dz12,0
       movaps   .dn12,xmm7
       jmp      .rpt_dx12_done

  .rpt_dx12_make:
       mov      ax,.x2
       sub      ax,.x1
       cwde
       movsx    ebx,bx
       shl      eax,ROUND2             ; fixed point with ROUND2 fractional bits
       cdq
       idiv     ebx
       mov      .dx12,eax

       cvtsi2ss xmm6,ebx
       movss    xmm5,.z2
       rcpss    xmm6,xmm6              ; approximate 1 / (y2 - y1)
       subss    xmm5,.z1
       mulss    xmm5,xmm6
       movss    .dz12,xmm5

       shufps   xmm6,xmm6,0
       movaps   xmm0,.2_nv
       subps    xmm0,.1_nv
       mulps    xmm0,xmm6
       movaps   .dn12,xmm0

   .rpt_dx12_done:
       ; --- edge 1-3 ---
       mov      bx,.y3
       sub      bx,.y1
       jnz      .rpt_dx13_make

       xorps    xmm7,xmm7
       mov      dword .dx13,0
       mov      dword .dz13,0
       movaps   .dn13,xmm7
       jmp      .rpt_dx13_done

  .rpt_dx13_make:
       mov      ax,.x3
       sub      ax,.x1
       cwde
       movsx    ebx,bx
       shl      eax,ROUND2
       cdq
       idiv     ebx
       mov      .dx13,eax

       cvtsi2ss xmm6,ebx
       movss    xmm5,.z3
       rcpss    xmm6,xmm6              ; approximate 1 / (y3 - y1)
       subss    xmm5,.z1
       mulss    xmm5,xmm6
       movss    .dz13,xmm5

       movaps   xmm0,.3_nv
       subps    xmm0,.1_nv
       shufps   xmm6,xmm6,0
       mulps    xmm0,xmm6
       movaps   .dn13,xmm0

  .rpt_dx13_done:
       ; --- edge 2-3 ---
       mov      bx,.y3
       sub      bx,.y2
       jnz      .rpt_dx23_make

       xorps    xmm7,xmm7
       mov      dword .dx23,0
       mov      dword .dz23,0
       movaps   .dn23,xmm7

       jmp      .rpt_dx23_done

  .rpt_dx23_make:
       mov      ax,.x3
       sub      ax,.x2
       cwde
       movsx    ebx,bx
       shl      eax,ROUND2
       cdq
       idiv     ebx
       mov      .dx23,eax

       cvtsi2ss xmm6,ebx
       movss    xmm5,.z3
       rcpss    xmm6,xmm6              ; approximate 1 / (y3 - y2)
       subss    xmm5,.z2
       mulss    xmm5,xmm6
       movss    .dz23,xmm5

       movaps   xmm0,.3_nv
       subps    xmm0,.2_nv
       shufps   xmm6,xmm6,0
       mulps    xmm0,xmm6
       movaps   .dn23,xmm0

   .rpt_dx23_done:
       ; --- upper half: scanlines y1 .. y2-1, walking edges 1-3 and 1-2 ---
       movsx   eax,word .x1
       shl     eax,ROUND2              ; eax - current x on edge 1-3 (fixed point)
       mov     ebx,eax                 ; ebx - current x on edge 1-2 (fixed point)
       mov     ecx,.z1
       mov     .cz1,ecx
       mov     .cz2,ecx
       movaps  xmm0,.1_nv
       movaps  .cnv1,xmm0
       movaps  .cnv2,xmm0
       mov      edi,.screen
       mov      esi,.Zbuf
       movsx    ecx,word .y1           ; ecx - current scanline
       cmp      cx,.y2

       jge      .rpt_loop1_end

    .rpt_loop1:
       pushad                          ; ray_shd_l does not preserve registers

       movaps   xmm2,.y_min
       movaps   xmm0,.cnv1
       movaps   xmm1,.cnv2
       movlps   xmm3,.cz1              ; lo -> hi: cz1, cz2
       sar      ebx,ROUND2             ; fixed point -> integer pixel x
       sar      eax,ROUND2
       movd     mm7,.tri_no

       call     ray_shd_l

       popad
       ; step both edges to the next scanline
       movaps   xmm0,.cnv1
       movaps   xmm1,.cnv2
       movss    xmm2,.cz1
       movss    xmm3,.cz2
       addps    xmm0,.dn13
       addps    xmm1,.dn12
       addss    xmm2,.dz13
       addss    xmm3,.dz12

       add      eax,.dx13
       add      ebx,.dx12

       movaps   .cnv1,xmm0
       movaps   .cnv2,xmm1
       movss    .cz1,xmm2
       movss    .cz2,xmm3

       add      ecx,1
       cmp      cx,.y2
       jl       .rpt_loop1


   .rpt_loop1_end:
       ; --- lower half: scanlines y2 .. y3-1, walking edges 1-3 and 2-3 ---
       movsx    ecx,word .y2
       cmp      cx,.y3
       jge      .rpt_loop2_end

       movsx    ebx,word .x2               ; eax - cur x on edge 1-3 (kept from above)
       shl      ebx,ROUND2                 ; ebx - cur x on edge 2-3
       push     dword .z2
       pop      dword .cz2
       movaps   xmm0,.2_nv
       movaps   .cnv2,xmm0

       mov      edi,.screen
       mov      esi,.Zbuf


     .rpt_loop2:
       pushad
       movaps   xmm2,.y_min
       movaps   xmm0,.cnv1
       movaps   xmm1,.cnv2
       movlps   xmm3,.cz1              ; lo -> hi: cz1, cz2
       sar      ebx,ROUND2
       sar      eax,ROUND2
       movd     mm7,.tri_no

       call     ray_shd_l

       popad
       movaps   xmm0,.cnv1
       movaps   xmm1,.cnv2
       movss    xmm2,.cz1
       movss    xmm3,.cz2

       addps    xmm0,.dn13
       addps    xmm1,.dn23
       addss    xmm2,.dz13
       addss    xmm3,.dz23
       add      eax,.dx13
       add      ebx,.dx23

       movaps   .cnv1,xmm0
       movaps   .cnv2,xmm1
       movss    .cz1,xmm2
       movss    .cz2,xmm3

       add      ecx,1
       cmp      cx,.y3
       jl       .rpt_loop2

    .rpt_loop2_end:

      add   esp,1024
      pop   ebp

ret
9512 IgorA 356
 
9237 leency 357
ray_shd_l:
; Draw one horizontal span of a triangle with ray casted shadows.
; For every pixel of the span that passes the Z-buffer match test, a ray
; from the light to the pixel is tested against all triangles except the
; current one; only unoccluded pixels receive the light contribution.
; in:
;    xmm0 - normal vector 1 (at x1)
;    xmm1 - normal vector 2 (at x2)
;    xmm3 - lo -> hi z1, z2 coords as dword floats
;    xmm2 - lo -> hi y_min, y_max, x_min, x_max
;           as dword integers
;    xmm4 - not used as input
;    mm7  - current triangle index
;    eax  - x1
;    ebx  - x2
;    ecx  - y
;    edx  - not used as input
;    edi  - screen buffer
;    esi  - z buffer / stencil buffer filled with dd floats
; Registers are not preserved (only ebp).

   push  ebp
   mov   ebp,esp
   sub   esp,320
   sub   ebp,16
   and   ebp,0xfffffff0         ; 16-byte aligned base for movaps locals

 .n1  equ [ebp-16]
 .n2  equ [ebp-32]
 .lv  equ [ebp-48]
 .lx1 equ [ebp-52]
 .lx2 equ [ebp-56]
 .z2  equ [ebp-60]
 .z1  equ [ebp-64]
 .screen    equ [ebp-68]
 .zbuff     equ [ebp-72]
 .x_max     equ [ebp-74]
 .x_min     equ [ebp-76]
 .y_max     equ [ebp-78]
 .y_min     equ [ebp-80]
 .dn        equ [ebp-96]
 .dz        equ [ebp-100]
 .y         equ [ebp-104]
 .startx    equ [ebp-108]
 .cnv       equ [ebp-128]
 .Rlen      equ [ebp-128-16]
 .r1        equ [ebp-128-32]
 .vect_t    equ [ebp-128-48]
 .cur_tri    equ [ebp-128-64]
; .p3t       equ [ebp-128-80]
 .nray      equ [ebp-128-96]
 .final_col equ [ebp-128-112]
 .aabb_mask equ dword[ebp-128-112-4]

        mov     .y,ecx
        movdqa   xmm4,xmm2
        packssdw xmm2,xmm2            ; bounds -> signed saturated words
        movq    .y_min,xmm2           ; stores y_min, y_max, x_min, x_max
        cmp     cx,.y_min             ; reject scanlines outside [y_min, y_max)
        jl      .end_rp_line
        cmp     cx,.y_max
        jge     .end_rp_line
        cmp     eax,ebx
        je      .end_rp_line          ; empty span
        jl      .x_sorted
        xchg    eax,ebx               ; make eax the left end, swap attributes too
        movaps  xmm7,xmm0
        movaps  xmm0,xmm1
        movaps  xmm1,xmm7
        shufps  xmm3,xmm3,11100001b   ; swap z1 <-> z2
    .x_sorted:
        movd    .cur_tri,mm7
        cmp     ax,.x_max             ; span completely right of the bounds
        jge     .end_rp_line
        cmp     bx,.x_min             ; span completely left of the bounds
        jle     .end_rp_line
     ;   movaps  .lv,xmm4
        andps    xmm0,[zero_hgst_dd]
        andps    xmm1,[zero_hgst_dd]
        movaps  .n1,xmm0
        movaps  .n2,xmm1
        mov     .lx1,eax
;        mov     .startx,eax
        mov     .lx2,ebx
        movlps  .z1,xmm3              ; stores z1 and z2

        ; per-pixel deltas of normal and z along the span
        sub     ebx,eax
        cvtsi2ss xmm7,ebx
        rcpss   xmm7,xmm7             ; approximate 1 / (x2 - x1)
        shufps  xmm7,xmm7,0
        subps   xmm1,xmm0
        mulps   xmm1,xmm7
        movaps  .dn,xmm1
        shufps  xmm3,xmm3,11111001b   ; z2 -> low lane
        subss   xmm3,.z1
        mulss   xmm3,xmm7
        movss   .dz,xmm3

        ; clip the left end: advance n1 / z1 by (x_min - x1) steps.
        ; The bounds are signed words (packssdw) compared with signed jumps,
        ; so they are sign-extended everywhere.
        mov      ebx,.lx1
        cmp      bx,.x_min
        jge      .left_clipped
        movsx    eax,word .x_min
        sub      eax,ebx
        cvtsi2ss xmm7,eax
        shufps   xmm7,xmm7,0
        mulss    xmm3,xmm7
        mulps    xmm1,xmm7
        addss    xmm3,.z1
        addps    xmm1,.n1
        movsx    eax,word .x_min
        movss    .z1,xmm3
        movaps   .n1,xmm1
        mov      dword .lx1,eax
      .left_clipped:

        ; clip the right end
        movsx   eax,word .x_max
        cmp     .lx2,eax
        jl      .right_clipped
        mov     .lx2,eax
      .right_clipped:
        ; pixel offset = (xres * y + lx1) * 4 in both buffers
        movzx   eax,word[xres_var]
        mul     dword .y              ; clobbers edx
        add     eax,.lx1
        mov     .zbuff,esi
        mov     .screen,edi
        shl     eax,2
        add     edi,eax
        add     esi,eax
        mov     ecx,.lx2
        sub     ecx,.lx1              ; ecx - number of pixels to process

        ; translation added to every rotated vertex: the words at [vect_x]
        ; zero-extended to dwords and converted to floats
        movd       xmm0,[vect_x]
        punpcklwd  xmm0,[the_zero]
        cvtdq2ps   xmm0,xmm0
        movaps     .vect_t,xmm0


   .ddraw:
        ; process the pixel only when z1-[f1] < zbuf <= z1+[f1],
        ; i.e. this triangle is the one recorded in the Z-buffer here
        xorps    xmm0,xmm0
        movss    xmm2,.z1
        movss    xmm5,.z1
        movaps   .final_col,xmm0
        addss    xmm2,[f1]
        subss    xmm5,[f1]
        cmpnltss xmm2,dword[esi]
        cmpnltss xmm5,dword[esi]
        pxor     xmm2,xmm5
        movd     eax,xmm2
        test     eax,eax
        jz       .skips

        movaps   xmm7,.n1
        andps    xmm7,[zero_hgst_dd]
        mulps    xmm7,xmm7 ; normalize the interpolated normal
        haddps   xmm7,xmm7
        haddps   xmm7,xmm7
        rsqrtps  xmm7,xmm7
        mulps    xmm7,.n1
        movaps   .cnv,xmm7
        mov      ebx,point_light_coords
        mov      edx,lights_aligned
        xor      eax,eax
  .nx_light:
   pushad                      ; saves ecx (pixel count), edi, esi, ebx, edx
   cvtsi2ss xmm0,.lx1
   cvtsi2ss xmm1,.y
   movss    xmm2,.z1
   movlhps  xmm0,xmm1
   shufps   xmm0,xmm2,11001000b ; xmm0 lo -> hi: x, y, z of the current pixel
   subps    xmm0,[ebx] ; xmm0 - ray end, -> current vertex
   movaps   xmm3,[ebx]
   andps    xmm0,[zero_hgst_dd]
   movaps   xmm1,xmm0
   mulps    xmm0,xmm0
   haddps   xmm0,xmm0
   haddps   xmm0,xmm0
   sqrtps   xmm0,xmm0
   movss    .Rlen,xmm0         ; distance light -> pixel
   rcpps    xmm0,xmm0
   mulps    xmm0,xmm1    ; xmm0 - normalized ray vector
   andps    xmm0,[zero_hgst_dd]
   movaps   .nray,xmm0
   movaps   .r1,xmm3   ; ray origin
 if 0
   movaps   xmm1,xmm3
   call     calc_bounding_box

   mov      .aabb_mask,eax
end if
   mov      edi,[triangles_ptr]
   xor      ecx,ecx            ; ecx - index of the tested triangle
 .nx_tri:  ; next triangle
 ;  mov     eax,.lx1
 ;  cmp     eax,.startx
 ;  je      @f          ; prevent artifact borders on tri
 ;  cmp     eax,.lx2    ; NOT work as I want !!
;   je      @f

   cmp     ecx,.cur_tri ; prevent self shadowing
   je      .skipp
  @@:
 if 0
   mov     edi,ecx
   imul    edi,[i12]
   add     edi,[triangles_ptr]
   mov     eax,[edi]
   mov     ebx,[edi+4]
   mov     edx,[edi+8]
   imul    eax,[i12]
   imul    ebx,[i12]
   imul    edx,[i12]
   add     eax,[points_ptr]
   add     ebx,[points_ptr]
   add     edx,[points_ptr]
   movups   xmm2,[eax]
   movups   xmm3,[ebx]
   movups   xmm4,[edx]
   andps    xmm2,[sign_mask]
   andps    xmm3,[sign_mask]
   andps    xmm4,[sign_mask]
   movmskps ebx,xmm4
   cmpeqps  xmm2,xmm3
   cmpeqps  xmm3,xmm4
   andps    xmm2,xmm3
   movmskps eax,xmm2
   and      eax,111b
   and      ebx,111b
   cmp      eax,111b
   jne      @f
   bt       .aabb_mask,ebx
   jnc      .skipp
  @@:
end if
   ; fetch the three vertex indices of triangle ecx, then the rotated
   ; vertices themselves, and translate them by .vect_t
   mov     edi,ecx
   imul    edi,[i12]
   add     edi,[triangles_ptr]
   mov     eax,[edi]
   mov     ebx,[edi+4]
   mov     edx,[edi+8]
   imul    eax,[i12]
   imul    ebx,[i12]
   imul    edx,[i12]
   add     eax,[points_rotated_ptr]
   add     ebx,[points_rotated_ptr]
   add     edx,[points_rotated_ptr]
   movups   xmm2,[eax]
   movups   xmm3,[ebx]
   movups   xmm4,[edx]
   addps    xmm2,.vect_t
   addps    xmm3,.vect_t
   addps    xmm4,.vect_t


;intersect_tri: procs header
; in:
;     xmm0 - ray direction  ; should be normalized
;     xmm1 - ray origin
;     xmm2 - tri vert1
;     xmm3 - tri vert2
;     xmm4 - tri vert3
;     if  eax = 1 - intersection with edge
;        xmm6 - edge length
;     if  eax = 0 - intersect with ray (classic)
; out:
;     eax  = 1 - intersection occurred
;     xmm0 - float lo -> hi = t, v, u, ...

   movss     xmm6,.Rlen
   movaps    xmm0,.nray
   movaps    xmm1,.r1
   subss     xmm6,[the_one]    ; pass Rlen - [the_one] as the segment length
   mov       eax,1
   push      ecx
   call      intersect_tri
   pop       ecx
   cmp       eax,1
   je        .inter            ; something lies between light and pixel
 .skipp:
 .skp:
   inc       ecx
   cmp       ecx,[triangles_count_var]
   jnz       .nx_tri
;   jz        .do_process
;   comiss  xmm0,.Rlen
;   jl      .inter

   popad
 .do_process:
        ; pixel is lit: d = |dot(nray, cnv)|,
        ; colour = min(d * [edx+16] + d^16 * [edx+48], mask_255f)
        ; NOTE(review): [edx+16] / [edx+48] look like the diffuse and specular
        ; colours of the light record - confirm against lights_aligned layout
        movaps    xmm5,.nray  ;[edx]
        andps     xmm5,[zero_hgst_dd]  ; global
        mulps    xmm5,.cnv  ;.lv   ; last dword should be zeroed
 ;       andps     xmm5,[sign_z]   ; global
        haddps   xmm5,xmm5
        haddps   xmm5,xmm5
        andps     xmm5,[abs_mask]  ; global
        movaps   xmm7,xmm5
        mulps    xmm7,xmm7         ; d^2
        mulps    xmm7,xmm7         ; d^4
        mulps    xmm5,[edx+16]
        mulps    xmm7,xmm7         ; d^8
        mulps    xmm7,xmm7         ; d^16
        mulps    xmm7,[edx+48]
        addps    xmm5,xmm7
        minps    xmm5,[mask_255f]  ; global
        maxps    xmm5,.final_col     ;  addps  maxps
        movaps   .final_col,xmm5
        jmp     .nx_loop
  .inter:
        ; pixel is shadowed for this light: keep .final_col unchanged
        popad
      .nx_loop:
      ;  add      edx,64    ; uncomment to achieve 3 lights
      ;  add      ebx,16
      ;  cmp      edx,lights_aligned_end    ; global
      ;  jnz      .nx_light

        ; convert the float colour to 4 saturated bytes and store the pixel
        movaps    xmm1,.final_col
        cvtps2dq  xmm1,xmm1
        packssdw  xmm1,xmm1
        packuswb  xmm1,xmm1
        movd      [edi],xmm1
  .skips:
        ; advance to the next pixel of the span
        movaps   xmm0,.n1
        movss    xmm2,.z1
        add      edi,4
        add      esi,4
        add      dword .lx1,1
        addps    xmm0,.dn
        addss    xmm2,.dz
        movaps   .n1,xmm0
        movss    .z1,xmm2
        dec      ecx
        jnz      .ddraw
  .end_rp_line:
        add      esp,320
        pop      ebp

ret