; Source: Kolibri OS Subversion repository, Rev 8047 (author: leency).
; Real Phong's shading implemented in flat assembler
; by Maciej Guba.
; http://macgub.vxm.pl

ROUND2 equ 10                   ; fractional bits of the fixed-point edge x coords

real_phong_tri_z:
;----procedure renders a Phong shaded triangle with z coord
;----interpolation ( Catmull algorithm )-----------------
;----the normal vector is normalized in every pixel -----
;------------------in - eax - x1 shl 16 + y1 ------------
;---------------------- ebx - x2 shl 16 + y2 ------------
;---------------------- ecx - x3 shl 16 + y3 ------------
;---------------------- esi - pointer to Z-buffer filled-
;----------------------   with dd float variables--------
;---------------------- edi - pointer to screen buffer---
;---------------------- xmm0 - 1st normal vector --------
;---------------------- xmm1 - 2nd normal vector --------
;---------------------- xmm2 - 3rd normal vector --------
;---------------------- xmm3 - normalized light vector --
;---------------------- xmm4 - lo -> hi z1, z2, z3 coords
;----------------------  as dwords floats ---------------
;---------------------- xmm5 - lo -> hi y_min, y_max, ---
;----------------------  x_min, x_max as dword integers -
;---------------------- stack - no parameters -----------
;--------------------------------------------------------
;----------------- procedure doesn't save registers !! --
;
; Outline:
;   1. sort the three vertices by ascending y (z values and normals
;      are swapped together with the packed coordinates),
;   2. early-out when none of the six vertex coordinates lies inside
;      the clip rectangle,
;   3. compute per-scanline deltas of x (fixed point, ROUND2 fractional
;      bits), z and normal for the edges 1-2, 1-3 and 2-3,
;   4. walk scanlines y1..y2-1 (edges 1-3 / 1-2) and y2..y3-1
;      (edges 1-3 / 2-3), calling real_phong_line_z for each one.
;
; Stack: 512 bytes are reserved; ebp is moved down and rounded to a
; 16-byte boundary so that every 16-byte local can be accessed with
; movaps/movdqa.

  push  ebp
  mov   ebp,esp
  sub   esp,512
  sub   ebp,16
  and   ebp,0xfffffff0          ; ebp = 16-byte aligned base of the locals

  .1_nv equ [ebp-16]            ; vertex normals (16 bytes each)
  .2_nv equ [ebp-32]
  .3_nv equ [ebp-48]
  .l_v  equ [ebp-64]            ; light vector
  .z3   equ [ebp-72]            ; z1,z2,z3 are stored with one 16-byte move at .z1
  .z2   equ [ebp-76]
  .z1   equ [ebp-80]
  .x1   equ [ebp-82]            ; each x/y pair is stored as one dword at .yN
  .y1   equ [ebp-84]
  .x2   equ [ebp-86]
  .y2   equ [ebp-88]
  .x3   equ [ebp-90]
  .y3   equ [ebp-92]
  .Zbuf equ [ebp-96]
  .x_max equ  [ebp-100]         ; clip rectangle, stored with one 16-byte move at .y_min
  .x_min equ  [ebp-104]
  .y_max equ  [ebp-108]
  .y_min equ  [ebp-112]
  .screen equ [ebp-116]
  .dx12   equ [ebp-120]         ; per-scanline x steps (fixed point)
  .dx13   equ [ebp-124]
  .dx23   equ [ebp-128]
  .dn12   equ [ebp-144]         ; per-scanline normal steps
  .dn13   equ [ebp-160]
  .dn23   equ [ebp-176]
  .dz12   equ [ebp-180]         ; per-scanline z steps
  .dz13   equ [ebp-184]
  .dz23   equ [ebp-188]

  .cnv1   equ [ebp-208]  ; cur normal vectors
  .cnv2   equ [ebp-224]
  .cz2    equ [ebp-228]         ; cur z values; read as one 8-byte pair at .cz1
  .cz1    equ [ebp-232]


 .sort3:                  ; sort triangle coordinates...
       cmp     ax,bx                    ; y1 <= y2 ?
       jle     .sort1
       xchg    eax,ebx
       shufps  xmm4,xmm4,11100001b      ; swap z1 <-> z2
       movaps  xmm6,xmm0                ; swap normals 1 <-> 2
       movaps  xmm0,xmm1
       movaps  xmm1,xmm6


 .sort1:
       cmp      bx,cx                   ; y2 <= y3 ?
       jle      .sort2
       xchg     ebx,ecx
       shufps   xmm4,xmm4,11011000b     ; swap z2 <-> z3
       movaps   xmm6,xmm1               ; swap normals 2 <-> 3
       movaps   xmm1,xmm2
       movaps   xmm2,xmm6

       jmp .sort3

 .sort2:

   movaps .z1,xmm4
   mov    .y1,eax
   mov    .y2,ebx
   mov    .y3,ecx

   movdqa   .y_min,xmm5
if 1                            ; check if at least a fragment
   packssdw xmm5,xmm5       ; of triangle is in visible area
   pshuflw  xmm5,xmm5,11011000b
   movdqu   xmm7,.y3        ; words 0..5 = y3,x3,y2,x2,y1,x1 ; words 6..7 = bits of .z1
   movdqa   xmm6,xmm5
   pshufd   xmm5,xmm5,0  ; xmm5 lo-hi -> broadcasted y_min, x_min
   pshufd   xmm6,xmm6,01010101b ;xmm6 -> brd y_max x_max
   movdqa   xmm4,xmm7
   pcmpgtw  xmm7,xmm5       ; coord > min
   pcmpgtw  xmm4,xmm6       ; coord > max
   pxor     xmm7,xmm4       ; word set when min < coord <= max
   pmovmskb eax,xmm7
   ; Only the six coordinate words may take part in the test.  Bits 13
   ; and 15 belong to words 6..7, which hold the float bits of .z1, so
   ; they are excluded from the mask.
   ; NOTE(review): the triangle is skipped when no single coordinate is
   ; in range; a triangle that spans the whole clip rectangle with all
   ; vertices outside is skipped as well - confirm this is intended.
   test     eax,0x0aaa
   jz       .rpt_loop2_end
end if
   movaps   .1_nv,xmm0
   movaps   .2_nv,xmm1
   movaps   .3_nv,xmm2
   movaps   .l_v,xmm3
   mov      .Zbuf,esi
   mov      .screen,edi



       mov      bx,.y2       ; calc deltas
       sub      bx,.y1
       jnz      .rpt_dx12_make

       xorps    xmm7,xmm7               ; y1 = y2 : edge 1-2 is horizontal, zero steps
       mov      dword .dx12,0
       mov      dword .dz12,0
       movaps   .dn12,xmm7
       jmp      .rpt_dx12_done

  .rpt_dx12_make:
       mov      ax,.x2
       sub      ax,.x1
       cwde
       movsx    ebx,bx
       shl      eax,ROUND2
       cdq
       idiv     ebx                     ; dx12 = ((x2-x1) shl ROUND2) / (y2-y1)
       mov      .dx12,eax

       cvtsi2ss xmm6,ebx
       movss    xmm5,.z2
       subss    xmm5,.z1
       divss    xmm5,xmm6               ; dz12 = (z2-z1) / (y2-y1)
       movss    .dz12,xmm5

       movaps   xmm0,.2_nv
       subps    xmm0,.1_nv
       shufps   xmm6,xmm6,0
       divps    xmm0,xmm6               ; dn12 = (n2-n1) / (y2-y1)
       movaps   .dn12,xmm0


   .rpt_dx12_done:

       mov      bx,.y3       ; calc deltas
       sub      bx,.y1
       jnz      .rpt_dx13_make

       xorps    xmm7,xmm7               ; y1 = y3 : zero steps
       mov      dword .dx13,0
       mov      dword .dz13,0
       movaps   .dn13,xmm7
       jmp      .rpt_dx13_done

  .rpt_dx13_make:
       mov      ax,.x3
       sub      ax,.x1
       cwde
       movsx    ebx,bx
       shl      eax,ROUND2
       cdq
       idiv     ebx                     ; dx13 = ((x3-x1) shl ROUND2) / (y3-y1)
       mov      .dx13,eax

       cvtsi2ss xmm6,ebx
       movss    xmm5,.z3
       subss    xmm5,.z1
       divss    xmm5,xmm6               ; dz13 = (z3-z1) / (y3-y1)
       movss    .dz13,xmm5

       movaps   xmm0,.3_nv
       subps    xmm0,.1_nv
       shufps   xmm6,xmm6,0
       divps    xmm0,xmm6               ; dn13 = (n3-n1) / (y3-y1)
       movaps   .dn13,xmm0

   .rpt_dx13_done:

       mov      bx,.y3       ; calc deltas
       sub      bx,.y2
       jnz      .rpt_dx23_make

       xorps    xmm7,xmm7               ; y2 = y3 : zero steps
       mov      dword .dx23,0
       mov      dword .dz23,0
       movaps   .dn23,xmm7
       jmp      .rpt_dx23_done

  .rpt_dx23_make:
       mov      ax,.x3
       sub      ax,.x2
       cwde
       movsx    ebx,bx
       shl      eax,ROUND2
       cdq
       idiv     ebx                     ; dx23 = ((x3-x2) shl ROUND2) / (y3-y2)
       mov      .dx23,eax

       cvtsi2ss xmm6,ebx
       movss    xmm5,.z3
       subss    xmm5,.z2
       divss    xmm5,xmm6               ; dz23 = (z3-z2) / (y3-y2)
       movss    .dz23,xmm5

       movaps   xmm0,.3_nv
       subps    xmm0,.2_nv
       shufps   xmm6,xmm6,0
       divps    xmm0,xmm6               ; dn23 = (n3-n2) / (y3-y2)
       movaps   .dn23,xmm0

   .rpt_dx23_done:


       ; both walkers start at vertex 1
       movsx   eax,word .x1
       shl     eax,ROUND2               ; eax - cur x on edge 1-3 (fixed point)
       mov     ebx,eax                  ; ebx - cur x on edge 1-2 (fixed point)
       mov     edx,.z1
       mov     .cz1,edx
       mov     .cz2,edx
       movaps  xmm0,.1_nv
       movaps  .cnv1,xmm0
       movaps  .cnv2,xmm0


       movsx    ecx,word .y1            ; ecx - cur y
       cmp      cx,.y2

       jge      .rpt_loop1_end          ; upper part is empty

    .rpt_loop1:
       pushad                           ; the callee does not preserve registers

       movaps   xmm2,.y_min
       movaps   xmm0,.cnv1
       movaps   xmm1,.cnv2
       movlps   xmm3,.cz1               ; loads cz1 and cz2 together
       movaps   xmm4,.l_v
       sar      ebx,ROUND2              ; fixed point -> integer x
       sar      eax,ROUND2
       mov      edi,.screen
       mov      esi,.Zbuf

       call     real_phong_line_z

       popad
       movaps   xmm0,.cnv1              ; step both edge walkers one scanline down
       movaps   xmm1,.cnv2
       movss    xmm2,.cz1
       movss    xmm3,.cz2
       addps    xmm0,.dn13
       addps    xmm1,.dn12
       addss    xmm2,.dz13
       addss    xmm3,.dz12
       add      eax,.dx13
       add      ebx,.dx12

       movaps   .cnv1,xmm0
       movaps   .cnv2,xmm1
       movss    .cz1,xmm2
       movss    .cz2,xmm3

       add      ecx,1
       cmp      cx,.y2
       jl       .rpt_loop1





   .rpt_loop1_end:
       movsx    ecx,word .y2
       cmp      cx,.y3
       jge      .rpt_loop2_end          ; lower part is empty

       ; second walker restarts at vertex 2 and follows edge 2-3;
       ; the first walker (eax, .cnv1, .cz1) continues along edge 1-3
       movsx    ebx,word .x2                    ; eax - cur x1
       shl      ebx,ROUND2                 ; ebx - cur x2
       push     dword .z2
       pop      dword .cz2
       movaps   xmm0,.2_nv
       movaps   .cnv2,xmm0


     .rpt_loop2:
       pushad                           ; the callee does not preserve registers

       movaps   xmm2,.y_min
       movaps   xmm0,.cnv1
       movaps   xmm1,.cnv2
       movlps   xmm3,.cz1               ; loads cz1 and cz2 together
       movaps   xmm4,.l_v
       sar      ebx,ROUND2              ; fixed point -> integer x
       sar      eax,ROUND2
       mov      edi,.screen
       mov      esi,.Zbuf

       call     real_phong_line_z

       popad
       movaps   xmm0,.cnv1              ; step both edge walkers one scanline down
       movaps   xmm1,.cnv2
       movss    xmm2,.cz1
       movss    xmm3,.cz2
       addps    xmm0,.dn13
       addps    xmm1,.dn23
       addss    xmm2,.dz13
       addss    xmm3,.dz23
       add      eax,.dx13
       add      ebx,.dx23

       movaps   .cnv1,xmm0
       movaps   .cnv2,xmm1
       movss    .cz1,xmm2
       movss    .cz2,xmm3

       add      ecx,1
       cmp      cx,.y3
       jl       .rpt_loop2

    .rpt_loop2_end:

      add   esp,512
      pop   ebp

ret
align 16
real_phong_line_z:
; Draws one horizontal span of a Phong shaded triangle with a z-buffer
; test.  Normal vector and z are interpolated linearly along the span;
; the normal is re-normalized for every pixel that passes the z test.
; in:
;    xmm0 - normal vector 1
;    xmm1 - normal vect 2
;    xmm3 - lo -> hi z1, z2 coords as dwords floats
;    xmm2 - lo -> hi y_min, y_max, x_min, x_max
;           as dword integers
;    xmm4 - normalized light vector
;    eax - x1
;    ebx - x2
;    ecx - y
;    edi - screen buffer
;    esi - z buffer filled with dd floats
; uses globals defined outside this file section:
;    size_x_var, lights_aligned, lights_aligned_end,
;    zero_hgst_dd, mask_255f
; registers are not preserved (only ebp and esp are restored).
;
; The span is drawn for x in [max(x1,x_min), min(x2,x_max)) after the
; end points have been ordered so that x1 < x2.  Nothing is drawn when
; y is outside [y_min, y_max), when x1 = x2, or when the span lies
; completely outside the horizontal clip range.
;
; The local names below are symbolic constants (equ); from this point on
; they replace the same-named definitions made in real_phong_tri_z.

   push  ebp
   mov   ebp,esp
   sub   esp,160
   sub   ebp,16
   and   ebp,0xfffffff0         ; ebp = 16-byte aligned base of the locals

 .n1 equ [ebp-16]               ; normal at the left end of the span
 .n2 equ [ebp-32]               ; normal at the right end of the span
 .lv equ [ebp-48]               ; light vector (stored, not read below)
 .lx1 equ [ebp-52]              ; left x  (after clipping: first pixel)
 .lx2 equ [ebp-56]              ; right x (after clipping: one past last pixel)
 .z2 equ [ebp-60]               ; z1/z2 are stored as one 8-byte pair at .z1
 .z1 equ [ebp-64]
 .screen equ [ebp-68]
 .zbuff  equ [ebp-72]
 .x_max  equ [ebp-74]           ; clip limits as signed words,
 .x_min  equ [ebp-76]           ; stored as one 8-byte group at .y_min
 .y_max  equ [ebp-78]
 .y_min  equ [ebp-80]
 .dn     equ [ebp-96]           ; per-pixel normal step
 .dz     equ [ebp-100]          ; per-pixel z step
 .y      equ [ebp-104]
 .cnv    equ [ebp-128]          ; normalized normal of the current pixel

        mov    .y,ecx
        packssdw xmm2,xmm2              ; clip limits: dwords -> signed words
        movq   .y_min,xmm2
        cmp    cx,.y_min
        jl     .end_rp_line
        cmp    cx,.y_max
        jge    .end_rp_line          ;

        cmp     eax,ebx
        je      .end_rp_line            ; empty span
        jl      @f
        xchg    eax,ebx                 ; order the end points: x1 < x2
        movaps  xmm7,xmm0
        movaps  xmm0,xmm1
        movaps  xmm1,xmm7
        shufps  xmm3,xmm3,11100001b     ; swap z1 <-> z2
   @@:

        cmp     ax,.x_max
        jge     .end_rp_line            ; span starts right of the clip range
        cmp     bx,.x_min
        jle     .end_rp_line            ; span ends left of the clip range
        movaps  .lv,xmm4
        movaps  .n1,xmm0
        movaps  .n2,xmm1
        mov     .lx1,eax
        mov     .lx2,ebx
        movlps  .z1,xmm3

        sub     ebx,eax                 ; span length in pixels (non-zero here)
        cvtsi2ss xmm7,ebx
        shufps  xmm7,xmm7,0
        subps   xmm1,xmm0
        divps   xmm1,xmm7               ; dn = (n2-n1) / length
        movaps  .dn,xmm1
        psrldq  xmm3,4                  ; bring z2 to the low dword
        subss   xmm3,.z1
        divss   xmm3,xmm7               ; dz = (z2-z1) / length
        movss   .dz,xmm3



        ; Left clipping: advance z1 and n1 by (x_min - x1) steps.
        ; xmm1 still holds dn and xmm3 still holds dz here.
        ; The clip limits are signed words (see packssdw above and the
        ; signed compares), so they are sign-extended everywhere.
        mov      ebx,.lx1
        cmp      bx,.x_min     ; clipping on function4
        jge      @f
        movsx    eax,word .x_min
        sub      eax,ebx
        cvtsi2ss xmm7,eax
        shufps   xmm7,xmm7,0
        mulss    xmm3,xmm7
        mulps    xmm1,xmm7
        addss    xmm3,.z1
        addps    xmm1,.n1
        movsx    eax,word .x_min
        movss    .z1,xmm3
        movaps   .n1,xmm1
        mov      dword .lx1,eax

      @@:
        movsx   eax,word .x_max         ; right clipping: lx2 = min(lx2, x_max)
        cmp     .lx2,eax
        jl      @f
        mov     .lx2,eax
      @@:
        movzx   eax,word[size_x_var]
        mul     dword .y
      ;  mov     edx,.x1
        add     eax,.lx1
        shl     eax,2                   ; byte offset = (y*size_x + lx1) * 4
        add     edi,eax
        add     esi,eax

        mov     ecx,.lx2
        sub     ecx,.lx1                ; ecx = number of pixels to process
        movaps  xmm0,.n1                ; xmm0 - cur normal
        movss   xmm2,.z1                ; xmm2 - cur z
align 16
   .ddraw:
        movss    xmm7,xmm2
        cmpnltss xmm7,dword[esi]        ; all ones when cur z is not less than stored z
        movd     eax,xmm7
        test     eax,eax
        jnz      .skip                  ; pixel hidden
        movss    [esi],xmm2
        movaps   xmm7,xmm0
        mulps    xmm7,xmm7 ; normalize
        haddps   xmm7,xmm7
        haddps   xmm7,xmm7              ; sum of squares of all four lanes, broadcast
        rsqrtps  xmm7,xmm7              ; approximate reciprocal square root
        mulps    xmm7,xmm0
        movaps   .cnv,xmm7

        ; Accumulate the per-channel maximum over all light records.
        ; Records are 64 bytes apart; [edx] is dotted with the normal,
        ; [edx+16] is scaled by the dot product and [edx+48] by its 8th
        ; power.
        ; NOTE(review): presumably direction / diffuse colour / specular
        ; colour - confirm against the definition of lights_aligned.
        ; NOTE(review): dot^8 is positive for negative dot products too
        ; (the clamps below are commented out) - confirm this is wanted.
        mov      edx,lights_aligned    ; lights - global variable
        xorps    xmm1,xmm1     ; instead global can be used .lv - light vect.
      @@:
        movaps    xmm6,[edx+16]
        movaps    xmm5,[edx]
        movaps    xmm3,[edx+48]
        andps     xmm5,[zero_hgst_dd]  ; global

        mulps    xmm5,.cnv  ;.lv  ; last dword should be zeroed
        haddps   xmm5,xmm5
        haddps   xmm5,xmm5              ; xmm5 = dot product in every lane
   ;     mulps    xmm5,[env_const2]
   ;     maxps    xmm5,[dot_min]
   ;     minps    xmm5,[dot_max]
        movaps   xmm7,xmm5
      ;  mulps    xmm7,[env_const2]
     ;   mulps    xmm7,[env_const2]
     ;   maxps    xmm7,[dot_min]
     ;   minps    xmm7,[dot_max]

        mulps    xmm7,xmm7              ; dot^2
        mulps    xmm7,xmm7              ; dot^4
        mulps    xmm5,xmm6              ; dot * [edx+16]
        mulps    xmm7,xmm7              ; dot^8
        mulps    xmm7,xmm3              ; dot^8 * [edx+48]

        addps    xmm5,xmm7
        minps    xmm5,[mask_255f]   ; global
        maxps    xmm1,xmm5
     ;   movq     xmm3,[edx+20]    ; minimal color
     ;   punpcklwd xmm3,[minimum0]
     ;   cvtdq2ps xmm3,xmm3
     ;   maxps    xmm1,xmm3
        add      edx,64
        cmp      edx,lights_aligned_end    ; global
        jnz      @b

        cvtps2dq xmm1,xmm1              ; floats -> dwords -> words -> bytes
        packssdw xmm1,xmm1
        packuswb xmm1,xmm1
        movd     [edi],xmm1             ; write the 4-byte pixel
     .skip:
        add      edi,4
        add      esi,4
        addps    xmm0,.dn
        addss    xmm2,.dz
        sub      ecx,1
        jnz      .ddraw

  .end_rp_line:
        add      esp,160
        pop      ebp

ret