Subversion Repositories Kolibri OS

Rev

Rev 9237 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
9237 leency 1
; Real Phong shading implemented in flat assembler
2
; by Maciej Guba.
3
; http://macgub.co.pl
4
 
5
; Number of fractional bits of the fixed-point edge x coordinates:
; x values are shifted left by ROUND2 before the per-scanline deltas
; are added and shifted right by ROUND2 again before a line is drawn.
ROUND2 equ 10
6
real_phong_tri_z:
;--------------------------------------------------------------------
; Renders a Phong shaded triangle with z coordinate interpolation
; (Catmull algorithm). Normal vectors are interpolated along the
; triangle edges here; every scanline is drawn by real_phong_line_z,
; which normalizes the interpolated normal vector in every pixel.
;
; in:
;    eax  - x1 shl 16 + y1
;    ebx  - x2 shl 16 + y2
;    ecx  - x3 shl 16 + y3
;    esi  - pointer to Z-buffer filled with dd float variables
;    edi  - pointer to screen buffer
;    xmm0 - 1st normal vector
;    xmm1 - 2nd normal vector
;    xmm2 - 3rd normal vector
;    xmm3 - normalized light vector
;    xmm4 - lo -> hi z1, z2, z3 coords as dword floats
;    xmm5 - lo -> hi y_min, y_max, x_min, x_max as dword integers
;    stack - no parameters
;
; The procedure does not preserve registers!
;--------------------------------------------------------------------

        push    ebp
        mov     ebp,esp
        sub     esp,512                 ; reserve the area that holds the locals
        sub     ebp,16
        and     ebp,0xfffffff0          ; 16-byte aligned base, locals are accessed with movaps/movdqa

  ; ---- locals, addressed relative to the aligned ebp ----
  .1_nv   equ [ebp-16]                  ; normal vectors of the (sorted) vertices
  .2_nv   equ [ebp-32]
  .3_nv   equ [ebp-48]
  .l_v    equ [ebp-64]                  ; light vector
  .z3     equ [ebp-72]                  ; z1, z2, z3 are stored with one movaps at .z1
  .z2     equ [ebp-76]
  .z1     equ [ebp-80]
  .x1     equ [ebp-82]                  ; each x/y pair is stored as one dword (y = low word)
  .y1     equ [ebp-84]
  .x2     equ [ebp-86]
  .y2     equ [ebp-88]
  .x3     equ [ebp-90]
  .y3     equ [ebp-92]
  .Zbuf   equ [ebp-96]
  .x_max  equ [ebp-100]                 ; y_min..x_max are stored with one movdqa at .y_min
  .x_min  equ [ebp-104]
  .y_max  equ [ebp-108]
  .y_min  equ [ebp-112]
  .screen equ [ebp-116]
  .dx12   equ [ebp-120]                 ; per-scanline x deltas of the edges (fixed point, ROUND2 bits)
  .dx13   equ [ebp-124]
  .dx23   equ [ebp-128]
  .dn12   equ [ebp-144]                 ; per-scanline normal vector deltas of the edges
  .dn13   equ [ebp-160]
  .dn23   equ [ebp-176]
  .dz12   equ [ebp-180]                 ; per-scanline z deltas of the edges
  .dz13   equ [ebp-184]
  .dz23   equ [ebp-188]

  .cnv1   equ [ebp-208]                 ; current normal vectors
  .cnv2   equ [ebp-224]
  .cz2    equ [ebp-228]                 ; current z values (cz1, cz2 are loaded together with movlps)
  .cz1    equ [ebp-232]


  .sort3:                               ; sort triangle vertices by ascending y (y is the low word)
        cmp     ax,bx
        jle     .sort1
        xchg    eax,ebx                 ; swap vertices 1 and 2 ...
        shufps  xmm4,xmm4,11100001b     ; ... together with z1 <-> z2 ...
        movaps  xmm6,xmm0               ; ... and their normal vectors
        movaps  xmm0,xmm1
        movaps  xmm1,xmm6

  .sort1:
        cmp     bx,cx
        jle     .sort2
        xchg    ebx,ecx                 ; swap vertices 2 and 3 ...
        shufps  xmm4,xmm4,11011000b     ; ... together with z2 <-> z3 ...
        movaps  xmm6,xmm1               ; ... and their normal vectors
        movaps  xmm1,xmm2
        movaps  xmm2,xmm6

        jmp     .sort3                  ; the first pair has to be checked again after a swap

  .sort2:

        movaps  .z1,xmm4                ; z1, z2, z3
        mov     .y1,eax                 ; low word -> .y1, high word -> .x1
        mov     .y2,ebx
        mov     .y3,ecx

        movdqa  .y_min,xmm5             ; y_min, y_max, x_min, x_max
if 1                                    ; check if at least a fragment of the triangle
        packssdw xmm5,xmm5              ; can be in the visible area
        pshuflw  xmm5,xmm5,11011000b    ; low words: y_min, x_min, y_max, x_max
        movdqu   xmm7,.y3               ; words 0..5: y3, x3, y2, x2, y1, x1; words 6..7: halves of z1
        movdqa   xmm6,xmm5
        pshufd   xmm5,xmm5,0            ; xmm5 lo-hi -> broadcasted y_min, x_min
        pshufd   xmm6,xmm6,01010101b    ; xmm6 -> broadcasted y_max, x_max
        movdqa   xmm4,xmm7
        pcmpgtw  xmm7,xmm5              ; coord > min
        pcmpgtw  xmm4,xmm6              ; coord > max
        pxor     xmm7,xmm4              ; set where min < coord <= max
        pmovmskb eax,xmm7               ; 16-bit mask, one bit per byte
        and      eax,0x0aaa             ; keep one bit for each of the six coordinate words only;
                                        ; words 6..7 are z1 float bits and must not take part in the test
        jz       .rpt_loop2_end         ; no coordinate inside its range -> nothing to draw
end if
        movaps  .1_nv,xmm0
        movaps  .2_nv,xmm1
        movaps  .3_nv,xmm2
        movaps  .l_v,xmm3
        mov     .Zbuf,esi
        mov     .screen,edi


  ; ---- deltas of edge 1-2 ----
        mov     bx,.y2
        sub     bx,.y1
        jnz     .rpt_dx12_make

        xorps   xmm7,xmm7               ; y2 = y1: all deltas are zero
        mov     dword .dx12,0
        mov     dword .dz12,0
        movaps  .dn12,xmm7
        jmp     .rpt_dx12_done

  .rpt_dx12_make:
        mov     ax,.x2
        sub     ax,.x1
        cwde
        movsx   ebx,bx
        shl     eax,ROUND2
        cdq
        idiv    ebx
        mov     .dx12,eax               ; ((x2 - x1) shl ROUND2) / (y2 - y1)

        cvtsi2ss xmm6,ebx
        movss   xmm5,.z2
        subss   xmm5,.z1
        divss   xmm5,xmm6
        movss   .dz12,xmm5              ; (z2 - z1) / (y2 - y1)

        movaps  xmm0,.2_nv
        subps   xmm0,.1_nv
        shufps  xmm6,xmm6,0             ; broadcast (y2 - y1)
        divps   xmm0,xmm6
        movaps  .dn12,xmm0              ; (n2 - n1) / (y2 - y1)

  .rpt_dx12_done:

  ; ---- deltas of edge 1-3 ----
        mov     bx,.y3
        sub     bx,.y1
        jnz     .rpt_dx13_make

        xorps   xmm7,xmm7               ; y3 = y1: all deltas are zero
        mov     dword .dx13,0
        mov     dword .dz13,0
        movaps  .dn13,xmm7
        jmp     .rpt_dx13_done

  .rpt_dx13_make:
        mov     ax,.x3
        sub     ax,.x1
        cwde
        movsx   ebx,bx
        shl     eax,ROUND2
        cdq
        idiv    ebx
        mov     .dx13,eax               ; ((x3 - x1) shl ROUND2) / (y3 - y1)

        cvtsi2ss xmm6,ebx
        movss   xmm5,.z3
        subss   xmm5,.z1
        divss   xmm5,xmm6
        movss   .dz13,xmm5              ; (z3 - z1) / (y3 - y1)

        movaps  xmm0,.3_nv
        subps   xmm0,.1_nv
        shufps  xmm6,xmm6,0             ; broadcast (y3 - y1)
        divps   xmm0,xmm6
        movaps  .dn13,xmm0              ; (n3 - n1) / (y3 - y1)

  .rpt_dx13_done:

  ; ---- deltas of edge 2-3 ----
        mov     bx,.y3
        sub     bx,.y2
        jnz     .rpt_dx23_make

        xorps   xmm7,xmm7               ; y3 = y2: all deltas are zero
        mov     dword .dx23,0
        mov     dword .dz23,0
        movaps  .dn23,xmm7
        jmp     .rpt_dx23_done

  .rpt_dx23_make:
        mov     ax,.x3
        sub     ax,.x2
        cwde
        movsx   ebx,bx
        shl     eax,ROUND2
        cdq
        idiv    ebx
        mov     .dx23,eax               ; ((x3 - x2) shl ROUND2) / (y3 - y2)

        cvtsi2ss xmm6,ebx
        movss   xmm5,.z3
        subss   xmm5,.z2
        divss   xmm5,xmm6
        movss   .dz23,xmm5              ; (z3 - z2) / (y3 - y2)

        movaps  xmm0,.3_nv
        subps   xmm0,.2_nv
        shufps  xmm6,xmm6,0             ; broadcast (y3 - y2)
        divps   xmm0,xmm6
        movaps  .dn23,xmm0              ; (n3 - n2) / (y3 - y2)

  .rpt_dx23_done:


  ; ---- both edge walkers start at vertex 1 ----
        movsx   eax,word .x1
        shl     eax,ROUND2              ; eax - cur x on edge 1-3 (fixed point)
        mov     ebx,eax                 ; ebx - cur x on edge 1-2 (fixed point)
        mov     edx,.z1
        mov     .cz1,edx
        mov     .cz2,edx
        movaps  xmm0,.1_nv
        movaps  .cnv1,xmm0
        movaps  .cnv2,xmm0


  ; ---- upper part: scanlines y1 .. y2-1 ----
        movsx   ecx,word .y1
        cmp     cx,.y2

        jge     .rpt_loop1_end

  .rpt_loop1:
        pushad                          ; eax, ebx, ecx have to survive the call

        movaps  xmm2,.y_min             ; clipping rectangle
        movaps  xmm0,.cnv1
        movaps  xmm1,.cnv2
        movlps  xmm3,.cz1               ; loads cz1 and cz2
        movaps  xmm4,.l_v
        sar     ebx,ROUND2              ; fixed point -> integer x
        sar     eax,ROUND2
        mov     edi,.screen
        mov     esi,.Zbuf

        call    real_phong_line_z

        popad
        movaps  xmm0,.cnv1              ; step edge 1-3 (cnv1, cz1, eax)
        movaps  xmm1,.cnv2              ; and edge 1-2 (cnv2, cz2, ebx) to the next scanline
        movss   xmm2,.cz1
        movss   xmm3,.cz2
        addps   xmm0,.dn13
        addps   xmm1,.dn12
        addss   xmm2,.dz13
        addss   xmm3,.dz12
        add     eax,.dx13
        add     ebx,.dx12

        movaps  .cnv1,xmm0
        movaps  .cnv2,xmm1
        movss   .cz1,xmm2
        movss   .cz2,xmm3

        add     ecx,1
        cmp     cx,.y2
        jl      .rpt_loop1


  ; ---- lower part: scanlines y2 .. y3-1 ----
  .rpt_loop1_end:
        movsx   ecx,word .y2
        cmp     cx,.y3
        jge     .rpt_loop2_end

        movsx   ebx,word .x2            ; eax - cur x1 (continues along edge 1-3)
        shl     ebx,ROUND2              ; ebx - cur x2 (restarts at vertex 2)
        push    dword .z2
        pop     dword .cz2
        movaps  xmm0,.2_nv
        movaps  .cnv2,xmm0


  .rpt_loop2:
        pushad                          ; eax, ebx, ecx have to survive the call

        movaps  xmm2,.y_min             ; clipping rectangle
        movaps  xmm0,.cnv1
        movaps  xmm1,.cnv2
        movlps  xmm3,.cz1               ; loads cz1 and cz2
        movaps  xmm4,.l_v
        sar     ebx,ROUND2              ; fixed point -> integer x
        sar     eax,ROUND2
        mov     edi,.screen
        mov     esi,.Zbuf

        call    real_phong_line_z

        popad
        movaps  xmm0,.cnv1              ; step edge 1-3 (cnv1, cz1, eax)
        movaps  xmm1,.cnv2              ; and edge 2-3 (cnv2, cz2, ebx) to the next scanline
        movss   xmm2,.cz1
        movss   xmm3,.cz2
        addps   xmm0,.dn13
        addps   xmm1,.dn23
        addss   xmm2,.dz13
        addss   xmm3,.dz23
        add     eax,.dx13
        add     ebx,.dx23

        movaps  .cnv1,xmm0
        movaps  .cnv2,xmm1
        movss   .cz1,xmm2
        movss   .cz2,xmm3

        add     ecx,1
        cmp     cx,.y3
        jl      .rpt_loop2

  .rpt_loop2_end:

        add     esp,512                 ; esp was only changed by the sub above and balanced pushes
        pop     ebp

ret
344
real_phong_line_z:
;--------------------------------------------------------------------
; Draws one horizontal line of a Phong shaded triangle with Z-buffer
; test. The normal vector and z are interpolated linearly between
; both line ends; the normal vector is normalized in every pixel.
;
; in:
;    xmm0 - normal vector 1
;    xmm1 - normal vector 2
;    xmm2 - lo -> hi y_min, y_max, x_min, x_max
;           as dword integers
;    xmm3 - lo -> hi z1, z2 coords as dword floats
;    xmm4 - normalized light vector
;    eax - x1
;    ebx - x2
;    ecx - y
;    edi - screen buffer
;    esi - z buffer filled with dd floats
;
; globals used: size_x_var, lights_aligned, lights_aligned_end,
;               zero_hgst_dd, mask_255f
;
; General purpose and xmm registers are not preserved (only ebp is).
;--------------------------------------------------------------------

        push    ebp
        mov     ebp,esp
        sub     esp,160                 ; reserve the area that holds the locals
        sub     ebp,16
        and     ebp,0xfffffff0          ; 16-byte aligned base, locals are accessed with movaps

  ; ---- locals, addressed relative to the aligned ebp ----
  .n1     equ [ebp-16]                  ; normal vector at the left / right end
  .n2     equ [ebp-32]
  .lv     equ [ebp-48]                  ; light vector (stored, the shading loop reads the global lights)
  .lx1    equ [ebp-52]                  ; left / right x of the line
  .lx2    equ [ebp-56]
  .z2     equ [ebp-60]                  ; z1, z2 are stored together with movlps at .z1
  .z1     equ [ebp-64]
  .screen equ [ebp-68]
  .zbuff  equ [ebp-72]
  .x_max  equ [ebp-74]                  ; clipping rectangle as signed words,
  .x_min  equ [ebp-76]                  ; stored with one movq at .y_min
  .y_max  equ [ebp-78]
  .y_min  equ [ebp-80]
  .dn     equ [ebp-96]                  ; per-pixel normal vector delta
  .dz     equ [ebp-100]                 ; per-pixel z delta
  .y      equ [ebp-104]
  .cnv    equ [ebp-128]                 ; normalized normal vector of the current pixel

        mov     .y,ecx
        packssdw xmm2,xmm2              ; dwords -> signed words
        movq    .y_min,xmm2
        cmp     cx,.y_min               ; reject lines outside y_min <= y < y_max
        jl      .end_rp_line
        cmp     cx,.y_max
        jge     .end_rp_line

        cmp     eax,ebx
        je      .end_rp_line            ; empty line
        jl      @f
        xchg    eax,ebx                 ; make eax the left end: swap x, normal vectors and z
        movaps  xmm7,xmm0
        movaps  xmm0,xmm1
        movaps  xmm1,xmm7
        shufps  xmm3,xmm3,11100001b
      @@:

        cmp     ax,.x_max               ; reject lines completely outside the x range
        jge     .end_rp_line
        cmp     bx,.x_min
        jle     .end_rp_line
        movaps  .lv,xmm4
        movaps  .n1,xmm0
        movaps  .n2,xmm1
        mov     .lx1,eax
        mov     .lx2,ebx
        movlps  .z1,xmm3                ; z1 and z2

        sub     ebx,eax                 ; line length in pixels
        cvtsi2ss xmm7,ebx
        shufps  xmm7,xmm7,0
        subps   xmm1,xmm0
        divps   xmm1,xmm7
        movaps  .dn,xmm1                ; (n2 - n1) / length
        psrldq  xmm3,4                  ; xmm3 low dword = z2
        subss   xmm3,.z1
        divss   xmm3,xmm7
        movss   .dz,xmm3                ; (z2 - z1) / length


  ; ---- clip the left end to x_min; xmm1 = dn, xmm3 = dz here ----
        mov     ebx,.lx1
        cmp     bx,.x_min
        jge     @f
        movsx   eax,word .x_min         ; x_min is a signed word (packssdw, signed compares)
        sub     eax,ebx                 ; number of skipped pixels
        cvtsi2ss xmm7,eax
        shufps  xmm7,xmm7,0
        mulss   xmm3,xmm7
        mulps   xmm1,xmm7
        addss   xmm3,.z1                ; z and normal vector advanced to x_min
        addps   xmm1,.n1
        movsx   eax,word .x_min
        movss   .z1,xmm3
        movaps  .n1,xmm1
        mov     dword .lx1,eax

      @@:
  ; ---- clip the right end to x_max ----
        movsx   eax,word .x_max         ; signed, same as x_min
        cmp     .lx2,eax
        jl      @f
        mov     .lx2,eax
      @@:
  ; ---- offset of the first pixel: (size_x_var * y + lx1) * 4, same for both buffers ----
        movzx   eax,word[size_x_var]
        mul     dword .y
      ;  mov     edx,.x1
        add     eax,.lx1
        shl     eax,2
        add     edi,eax
        add     esi,eax

        mov     ecx,.lx2
        sub     ecx,.lx1                ; ecx - pixel counter
        movaps  xmm0,.n1                ; xmm0 - cur normal vector
        movss   xmm2,.z1                ; xmm2 - cur z

   .ddraw:
        movss    xmm7,xmm2
        cmpnltss xmm7,dword[esi]        ; mask set when cur z is not less than the stored z
        movd     eax,xmm7
        test     eax,eax
        jnz      .skip                  ; pixel is hidden
        movss    [esi],xmm2             ; update Z-buffer
        movaps   xmm7,xmm0
        mulps    xmm7,xmm7              ; normalize: sum of squares of all four dwords,
        haddps   xmm7,xmm7
        haddps   xmm7,xmm7
        rsqrtps  xmm7,xmm7              ; approximate reciprocal square root,
        mulps    xmm7,xmm0              ; times the vector
        movaps   .cnv,xmm7

        mov      edx,lights_aligned     ; lights - global variable, 64 bytes per light
        xorps    xmm1,xmm1              ; instead of the global, .lv - light vect. could be used
      @@:
        movaps   xmm6,[edx+16]          ; scaled by the dot product (presumably diffuse color)
        movaps   xmm5,[edx]             ; dotted with the normal vector (presumably light direction)
        movaps   xmm3,[edx+48]          ; scaled by dot^8 (presumably specular color)
        andps    xmm5,[zero_hgst_dd]    ; global

        mulps    xmm5,.cnv  ;.lv        ; last dword should be zeroed
        haddps   xmm5,xmm5
        haddps   xmm5,xmm5              ; dot product broadcast to all dwords
   ;     mulps    xmm5,[env_const2]
   ;     maxps    xmm5,[dot_min]
   ;     minps    xmm5,[dot_max]
        movaps   xmm7,xmm5
      ;  mulps    xmm7,[env_const2]
     ;   mulps    xmm7,[env_const2]
     ;   maxps    xmm7,[dot_min]
     ;   minps    xmm7,[dot_max]

        mulps    xmm7,xmm7              ; dot^2
        mulps    xmm7,xmm7              ; dot^4
        mulps    xmm5,xmm6              ; dot * [edx+16]
        mulps    xmm7,xmm7              ; dot^8
        mulps    xmm7,xmm3              ; dot^8 * [edx+48]

        addps    xmm5,xmm7
        minps    xmm5,[mask_255f]       ; global, upper limit of the color components
        maxps    xmm1,xmm5              ; keep the per-component maximum over all lights
     ;   movq     xmm3,[edx+20]    ; minimal color
     ;   punpcklwd xmm3,[minimum0]
     ;   cvtdq2ps xmm3,xmm3
     ;   maxps    xmm1,xmm3
        add      edx,64
        cmp      edx,lights_aligned_end ; global
        jnz      @b

        cvtps2dq xmm1,xmm1              ; floats -> dwords -> words -> bytes
        packssdw xmm1,xmm1
        packuswb xmm1,xmm1
        movd     [edi],xmm1             ; store the 4-byte pixel
     .skip:
        add      edi,4
        add      esi,4
        addps    xmm0,.dn
        addss    xmm2,.dz
        sub      ecx,1
        jnz      .ddraw

  .end_rp_line:
        add      esp,160
        pop      ebp

ret