Rev 8047 | Rev 9237 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
8047 | leency | 1 | ; Real Phong's shading implemented in flat assembler |
2 | ; by Maciej Guba. |
||
3 | ; http://macgub.vxm.pl |
||
4 | |||
ROUND2 equ 10                   ; number of fractional bits in the fixed-point edge x values

real_phong_tri_z:
;--------------------------------------------------------------------
; Renders one Phong-shaded triangle with z coordinate interpolation
; ( Catmull algorithm ). The triangle is split into scanlines; every
; scanline is drawn by real_phong_line_z, which normalizes the
; interpolated normal vector in every pixel.
;
; in:  eax  - x1 shl 16 + y1
;      ebx  - x2 shl 16 + y2
;      ecx  - x3 shl 16 + y3
;      esi  - pointer to Z-buffer filled with dd float variables
;      edi  - pointer to screen buffer
;      xmm0 - 1st normal vector
;      xmm1 - 2nd normal vector
;      xmm2 - 3rd normal vector
;      xmm3 - normalized light vector
;      xmm4 - lo -> hi z1, z2, z3 coords as dword floats
;      xmm5 - lo -> hi y_min, y_max, x_min, x_max as dword integers
;      stack - no parameters
;
; The procedure does not save registers !!
;--------------------------------------------------------------------

        push    ebp
        mov     ebp,esp
        sub     esp,512
        sub     ebp,16
        and     ebp,0xfffffff0          ; locals are addressed from a 16-byte aligned
                                        ; base so movaps/movdqa can be used on them

  .1_nv    equ [ebp-16]
  .2_nv    equ [ebp-32]
  .3_nv    equ [ebp-48]
  .l_v     equ [ebp-64]
  .z3      equ [ebp-72]
  .z2      equ [ebp-76]
  .z1      equ [ebp-80]
  .x1      equ [ebp-82]
  .y1      equ [ebp-84]
  .x2      equ [ebp-86]
  .y2      equ [ebp-88]
  .x3      equ [ebp-90]
  .y3      equ [ebp-92]
  .Zbuf    equ [ebp-96]
  .x_max   equ [ebp-100]
  .x_min   equ [ebp-104]
  .y_max   equ [ebp-108]
  .y_min   equ [ebp-112]
  .screen  equ [ebp-116]
  .dx12    equ [ebp-120]
  .dx13    equ [ebp-124]
  .dx23    equ [ebp-128]
  .dn12    equ [ebp-144]
  .dn13    equ [ebp-160]
  .dn23    equ [ebp-176]
  .dz12    equ [ebp-180]
  .dz13    equ [ebp-184]
  .dz23    equ [ebp-188]

  .cnv1    equ [ebp-208]        ; current normal vectors on the two active edges
  .cnv2    equ [ebp-224]
  .cz2     equ [ebp-228]        ; current z on the two active edges
  .cz1     equ [ebp-232]


  ; Sort the three vertices by ascending y (low word of eax/ebx/ecx,
  ; signed compare). The z lanes of xmm4 and the normals in xmm0..xmm2
  ; are swapped together with the packed coordinates.
  .sort3:
        cmp     ax,bx
        jle     .sort1
        xchg    eax,ebx
        shufps  xmm4,xmm4,11100001b     ; swap z1 <-> z2
        movaps  xmm6,xmm0
        movaps  xmm0,xmm1
        movaps  xmm1,xmm6

  .sort1:
        cmp     bx,cx
        jle     .sort2
        xchg    ebx,ecx
        shufps  xmm4,xmm4,11011000b     ; swap z2 <-> z3
        movaps  xmm6,xmm1
        movaps  xmm1,xmm2
        movaps  xmm2,xmm6

        jmp     .sort3                  ; vertex 2 changed, re-check the first pair

  .sort2:

        movaps  .z1,xmm4                ; z1, z2, z3 (4th lane is not used)
        mov     .y1,eax                 ; one dword store fills both .y1 (low word)
        mov     .y2,ebx                 ; and .x1 (high word); same for 2 and 3
        mov     .y3,ecx

        movdqa  .y_min,xmm5             ; y_min, y_max, x_min, x_max
if 1    ; check if at least a fragment of the triangle can be in the visible area
        packssdw xmm5,xmm5              ; limits as words: y_min, y_max, x_min, x_max
        pshuflw xmm5,xmm5,11011000b     ; -> y_min, x_min, y_max, x_max
        movdqu  xmm7,.y3                ; words: y3, x3, y2, x2, y1, x1, z1 lo, z1 hi
        movdqa  xmm6,xmm5
        pshufd  xmm5,xmm5,0             ; xmm5 lo-hi -> broadcasted y_min, x_min
        pshufd  xmm6,xmm6,01010101b     ; xmm6 -> broadcasted y_max, x_max
        movdqa  xmm4,xmm7
        pcmpgtw xmm7,xmm5               ; coordinate > minimum
        pcmpgtw xmm4,xmm6               ; coordinate > maximum
        pxor    xmm7,xmm4               ; set where minimum < coordinate <= maximum
        pmovmskb eax,xmm7               ; 16 bits, two per word lane
        ; Only the six coordinate words (lanes 0..5 -> bits 1,3,5,7,9,11)
        ; take part in the test. Lanes 6 and 7 hold the bit pattern of the
        ; float z1, not a coordinate, so they are masked out.
        ; NOTE(review): this is a per-coordinate heuristic - a triangle whose
        ; vertices are all outside in both x and y is rejected even if it
        ; spans the window; confirm this is acceptable for the callers.
        test    eax,0x0aaa
        jz      .rpt_loop2_end          ; no coordinate is inside its limits - skip
end if
        movaps  .1_nv,xmm0
        movaps  .2_nv,xmm1
        movaps  .3_nv,xmm2
        movaps  .l_v,xmm3
        mov     .Zbuf,esi
        mov     .screen,edi


  ; --- per-scanline deltas along edge 1-2 -----------------------------
  ; dx12 = ((x2-x1) shl ROUND2) / (y2-y1)   fixed point
  ; dz12 = (z2-z1) / (y2-y1)                float
  ; dn12 = (n2-n1) / (y2-y1)                4 floats
  ; All deltas are zero when the edge is horizontal (avoids division by 0).
        mov     bx,.y2
        sub     bx,.y1
        jnz     .rpt_dx12_make

        xorps   xmm7,xmm7
        mov     dword .dx12,0
        mov     dword .dz12,0
        movaps  .dn12,xmm7
        jmp     .rpt_dx12_done

  .rpt_dx12_make:
        mov     ax,.x2
        sub     ax,.x1
        cwde
        movsx   ebx,bx
        shl     eax,ROUND2
        cdq
        idiv    ebx
        mov     .dx12,eax

        cvtsi2ss xmm6,ebx
        movss   xmm5,.z2
        subss   xmm5,.z1
        divss   xmm5,xmm6
        movss   .dz12,xmm5

        movaps  xmm0,.2_nv
        subps   xmm0,.1_nv
        shufps  xmm6,xmm6,0             ; broadcast (y2-y1) to all lanes
        divps   xmm0,xmm6
        movaps  .dn12,xmm0

  .rpt_dx12_done:

  ; --- per-scanline deltas along edge 1-3 -----------------------------
        mov     bx,.y3
        sub     bx,.y1
        jnz     .rpt_dx13_make

        xorps   xmm7,xmm7
        mov     dword .dx13,0
        mov     dword .dz13,0
        movaps  .dn13,xmm7
        jmp     .rpt_dx13_done

  .rpt_dx13_make:
        mov     ax,.x3
        sub     ax,.x1
        cwde
        movsx   ebx,bx
        shl     eax,ROUND2
        cdq
        idiv    ebx
        mov     .dx13,eax

        cvtsi2ss xmm6,ebx
        movss   xmm5,.z3
        subss   xmm5,.z1
        divss   xmm5,xmm6
        movss   .dz13,xmm5

        movaps  xmm0,.3_nv
        subps   xmm0,.1_nv
        shufps  xmm6,xmm6,0             ; broadcast (y3-y1) to all lanes
        divps   xmm0,xmm6
        movaps  .dn13,xmm0

  .rpt_dx13_done:

  ; --- per-scanline deltas along edge 2-3 -----------------------------
        mov     bx,.y3
        sub     bx,.y2
        jnz     .rpt_dx23_make

        xorps   xmm7,xmm7
        mov     dword .dx23,0
        mov     dword .dz23,0
        movaps  .dn23,xmm7
        jmp     .rpt_dx23_done

  .rpt_dx23_make:
        mov     ax,.x3
        sub     ax,.x2
        cwde
        movsx   ebx,bx
        shl     eax,ROUND2
        cdq
        idiv    ebx
        mov     .dx23,eax

        cvtsi2ss xmm6,ebx
        movss   xmm5,.z3
        subss   xmm5,.z2
        divss   xmm5,xmm6
        movss   .dz23,xmm5

        movaps  xmm0,.3_nv
        subps   xmm0,.2_nv
        shufps  xmm6,xmm6,0             ; broadcast (y3-y2) to all lanes
        divps   xmm0,xmm6
        movaps  .dn23,xmm0

  .rpt_dx23_done:


  ; --- upper part: scanlines y1 .. y2-1, between edges 1-3 and 1-2 ----
  ; eax - current x on edge 1-3, ebx - current x on edge 1-2 (fixed point)
  ; ecx - current y
        movsx   eax,word .x1
        shl     eax,ROUND2
        mov     ebx,eax
        mov     edx,.z1
        mov     .cz1,edx
        mov     .cz2,edx
        movaps  xmm0,.1_nv
        movaps  .cnv1,xmm0
        movaps  .cnv2,xmm0


        movsx   ecx,word .y1
        cmp     cx,.y2

        jge     .rpt_loop1_end

  .rpt_loop1:
        pushad                          ; the line procedure does not preserve registers

        movaps  xmm2,.y_min             ; clip limits
        movaps  xmm0,.cnv1
        movaps  xmm1,.cnv2
        movlps  xmm3,.cz1               ; loads .cz1 and .cz2 (adjacent dwords)
        movaps  xmm4,.l_v
        sar     ebx,ROUND2              ; fixed point -> integer x
        sar     eax,ROUND2
        mov     edi,.screen
        mov     esi,.Zbuf

        call    real_phong_line_z

        popad
        movaps  xmm0,.cnv1              ; step both edges to the next scanline
        movaps  xmm1,.cnv2
        movss   xmm2,.cz1
        movss   xmm3,.cz2
        addps   xmm0,.dn13
        addps   xmm1,.dn12
        addss   xmm2,.dz13
        addss   xmm3,.dz12
        add     eax,.dx13
        add     ebx,.dx12

        movaps  .cnv1,xmm0
        movaps  .cnv2,xmm1
        movss   .cz1,xmm2
        movss   .cz2,xmm3

        add     ecx,1
        cmp     cx,.y2
        jl      .rpt_loop1


  ; --- lower part: scanlines y2 .. y3-1, between edges 1-3 and 2-3 ----
  .rpt_loop1_end:
        movsx   ecx,word .y2
        cmp     cx,.y3
        jge     .rpt_loop2_end

        movsx   ebx,word .x2            ; eax - cur x1 (continues along edge 1-3)
        shl     ebx,ROUND2              ; ebx - cur x2 (restarts at vertex 2)
        push    dword .z2
        pop     dword .cz2
        movaps  xmm0,.2_nv
        movaps  .cnv2,xmm0


  .rpt_loop2:
        pushad

        movaps  xmm2,.y_min
        movaps  xmm0,.cnv1
        movaps  xmm1,.cnv2
        movlps  xmm3,.cz1
        movaps  xmm4,.l_v
        sar     ebx,ROUND2
        sar     eax,ROUND2
        mov     edi,.screen
        mov     esi,.Zbuf

        call    real_phong_line_z

        popad
        movaps  xmm0,.cnv1
        movaps  xmm1,.cnv2
        movss   xmm2,.cz1
        movss   xmm3,.cz2
        addps   xmm0,.dn13
        addps   xmm1,.dn23
        addss   xmm2,.dz13
        addss   xmm3,.dz23
        add     eax,.dx13
        add     ebx,.dx23

        movaps  .cnv1,xmm0
        movaps  .cnv2,xmm1
        movss   .cz1,xmm2
        movss   .cz2,xmm3

        add     ecx,1
        cmp     cx,.y3
        jl      .rpt_loop2

  .rpt_loop2_end:

        add     esp,512
        pop     ebp

        ret
||
align 16
real_phong_line_z:
;--------------------------------------------------------------------
; Draws one horizontal span of a Phong-shaded triangle. Normal vector
; and z are interpolated linearly from x1 to x2; a pixel is shaded and
; written only when its z is below the value stored in the Z-buffer.
;
; in:
;   xmm0 - normal vector 1 (at x1)
;   xmm1 - normal vector 2 (at x2)
;   xmm3 - lo -> hi z1, z2 coords as dword floats
;   xmm2 - lo -> hi y_min, y_max, x_min, x_max as dword integers
;   xmm4 - normalized light vector
;   eax  - x1
;   ebx  - x2
;   ecx  - y
;   edi  - screen buffer
;   esi  - z buffer filled with dd floats
;
; Reads the globals size_x_var, lights_aligned .. lights_aligned_end,
; zero_hgst_dd and mask_255f. Registers are not preserved.
;--------------------------------------------------------------------

        push    ebp
        mov     ebp,esp
        sub     esp,160
        sub     ebp,16
        and     ebp,0xfffffff0          ; 16-byte aligned base for the movaps locals

  .n1      equ [ebp-16]
  .n2      equ [ebp-32]
  .lv      equ [ebp-48]
  .lx1     equ [ebp-52]
  .lx2     equ [ebp-56]
  .z2      equ [ebp-60]
  .z1      equ [ebp-64]
  .screen  equ [ebp-68]
  .zbuff   equ [ebp-72]
  .x_max   equ [ebp-74]
  .x_min   equ [ebp-76]
  .y_max   equ [ebp-78]
  .y_min   equ [ebp-80]
  .dn      equ [ebp-96]
  .dz      equ [ebp-100]
  .y       equ [ebp-104]
  .cnv     equ [ebp-128]

  ; --- reject the span when y is outside [y_min, y_max) ---------------
        packssdw xmm2,xmm2              ; limits as four consecutive words
        mov     .y,ecx
        movq    .y_min,xmm2             ; fills .y_min, .y_max, .x_min, .x_max
        cmp     cx,.y_min
        jl      .rpl_done
        cmp     cx,.y_max
        jge     .rpl_done

  ; --- order the end points so that eax < ebx; empty span is skipped --
        cmp     eax,ebx
        jl      .rpl_ordered
        je      .rpl_done
        xchg    eax,ebx
        movaps  xmm7,xmm0
        movaps  xmm0,xmm1
        movaps  xmm1,xmm7
        shufps  xmm3,xmm3,11100001b     ; swap z1 <-> z2
  .rpl_ordered:

  ; --- reject the span when it lies completely outside in x -----------
        cmp     ax,.x_max
        jge     .rpl_done
        cmp     bx,.x_min
        jle     .rpl_done

        mov     .lx1,eax
        mov     .lx2,ebx
        movlps  .z1,xmm3                ; stores z1 and z2
        movaps  .n1,xmm0
        movaps  .n2,xmm1
        movaps  .lv,xmm4

  ; --- per-pixel deltas: dn = (n2-n1)/(x2-x1), dz = (z2-z1)/(x2-x1) ---
        sub     ebx,eax
        cvtsi2ss xmm7,ebx
        shufps  xmm7,xmm7,0             ; broadcast span length
        subps   xmm1,xmm0
        divps   xmm1,xmm7
        movaps  .dn,xmm1
        psrldq  xmm3,4                  ; bring z2 into the low lane
        subss   xmm3,.z1
        divss   xmm3,xmm7
        movss   .dz,xmm3

  ; --- clip the left end to x_min, advancing z1 and n1 accordingly ----
        mov     ebx,.lx1
        cmp     bx,.x_min
        jge     .rpl_left_ok
        movzx   eax,word .x_min
        sub     eax,ebx                 ; number of skipped pixels
        cvtsi2ss xmm7,eax
        shufps  xmm7,xmm7,0
        mulss   xmm3,xmm7               ; dz * skipped
        mulps   xmm1,xmm7               ; dn * skipped
        addss   xmm3,.z1
        addps   xmm1,.n1
        movsx   eax,word .x_min
        movss   .z1,xmm3
        movaps  .n1,xmm1
        mov     dword .lx1,eax

  .rpl_left_ok:
  ; --- clip the right end to x_max ------------------------------------
        movzx   eax,word .x_max
        cmp     .lx2,eax
        jl      .rpl_right_ok
        mov     .lx2,eax
  .rpl_right_ok:

  ; --- advance both buffers to pixel (lx1, y): 4 bytes per pixel ------
        movzx   eax,word[size_x_var]
        mul     dword .y
        add     eax,.lx1
        lea     edi,[edi+eax*4]
        lea     esi,[esi+eax*4]

        mov     ecx,.lx2
        sub     ecx,.lx1                ; ecx - pixel count
        movaps  xmm0,.n1                ; xmm0 - current normal
        movss   xmm2,.z1                ; xmm2 - current z
align 16
  .rpl_pixel:
        movss   xmm7,xmm2
        cmpnltss xmm7,dword[esi]        ; mask set when NOT ( z < stored z )
        movd    eax,xmm7
        test    eax,eax
        jnz     .rpl_next_pixel
        movss   [esi],xmm2              ; pixel passes the test - update Z-buffer

        movaps  xmm7,xmm0
        mulps   xmm7,xmm7               ; normalize: n * rsqrt( sum of squares )
        haddps  xmm7,xmm7
        haddps  xmm7,xmm7
        rsqrtps xmm7,xmm7
        mulps   xmm7,xmm0
        movaps  .cnv,xmm7

  ; Walk the light table in 64-byte steps and keep, per channel, the
  ; maximum contribution in xmm1.
  ; NOTE(review): [edx] is used as the light vector; [edx+16] and [edx+48]
  ; look like diffuse and specular colours - confirm against the table
  ; definition.
        xorps   xmm1,xmm1               ; instead of the global, .lv - light vect. could be used
        mov     edx,lights_aligned      ; lights - global variable
  .rpl_light:
        movaps  xmm5,[edx]
        movaps  xmm6,[edx+16]
        movaps  xmm3,[edx+48]
        andps   xmm5,[zero_hgst_dd]     ; global; last dword should be zeroed

        mulps   xmm5,.cnv               ; dot( light, normal ) in every lane
        haddps  xmm5,xmm5
        haddps  xmm5,xmm5
        movaps  xmm7,xmm5

        mulps   xmm7,xmm7               ; dot^2
        mulps   xmm7,xmm7               ; dot^4
        mulps   xmm7,xmm7               ; dot^8
        mulps   xmm5,xmm6               ; dot   * [edx+16]
        mulps   xmm7,xmm3               ; dot^8 * [edx+48]

        addps   xmm5,xmm7
        minps   xmm5,[mask_255f]        ; global
        maxps   xmm1,xmm5
        add     edx,64
        cmp     edx,lights_aligned_end  ; global
        jne     .rpl_light

        cvtps2dq xmm1,xmm1              ; floats -> dwords -> words -> bytes
        packssdw xmm1,xmm1
        packuswb xmm1,xmm1
        movd    [edi],xmm1
  .rpl_next_pixel:
        add     edi,4
        add     esi,4
        addps   xmm0,.dn
        addss   xmm2,.dz
        sub     ecx,1
        jnz     .rpl_pixel

  .rpl_done:
        add     esp,160
        pop     ebp

        ret