Rev 9512 | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
9237 | leency | 1 | ; Ray casted shadows |
2 | ; by Maciej Guba. |
||
3 | ; http://macgub.co.pl |
||
4 | |||
5 | |||
6 | ROUND2 equ 10 |
||
ray_shad:
;----------------------------------------------------------------------
; Render one triangle with ray-cast shadows.
; The triangle is split at its middle vertex and rasterized scanline by
; scanline; every scanline is handed to ray_shd_l, which tests each
; pixel against all triangles.  Not a real-time routine.
;
; in:   eax  - x1 shl 16 + y1
;       ebx  - x2 shl 16 + y2
;       ecx  - x3 shl 16 + y3
;       edx  - not read by this procedure
;       esi  - pointer to stencil / Z-buffer filled with dword floats
;              (the 'Z' coordinate of every front pixel)
;       edi  - pointer to screen buffer
;       xmm0 - 1st normal vector
;       xmm1 - 2nd normal vector
;       xmm2 - 3rd normal vector
;       xmm3 - not read by this procedure
;       xmm4 - lo -> hi: z1, z2, z3 coords as dword floats
;       xmm5 - lo -> hi: y_min, y_max, x_min, x_max as dword integers
;       mm7  - current triangle index
; stack: no parameters
; Registers are NOT preserved.
;----------------------------------------------------------------------

        push    ebp
        mov     ebp,esp
        sub     esp,1024                ; room for the locals below
        sub     ebp,16
        and     ebp,0xfffffff0          ; 16-byte aligned base, required by movaps/movdqa

  ; Locals.  These are FASM symbolic constants (equ), names kept as in
  ; the original source.
  .1_nv    equ [ebp-16]
  .2_nv    equ [ebp-32]
  .3_nv    equ [ebp-48]
  .l_v     equ [ebp-64]
  .z3      equ [ebp-72]
  .z2      equ [ebp-76]
  .z1      equ [ebp-80]
  .x1      equ [ebp-82]
  .y1      equ [ebp-84]
  .x2      equ [ebp-86]
  .y2      equ [ebp-88]
  .x3      equ [ebp-90]
  .y3      equ [ebp-92]
  .Zbuf    equ [ebp-96]
  .x_max   equ [ebp-100]
  .x_min   equ [ebp-104]
  .y_max   equ [ebp-108]
  .y_min   equ [ebp-112]
  .screen  equ [ebp-116]
  .dx12    equ [ebp-120]
  .dx13    equ [ebp-124]
  .dx23    equ [ebp-128]
  .dn12    equ [ebp-144]
  .dn13    equ [ebp-160]
  .dn23    equ [ebp-176]
  .dz12    equ [ebp-180]
  .dz13    equ [ebp-184]
  .dz23    equ [ebp-188]
  .cnv1    equ [ebp-208]                ; current normal vectors
  .cnv2    equ [ebp-240]
  .cz2     equ [ebp-244]
  .cz1     equ [ebp-248]
  .tri_no  equ [ebp-252]

;--- sort the three vertices by ascending y (low word of eax/ebx/ecx);
;--- z coords and normals are swapped together with the coordinates ---
  .sort3:
        cmp     ax,bx
        jle     .sort1
        xchg    eax,ebx
        shufps  xmm4,xmm4,11100001b     ; swap z1 <-> z2
        movaps  xmm6,xmm0
        movaps  xmm0,xmm1
        movaps  xmm1,xmm6

  .sort1:
        cmp     bx,cx
        jle     .sort2
        xchg    ebx,ecx
        shufps  xmm4,xmm4,11011000b     ; swap z2 <-> z3
        movaps  xmm6,xmm1
        movaps  xmm1,xmm2
        movaps  xmm2,xmm6

        jmp     .sort3                  ; vertex 2 changed - recheck the first pair

  .sort2:

        movaps  .z1,xmm4                ; stores z1, z2, z3 (+ one unused dword)
        mov     .y1,eax                 ; one dword store fills both .y1 and .x1
        mov     .y2,ebx
        mov     .y3,ecx

        movdqa  .y_min,xmm5             ; stores y_min, y_max, x_min, x_max
     if 1
        ; Early out: continue only if at least one vertex coordinate
        ; lies inside (min, max] of the visible area.
        packssdw xmm5,xmm5
        pshuflw  xmm5,xmm5,11011000b
        movdqu   xmm7,.y3               ; words lo -> hi: y3, x3, y2, x2, y1, x1, ...
        movdqa   xmm6,xmm5
        pshufd   xmm5,xmm5,0            ; xmm5 -> broadcasted y_min, x_min
        pshufd   xmm6,xmm6,01010101b    ; xmm6 -> broadcasted y_max, x_max
        movdqa   xmm4,xmm7
        pcmpgtw  xmm7,xmm5              ; coord > min
        pcmpgtw  xmm4,xmm6              ; coord > max
        pxor     xmm7,xmm4              ; set where min < coord <= max
        pmovmskb eax,xmm7
        ; NOTE(review): pmovmskb yields 16 bits, so this mask keeps one
        ; bit for each of 8 words; words 6..7 are the bit pattern of
        ; .z1, not coordinates - confirm whether 0x0aaa was intended.
        and     eax,0x00aaaaaa          ; also sets ZF for the jump below
        jz      .rpt_loop2_end
     end if
        movd    .tri_no,mm7
        movaps  .1_nv,xmm0
        movaps  .2_nv,xmm1
        movaps  .3_nv,xmm2
      ; movaps  .l_v,xmm3
        mov     .Zbuf,esi
        mov     .screen,edi

;--- per-scanline deltas along edge 1-2 -------------------------------
        mov     bx,.y2
        sub     bx,.y1
        jnz     .rpt_dx12_make

        xorps   xmm7,xmm7               ; horizontal edge - all deltas are zero
        mov     dword .dx12,0
        mov     dword .dz12,0
        movaps  .dn12,xmm7
        jmp     .rpt_dx12_done

  .rpt_dx12_make:
        mov     ax,.x2
        sub     ax,.x1
        cwde
        movsx   ebx,bx
        shl     eax,ROUND2              ; fixed point with ROUND2 fractional bits
        cdq
        idiv    ebx
        mov     .dx12,eax               ; dx12 = ((x2 - x1) shl ROUND2) / (y2 - y1)

        cvtsi2ss xmm6,ebx
        movss   xmm5,.z2
        rcpss   xmm6,xmm6               ; approximate 1 / (y2 - y1)
        subss   xmm5,.z1
        mulss   xmm5,xmm6
        movss   .dz12,xmm5

        shufps  xmm6,xmm6,0             ; broadcast the reciprocal
        movaps  xmm0,.2_nv
        subps   xmm0,.1_nv
        mulps   xmm0,xmm6
        movaps  .dn12,xmm0

  .rpt_dx12_done:

;--- per-scanline deltas along edge 1-3 -------------------------------
        mov     bx,.y3
        sub     bx,.y1
        jnz     .rpt_dx13_make

        xorps   xmm7,xmm7
        mov     dword .dx13,0
        mov     dword .dz13,0
        movaps  .dn13,xmm7
        jmp     .rpt_dx13_done

  .rpt_dx13_make:
        mov     ax,.x3
        sub     ax,.x1
        cwde
        movsx   ebx,bx
        shl     eax,ROUND2
        cdq
        idiv    ebx
        mov     .dx13,eax

        cvtsi2ss xmm6,ebx
        movss   xmm5,.z3
        rcpss   xmm6,xmm6
        subss   xmm5,.z1
        mulss   xmm5,xmm6
        movss   .dz13,xmm5

        movaps  xmm0,.3_nv
        subps   xmm0,.1_nv
        shufps  xmm6,xmm6,0
        mulps   xmm0,xmm6
        movaps  .dn13,xmm0

  .rpt_dx13_done:

;--- per-scanline deltas along edge 2-3 -------------------------------
        mov     bx,.y3
        sub     bx,.y2
        jnz     .rpt_dx23_make

        xorps   xmm7,xmm7
        mov     dword .dx23,0
        mov     dword .dz23,0
        movaps  .dn23,xmm7
        jmp     .rpt_dx23_done

  .rpt_dx23_make:
        mov     ax,.x3
        sub     ax,.x2
        cwde
        movsx   ebx,bx
        shl     eax,ROUND2
        cdq
        idiv    ebx
        mov     .dx23,eax

        cvtsi2ss xmm6,ebx
        movss   xmm5,.z3
        rcpss   xmm6,xmm6
        subss   xmm5,.z2
        mulss   xmm5,xmm6
        movss   .dz23,xmm5

        movaps  xmm0,.3_nv
        subps   xmm0,.2_nv
        shufps  xmm6,xmm6,0
        mulps   xmm0,xmm6
        movaps  .dn23,xmm0

  .rpt_dx23_done:

;--- upper part: scanlines y1 .. y2-1, between edges 1-3 and 1-2 ------
        movsx   eax,word .x1
        shl     eax,ROUND2              ; eax - current x on edge 1-3 (fixed point)
        mov     ebx,eax                 ; ebx - current x on edge 1-2 (fixed point)
        mov     ecx,.z1
        mov     .cz1,ecx
        mov     .cz2,ecx
        movaps  xmm0,.1_nv
        movaps  .cnv1,xmm0
        movaps  .cnv2,xmm0
        mov     edi,.screen
        mov     esi,.Zbuf
        movsx   ecx,word .y1            ; ecx - current y
        cmp     cx,.y2
        jge     .rpt_loop1_end

  .rpt_loop1:
        pushad                          ; ray_shd_l clobbers the general registers

        movaps  xmm2,.y_min
        movaps  xmm0,.cnv1
        movaps  xmm1,.cnv2
        movlps  xmm3,.cz1               ; loads .cz1 and .cz2 in one go
      ; movaps  xmm4,.l_v
        sar     ebx,ROUND2              ; fixed point -> integer pixel x
        sar     eax,ROUND2
        movd    mm7,.tri_no

        call    ray_shd_l

        popad

        ; step both edges down by one scanline
        movaps  xmm0,.cnv1
        movaps  xmm1,.cnv2
        movss   xmm2,.cz1
        movss   xmm3,.cz2
        addps   xmm0,.dn13
        addps   xmm1,.dn12
        addss   xmm2,.dz13
        addss   xmm3,.dz12
        add     eax,.dx13
        add     ebx,.dx12
        movaps  .cnv1,xmm0
        movaps  .cnv2,xmm1
        movss   .cz1,xmm2
        movss   .cz2,xmm3

        add     ecx,1
        cmp     cx,.y2
        jl      .rpt_loop1

  .rpt_loop1_end:

;--- lower part: scanlines y2 .. y3-1, between edges 1-3 and 2-3 ------
        movsx   ecx,word .y2
        cmp     cx,.y3
        jge     .rpt_loop2_end

        movsx   ebx,word .x2            ; eax - cur x1 (carried over from above)
        shl     ebx,ROUND2              ; ebx - cur x2, restarted at vertex 2
        push    dword .z2
        pop     dword .cz2
        movaps  xmm0,.2_nv
        movaps  .cnv2,xmm0

        mov     edi,.screen
        mov     esi,.Zbuf

  .rpt_loop2:
        pushad

        movaps  xmm2,.y_min
        movaps  xmm0,.cnv1
        movaps  xmm1,.cnv2
        movlps  xmm3,.cz1
      ; movaps  xmm4,.l_v
        sar     ebx,ROUND2
        sar     eax,ROUND2
        movd    mm7,.tri_no

        call    ray_shd_l

        popad

        movaps  xmm0,.cnv1
        movaps  xmm1,.cnv2
        movss   xmm2,.cz1
        movss   xmm3,.cz2
        addps   xmm0,.dn13
        addps   xmm1,.dn23
        addss   xmm2,.dz13
        addss   xmm3,.dz23
        add     eax,.dx13
        add     ebx,.dx23
        movaps  .cnv1,xmm0
        movaps  .cnv2,xmm1
        movss   .cz1,xmm2
        movss   .cz2,xmm3

        add     ecx,1
        cmp     cx,.y3
        jl      .rpt_loop2

  .rpt_loop2_end:

        add     esp,1024                ; esp was not realigned, so this undoes the sub
        pop     ebp

        ret
||
9512 | IgorA | 356 | |
ray_shd_l:
;----------------------------------------------------------------------
; Draw one horizontal line of a triangle with ray-cast shadows.
; For each pixel that passes the depth test, a ray is cast from the
; light position towards the pixel and tested with intersect_tri
; against every triangle except the current one; the pixel is lit only
; when no intersection is reported.
;
; in:   xmm0 - normal vector 1
;       xmm1 - normal vector 2
;       xmm3 - lo -> hi: z1, z2 coords as dword floats
;       xmm2 - lo -> hi: y_min, y_max, x_min, x_max as dword integers
;       xmm4 - not read
;       mm7  - current triangle index
;       eax  - x1
;       ebx  - x2
;       ecx  - y
;       edx  - not read
;       edi  - screen buffer
;       esi  - z buffer / stencil buffer filled with dword floats
; General-purpose and xmm registers are NOT preserved.
;----------------------------------------------------------------------

        push    ebp
        mov     ebp,esp
        sub     esp,272                 ; multiple of 4: keeps esp dword-aligned for the
                                        ; pushad/push/call below and covers every local
                                        ; declared here, including .aabb_mask
        sub     ebp,16
        and     ebp,0xfffffff0          ; 16-byte aligned base, required by movaps

  ; Locals.  These are FASM symbolic constants (equ), names kept as in
  ; the original source.
  .n1        equ [ebp-16]
  .n2        equ [ebp-32]
  .lv        equ [ebp-48]
  .lx1       equ [ebp-52]
  .lx2       equ [ebp-56]
  .z2        equ [ebp-60]
  .z1        equ [ebp-64]
  .screen    equ [ebp-68]
  .zbuff     equ [ebp-72]
  .x_max     equ [ebp-74]
  .x_min     equ [ebp-76]
  .y_max     equ [ebp-78]
  .y_min     equ [ebp-80]
  .dn        equ [ebp-96]
  .dz        equ [ebp-100]
  .y         equ [ebp-104]
  .startx    equ [ebp-108]
  .cnv       equ [ebp-128]
  .Rlen      equ [ebp-128-16]
  .r1        equ [ebp-128-32]
  .vect_t    equ [ebp-128-48]
  .cur_tri   equ [ebp-128-64]
; .p3t       equ [ebp-128-80]
  .nray      equ [ebp-128-96]
  .final_col equ [ebp-128-112]
  .aabb_mask equ dword[ebp-128-112-4]

;--- reject lines outside the visible area, order the two ends --------
        mov     .y,ecx
        packssdw xmm2,xmm2              ; bounds -> signed words
        movq    .y_min,xmm2             ; fills .y_min, .y_max, .x_min, .x_max
        cmp     cx,.y_min
        jl      .end_rp_line
        cmp     cx,.y_max
        jge     .end_rp_line
        cmp     eax,ebx
        je      .end_rp_line            ; zero-length line
        jl      @f
        xchg    eax,ebx                 ; make eax the left end; swap the attributes too
        movaps  xmm7,xmm0
        movaps  xmm0,xmm1
        movaps  xmm1,xmm7
        shufps  xmm3,xmm3,11100001b     ; swap z1 <-> z2
      @@:
        movd    .cur_tri,mm7
      ; sub     .cur_tri,dword 1
        cmp     ax,.x_max
        jge     .end_rp_line
        cmp     bx,.x_min
        jle     .end_rp_line
      ; movaps  .lv,xmm4
        andps   xmm0,[zero_hgst_dd]     ; presumably clears the 4th lane - see mask definition
        andps   xmm1,[zero_hgst_dd]
        movaps  .n1,xmm0
        movaps  .n2,xmm1
        mov     .lx1,eax
      ; mov     .startx,eax
        mov     .lx2,ebx
        movlps  .z1,xmm3                ; stores .z1 and .z2

;--- per-pixel deltas of the normal and of z --------------------------
        sub     ebx,eax
        cvtsi2ss xmm7,ebx
        rcpss   xmm7,xmm7               ; approximate 1 / (x2 - x1)
        shufps  xmm7,xmm7,0
        subps   xmm1,xmm0
        mulps   xmm1,xmm7
        movaps  .dn,xmm1
        shufps  xmm3,xmm3,11111001b     ; bring z2 into the low lane
        subss   xmm3,.z1
        mulss   xmm3,xmm7
        movss   .dz,xmm3

;--- clip the left end to x_min, advancing z and the normal -----------
        mov     ebx,.lx1
        cmp     bx,.x_min
        jge     @f
        movsx   eax,word .x_min         ; signed word, same extension as below
        sub     eax,ebx                 ; number of pixels skipped on the left
        cvtsi2ss xmm7,eax
        shufps  xmm7,xmm7,0
        mulss   xmm3,xmm7
        mulps   xmm1,xmm7
        addss   xmm3,.z1
        addps   xmm1,.n1
        movsx   eax,word .x_min
        movss   .z1,xmm3
        movaps  .n1,xmm1
        mov     dword .lx1,eax
      @@:

;--- clip the right end to x_max --------------------------------------
        movsx   eax,word .x_max         ; signed word, like the compares above
        cmp     .lx2,eax
        jl      @f
        mov     .lx2,eax
      @@:

;--- address of the first pixel in both buffers, pixel count ----------
        movzx   eax,word[xres_var]
        mul     dword .y                ; clobbers edx
        add     eax,.lx1
        mov     .zbuff,esi
        mov     .screen,edi
        shl     eax,2                   ; 4 bytes per pixel / per z entry
        add     edi,eax
        add     esi,eax
        mov     ecx,.lx2
        sub     ecx,.lx1                ; ecx - number of pixels to process

        movd    xmm0,[vect_x]
        punpcklwd xmm0,[the_zero]       ; widen two words to dwords
        cvtdq2ps xmm0,xmm0
        movaps  .vect_t,xmm0            ; float offset added to every rotated vertex

;--- pixel loop -------------------------------------------------------
  .ddraw:

        xorps   xmm0,xmm0
        movss   xmm2,.z1
        movss   xmm5,.z1
        movaps  .final_col,xmm0         ; start from black
        addss   xmm2,[f1]
        subss   xmm5,[f1]
        cmpnltss xmm2,dword[esi]        ; z1 + [f1] >= zbuf ?
        cmpnltss xmm5,dword[esi]        ; z1 - [f1] >= zbuf ?
        pxor    xmm2,xmm5               ; set only when exactly one of them holds
        movd    eax,xmm2
        test    eax,eax
        jz      .skips                  ; stored depth is not within [f1] of this pixel

        movaps  xmm7,.n1
        andps   xmm7,[zero_hgst_dd]
        mulps   xmm7,xmm7               ; normalize
        haddps  xmm7,xmm7
        haddps  xmm7,xmm7
        rsqrtps xmm7,xmm7               ; approximate 1 / |n1|
        mulps   xmm7,.n1
        movaps  .cnv,xmm7               ; normalized normal of this pixel
        mov     ebx,point_light_coords
        mov     edx,lights_aligned
        xor     eax,eax
  .nx_light:
        pushad
        cvtsi2ss xmm0,.lx1
        cvtsi2ss xmm1,.y
        movss   xmm2,.z1
        movlhps xmm0,xmm1
        shufps  xmm0,xmm2,11001000b     ; xmm0 = x, y, z of the pixel
        subps   xmm0,[ebx]              ; xmm0 - ray end, -> current vertex
        movaps  xmm3,[ebx]
        andps   xmm0,[zero_hgst_dd]
        movaps  xmm1,xmm0
        mulps   xmm0,xmm0
        haddps  xmm0,xmm0
        haddps  xmm0,xmm0
        sqrtps  xmm0,xmm0
        movss   .Rlen,xmm0              ; distance light -> pixel
        rcpps   xmm0,xmm0
        mulps   xmm0,xmm1               ; xmm0 - normalized ray vector
        andps   xmm0,[zero_hgst_dd]
        movaps  .nray,xmm0
        movaps  .r1,xmm3                ; ray origin
     if 0
        movaps  xmm1,xmm3
        call    calc_bounding_box

        mov     .aabb_mask,eax
     end if
        xor     ecx,ecx                 ; ecx - index of the triangle being tested
  .nx_tri:                              ; next triangle
      ; mov     eax,.lx1
      ; cmp     eax,.startx
      ; je      @f                      ; prevent artifact borders on tri
      ; cmp     eax,.lx2                ; NOT work as I want !!
      ; je      @f

        cmp     ecx,.cur_tri            ; prevent self shadowing
        je      .skipp
      @@:
     if 0
        mov     edi,ecx
        imul    edi,[i12]
        add     edi,[triangles_ptr]
        mov     eax,[edi]
        mov     ebx,[edi+4]
        mov     edx,[edi+8]
        imul    eax,[i12]
        imul    ebx,[i12]
        imul    edx,[i12]
        add     eax,[points_ptr]
        add     ebx,[points_ptr]
        add     edx,[points_ptr]
        movups  xmm2,[eax]
        movups  xmm3,[ebx]
        movups  xmm4,[edx]
        andps   xmm2,[sign_mask]
        andps   xmm3,[sign_mask]
        andps   xmm4,[sign_mask]
        movmskps ebx,xmm4
        cmpeqps xmm2,xmm3
        cmpeqps xmm3,xmm4
        andps   xmm2,xmm3
        movmskps eax,xmm2
        and     eax,111b
        and     ebx,111b
        cmp     eax,111b
        jne     @f
        bt      .aabb_mask,ebx
        jnc     .skipp
      @@:
     end if
        ; fetch the three rotated vertices of triangle ecx
        mov     edi,ecx
        imul    edi,[i12]
        add     edi,[triangles_ptr]     ; edi -> three vertex indices
        mov     eax,[edi]
        mov     ebx,[edi+4]
        mov     edx,[edi+8]
        imul    eax,[i12]
        imul    ebx,[i12]
        imul    edx,[i12]
        add     eax,[points_rotated_ptr]
        add     ebx,[points_rotated_ptr]
        add     edx,[points_rotated_ptr]
        movups  xmm2,[eax]
        movups  xmm3,[ebx]
        movups  xmm4,[edx]
        addps   xmm2,.vect_t
        addps   xmm3,.vect_t
        addps   xmm4,.vect_t

        ; intersect_tri (defined elsewhere), interface as noted by the author:
        ; in:
        ;     xmm0 - ray direction ; should be normalized
        ;     xmm1 - ray origin
        ;     xmm2 - tri vert1
        ;     xmm3 - tri vert2
        ;     xmm4 - tri vert3
        ;     if eax = 1 - intersection with edge
        ;         xmm6 - edge length
        ;     if eax = 0 - intersect with ray (classic)
        ; out:
        ;     eax = 1 - intersection occurred
        ;     xmm0 - float lo -> hi = t, v, u, ...

        movss   xmm6,.Rlen
        movaps  xmm0,.nray
        movaps  xmm1,.r1
        subss   xmm6,[the_one]          ; shorten the tested segment by [the_one]
        mov     eax,1
        push    ecx
        call    intersect_tri
        pop     ecx
        cmp     eax,1
        je      .inter                  ; something blocks the light - pixel stays dark
  .skipp:
  .skp:
        inc     ecx
        cmp     ecx,[triangles_count_var]
        jnz     .nx_tri
      ; jz      .do_process
      ; comiss  xmm0,.Rlen
      ; jl      .inter

        popad
  .do_process:
        ; No occluder found: accumulate this light's contribution.
        ; NOTE(review): [edx+16] / [edx+48] look like per-light colour
        ; terms of lights_aligned - confirm against its definition.
        movaps  xmm5,.nray              ;[edx]
        andps   xmm5,[zero_hgst_dd]     ; global
        mulps   xmm5,.cnv               ;.lv ; last dword should be zeroed
      ; andps   xmm5,[sign_z]           ; global
        haddps  xmm5,xmm5
        haddps  xmm5,xmm5               ; dot(ray, normal) in every lane
        andps   xmm5,[abs_mask]         ; global
        movaps  xmm7,xmm5
        mulps   xmm7,xmm7               ; dot^2
        mulps   xmm7,xmm7               ; dot^4
        mulps   xmm5,[edx+16]
        mulps   xmm7,xmm7               ; dot^8
        mulps   xmm7,xmm7               ; dot^16
        mulps   xmm7,[edx+48]
        addps   xmm5,xmm7
        minps   xmm5,[mask_255f]        ; global
        maxps   xmm5,.final_col         ; addps maxps
        movaps  .final_col,xmm5
        jmp     .nx_loop
  .inter:

        popad
  .nx_loop:
      ; add     edx,64                  ; uncomment to achieve 3 lights
      ; add     ebx,16
      ; cmp     edx,lights_aligned_end  ; global
      ; jnz     .nx_light

        movaps  xmm1,.final_col
        cvtps2dq xmm1,xmm1
        packssdw xmm1,xmm1
        packuswb xmm1,xmm1              ; four floats -> four saturated bytes
        movd    [edi],xmm1
  .skips:
        ; advance to the next pixel
        movaps  xmm0,.n1
        movss   xmm2,.z1
        add     edi,4
        add     esi,4
        add     dword .lx1,1
        addps   xmm0,.dn
        addss   xmm2,.dz
        movaps  .n1,xmm0
        movss   .z1,xmm2
        dec     ecx
        jnz     .ddraw
  .end_rp_line:
        add     esp,272                 ; must match the sub in the prologue
        pop     ebp

        ret