Rev 9237 | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
9237 | leency | 1 | ; Bilinear filtering, real Phong shading and a glass-like effect in parallel. |
2 | ; Thanks to authors of 3dica tutorial. |
||
3 | ; Implemented in FASM by Maciej Guba. |
||
4 | ; http://macgub.co.pl |
||
5 | |||
6 | ROUND2 equ 10 |
||
7 | |||
8 | glass_tex_tri: |
||
9 | ;----Procedure renders a Phong shaded triangle with z coord |
||
10 | ;----interpolation ( Catmull algorithm ), each pixel is - |
||
11 | ;----covered by texture using bilinear filtering.-------- |
||
12 | ;----I normalize normal vector in every pixel ----------- |
||
13 | ;------------------in - eax - x1 shl 16 + y1 ------------ |
||
14 | ;---------------------- ebx - x2 shl 16 + y2 ------------ |
||
15 | ;---------------------- ecx - x3 shl 16 + y3 ------------ |
||
16 | ;---------------------- esi - pointer to stencil buffer-- |
||
17 | ;---------------------- filled with dd float variables- |
||
18 | ;---------------------- edi - pointer to screen buffer--- |
||
19 | ;---------------------- edx - pointer to texture--------- |
||
20 | ;---------------------- xmm0 - 1st normal vector -------- |
||
21 | ;---------------------- xmm1 - 2nd normal vector -------- |
||
22 | ;---------------------- xmm2 - 3rd normal vector -------- |
||
23 | ;---------------------- xmm3 - normalized light vector -- |
||
24 | ;---------------------- xmm4 - lo -> hi z1, z2, z3 coords |
||
25 | ;---------------------- as dwords floats --------------- |
||
26 | ;---------------------- xmm5 - lo -> hi y_min, y_max, --- |
||
27 | ;---------------------- x_min, x_max as dword integers - |
||
28 | ;---------------------- xmm6 - lo -> hi tx1, ty1, tx2, -- |
||
29 | ;---------------------- ty2, tx3, ty3 as word, xres as-- |
||
30 | ;---------------------- dword integers------------------ |
||
31 | ;---------------------- stack - no parameters ----------- |
||
32 | ;-------------------------------------------------------- |
||
33 | ;----------------- procedure doesn't save registers !! -- |
||
; Outline: the vertices are sorted by y, per-scanline increments are computed
; for the three edges (x in fixed point with ROUND2 fractional bits, the rest
; as floats), then glass_tex_line is called once per scanline, first for the
; rows y1..y2-1 and then for the rows y2..y3-1.
; Only ebp is restored on exit; all other registers are left modified.
34 | |||
35 | |||
36 | |||
37 | |||
; Prologue: reserve 512 bytes of stack and move ebp down to a 16-byte aligned
; address below the saved ebp, so that the 16-byte slots declared below can
; be accessed with movaps/movdqa.
38 | push ebp |
||
39 | mov ebp,esp |
||
40 | sub esp,512 |
||
41 | sub ebp,16 |
||
42 | and ebp,0xfffffff0 |
||
43 | |||
; Vertex normals and the light vector, 16 bytes each.
44 | .1_nv equ [ebp-16] |
||
45 | .2_nv equ [ebp-32] |
||
46 | .3_nv equ [ebp-48] |
||
47 | .l_v equ [ebp-64] |
||
; .z1/.z2/.z3 are adjacent dwords: one movaps at .z1 stores the whole xmm4.
48 | .z3 equ [ebp-72] |
||
49 | .z2 equ [ebp-76] |
||
50 | .z1 equ [ebp-80] |
||
; Screen coordinates as words; each (y, x) pair is written with a single
; dword store of eax/ebx/ecx at .y1/.y2/.y3.
51 | .x1 equ [ebp-82] |
||
52 | .y1 equ [ebp-84] |
||
53 | .x2 equ [ebp-86] |
||
54 | .y2 equ [ebp-88] |
||
55 | .x3 equ [ebp-90] |
||
56 | .y3 equ [ebp-92] |
||
57 | .Zbuf equ [ebp-96] |
||
; Clip rectangle as four dwords; .y_min is 16-byte aligned and receives xmm5.
58 | .x_max equ [ebp-100] |
||
59 | .x_min equ [ebp-104] |
||
60 | .y_max equ [ebp-108] |
||
61 | .y_min equ [ebp-112] |
||
62 | .screen equ [ebp-116] |
||
; Per-scanline x increments of the three edges (fixed point, ROUND2 bits).
63 | .dx12 equ [ebp-120] |
||
64 | .dx13 equ [ebp-124] |
||
65 | .dx23 equ [ebp-128] |
||
; Per-scanline normal increments of the three edges, 16 bytes each.
66 | .dn12 equ [ebp-144] |
||
67 | .dn13 equ [ebp-160] |
||
68 | .dn23 equ [ebp-176] |
||
69 | |||
70 | .cnv1 equ [ebp-192] ; cur normal vectors |
||
71 | .cnv2 equ [ebp-208] |
||
; .tx1 .. .x_res form one 16-byte slot that receives xmm6 in a single movaps.
72 | .x_res equ [ebp-212] |
||
73 | .ty3 equ [ebp-214] |
||
74 | .tx3 equ [ebp-216] |
||
75 | .ty2 equ [ebp-218] |
||
76 | .tx2 equ [ebp-220] |
||
77 | .ty1 equ [ebp-222] |
||
78 | .tx1 equ [ebp-224] |
||
; Each .dtxNN/.dtyNN/.dzNN triple and each .ctxN/.ctyN/.czN triple occupies
; the first 12 bytes of a 16-byte slot that is also read and written as one
; packed vector (tx, ty, z, unused).
79 | .dz12 equ [ebp-232] |
||
80 | .dty12 equ [ebp-236] |
||
81 | .dtx12 equ [ebp-240] |
||
82 | .dz13 equ [ebp-248] |
||
83 | .dty13 equ [ebp-252] |
||
84 | .dtx13 equ [ebp-256] |
||
85 | .dz23 equ [ebp-264] |
||
86 | .dty23 equ [ebp-268] |
||
87 | .dtx23 equ [ebp-272] |
||
88 | .cz1 equ [ebp-280] |
||
89 | .cty1 equ [ebp-284] |
||
90 | .ctx1 equ [ebp-288] |
||
91 | .cz2 equ [ebp-296] |
||
92 | .cty2 equ [ebp-300] |
||
93 | .ctx2 equ [ebp-304] |
||
94 | .tx_ptr equ [ebp-308] |
||
95 | |||
96 | |||
; Reset the MMX state, then order the three vertices by ascending y.
; y lives in the low word of eax/ebx/ecx and is compared as a signed word.
; Every swap also exchanges the matching per-vertex data: the z lanes of
; xmm4, the packed (tx, ty) dwords of xmm6 and the normals in xmm0..xmm2.
97 | emms |
||
98 | ; movd .x_res,xmm7 |
||
99 | .sort3: ; sort triangle coordinates... |
||
100 | cmp ax,bx |
||
101 | jle .sort1 |
||
; y1 > y2: swap vertices 1 and 2 (shufps 11100001b exchanges lanes 0 and 1).
102 | xchg eax,ebx |
||
103 | shufps xmm4,xmm4,11100001b |
||
104 | shufps xmm6,xmm6,11100001b |
||
105 | movaps xmm7,xmm0 |
||
106 | movaps xmm0,xmm1 |
||
107 | movaps xmm1,xmm7 |
||
108 | |||
109 | |||
110 | .sort1: |
||
111 | cmp bx,cx |
||
112 | jle .sort2 |
||
; y2 > y3: swap vertices 2 and 3 (shufps 11011000b exchanges lanes 1 and 2),
; then re-check the first pair.
113 | xchg ebx,ecx |
||
114 | shufps xmm4,xmm4,11011000b |
||
115 | shufps xmm6,xmm6,11011000b |
||
116 | movaps xmm7,xmm1 |
||
117 | movaps xmm1,xmm2 |
||
118 | movaps xmm2,xmm7 |
||
119 | |||
120 | jmp .sort3 |
||
121 | |||
122 | .sort2: |
||
123 | ; movq .tx1,xmm6 |
||
124 | ; pshufd xmm6,xmm6,01001110b |
||
125 | ; movd .tx3,xmm6 |
||
; Spill the sorted inputs: one 16-byte store covers tx1..ty3 plus x_res, one
; covers z1..z3, and each dword store writes a (y, x) word pair.
126 | movaps .tx1,xmm6 |
||
127 | movaps .z1,xmm4 |
||
128 | mov .y1,eax |
||
129 | mov .y2,ebx |
||
130 | mov .y3,ecx |
||
131 | |||
132 | movdqa .y_min,xmm5 |
||
; Visibility pre-check. The clip bounds are narrowed to words and arranged as
; (y_min, x_min) and (y_max, x_max) pairs broadcast over the register, then
; compared against the 16 bytes starting at .y3 (y3,x3,y2,x2,y1,x1 followed by
; the 4 bytes of .z1). A word is flagged when it is > min xor > max.
; If no flagged word survives the mask, the triangle is skipped entirely.
; NOTE(review): pmovmskb yields 16 bits, so the mask acts as 0xaaaa and also
; keeps the two words that overlap .z1 - confirm whether 0x0aaa was intended.
133 | if 1 ; check if at least a fragment |
||
134 | packssdw xmm5,xmm5 ; of the triangle is in the visible area |
||
135 | pshuflw xmm5,xmm5,11011000b |
||
136 | movdqu xmm7,.y3 |
||
137 | movdqa xmm6,xmm5 |
||
138 | pshufd xmm5,xmm5,0 ; xmm5 lo-hi -> broadcasted y_min, x_min |
||
139 | pshufd xmm6,xmm6,01010101b ;xmm6 -> brd y_max x_max |
||
140 | movdqa xmm4,xmm7 |
||
141 | pcmpgtw xmm7,xmm5 |
||
142 | pcmpgtw xmm4,xmm6 |
||
143 | pxor xmm7,xmm4 |
||
144 | pmovmskb eax,xmm7 |
||
145 | and eax,0x00aaaaaa |
||
146 | or eax,eax |
||
147 | jz .rpt_loop2_end |
||
148 | end if |
||
; Keep the remaining inputs in the frame; the registers are reused below.
149 | movaps .1_nv,xmm0 |
||
150 | movaps .2_nv,xmm1 |
||
151 | movaps .3_nv,xmm2 |
||
152 | movaps .l_v,xmm3 |
||
153 | mov .Zbuf,esi |
||
154 | mov .screen,edi |
||
155 | mov .tx_ptr,edx |
||
||
156 | |||
157 | |||
158 | |||
159 | mov bx,.y2 ; calc deltas for edge 1-2 |
||
160 | sub bx,.y1 |
||
161 | jnz .rpt_dx12_make |
||
162 | |||
; y2 == y1: horizontal edge, all its increments are zero. The 16-byte store
; at .dtx12 clears dtx12, dty12 and dz12 together.
163 | xorps xmm7,xmm7 |
||
164 | mov dword .dx12,0 |
||
165 | movaps .dtx12,xmm7 |
||
166 | movaps .dn12,xmm7 |
||
167 | jmp .rpt_dx12_done |
||
168 | |||
169 | .rpt_dx12_make: |
||
; dx12 = ((x2 - x1) shl ROUND2) / (y2 - y1), signed fixed point.
170 | mov ax,.x2 |
||
171 | sub ax,.x1 |
||
172 | cwde |
||
173 | movsx ebx,bx |
||
174 | shl eax,ROUND2 |
||
175 | cdq |
||
176 | idiv ebx |
||
177 | mov .dx12,eax |
||
178 | |||
; xmm6 = float(y2 - y1) in all four lanes; dz12 = (z2 - z1) / dy.
179 | cvtsi2ss xmm6,ebx |
||
180 | shufps xmm6,xmm6,0 |
||
181 | movss xmm5,.z2 |
||
182 | subss xmm5,.z1 |
||
183 | divss xmm5,xmm6 |
||
184 | movss .dz12,xmm5 |
||
185 | |||
; Texture increments: the (tx, ty) words are zero-extended to dwords,
; subtracted, converted to float and divided by dy. movlps writes only
; dtx12 and dty12.
186 | movd xmm0,.tx1 |
||
187 | movd xmm2,.tx2 |
||
188 | pxor xmm1,xmm1 |
||
189 | punpcklwd xmm0,xmm1 |
||
190 | punpcklwd xmm2,xmm1 |
||
191 | psubd xmm2,xmm0 |
||
192 | ; cvtdq2ps xmm0,xmm0 |
||
193 | cvtdq2ps xmm2,xmm2 |
||
194 | ; movlps .ctx1,xmm0 |
||
195 | ; movlps .ctx2,xmm2 |
||
196 | ; subps xmm2,xmm0 |
||
197 | divps xmm2,xmm6 |
||
198 | movlps .dtx12,xmm2 |
||
199 | |||
; dn12 = (n2 - n1) / dy.
200 | movaps xmm0,.2_nv |
||
201 | subps xmm0,.1_nv |
||
202 | divps xmm0,xmm6 |
||
203 | movaps .dn12,xmm0 |
||
204 | |||
205 | |||
206 | .rpt_dx12_done: |
||
207 | |||
208 | mov bx,.y3 ; calc deltas for edge 1-3 |
||
209 | sub bx,.y1 |
||
210 | jnz .rpt_dx13_make |
||
211 | |||
; y3 == y1: all increments of this edge are zero (the 16-byte store at
; .dtx13 clears dtx13, dty13 and dz13 together).
212 | xorps xmm7,xmm7 |
||
213 | mov dword .dx13,0 |
||
214 | movaps .dtx13,xmm7 |
||
215 | movaps .dn13,xmm7 |
||
216 | jmp .rpt_dx13_done |
||
217 | |||
218 | .rpt_dx13_make: |
||
; dx13 = ((x3 - x1) shl ROUND2) / (y3 - y1), signed fixed point.
219 | mov ax,.x3 |
||
220 | sub ax,.x1 |
||
221 | cwde |
||
222 | movsx ebx,bx |
||
223 | shl eax,ROUND2 |
||
224 | cdq |
||
225 | idiv ebx |
||
226 | mov .dx13,eax |
||
227 | |||
228 | |||
; xmm6 = float(y3 - y1) in all four lanes.
229 | cvtsi2ss xmm6,ebx |
||
230 | shufps xmm6,xmm6,0 |
||
231 | |||
; dz13 = (z3 - z1) / dy.
232 | movss xmm5,.z3 |
||
233 | subss xmm5,.z1 |
||
234 | divss xmm5,xmm6 |
||
235 | movss .dz13,xmm5 |
||
236 | |||
; (dtx13, dty13) = float((tx3, ty3) - (tx1, ty1)) / dy.
237 | movd xmm0,.tx1 |
||
238 | movd xmm2,.tx3 |
||
239 | pxor xmm1,xmm1 |
||
240 | punpcklwd xmm0,xmm1 |
||
241 | punpcklwd xmm2,xmm1 |
||
242 | psubd xmm2,xmm0 |
||
243 | ; cvtdq2ps xmm0,xmm0 |
||
244 | cvtdq2ps xmm2,xmm2 |
||
245 | ; subps xmm2,xmm0 |
||
246 | divps xmm2,xmm6 |
||
247 | movlps .dtx13,xmm2 |
||
248 | |||
249 | |||
250 | |||
; dn13 = (n3 - n1) / dy.
251 | movaps xmm0,.3_nv |
||
252 | subps xmm0,.1_nv |
||
253 | divps xmm0,xmm6 |
||
254 | movaps .dn13,xmm0 |
||
255 | |||
256 | .rpt_dx13_done: |
||
257 | |||
258 | mov bx,.y3 ; calc deltas for edge 2-3 |
||
259 | sub bx,.y2 |
||
260 | jnz .rpt_dx23_make |
||
261 | |||
; y3 == y2: all increments of this edge are zero (the 16-byte store at
; .dtx23 clears dtx23, dty23 and dz23 together).
262 | xorps xmm7,xmm7 |
||
263 | mov dword .dx23,0 |
||
264 | movaps .dtx23,xmm7 |
||
265 | movaps .dn23,xmm7 |
||
266 | jmp .rpt_dx23_done |
||
267 | |||
268 | .rpt_dx23_make: |
||
; dx23 = ((x3 - x2) shl ROUND2) / (y3 - y2), signed fixed point.
269 | mov ax,.x3 |
||
270 | sub ax,.x2 |
||
271 | cwde |
||
272 | movsx ebx,bx |
||
273 | shl eax,ROUND2 |
||
274 | cdq |
||
275 | idiv ebx |
||
276 | mov .dx23,eax |
||
277 | |||
; xmm6 = float(y3 - y2) in all four lanes; dz23 = (z3 - z2) / dy.
278 | cvtsi2ss xmm6,ebx |
||
279 | shufps xmm6,xmm6,0 |
||
280 | movss xmm5,.z3 |
||
281 | subss xmm5,.z2 |
||
282 | divss xmm5,xmm6 |
||
283 | movss .dz23,xmm5 |
||
284 | |||
; (dtx23, dty23) = float((tx3, ty3) - (tx2, ty2)) / dy.
285 | movd xmm0,.tx2 |
||
286 | movd xmm2,.tx3 |
||
287 | pxor xmm1,xmm1 |
||
288 | punpcklwd xmm0,xmm1 |
||
289 | punpcklwd xmm2,xmm1 |
||
290 | psubd xmm2,xmm0 |
||
291 | ; cvtdq2ps xmm0,xmm0 |
||
292 | cvtdq2ps xmm2,xmm2 |
||
293 | ; movlps .ctx1,xmm0 |
||
294 | ; movlps .ctx2,xmm2 |
||
295 | ; subps xmm2,xmm0 |
||
296 | divps xmm2,xmm6 |
||
297 | movlps .dtx23,xmm2 |
||
298 | |||
299 | |||
300 | |||
301 | |||
; dn23 = (n3 - n2) / dy.
302 | movaps xmm0,.3_nv |
||
303 | subps xmm0,.2_nv |
||
304 | divps xmm0,xmm6 |
||
305 | movaps .dn23,xmm0 |
||
306 | |||
307 | .rpt_dx23_done: |
||
308 | |||
; Initialise the running values at vertex 1.
; eax = ebx = x1 shl ROUND2: current x on edge 1-3 (eax) and on the second
; edge (ebx). Both running attribute sets start from vertex 1:
; z = z1, (tx, ty) = float(tx1, ty1), normal = n1.
; NOTE(review): the 4th dword of the .ctx1/.ctx2 slots is never written here,
; although the slots are later loaded and updated as full 16-byte vectors.
309 | movsx eax,word .x1 |
||
310 | shl eax,ROUND2 |
||
311 | mov ebx,eax |
||
312 | mov edx,.z1 |
||
313 | movd xmm1,.tx1 |
||
314 | pxor xmm2,xmm2 |
||
315 | punpcklwd xmm1,xmm2 |
||
316 | cvtdq2ps xmm1,xmm1 |
||
317 | |||
318 | mov .cz1,edx |
||
319 | mov .cz2,edx |
||
320 | movaps xmm0,.1_nv |
||
321 | movlps .ctx1,xmm1 |
||
322 | movlps .ctx2,xmm1 |
||
323 | movaps .cnv1,xmm0 |
||
324 | movaps .cnv2,xmm0 |
||
325 | |||
326 | ; mov edx,.dx13 |
||
327 | ; cmp edx,.dx12 |
||
328 | ; jg .second_cause |
||
329 | |||
; ecx = current scanline; the upper half is skipped when y1 >= y2.
330 | movsx ecx,word .y1 |
||
331 | cmp cx,.y2 |
||
332 | |||
333 | jge .rpt_loop1_end |
||
334 | |||
335 | .rpt_loop1: |
||
; Upper half (rows y1 .. y2-1): span between edge 1-3 and edge 1-2.
; pushad/popad keep the fixed-point eax/ebx and the row counter ecx intact;
; the callee gets the integer parts of x (sar by ROUND2) and its other
; arguments in the registers listed in glass_tex_line's header.
336 | pushad |
||
337 | |||
338 | movaps xmm2,.y_min |
||
339 | movaps xmm0,.cnv1 |
||
340 | movaps xmm1,.cnv2 |
||
341 | ; movlps xmm3,.cz1 ; cz1, cz2 both |
||
342 | movaps xmm3,.ctx1 |
||
343 | movaps xmm5,.ctx2 |
||
344 | movaps xmm4,.l_v |
||
345 | movd xmm6,.x_res |
||
346 | sar ebx,ROUND2 |
||
347 | sar eax,ROUND2 |
||
348 | mov edx,.tx_ptr |
||
349 | mov edi,.screen |
||
350 | |||
351 | mov esi,.Zbuf |
||
352 | |||
353 | call glass_tex_line |
||
354 | |||
355 | popad |
; Step both edges down one row: normals, (tx, ty, z) vectors and x.
356 | movaps xmm0,.cnv1 |
||
357 | movaps xmm1,.cnv2 |
||
358 | ; movss xmm2,.cz1 |
||
359 | ; movss xmm3,.cz2 |
||
360 | movaps xmm2,.ctx1 |
||
361 | movaps xmm3,.ctx2 |
||
362 | addps xmm0,.dn13 |
||
363 | addps xmm1,.dn12 |
||
364 | addps xmm2,.dtx13 |
||
365 | addps xmm3,.dtx12 |
||
366 | add eax,.dx13 |
||
367 | add ebx,.dx12 |
||
368 | |||
369 | movaps .cnv1,xmm0 |
||
370 | movaps .cnv2,xmm1 |
||
371 | ; movss .cz1,xmm2 |
||
372 | ; movss .cz2,xmm3 |
||
373 | movaps .ctx1,xmm2 |
||
374 | movaps .ctx2,xmm3 |
||
; Next row; loop while y < y2 (signed word compare).
375 | add ecx,1 |
||
376 | cmp cx,.y2 |
||
377 | jl .rpt_loop1 |
||
378 | |||
379 | |||
380 | ; jmp .rpt_loop2_end |
||
||
381 | |||
382 | |||
383 | .rpt_loop1_end: |
||
; Lower half setup: nothing to draw when y2 >= y3. Otherwise the second edge
; restarts at vertex 2 (x, z, texture coordinates and normal), while the
; edge 1-3 values in eax/.ctx1/.cnv1 simply carry on.
384 | movsx ecx,word .y2 |
||
385 | cmp cx,.y3 |
||
386 | jge .rpt_loop2_end |
||
387 | |||
388 | movsx ebx,word .x2 ; eax - cur x1 |
||
389 | shl ebx,ROUND2 ; ebx - cur x2 |
||
; cz2 = z2, copied memory to memory through the stack.
390 | push dword .z2 |
||
391 | pop dword .cz2 |
||
392 | movd xmm1,.tx2 |
||
393 | pxor xmm2,xmm2 |
||
394 | punpcklwd xmm1,xmm2 |
||
395 | cvtdq2ps xmm1,xmm1 |
||
396 | movlps .ctx2,xmm1 |
||
397 | movaps xmm0,.2_nv |
||
398 | movaps .cnv2,xmm0 |
||
399 | |||
400 | |||
401 | .rpt_loop2: |
||
; Lower half (rows y2 .. y3-1): span between edge 1-3 and edge 2-3.
; Same calling sequence as in .rpt_loop1.
402 | pushad |
||
403 | |||
404 | movaps xmm2,.y_min |
||
405 | movaps xmm0,.cnv1 |
||
406 | movaps xmm1,.cnv2 |
||
407 | movaps xmm3,.ctx1 |
||
408 | movaps xmm5,.ctx2 |
||
409 | movaps xmm4,.l_v |
||
410 | sar ebx,ROUND2 |
||
411 | sar eax,ROUND2 |
||
412 | mov edx,.tx_ptr |
||
413 | mov edi,.screen |
||
414 | mov esi,.Zbuf |
||
415 | movd xmm6,.x_res |
||
416 | call glass_tex_line |
||
417 | |||
418 | popad |
; Step both edges down one row, using the edge 1-3 and edge 2-3 increments.
419 | movaps xmm0,.cnv1 |
||
420 | movaps xmm1,.cnv2 |
||
421 | ; movss xmm2,.cz1 |
||
422 | ; movss xmm3,.cz2 |
||
423 | movaps xmm2,.ctx1 |
||
424 | movaps xmm3,.ctx2 |
||
425 | addps xmm0,.dn13 |
||
426 | addps xmm1,.dn23 |
||
427 | ; addss xmm2,.dz13 |
||
428 | ; addss xmm3,.dz23 |
||
429 | addps xmm2,.dtx13 |
||
430 | addps xmm3,.dtx23 |
||
431 | |||
432 | add eax,.dx13 |
||
433 | add ebx,.dx23 |
||
434 | |||
435 | movaps .cnv1,xmm0 |
||
436 | movaps .cnv2,xmm1 |
||
437 | movaps .ctx1,xmm2 |
||
438 | movaps .ctx2,xmm3 |
||
439 | |||
440 | ; movss .cz1,xmm2 |
||
441 | ; movss .cz2,xmm3 |
||
442 | |||
; Next row; loop while y < y3 (signed word compare).
443 | add ecx,1 |
||
444 | cmp cx,.y3 |
||
445 | jl .rpt_loop2 |
||
446 | |||
; .second_cause is only referenced by a commented-out jump in this listing.
447 | .second_cause: ;dx13 > dx12 |
||
448 | |||
449 | .rpt_loop2_end: |
||
450 | |||
; Epilogue: release the 512-byte frame and restore the caller's ebp.
451 | add esp,512 |
||
452 | pop ebp |
||
453 | |||
454 | ret |
||
9740 | macgub | 455 | |
9237 | leency | 456 | glass_tex_line: |
457 | ; in: |
||
458 | ; xmm0 - normal vector 1 |
||
459 | ; xmm1 - normal vector 2 |
||
460 | ; xmm3 - lo -> hi tx1, ty1, z1 coords as dwords float |
||
461 | ; xmm5 - lo -> hi tx2, ty2, z2 coords as dwords float |
||
462 | ; xmm2 - lo -> hi y_min, y_max, x_min, x_max |
||
463 | ; as dword integers |
||
464 | ; xmm4 - normalized light vector |
||
465 | ; eax - x1 |
||
466 | ; ebx - x2 |
||
467 | ; ecx - y |
||
468 | ; edi - screen buffer |
||
469 | ; esi - stencil buffer filled with dd floats |
||
470 | ; edx - texture pointer (handle) |
||
471 | ; xmm6 - lowest dword x_res as integer |
||
; Draws one horizontal span on row y, clipped to the given rectangle.
; Each pixel gets the bilinearly filtered texel multiplied by the strongest
; light colour and added to the existing 4-byte pixel with unsigned byte
; saturation. The buffer at esi is only read here; the stores to it are
; commented out.
; Only ebp is restored on exit; all other registers are left modified.
; It also reads the globals tex_m2, lights_aligned, lights_aligned_end,
; aprox, the_zero, the_one and mask_255f and the constants TEX_SHIFT, TEX_X.
472 | |||
; Prologue: same 16-byte aligned ebp scheme as in glass_tex_tri.
; NOTE(review): 350 is not a multiple of 4, so esp is not dword aligned while
; this routine runs. Nothing below pushes or calls, but a multiple of 16
; would be more conventional.
473 | push ebp |
||
474 | mov ebp,esp |
||
475 | sub esp,350 |
||
476 | sub ebp,16 |
||
477 | and ebp,0xfffffff0 |
||
478 | |||
; Span endpoint normals and the light vector, 16 bytes each.
; .lv is stored but not read anywhere in this listing.
479 | .n1 equ [ebp-16] |
||
480 | .n2 equ [ebp-32] |
||
481 | .lv equ [ebp-48] |
||
482 | .lx1 equ [ebp-52] |
||
483 | .lx2 equ [ebp-56] |
||
484 | ; .z2 equ [ebp-60] |
||
485 | ; .z1 equ [ebp-64] |
||
486 | .screen equ [ebp-68] |
||
487 | .zbuff equ [ebp-72] |
||
; Clip rectangle, stored here as four words by a single movq at .y_min.
488 | .x_max equ [ebp-74] |
||
489 | .x_min equ [ebp-76] |
||
490 | .y_max equ [ebp-78] |
||
491 | .y_min equ [ebp-80] |
||
492 | .dn equ [ebp-96] |
||
493 | .x_res equ [ebp-100] |
||
494 | .y equ [ebp-104] |
||
495 | .cnv equ [ebp-128] |
||
; (tx, ty, z) triples sharing a 16-byte slot; the slots at .tx1, .tx2 and
; .dtx are accessed as whole vectors.
496 | .z1 equ [ebp-136] |
||
497 | .ty1 equ [ebp-140] |
||
498 | .tx1 equ [ebp-144] |
||
499 | .z2 equ [ebp-152] |
||
500 | .ty2 equ [ebp-156] |
||
501 | .tx2 equ [ebp-160] |
||
502 | .cz equ [ebp-168] |
||
503 | .cty equ [ebp-172] |
||
504 | .ctx equ [ebp-176] |
||
505 | .dz equ [ebp-184] |
||
506 | .dty equ [ebp-188] |
||
507 | .dtx equ [ebp-192] |
||
; .xf/.yf hold the fractional texel position. The slot at .xf is also read as
; a 16-byte operand, which takes in .xd and .yd as well.
508 | .yd equ [ebp-196] |
||
509 | .xd equ [ebp-200] |
||
510 | .yf equ [ebp-204] |
||
511 | .xf equ [ebp-208] |
||
; .w1-.w4 and .p1-.p4 are declared but not referenced in this listing.
512 | .w4 equ [ebp-212] |
||
513 | .w3 equ [ebp-216] |
||
514 | .w2 equ [ebp-220] |
||
515 | .w1 equ [ebp-224] |
||
516 | .p4 equ [ebp-228] |
||
517 | .p3 equ [ebp-232] |
||
518 | .p2 equ [ebp-236] |
||
519 | .p1 equ [ebp-240] |
||
520 | |||
521 | |||
522 | .tx_ptr equ [ebp-244] |
||
523 | |||
524 | ; movaps xmm7,xmm3 |
||
525 | ; movaps xmm3,xmm5 |
||
526 | ; movaps xmm5,xmm7 |
||
527 | |||
528 | |||
; Store y and narrow the four dword clip bounds to signed words.
529 | mov .y,ecx |
||
530 | packssdw xmm2,xmm2 |
||
531 | ; movaps xmm7,xmm2 |
||
532 | ; movhps xmm2,[the_zero] |
||
533 | ; pshuflw xmm2,xmm2,11111000b |
||
534 | ; pshufd xmm2,xmm2,11111100b |
||
535 | ; movlps xmm7,[the_zero] |
||
536 | ; pshufhw xmm7,xmm7,11111111b |
||
537 | ; movlps xmm7,[the_zero] |
||
538 | ; psrldq xmm7,4 |
||
539 | ; por xmm2,xmm7 |
||
; The movq writes y_min, y_max, x_min and x_max as four adjacent words.
540 | movq .y_min,xmm2 |
||
; Reject the row when y < y_min or y >= y_max (signed word compares).
541 | cmp cx,.y_min |
||
542 | jl .end_line |
||
543 | cmp cx,.y_max |
||
544 | jge .end_line ; |
||
545 | |||
; Empty span when x1 == x2. If x1 > x2, swap the endpoints together with
; their normals and their (tx, ty, z) vectors so that eax is the left end.
546 | cmp eax,ebx |
||
547 | je .end_line |
||
548 | jl @f |
||
549 | xchg eax,ebx |
||
550 | movaps xmm7,xmm0 |
||
551 | movaps xmm0,xmm1 |
||
552 | movaps xmm1,xmm7 |
||
553 | movaps xmm7,xmm3 |
||
554 | movaps xmm3,xmm5 |
||
555 | movaps xmm5,xmm7 |
||
556 | @@: |
||
557 | |||
; Reject spans that lie entirely right or left of the clip rectangle.
558 | cmp ax,.x_max |
||
559 | jge .end_line |
||
560 | cmp bx,.x_min |
||
561 | jle .end_line |
||
562 | movaps .lv,xmm4 |
||
563 | movaps .n1,xmm0 |
||
564 | movaps .n2,xmm1 |
||
565 | mov .lx1,eax |
||
566 | mov .lx2,ebx |
||
567 | movaps .tx1,xmm3 |
||
568 | movaps .tx2,xmm5 |
||
569 | movd .x_res,xmm6 |
||
570 | mov .tx_ptr,edx |
||
; Per-pixel increments over the unclipped span length (x2 - x1):
; .dn = (n2 - n1) / len, .dtx = ((tx, ty, z)2 - (tx, ty, z)1) / len.
; xmm1 and xmm5 keep these increments for the clipping code that follows.
571 | sub ebx,eax |
||
572 | cvtsi2ss xmm7,ebx |
||
573 | shufps xmm7,xmm7,0 |
||
574 | subps xmm1,xmm0 |
||
575 | divps xmm1,xmm7 |
||
576 | movaps .dn,xmm1 |
||
577 | subps xmm5,xmm3 |
||
578 | divps xmm5,xmm7 |
||
579 | movaps .dtx,xmm5 |
||
580 | |||
581 | |||
582 | |||
; Left clipping: when lx1 < x_min, advance the start normal and the start
; (tx, ty, z) vector by (x_min - lx1) increments and start at x_min.
; xmm5 and xmm1 still hold the .dtx and .dn increments computed just above.
583 | mov ebx,.lx1 |
||
584 | cmp bx,.x_min ; clipping on function4 |
||
585 | jge @f |
||
586 | movzx eax,word .x_min |
||
587 | sub eax,ebx |
||
588 | cvtsi2ss xmm7,eax |
||
589 | shufps xmm7,xmm7,0 |
||
590 | mulps xmm5,xmm7 |
||
591 | mulps xmm1,xmm7 |
||
592 | addps xmm5,.tx1 |
||
593 | addps xmm1,.n1 |
||
594 | movsx eax,word .x_min |
||
595 | movaps .tx1,xmm5 |
||
596 | movaps .n1,xmm1 |
||
597 | mov dword .lx1,eax |
||
598 | |||
599 | @@: |
||
; Right clipping: lx2 = min(lx2, x_max).
600 | movzx eax,word .x_max |
||
601 | cmp .lx2,eax |
||
602 | jl @f |
||
603 | mov .lx2,eax |
||
604 | @@: |
||
; Byte offset of the first pixel: (x_res * y + lx1) * 4, applied to both the
; screen pointer and the float buffer pointer. mul overwrites edx; the texture
; pointer was saved to .tx_ptr before this point.
605 | mov eax,.x_res |
||
606 | mul dword .y |
||
607 | add eax,.lx1 |
||
608 | shl eax,2 |
||
609 | add edi,eax |
||
610 | add esi,eax |
||
611 | |||
; ecx = number of pixels to draw; xmm2 = running (tx, ty, z, unused).
612 | mov ecx,.lx2 |
||
613 | sub ecx,.lx1 |
||
614 | ; movaps xmm0,.n1 |
||
615 | movaps xmm2,.tx1 |
||
616 | ; xorps xmm1,xmm1 |
||
617 | align 16 |
||
618 | .ddraw: |
||
; Per-pixel loop. xmm5 is cleared; the light loop accumulates into it.
619 | ; movhlps xmm7,xmm2 |
||
620 | ; cmpnltss xmm7,dword[esi] |
||
621 | ; movd eax,xmm7 |
||
622 | ; or eax,eax |
||
623 | ; jnz .skip |
||
624 | xorps xmm5,xmm5 |
||
625 | ; movhlps xmm7,xmm2 |
||
626 | ; movss [esi],xmm7 |
; .cnv = .n1 * rsqrt(sum of squares of all four lanes of .n1): the
; interpolated normal scaled to approximately unit length (rsqrtps is an
; approximation).
627 | movaps xmm7,.n1 ;xmm0 |
||
628 | mulps xmm7,xmm7 ; normalize |
||
629 | haddps xmm7,xmm7 |
||
630 | haddps xmm7,xmm7 |
||
631 | rsqrtps xmm7,xmm7 |
||
632 | mulps xmm7,.n1 ;xmm0 |
||
633 | ; andps xmm7,[abs_z_coof] |
||
634 | movaps .cnv,xmm7 |
||
635 | |||
; Split the texture position into integer and fractional parts:
; xmm7 = truncated integers (kept for the texel fetch further down),
; .xf/.yf = fractions used as the bilinear weights. Only an upper bound is
; applied first (minps with [tex_m2]).
636 | movaps xmm6,xmm2 |
||
637 | minps xmm6,[tex_m2] ; float TEX_X-2,TEX_Y-2 |
||
638 | cvttps2dq xmm7,xmm6 |
||
639 | cvtdq2ps xmm4,xmm7 |
||
640 | subps xmm6,xmm4 |
||
641 | movlps .xf,xmm6 |
||
642 | ; movaps xmm5,.lv |
||
; eax = address of the first light record.
643 | mov eax,lights_aligned ; global |
||
644 | align 16 |
||
645 | .again_col: |
||
; One iteration per 64-byte light record, from lights_aligned up to
; lights_aligned_end. Record fields as used below:
;   [eax]    vector dotted with the pixel normal
;   [eax+16] colour scaled by max(dot, 0)   (presumably diffuse)
;   [eax+32] colour added unconditionally   (presumably ambient)
;   [eax+48] colour scaled by dot^16        (presumably specular)
; xmm0 = dot product, present in every lane after the two haddps.
646 | movaps xmm0,[eax] ; calc multiple lights |
||
647 | mulps xmm0,.cnv ;.lv ; last dword should be zeroed |
||
648 | haddps xmm0,xmm0 |
||
649 | haddps xmm0,xmm0 |
||
650 | ; andps xmm0,[abs_val] ;calc absolute value |
||
651 | if 1 |
||
652 | ; stencil |
; z is lane 2 of xmm2. The low dword of xmm6 ends up all ones only when
; exactly one of (z + aprox) and (z - aprox) is not less than the float at
; [esi]. Otherwise the dot^16 term is skipped and xmm4 stays zero.
653 | movhlps xmm6,xmm2 |
||
654 | movhlps xmm4,xmm2 |
||
655 | addss xmm6,[aprox] |
||
656 | subss xmm4,[aprox] |
||
657 | cmpnltss xmm6,dword[esi] |
||
658 | cmpnltss xmm4,dword[esi] |
||
659 | xorps xmm6,xmm4 |
||
660 | xorps xmm4,xmm4 |
||
661 | movd ebx,xmm6 |
||
662 | cmp ebx,-1 |
||
663 | jne .no_reflective |
||
664 | end if |
; xmm4 = dot^16 (four successive squarings) * [eax+48].
665 | movaps xmm4,xmm0 |
||
666 | mulps xmm4,xmm4 |
||
667 | mulps xmm4,xmm4 |
||
668 | mulps xmm4,xmm4 |
||
669 | mulps xmm4,xmm4 |
||
670 | mulps xmm4,[eax+48] |
||
671 | |||
672 | .no_reflective: |
||
; xmm4 += max(dot, 0) * [eax+16] + [eax+32]; xmm5 keeps the per-lane maximum
; over all lights.
673 | maxps xmm0,[the_zero] |
||
674 | ; movaps xmm1,xmm0 |
||
675 | mulps xmm0,[eax+16] |
||
676 | addps xmm4,xmm0 |
||
677 | addps xmm4,[eax+32] |
||
678 | maxps xmm5,xmm4 |
||
679 | add eax,64 |
||
680 | cmp eax,lights_aligned_end |
||
681 | jnz .again_col |
||
; Apply the upper bound stored at [mask_255f] to the accumulated colour.
682 | minps xmm5,[mask_255f] |
||
683 | |||
684 | ; texture coords work |
||
; eax = tx_ptr + 3 * (tx + (ty shl TEX_SHIFT)): address of the 3-byte texel at
; the integer position left in xmm7 by the fraction split earlier in .ddraw.
; ebx = eax + TEX_X*3 (presumably the same column one texture row down).
; NOTE(review): each movd reads 4 bytes, one more than a 3-byte texel.
685 | movd eax,xmm7 |
||
686 | psrldq xmm7,4 |
||
687 | movd ebx,xmm7 |
||
688 | shl ebx,TEX_SHIFT |
||
689 | add eax,ebx |
||
690 | lea eax,[eax*3] |
||
691 | add eax,.tx_ptr |
||
692 | mov ebx,eax |
||
693 | add ebx,TEX_X*3 |
||
; Load the 2x2 texel neighbourhood and interleave it so that each dword of
; xmm7 holds the same channel byte of the four texels.
694 | movd xmm7,[eax] |
||
695 | movd xmm6,[eax+3] |
||
696 | movd xmm4,[ebx] |
||
697 | movd xmm3,[ebx+3] |
||
698 | punpcklbw xmm7,xmm6 ;xmm7 r1 r2 g1 g2 b1 b2 |
||
699 | punpcklbw xmm4,xmm3 ;xmm4 r3 r4 g3 g4 b3 b4 |
||
700 | punpcklwd xmm7,xmm4 ;xmm7 r1 r2 r3 r4 g1 g2 g3 g4 ... |
||
; xmm7/xmm6/xmm4 = first/second/third channel of the four texels.
701 | movdqa xmm6,xmm7 |
||
702 | movdqa xmm4,xmm7 |
||
703 | psrldq xmm6,4 |
||
704 | psrldq xmm4,8 |
||
705 | |||
; Zero-extend the four channel bytes of each register to dwords.
706 | punpcklbw xmm7,[the_zero] ; broadcasted 0 |
||
707 | punpcklbw xmm6,[the_zero] |
||
708 | punpcklbw xmm4,[the_zero] |
||
709 | punpcklwd xmm7,[the_zero] |
||
710 | punpcklwd xmm6,[the_zero] |
||
711 | punpcklwd xmm4,[the_zero] |
||
712 | |||
713 | |||
714 | ; calc w ......... |
||
; Bilinear weights, interleaved with the int -> float channel conversions:
; xmm3 = (1-xf, 1-yf, xf, yf); the two shuffles and the product give
; ((1-xf)(1-yf), xf(1-yf), (1-xf)yf, xf*yf), matching the texel order
; (x,y), (x+1,y), (x,y+1), (x+1,y+1).
; The subps reads 16 bytes at .xf; its upper half is then replaced by movhps.
715 | movlps xmm3,[the_one] ; broadcasted dword 1.0 |
||
716 | cvtdq2ps xmm7,xmm7 |
||
717 | subps xmm3,.xf |
||
718 | cvtdq2ps xmm6,xmm6 |
||
719 | movhps xmm3,.xf |
||
720 | cvtdq2ps xmm4,xmm4 |
||
721 | movaps xmm1,xmm3 ; 1-xf, 1-yf, xf, yf |
||
722 | shufps xmm3,xmm3,10001000b |
||
723 | shufps xmm1,xmm1,11110101b |
||
724 | mulps xmm3,xmm1 |
||
725 | |||
; Weighted sum per channel (multiply, then two horizontal adds), then gather
; the three results into xmm7 = (r, g, b, b).
726 | mulps xmm7,xmm3 |
||
727 | mulps xmm6,xmm3 |
||
728 | mulps xmm4,xmm3 |
||
729 | haddps xmm7,xmm7 ; r |
||
730 | haddps xmm6,xmm6 ; g |
||
731 | haddps xmm4,xmm4 ; b |
||
732 | haddps xmm7,xmm7 ; r |
||
733 | haddps xmm6,xmm6 ; g |
||
734 | haddps xmm4,xmm4 ; b |
||
735 | movlhps xmm7,xmm6 |
||
736 | shufps xmm7,xmm7,11101000b |
||
737 | movlhps xmm7,xmm4 |
||
||
738 | |||
; Shade and blend: light colour * filtered texel, converted to integers and
; shifted right by 8 (divide by 256), packed down to bytes with saturation,
; then added to the existing pixel with unsigned byte saturation (paddusb).
739 | mulps xmm5,xmm7 |
||
740 | cvtps2dq xmm5,xmm5 |
||
741 | psrld xmm5,8 |
||
742 | movd xmm6,[edi] |
||
743 | packssdw xmm5,xmm5 |
||
744 | packuswb xmm5,xmm5 |
||
745 | paddusb xmm5,xmm6 |
||
746 | movd [edi],xmm5 |
||
; .skip is only referenced by a commented-out jump in this listing.
747 | .skip: |
||
; Advance to the next pixel: both buffer pointers, the running normal in .n1
; and the running (tx, ty, z) vector in xmm2.
748 | add edi,4 |
||
749 | add esi,4 |
||
750 | movaps xmm0,.n1 ; cur normal |
||
751 | addps xmm0,.dn |
||
752 | addps xmm2,.dtx |
||
753 | movaps .n1,xmm0 |
||
754 | sub ecx,1 |
||
755 | jnz .ddraw |
||
756 | |||
757 | .end_line: |
||
; Epilogue: release the frame reserved in the prologue and restore ebp.
758 | add esp,350 |
||
759 | pop ebp |
||
760 | |||
761 | ret |
761 | ret |