Rev 8666 | Go to most recent revision | Only display areas with differences | Regard whitespace | Details | Blame | Last modification | View Log | RSS feed
Rev 8666 | Rev 9237 | ||
---|---|---|---|
1 | ; Glass like rendering triangle by Maciej Guba. |
1 | ; Glass like rendering triangle by Maciej Guba. |
2 | ; http://macgub.hekko.pl, macgub3@wp.pl |
2 | ; http://macgub.co.pl, macgub3@wp.pl |
3 | 3 | ||
ROUND2 equ 10                           ; fractional bits of the fixed-point edge x values

glass_tri:
;----procedure renders a glass-like triangle with z coord ----
;----interpolation ( Catmull algorithm ) ---------------------
;----the normal vector is normalized in every pixel ----------
;----(per-pixel work is done by glass_line) ------------------
;------------------in - eax - x1 shl 16 + y1 ------------------
;---------------------- ebx - x2 shl 16 + y2 ------------------
;---------------------- ecx - x3 shl 16 + y3 ------------------
;---------------------- edx - ptr to stencil_buff -------------
;---------------------- esi - pointer to Z-buffer filled ------
;----------------------       with dd float variables ---------
;----------------------       (not stored or read here) -------
;---------------------- edi - pointer to screen buffer --------
;---------------------- xmm0 - 1st normal vector --------------
;---------------------- xmm1 - 2nd normal vector --------------
;---------------------- xmm2 - 3rd normal vector --------------
;---------------------- xmm3 - normalized light vector --------
;---------------------- xmm4 - lo -> hi z1, z2, z3 coords -----
;----------------------       as dword floats -----------------
;---------------------- xmm5 - lo -> hi y_min, y_max, ---------
;----------------------       x_min, x_max as dword integers --
;---------------------- stack - no parameters -----------------
;--------------------------------------------------------------
;----------------- procedure doesn't save registers !! --------

        push    ebp
        mov     ebp,esp
        sub     esp,512                 ; room for the locals below + pushad frames
        sub     ebp,16
        and     ebp,0xfffffff0          ; 16-byte align the frame base for movaps/movdqa

  ; --- local frame (all offsets relative to the aligned ebp) ---
  .1_nv         equ [ebp-16]            ; vertex normals (sorted by y)
  .2_nv         equ [ebp-32]
  .3_nv         equ [ebp-48]
  .l_v          equ [ebp-64]            ; light vector
  .z3           equ [ebp-72]            ; z1,z2,z3 are stored with one movaps at .z1
  .z2           equ [ebp-76]
  .z1           equ [ebp-80]
  .x1           equ [ebp-82]            ; each x/y pair is one dword: y in the low word
  .y1           equ [ebp-84]
  .x2           equ [ebp-86]
  .y2           equ [ebp-88]
  .x3           equ [ebp-90]
  .y3           equ [ebp-92]
  .Zbuf         equ [ebp-96]
  .x_max        equ [ebp-100]           ; clip rectangle, stored with one movdqa at .y_min
  .x_min        equ [ebp-104]
  .y_max        equ [ebp-108]
  .y_min        equ [ebp-112]
  .screen       equ [ebp-116]
  .dx12         equ [ebp-120]           ; per-scanline x steps (fixed point, ROUND2 bits)
  .dx13         equ [ebp-124]
  .dx23         equ [ebp-128]
  .dn12         equ [ebp-144]           ; per-scanline normal steps
  .dn13         equ [ebp-160]
  .dn23         equ [ebp-176]
  .dz12         equ [ebp-180]           ; per-scanline z steps
  .dz13         equ [ebp-184]
  .dz23         equ [ebp-188]

  .cnv1         equ [ebp-208]           ; cur normal vectors
  .cnv2         equ [ebp-224]
  .cz2          equ [ebp-228]           ; cur z values (cz1,cz2 are read as one qword)
  .cz1          equ [ebp-232]
  .stencil_buff equ [ebp-236]

  ; --- sort the three vertices by ascending y, carrying z and normals along ---
  .sort3:
        cmp     ax,bx                   ; y1 <= y2 ?
        jle     .sort1
        xchg    eax,ebx
        shufps  xmm4,xmm4,11100001b     ; swap z1 <-> z2
        movaps  xmm6,xmm0
        movaps  xmm0,xmm1
        movaps  xmm1,xmm6

  .sort1:
        cmp     bx,cx                   ; y2 <= y3 ?
        jle     .sort2
        xchg    ebx,ecx
        shufps  xmm4,xmm4,11011000b     ; swap z2 <-> z3
        movaps  xmm6,xmm1
        movaps  xmm1,xmm2
        movaps  xmm2,xmm6

        jmp     .sort3                  ; re-check the first pair after a swap

  .sort2:

        movaps  .z1,xmm4
        mov     .y1,eax
        mov     .y2,ebx
        mov     .y3,ecx
        mov     .stencil_buff,edx

        movdqa  .y_min,xmm5
 if 1                                   ; check if at least a fragment
        packssdw xmm5,xmm5              ; of the triangle is in the visible area
        pshuflw xmm5,xmm5,11011000b     ; low words -> y_min, x_min, y_max, x_max
        movdqu  xmm7,.y3                ; words: y3,x3,y2,x2,y1,x1, then the two halves of .z1
        movdqa  xmm6,xmm5
        pshufd  xmm5,xmm5,0             ; xmm5 lo-hi -> broadcasted y_min, x_min
        pshufd  xmm6,xmm6,01010101b     ; xmm6 -> broadcasted y_max, x_max
        movdqa  xmm4,xmm7
        pcmpgtw xmm7,xmm5               ; coord > min
        pcmpgtw xmm4,xmm6               ; coord > max
        pxor    xmm7,xmm4               ; set where min < coord <= max
        pmovmskb eax,xmm7
        ; Keep only the sign bit of the high byte of words 0..5, i.e. the six
        ; vertex coordinates.  Words 6 and 7 belong to .z1 and must not take
        ; part in the test (the previous mask 0x00aaaaaa let them through).
        and     eax,0x0aaa
        jz      .rpt_loop2_end          ; no coordinate inside its range -> nothing to draw
 end if
        movaps  .1_nv,xmm0
        movaps  .2_nv,xmm1
        movaps  .3_nv,xmm2
        movaps  .l_v,xmm3
      ; mov     .Zbuf,esi
        mov     .screen,edi

  ; --- deltas along edge 1-2 ---
        mov     bx,.y2                  ; calc deltas
        sub     bx,.y1
        jnz     .rpt_dx12_make

        xorps   xmm7,xmm7               ; horizontal edge: all steps are zero
        mov     dword .dx12,0
        mov     dword .dz12,0
        movaps  .dn12,xmm7
        jmp     .rpt_dx12_done

  .rpt_dx12_make:
        mov     ax,.x2
        sub     ax,.x1
        cwde
        movsx   ebx,bx
        shl     eax,ROUND2
        cdq
        idiv    ebx                     ; dx12 = ((x2-x1) shl ROUND2) / (y2-y1)
        mov     .dx12,eax

        cvtsi2ss xmm6,ebx               ; xmm6 = float(y2-y1)
        movss   xmm5,.z2
        subss   xmm5,.z1
        divss   xmm5,xmm6
        movss   .dz12,xmm5

        movaps  xmm0,.2_nv
        subps   xmm0,.1_nv
        shufps  xmm6,xmm6,0
        divps   xmm0,xmm6
        movaps  .dn12,xmm0

  .rpt_dx12_done:

  ; --- deltas along edge 1-3 ---
        mov     bx,.y3                  ; calc deltas
        sub     bx,.y1
        jnz     .rpt_dx13_make

        xorps   xmm7,xmm7
        mov     dword .dx13,0
        mov     dword .dz13,0
        movaps  .dn13,xmm7
        jmp     .rpt_dx13_done

  .rpt_dx13_make:
        mov     ax,.x3
        sub     ax,.x1
        cwde
        movsx   ebx,bx
        shl     eax,ROUND2
        cdq
        idiv    ebx                     ; dx13 = ((x3-x1) shl ROUND2) / (y3-y1)
        mov     .dx13,eax

        cvtsi2ss xmm6,ebx
        movss   xmm5,.z3
        subss   xmm5,.z1
        divss   xmm5,xmm6
        movss   .dz13,xmm5

        movaps  xmm0,.3_nv
        subps   xmm0,.1_nv
        shufps  xmm6,xmm6,0
        divps   xmm0,xmm6
        movaps  .dn13,xmm0

  .rpt_dx13_done:

  ; --- deltas along edge 2-3 ---
        mov     bx,.y3                  ; calc deltas
        sub     bx,.y2
        jnz     .rpt_dx23_make

        xorps   xmm7,xmm7
        mov     dword .dx23,0
        mov     dword .dz23,0
        movaps  .dn23,xmm7
        jmp     .rpt_dx23_done

  .rpt_dx23_make:
        mov     ax,.x3
        sub     ax,.x2
        cwde
        movsx   ebx,bx
        shl     eax,ROUND2
        cdq
        idiv    ebx                     ; dx23 = ((x3-x2) shl ROUND2) / (y3-y2)
        mov     .dx23,eax

        cvtsi2ss xmm6,ebx
        movss   xmm5,.z3
        subss   xmm5,.z2
        divss   xmm5,xmm6
        movss   .dz23,xmm5

        movaps  xmm0,.3_nv
        subps   xmm0,.2_nv
        shufps  xmm6,xmm6,0
        divps   xmm0,xmm6
        movaps  .dn23,xmm0

  .rpt_dx23_done:

  ; --- upper part: scanlines y1 .. y2-1, between edges 1-3 and 1-2 ---
        movsx   eax,word .x1
        shl     eax,ROUND2              ; eax - cur x on edge 1-3 (fixed point)
        mov     ebx,eax                 ; ebx - cur x on edge 1-2 (fixed point)
        mov     edx,.z1
        mov     .cz1,edx
        mov     .cz2,edx
        movaps  xmm0,.1_nv
        movaps  .cnv1,xmm0
        movaps  .cnv2,xmm0

        movsx   ecx,word .y1            ; ecx - cur y
        cmp     cx,.y2

        jge     .rpt_loop1_end

  .rpt_loop1:
        pushad                          ; glass_line is free to clobber the GPRs

        movaps  xmm2,.y_min             ; clip rectangle
        movaps  xmm0,.cnv1
        movaps  xmm1,.cnv2
        movlps  xmm3,.cz1               ; lo -> hi cz1, cz2
        movaps  xmm4,.l_v
        sar     ebx,ROUND2              ; fixed point -> integer pixel x
        sar     eax,ROUND2
        mov     edx,.stencil_buff
        mov     edi,.screen
      ; mov     esi,.Zbuf

        call    glass_line

        popad
        movaps  xmm0,.cnv1              ; step the interpolants to the next scanline
        movaps  xmm1,.cnv2
        movss   xmm2,.cz1
        movss   xmm3,.cz2
        addps   xmm0,.dn13
        addps   xmm1,.dn12
        addss   xmm2,.dz13
        addss   xmm3,.dz12
        add     eax,.dx13
        add     ebx,.dx12

        movaps  .cnv1,xmm0
        movaps  .cnv2,xmm1
        movss   .cz1,xmm2
        movss   .cz2,xmm3

        add     ecx,1
        cmp     cx,.y2
        jl      .rpt_loop1

  ; --- lower part: scanlines y2 .. y3-1, between edges 1-3 and 2-3 ---
  .rpt_loop1_end:
        movsx   ecx,word .y2
        cmp     cx,.y3
        jge     .rpt_loop2_end

        movsx   ebx,word .x2            ; eax - cur x1 (continues along edge 1-3)
        shl     ebx,ROUND2              ; ebx - cur x2 (restarts at vertex 2)
        push    dword .z2
        pop     dword .cz2
        movaps  xmm0,.2_nv
        movaps  .cnv2,xmm0

  .rpt_loop2:
        pushad

        movaps  xmm2,.y_min
        movaps  xmm0,.cnv1
        movaps  xmm1,.cnv2
        movlps  xmm3,.cz1
        movaps  xmm4,.l_v
        sar     ebx,ROUND2
        sar     eax,ROUND2
        mov     edx,.stencil_buff
        mov     edi,.screen
      ; mov     esi,.Zbuf

        call    glass_line

        popad
        movaps  xmm0,.cnv1
        movaps  xmm1,.cnv2
        movss   xmm2,.cz1
        movss   xmm3,.cz2
        addps   xmm0,.dn13
        addps   xmm1,.dn23
        addss   xmm2,.dz13
        addss   xmm3,.dz23
        add     eax,.dx13
        add     ebx,.dx23

        movaps  .cnv1,xmm0
        movaps  .cnv2,xmm1
        movss   .cz1,xmm2
        movss   .cz2,xmm3

        add     ecx,1
        cmp     cx,.y3
        jl      .rpt_loop2

  .rpt_loop2_end:

        add     esp,512
        pop     ebp

        ret
align 16
glass_line:
; Draws one horizontal span of a glass-like triangle: the normal is
; interpolated and normalized per pixel, the brightest per-channel result
; over all lights in the lights_aligned list is taken, and that colour is
; added (with byte saturation) to the pixel already in the screen buffer.
; in:
;     xmm0 - normal vector 1
;     xmm1 - normal vect 2
;     xmm3 - lo -> hi z1, z2 coords as dwords floats
;     xmm2 - lo -> hi y_min, y_max, x_min, x_max
;            as dword integers
;     xmm4 - normalized light vector (stored, but the global light list
;            is what the shading loop actually reads)
;     eax - x1
;     ebx - x2
;     ecx - y
;     edx - stencil buff ptr
;     edi - screen buffer
;     esi - z buffer  ===> not needed in glass rendering
; Registers are not preserved (only ebp/esp are restored).

        push    ebp
        mov     ebp,esp
        sub     esp,256
        sub     ebp,16
        and     ebp,0xfffffff0          ; 16-byte align the frame base for movaps

  ; --- local frame (offsets relative to the aligned ebp) ---
  .n1          equ [ebp-16]
  .n2          equ [ebp-32]
  .lv          equ [ebp-48]
  .lx1         equ [ebp-52]
  .lx2         equ [ebp-56]
  .z2          equ [ebp-60]             ; z1,z2 are stored as one qword at .z1
  .z1          equ [ebp-64]
  .screen      equ [ebp-68]
  .zbuff       equ [ebp-72]
  .x_max       equ [ebp-74]             ; clip rectangle as four words, stored at .y_min
  .x_min       equ [ebp-76]
  .y_max       equ [ebp-78]
  .y_min       equ [ebp-80]
  .dn          equ [ebp-96]             ; per-pixel normal step
  .dz          equ [ebp-100]            ; per-pixel z step
  .y           equ [ebp-104]
  .cnv         equ [ebp-128]            ; normalized normal of the current pixel
  .col_sum_b   equ [ebp-136]
  .col_sum_g   equ [ebp-140]
  .col_sum_r   equ [ebp-144]
  .cur_col     equ [ebp-160]
  .stencil_buf equ [ebp-164]

  ; --- reject spans outside the clip rectangle ---
        mov     .y,ecx
        packssdw xmm2,xmm2              ; dwords -> words y_min,y_max,x_min,x_max
        movq    .y_min,xmm2
        cmp     cx,.y_min
        jl      .end_rp_line
        cmp     cx,.y_max
        jge     .end_rp_line

        cmp     eax,ebx
        je      .end_rp_line            ; zero-length span
        jl      @f
        xchg    eax,ebx                 ; make eax the left end, swapping
        movaps  xmm7,xmm0               ; the normals and z values with it
        movaps  xmm0,xmm1
        movaps  xmm1,xmm7
        shufps  xmm3,xmm3,11100001b
      @@:

        cmp     ax,.x_max
        jge     .end_rp_line
        cmp     bx,.x_min
        jle     .end_rp_line
        movaps  .lv,xmm4
        movaps  .n1,xmm0
        movaps  .n2,xmm1
        mov     .lx1,eax
        mov     .lx2,ebx
        mov     .stencil_buf,edx
        movlps  .z1,xmm3

  ; --- per-pixel steps of the normal and of z ---
        sub     ebx,eax
        cvtsi2ss xmm7,ebx               ; span length in pixels
        shufps  xmm7,xmm7,0
        subps   xmm1,xmm0
        divps   xmm1,xmm7
        movaps  .dn,xmm1
        psrldq  xmm3,4                  ; bring z2 into the low lane
        subss   xmm3,.z1
        divss   xmm3,xmm7
        movss   .dz,xmm3

  ; --- clip the left end to x_min, advancing normal and z accordingly ---
        mov     ebx,.lx1
        cmp     bx,.x_min
        jge     @f
        movzx   eax,word .x_min
        sub     eax,ebx                 ; number of pixels skipped on the left
        cvtsi2ss xmm7,eax
        shufps  xmm7,xmm7,0
        mulss   xmm3,xmm7
        mulps   xmm1,xmm7
        addss   xmm3,.z1
        addps   xmm1,.n1
        movsx   eax,word .x_min
        movss   .z1,xmm3
        movaps  .n1,xmm1
        mov     dword .lx1,eax

      @@:
  ; --- clip the right end to x_max ---
        movzx   eax,word .x_max
        cmp     .lx2,eax
        jl      @f
        mov     .lx2,eax
      @@:
  ; --- byte offset of the first pixel: (xres*y + lx1)*4 ---
        movzx   eax,word[xres_var]
        mul     dword .y                ; clobbers edx (stencil ptr was saved above)

        add     eax,.lx1
        shl     eax,2
        add     edi,eax                 ; edi -> first screen pixel
        mov     ebx,eax
        add     ebx,.stencil_buf        ; ebx -> matching stencil entry (dword per pixel)

        mov     ecx,.lx2
        sub     ecx,.lx1                ; ecx - pixels to draw

        movaps  xmm0,.n1                ; xmm0 - cur (unnormalized) normal
        movss   xmm2,.z1                ; xmm2 - cur z
align 16
  .ddraw:
        movaps  xmm7,xmm0
        mulps   xmm7,xmm7               ; normalize
        andps   xmm7,[zero_hgst_dd]     ; presumably clears the highest dword so it does not enter the sum - confirm
        haddps  xmm7,xmm7
        haddps  xmm7,xmm7               ; squared length in every lane
        rsqrtps xmm7,xmm7
        mulps   xmm7,xmm0
      ; maxps   xmm7,[the_zero]
        movaps  .cnv,xmm7

        mov     edx,lights_aligned      ; lights_aligned - global variable
        xorps   xmm1,xmm1               ; instead global can be used .lv - light vect.
                                        ; xmm1 - per-channel maximum over all lights
  .again_col:
        movaps  xmm3,.cnv
        mulps   xmm3,[edx]
        haddps  xmm3,xmm3
        haddps  xmm3,xmm3               ; xmm3 - dot pr

      ; cmp     [bump_flag],1           ; on/off temporarily
      ;                                 ; depend on bump button
      ; je      @f
      ; stencil: the extra term below is used only when the stencil value
      ; lies in (z - aprox, z + aprox]  (assuming aprox is positive)
        movss   xmm5,xmm2
        movss   xmm6,xmm2
        addss   xmm5,[aprox]
        subss   xmm6,[aprox]
        cmpnltss xmm5,dword[ebx]        ; z+aprox >= stencil
        cmpnltss xmm6,dword[ebx]        ; z-aprox >= stencil
        xorps   xmm5,xmm6
        movd    eax,xmm5
        ; xmm6 still holds the comparison mask (a NaN when all ones) plus
        ; stale upper lanes; it is added to the colour below, so clear it
        ; here: a failed stencil test must contribute nothing.
        xorps   xmm6,xmm6               ; does not touch the flags or eax
        or      eax,eax
        jz      .no_reflective
      @@:                               ; target of the disabled bump_flag jump above
        movaps  xmm6,xmm3     ;xmm7
        mulps   xmm6,xmm6
        mulps   xmm6,xmm6
        mulps   xmm6,xmm6
        mulps   xmm6,xmm6
        mulps   xmm6,xmm6               ; dot^32
        mulps   xmm6,[edx+48]           ; scaled by the light's 4th vector (presumably specular colour)
  .no_reflective:
        movaps  xmm7,xmm3
        mulps   xmm7,[edx+16]           ; dot * 2nd vector (presumably diffuse colour)
        addps   xmm7,xmm6
        addps   xmm7,[edx+32]           ; + 3rd vector (presumably ambient colour)
        minps   xmm7,[mask_255f]   ; global

        maxps   xmm1,xmm7
        add     edx,64                  ; size of one light in aligned list
        cmp     edx,lights_aligned_end
        jb      .again_col              ; unsigned: these are addresses
        cvtps2dq xmm1,xmm1
        movd    xmm6,[edi]              ; pixel already on screen
        packssdw xmm1,xmm1
        packuswb xmm1,xmm1              ; four floats -> four bytes
        paddusb xmm1,xmm6               ; saturating add onto the background
        movd    [edi],xmm1

  .skip:
        add     edi,4
        add     ebx,4                   ; stencil_buff
        addps   xmm0,.dn
        addss   xmm2,.dz
        sub     ecx,1
        jnz     .ddraw

  .end_rp_line:
        add     esp,256
        pop     ebp

        ret