Rev 8232 | Rev 8719 | Go to most recent revision | Only display areas with differences | Regard whitespace | Details | Blame | Last modification | View Log | RSS feed
Rev 8232 | Rev 8666 | ||
---|---|---|---|
; Number of fractional bits used for fixed-point x coordinates and x slopes.
ROUND2 equ 10

stencil_tri:
;-----------------------------------------------------------------------
; Scan-converts one triangle into the s-buffer, one scanline at a time.
; Each scanline is handed to stencil_line together with the interpolated
; depth at both of its ends.
;
;   in:  eax  = x1 shl 16 + y1
;        ebx  = x2 shl 16 + y2
;        ecx  = x3 shl 16 + y3
;        esi  = pointer to s-buffer
;        xmm0 = z1, z2, z3 as dword floats (low -> high)
;
;   Written without being saved: eax, ebx, ecx, edx, xmm0, xmm1, xmm2, flags.
;-----------------------------------------------------------------------

; Packed vertices. A whole dword is stored at .yN, so y occupies the low
; word and x the word two bytes above it.
.x1     equ [ebp-2]
.y1     equ [ebp-4]
.x2     equ [ebp-6]
.y2     equ [ebp-8]
.x3     equ [ebp-10]
.y3     equ [ebp-12]

; Per-scanline steps along the three edges: x in fixed point, z as float.
.dx12   equ dword[ebp-20]
.dx13   equ dword[ebp-24]
.dx23   equ dword[ebp-28]
.dz12   equ dword[ebp-32]
.dz13   equ dword[ebp-36]
.dz23   equ dword[ebp-40]

; Running depth values; .zz1/.zz2 are adjacent so one movlps fetches both.
.zz2    equ [ebp-44]
.zz1    equ [ebp-48]

; Vertex depths, written with a single movaps at .z1.
.z3     equ [ebp-56]
.z2     equ [ebp-60]
.z1     equ [ebp-64]
.s_buff equ [ebp-68]

        push    ebp
        mov     ebp,esp
        sub     esp,128
        and     ebp,0xfffffff0          ; round the frame base down to 16 so the
                                        ; movaps store to .z1 is aligned; esp is
                                        ; untouched, so the epilogue still works

; --- sort the vertices by ascending y, keeping the depths in step --------
.order_12:
        cmp     ax,bx                   ; y1 <= y2 ?
        jle     .order_23
        xchg    eax,ebx
        shufps  xmm0,xmm0,11100001b     ; swap z1 <-> z2
.order_23:
        cmp     bx,cx                   ; y2 <= y3 ?
        jle     .sorted
        xchg    ebx,ecx
        shufps  xmm0,xmm0,11011000b     ; swap z2 <-> z3
        jmp     .order_12               ; the new middle vertex may beat the top
.sorted:
        mov     .y1,eax                 ; spill the packed vertices
        mov     .y2,ebx
        mov     .y3,ecx

        movaps  .z1,xmm0                ; z1, z2, z3 (+ unused high lane)
        mov     .s_buff,esi

; --- trivial reject ---------------------------------------------------------
; Bit 15 is the sign of y and bit 31 the sign of x. After the three ANDs a
; bit survives only if it is set in every vertex, so the branch is taken
; when all x are negative or all y are negative.
        mov     edx,80008000h
        and     edx,ebx
        and     edx,ecx
        and     edx,eax
        test    edx,80008000h
        jne     .done

; --- edge 1-2: step per scanline --------------------------------------------
        mov     bx,.y2
        sub     bx,.y1
        jnz     .slope12_calc
        mov     .dx12,0                 ; flat edge: no step
        mov     .dz12,0
        jmp     .slope12_ready
.slope12_calc:
        mov     ax,.x2
        sub     ax,.x1
        cwde
        movsx   ebx,bx                  ; ebx = y2 - y1
        shl     eax,ROUND2
        cdq
        idiv    ebx
        mov     .dx12,eax               ; ((x2 - x1) shl ROUND2) / (y2 - y1)

        movss   xmm1,.z2
        cvtsi2ss xmm2,ebx
        subss   xmm1,.z1
        divss   xmm1,xmm2
        movss   .dz12,xmm1              ; (z2 - z1) / (y2 - y1)

.slope12_ready:
; --- edge 1-3 ---------------------------------------------------------------
        mov     bx,.y3
        sub     bx,.y1
        jnz     .slope13_calc
        mov     .dx13,0
        mov     .dz13,0
        jmp     .slope13_ready
.slope13_calc:
        mov     ax,.x3
        sub     ax,.x1
        cwde
        movsx   ebx,bx                  ; ebx = y3 - y1
        shl     eax,ROUND2
        cdq
        idiv    ebx
        mov     .dx13,eax

        movss   xmm1,.z3
        cvtsi2ss xmm2,ebx
        subss   xmm1,.z1
        divss   xmm1,xmm2
        movss   .dz13,xmm1

.slope13_ready:
; --- edge 2-3 ---------------------------------------------------------------
        mov     bx,.y3
        sub     bx,.y2
        jnz     .slope23_calc
        mov     .dx23,0
        mov     .dz23,0
        jmp     .slope23_ready
.slope23_calc:
        mov     ax,.x3
        sub     ax,.x2
        cwde
        movsx   ebx,bx                  ; ebx = y3 - y2
        shl     eax,ROUND2
        cdq
        idiv    ebx
        mov     .dx23,eax

        movss   xmm1,.z3
        cvtsi2ss xmm2,ebx
        subss   xmm1,.z2
        divss   xmm1,xmm2
        movss   .dz23,xmm1

.slope23_ready:
; --- upper part: scanlines y1 .. y2-1 ---------------------------------------
        mov     edx,.z1                 ; both running depths start at z1
        mov     .zz1,edx
        mov     .zz2,edx
        movsx   eax,word .x1
        shl     eax,ROUND2              ; eax = fixed-point x on edge 1-3
        mov     ebx,eax                 ; ebx = fixed-point x on edge 1-2

        mov     cx,.y1                  ; cx = current scanline
        cmp     cx,.y2
        jge     .upper_done
.upper_row:
        pushad                          ; keep eax/ebx/ecx across the call
        sar     ebx,ROUND2              ; drop the fraction: integer span ends
        sar     eax,ROUND2
        movlps  xmm0,.zz1               ; xmm0 low -> high = zz1, zz2
        mov     esi,.s_buff

        call    stencil_line

        popad
        add     eax,.dx13               ; step both edges to the next scanline
        add     ebx,.dx12

        movss   xmm1,.zz1
        movss   xmm2,.zz2
        addss   xmm1,.dz13
        addss   xmm2,.dz12
        movss   .zz1,xmm1
        movss   .zz2,xmm2

        add     cx,1
        cmp     cx,.y2
        jl      .upper_row

.upper_done:
; --- lower part: scanlines y2 .. y3-1 ---------------------------------------
; Edge 1-3 carries on from where it stopped; the other side restarts at
; vertex 2 and follows edge 2-3.
        mov     edx,.z2
        mov     .zz2,edx
        movsx   ebx,word .x2
        shl     ebx,ROUND2

        mov     cx,.y2
        cmp     cx,.y3
        jge     .done
.lower_row:
        pushad

        sar     ebx,ROUND2
        sar     eax,ROUND2
        movlps  xmm0,.zz1
        mov     esi,.s_buff

        call    stencil_line

        popad

        add     eax,.dx13
        add     ebx,.dx23

        movss   xmm1,.zz1
        movss   xmm2,.zz2
        addss   xmm1,.dz13
        addss   xmm2,.dz23
        movss   .zz1,xmm1
        movss   .zz2,xmm2

        add     cx,1
        cmp     cx,.y3
        jl      .lower_row
.done:

        add     esp,128                 ; esp was never realigned, so this
        pop     ebp                     ; lands exactly on the saved ebp

        ret
222 | 222 | ||
stencil_line:
;----------------------------------------------------
; Depth-tests one horizontal span and writes the nearer values into the
; s-buffer. A buffer cell is overwritten only when the interpolated depth
; is strictly less than the value already stored there.
;
;-------------in xmm0 - lo -> hi z1, z2
;--------------- eax - x1 ---------------------------
;--------------- ebx - x2 ---------------------------
;--------------- ecx - y (only cx is read) ----------
;--------------- esi - pointer to s-buffer
;
; The buffer is addressed as one dword float per pixel with size_x_var
; pixels per row. Rows with y <= 0 or y >= size_y_var, empty spans and
; spans lying entirely off one side of the buffer are skipped.
;
; Written without being saved: eax, ebx, ecx, edx, esi, xmm0, xmm1, xmm2,
; flags.
;----------------------------------------------------

.dz     equ [ebp-4]                     ; depth step per pixel
.z2     equ [ebp-8]                     ; depth at the right end
.z1     equ [ebp-12]                    ; depth at the left end (adjacent to
                                        ; .z2: both are stored by one movlps)
.x2     equ [ebp-16]
.x1     equ [ebp-20]
.s_buf  equ [ebp-24]

        push    ebp
        mov     ebp,esp
        sub     esp,64

; --- reject rows outside the buffer -----------------------------------------
        or      cx,cx
        jle     .l_quit

        cmp     cx,[size_y_var]
        jge     .l_quit

        movzx   ecx,cx                  ; ecx = y, upper half cleared
        mov     .s_buf,esi

; --- order the ends so that x1 < x2, depths follow --------------------------
        cmp     eax,ebx
        je      .l_quit                 ; zero-length span
        jl      .l_ok

        xchg    eax,ebx
        shufps  xmm0,xmm0,11100001b     ; swap z1 <-> z2
.l_ok:

        cmp     ax,[size_x_var]
        jge     .l_quit                 ; starts right of the buffer
        cmp     bx,0
        jle     .l_quit                 ; ends left of the buffer

        movlps  .z1,xmm0                ; stores z1 and z2
        mov     .x1,eax
        mov     .x2,ebx

; --- depth step per pixel: (z2 - z1) / (x2 - x1) ----------------------------
        sub     ebx,eax
        movss   xmm0,.z2
        cvtsi2ss xmm1,ebx
        subss   xmm0,.z1
        divss   xmm0,xmm1
        movss   .dz,xmm0

; --- clip the left end to column 0 ------------------------------------------
        movzx   edx,word[size_x_var]    ; right limit when no left clip occurs
        cmp     eax,1
        jge     @f
        mov     eax,.x1
        neg     eax                     ; eax = number of columns skipped
        cvtsi2ss xmm2,eax
        mulss   xmm2,.dz
        addss   xmm2,.z1
        movss   .z1,xmm2                ; advance the starting depth to match
        mov     dword .x1,0
        movzx   edx,word[size_x_var]
        sub     edx,1                   ; right limit used on this path
      @@:
; --- clip the right end ------------------------------------------------------
        cmp     .x2,edx
        jl      @f
        mov     .x2,edx

      @@:
; --- esi = s_buf + (y * size_x + x1) * 4 -------------------------------------
        movzx   edx,word[size_x_var]
        mov     esi,.s_buf
        mov     eax,ecx                 ; y
        mul     edx
        add     eax,.x1

        shl     eax,2
        add     esi,eax

        mov     ecx,.x2
        sub     ecx,.x1                 ; ecx = pixels to process
        jle     .l_quit                 ; nothing left after clipping; without
                                        ; this the count-down below would wrap
                                        ; and run far past the buffer
        movss   xmm2,.z1                ; xmm2 = current depth
.ccalc:
        ; comiss reports its result through ZF/PF/CF only and clears SF/OF,
        ; so the unsigned branch family has to be used. With the stored value
        ; as first operand, jbe is taken for "stored < new", "stored = new"
        ; and "unordered", leaving the store for "new < stored" alone.
        movss   xmm1,dword[esi]
        comiss  xmm1,xmm2
        jbe     @f
        movss   dword[esi],xmm2
      @@:
        add     esi,4
        addss   xmm2,.dz
        sub     ecx,1
        jnz     .ccalc
.l_quit:
        mov     esp,ebp
        pop     ebp
        ret