Rev 8719 | Rev 9512 | Go to most recent revision | Only display areas with differences | Regard whitespace | Details | Blame | Last modification | View Log | RSS feed
Rev 8719 | Rev 9237 | ||
---|---|---|---|
1 | x3d equ 0 |
1 | x3d equ 0 |
2 | y3d equ 2 |
2 | y3d equ 2 |
3 | z3d equ 4 |
3 | z3d equ 4 |
4 | vec_x equ 0 |
4 | vec_x equ 0 |
5 | vec_y equ 4 |
5 | vec_y equ 4 |
6 | vec_z equ 8 |
6 | vec_z equ 8 |
- | 7 | ||
- | 8 | if 0 ; Ext >= SSE3 |
|
- | 9 | calc_bounding_box: |
|
- | 10 | ; in: |
|
- | 11 | ; xmm0 - normal vector of ray |
|
- | 12 | ; xmm1 - light origin |
|
- | 13 | ; out: |
|
7 | ; 3d point - triple integer word coordinate |
14 | ; eax - axis aligned bounding boxes bit mask |
- | 15 | ||
- | 16 | .rmx equ [ebp-36] |
|
- | 17 | .nray equ [ebp-64] |
|
- | 18 | .origin equ [ebp-80] |
|
- | 19 | .dirfrac equ [ebp-96] |
|
- | 20 | .nrayr equ [ebp-112] |
|
- | 21 | .originr equ [ebp-128] |
|
- | 22 | .tmin equ [ebp-132] |
|
- | 23 | .tmax equ [ebp-136] |
|
- | 24 | ||
- | 25 | ||
- | 26 | push ebp |
|
- | 27 | mov ebp,esp |
|
- | 28 | and ebp,-16 |
|
- | 29 | sub esp,160 |
|
- | 30 | ||
- | 31 | movss xmm5,[rsscale] |
|
- | 32 | shufps xmm5,xmm1,0 |
|
- | 33 | movd xmm2,[vect_x] |
|
- | 34 | punpcklwd xmm2,[the_zero] |
|
- | 35 | cvtdq2ps xmm2,xmm2 |
|
- | 36 | subps xmm1,xmm2 |
|
- | 37 | movaps .origin,xmm1 |
|
- | 38 | mulps xmm0,xmm5 |
|
- | 39 | movaps .nray,xmm0 |
|
- | 40 | ||
- | 41 | mov esi,matrix |
|
- | 42 | lea edi,.rmx |
|
- | 43 | call reverse_mx_3x3 |
|
- | 44 | ||
- | 45 | ; in: esi - ptr to points(normals), each point(normal) coefficient as dword |
|
8 | ; vector - triple float dword coordinate |
46 | ; edi - ptr to rotated points(normals) |
9 | ;----------------------in: -------------------------------- |
47 | ; ebx - ptr to 3x3 (9 dwords, 36 bytes) rotation matrix |
10 | ;------------------------ esi - pointer to 1st 3d point --- |
48 | ; ecx - number of points(normals) |
- | 49 | ||
- | 50 | ; reverse transform |
|
- | 51 | lea esi,.nray |
|
- | 52 | lea edi,.nrayr |
|
- | 53 | lea ebx,.rmx |
|
- | 54 | mov ecx,1 |
|
- | 55 | call rotary |
|
- | 56 | ||
- | 57 | lea esi,.origin |
|
- | 58 | lea edi,.originr |
|
- | 59 | lea ebx,.rmx |
|
- | 60 | mov ecx,1 |
|
- | 61 | call rotary |
|
- | 62 | ||
- | 63 | xor ecx,ecx |
|
- | 64 | mov ebx,aabb1 |
|
- | 65 | xor eax,eax |
|
- | 66 | rcpps xmm7,.nrayr |
|
- | 67 | movaps .dirfrac,xmm7 |
|
- | 68 | ||
- | 69 | .nx_aabb: |
|
- | 70 | movaps xmm5,[ebx] |
|
- | 71 | movaps xmm6,[ebx] |
|
- | 72 | minps xmm5,[the_zero] |
|
- | 73 | maxps xmm6,[the_zero] |
|
11 | ;------------------------ edi - pointer to 2nd 3d point --- |
74 | ; xmm5 - lb corner of AABB with minimal coordinates |
12 | ;------------------------ ebx - pointer to result vector -- |
75 | ; xmm6 - rt corner of AABB with maximum coords |
- | 76 | subps xmm5,.originr |
|
- | 77 | subps xmm6,.originr |
|
- | 78 | mulps xmm5,.dirfrac ; xmm5 - tx1, ty1 |
|
13 | ;---------------------- out : none ------------------------ |
79 | mulps xmm6,.dirfrac ; xmm6 - tx2, ty2 |
- | 80 | movaps xmm1,xmm6 |
|
- | 81 | movaps xmm2,xmm6 |
|
- | 82 | ||
- | 83 | ||
- | 84 | minps xmm1,xmm5 |
|
- | 85 | maxps xmm2,xmm5 |
|
- | 86 | ||
- | 87 | movaps xmm5,xmm1 |
|
- | 88 | movaps xmm6,xmm2 |
|
- | 89 | shufps xmm5,xmm5,11100001b |
|
- | 90 | shufps xmm6,xmm6,11100001b |
|
- | 91 | maxss xmm1,xmm5 ;t min |
|
- | 92 | minss xmm2,xmm6 ;t max |
|
- | 93 | comiss xmm2,xmm1 |
|
- | 94 | jb .no_inter |
|
- | 95 | .yes: |
|
- | 96 | bts eax,ecx |
|
- | 97 | .no_inter: |
|
- | 98 | add ebx,16 |
|
- | 99 | inc ecx |
|
- | 100 | cmp ecx,8 |
|
- | 101 | jne .nx_aabb |
|
- | 102 | ||
- | 103 | ; out: eax - bit mask |
|
- | 104 | add esp,160 |
|
- | 105 | pop ebp |
|
- | 106 | ret |
|
- | 107 | end if |
|
- | 108 | ||
14 | reverse_mx_3x3: |
109 | reverse_mx_3x3: |
15 | ; esi - source matrix |
110 | ; esi - source matrix |
16 | ; edi - desired reversed matrix |
111 | ; edi - desired reversed matrix |
17 | 112 | ||
18 | push ebp |
113 | push ebp |
19 | mov ebp,esp |
114 | mov ebp,esp |
20 | sub esp,4 |
115 | sub esp,4 |
21 | .det equ ebp-4 |
116 | .det equ ebp-4 |
22 | 117 | ||
23 | fninit |
118 | fninit |
24 | fld dword[esi] |
119 | fld dword[esi] |
25 | fmul dword[esi+16] |
120 | fmul dword[esi+16] |
26 | fmul dword[esi+32] |
121 | fmul dword[esi+32] |
27 | fld dword[esi+12] |
122 | fld dword[esi+12] |
28 | fmul dword[esi+28] |
123 | fmul dword[esi+28] |
29 | fmul dword[esi+8] |
124 | fmul dword[esi+8] |
30 | faddp |
125 | faddp |
31 | fld dword[esi+24] |
126 | fld dword[esi+24] |
32 | fmul dword[esi+4] |
127 | fmul dword[esi+4] |
33 | fmul dword[esi+20] |
128 | fmul dword[esi+20] |
34 | faddp |
129 | faddp |
35 | fld dword[esi] |
130 | fld dword[esi] |
36 | fmul dword[esi+28] |
131 | fmul dword[esi+28] |
37 | fmul dword[esi+20] |
132 | fmul dword[esi+20] |
38 | fchs |
133 | fchs |
39 | faddp |
134 | faddp |
40 | fld dword[esi+24] |
135 | fld dword[esi+24] |
41 | fmul dword[esi+16] |
136 | fmul dword[esi+16] |
42 | fmul dword[esi+8] |
137 | fmul dword[esi+8] |
43 | fchs |
138 | fchs |
44 | faddp |
139 | faddp |
45 | fld dword[esi+12] |
140 | fld dword[esi+12] |
46 | fmul dword[esi+4] |
141 | fmul dword[esi+4] |
47 | fmul dword[esi+32] |
142 | fmul dword[esi+32] |
48 | fchs |
143 | fchs |
49 | faddp |
144 | faddp |
50 | fstp dword[.det] |
145 | fstp dword[.det] |
51 | cmp dword[.det],0 |
146 | cmp dword[.det],0 |
52 | jne @f |
147 | jne @f |
53 | int3 |
148 | int3 |
54 | @@: |
149 | @@: |
55 | ; fld1 |
150 | ; fld1 |
56 | ; fdiv dword[.det] |
151 | ; fdiv dword[.det] |
57 | ; fstp dword[.det] |
152 | ; fstp dword[.det] |
58 | 153 | ||
59 | fld dword[esi+16] |
154 | fld dword[esi+16] |
60 | fmul dword[esi+32] |
155 | fmul dword[esi+32] |
61 | fld dword[esi+20] |
156 | fld dword[esi+20] |
62 | fmul dword[esi+28] |
157 | fmul dword[esi+28] |
63 | fchs |
158 | fchs |
64 | faddp |
159 | faddp |
65 | fdiv dword[.det] |
160 | fdiv dword[.det] |
66 | fstp dword[edi] |
161 | fstp dword[edi] |
67 | 162 | ||
68 | fld dword[esi+8] |
163 | fld dword[esi+8] |
69 | fmul dword[esi+28] |
164 | fmul dword[esi+28] |
70 | fld dword[esi+4] |
165 | fld dword[esi+4] |
71 | fmul dword[esi+32] |
166 | fmul dword[esi+32] |
72 | fchs |
167 | fchs |
73 | faddp |
168 | faddp |
74 | fdiv dword[.det] |
169 | fdiv dword[.det] |
75 | fstp dword[edi+4] |
170 | fstp dword[edi+4] |
76 | 171 | ||
77 | fld dword[esi+4] |
172 | fld dword[esi+4] |
78 | fmul dword[esi+20] |
173 | fmul dword[esi+20] |
79 | fld dword[esi+8] |
174 | fld dword[esi+8] |
80 | fmul dword[esi+16] |
175 | fmul dword[esi+16] |
81 | fchs |
176 | fchs |
82 | faddp |
177 | faddp |
83 | fdiv dword[.det] |
178 | fdiv dword[.det] |
84 | fstp dword[edi+8] |
179 | fstp dword[edi+8] |
85 | 180 | ||
86 | fld dword[esi+20] |
181 | fld dword[esi+20] |
87 | fmul dword[esi+24] |
182 | fmul dword[esi+24] |
88 | fld dword[esi+12] |
183 | fld dword[esi+12] |
89 | fmul dword[esi+32] |
184 | fmul dword[esi+32] |
90 | fchs |
185 | fchs |
91 | faddp |
186 | faddp |
92 | fdiv dword[.det] |
187 | fdiv dword[.det] |
93 | fstp dword[edi+12] |
188 | fstp dword[edi+12] |
94 | 189 | ||
95 | fld dword[esi] |
190 | fld dword[esi] |
96 | fmul dword[esi+32] |
191 | fmul dword[esi+32] |
97 | fld dword[esi+8] |
192 | fld dword[esi+8] |
98 | fmul dword[esi+24] |
193 | fmul dword[esi+24] |
99 | fchs |
194 | fchs |
100 | faddp |
195 | faddp |
101 | fdiv dword[.det] |
196 | fdiv dword[.det] |
102 | fstp dword[edi+16] |
197 | fstp dword[edi+16] |
103 | 198 | ||
104 | fld dword[esi+8] |
199 | fld dword[esi+8] |
105 | fmul dword[esi+12] |
200 | fmul dword[esi+12] |
106 | fld dword[esi] |
201 | fld dword[esi] |
107 | fmul dword[esi+20] |
202 | fmul dword[esi+20] |
108 | fchs |
203 | fchs |
109 | faddp |
204 | faddp |
110 | fdiv dword[.det] |
205 | fdiv dword[.det] |
111 | fstp dword[edi+20] |
206 | fstp dword[edi+20] |
112 | 207 | ||
113 | fld dword[esi+12] |
208 | fld dword[esi+12] |
114 | fmul dword[esi+28] |
209 | fmul dword[esi+28] |
115 | fld dword[esi+16] |
210 | fld dword[esi+16] |
116 | fmul dword[esi+24] |
211 | fmul dword[esi+24] |
117 | fchs |
212 | fchs |
118 | faddp |
213 | faddp |
119 | fdiv dword[.det] |
214 | fdiv dword[.det] |
120 | fstp dword[edi+24] |
215 | fstp dword[edi+24] |
121 | 216 | ||
122 | fld dword[esi+4] |
217 | fld dword[esi+4] |
123 | fmul dword[esi+24] |
218 | fmul dword[esi+24] |
124 | fld dword[esi] |
219 | fld dword[esi] |
125 | fmul dword[esi+28] |
220 | fmul dword[esi+28] |
126 | fchs |
221 | fchs |
127 | faddp |
222 | faddp |
128 | fdiv dword[.det] |
223 | fdiv dword[.det] |
129 | fstp dword[edi+28] |
224 | fstp dword[edi+28] |
130 | 225 | ||
131 | fld dword[esi] |
226 | fld dword[esi] |
132 | fmul dword[esi+16] |
227 | fmul dword[esi+16] |
133 | fld dword[esi+4] |
228 | fld dword[esi+4] |
134 | fmul dword[esi+12] |
229 | fmul dword[esi+12] |
135 | fchs |
230 | fchs |
136 | faddp |
231 | faddp |
137 | fdiv dword[.det] |
232 | fdiv dword[.det] |
138 | fstp dword[edi+32] |
233 | fstp dword[edi+32] |
139 | 234 | ||
140 | 235 | ||
141 | mov esp,ebp |
236 | mov esp,ebp |
142 | pop ebp |
237 | pop ebp |
143 | ret |
238 | ret |
- | 239 | ; 3d point - triple integer word coordinate |
|
- | 240 | ; vector - triple float dword coordinate |
|
- | 241 | ;----------------------in: -------------------------------- |
|
- | 242 | ;------------------------ esi - pointer to 1st 3d point --- |
|
- | 243 | ;------------------------ edi - pointer to 2nd 3d point --- |
|
- | 244 | ;------------------------ ebx - pointer to result vector -- |
|
- | 245 | ;---------------------- out : none ------------------------ |
|
144 | 246 | ||
145 | make_vector_r: |
247 | make_vector_r: |
146 | if Ext < SSE2 |
248 | if Ext < SSE2 |
147 | fninit |
249 | fninit |
148 | fld dword[edi] ;edi+x3d |
250 | fld dword[edi] ;edi+x3d |
149 | fsub dword[esi] ;esi+x3d |
251 | fsub dword[esi] ;esi+x3d |
150 | fstp dword[ebx+vec_x] |
252 | fstp dword[ebx+vec_x] |
151 | 253 | ||
152 | fld dword[edi+4] |
254 | fld dword[edi+4] |
153 | fsub dword[esi+4] |
255 | fsub dword[esi+4] |
154 | fstp dword[ebx+vec_y] |
256 | fstp dword[ebx+vec_y] |
155 | 257 | ||
156 | fld dword[edi+8] |
258 | fld dword[edi+8] |
157 | fsub dword[esi+8] |
259 | fsub dword[esi+8] |
158 | fstp dword[ebx+vec_z] |
260 | fstp dword[ebx+vec_z] |
159 | else |
261 | else |
160 | movups xmm0,[esi] |
262 | movups xmm0,[esi] |
161 | movups xmm1,[edi] |
263 | movups xmm1,[edi] |
162 | subps xmm1,xmm0 |
264 | subps xmm1,xmm0 |
163 | movlps [ebx],xmm1 |
265 | movlps [ebx],xmm1 |
164 | movhlps xmm1,xmm1 |
266 | movhlps xmm1,xmm1 |
165 | movss [ebx+8],xmm1 |
267 | movss [ebx+8],xmm1 |
166 | end if |
268 | end if |
167 | 269 | ||
168 | ret |
270 | ret |
169 | ;---------------------- in: ------------------------------- |
271 | ;---------------------- in: ------------------------------- |
170 | ;--------------------------- esi - pointer to 1st vector -- |
272 | ;--------------------------- esi - pointer to 1st vector -- |
171 | ;--------------------------- edi - pointer to 2nd vector -- |
273 | ;--------------------------- edi - pointer to 2nd vector -- |
172 | ;--------------------------- ebx - pointer to result vector |
274 | ;--------------------------- ebx - pointer to result vector |
173 | ;---------------------- out : none |
275 | ;---------------------- out : none |
174 | cross_product: |
276 | cross_product: |
175 | fninit |
277 | fninit |
176 | fld dword [esi+vec_y] |
278 | fld dword [esi+vec_y] |
177 | fmul dword [edi+vec_z] |
279 | fmul dword [edi+vec_z] |
178 | fld dword [esi+vec_z] |
280 | fld dword [esi+vec_z] |
179 | fmul dword [edi+vec_y] |
281 | fmul dword [edi+vec_y] |
180 | fsubp ;st1 ,st |
282 | fsubp ;st1 ,st |
181 | fstp dword [ebx+vec_x] |
283 | fstp dword [ebx+vec_x] |
182 | 284 | ||
183 | fld dword [esi+vec_z] |
285 | fld dword [esi+vec_z] |
184 | fmul dword [edi+vec_x] |
286 | fmul dword [edi+vec_x] |
185 | fld dword [esi+vec_x] |
287 | fld dword [esi+vec_x] |
186 | fmul dword [edi+vec_z] |
288 | fmul dword [edi+vec_z] |
187 | fsubp ;st1 ,st |
289 | fsubp ;st1 ,st |
188 | fstp dword [ebx+vec_y] |
290 | fstp dword [ebx+vec_y] |
189 | 291 | ||
190 | fld dword [esi+vec_x] |
292 | fld dword [esi+vec_x] |
191 | fmul dword [edi+vec_y] |
293 | fmul dword [edi+vec_y] |
192 | fld dword [esi+vec_y] |
294 | fld dword [esi+vec_y] |
193 | fmul dword [edi+vec_x] |
295 | fmul dword [edi+vec_x] |
194 | fsubp ;st1 ,st |
296 | fsubp ;st1 ,st |
195 | fstp dword [ebx+vec_z] |
297 | fstp dword [ebx+vec_z] |
196 | ret |
298 | ret |
- | 299 | cross_aligned: |
|
- | 300 | movaps xmm0,[esi] |
|
- | 301 | movaps xmm1,[esi] |
|
- | 302 | movaps xmm2,[edi] |
|
- | 303 | movaps xmm3,[edi] |
|
- | 304 | shufps xmm0,xmm0,00001001b |
|
- | 305 | shufps xmm1,xmm1,00010010b |
|
- | 306 | shufps xmm2,xmm2,00010010b |
|
- | 307 | shufps xmm3,xmm3,00001001b |
|
- | 308 | mulps xmm0,xmm2 |
|
- | 309 | mulps xmm1,xmm3 |
|
- | 310 | subps xmm0,xmm1 |
|
- | 311 | movaps [ebx],xmm0 |
|
- | 312 | ret |
|
197 | ;----------------------- in: ------------------------------ |
313 | ;----------------------- in: ------------------------------ |
198 | ;---------------------------- edi - pointer to vector ----- |
314 | ;---------------------------- edi - pointer to vector ----- |
199 | ;----------------------- out : none |
315 | ;----------------------- out : none |
200 | normalize_vector: |
316 | normalize_vector: |
201 | if Ext >= SSE3 |
317 | if Ext >= SSE2 |
202 | movups xmm0,[edi] |
318 | movups xmm0,[edi] |
203 | andps xmm0,[zero_hgst_dd] |
319 | andps xmm0,[zero_hgst_dd] |
204 | movups xmm1,xmm0 |
320 | movups xmm1,xmm0 |
205 | mulps xmm0,xmm0 |
321 | mulps xmm0,xmm0 |
- | 322 | movhlps xmm2,xmm0 |
|
- | 323 | addps xmm0,xmm2 |
|
- | 324 | movaps xmm2,xmm0 |
|
- | 325 | shufps xmm2,xmm2,11100101b |
|
- | 326 | addps xmm0,xmm2 |
|
- | 327 | shufps xmm0,xmm0,0 |
|
206 | haddps xmm0,xmm0 |
328 | ; haddps xmm0,xmm0 |
207 | haddps xmm0,xmm0 |
329 | ; haddps xmm0,xmm0 |
208 | rsqrtps xmm0,xmm0 |
330 | rsqrtps xmm0,xmm0 |
209 | mulps xmm0,xmm1 |
331 | mulps xmm0,xmm1 |
210 | movlps [edi],xmm0 |
332 | movlps [edi],xmm0 |
211 | movhlps xmm0,xmm0 |
333 | movhlps xmm0,xmm0 |
212 | movss [edi+8],xmm0 |
334 | movss [edi+8],xmm0 |
213 | else |
335 | else |
214 | 336 | ||
215 | fninit |
337 | fninit |
216 | fld dword [edi+vec_x] |
338 | fld dword [edi+vec_x] |
217 | fmul st, st |
339 | fmul st, st |
218 | fld dword [edi+vec_y] |
340 | fld dword [edi+vec_y] |
219 | fmul st, st |
341 | fmul st, st |
220 | fld dword [edi+vec_z] |
342 | fld dword [edi+vec_z] |
221 | fmul st, st |
343 | fmul st, st |
222 | faddp st1, st |
344 | faddp st1, st |
223 | faddp st1, st |
345 | faddp st1, st |
224 | fsqrt |
346 | fsqrt |
225 | 347 | ||
226 | ftst |
348 | ftst |
227 | fstsw ax |
349 | fstsw ax |
228 | sahf |
350 | sahf |
229 | jnz @f |
351 | jnz @f |
230 | 352 | ||
231 | fst dword [edi+vec_x] |
353 | fst dword [edi+vec_x] |
232 | fst dword [edi+vec_y] |
354 | fst dword [edi+vec_y] |
233 | fstp dword [edi+vec_z] |
355 | fstp dword [edi+vec_z] |
234 | ret |
356 | ret |
235 | @@: |
357 | @@: |
236 | fld st |
358 | fld st |
237 | fld st |
359 | fld st |
238 | fdivr dword [edi+vec_x] |
360 | fdivr dword [edi+vec_x] |
239 | fstp dword [edi+vec_x] |
361 | fstp dword [edi+vec_x] |
240 | fdivr dword [edi+vec_y] |
362 | fdivr dword [edi+vec_y] |
241 | fstp dword [edi+vec_y] |
363 | fstp dword [edi+vec_y] |
242 | fdivr dword [edi+vec_z] |
364 | fdivr dword [edi+vec_z] |
243 | fstp dword [edi+vec_z] |
365 | fstp dword [edi+vec_z] |
244 | end if |
366 | end if |
245 | ret |
367 | ret |
246 | ;------------------in: ------------------------- |
368 | ;------------------in: ------------------------- |
247 | ;------------------ esi - pointer to 1st vector |
369 | ;------------------ esi - pointer to 1st vector |
248 | ;------------------ edi - pointer to 2nd vector |
370 | ;------------------ edi - pointer to 2nd vector |
249 | ;------------------out: ------------------------ |
371 | ;------------------out: ------------------------ |
250 | ;------------------ st0 - dot-product |
372 | ;------------------ st0 - dot-product |
251 | dot_product: |
373 | dot_product: |
252 | fninit |
374 | fninit |
253 | ;if Ext >=SSE3 |
375 | ;if Ext >=SSE3 |
254 | ; movups xmm0,[esi] |
376 | ; movups xmm0,[esi] |
255 | ; movups xmm1,[edi] |
377 | ; movups xmm1,[edi] |
256 | ; andps xmm0,[zero_hgst_dd] |
378 | ; andps xmm0,[zero_hgst_dd] |
257 | ; mulps xmm0,xmm1 |
379 | ; mulps xmm0,xmm1 |
258 | ; haddps xmm0,xmm0 |
380 | ; haddps xmm0,xmm0 |
259 | ; haddps xmm0,xmm0 |
381 | ; haddps xmm0,xmm0 |
260 | ; movss [esp-4],xmm0 |
382 | ; movss [esp-4],xmm0 |
261 | ; fld dword[esp-4] |
383 | ; fld dword[esp-4] |
262 | ;else |
384 | ;else |
263 | fld dword [esi+vec_x] |
385 | fld dword [esi+vec_x] |
264 | fmul dword [edi+vec_x] |
386 | fmul dword [edi+vec_x] |
265 | fld dword [esi+vec_y] |
387 | fld dword [esi+vec_y] |
266 | fmul dword [edi+vec_y] |
388 | fmul dword [edi+vec_y] |
267 | fld dword [esi+vec_z] |
389 | fld dword [esi+vec_z] |
268 | fmul dword [edi+vec_z] |
390 | fmul dword [edi+vec_z] |
269 | faddp |
391 | faddp |
270 | faddp |
392 | faddp |
271 | ;end if |
393 | ;end if |
272 | ret |
394 | ret |
273 | 395 | ||
274 | ; DOS version Coded by Mikolaj Felix aka Majuma |
396 | ; DOS version Coded by Mikolaj Felix aka Majuma |
275 | ; mfelix@polbox.com |
397 | ; mfelix@polbox.com |
276 | ; www.majuma.xt.pl |
398 | ; www.majuma.xt.pl |
277 | ; into FASM translation by Macgub |
399 | ; into FASM translation by Macgub |
278 | init_sincos_tab: |
400 | init_sincos_tab: |
279 | .counter equ dword [ebp-4] ; cur angle |
401 | .counter equ dword [ebp-4] ; cur angle |
280 | 402 | ||
281 | push ebp |
403 | push ebp |
282 | mov ebp,esp |
404 | mov ebp,esp |
283 | 405 | ||
284 | xor eax,eax |
406 | xor eax,eax |
285 | push eax ; init .counter |
407 | push eax ; init .counter |
286 | mov edi,cos_tab |
408 | mov edi,cos_tab |
287 | mov esi,sin_tab |
409 | mov esi,sin_tab |
288 | mov ecx,256 |
410 | mov ecx,256 |
289 | fninit |
411 | fninit |
290 | 412 | ||
291 | fld .counter |
413 | fld .counter |
292 | @@: |
414 | @@: |
293 | fld st |
415 | fld st |
294 | fsincos |
416 | fsincos |
295 | fstp dword [edi] |
417 | fstp dword [edi] |
296 | fstp dword [esi] |
418 | fstp dword [esi] |
297 | ; fadd [piD180] |
419 | ; fadd [piD180] |
298 | fadd [piD128] |
420 | fadd [piD128] |
299 | add esi,4 |
421 | add esi,4 |
300 | add edi,4 |
422 | add edi,4 |
301 | loop @b |
423 | loop @b |
302 | ffree st |
424 | ffree st |
303 | 425 | ||
304 | mov esp,ebp |
426 | mov esp,ebp |
305 | pop ebp |
427 | pop ebp |
306 | ret |
428 | ret |
307 | ;------ |
429 | ;------ |
308 | ; esi - offset (pointer) to angles, edi offset to 3x3 matrix |
430 | ; esi - offset (pointer) to angles, edi offset to 3x3 matrix |
309 | make_rotation_matrix: |
431 | make_rotation_matrix: |
310 | .sinx equ dword[ebp-4] |
432 | .sinx equ dword[ebp-4] |
311 | .cosx equ dword[ebp-8] |
433 | .cosx equ dword[ebp-8] |
312 | .siny equ dword[ebp-12] |
434 | .siny equ dword[ebp-12] |
313 | .cosy equ dword[ebp-16] |
435 | .cosy equ dword[ebp-16] |
314 | .sinz equ dword[ebp-20] |
436 | .sinz equ dword[ebp-20] |
315 | .cosz equ dword[ebp-24] |
437 | .cosz equ dword[ebp-24] |
316 | push ebp |
438 | push ebp |
317 | mov ebp,esp |
439 | mov ebp,esp |
318 | sub esp,24 |
440 | sub esp,24 |
319 | 441 | ||
320 | movzx ebx,word[esi] |
442 | movzx ebx,word[esi] |
321 | shl ebx,2 |
443 | shl ebx,2 |
322 | mov eax,dword[sin_tab+ebx] |
444 | mov eax,dword[sin_tab+ebx] |
323 | mov .sinx,eax |
445 | mov .sinx,eax |
324 | mov edx,dword[cos_tab+ebx] |
446 | mov edx,dword[cos_tab+ebx] |
325 | mov .cosx,edx |
447 | mov .cosx,edx |
326 | 448 | ||
327 | movzx ebx,word[esi+2] |
449 | movzx ebx,word[esi+2] |
328 | shl ebx,2 |
450 | shl ebx,2 |
329 | mov eax,dword[sin_tab+ebx] |
451 | mov eax,dword[sin_tab+ebx] |
330 | mov .siny,eax |
452 | mov .siny,eax |
331 | mov edx,dword[cos_tab+ebx] |
453 | mov edx,dword[cos_tab+ebx] |
332 | mov .cosy,edx |
454 | mov .cosy,edx |
333 | 455 | ||
334 | movzx ebx,word[esi+4] |
456 | movzx ebx,word[esi+4] |
335 | shl ebx,2 |
457 | shl ebx,2 |
336 | mov eax,dword[sin_tab+ebx] |
458 | mov eax,dword[sin_tab+ebx] |
337 | mov .sinz,eax |
459 | mov .sinz,eax |
338 | mov edx,dword[cos_tab+ebx] |
460 | mov edx,dword[cos_tab+ebx] |
339 | mov .cosz,edx |
461 | mov .cosz,edx |
340 | 462 | ||
341 | fninit |
463 | fninit |
342 | fld .cosy |
464 | fld .cosy |
343 | fmul .cosz |
465 | fmul .cosz |
344 | fstp dword[edi] |
466 | fstp dword[edi] |
345 | 467 | ||
346 | fld .sinx |
468 | fld .sinx |
347 | fmul .siny |
469 | fmul .siny |
348 | fmul .cosz |
470 | fmul .cosz |
349 | fld .cosx |
471 | fld .cosx |
350 | fmul .sinz |
472 | fmul .sinz |
351 | fchs |
473 | fchs |
352 | faddp |
474 | faddp |
353 | fstp dword[edi+12] |
475 | fstp dword[edi+12] |
354 | 476 | ||
355 | fld .cosx |
477 | fld .cosx |
356 | fmul .siny |
478 | fmul .siny |
357 | fmul .cosz |
479 | fmul .cosz |
358 | fld .sinx |
480 | fld .sinx |
359 | fmul .sinz |
481 | fmul .sinz |
360 | faddp |
482 | faddp |
361 | fstp dword[edi+24] |
483 | fstp dword[edi+24] |
362 | 484 | ||
363 | fld .cosy |
485 | fld .cosy |
364 | fmul .sinz |
486 | fmul .sinz |
365 | fstp dword[edi+4] |
487 | fstp dword[edi+4] |
366 | 488 | ||
367 | fld .sinx |
489 | fld .sinx |
368 | fmul .siny |
490 | fmul .siny |
369 | fmul .sinz |
491 | fmul .sinz |
370 | fld .cosx |
492 | fld .cosx |
371 | fmul .cosz |
493 | fmul .cosz |
372 | faddp |
494 | faddp |
373 | fstp dword[edi+16] |
495 | fstp dword[edi+16] |
374 | 496 | ||
375 | fld .cosx |
497 | fld .cosx |
376 | fmul .siny |
498 | fmul .siny |
377 | fmul .sinz |
499 | fmul .sinz |
378 | fld .sinx |
500 | fld .sinx |
379 | fchs |
501 | fchs |
380 | fmul .cosz |
502 | fmul .cosz |
381 | faddp |
503 | faddp |
382 | fstp dword[edi+28] |
504 | fstp dword[edi+28] |
383 | 505 | ||
384 | fld .siny |
506 | fld .siny |
385 | fchs |
507 | fchs |
386 | fstp dword[edi+8] |
508 | fstp dword[edi+8] |
387 | 509 | ||
388 | fld .cosy |
510 | fld .cosy |
389 | fmul .sinx |
511 | fmul .sinx |
390 | fstp dword[edi+20] |
512 | fstp dword[edi+20] |
391 | 513 | ||
392 | fld .cosx |
514 | fld .cosx |
393 | fmul .cosy |
515 | fmul .cosy |
394 | fstp dword[edi+32] |
516 | fstp dword[edi+32] |
395 | 517 | ||
396 | mov esp,ebp |
518 | mov esp,ebp |
397 | pop ebp |
519 | pop ebp |
398 | ret |
520 | ret |
399 | ;--------------------- |
521 | ;--------------------- |
400 | ; in: esi - ptr to points(normals), each point(normal) coefficient as dword |
522 | ; in: esi - ptr to points(normals), each point(normal) coefficient as dword |
401 | ; edi - ptr to rotated points(normals) |
523 | ; edi - ptr to rotated points(normals) |
402 | ; ebx - ptr to 3x3 (9 dwords, 36 bytes) rotation matrix |
524 | ; ebx - ptr to 3x3 (9 dwords, 36 bytes) rotation matrix |
403 | ; ecx - number of points(normals) |
525 | ; ecx - number of points(normals) |
404 | rotary: |
526 | rotary: |
405 | if Ext |
527 | if Ext |
406 | fninit |
528 | fninit |
407 | .again: |
529 | .again: |
408 | 530 | ||
409 | fld dword[esi] |
531 | fld dword[esi] |
410 | fmul dword[ebx] |
532 | fmul dword[ebx] |
411 | fld dword[esi+4] |
533 | fld dword[esi+4] |
412 | fmul dword[ebx+12] |
534 | fmul dword[ebx+12] |
413 | faddp |
535 | faddp |
414 | fld dword[esi+8] |
536 | fld dword[esi+8] |
415 | fmul dword[ebx+24] |
537 | fmul dword[ebx+24] |
416 | faddp |
538 | faddp |
417 | fstp dword[edi] |
539 | fstp dword[edi] |
418 | 540 | ||
419 | 541 | ||
420 | fld dword[esi+4] |
542 | fld dword[esi+4] |
421 | fmul dword[ebx+16] |
543 | fmul dword[ebx+16] |
422 | fld dword[esi] |
544 | fld dword[esi] |
423 | fmul dword[ebx+4] |
545 | fmul dword[ebx+4] |
424 | faddp |
546 | faddp |
425 | fld dword[esi+8] |
547 | fld dword[esi+8] |
426 | fmul dword[ebx+28] |
548 | fmul dword[ebx+28] |
427 | faddp |
549 | faddp |
428 | fstp dword[edi+4] |
550 | fstp dword[edi+4] |
429 | 551 | ||
430 | 552 | ||
431 | fld dword[esi+8] |
553 | fld dword[esi+8] |
432 | fmul dword[ebx+32] |
554 | fmul dword[ebx+32] |
433 | fld dword[esi] |
555 | fld dword[esi] |
434 | fmul dword[ebx+8] |
556 | fmul dword[ebx+8] |
435 | fld dword[esi+4] |
557 | fld dword[esi+4] |
436 | fmul dword[ebx+20] |
558 | fmul dword[ebx+20] |
437 | faddp |
559 | faddp |
438 | faddp |
560 | faddp |
439 | fstp dword[edi+8] |
561 | fstp dword[edi+8] |
440 | 562 | ||
441 | 563 | ||
442 | add esi,12 |
564 | add esi,12 |
443 | add edi,12 |
565 | add edi,12 |
444 | loop .again |
566 | loop .again |
445 | mov [edi],dword -1 |
567 | mov [edi],dword -1 |
446 | else |
568 | else |
447 | ; Copyright (C) 1999-2001 Brian Paul |
569 | ; Copyright (C) 1999-2001 Brian Paul |
448 | ; Copyright (C) Maciej Guba |
570 | ; Copyright (C) Maciej Guba |
449 | ;--------------------- |
571 | ;--------------------- |
450 | ; in: esi - ptr to points(normals), each point(normal) coefficient as dword |
572 | ; in: esi - ptr to points(normals), each point(normal) coefficient as dword |
451 | ; edi - ptr to rotated points(normals) |
573 | ; edi - ptr to rotated points(normals) |
452 | ; ebx - ptr to 3x3 (9 dwords, 36 bytes) rotation matrix |
574 | ; ebx - ptr to 3x3 (9 dwords, 36 bytes) rotation matrix |
453 | ; ecx - number of points(normals) |
575 | ; ecx - number of points(normals) |
454 | ;align 32 |
576 | ;align 32 |
455 | movups xmm4,[ebx] |
577 | movups xmm4,[ebx] |
456 | ; lddqu xmm4,[ebx] ; I tried sse3 :D |
578 | ; lddqu xmm4,[ebx] ; I tried sse3 :D |
457 | movups xmm5,[ebx+12] |
579 | movups xmm5,[ebx+12] |
458 | movups xmm6,[ebx+24] |
580 | movups xmm6,[ebx+24] |
459 | ;align 32 |
581 | ;align 32 |
460 | .again: |
582 | .again: |
461 | movss xmm0,dword[esi] |
583 | movss xmm0,dword[esi] |
462 | shufps xmm0,xmm0,0 |
584 | shufps xmm0,xmm0,0 |
463 | mulps xmm0,xmm4 |
585 | mulps xmm0,xmm4 |
464 | 586 | ||
465 | movss xmm1,dword[esi+4] |
587 | movss xmm1,dword[esi+4] |
466 | shufps xmm1,xmm1,0 |
588 | shufps xmm1,xmm1,0 |
467 | mulps xmm1,xmm5 |
589 | mulps xmm1,xmm5 |
468 | 590 | ||
469 | movss xmm2,dword[esi+8] |
591 | movss xmm2,dword[esi+8] |
470 | shufps xmm2,xmm2,0 |
592 | shufps xmm2,xmm2,0 |
471 | mulps xmm2,xmm6 |
593 | mulps xmm2,xmm6 |
472 | 594 | ||
473 | addps xmm0,xmm1 |
595 | addps xmm0,xmm1 |
474 | addps xmm0,xmm2 |
596 | addps xmm0,xmm2 |
475 | 597 | ||
476 | movups [edi],xmm0 |
598 | movups [edi],xmm0 |
477 | 599 | ||
478 | add esi,12 |
600 | add esi,12 |
479 | add edi,12 |
601 | add edi,12 |
480 | dec ecx |
602 | dec ecx |
481 | jne .again |
603 | jne .again |
482 | mov [edi],dword -1 |
604 | mov [edi],dword -1 |
483 | end if |
605 | end if |
484 | ret |
606 | ret |
485 | ;---------------------------------------------- |
607 | ;---------------------------------------------- |
486 | ; esi - pointer to 3x3 matrix |
608 | ; esi - pointer to 3x3 matrix |
487 | add_scale_to_matrix: |
609 | add_scale_to_matrix: |
488 | fninit |
610 | fninit |
489 | fld [rsscale] |
611 | fld [rsscale] |
490 | fld dword[esi] ;----- |
612 | fld dword[esi] ;----- |
491 | fmul st,st1 |
613 | fmul st,st1 |
492 | fstp dword[esi] |
614 | fstp dword[esi] |
493 | fld dword[esi+12] ; x scale |
615 | fld dword[esi+12] ; x scale |
494 | fmul st,st1 |
616 | fmul st,st1 |
495 | fstp dword[esi+12] |
617 | fstp dword[esi+12] |
496 | fld dword[esi+24] |
618 | fld dword[esi+24] |
497 | fmul st,st1 |
619 | fmul st,st1 |
498 | fstp dword[esi+24] ;------ |
620 | fstp dword[esi+24] ;------ |
499 | 621 | ||
500 | fld dword[esi+4] ;----- |
622 | fld dword[esi+4] ;----- |
501 | fmul st,st1 |
623 | fmul st,st1 |
502 | fstp dword[esi+4] |
624 | fstp dword[esi+4] |
503 | fld dword[esi+16] ; y scale |
625 | fld dword[esi+16] ; y scale |
504 | fmul st,st1 |
626 | fmul st,st1 |
505 | fstp dword[esi+16] |
627 | fstp dword[esi+16] |
506 | fld dword[esi+28] |
628 | fld dword[esi+28] |
507 | fmul st,st1 |
629 | fmul st,st1 |
508 | fstp dword[esi+28] ;------ |
630 | fstp dword[esi+28] ;------ |
509 | 631 | ||
510 | 632 | ||
511 | fld dword[esi+8] ;----- |
633 | fld dword[esi+8] ;----- |
512 | fmul st,st1 |
634 | fmul st,st1 |
513 | fstp dword[esi+8] |
635 | fstp dword[esi+8] |
514 | fld dword[esi+20] ; z scale |
636 | fld dword[esi+20] ; z scale |
515 | fmul st,st1 |
637 | fmul st,st1 |
516 | fstp dword[esi+20] |
638 | fstp dword[esi+20] |
517 | fld dword[esi+32] |
639 | fld dword[esi+32] |
518 | fmulp st1,st |
640 | fmulp st1,st |
519 | fstp dword[esi+32] ;------ |
641 | fstp dword[esi+32] ;------ |
520 | 642 | ||
521 | ret |
643 | ret |
522 | 644 | ||
523 | ;in esi - offset to 3d points (point as 3 dwords float) |
645 | ;in esi - offset to 3d points (point as 3 dwords float) |
524 | ; edi - offset to 2d points ( as 3 words integer) |
646 | ; edi - offset to 2d points ( as 3 words integer) |
525 | ; ecx - number of points |
647 | ; ecx - number of points |
526 | translate_points: ; just convert into integer; z coord still needed |
648 | translate_points: ; just convert into integer; z coord still needed |
527 | fninit |
649 | fninit |
528 | .again: |
650 | .again: |
529 | if 0 |
651 | if 0 |
530 | fld dword[esi+8] |
652 | fld dword[esi+8] |
531 | ; fmul [rsscale] |
653 | ; fmul [rsscale] |
532 | fist word[edi+4] |
654 | fist word[edi+4] |
533 | 655 | ||
534 | fisub [zobs] |
656 | fisub [zobs] |
535 | fchs |
657 | fchs |
536 | 658 | ||
537 | fld dword[esi] |
659 | fld dword[esi] |
538 | ; fmul [rsscale] |
660 | ; fmul [rsscale] |
539 | fisub [xobs] |
661 | fisub [xobs] |
540 | fimul [zobs] |
662 | fimul [zobs] |
541 | fdiv st0,st1 |
663 | fdiv st0,st1 |
542 | 664 | ||
543 | fiadd [xobs] |
665 | fiadd [xobs] |
544 | fiadd [vect_x] |
666 | fiadd [vect_x] |
545 | fistp word[edi] |
667 | fistp word[edi] |
546 | 668 | ||
547 | fld dword[esi+4] |
669 | fld dword[esi+4] |
548 | ; fmul [rsscale] |
670 | ; fmul [rsscale] |
549 | fisub [yobs] |
671 | fisub [yobs] |
550 | fimul [zobs] |
672 | fimul [zobs] |
551 | fdivrp ; st0,st1 |
673 | fdivrp ; st0,st1 |
552 | 674 | ||
553 | fiadd [yobs] |
675 | fiadd [yobs] |
554 | fiadd [vect_y] |
676 | fiadd [vect_y] |
555 | fistp word[edi+2] |
677 | fistp word[edi+2] |
556 | end if |
678 | end if |
557 | ; movups xmm0,[esi] |
679 | ; movups xmm0,[esi] |
558 | ; cvtps2dq xmm0,xmm0 |
680 | ; cvtps2dq xmm0,xmm0 |
559 | ; packsdw xmm0,xmm0 |
681 | ; packsdw xmm0,xmm0 |
560 | ; movq [edi] |
682 | ; movq [edi] |
561 | fld dword[esi] |
683 | fld dword[esi] |
562 | fiadd [vect_x] |
684 | fiadd word[vect_x] |
563 | fistp word[edi] |
685 | fistp word[edi] |
564 | fld dword[esi+4] |
686 | fld dword[esi+4] |
565 | fiadd [vect_y] |
687 | fiadd [vect_y] |
566 | fistp word[edi+2] |
688 | fistp word[edi+2] |
567 | fld dword[esi+8] |
689 | fld dword[esi+8] |
568 | fistp word[edi+4] |
690 | fistp word[edi+4] |
569 | add esi,12 |
691 | add esi,12 |
570 | add edi,6 |
692 | add edi,6 |
571 | dec ecx |
693 | dec ecx |
572 | jnz .again |
694 | jnz .again |
573 | 695 | ||
574 | ret |
696 | ret |
575 | >> |
697 | >> |