Rev 554 | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
554 | serge | 1 | ;***************************************************************************** |
2 | ;* |
||
3 | ;* Open Watcom Project |
||
4 | ;* |
||
5 | ;* Portions Copyright (c) 1983-2002 Sybase, Inc. All Rights Reserved. |
||
6 | ;* |
||
7 | ;* ======================================================================== |
||
8 | ;* |
||
9 | ;* This file contains Original Code and/or Modifications of Original |
||
10 | ;* Code as defined in and that are subject to the Sybase Open Watcom |
||
11 | ;* Public License version 1.0 (the 'License'). You may not use this file |
||
12 | ;* except in compliance with the License. BY USING THIS FILE YOU AGREE TO |
||
13 | ;* ALL TERMS AND CONDITIONS OF THE LICENSE. A copy of the License is |
||
14 | ;* provided with the Original Code and Modifications, and is also |
||
15 | ;* available at www.sybase.com/developer/opensource. |
||
16 | ;* |
||
17 | ;* The Original Code and all software distributed under the License are |
||
18 | ;* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
||
19 | ;* EXPRESS OR IMPLIED, AND SYBASE AND ALL CONTRIBUTORS HEREBY DISCLAIM |
||
20 | ;* ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF |
||
21 | ;* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR |
||
22 | ;* NON-INFRINGEMENT. Please see the License for the specific language |
||
23 | ;* governing rights and limitations under the License. |
||
24 | ;* |
||
25 | ;* ======================================================================== |
||
26 | ;* |
||
27 | ;* Description: __fpatan_chk, a software replacement for FPATAN that |
||
28 | ;* does its divisions through __fdiv_fpr. |
||
29 | ;* |
||
30 | ;***************************************************************************** |
||
31 | |||
32 | |||
33 | ; static char sccs_id[] = "@(#)fpatan32.asm 1.7 12/21/94 08:33:45"; |
||
34 | ; |
||
35 | ; This code is being published by Intel to users of the Pentium(tm) |
||
36 | ; processor. Recipients are authorized to copy, modify, compile, use and |
||
37 | ; distribute the code. |
||
38 | ; |
||
39 | ; Intel makes no warranty of any kind with regard to this code, including |
||
40 | ; but not limited to, implied warranties or merchantability and fitness for |
||
41 | ; a particular purpose. Intel assumes no responsibility for any errors that |
||
42 | ; may appear in this code. |
||
43 | ; |
||
44 | ; No patent licenses are granted, express or implied. |
||
45 | ; |
||
46 | ; |
||
47 | include mdef.inc |
||
48 | |||
49 | .386 |
||
50 | .387 |
||
51 | |||
52 | |||
53 | _TEXT SEGMENT PARA PUBLIC USE32 'CODE' |
||
704 | serge | 54 | _TEXT ENDS |
554 | serge | 55 | |
56 | |||
704 | serge | 57 | _DATA SEGMENT DWORD PUBLIC USE32 'DATA' |
554 | serge | 58 | |
59 | |||
; Byte offsets from ESP into the scratch frame that __fpatan_chk reserves
; with "sub esp, STACK_SIZE":
;   Y, X       - 10-byte (tbyte) copies of the two operands
;   PREV_CW    - caller's FPU control word (target of fnstcw)
;   PATCH_CW   - modified control word that is loaded with fldcw
;   SPILL      - dword used to move k between fistp / mov / fild
;   STACK_SIZE - total size of the frame in bytes
60 | Y EQU 0 |
||
61 | X EQU 12 |
||
62 | PREV_CW EQU 24 |
||
63 | PATCH_CW EQU 28 |
||
64 | SPILL EQU 32 |
||
65 | STACK_SIZE EQU 36 |
||
66 | |||
67 | |||
; 1.0 and -1.0 as 64-bit doubles (low dword first).  pos_1 is loaded with
; "fld qword ptr"; neg_1 is not referenced by __fpatan_chk.
68 | pos_1 DD 00000000H |
||
69 | DD 3ff00000H |
||
70 | |||
71 | neg_1 DD 00000000H |
||
72 | DD 0bff00000H |
||
73 | |||
74 | |||
; Jump targets for the final result fix-up.  __fpatan_chk indexes this table
; with EDX, which it builds as sflag*4 + Sx*2 + Sy.
75 | dispatch_table DD offset label0 |
||
76 | DD offset label1 |
||
77 | DD offset label2 |
||
78 | DD offset label3 |
||
79 | DD offset label4 |
||
80 | DD offset label5 |
||
81 | DD offset label6 |
||
82 | DD offset label7 |
||
83 | ;end dispatch table |
||
84 | |||
; pi and pi/2 as 80-bit extended values (10 bytes each, little-endian):
; DB,DB = mantissa bits 0-15, first DD = mantissa bits 16-47, last DD =
; mantissa bits 48-63 in its low word and sign/exponent in its high word.
; Both share the mantissa 0C90FDAA22168C235H; only the exponent differs.
85 | pi DB 35H |
||
86 | DB 0c2H |
||
87 | DD 0daa22168H |
||
88 | DD 4000c90fH |
||
89 | |||
90 | pi_by_2 DB 35H |
||
91 | DB 0c2H |
||
92 | DD 0daa22168H |
||
93 | DD 3fffc90fH |
||
94 | |||
; 16.0 and 1/16 as 32-bit singles, used to form k = 16*z and g = k/16.
95 | flt_sixteen DD 41800000H |
||
96 | |||
97 | one_by_sixteen DD 3d800000H |
||
98 | |||
99 | |||
; Polynomial coefficients B1..B6 as 80-bit extended values (DW = mantissa
; bits 0-15, first DD = bits 16-47, last DD = bits 48-63 plus sign/exponent).
; Their values are approximately -1/3, 1/5, -1/7, 1/9, -1/11 and 1/13.
100 | B1 DW 0AAA8H |
||
101 | DD 0AAAAAAAAH |
||
102 | DD 0BFFDAAAAH |
||
103 | |||
104 | B2 DW 2D6EH |
||
105 | DD 0CCCCCCCCH |
||
106 | DD 3FFCCCCCH |
||
107 | |||
108 | B3 DW 4892H |
||
109 | DD 249241F9H |
||
110 | DD 0BFFC9249H |
||
111 | |||
112 | B4 DW 0C592H |
||
113 | DD 3897CDECH |
||
114 | DD 3FFBE38EH |
||
115 | |||
116 | B5 DW 5DDDH |
||
117 | DD 0C17BC162H |
||
118 | DD 0BFFBBA2DH |
||
119 | |||
120 | B6 DW 4854H |
||
121 | DD 77C7C78EH |
||
122 | DD 3FFB9C80H |
||
123 | |||
124 | |||
; arctan(k/16) for k = 0..16 (17 rows).  Each row is 16 bytes: an 80-bit
; extended value (mantissa low dword, mantissa high dword, sign/exponent
; word) followed by 6 bytes of zero padding.  __fpatan_chk reads a row with
; "fld tbyte ptr atan_k_by_16[ecx]" where ECX = k*16.  The last row is pi/4.
125 | atan_k_by_16 dd 000000000H, 000000000H, 000000000H, 000000000H |
||
126 | dd 067EF4E37H, 0FFAADDB9H, 000003FFAH, 000000000H |
||
127 | dd 0617B6E33H, 0FEADD4D5H, 000003FFBH, 000000000H |
||
128 | dd 072D81135H, 0BDCBDA5EH, 000003FFCH, 000000000H |
||
129 | dd 06406EB15H, 0FADBAFC9H, 000003FFCH, 000000000H |
||
130 | dd 03F5E5E6AH, 09B13B9B8H, 000003FFDH, 000000000H |
||
131 | dd 026F78474H, 0B7B0CA0FH, 000003FFDH, 000000000H |
||
132 | dd 0611FE5B6H, 0D327761EH, 000003FFDH, 000000000H |
||
133 | dd 00DDA7B45H, 0ED63382BH, 000003FFDH, 000000000H |
||
134 | dd 0D9867E2AH, 0832BF4A6H, 000003FFEH, 000000000H |
||
135 | dd 0F7F59F9BH, 08F005D5EH, 000003FFEH, 000000000H |
||
136 | dd 071BDDA20H, 09A2F80E6H, 000003FFEH, 000000000H |
||
137 | dd 034F70924H, 0A4BC7D19H, 000003FFEH, 000000000H |
||
138 | dd 0B4D8C080H, 0AEAC4C38H, 000003FFEH, 000000000H |
||
139 | dd 0C2319E74H, 0B8053E2BH, 000003FFEH, 000000000H |
||
140 | dd 0AC526641H, 0C0CE85B8H, 000003FFEH, 000000000H |
||
141 | dd 02168C235H, 0C90FDAA2H, 000003FFEH, 000000000H |
||
142 | |||
704 | serge | 143 | _DATA ENDS |
554 | serge | 144 | |
704 | serge | 145 | _BSS SEGMENT DWORD PUBLIC USE32 'BSS' |
146 | _BSS ENDS |
||
554 | serge | 147 | |
148 | |||
149 | EXTRN __fdiv_fpr:NEAR |
||
150 | |||
704 | serge | 151 | DGROUP GROUP _DATA,_BSS |
554 | serge | 152 | |
153 | |||
154 | _TEXT SEGMENT PARA PUBLIC USE32 'CODE' |
||
704 | serge | 155 | ASSUME CS:_TEXT,DS:DGROUP,ES:DGROUP, SS:DGROUP |
554 | serge | 156 | public __fpatan_chk |
157 | |||
; __fpatan_chk - replacement for the FPATAN instruction.
;   Entry: ST(0) = X, ST(1) = Y.  Both are popped into the scratch frame.
;   Exit:  a single result is left in ST(0); EAX, ECX and EDX are preserved.
; Operands whose mantissa integer bit (bit 63) is clear, or whose exponent
; field is 0 or 7fffh, are handed to the hardware FPATAN at hw_fpatan.
; All other operands fall through to the software evaluation that follows.
158 | defpe __fpatan_chk |
||
159 | push eax |
||
160 | push ecx |
||
161 | push edx |
||
162 | sub esp, STACK_SIZE |
||
163 | fstp tbyte ptr [esp+X] ; save X |
||
164 | fstp tbyte ptr [esp+Y] ; save Y |
||
165 | |||
; "add reg, reg" shifts bit 63 of the mantissa (the explicit integer bit)
; into CF, so jnc is taken when that bit is 0.
166 | mov ecx, [esp+Y+4] |
||
167 | add ecx, ecx |
||
168 | jnc hw_fpatan ; unnormals (explicit 1 missing) |
||
169 | mov eax, [esp+X+4] |
||
170 | add eax, eax |
||
171 | jnc hw_fpatan ; unnormals (explicit 1 missing) |
||
172 | mov ecx, [esp+Y+8] ; load sign/exponent word of Y |
||
173 | mov eax, [esp+X+8] ; load sign/exponent word of X |
||
174 | and ecx, 7fffh ; Ey = exponent Y |
||
175 | jz hw_fpatan ; Ey = 0 |
||
176 | and eax, 7fffh ; Ex = exponent X |
||
177 | jz hw_fpatan ; Ex = 0 |
||
178 | cmp ecx, 7fffh ; check if Ey = 0x7fffh |
||
179 | je hw_fpatan |
||
180 | cmp eax, 7fffh ; check if Ex = 0x7fffh |
||
181 | je hw_fpatan |
||
182 | |||
; Reload both operands as magnitudes: afterwards ST(0) = v = |Y| and
; ST(1) = u = |X|.  The signs are recovered later from the saved copies.
183 | fld tbyte ptr [esp+X] ; reload X |
||
184 | fabs ; |X| = u |
||
185 | fld tbyte ptr [esp+Y] ; reload Y |
||
186 | fabs ; |Y| = v |
||
187 | |||
188 | ; The following six instructions turn off exceptions and set the |
||
189 | ; precision control to 80 bits. The former is necessary to |
||
190 | ; force any traps to be taken at the divide instead of the scaling |
||
191 | ; code. The latter is necessary in order to get full precision for |
||
192 | ; codes with incoming 32 and 64 bit precision settings. If |
||
193 | ; it can be guaranteed that before reaching this point, the underflow |
||
194 | ; exception is masked and the precision control is at 80 bits, these |
||
195 | ; six instructions can be omitted. |
||
196 | ; The AND also clears bits 10-11 (rounding control = round to nearest). |
||
; NOTE(review): label0..label7 reload the control word from PREV_CW, so at
; least the fnstcw has to stay even if the patching is removed.
; Only the low 16 bits of the PREV_CW/PATCH_CW dwords are meaningful:
; fnstcw and fldcw transfer a 16-bit control word.
197 | fnstcw [PREV_CW+esp] ; save caller's control word |
||
198 | mov edx, [PREV_CW+esp] |
||
199 | or edx, 033fh ; mask exceptions, pc=80 |
||
200 | and edx, 0f3ffh |
||
201 | mov [PATCH_CW+esp], edx |
||
202 | fldcw [PATCH_CW+esp] ; mask exceptions & pc=80 |
||
203 | |||
204 | |||
; Range reduction.  Order the magnitudes so that the smaller one is in ST(0),
; form z = smaller/larger, then split z into g = k/16 (k = 16*z converted to
; an integer by fistp) and s = (z-g)/(1+g*z).  On exit ST(0) = s,
; ECX = k*16 (byte offset into atan_k_by_16) and EDX = sflag, which is 1
; when |Y| >= |X| forced a swap.
; NOTE(review): __fdiv_fpr is external.  From the stack notes it appears to
; divide ST(0) by ST(1) and pop, with EAX = 0fh selecting that form - confirm.
205 | xor edx, edx ; initialize sflag = 0 |
||
206 | fcom ; compare v = |Y| (ST0) with u = |X| (ST1) |
||
207 | push eax |
||
208 | fstsw ax |
||
209 | sahf |
||
210 | pop eax |
||
; CF set here means |Y| < |X|: already ordered, sflag stays 0.
211 | jb order_X_Y_ok |
||
212 | fxch |
||
213 | inc edx ; sflag = 1 |
||
214 | order_X_Y_ok: |
||
215 | push eax |
||
216 | mov eax, 0fh |
||
217 | call __fdiv_fpr ; z = v/u (u/v when the operands were swapped) |
||
218 | pop eax |
||
219 | fld dword ptr flt_sixteen ; 16.0 |
||
220 | fmul st, st(1) ; z*16.0 |
||
221 | ; Top of stack looks like k, z |
||
222 | fistp dword ptr [SPILL+esp] ; store k as int |
||
223 | mov ecx, [SPILL+esp] |
||
224 | shl ecx, 4 |
||
225 | fild dword ptr[SPILL+esp] |
||
226 | fmul dword ptr one_by_sixteen; 1.0/16.0 |
||
227 | ; Top of stack looks like g, z |
||
228 | fld st(1) ; duplicate z |
||
229 | fsub st, st(1) ; z-g = r |
||
230 | fxch |
||
231 | ; Top of stack looks like g, r, z |
||
232 | fmulp st(2), st ; g*z |
||
233 | ; Top of stack looks like r, g*z |
||
234 | fld qword ptr pos_1 ; load 1.0 |
||
235 | faddp st(2), st ; 1+g*z |
||
236 | ; Top of stack looks like r, 1+g*z |
||
237 | push eax |
||
238 | mov eax, 0fh |
||
239 | call __fdiv_fpr ; r/(1+g*z) = s |
||
240 | pop eax |
||
; Polynomial evaluation and dispatch.  With t = s*s this computes
;   poly = s + s*(B1*t + B2*t^2 + B3*t^3 + B4*t^4 + B5*t^5 + B6*t^6)
; and then w = poly + atan_k_by_16[k].  In the stack notes "t2"/"t4" mean
; t^2/t^4, "even" collects the B2/B4/B6 terms and "odd" the B3/B5 terms.
; The integer instructions interleaved with the FPU code shift the sign bits
; of X and then Y into EDX, giving EDX = sflag*4 + Sx*2 + Sy for the jump.
; NOTE(review): the stack notes show eight live entries at the deepest point,
; so this presumably requires the caller to have nothing else on the FPU
; stack - confirm.
241 | fld st(0) ; duplicate s |
||
242 | fmul st,st(1) ; t = s*s |
||
243 | ; Top of stack looks like t, s |
||
244 | |||
245 | fld st(0) |
||
246 | fmul st, st(1) |
||
247 | ; Top of stack looks like t2, t, s |
||
248 | fld st(0) |
||
249 | fmul st, st(1) |
||
250 | fld tbyte ptr B6 |
||
251 | fld tbyte ptr B5 |
||
252 | ; Top of stack looks like B5, B6, t4, t2, t, s |
||
253 | fxch |
||
254 | fmul st, st(2) |
||
255 | fld tbyte ptr B4 |
||
256 | fxch st(2) |
||
257 | fmul st, st(3) |
||
258 | ; Top of stack looks like B5t4, B6t4, B4, t4, t2, t, s |
||
259 | fld tbyte ptr B3 |
||
260 | fxch st(2) |
||
261 | fmul st, st(5) |
||
262 | ; Top of stack looks like B6t6, B5t4, B3, B4, t4, t2, t, s |
||
263 | fxch st(3) |
||
264 | fmulp st(4), st |
||
265 | fld tbyte ptr B2 |
||
266 | ; Top of stack looks like B2, B5t4, B3, B6t6, B4t4, t2, t, s |
||
267 | fxch st(3) |
||
268 | faddp st(4), st |
||
; EAX = sign/exponent word of X; "shl 16" then "add eax, eax" moves its
; sign bit into CF, and the adc below shifts that bit into EDX.
269 | mov eax, [esp+X+8] |
||
270 | fld tbyte ptr B1 |
||
271 | fxch |
||
272 | shl eax, 16 |
||
273 | ; Top of stack looks like B5t4, B1, B3, B2, even, t2, t, s |
||
274 | fmul st, st(6) |
||
275 | fxch st(2) |
||
276 | add eax, eax |
||
277 | fmul st, st(5) |
||
278 | ; Top of stack looks like B3t2, B1, B5t5, B2, even, t2, t, s |
||
279 | fxch st(3) |
||
280 | adc edx, edx ; |sflag|Sx| |
||
281 | fmulp st(5), st |
||
282 | fxch st(2) |
||
283 | mov eax, [Y+8+esp] ; load sign/exponent word of Y |
||
284 | fmul st, st(5) |
||
285 | ; Top of stack looks like B3t3, B5t5, B1, even, B2t2, t, s |
||
286 | fxch st(2) |
||
287 | shl eax, 16 |
||
288 | fmulp st(5), st |
||
289 | ; Top of stack looks like B5t5, B3t3, even, B2t2, B1t, s |
||
290 | fxch st(2) |
||
291 | faddp st(3), st |
||
292 | add eax, eax |
||
293 | faddp st(1), st |
||
294 | adc edx, edx ; |sflag|Sx|Sy| |
||
295 | ; Top of stack looks like odd, even, B1t, s |
||
296 | faddp st(2), st |
||
297 | faddp st(1), st |
||
298 | fmul st,st(1) ; s*(odd+even) |
||
299 | faddp st(1), st ; poly |
||
300 | |||
301 | fld tbyte ptr atan_k_by_16[ecx] ; arctan[k;16] |
||
302 | faddp st(1), st ; w = poly + arctan(g) |
||
303 | |||
304 | jmp dword ptr dispatch_table[edx*4] |
||
305 | |||
; Result fix-ups reached through dispatch_table.  On entry ST(0) = w and the
; table index was EDX = sflag*4 + Sx*2 + Sy (sflag = 1 when |Y| >= |X| and
; the operands were swapped; Sx, Sy = sign bits of X and Y).  Each handler
; adjusts w, reloads the caller's control word from PREV_CW, releases the
; scratch frame, restores EDX/ECX/EAX and returns.
; index 0 (sflag=0, Sx=0, Sy=0): result = w
306 | label0: |
||
307 | fldcw [esp+PREV_CW] |
||
308 | add esp, STACK_SIZE |
||
309 | pop edx |
||
310 | pop ecx |
||
311 | pop eax |
||
312 | ret |
||
; index 1 (sflag=0, Sx=0, Sy=1): result = -w
313 | label1: |
||
314 | fchs |
||
315 | fldcw [esp+PREV_CW] |
||
316 | add esp, STACK_SIZE |
||
317 | pop edx |
||
318 | pop ecx |
||
319 | pop eax |
||
320 | ret |
||
; index 2 (sflag=0, Sx=1, Sy=0): result = pi - w
321 | label2: |
||
322 | fld tbyte ptr pi |
||
323 | fsubrp st(1), st ; pi - w |
||
324 | fldcw [esp+PREV_CW] |
||
325 | add esp, STACK_SIZE |
||
326 | pop edx |
||
327 | pop ecx |
||
328 | pop eax |
||
329 | ret |
||
; index 3 (sflag=0, Sx=1, Sy=1): result = -(pi - w)
330 | label3: |
||
331 | fld tbyte ptr pi |
||
332 | fsubrp st(1), st ; pi - w |
||
333 | fchs ; - (pi - w) |
||
334 | fldcw [esp+PREV_CW] |
||
335 | add esp, STACK_SIZE |
||
336 | pop edx |
||
337 | pop ecx |
||
338 | pop eax |
||
339 | ret |
||
; index 4 (sflag=1, Sx=0, Sy=0): result = pi/2 - w
340 | label4: |
||
341 | fld tbyte ptr pi_by_2 |
||
342 | fsubrp st(1), st ; pi/2 - w |
||
343 | fldcw [esp+PREV_CW] |
||
344 | add esp, STACK_SIZE |
||
345 | pop edx |
||
346 | pop ecx |
||
347 | pop eax |
||
348 | ret |
||
; index 5 (sflag=1, Sx=0, Sy=1): result = -(pi/2 - w)
349 | label5: |
||
350 | fld tbyte ptr pi_by_2 |
||
351 | fsubrp st(1), st ; pi/2 - w |
||
352 | fchs ; - (pi/2 - w) |
||
353 | fldcw [esp+PREV_CW] |
||
354 | add esp, STACK_SIZE |
||
355 | pop edx |
||
356 | pop ecx |
||
357 | pop eax |
||
358 | ret |
||
; index 6 (sflag=1, Sx=1, Sy=0): result = pi/2 + w
359 | label6: |
||
360 | fld tbyte ptr pi_by_2 |
||
361 | faddp st(1), st ; pi/2 + w |
||
362 | fldcw [esp+PREV_CW] |
||
363 | add esp, STACK_SIZE |
||
364 | pop edx |
||
365 | pop ecx |
||
366 | pop eax |
||
367 | ret |
||
; index 7 (sflag=1, Sx=1, Sy=1): result = -(pi/2 + w)
368 | label7: |
||
369 | fld tbyte ptr pi_by_2 |
||
370 | faddp st(1), st ; pi/2 + w |
||
371 | fchs ; -(pi/2+w) |
||
372 | fldcw [esp+PREV_CW] |
||
373 | add esp, STACK_SIZE |
||
374 | pop edx |
||
375 | pop ecx |
||
376 | pop eax |
||
377 | ret |
||
378 | |||
379 | |||
; Fallback for operands rejected by the checks at the top of __fpatan_chk:
; push Y then X back (so ST(0) = X, ST(1) = Y as on entry) and let the
; hardware FPATAN produce the result.  Every jump to this label happens
; before the control word is modified, so no fldcw is needed on this path.
380 | hw_fpatan: |
||
381 | fld tbyte ptr [esp+Y] ; reload Y |
||
382 | fld tbyte ptr [esp+X] ; reload X |
||
383 | fpatan |
||
384 | add esp, STACK_SIZE |
||
385 | pop edx |
||
386 | pop ecx |
||
387 | pop eax |
||
388 | ret |
||
389 | __fpatan_chk ENDP |
||
390 | |||
391 | _TEXT ENDS |
||
704 | serge | 392 | END |