Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
554 | serge | 1 | ;***************************************************************************** |
2 | ;* |
||
3 | ;* Open Watcom Project |
||
4 | ;* |
||
5 | ;* Portions Copyright (c) 1983-2002 Sybase, Inc. All Rights Reserved. |
||
6 | ;* |
||
7 | ;* ======================================================================== |
||
8 | ;* |
||
9 | ;* This file contains Original Code and/or Modifications of Original |
||
10 | ;* Code as defined in and that are subject to the Sybase Open Watcom |
||
11 | ;* Public License version 1.0 (the 'License'). You may not use this file |
||
12 | ;* except in compliance with the License. BY USING THIS FILE YOU AGREE TO |
||
13 | ;* ALL TERMS AND CONDITIONS OF THE LICENSE. A copy of the License is |
||
14 | ;* provided with the Original Code and Modifications, and is also |
||
15 | ;* available at www.sybase.com/developer/opensource. |
||
16 | ;* |
||
17 | ;* The Original Code and all software distributed under the License are |
||
18 | ;* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
||
19 | ;* EXPRESS OR IMPLIED, AND SYBASE AND ALL CONTRIBUTORS HEREBY DISCLAIM |
||
20 | ;* ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF |
||
21 | ;* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR |
||
22 | ;* NON-INFRINGEMENT. Please see the License for the specific language |
||
23 | ;* governing rights and limitations under the License. |
||
24 | ;* |
||
25 | ;* ======================================================================== |
||
26 | ;* |
||
27 | ;* Description: __fpatan_chk - software stand-in for the x87 FPATAN |
||
28 | ;* instruction; its divisions are done by __fdiv_fpr. |
||
29 | ;* |
||
30 | ;***************************************************************************** |
||
31 | |||
32 | |||
33 | ; static char sccs_id[] = "@(#)fpatan32.asm 1.7 12/21/94 08:33:45"; |
||
34 | ; |
||
35 | ; This code is being published by Intel to users of the Pentium(tm) |
||
36 | ; processor. Recipients are authorized to copy, modify, compile, use and |
||
37 | ; distribute the code. |
||
38 | ; |
||
39 | ; Intel makes no warranty of any kind with regard to this code, including |
||
40 | ; but not limited to, implied warranties or merchantability and fitness for |
||
41 | ; a particular purpose. Intel assumes no responsibility for any errors that |
||
42 | ; may appear in this code. |
||
43 | ; |
||
44 | ; No patent licenses are granted, express or implied. |
||
45 | ; |
||
46 | ; |
||
47 | include mdef.inc |
||
48 | |||
49 | .386 ; accept 80386 instructions |
||
50 | .387 ; accept 80387 coprocessor instructions |
||
51 | |||
52 | |||
53 | _TEXT SEGMENT PARA PUBLIC USE32 'CODE' ; declared empty here; reopened further down for the code |
||
54 | _TEXT ENDS |
||
55 | |||
56 | CONST SEGMENT DWORD PUBLIC USE32 'DATA' ; empty in this file; listed in DGROUP below |
||
57 | CONST ENDS |
||
58 | |||
59 | CONST2 SEGMENT DWORD PUBLIC USE32 'DATA' ; empty in this file; listed in DGROUP below |
||
60 | CONST2 ENDS |
||
61 | |||
62 | DATA32 SEGMENT DWORD PUBLIC USE32 'DATA' ; frame-offset equates and read-only constants used by __fpatan_chk |
||
63 | |||
64 | |||
65 | Y EQU 0 ; frame offset of the saved Y operand (tbyte) |
||
66 | X EQU 12 ; frame offset of the saved X operand (tbyte) |
||
67 | PREV_CW EQU 24 ; frame offset of the caller's FPU control word |
||
68 | PATCH_CW EQU 28 ; frame offset of the modified control word that gets loaded |
||
69 | SPILL EQU 32 ; frame offset of the scratch dword that holds the integer k |
||
70 | STACK_SIZE EQU 36 ; total bytes reserved on the stack for the frame |
||
71 | |||
72 | |||
73 | pos_1 DD 00000000H ; 1.0 as a 64-bit double (low dword first) |
||
74 | DD 3ff00000H |
||
75 | |||
76 | neg_1 DD 00000000H ; -1.0 as a 64-bit double; not referenced by the code in this file |
||
77 | DD 0bff00000H |
||
78 | |||
79 | |||
80 | dispatch_table DD offset label0 ; jump targets indexed by sflag*4 + Sx*2 + Sy |
||
81 | DD offset label1 |
||
82 | DD offset label2 |
||
83 | DD offset label3 |
||
84 | DD offset label4 |
||
85 | DD offset label5 |
||
86 | DD offset label6 |
||
87 | DD offset label7 |
||
88 | ;end dispatch table |
||
89 | |||
90 | pi DB 35H ; pi as an 80-bit extended value (10 bytes, least significant first) |
||
91 | DB 0c2H |
||
92 | DD 0daa22168H |
||
93 | DD 4000c90fH |
||
94 | |||
95 | pi_by_2 DB 35H ; pi/2: same significand as pi, exponent one lower |
||
96 | DB 0c2H |
||
97 | DD 0daa22168H |
||
98 | DD 3fffc90fH |
||
99 | |||
100 | flt_sixteen DD 41800000H ; 16.0 as a 32-bit float |
||
101 | |||
102 | one_by_sixteen DD 3d800000H ; 1.0/16.0 as a 32-bit float |
||
103 | |||
104 | |||
105 | B1 DW 0AAA8H ; B1..B6: 80-bit extended polynomial coefficients, loaded with fld tbyte ptr |
||
106 | DD 0AAAAAAAAH |
||
107 | DD 0BFFDAAAAH |
||
108 | |||
109 | B2 DW 2D6EH |
||
110 | DD 0CCCCCCCCH |
||
111 | DD 3FFCCCCCH |
||
112 | |||
113 | B3 DW 4892H |
||
114 | DD 249241F9H |
||
115 | DD 0BFFC9249H |
||
116 | |||
117 | B4 DW 0C592H |
||
118 | DD 3897CDECH |
||
119 | DD 3FFBE38EH |
||
120 | |||
121 | B5 DW 5DDDH |
||
122 | DD 0C17BC162H |
||
123 | DD 0BFFBBA2DH |
||
124 | |||
125 | B6 DW 4854H |
||
126 | DD 77C7C78EH |
||
127 | DD 3FFB9C80H |
||
128 | |||
129 | |||
130 | atan_k_by_16 dd 000000000H, 000000000H, 000000000H, 000000000H ; 17 entries (k = 0..16) of arctan(k/16), one 80-bit value per 16-byte slot |
||
131 | dd 067EF4E37H, 0FFAADDB9H, 000003FFAH, 000000000H |
||
132 | dd 0617B6E33H, 0FEADD4D5H, 000003FFBH, 000000000H |
||
133 | dd 072D81135H, 0BDCBDA5EH, 000003FFCH, 000000000H |
||
134 | dd 06406EB15H, 0FADBAFC9H, 000003FFCH, 000000000H |
||
135 | dd 03F5E5E6AH, 09B13B9B8H, 000003FFDH, 000000000H |
||
136 | dd 026F78474H, 0B7B0CA0FH, 000003FFDH, 000000000H |
||
137 | dd 0611FE5B6H, 0D327761EH, 000003FFDH, 000000000H |
||
138 | dd 00DDA7B45H, 0ED63382BH, 000003FFDH, 000000000H |
||
139 | dd 0D9867E2AH, 0832BF4A6H, 000003FFEH, 000000000H |
||
140 | dd 0F7F59F9BH, 08F005D5EH, 000003FFEH, 000000000H |
||
141 | dd 071BDDA20H, 09A2F80E6H, 000003FFEH, 000000000H |
||
142 | dd 034F70924H, 0A4BC7D19H, 000003FFEH, 000000000H |
||
143 | dd 0B4D8C080H, 0AEAC4C38H, 000003FFEH, 000000000H |
||
144 | dd 0C2319E74H, 0B8053E2BH, 000003FFEH, 000000000H |
||
145 | dd 0AC526641H, 0C0CE85B8H, 000003FFEH, 000000000H |
||
146 | dd 02168C235H, 0C90FDAA2H, 000003FFEH, 000000000H ; k = 16: same significand as pi, i.e. pi/4 |
||
147 | |||
148 | DATA32 ENDS |
||
149 | |||
150 | BSS32 SEGMENT DWORD PUBLIC USE32 'BSS' ; empty in this file; listed in DGROUP below |
||
151 | BSS32 ENDS |
||
152 | |||
153 | |||
154 | EXTRN __fdiv_fpr:NEAR ; external division helper called twice by __fpatan_chk |
||
155 | |||
156 | DGROUP GROUP CONST,CONST2,DATA32,BSS32 ; the data segments are addressed as one group |
||
157 | |||
158 | |||
159 | _TEXT SEGMENT PARA PUBLIC USE32 'CODE' ; reopen the code segment for the routine |
||
160 | ASSUME CS:_TEXT,DS:DGROUP,ES:DGROUP, SS:nothing |
||
161 | public __fpatan_chk ; the only symbol this file exports |
||
162 | |||
163 | defpe __fpatan_chk |
||
164 | push eax ; __fpatan_chk: stand-in for FPATAN; on entry st(0) = X, st(1) = Y |
||
165 | push ecx ; the result replaces both operands on the FPU stack |
||
166 | push edx ; eax, ecx and edx are saved here and restored before every ret |
||
167 | sub esp, STACK_SIZE ; reserve the local frame (Y, X, control words, SPILL) |
||
168 | fstp tbyte ptr [esp+X] ; save X |
||
169 | fstp tbyte ptr [esp+Y] ; save Y |
||
170 | |||
171 | mov ecx, [esp+Y+4] ; upper dword of Y's significand |
||
172 | add ecx, ecx ; carry = top significand bit (the explicit 1) |
||
173 | jnc hw_fpatan ; unnormals (explicit 1 missing) |
||
174 | mov eax, [esp+X+4] ; upper dword of X's significand |
||
175 | add eax, eax ; carry = top significand bit (the explicit 1) |
||
176 | jnc hw_fpatan ; unnormals (explicit 1 missing) |
||
177 | mov ecx, [esp+Y+8] ; high part of Y (sign and exponent) |
||
178 | mov eax, [esp+X+8] ; high part of X (sign and exponent) |
||
179 | and ecx, 7fffh ; Ey = exponent Y |
||
180 | jz hw_fpatan ; Ey = 0 |
||
181 | and eax, 7fffh ; Ex = exponent X |
||
182 | jz hw_fpatan ; Ex = 0 |
||
183 | cmp ecx, 7fffh ; check if Ey = 7fffh (all exponent bits set) |
||
184 | je hw_fpatan |
||
185 | cmp eax, 7fffh ; check if Ex = 7fffh (all exponent bits set) |
||
186 | je hw_fpatan |
||
187 | |||
188 | fld tbyte ptr [esp+X] ; reload X |
||
189 | fabs ; |X| = u |
||
190 | fld tbyte ptr [esp+Y] ; reload Y |
||
191 | fabs ; |Y| = v |
||
192 | |||
193 | ; The following six lines turn off exceptions and set the |
||
194 | ; precision control to 80 bits. The former is necessary to |
||
195 | ; force any traps to be taken at the divide instead of the scaling |
||
196 | ; code. The latter is necessary in order to get full precision for |
||
197 | ; codes with incoming 32 and 64 bit precision settings. If |
||
198 | ; it can be guaranteed that before reaching this point, the underflow |
||
199 | ; exception is masked and the precision control is at 80 bits, these |
||
200 | ; six lines can be omitted. |
||
201 | ; |
||
202 | fnstcw [PREV_CW+esp] ; save caller's control word |
||
203 | mov edx, [PREV_CW+esp] |
||
204 | or edx, 033fh ; mask exceptions, pc=80 |
||
205 | and edx, 0f3ffh ; clear bits 10-11 (x87 rounding-control field: round to nearest) |
||
206 | mov [PATCH_CW+esp], edx |
||
207 | fldcw [PATCH_CW+esp] ; mask exceptions & pc=80 |
||
208 | |||
209 | |||
210 | xor edx, edx ; initialize sflag = 0 |
||
211 | fcom ; compare |Y| (st) with |X| (st(1)) |
||
212 | push eax ; keep eax intact across fstsw ax |
||
213 | fstsw ax |
||
214 | sahf ; move the FPU condition codes into the CPU flags |
||
215 | pop eax |
||
216 | jb order_X_Y_ok ; taken when |Y| < |X|: keep the operand order |
||
217 | fxch ; otherwise swap |Y| and |X| |
||
218 | inc edx ; sflag = 1 |
||
219 | order_X_Y_ok: |
||
220 | push eax |
||
221 | mov eax, 0fh ; argument for __fdiv_fpr (meaning is defined by that routine) |
||
222 | call __fdiv_fpr ; v/u = z |
||
223 | pop eax |
||
224 | fld dword ptr flt_sixteen ; 16.0 |
||
225 | fmul st, st(1) ; z*16.0 |
||
226 | ; Top of stack looks like k, z |
||
227 | fistp dword ptr [SPILL+esp] ; store k as int |
||
228 | mov ecx, [SPILL+esp] |
||
229 | shl ecx, 4 ; k*16 = byte offset into atan_k_by_16 |
||
230 | fild dword ptr[SPILL+esp] |
||
231 | fmul dword ptr one_by_sixteen; g = k * (1.0/16.0) |
||
232 | ; Top of stack looks like g, z |
||
233 | fld st(1) ; duplicate z |
||
234 | fsub st, st(1) ; z-g = r |
||
235 | fxch |
||
236 | ; Top of stack looks like g, r, z |
||
237 | fmulp st(2), st ; g*z |
||
238 | ; Top of stack looks like r, g*z |
||
239 | fld qword ptr pos_1 ; load 1.0 |
||
240 | faddp st(2), st ; 1+g*z |
||
241 | ; Top of stack looks like r, 1+g*z |
||
242 | push eax |
||
243 | mov eax, 0fh ; same __fdiv_fpr argument as the first division |
||
244 | call __fdiv_fpr ; r/(1+g*z) = s |
||
245 | pop eax |
||
246 | fld st(0) ; duplicate s |
||
247 | fmul st,st(1) ; t = s*s |
||
248 | ; Top of stack looks like t, s |
||
249 | |||
250 | fld st(0) |
||
251 | fmul st, st(1) ; t2 = t*t |
||
252 | ; Top of stack looks like t2, t, s |
||
253 | fld st(0) |
||
254 | fmul st, st(1) ; t4 = t2*t2 |
||
255 | fld tbyte ptr B6 |
||
256 | fld tbyte ptr B5 |
||
257 | ; Top of stack looks like B5, B6, t4, t2, t, s |
||
258 | fxch |
||
259 | fmul st, st(2) |
||
260 | fld tbyte ptr B4 |
||
261 | fxch st(2) |
||
262 | fmul st, st(3) |
||
263 | ; Top of stack looks like B5t4, B6t4, B4, t4, t2, t, s |
||
264 | fld tbyte ptr B3 |
||
265 | fxch st(2) |
||
266 | fmul st, st(5) |
||
267 | ; Top of stack looks like B6t6, B5t4, B3, B4, t4, t2, t, s |
||
268 | fxch st(3) |
||
269 | fmulp st(4), st |
||
270 | fld tbyte ptr B2 |
||
271 | ; Top of stack looks like B2, B5t4, B3, B6t6, B4t4, t2, t, s |
||
272 | fxch st(3) |
||
273 | faddp st(4), st ; even = B4t4 + B6t6 |
||
274 | mov eax, [esp+X+8] ; high part of X again (sign is bit 15) |
||
275 | fld tbyte ptr B1 |
||
276 | fxch |
||
277 | shl eax, 16 ; move X's sign bit up to bit 31 |
||
278 | ; Top of stack looks like B5t4, B1, B3, B2, even, t2, t, s |
||
279 | fmul st, st(6) |
||
280 | fxch st(2) |
||
281 | add eax, eax ; carry = Sx (sign of X) |
||
282 | fmul st, st(5) |
||
283 | ; Top of stack looks like B3t2, B1, B5t5, B2, even, t2, t, s |
||
284 | fxch st(3) |
||
285 | adc edx, edx ; |sflag|Sx| |
||
286 | fmulp st(5), st |
||
287 | fxch st(2) |
||
288 | mov eax, [Y+8+esp] ; high part of Y again (sign is bit 15) |
||
289 | fmul st, st(5) |
||
290 | ; Top of stack looks like B3t3, B5t5, B1, even, B2t2, t, s |
||
291 | fxch st(2) |
||
292 | shl eax, 16 ; move Y's sign bit up to bit 31 |
||
293 | fmulp st(5), st |
||
294 | ; Top of stack looks like B5t5, B3t3, even, B2t2, B1t, s |
||
295 | fxch st(2) |
||
296 | faddp st(3), st |
||
297 | add eax, eax ; carry = Sy (sign of Y) |
||
298 | faddp st(1), st |
||
299 | adc edx, edx ; |sflag|Sx|Sy| |
||
300 | ; Top of stack looks like odd, even, B1t, s |
||
301 | faddp st(2), st |
||
302 | faddp st(1), st |
||
303 | fmul st,st(1) ; s*(odd+even) |
||
304 | faddp st(1), st ; poly |
||
305 | |||
306 | fld tbyte ptr atan_k_by_16[ecx] ; arctan[k;16] |
||
307 | faddp st(1), st ; w = poly + arctan(g) |
||
308 | |||
309 | jmp dword ptr dispatch_table[edx*4] ; index = sflag*4 + Sx*2 + Sy |
||
310 | |||
311 | label0: ; sflag=0 Sx=0 Sy=0: result = w |
||
312 | fldcw [esp+PREV_CW] ; restore caller's control word |
||
313 | add esp, STACK_SIZE |
||
314 | pop edx |
||
315 | pop ecx |
||
316 | pop eax |
||
317 | ret |
||
318 | label1: ; sflag=0 Sx=0 Sy=1: result = -w |
||
319 | fchs |
||
320 | fldcw [esp+PREV_CW] ; restore caller's control word |
||
321 | add esp, STACK_SIZE |
||
322 | pop edx |
||
323 | pop ecx |
||
324 | pop eax |
||
325 | ret |
||
326 | label2: ; sflag=0 Sx=1 Sy=0: result = pi - w |
||
327 | fld tbyte ptr pi |
||
328 | fsubrp st(1), st ; pi - w |
||
329 | fldcw [esp+PREV_CW] ; restore caller's control word |
||
330 | add esp, STACK_SIZE |
||
331 | pop edx |
||
332 | pop ecx |
||
333 | pop eax |
||
334 | ret |
||
335 | label3: ; sflag=0 Sx=1 Sy=1: result = -(pi - w) |
||
336 | fld tbyte ptr pi |
||
337 | fsubrp st(1), st ; pi - w |
||
338 | fchs ; - (pi - w) |
||
339 | fldcw [esp+PREV_CW] ; restore caller's control word |
||
340 | add esp, STACK_SIZE |
||
341 | pop edx |
||
342 | pop ecx |
||
343 | pop eax |
||
344 | ret |
||
345 | label4: ; sflag=1 Sx=0 Sy=0: result = pi/2 - w |
||
346 | fld tbyte ptr pi_by_2 |
||
347 | fsubrp st(1), st ; pi/2 - w |
||
348 | fldcw [esp+PREV_CW] ; restore caller's control word |
||
349 | add esp, STACK_SIZE |
||
350 | pop edx |
||
351 | pop ecx |
||
352 | pop eax |
||
353 | ret |
||
354 | label5: ; sflag=1 Sx=0 Sy=1: result = -(pi/2 - w) |
||
355 | fld tbyte ptr pi_by_2 |
||
356 | fsubrp st(1), st ; pi/2 - w |
||
357 | fchs ; - (pi/2 - w) |
||
358 | fldcw [esp+PREV_CW] ; restore caller's control word |
||
359 | add esp, STACK_SIZE |
||
360 | pop edx |
||
361 | pop ecx |
||
362 | pop eax |
||
363 | ret |
||
364 | label6: ; sflag=1 Sx=1 Sy=0: result = pi/2 + w |
||
365 | fld tbyte ptr pi_by_2 |
||
366 | faddp st(1), st ; pi/2 + w |
||
367 | fldcw [esp+PREV_CW] ; restore caller's control word |
||
368 | add esp, STACK_SIZE |
||
369 | pop edx |
||
370 | pop ecx |
||
371 | pop eax |
||
372 | ret |
||
373 | label7: ; sflag=1 Sx=1 Sy=1: result = -(pi/2 + w) |
||
374 | fld tbyte ptr pi_by_2 |
||
375 | faddp st(1), st ; pi/2 + w |
||
376 | fchs ; -(pi/2+w) |
||
377 | fldcw [esp+PREV_CW] ; restore caller's control word |
||
378 | add esp, STACK_SIZE |
||
379 | pop edx |
||
380 | pop ecx |
||
381 | pop eax |
||
382 | ret |
||
383 | |||
384 | |||
385 | hw_fpatan: ; operands rejected by the checks above: use the hardware instruction |
||
386 | fld tbyte ptr [esp+Y] ; reload Y |
||
387 | fld tbyte ptr [esp+X] ; reload X |
||
388 | fpatan |
||
389 | add esp, STACK_SIZE ; no fldcw needed: the control word is untouched on this path |
||
390 | pop edx |
||
391 | pop ecx |
||
392 | pop eax |
||
393 | ret |
||
394 | __fpatan_chk ENDP |
||
395 | |||
396 | _TEXT ENDS ; close the code segment that holds __fpatan_chk |
||
397 | END ; end of the assembly source |