0,0 → 1,397 |
;***************************************************************************** |
;* |
;* Open Watcom Project |
;* |
;* Portions Copyright (c) 1983-2002 Sybase, Inc. All Rights Reserved. |
;* |
;* ======================================================================== |
;* |
;* This file contains Original Code and/or Modifications of Original |
;* Code as defined in and that are subject to the Sybase Open Watcom |
;* Public License version 1.0 (the 'License'). You may not use this file |
;* except in compliance with the License. BY USING THIS FILE YOU AGREE TO |
;* ALL TERMS AND CONDITIONS OF THE LICENSE. A copy of the License is |
;* provided with the Original Code and Modifications, and is also |
;* available at www.sybase.com/developer/opensource. |
;* |
;* The Original Code and all software distributed under the License are |
;* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
;* EXPRESS OR IMPLIED, AND SYBASE AND ALL CONTRIBUTORS HEREBY DISCLAIM |
;* ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF |
;* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR |
;* NON-INFRINGEMENT. Please see the License for the specific language |
;* governing rights and limitations under the License. |
;* |
;* ======================================================================== |
;* |
;* Description: WHEN YOU FIGURE OUT WHAT THIS FILE DOES, PLEASE |
;* DESCRIBE IT HERE! |
;* |
;***************************************************************************** |
|
|
; static char sccs_id[] = "@(#)fpatan32.asm 1.7 12/21/94 08:33:45"; |
; |
; This code is being published by Intel to users of the Pentium(tm) |
; processor. Recipients are authorized to copy, modify, compile, use and |
; distribute the code. |
; |
; Intel makes no warranty of any kind with regard to this code, including |
; but not limited to, implied warranties or merchantability and fitness for |
; a particular purpose. Intel assumes no responsibility for any errors that |
; may appear in this code. |
; |
; No patent licenses are granted, express or implied. |
; |
; |
include mdef.inc |
|
.386 |
.387 |
|
|
_TEXT SEGMENT PARA PUBLIC USE32 'CODE' |
_TEXT ENDS |
|
CONST SEGMENT DWORD PUBLIC USE32 'DATA' |
CONST ENDS |
|
CONST2 SEGMENT DWORD PUBLIC USE32 'DATA' |
CONST2 ENDS |
|
DATA32 SEGMENT DWORD PUBLIC USE32 'DATA' |
|
|
Y EQU 0 |
X EQU 12 |
PREV_CW EQU 24 |
PATCH_CW EQU 28 |
SPILL EQU 32 |
STACK_SIZE EQU 36 |
|
|
pos_1 DD 00000000H |
DD 3ff00000H |
|
neg_1 DD 00000000H |
DD 0bff00000H |
|
|
dispatch_table DD offset label0 |
DD offset label1 |
DD offset label2 |
DD offset label3 |
DD offset label4 |
DD offset label5 |
DD offset label6 |
DD offset label7 |
;end dispatch table |
|
pi DB 35H |
DB 0c2H |
DD 0daa22168H |
DD 4000c90fH |
|
pi_by_2 DB 35H |
DB 0c2H |
DD 0daa22168H |
DD 3fffc90fH |
|
flt_sixteen DD 41800000H |
|
one_by_sixteen DD 3d800000H |
|
|
B1 DW 0AAA8H |
DD 0AAAAAAAAH |
DD 0BFFDAAAAH |
|
B2 DW 2D6EH |
DD 0CCCCCCCCH |
DD 3FFCCCCCH |
|
B3 DW 4892H |
DD 249241F9H |
DD 0BFFC9249H |
|
B4 DW 0C592H |
DD 3897CDECH |
DD 3FFBE38EH |
|
B5 DW 5DDDH |
DD 0C17BC162H |
DD 0BFFBBA2DH |
|
B6 DW 4854H |
DD 77C7C78EH |
DD 3FFB9C80H |
|
|
atan_k_by_16 dd 000000000H, 000000000H, 000000000H, 000000000H |
dd 067EF4E37H, 0FFAADDB9H, 000003FFAH, 000000000H |
dd 0617B6E33H, 0FEADD4D5H, 000003FFBH, 000000000H |
dd 072D81135H, 0BDCBDA5EH, 000003FFCH, 000000000H |
dd 06406EB15H, 0FADBAFC9H, 000003FFCH, 000000000H |
dd 03F5E5E6AH, 09B13B9B8H, 000003FFDH, 000000000H |
dd 026F78474H, 0B7B0CA0FH, 000003FFDH, 000000000H |
dd 0611FE5B6H, 0D327761EH, 000003FFDH, 000000000H |
dd 00DDA7B45H, 0ED63382BH, 000003FFDH, 000000000H |
dd 0D9867E2AH, 0832BF4A6H, 000003FFEH, 000000000H |
dd 0F7F59F9BH, 08F005D5EH, 000003FFEH, 000000000H |
dd 071BDDA20H, 09A2F80E6H, 000003FFEH, 000000000H |
dd 034F70924H, 0A4BC7D19H, 000003FFEH, 000000000H |
dd 0B4D8C080H, 0AEAC4C38H, 000003FFEH, 000000000H |
dd 0C2319E74H, 0B8053E2BH, 000003FFEH, 000000000H |
dd 0AC526641H, 0C0CE85B8H, 000003FFEH, 000000000H |
dd 02168C235H, 0C90FDAA2H, 000003FFEH, 000000000H |
|
DATA32 ENDS |
|
BSS32 SEGMENT DWORD PUBLIC USE32 'BSS' |
BSS32 ENDS |
|
|
EXTRN __fdiv_fpr:NEAR |
|
DGROUP GROUP CONST,CONST2,DATA32,BSS32 |
|
|
_TEXT SEGMENT PARA PUBLIC USE32 'CODE' |
ASSUME CS:_TEXT,DS:DGROUP,ES:DGROUP, SS:nothing |
public __fpatan_chk |
|
defpe __fpatan_chk |
push eax |
push ecx |
push edx |
sub esp, STACK_SIZE |
fstp tbyte ptr [esp+X] ; save X |
fstp tbyte ptr [esp+Y] ; save Y |
|
mov ecx, [esp+Y+4] |
add ecx, ecx |
jnc hw_fpatan ; unnormals (explicit 1 missing) |
mov eax, [esp+X+4] |
add eax, eax |
jnc hw_fpatan ; unnormals (explicit 1 missing) |
mov ecx, [esp+Y+8] ; save high part of Y |
mov eax, [esp+X+8] ; save high part of Y |
and ecx, 7fffh ; Ey = exponent Y |
jz hw_fpatan ; Ey = 0 |
and eax, 7fffh ; Ex = exponent X |
jz hw_fpatan ; Ex = 0 |
cmp ecx, 7fffh ; check if Ey = 0x7fffh |
je hw_fpatan |
cmp eax, 7fffh ; check if Ex = 0x7fffh |
je hw_fpatan |
|
fld tbyte ptr [esp+X] ; reload X |
fabs ; |X| = u |
fld tbyte ptr [esp+Y] ; reload Y |
fabs ; |Y| = v |
|
; The following five lines turn off exceptions and set the |
; precision control to 80 bits. The former is necessary to |
; force any traps to be taken at the divide instead of the scaling |
; code. The latter is necessary in order to get full precision for |
; codes with incoming 32 and 64 bit precision settings. If |
; it can be guaranteed that before reaching this point, the underflow |
; exception is masked and the precision control is at 80 bits, these |
; five lines can be omitted. |
; |
fnstcw [PREV_CW+esp] ; save caller's control word |
mov edx, [PREV_CW+esp] |
or edx, 033fh ; mask exceptions, pc=80 |
and edx, 0f3ffh |
mov [PATCH_CW+esp], edx |
fldcw [PATCH_CW+esp] ; mask exceptions & pc=80 |
|
|
xor edx, edx ; initialize sflag = 0 |
fcom ; |Y| > |x| |
push eax |
fstsw ax |
sahf |
pop eax |
jb order_X_Y_ok |
fxch |
inc edx ; sflag = 1 |
order_X_Y_ok: |
push eax |
mov eax, 0fh |
call __fdiv_fpr ; v/u = z |
pop eax |
fld dword ptr flt_sixteen ; 16.0 |
fmul st, st(1) ; z*16.0 |
; Top of stack looks like k, z |
fistp dword ptr [SPILL+esp] ; store k as int |
mov ecx, [SPILL+esp] |
shl ecx, 4 |
fild dword ptr[SPILL+esp] |
fmul dword ptr one_by_sixteen; 1.0/16.0 |
; Top of stack looks like g, z |
fld st(1) ; duplicate g |
fsub st, st(1) ; z-g = r |
fxch |
; Top of stack looks like g, r, z |
fmulp st(2), st ; g*z |
; Top of stack looks like r, g*z |
fld qword ptr pos_1 ; load 1.0 |
faddp st(2), st ; 1+g*z |
; Top of stack looks like r, 1+g*z |
push eax |
mov eax, 0fh |
call __fdiv_fpr ; v/u = z |
pop eax |
fld st(0) ; duplicate s |
fmul st,st(1) ; t = s*s |
; Top of stack looks like t, s |
|
fld st(0) |
fmul st, st(1) |
; Top of stack looks like t2, t, s |
fld st(0) |
fmul st, st(1) |
fld tbyte ptr B6 |
fld tbyte ptr B5 |
; Top of stack looks like B5, B6, t4, t2, t, s |
fxch |
fmul st, st(2) |
fld tbyte ptr B4 |
fxch st(2) |
fmul st, st(3) |
; Top of stack looks like B5t4, B6t4, B4, t4, t2, t, s |
fld tbyte ptr B3 |
fxch st(2) |
fmul st, st(5) |
; Top of stack looks like B6t6, B5t4, B3, B4, t4, t2, t, s |
fxch st(3) |
fmulp st(4), st |
fld tbyte ptr B2 |
; Top of stack looks like B2, B5t4, B3, B6t6, B4t4, t2, t, s |
fxch st(3) |
faddp st(4), st |
mov eax, [esp+X+8] |
fld tbyte ptr B1 |
fxch |
shl eax, 16 |
; Top of stack looks like B5t4, B1, B3, B2, even, t2, t, s |
fmul st, st(6) |
fxch st(2) |
add eax, eax |
fmul st, st(5) |
; Top of stack looks like B3t2, B1, B5t5, B2, even, t2, t, s |
fxch st(3) |
adc edx, edx ; |sflag|Sx| |
fmulp st(5), st |
fxch st(2) |
mov eax, [Y+8+esp] ; save high part of Y |
fmul st, st(5) |
; Top of stack looks like B3t3, B5t5, B1, even, B2t2, t, s |
fxch st(2) |
shl eax, 16 |
fmulp st(5), st |
; Top of stack looks like B5t5, B3t3, even, B2t2, B1t, s |
fxch st(2) |
faddp st(3), st |
add eax, eax |
faddp st(1), st |
adc edx, edx ; |sflag|Sx|Sy| |
; Top of stack looks like odd, even, B1t, s |
faddp st(2), st |
faddp st(1), st |
fmul st,st(1) ; s*(odd+even) |
faddp st(1), st ; poly |
|
fld tbyte ptr atan_k_by_16[ecx] ; arctan[k;16] |
faddp st(1), st ; w = poly + arctan(g) |
|
jmp dword ptr dispatch_table[edx*4] |
|
label0: |
fldcw [esp+PREV_CW] |
add esp, STACK_SIZE |
pop edx |
pop ecx |
pop eax |
ret |
label1: |
fchs |
fldcw [esp+PREV_CW] |
add esp, STACK_SIZE |
pop edx |
pop ecx |
pop eax |
ret |
label2: |
fld tbyte ptr pi |
fsubrp st(1), st ; pi - w |
fldcw [esp+PREV_CW] |
add esp, STACK_SIZE |
pop edx |
pop ecx |
pop eax |
ret |
label3: |
fld tbyte ptr pi |
fsubrp st(1), st ; pi - w |
fchs ; - (pi - w) |
fldcw [esp+PREV_CW] |
add esp, STACK_SIZE |
pop edx |
pop ecx |
pop eax |
ret |
label4: |
fld tbyte ptr pi_by_2 |
fsubrp st(1), st ; pi/2 - w |
fldcw [esp+PREV_CW] |
add esp, STACK_SIZE |
pop edx |
pop ecx |
pop eax |
ret |
label5: |
fld tbyte ptr pi_by_2 |
fsubrp st(1), st ; pi/2 - w |
fchs ; - (pi/2 - w) |
fldcw [esp+PREV_CW] |
add esp, STACK_SIZE |
pop edx |
pop ecx |
pop eax |
ret |
label6: |
fld tbyte ptr pi_by_2 |
faddp st(1), st ; pi/2 + w |
fldcw [esp+PREV_CW] |
add esp, STACK_SIZE |
pop edx |
pop ecx |
pop eax |
ret |
label7: |
fld tbyte ptr pi_by_2 |
faddp st(1), st ; pi/2 + w |
fchs ; -(pi/2+w) |
fldcw [esp+PREV_CW] |
add esp, STACK_SIZE |
pop edx |
pop ecx |
pop eax |
ret |
|
|
hw_fpatan: |
fld tbyte ptr [esp+Y] ; reload Y |
fld tbyte ptr [esp+X] ; reload X |
fpatan |
add esp, STACK_SIZE |
pop edx |
pop ecx |
pop eax |
ret |
__fpatan_chk ENDP |
|
_TEXT ENDS |
END |