Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
554 serge 1
;*****************************************************************************
2
;*
3
;*                            Open Watcom Project
4
;*
5
;*    Portions Copyright (c) 1983-2002 Sybase, Inc. All Rights Reserved.
6
;*
7
;*  ========================================================================
8
;*
9
;*    This file contains Original Code and/or Modifications of Original
10
;*    Code as defined in and that are subject to the Sybase Open Watcom
11
;*    Public License version 1.0 (the 'License'). You may not use this file
12
;*    except in compliance with the License. BY USING THIS FILE YOU AGREE TO
13
;*    ALL TERMS AND CONDITIONS OF THE LICENSE. A copy of the License is
14
;*    provided with the Original Code and Modifications, and is also
15
;*    available at www.sybase.com/developer/opensource.
16
;*
17
;*    The Original Code and all software distributed under the License are
18
;*    distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
19
;*    EXPRESS OR IMPLIED, AND SYBASE AND ALL CONTRIBUTORS HEREBY DISCLAIM
20
;*    ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF
21
;*    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR
22
;*    NON-INFRINGEMENT. Please see the License for the specific language
23
;*    governing rights and limitations under the License.
24
;*
25
;*  ========================================================================
26
;*
27
;* Description:  __fpatan_chk - software arctangent used in place of the
28
;*               x87 FPATAN instruction (table lookup plus polynomial).
29
;*
30
;*****************************************************************************
31
 
32
 
33
; static char sccs_id[] = "@(#)fpatan32.asm     1.7  12/21/94  08:33:45";
34
;
35
; This code is being published by Intel to users of the Pentium(tm)
36
; processor.  Recipients are authorized to copy, modify, compile, use and
37
; distribute the code.
38
;
39
; Intel makes no warranty of any kind with regard to this code, including
40
; but not limited to, implied warranties or merchantability and fitness for
41
; a particular purpose. Intel assumes no responsibility for any errors that
42
; may appear in this code.
43
;
44
; No patent licenses are granted, express or implied.
45
;
46
;
47
include mdef.inc
48
 
49
; Enable the 80386 instruction set and the 80387 coprocessor instructions.
        .386
50
        .387
51
 
52
 
53
; Empty declarations of the _TEXT, CONST and CONST2 segments.  _TEXT is
; reopened further down to hold the code; CONST and CONST2 stay empty in
; this file and are only named in the DGROUP definition below.
_TEXT   SEGMENT PARA PUBLIC USE32 'CODE'
54
_TEXT  ENDS
55
 
56
CONST   SEGMENT DWORD PUBLIC USE32 'DATA'
57
CONST   ENDS
58
 
59
CONST2  SEGMENT DWORD PUBLIC USE32 'DATA'
60
CONST2  ENDS
61
 
62
; DATA32 holds the stack-frame layout equates and all constants and
; lookup tables used by __fpatan_chk.  Multi-byte values are laid out
; little-endian, so the 10-byte (tbyte) constants list their low-order
; mantissa bytes first and the sign/exponent word last.
DATA32   SEGMENT DWORD PUBLIC USE32 'DATA'
63
 
64
 
65
; Offsets from ESP into the STACK_SIZE-byte scratch frame allocated by
; __fpatan_chk.  Y and X are stored as 10-byte tbyte values, each in a
; 12-byte slot.
Y               EQU     0
66
X               EQU     12
67
; Slot for the caller's FPU control word (written by fnstcw).
PREV_CW         EQU     24
68
; Slot for the modified control word loaded while the routine runs.
PATCH_CW        EQU     28
69
; Dword used to pass the integer k between fistp, the CPU and fild.
SPILL           EQU     32
70
; Total size of the scratch frame in bytes.
STACK_SIZE      EQU     36
71
 
72
 
73
; 1.0 as an IEEE double (low dword first).
pos_1   DD   00000000H
74
        DD   3ff00000H
75
 
76
; -1.0 as an IEEE double.  Not referenced by the code in this file.
neg_1   DD   00000000H
77
        DD   0bff00000H
78
 
79
 
80
; Jump table for the final quadrant fix-up.  The index built in edx is
; (sflag << 2) | (Sx << 1) | Sy, where sflag is 1 when |Y| was not below
; |X| (the operands were exchanged before the divide) and Sx / Sy are the
; sign bits of X and Y.
dispatch_table  DD      offset label0
81
                DD      offset label1
82
                DD      offset label2
83
                DD      offset label3
84
                DD      offset label4
85
                DD      offset label5
86
                DD      offset label6
87
                DD      offset label7
88
;end dispatch table
89
 
90
; pi as an 80-bit extended value: mantissa 0C90FDAA22168C235h,
; sign/exponent word 4000h.
pi      DB      35H
91
        DB      0c2H
92
        DD      0daa22168H
93
        DD      4000c90fH
94
 
95
; pi/2: same mantissa as pi with sign/exponent word 3FFFh.
pi_by_2 DB      35H
96
        DB      0c2H
97
        DD      0daa22168H
98
        DD      3fffc90fH
99
 
100
; 16.0 as an IEEE single.
flt_sixteen DD  41800000H
101
 
102
; 1.0/16.0 (0.0625) as an IEEE single.
one_by_sixteen  DD 3d800000H
103
 
104
 
105
; B1..B6: 80-bit extended coefficients of the polynomial evaluated by
; __fpatan_chk as  s*(1 + B1*t + B2*t^2 + ... + B6*t^6)  with t = s*s.
; Decoded, they are approximately -1/3, +1/5, -1/7, +1/9, -1/11 and
; +1/13; the higher-order ones differ slightly from those exact
; fractions.
B1      DW      0AAA8H
106
        DD      0AAAAAAAAH
107
        DD      0BFFDAAAAH
108
 
109
B2      DW      2D6EH
110
        DD      0CCCCCCCCH
111
        DD      3FFCCCCCH
112
 
113
B3      DW      4892H
114
        DD      249241F9H
115
        DD      0BFFC9249H
116
 
117
B4      DW      0C592H
118
        DD      3897CDECH
119
        DD      3FFBE38EH
120
 
121
B5      DW      5DDDH
122
        DD      0C17BC162H
123
        DD      0BFFBBA2DH
124
 
125
B6      DW      4854H
126
        DD      77C7C78EH
127
        DD      3FFB9C80H
128
 
129
 
130
; Table of 17 entries, one per k = 0..16, each occupying 16 bytes: an
; 80-bit extended value (two mantissa dwords, then the sign/exponent in
; the low word of the third dword) followed by padding.  The code
; indexes it with ecx = k*16 and treats entry k as arctan(k/16).
; Entry 0 is 0.0; entry 16 has pi's mantissa with exponent 3FFEh (pi/4).
atan_k_by_16    dd 000000000H, 000000000H, 000000000H, 000000000H
131
                dd 067EF4E37H, 0FFAADDB9H, 000003FFAH, 000000000H
132
                dd 0617B6E33H, 0FEADD4D5H, 000003FFBH, 000000000H
133
                dd 072D81135H, 0BDCBDA5EH, 000003FFCH, 000000000H
134
                dd 06406EB15H, 0FADBAFC9H, 000003FFCH, 000000000H
135
                dd 03F5E5E6AH, 09B13B9B8H, 000003FFDH, 000000000H
136
                dd 026F78474H, 0B7B0CA0FH, 000003FFDH, 000000000H
137
                dd 0611FE5B6H, 0D327761EH, 000003FFDH, 000000000H
138
                dd 00DDA7B45H, 0ED63382BH, 000003FFDH, 000000000H
139
                dd 0D9867E2AH, 0832BF4A6H, 000003FFEH, 000000000H
140
                dd 0F7F59F9BH, 08F005D5EH, 000003FFEH, 000000000H
141
                dd 071BDDA20H, 09A2F80E6H, 000003FFEH, 000000000H
142
                dd 034F70924H, 0A4BC7D19H, 000003FFEH, 000000000H
143
                dd 0B4D8C080H, 0AEAC4C38H, 000003FFEH, 000000000H
144
                dd 0C2319E74H, 0B8053E2BH, 000003FFEH, 000000000H
145
                dd 0AC526641H, 0C0CE85B8H, 000003FFEH, 000000000H
146
                dd 02168C235H, 0C90FDAA2H, 000003FFEH, 000000000H
147
 
148
DATA32  ENDS
149
 
150
; BSS32 is declared empty; it appears here only so it can be named in
; DGROUP below.
BSS32   SEGMENT DWORD PUBLIC USE32 'BSS'
151
BSS32   ENDS
152
 
153
 
154
; External near routine called by __fpatan_chk for both of its divides.
; Its definition is not in this file.
EXTRN   __fdiv_fpr:NEAR
155
 
156
; Combine the four data segments into the single group DGROUP, which the
; code segment's ASSUME uses for DS and ES.
DGROUP  GROUP CONST,CONST2,DATA32,BSS32
157
 
158
 
159
; Code segment: contains the single public routine __fpatan_chk.
_TEXT   SEGMENT PARA PUBLIC USE32 'CODE'
160
        ASSUME CS:_TEXT,DS:DGROUP,ES:DGROUP, SS:nothing
161
        public __fpatan_chk
162
 
163
;-----------------------------------------------------------------------
; __fpatan_chk
;
; Arctangent of Y/X with the same operand convention as the x87 FPATAN
; instruction (the fallback path below simply reloads Y and X and
; executes FPATAN).
;
; In:     st(0) = X, st(1) = Y
; Out:    X and Y are popped; the result is left in st(0)
; Saves:  eax, ecx, edx (pushed on entry, popped on every exit path)
; Stack:  STACK_SIZE bytes of scratch below the three saved registers
; FPU CW: saved in PREV_CW and restored on every software exit path;
;         the hw_fpatan path is entered before the control word is
;         changed.
;
; Method:
;   1. Operands whose explicit integer bit is clear, or whose exponent
;      field is 0 or 7FFFh, are handed to the hardware FPATAN.
;   2. z = smaller(|X|,|Y|) / larger(|X|,|Y|); sflag = 1 if the operands
;      had to be exchanged (|Y| not below |X|).
;   3. k = z*16 rounded to an integer, g = k/16, s = (z-g)/(1+g*z).
;   4. poly = s*(1 + B1*t + ... + B6*t^6), t = s*s;
;      w = poly + atan_k_by_16[k].
;   5. The final result is selected from w, pi-w, pi/2-w, pi/2+w and
;      their negations by dispatch_table[sflag:Sx:Sy].
;
; NOTE(review): the polynomial section has eight values on the x87
; register stack at its peak, so this presumably relies on the stack
; holding nothing besides X and Y on entry - confirm against callers.
; NOTE(review): defpe comes from mdef.inc (not visible here); it
; presumably opens the procedure closed by the ENDP at the end.
;-----------------------------------------------------------------------
        defpe   __fpatan_chk
164
        push    eax
165
        push    ecx
166
        push    edx
167
        sub     esp, STACK_SIZE
168
        fstp    tbyte ptr [esp+X]       ; save X
169
        fstp    tbyte ptr [esp+Y]       ; save Y
170
 
171
; Screen the operands.  [..+4] is the high mantissa dword; doubling it
; moves the explicit integer bit (bit 31) into the carry flag.
        mov     ecx, [esp+Y+4]
172
        add     ecx, ecx
173
        jnc     hw_fpatan               ; unnormals (explicit 1 missing)
174
        mov     eax, [esp+X+4]
175
        add     eax, eax
176
        jnc     hw_fpatan               ; unnormals (explicit 1 missing)
177
        mov     ecx, [esp+Y+8]          ; sign/exponent word of Y (upper 16 bits are not part of Y)
178
        mov     eax, [esp+X+8]          ; sign/exponent word of X (upper 16 bits are not part of X)
179
        and     ecx, 7fffh              ; Ey = exponent Y
180
        jz      hw_fpatan               ; Ey = 0
181
        and     eax, 7fffh              ; Ex = exponent X
182
        jz      hw_fpatan               ; Ex = 0
183
        cmp     ecx, 7fffh              ; check if Ey = 0x7fffh
184
        je      hw_fpatan
185
        cmp     eax, 7fffh              ; check if Ex = 0x7fffh
186
        je      hw_fpatan
187
 
188
; Both operands passed the checks above.  Build the stack
; st(0) = |Y| = v, st(1) = |X| = u.
        fld     tbyte ptr [esp+X]       ; reload X
189
        fabs                            ; |X| = u
190
        fld     tbyte ptr [esp+Y]       ; reload Y
191
        fabs                            ; |Y| = v
192
 
193
;  The following instructions turn off exceptions and set the
194
;  precision control to 80 bits.  The former is necessary to
195
;  force any traps to be taken at the divide instead of the scaling
196
;  code.  The latter is necessary in order to get full precision for
197
;  codes with incoming 32 and 64 bit precision settings.  If
198
;  it can be guaranteed that before reaching this point, the underflow
199
;  exception is masked and the precision control is at 80 bits, these
200
;  instructions can be omitted.  (Note that the AND below also clears
201
;  control-word bits 10-11, the rounding-control field.)
202
        fnstcw  [PREV_CW+esp]           ; save caller's control word
203
        mov     edx, [PREV_CW+esp]      ; dword load; bits 16-31 are cleared by the AND below
204
        or      edx, 033fh              ; mask exceptions, pc=80
205
        and     edx, 0f3ffh             ; clear bits 10-11 (rounding control) and bits 16-31
206
        mov     [PATCH_CW+esp], edx
207
        fldcw   [PATCH_CW+esp]          ; mask exceptions & pc=80
208
 
209
 
210
; Order the operands so the smaller magnitude ends up in st(0).
        xor     edx, edx                ; initialize sflag = 0
211
        fcom                            ; compare st(0)=|Y| with st(1)=|X|
212
        push    eax                     ; keep eax across fstsw ax
213
        fstsw  ax
214
        sahf                            ; FPU condition codes -> CPU flags
215
        pop     eax
216
        jb      order_X_Y_ok            ; |Y| < |X|: already ordered
217
        fxch
218
        inc     edx                     ; sflag = 1
219
order_X_Y_ok:
220
; NOTE(review): __fdiv_fpr is external.  From the stack contents the
; following code expects, the call with eax = 0fh presumably leaves
; st(0)/st(1) on the stack in place of the two operands - confirm
; against its definition.
        push    eax
221
        mov     eax, 0fh
222
        call    __fdiv_fpr                  ; v/u = z
223
        pop     eax
224
        fld     dword ptr flt_sixteen   ; 16.0
225
        fmul    st, st(1)               ; z*16.0
226
; Top of stack looks like k, z
227
        fistp   dword ptr [SPILL+esp]   ; store k as int
228
        mov     ecx, [SPILL+esp]
229
        shl     ecx, 4                  ; ecx = k*16 = byte offset into atan_k_by_16
230
        fild    dword ptr[SPILL+esp]
231
        fmul    dword ptr one_by_sixteen; 1.0/16.0
232
; Top of stack looks like g, z
233
        fld     st(1)                   ; push a copy of z
234
        fsub    st, st(1)               ; z-g = r
235
        fxch
236
; Top of stack looks like g, r, z
237
        fmulp   st(2), st               ; g*z
238
; Top of stack looks like r, g*z
239
        fld     qword ptr pos_1         ; load 1.0
240
        faddp   st(2), st               ; 1+g*z
241
; Top of stack looks like r, 1+g*z
242
        push    eax
243
        mov     eax, 0fh
244
        call    __fdiv_fpr                  ; r/(1+g*z) = s
245
        pop     eax
246
        fld     st(0)                   ; duplicate s
247
        fmul    st,st(1)                ; t = s*s
248
; Top of stack looks like t, s
249
 
250
; Polynomial evaluation.  In the stack pictures below, t2 = t*t,
; t4 = t2*t2, and "even" is the running sum of the even-power terms.
; The integer instructions interleaved with the FPU code extract the
; sign bits of X and Y and shift them into edx; the FPU instructions
; used here do not modify the CPU flags, so the carry produced by each
; "add eax, eax" is still valid at the following "adc edx, edx".
        fld     st(0)
251
        fmul    st, st(1)
252
; Top of stack looks like t2, t, s
253
        fld     st(0)
254
        fmul    st, st(1)
255
        fld     tbyte ptr B6
256
        fld     tbyte ptr B5
257
; Top of stack looks like B5, B6, t4, t2, t, s
258
        fxch
259
        fmul    st, st(2)
260
        fld     tbyte ptr B4
261
        fxch    st(2)
262
        fmul    st, st(3)
263
; Top of stack looks like B5t4, B6t4, B4, t4, t2, t, s
264
        fld     tbyte ptr B3
265
        fxch    st(2)
266
        fmul    st, st(5)
267
; Top of stack looks like B6t6, B5t4, B3, B4, t4, t2, t, s
268
        fxch    st(3)
269
        fmulp   st(4), st
270
        fld     tbyte ptr B2
271
; Top of stack looks like B2, B5t4, B3, B6t6, B4t4, t2, t, s
272
        fxch    st(3)
273
        faddp   st(4), st
274
        mov     eax, [esp+X+8]          ; sign/exponent word of X
275
        fld     tbyte ptr B1
276
        fxch
277
        shl     eax, 16                 ; sign of X -> bit 31
278
; Top of stack looks like B5t4, B1, B3, B2, even, t2, t, s
279
        fmul    st, st(6)
280
        fxch    st(2)
281
        add     eax, eax                ; carry = sign of X
282
        fmul    st, st(5)
283
; Top of stack looks like B3t2, B1, B5t5, B2, even, t2, t, s
284
        fxch    st(3)
285
        adc     edx, edx                ; |sflag|Sx|
286
        fmulp   st(5), st
287
        fxch    st(2)
288
        mov     eax, [Y+8+esp]          ; sign/exponent word of Y
289
        fmul    st, st(5)
290
; Top of stack looks like B3t3, B5t5, B1, even, B2t2, t, s
291
        fxch    st(2)
292
        shl     eax, 16                 ; sign of Y -> bit 31
293
        fmulp   st(5), st
294
; Top of stack looks like  B5t5, B3t3, even, B2t2, B1t, s
295
        fxch    st(2)
296
        faddp   st(3), st
297
        add     eax, eax                ; carry = sign of Y
298
        faddp   st(1), st
299
        adc     edx, edx                ; |sflag|Sx|Sy|
300
; Top of stack looks like  odd, even, B1t, s
301
        faddp   st(2), st
302
        faddp   st(1), st
303
        fmul    st,st(1)                ; s*(odd+even)
304
        faddp   st(1), st               ; poly
305
 
306
        fld     tbyte ptr atan_k_by_16[ecx]     ; arctan[k;16]
307
        faddp   st(1), st               ; w = poly + arctan(g)
308
 
309
; edx = sflag*4 + Sx*2 + Sy selects one of the eight fix-ups below.
; Each one adjusts w in st(0), restores the caller's control word,
; releases the scratch frame, restores edx/ecx/eax and returns.
        jmp     dword ptr dispatch_table[edx*4]
310
 
311
label0:                                 ; sflag=0, Sx=0, Sy=0: result = w
312
        fldcw   [esp+PREV_CW]
313
        add     esp, STACK_SIZE
314
        pop     edx
315
        pop     ecx
316
        pop     eax
317
        ret
318
label1:                                 ; sflag=0, Sx=0, Sy=1: result = -w
319
        fchs
320
        fldcw   [esp+PREV_CW]
321
        add     esp, STACK_SIZE
322
        pop     edx
323
        pop     ecx
324
        pop     eax
325
        ret
326
label2:                                 ; sflag=0, Sx=1, Sy=0
327
        fld     tbyte ptr pi
328
        fsubrp  st(1), st               ; pi - w
329
        fldcw   [esp+PREV_CW]
330
        add     esp, STACK_SIZE
331
        pop     edx
332
        pop     ecx
333
        pop     eax
334
        ret
335
label3:                                 ; sflag=0, Sx=1, Sy=1
336
        fld     tbyte ptr pi
337
        fsubrp  st(1), st               ; pi - w
338
        fchs                            ; - (pi - w)
339
        fldcw   [esp+PREV_CW]
340
        add     esp, STACK_SIZE
341
        pop     edx
342
        pop     ecx
343
        pop     eax
344
        ret
345
label4:                                 ; sflag=1, Sx=0, Sy=0
346
        fld     tbyte ptr pi_by_2
347
        fsubrp  st(1), st               ; pi/2 - w
348
        fldcw   [esp+PREV_CW]
349
        add     esp, STACK_SIZE
350
        pop     edx
351
        pop     ecx
352
        pop     eax
353
        ret
354
label5:                                 ; sflag=1, Sx=0, Sy=1
355
        fld     tbyte ptr pi_by_2
356
        fsubrp  st(1), st               ; pi/2 - w
357
        fchs                            ; - (pi/2 - w)
358
        fldcw   [esp+PREV_CW]
359
        add     esp, STACK_SIZE
360
        pop     edx
361
        pop     ecx
362
        pop     eax
363
        ret
364
label6:                                 ; sflag=1, Sx=1, Sy=0
365
        fld     tbyte ptr pi_by_2
366
        faddp   st(1), st               ; pi/2 + w
367
        fldcw   [esp+PREV_CW]
368
        add     esp, STACK_SIZE
369
        pop     edx
370
        pop     ecx
371
        pop     eax
372
        ret
373
label7:                                 ; sflag=1, Sx=1, Sy=1
374
        fld     tbyte ptr pi_by_2
375
        faddp   st(1), st               ; pi/2 + w
376
        fchs                            ; -(pi/2+w)
377
        fldcw   [esp+PREV_CW]
378
        add     esp, STACK_SIZE
379
        pop     edx
380
        pop     ecx
381
        pop     eax
382
        ret
383
 
384
 
385
; Fallback for operands rejected by the checks at the top of the
; routine: rebuild the original stack (st(1) = Y, st(0) = X) from the
; saved copies and let the hardware instruction handle them.  The
; control word has not been modified on this path, so it is not
; reloaded.
hw_fpatan:
386
        fld     tbyte ptr [esp+Y]       ; reload Y
387
        fld     tbyte ptr [esp+X]       ; reload X
388
        fpatan
389
        add     esp, STACK_SIZE
390
        pop     edx
391
        pop     ecx
392
        pop     eax
393
        ret
394
__fpatan_chk       ENDP
395
 
396
_TEXT  ENDS
397
        END