Subversion Repositories Kolibri OS

Rev

Rev 554 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
554 serge 1
;*****************************************************************************
2
;*
3
;*                            Open Watcom Project
4
;*
5
;*    Portions Copyright (c) 1983-2002 Sybase, Inc. All Rights Reserved.
6
;*
7
;*  ========================================================================
8
;*
9
;*    This file contains Original Code and/or Modifications of Original
10
;*    Code as defined in and that are subject to the Sybase Open Watcom
11
;*    Public License version 1.0 (the 'License'). You may not use this file
12
;*    except in compliance with the License. BY USING THIS FILE YOU AGREE TO
13
;*    ALL TERMS AND CONDITIONS OF THE LICENSE. A copy of the License is
14
;*    provided with the Original Code and Modifications, and is also
15
;*    available at www.sybase.com/developer/opensource.
16
;*
17
;*    The Original Code and all software distributed under the License are
18
;*    distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
19
;*    EXPRESS OR IMPLIED, AND SYBASE AND ALL CONTRIBUTORS HEREBY DISCLAIM
20
;*    ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF
21
;*    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR
22
;*    NON-INFRINGEMENT. Please see the License for the specific language
23
;*    governing rights and limitations under the License.
24
;*
25
;*  ========================================================================
26
;*
27
;* Description:  Software stand-in for the x87 FPATAN instruction
28
;*               (__fpatan_chk); its divisions go through __fdiv_fpr.
29
;*
30
;*****************************************************************************
31
 
32
 
33
; static char sccs_id[] = "@(#)fpatan32.asm     1.7  12/21/94  08:33:45";
34
;
35
; This code is being published by Intel to users of the Pentium(tm)
36
; processor.  Recipients are authorized to copy, modify, compile, use and
37
; distribute the code.
38
;
39
; Intel makes no warranty of any kind with regard to this code, including
40
; but not limited to, implied warranties or merchantability and fitness for
41
; a particular purpose. Intel assumes no responsibility for any errors that
42
; may appear in this code.
43
;
44
; No patent licenses are granted, express or implied.
45
;
46
;
47
include mdef.inc
48
 
49
        .386
50
        .387
51
 
52
 
53
_TEXT   SEGMENT PARA PUBLIC USE32 'CODE'
704 serge 54
_TEXT   ENDS
554 serge 55
 
56
 
704 serge 57
_DATA   SEGMENT DWORD PUBLIC USE32 'DATA'
554 serge 58
 
59
 
60
; Offsets (in bytes, relative to ESP) of the local frame that __fpatan_chk
; reserves with "sub esp, STACK_SIZE".
Y               EQU     0               ; saved Y operand (stored with fstp tbyte)
61
X               EQU     12              ; saved X operand (stored with fstp tbyte)
62
PREV_CW         EQU     24              ; caller's FPU control word (fnstcw target)
63
PATCH_CW        EQU     28              ; modified control word loaded with fldcw
64
SPILL           EQU     32              ; integer scratch slot for k (fistp/fild)
65
STACK_SIZE      EQU     36              ; total size of the local frame
66
 
67
 
68
; +1.0 as a 64-bit IEEE double (low dword first); loaded with
; "fld qword ptr pos_1" when forming 1+g*z.
pos_1   DD   00000000H
69
        DD   3ff00000H
70
 
71
; -1.0 as a 64-bit IEEE double (low dword first).
; NOTE(review): not referenced by the code visible in this file.
neg_1   DD   00000000H
72
        DD   0bff00000H
73
 
74
 
75
; Jump table for the final quadrant/sign fix-up in __fpatan_chk.
; The index is the 3-bit value |sflag|Sx|Sy| accumulated in EDX:
;   sflag = 1 when the operands were exchanged before the first division
;           (i.e. |Y| was not below |X|),
;   Sx    = sign bit of X,
;   Sy    = sign bit of Y.
dispatch_table  DD      offset label0   ; sflag=0 Sx=0 Sy=0 :    w
76
                DD      offset label1   ; sflag=0 Sx=0 Sy=1 :   -w
77
                DD      offset label2   ; sflag=0 Sx=1 Sy=0 :    pi - w
78
                DD      offset label3   ; sflag=0 Sx=1 Sy=1 :  -(pi - w)
79
                DD      offset label4   ; sflag=1 Sx=0 Sy=0 :    pi/2 - w
80
                DD      offset label5   ; sflag=1 Sx=0 Sy=1 :  -(pi/2 - w)
81
                DD      offset label6   ; sflag=1 Sx=1 Sy=0 :    pi/2 + w
82
                DD      offset label7   ; sflag=1 Sx=1 Sy=1 :  -(pi/2 + w)
83
;end dispatch table
84
 
85
; pi as a 10-byte extended-precision value (loaded with fld tbyte ptr):
; significand C90FDAA22168C235h, sign/exponent word 4000h.
pi      DB      35H
86
        DB      0c2H
87
        DD      0daa22168H
88
        DD      4000c90fH
89
 
90
; pi/2 as a 10-byte extended-precision value: same significand as pi,
; sign/exponent word 3FFFh (exponent one lower).
pi_by_2 DB      35H
91
        DB      0c2H
92
        DD      0daa22168H
93
        DD      3fffc90fH
94
 
95
; 16.0 as a 32-bit IEEE single; multiplies z before it is rounded to the
; integer table index k.
flt_sixteen DD  41800000H
96
 
97
; 1/16 (0.0625) as a 32-bit IEEE single; converts k back to g = k/16.
one_by_sixteen  DD 3d800000H
98
 
99
 
100
; Polynomial coefficients B1..B6, each a 10-byte extended-precision value
; (16-bit low significand word, 32-bit mid significand, then a dword whose
; low half is the top significand word and whose high half is the
; sign/exponent word).  __fpatan_chk evaluates
;     poly = s + s*(B1*t + B2*t^2 + B3*t^3 + B4*t^4 + B5*t^5 + B6*t^6)
; with t = s*s.  The decimal values below are approximate.
B1      DW      0AAA8H                  ; B1 ~ -0.333333
101
        DD      0AAAAAAAAH
102
        DD      0BFFDAAAAH
103
 
104
B2      DW      2D6EH                   ; B2 ~ +0.2
105
        DD      0CCCCCCCCH
106
        DD      3FFCCCCCH
107
 
108
B3      DW      4892H                   ; B3 ~ -0.142857
109
        DD      249241F9H
110
        DD      0BFFC9249H
111
 
112
B4      DW      0C592H                  ; B4 ~ +0.111111
113
        DD      3897CDECH
114
        DD      3FFBE38EH
115
 
116
B5      DW      5DDDH                   ; B5 ~ -0.090906
117
        DD      0C17BC162H
118
        DD      0BFFBBA2DH
119
 
120
B6      DW      4854H                   ; B6 ~ +0.0764
121
        DD      77C7C78EH
122
        DD      3FFB9C80H
123
 
124
 
125
; Table of arctan(k/16) for k = 0..16.  Each entry occupies 16 bytes so it
; can be indexed with k*16 (ECX after "shl ecx, 4"); only the first 10 bytes
; are read (fld tbyte ptr): two dwords of significand (low first) followed
; by the sign/exponent word.  The remaining bytes are zero padding.
atan_k_by_16    dd 000000000H, 000000000H, 000000000H, 000000000H    ; k = 0
126
                dd 067EF4E37H, 0FFAADDB9H, 000003FFAH, 000000000H    ; k = 1
127
                dd 0617B6E33H, 0FEADD4D5H, 000003FFBH, 000000000H    ; k = 2
128
                dd 072D81135H, 0BDCBDA5EH, 000003FFCH, 000000000H    ; k = 3
129
                dd 06406EB15H, 0FADBAFC9H, 000003FFCH, 000000000H    ; k = 4
130
                dd 03F5E5E6AH, 09B13B9B8H, 000003FFDH, 000000000H    ; k = 5
131
                dd 026F78474H, 0B7B0CA0FH, 000003FFDH, 000000000H    ; k = 6
132
                dd 0611FE5B6H, 0D327761EH, 000003FFDH, 000000000H    ; k = 7
133
                dd 00DDA7B45H, 0ED63382BH, 000003FFDH, 000000000H    ; k = 8
134
                dd 0D9867E2AH, 0832BF4A6H, 000003FFEH, 000000000H    ; k = 9
135
                dd 0F7F59F9BH, 08F005D5EH, 000003FFEH, 000000000H    ; k = 10
136
                dd 071BDDA20H, 09A2F80E6H, 000003FFEH, 000000000H    ; k = 11
137
                dd 034F70924H, 0A4BC7D19H, 000003FFEH, 000000000H    ; k = 12
138
                dd 0B4D8C080H, 0AEAC4C38H, 000003FFEH, 000000000H    ; k = 13
139
                dd 0C2319E74H, 0B8053E2BH, 000003FFEH, 000000000H    ; k = 14
140
                dd 0AC526641H, 0C0CE85B8H, 000003FFEH, 000000000H    ; k = 15
141
                dd 02168C235H, 0C90FDAA2H, 000003FFEH, 000000000H    ; k = 16 (same significand as pi, i.e. pi/4)
142
 
704 serge 143
_DATA   ENDS
554 serge 144
 
704 serge 145
_BSS     SEGMENT DWORD PUBLIC USE32 'BSS'
146
_BSS     ENDS
554 serge 147
 
148
 
149
EXTRN   __fdiv_fpr:NEAR
150
 
704 serge 151
DGROUP  GROUP _DATA,_BSS
554 serge 152
 
153
 
154
_TEXT   SEGMENT PARA PUBLIC USE32 'CODE'
704 serge 155
        ASSUME CS:_TEXT,DS:DGROUP,ES:DGROUP, SS:DGROUP
554 serge 156
        public __fpatan_chk
157
 
158
        ;-----------------------------------------------------------------------
        ; __fpatan_chk -- software stand-in for the x87 FPATAN instruction.
        ;
        ; In:     ST(0) = X, ST(1) = Y (both are popped into the local frame).
        ; Out:    ST(0) = result angle; the two inputs are replaced by it.
        ; Saves:  EAX, ECX, EDX (pushed on entry, popped on every exit path).
        ;         The FPU control word is restored on the software path and
        ;         never changed on the hardware path.
        ; Clobb:  EFLAGS.
        ;
        ; Method: operands that are unnormal, zero/denormal (exponent 0) or
        ;         infinity/NaN (exponent 7FFFh) are handed to the real FPATAN
        ;         at hw_fpatan.  Otherwise, with z = min(|X|,|Y|)/max(|X|,|Y|):
        ;             k = round(16*z),  g = k/16,  s = (z-g)/(1+g*z)
        ;             w = atan_k_by_16[k] + s + s*(B1*t+...+B6*t^6),  t = s*s
        ;         and w is mapped to the final quadrant through dispatch_table.
        ;         Both divisions are performed by the external __fdiv_fpr.
        ;
        ; NOTE(review): the polynomial schedule keeps eight values on the x87
        ; register stack at its peak, so it appears to need the stack to be
        ; otherwise empty once X and Y are popped -- confirm callers ensure it.
        ; NOTE(review): defpe comes from mdef.inc (not visible here); it is
        ; presumably the procedure-entry macro paired with the ENDP below.
        ;-----------------------------------------------------------------------
        defpe   __fpatan_chk
158
        push    eax
159
        push    ecx
160
        push    edx
161
        sub     esp, STACK_SIZE
162
        fstp    tbyte ptr [esp+X]       ; save X
163
        fstp    tbyte ptr [esp+Y]       ; save Y
164
 
165
; Screen the operands: bit 31 of the upper significand dword is the explicit
; integer bit; "add r, r" shifts it into CF.
        mov     ecx, [esp+Y+4]
166
        add     ecx, ecx
167
        jnc     hw_fpatan               ; unnormals (explicit 1 missing)
168
        mov     eax, [esp+X+4]
169
        add     eax, eax
170
        jnc     hw_fpatan               ; unnormals (explicit 1 missing)
171
        mov     ecx, [esp+Y+8]          ; sign/exponent word of Y (upper 16 bits are don't-care)
172
        mov     eax, [esp+X+8]          ; sign/exponent word of X (upper 16 bits are don't-care)
173
        and     ecx, 7fffh              ; Ey = exponent Y
174
        jz      hw_fpatan               ; Ey = 0
175
        and     eax, 7fffh              ; Ex = exponent X
176
        jz      hw_fpatan               ; Ex = 0
177
        cmp     ecx, 7fffh              ; check if Ey = 0x7fffh
178
        je      hw_fpatan
179
        cmp     eax, 7fffh              ; check if Ex = 0x7fffh
180
        je      hw_fpatan
181
 
182
        fld     tbyte ptr [esp+X]       ; reload X
183
        fabs                            ; |X| = u
184
        fld     tbyte ptr [esp+Y]       ; reload Y
185
        fabs                            ; |Y| = v
186
 
187
;  The following six instructions turn off exceptions and set the
188
;  precision control to 80 bits.  The former is necessary to
189
;  force any traps to be taken at the divide instead of the scaling
190
;  code.  The latter is necessary in order to get full precision for
191
;  codes with incoming 32 and 64 bit precision settings.  If
192
;  it can be guaranteed that before reaching this point, the underflow
193
;  exception is masked and the precision control is at 80 bits, these
194
;  six instructions can be omitted.
195
;
196
        fnstcw  [PREV_CW+esp]           ; save caller's control word
197
        mov     edx, [PREV_CW+esp]      ; only the low 16 bits are meaningful
198
        or      edx, 033fh              ; mask exceptions, pc=80
199
        and     edx, 0f3ffh             ; clear bits 10-11: rounding control = nearest
200
        mov     [PATCH_CW+esp], edx
201
        fldcw   [PATCH_CW+esp]          ; mask exceptions & pc=80
202
 
203
 
204
        xor     edx, edx                ; initialize sflag = 0
205
        fcom                            ; compare v = |Y| (st0) with u = |X| (st1)
206
        push    eax                     ; keep EAX intact across fstsw/sahf
207
        fstsw  ax
208
        sahf                            ; C0 -> CF, so jb means v < u
209
        pop     eax
210
        jb      order_X_Y_ok
211
        fxch                            ; v >= u: exchange so st0 holds the smaller magnitude
212
        inc     edx                     ; sflag = 1
213
order_X_Y_ok:
214
; NOTE(review): both __fdiv_fpr call sites need st(0)/st(1) with the two
; operands replaced by the quotient; presumably EAX = 0Fh selects that
; form -- confirm against __fdiv_fpr.
        push    eax
215
        mov     eax, 0fh
216
        call    __fdiv_fpr                  ; z = st0/st1 (v/u, or u/v when sflag = 1)
217
        pop     eax
218
        fld     dword ptr flt_sixteen   ; 16.0
219
        fmul    st, st(1)               ; z*16.0
220
; Top of stack looks like k, z
221
        fistp   dword ptr [SPILL+esp]   ; store k as int
222
        mov     ecx, [SPILL+esp]
223
        shl     ecx, 4                  ; ecx = k*16 = byte offset into atan_k_by_16
224
        fild    dword ptr[SPILL+esp]
225
        fmul    dword ptr one_by_sixteen; 1.0/16.0
226
; Top of stack looks like g, z
227
        fld     st(1)                   ; duplicate z
228
        fsub    st, st(1)               ; z-g = r
229
        fxch
230
; Top of stack looks like g, r, z
231
        fmulp   st(2), st               ; g*z
232
; Top of stack looks like r, g*z
233
        fld     qword ptr pos_1         ; load 1.0
234
        faddp   st(2), st               ; 1+g*z
235
; Top of stack looks like r, 1+g*z
236
        push    eax
237
        mov     eax, 0fh
238
        call    __fdiv_fpr                  ; r/(1+g*z) = s
239
        pop     eax
240
        fld     st(0)                   ; duplicate s
241
        fmul    st,st(1)                ; t = s*s
242
; Top of stack looks like t, s
243
 
244
        fld     st(0)
245
        fmul    st, st(1)
246
; Top of stack looks like t2, t, s
247
        fld     st(0)
248
        fmul    st, st(1)               ; t4 = t2*t2
249
        fld     tbyte ptr B6
250
        fld     tbyte ptr B5
251
; Top of stack looks like B5, B6, t4, t2, t, s
252
        fxch
253
        fmul    st, st(2)
254
        fld     tbyte ptr B4
255
        fxch    st(2)
256
        fmul    st, st(3)
257
; Top of stack looks like B5t4, B6t4, B4, t4, t2, t, s
258
        fld     tbyte ptr B3
259
        fxch    st(2)
260
        fmul    st, st(5)
261
; Top of stack looks like B6t6, B5t4, B3, B4, t4, t2, t, s
262
        fxch    st(3)
263
        fmulp   st(4), st
264
        fld     tbyte ptr B2
265
; Top of stack looks like B2, B5t4, B3, B6t6, B4t4, t2, t, s
266
        fxch    st(3)
267
        faddp   st(4), st               ; even = B4t4 + B6t6 (B2t2 is added later)
268
; The integer instructions interleaved below build the dispatch index in EDX;
; the x87 instructions between each add/adc pair leave CF untouched.
        mov     eax, [esp+X+8]          ; sign/exponent word of X
269
        fld     tbyte ptr B1
270
        fxch
271
        shl     eax, 16                 ; move X's sign bit up to bit 31
272
; Top of stack looks like B5t4, B1, B3, B2, even, t2, t, s
273
        fmul    st, st(6)
274
        fxch    st(2)
275
        add     eax, eax                ; CF = sign of X
276
        fmul    st, st(5)
277
; Top of stack looks like B3t2, B1, B5t5, B2, even, t2, t, s
278
        fxch    st(3)
279
        adc     edx, edx                ; |sflag|Sx|
280
        fmulp   st(5), st
281
        fxch    st(2)
282
        mov     eax, [Y+8+esp]          ; sign/exponent word of Y
283
        fmul    st, st(5)
284
; Top of stack looks like B3t3, B5t5, B1, even, B2t2, t, s
285
        fxch    st(2)
286
        shl     eax, 16                 ; move Y's sign bit up to bit 31
287
        fmulp   st(5), st
288
; Top of stack looks like  B5t5, B3t3, even, B2t2, B1t, s
289
        fxch    st(2)
290
        faddp   st(3), st
291
        add     eax, eax                ; CF = sign of Y
292
        faddp   st(1), st
293
        adc     edx, edx                ; |sflag|Sx|Sy|
294
; Top of stack looks like  odd, even, B1t, s
295
        faddp   st(2), st
296
        faddp   st(1), st
297
        fmul    st,st(1)                ; s*(odd+even)
298
        faddp   st(1), st               ; poly
299
 
300
        fld     tbyte ptr atan_k_by_16[ecx]     ; arctan[k;16]
301
        faddp   st(1), st               ; w = poly + arctan(g)
302
 
303
        jmp     dword ptr dispatch_table[edx*4]
304
 
305
; Each labelN below applies the fix-up for one |sflag|Sx|Sy| combination,
; restores the caller's control word, releases the frame and returns.
; sflag=0 Sx=0 Sy=0: result = w
label0:
306
        fldcw   [esp+PREV_CW]
307
        add     esp, STACK_SIZE
308
        pop     edx
309
        pop     ecx
310
        pop     eax
311
        ret
312
; sflag=0 Sx=0 Sy=1: result = -w
label1:
313
        fchs
314
        fldcw   [esp+PREV_CW]
315
        add     esp, STACK_SIZE
316
        pop     edx
317
        pop     ecx
318
        pop     eax
319
        ret
320
; sflag=0 Sx=1 Sy=0: result = pi - w
label2:
321
        fld     tbyte ptr pi
322
        fsubrp  st(1), st               ; pi - w
323
        fldcw   [esp+PREV_CW]
324
        add     esp, STACK_SIZE
325
        pop     edx
326
        pop     ecx
327
        pop     eax
328
        ret
329
; sflag=0 Sx=1 Sy=1: result = -(pi - w)
label3:
330
        fld     tbyte ptr pi
331
        fsubrp  st(1), st               ; pi - w
332
        fchs                            ; - (pi - w)
333
        fldcw   [esp+PREV_CW]
334
        add     esp, STACK_SIZE
335
        pop     edx
336
        pop     ecx
337
        pop     eax
338
        ret
339
; sflag=1 Sx=0 Sy=0: result = pi/2 - w
label4:
340
        fld     tbyte ptr pi_by_2
341
        fsubrp  st(1), st               ; pi/2 - w
342
        fldcw   [esp+PREV_CW]
343
        add     esp, STACK_SIZE
344
        pop     edx
345
        pop     ecx
346
        pop     eax
347
        ret
348
; sflag=1 Sx=0 Sy=1: result = -(pi/2 - w)
label5:
349
        fld     tbyte ptr pi_by_2
350
        fsubrp  st(1), st               ; pi/2 - w
351
        fchs                            ; - (pi/2 - w)
352
        fldcw   [esp+PREV_CW]
353
        add     esp, STACK_SIZE
354
        pop     edx
355
        pop     ecx
356
        pop     eax
357
        ret
358
; sflag=1 Sx=1 Sy=0: result = pi/2 + w
label6:
359
        fld     tbyte ptr pi_by_2
360
        faddp   st(1), st               ; pi/2 + w
361
        fldcw   [esp+PREV_CW]
362
        add     esp, STACK_SIZE
363
        pop     edx
364
        pop     ecx
365
        pop     eax
366
        ret
367
; sflag=1 Sx=1 Sy=1: result = -(pi/2 + w)
label7:
368
        fld     tbyte ptr pi_by_2
369
        faddp   st(1), st               ; pi/2 + w
370
        fchs                            ; -(pi/2+w)
371
        fldcw   [esp+PREV_CW]
372
        add     esp, STACK_SIZE
373
        pop     edx
374
        pop     ecx
375
        pop     eax
376
        ret
377
 
378
 
379
; Fallback for operands rejected by the screening at the top: put Y and X
; back in their original stack order and let the hardware instruction
; handle them.  Every jump here happens before the control word is
; modified, so there is nothing to restore.
hw_fpatan:
380
        fld     tbyte ptr [esp+Y]       ; reload Y
381
        fld     tbyte ptr [esp+X]       ; reload X
382
        fpatan
383
        add     esp, STACK_SIZE
384
        pop     edx
385
        pop     ecx
386
        pop     eax
387
        ret
388
__fpatan_chk       ENDP
390
 
391
_TEXT  ENDS
704 serge 392
       END