Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
359 serge 1
;*****************************************************************************
2
;*
3
;*                            Open Watcom Project
4
;*
5
;*    Portions Copyright (c) 1983-2002 Sybase, Inc. All Rights Reserved.
6
;*
7
;*  ========================================================================
8
;*
9
;*    This file contains Original Code and/or Modifications of Original
10
;*    Code as defined in and that are subject to the Sybase Open Watcom
11
;*    Public License version 1.0 (the 'License'). You may not use this file
12
;*    except in compliance with the License. BY USING THIS FILE YOU AGREE TO
13
;*    ALL TERMS AND CONDITIONS OF THE LICENSE. A copy of the License is
14
;*    provided with the Original Code and Modifications, and is also
15
;*    available at www.sybase.com/developer/opensource.
16
;*
17
;*    The Original Code and all software distributed under the License are
18
;*    distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
19
;*    EXPRESS OR IMPLIED, AND SYBASE AND ALL CONTRIBUTORS HEREBY DISCLAIM
20
;*    ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF
21
;*    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR
22
;*    NON-INFRINGEMENT. Please see the License for the specific language
23
;*    governing rights and limitations under the License.
24
;*
25
;*  ========================================================================
26
;*
27
;* Description:  WHEN YOU FIGURE OUT WHAT THIS FILE DOES, PLEASE
28
;*               DESCRIBE IT HERE!
29
;*
30
;*****************************************************************************
31
 
32
 
33
; static char sccs_id[] = "@(#)fprem32.asm      1.5  12/22/94  12:48:07";
34
;
35
; This code is being published by Intel to users of the Pentium(tm)
36
; processor.  Recipients are authorized to copy, modify, compile, use and
37
; distribute the code.
38
;
39
; Intel makes no warranty of any kind with regard to this code, including
40
; but not limited to, implied warranties or merchantability and fitness for
41
; a particular purpose. Intel assumes no responsibility for any errors that
42
; may appear in this code.
43
;
44
; No patent licenses are granted, express or implied.
45
;
46
;
47
include mdef.inc
48
 
49
        .386
50
        .387
51
 
52
;
53
;  PRELIMINARY VERSION of the software patch for the floating
54
;  point remainder.
55
;
56
 
57
 
58
CHECKSW MACRO
59
ifdef   DEBUG
60
        fnstsw  [fpsw]
61
        fnstcw  [fpcw]
62
endif
63
ENDM
64
 
65
 
66
DATA32  SEGMENT DWORD USE32 PUBLIC 'DATA'
67
 
68
;
69
;  Stack variables for remainder routines.
70
;
71
 
72
FLT_SIZE        EQU     12
73
DENOM           EQU     0
74
DENOM_SAVE      EQU     DENOM + FLT_SIZE
75
NUMER           EQU     DENOM_SAVE + FLT_SIZE
76
PREV_CW         EQU     NUMER + FLT_SIZE
77
PATCH_CW        EQU     PREV_CW + 4
78
FPREM_SW        EQU     PATCH_CW + 4
79
STACK_SIZE      EQU     FPREM_SW + 4
80
RET_SIZE        EQU     4
81
PUSH_SIZE       EQU     4
82
 
83
MAIN_FUDGE      EQU     RET_SIZE + PUSH_SIZE + PUSH_SIZE + PUSH_SIZE
84
 
85
MAIN_DENOM              EQU     DENOM + MAIN_FUDGE
86
MAIN_DENOM_SAVE         EQU     DENOM_SAVE + MAIN_FUDGE
87
MAIN_NUMER              EQU     NUMER + MAIN_FUDGE
88
MAIN_PREV_CW            EQU     PREV_CW + MAIN_FUDGE
89
MAIN_PATCH_CW           EQU     PATCH_CW + MAIN_FUDGE
90
MAIN_FPREM_SW           EQU     FPREM_SW + MAIN_FUDGE
91
 
92
ONESMASK        EQU     700h
93
 
94
fprem_risc_table        DB      0, 1, 0, 0, 4, 0, 0, 7, 0, 0, 10, 0, 0, 13, 0, 0
95
fprem_scale             DB      0, 0, 0, 0, 0, 0, 0eeh, 03fh
96
one_shl_64              DB      0, 0, 0, 0, 0, 0, 0f0h, 043h
97
one_shr_64              DB      0, 0, 0, 0, 0, 0, 0f0h, 03bh
98
one                     DB      0, 0, 0, 0, 0, 0, 0f0h, 03fh
99
half                    DB      0, 0, 0, 0, 0, 0, 0e0h, 03fh
100
big_number              DB      0, 0, 0, 0, 0, 0, 0ffh, 0ffh, 0feh, 07fh
101
 
102
ifdef   DEBUG
103
        public  fpcw
104
        public  fpsw
105
fpcw    dw      0
106
fpsw    dw      0
107
endif
108
 
109
FPU_STATE       STRUC
110
        CONTROL_WORD    DW      ?
111
        reserved_1      DW      ?
112
        STATUS_WORD     DD      ?
113
        TAG_WORD        DW      ?
114
        reserved_3      DW      ?
115
        IP_OFFSET       DD      ?
116
        CS_SLCT         DW      ?
117
        OPCODE          DW      ?
118
        DATA_OFFSET     DD      ?
119
        OPERAND_SLCT    DW      ?
120
        reserved_4      DW      ?
121
FPU_STATE       ENDS
122
 
123
ENV_SIZE        EQU     28
124
 
125
 
126
DATA32 ENDS
127
 
128
_TEXT  SEGMENT DWORD USE32 PUBLIC 'CODE'
129
_TEXT  ENDS
130
 
131
DATA32  SEGMENT DWORD USE32 PUBLIC 'DATA'
132
DATA32  ENDS
133
 
134
CONST32 SEGMENT DWORD USE32 PUBLIC 'CONST'
135
CONST32 ENDS
136
 
137
BSS32   SEGMENT DWORD USE32 PUBLIC 'BSS'
138
BSS32   ENDS
139
 
140
DGROUP  GROUP CONST32, BSS32, DATA32
141
 
142
 
143
 
144
CODE32  SEGMENT   DWORD USE32 PUBLIC 'CODE'
145
 
146
        assume cs:_TEXT, ds:DGROUP, es:DGROUP, ss:nothing
147
 
148
 
149
fprem_common    PROC    NEAR
150
 
151
        push    eax
152
        push    ebx
153
        push    ecx
154
        mov     eax, [MAIN_DENOM+6+esp] ; exponent and high 16 bits of mantissa
155
        xor     eax, ONESMASK           ; invert bits that have to be one
156
        test    eax, ONESMASK           ; check bits that have to be one
157
        jnz     remainder_hardware_ok
158
        shr     eax, 11
159
        and     eax, 0fh
160
        cmp     byte ptr fprem_risc_table[eax], 0     ; check for (1,4,7,a,d)
161
        jz      remainder_hardware_ok
162
 
163
; The denominator has the bit pattern. Weed out the funny cases like NaNs
164
; before applying the software version. Our caller guarantees that the
165
; denominator is not a denormal. Here we check for:
166
;       denominator     inf, NaN, unnormal
167
;       numerator       inf, NaN, unnormal, denormal
168
 
169
        mov     eax, [MAIN_DENOM+6+esp] ; exponent and high 16 bits of mantissa
170
        and     eax, 07fff0000h         ; mask the exponent only
171
        cmp     eax, 07fff0000h         ; check for INF or NaN
172
        je      remainder_hardware_ok
173
        mov     eax, [MAIN_NUMER+6+esp] ; exponent and high 16 bits of mantissa
174
        and     eax, 07fff0000h         ; mask the exponent only
175
        jz      remainder_hardware_ok   ; jif numerator denormal
176
        cmp     eax, 07fff0000h         ; check for INF or NaN
177
        je      remainder_hardware_ok
178
        mov     eax, [esp + MAIN_NUMER + 4]     ; high mantissa bits - numerator
179
        add     eax, eax                ; set carry if explicit bit set
180
        jnz     remainder_hardware_ok   ; jmp if numerator is unnormal
181
        mov     eax, [esp + MAIN_DENOM + 4] ; high mantissa bits - denominator
182
        add     eax, eax                ; set carry if explicit bit set
183
        jnz     remainder_hardware_ok   ; jmp if denominator is unnormal
184
 
185
rem_patch:
186
        mov     eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
187
        and     eax, 07fffh              ; clear sy
188
        add     eax, 63                  ; evaluate ey + 63
189
        mov     ebx, [MAIN_NUMER+8+esp]  ; sign and exponent of x (numerator)
190
        and     ebx, 07fffh              ; clear sx
191
        sub     ebx, eax                 ; evaluate the exponent difference (ex - ey)
192
        ja      rem_large               ; if ex > ey + 63, case of large arguments
193
rem_patch_loop:
194
        mov     eax, [MAIN_DENOM+8+esp]  ; sign and exponent of y (denominator)
195
        and     eax, 07fffh             ; clear sy
196
        add     eax, 10                 ; evaluate ey + 10
197
        mov     ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
198
        and     ebx, 07fffh             ; clear sx
199
        sub     ebx, eax                ; evaluate the exponent difference (ex - ey)
200
        js      remainder_hardware_ok   ; safe if ey + 10 > ex
201
        fld     tbyte ptr [MAIN_NUMER+esp]   ; load the numerator
202
        mov     eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
203
        mov     ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
204
        and     ebx, 07fffh             ; clear sx
205
        mov     ecx, ebx
206
        sub     ebx, eax
207
        and     ebx, 07h
208
        or      ebx, 04h
209
        sub     ecx, ebx
210
        mov     ebx, eax
211
        and     ebx, 08000h             ; keep sy
212
        or      ecx, ebx                ; merge the sign of y
213
        mov     dword ptr [MAIN_DENOM+8+esp], ecx
214
        fld     tbyte ptr [MAIN_DENOM+esp]   ; load the shifted denominator
215
        mov     dword ptr [MAIN_DENOM+8+esp], eax       ; restore the initial denominator
216
        fxch
217
        fprem                           ; this rem is safe
218
        fstp    tbyte ptr [MAIN_NUMER+esp]      ; update the numerator
219
        fstp    st(0)                   ; pop the stack
220
        jmp rem_patch_loop
221
rem_large:
222
        test    edx, 02h                ; is denominator already saved
223
        jnz     already_saved
224
        fld     tbyte ptr[esp + MAIN_DENOM]
225
        fstp    tbyte ptr[esp + MAIN_DENOM_SAVE]        ; save denominator
226
already_saved:
227
        ; Save user's precision control and institute 80.  The fp ops in
228
        ; rem_large_loop must not round to user's precision (if it is less
229
        ; than 80) because the hardware would not have done so.  We are
230
        ; aping the hardware here, which is all extended.
231
 
232
        fnstcw  [esp+MAIN_PREV_CW]      ; save caller's control word
233
        mov     eax, dword ptr[esp + MAIN_PREV_CW]
234
        or      eax, 033fh              ; mask exceptions, pc=80
235
        mov     [esp + MAIN_PATCH_CW], eax
236
        fldcw   [esp + MAIN_PATCH_CW]
237
 
238
        mov     eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
239
        and     eax, 07fffh             ; clear sy
240
        mov     ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
241
        and     ebx, 07fffh             ; clear sx
242
        sub     ebx, eax                ; evaluate the exponent difference
243
        and     ebx, 03fh
244
        or      ebx, 020h
245
        add     ebx, 1
246
        mov     ecx, ebx
247
        mov     eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
248
        mov     ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
249
        and     ebx, 07fffh             ; clear sx
250
        and     eax, 08000h             ; keep sy
251
        or      ebx, eax                ; merge the sign of y
252
        mov     dword ptr[MAIN_DENOM+8+esp], ebx        ; make ey equal to ex (scaled denominator)
253
        fld     tbyte ptr [MAIN_DENOM+esp]   ; load the scaled denominator
254
        fabs
255
        fld     tbyte ptr [MAIN_NUMER+esp]   ; load the numerator
256
        fabs
257
rem_large_loop:
258
        fcom
259
        fstsw  ax
260
        and     eax, 00100h
261
        jnz     rem_no_sub
262
        fsub    st, st(1)
263
rem_no_sub:
264
        fxch
265
        fmul    qword ptr half
266
        fxch
267
        sub     ecx, 1                  ; decrement the loop counter
268
        jnz     rem_large_loop
269
        mov     ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
270
        fstp    tbyte ptr[esp + MAIN_NUMER]     ; save result
271
        fstp    st                      ; toss modified denom
272
        fld     tbyte ptr[esp + MAIN_DENOM_SAVE]
273
        fld     tbyte ptr[big_number]   ; force C2 to be set
274
        fprem
275
        fstp    st
276
        fld     tbyte ptr[esp + MAIN_NUMER]     ; restore saved result
277
 
278
        fldcw   [esp + MAIN_PREV_CW]    ; restore caller's control word
279
        and     ebx, 08000h             ; keep sx
280
        jz      rem_done
281
        fchs
282
        jmp     rem_done
283
remainder_hardware_ok:
284
        fld     tbyte ptr [MAIN_DENOM+esp]   ; load the denominator
285
        fld     tbyte ptr [MAIN_NUMER+esp]   ; load the numerator
286
        fprem                           ; and finally do a remainder
287
; prem_main_routine end
288
rem_done:
289
        test    edx, 03h
290
        jz      rem_exit
291
        fnstsw  [esp + MAIN_FPREM_SW]   ; save Q0 Q1 and Q2
292
        test    edx, 01h
293
        jz      do_not_de_scale
294
; De-scale the result. Go to pc=80 to prevent from fmul
295
; from user precision (fprem does not round the result).
296
        fnstcw  [esp + MAIN_PREV_CW]    ; save callers control word
297
        mov     eax, [esp + MAIN_PREV_CW]
298
        or      eax, 0300h              ; pc = 80
299
        mov     [esp + MAIN_PATCH_CW], eax
300
        fldcw   [esp + MAIN_PATCH_CW]
301
        fmul    qword ptr one_shr_64
302
        fldcw   [esp + MAIN_PREV_CW]    ; restore callers CW
303
do_not_de_scale:
304
        mov     eax, [esp + MAIN_FPREM_SW]
305
        fxch
306
        fstp    st
307
        fld     tbyte ptr[esp + MAIN_DENOM_SAVE]
308
        fxch
309
        and     eax, 04300h             ; restore saved Q0, Q1, Q2
310
        sub     esp, ENV_SIZE
311
        fnstenv [esp]
312
        and     [esp].STATUS_WORD, 0bcffh
313
        or      [esp].STATUS_WORD, eax
314
        fldenv  [esp]
315
        add     esp, ENV_SIZE
316
rem_exit:
317
        pop     ecx
318
        pop     ebx
319
        pop     eax
320
        CHECKSW                         ; debug only: save status
321
        ret
322
fprem_common    ENDP
323
 
324
comment ~****************************************************************
325
 
326
;
327
; float frem_chk (float numer, float denom)
328
;
329
        public  frem_chk
330
frem_chk        PROC    NEAR
331
        push    edx
332
        sub     esp, STACK_SIZE
333
        fld     dword ptr [STACK_SIZE+8+esp]
334
        fstp    tbyte ptr [NUMER+esp]
335
        fld     dword ptr [STACK_SIZE+12+esp]
336
        fstp    tbyte ptr [DENOM+esp]
337
        mov     edx, 0                  ; dx = 1 if denormal extended divisor
338
        call    fprem_common
339
        fxch
340
        fstp    st
341
        add     esp, STACK_SIZE
342
        pop     edx
343
        ret
344
frem_chk        ENDP
345
; end frem_chk
346
 
347
;
348
; double drem_chk (double numer, double denom)
349
;
350
        public  drem_chk
351
drem_chk        PROC    NEAR
352
        push    edx
353
        sub     esp, STACK_SIZE
354
        fld     qword ptr [STACK_SIZE+8+esp]
355
        fstp    tbyte ptr [NUMER+esp]
356
        fld     qword ptr [STACK_SIZE+16+esp]
357
        fstp    tbyte ptr [DENOM+esp]
358
        mov     edx, 0                  ; dx = 1 if denormal extended divisor
359
        call    fprem_common
360
        fxch
361
        fstp    st
362
        add     esp, STACK_SIZE
363
        pop     edx
364
        ret
365
 
366
drem_chk        ENDP
367
; end drem_chk
368
 
369
;
370
; long double lrem_chk(long double number,long double denom)
371
;
372
        public  lrem_chk
373
lrem_chk        PROC    NEAR
374
        fld     tbyte ptr [20+esp]
375
        fld     tbyte ptr [4+esp]
376
        call    fprem_chk
377
        fxch
378
        fstp    st
379
        ret
380
lrem_chk        ENDP
381
 
382
**********************************************************************~
383
 
384
;
385
; FPREM: ST = remainder(ST, ST(1))
386
;
387
; Compiler version of the FPREM must preserve the arguments in the floating
388
; point stack.
389
 
390
        public  __fprem_chk
391
        defpe   __fprem_chk
392
        push    edx
393
        sub     esp, STACK_SIZE
394
        fstp    tbyte ptr [NUMER+esp]
395
        fstp    tbyte ptr [DENOM+esp]
396
        xor     edx, edx
397
; prem_main_routine begin
398
        mov     eax,[DENOM+6+esp]       ; exponent and high 16 bits of mantissa
399
        test    eax,07fff0000h          ; check for denormal
400
        jz      denormal
401
        call    fprem_common
402
        add     esp, STACK_SIZE
403
        pop     edx
404
        ret
405
 
406
denormal:
407
        fld     tbyte ptr [DENOM+esp]   ; load the denominator
408
        fld     tbyte ptr [NUMER+esp]   ; load the numerator
409
        mov     eax, [DENOM+esp]        ; test for whole mantissa == 0
410
        or      eax, [DENOM+4+esp]      ; test for whole mantissa == 0
411
        jz      remainder_hardware_ok_l ; denominator is zero
412
        fxch
413
        fstp    tbyte ptr[esp + DENOM_SAVE]     ; save org denominator
414
        fld     tbyte ptr[esp + DENOM]
415
        fxch
416
        or      edx, 02h
417
;
418
; For this we need pc=80.  Also, mask exceptions so we don't take any
419
; denormal operand exceptions.  It is guaranteed that the descaling
420
; later on will take underflow, which is what the hardware would have done
421
; on a normal fprem.
422
;
423
        fnstcw  [PREV_CW+esp]           ; save caller's control word
424
        mov     eax, [PREV_CW+esp]
425
        or      eax, 0033fh             ; mask exceptions, pc=80
426
        mov     [PATCH_CW+esp], eax
427
        fldcw   [PATCH_CW+esp]          ; mask exceptions & pc=80
428
 
429
; The denominator is a denormal.  For most numerators, scale both numerator
430
; and denominator to get rid of denormals.  Then execute the common code
431
; with the flag set to indicate that the result must be de-scaled.
432
; For large numerators this won't work because the scaling would cause
433
; overflow.  In this case we know the numerator is large, the denominator
434
; is small (denormal), so the exponent difference is also large.  This means
435
; the rem_large code will be used and this code depends on the difference
436
; in exponents modulo 64.  Adding 64 to the denominators exponent
437
; doesn't change the modulo 64 difference.  So we can scale the denominator
438
; by 64, making it not denormal, and this won't effect the result.
439
;
440
; To start with, figure out if numerator is large
441
 
442
        mov     eax, [esp + NUMER + 8]  ; load numerator exponent
443
        and     eax, 7fffh              ; isolate numerator exponent
444
        cmp     eax, 7fbeh              ; compare Nexp to Maxexp-64
445
        ja      big_numer_rem_de        ; jif big numerator
446
 
447
; So the numerator is not large scale both numerator and denominator
448
 
449
        or      edx, 1                  ; edx = 1, if denormal extended divisor
450
        fmul    qword ptr one_shl_64    ; make numerator not denormal
451
        fstp    tbyte ptr[esp + NUMER]
452
        fmul    qword ptr one_shl_64    ; make denominator not denormal
453
        fstp    tbyte ptr[esp + DENOM]
454
        jmp     scaling_done
455
 
456
; The numerator is large.  Scale only the denominator, which will not
457
; change the result which we know will be partial.  Set the scale flag
458
; to false.
459
big_numer_rem_de:
460
        ; We must do this with pc=80 to avoid rounding to single/double.
461
        ; In this case we do not mask exceptions so that we will take
462
        ; denormal operand, as would the hardware.
463
        fnstcw  [PREV_CW+esp]           ; save caller's control word
464
        mov     eax, [PREV_CW+esp]
465
        or      eax, 00300h             ; pc=80
466
        mov     [PATCH_CW+esp], eax
467
        fldcw   [PATCH_CW+esp]          ;  pc=80
468
 
469
        fstp    st                      ; Toss numerator
470
        fmul    qword ptr one_shl_64    ; make denominator not denormal
471
        fstp    tbyte ptr[esp + DENOM]
472
 
473
; Restore the control word which was fiddled to scale at 80-bit precision.
474
; Then call the common code.
475
scaling_done:
476
        fldcw   [esp + PREV_CW]         ; restore callers control word
477
        call    fprem_common
478
        add     esp, STACK_SIZE
479
        pop     edx
480
        ret
481
 
482
remainder_hardware_ok_l:
483
        fprem                           ; and finally do a remainder
484
 
485
        CHECKSW
486
 
487
        add     esp, STACK_SIZE
488
        pop     edx
489
        ret
490
__fprem_chk       ENDP
491
; end fprem_chk
492
 
493
 
494
;
495
; FPREM1 code begins here
496
;
497
 
498
 
499
fprem1_common   PROC    NEAR
500
 
501
        push    eax
502
        push    ebx
503
        push    ecx
504
        mov     eax, [MAIN_DENOM+6+esp] ; exponent and high 16 bits of mantissa
505
        xor     eax, ONESMASK           ; invert bits that have to be one
506
        test    eax, ONESMASK           ; check bits that have to be one
507
        jnz     remainder1_hardware_ok
508
        shr     eax, 11
509
        and     eax, 0fh
510
        cmp     byte ptr fprem_risc_table[eax], 0     ; check for (1,4,7,a,d)
511
        jz      remainder1_hardware_ok
512
 
513
; The denominator has the bit pattern. Weed out the funny cases like NaNs
514
; before applying the software version. Our caller guarantees that the
515
; denominator is not a denormal. Here we check for:
516
;       denominator     inf, NaN, unnormal
517
;       numerator       inf, NaN, unnormal, denormal
518
 
519
        mov     eax, [MAIN_DENOM+6+esp] ; exponent and high 16 bits of mantissa
520
        and     eax, 07fff0000h         ; mask the exponent only
521
        cmp     eax, 07fff0000h         ; check for INF or NaN
522
        je      remainder1_hardware_ok
523
        mov     eax, [MAIN_NUMER+6+esp] ; exponent and high 16 bits of mantissa
524
        and     eax, 07fff0000h         ; mask the exponent only
525
        jz      remainder1_hardware_ok  ; jif numerator denormal
526
        cmp     eax, 07fff0000h         ; check for INF or NaN
527
        je      remainder1_hardware_ok
528
        mov     eax, [esp + MAIN_NUMER + 4]     ; high mantissa bits - numerator
529
        add     eax, eax                ; set carry if explicit bit set
530
        jnz     remainder1_hardware_ok  ; jmp if numerator is unnormal
531
        mov     eax, [esp + MAIN_DENOM + 4] ; high mantissa bits - denominator
532
        add     eax, eax                ; set carry if explicit bit set
533
        jnz     remainder1_hardware_ok  ; jmp if denominator is unnormal
534
 
535
rem1_patch:
536
        mov     eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
537
        and     eax, 07fffh              ; clear sy
538
        add     eax, 63                  ; evaluate ey + 63
539
        mov     ebx, [MAIN_NUMER+8+esp]  ; sign and exponent of x (numerator)
540
        and     ebx, 07fffh              ; clear sx
541
        sub     ebx, eax                 ; evaluate the exponent difference (ex - ey)
542
        ja      rem1_large              ; if ex > ey + 63, case of large arguments
543
rem1_patch_loop:
544
        mov     eax, [MAIN_DENOM+8+esp]  ; sign and exponent of y (denominator)
545
        and     eax, 07fffh             ; clear sy
546
        add     eax, 10                 ; evaluate ey + 10
547
        mov     ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
548
        and     ebx, 07fffh             ; clear sx
549
        sub     ebx, eax                ; evaluate the exponent difference (ex - ey)
550
        js      remainder1_hardware_ok  ; safe if ey + 10 > ex
551
        fld     tbyte ptr [MAIN_NUMER+esp]   ; load the numerator
552
        mov     eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
553
        mov     ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
554
        and     ebx, 07fffh             ; clear sx
555
        mov     ecx, ebx
556
        sub     ebx, eax
557
        and     ebx, 07h
558
        or      ebx, 04h
559
        sub     ecx, ebx
560
        mov     ebx, eax
561
        and     ebx, 08000h             ; keep sy
562
        or      ecx, ebx                ; merge the sign of y
563
        mov     dword ptr [MAIN_DENOM+8+esp], ecx
564
        fld     tbyte ptr [MAIN_DENOM+esp]   ; load the shifted denominator
565
        mov     dword ptr [MAIN_DENOM+8+esp], eax       ; restore the initial denominator
566
        fxch
567
        fprem                           ; this rem is safe
568
        fstp    tbyte ptr [MAIN_NUMER+esp]      ; update the numerator
569
        fstp    st(0)                   ; pop the stack
570
        jmp rem1_patch_loop
571
rem1_large:
572
        test    ebx, 02h                ; is denominator already saved
573
        jnz     already_saved1
574
        fld     tbyte ptr[esp + MAIN_DENOM]
575
        fstp    tbyte ptr[esp + MAIN_DENOM_SAVE]        ; save denominator
576
already_saved1:
577
        ; Save user's precision control and institute 80.  The fp ops in
578
        ; rem1_large_loop must not round to user's precision (if it is less
579
        ; than 80) because the hardware would not have done so.  We are
580
        ; aping the hardware here, which is all extended.
581
 
582
        fnstcw  [esp+MAIN_PREV_CW]      ; save caller's control word
583
        mov     eax, dword ptr[esp + MAIN_PREV_CW]
584
        or      eax, 033fh              ; mask exceptions, pc=80
585
        mov     [esp + MAIN_PATCH_CW], eax
586
        fldcw   [esp + MAIN_PATCH_CW]
587
 
588
        mov     eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
589
        and     eax, 07fffh             ; clear sy
590
        mov     ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
591
        and     ebx, 07fffh             ; clear sx
592
        sub     ebx, eax                ; evaluate the exponent difference
593
        and     ebx, 03fh
594
        or      ebx, 020h
595
        add     ebx, 1
596
        mov     ecx, ebx
597
        mov     eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
598
        mov     ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
599
        and     ebx, 07fffh             ; clear sx
600
        and     eax, 08000h             ; keep sy
601
        or      ebx, eax                ; merge the sign of y
602
        mov     dword ptr[MAIN_DENOM+8+esp], ebx        ; make ey equal to ex (scaled denominator)
603
        fld     tbyte ptr [MAIN_DENOM+esp]   ; load the scaled denominator
604
        fabs
605
        fld     tbyte ptr [MAIN_NUMER+esp]   ; load the numerator
606
        fabs
607
rem1_large_loop:
608
        fcom
609
        fstsw  ax
610
        and     eax, 00100h
611
        jnz     rem1_no_sub
612
        fsub    st, st(1)
613
rem1_no_sub:
614
        fxch
615
        fmul    qword ptr half
616
        fxch
617
        sub     ecx, 1                  ; decrement the loop counter
618
        jnz     rem1_large_loop
619
        mov     ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
620
        fstp    tbyte ptr[esp + MAIN_NUMER]     ; save result
621
        fstp    st                      ; toss modified denom
622
        fld     tbyte ptr[esp + MAIN_DENOM_SAVE]
623
        fld     tbyte ptr[big_number]   ; force C2 to be set
624
        fprem1
625
        fstp    st
626
        fld     tbyte ptr[esp + MAIN_NUMER]     ; restore saved result
627
 
628
        fldcw   [esp + MAIN_PREV_CW]    ; restore caller's control word
629
        and     ebx, 08000h             ; keep sx
630
        jz      rem1_done
631
        fchs
632
        jmp     rem1_done
633
remainder1_hardware_ok:
634
        fld     tbyte ptr [MAIN_DENOM+esp]   ; load the denominator
635
        fld     tbyte ptr [MAIN_NUMER+esp]   ; load the numerator
636
        fprem1                           ; and finally do a remainder
637
; prem1_main_routine end
638
rem1_done:
639
        test    edx, 03h
640
        jz      rem1_exit
641
        fnstsw  [esp + MAIN_FPREM_SW]   ; save Q0 Q1 and Q2
642
        test    edx, 01h
643
        jz      do_not_de_scale1
644
; De-scale the result. Go to pc=80 to prevent from fmul
645
; from user precision (fprem does not round the result).
646
        fnstcw  [esp + MAIN_PREV_CW]    ; save callers control word
647
        mov     eax, [esp + MAIN_PREV_CW]
648
        or      eax, 0300h              ; pc = 80
649
        mov     [esp + MAIN_PATCH_CW], eax
650
        fldcw   [esp + MAIN_PATCH_CW]
651
        fmul    qword ptr one_shr_64
652
        fldcw   [esp + MAIN_PREV_CW]    ; restore callers CW
653
do_not_de_scale1:
654
        mov     eax, [esp + MAIN_FPREM_SW]
655
        fxch
656
        fstp    st
657
        fld     tbyte ptr[esp + MAIN_DENOM_SAVE]
658
        fxch
659
        and     eax, 04300h             ; restore saved Q0, Q1, Q2
660
        sub     esp, ENV_SIZE
661
        fnstenv [esp]
662
        and     [esp].STATUS_WORD, 0bcffh
663
        or      [esp].STATUS_WORD, eax
664
        fldenv  [esp]
665
        add     esp, ENV_SIZE
666
rem1_exit:
667
        pop     ecx
668
        pop     ebx
669
        pop     eax
670
        CHECKSW                         ; debug only: save status
671
        ret
672
fprem1_common   ENDP
673
 
674
 
675
comment ~***************************************************************
676
;
677
; float frem1_chk (float numer, float denom)
678
;
679
        public  frem1_chk
680
frem1_chk       PROC    NEAR
681
        push    edx
682
        sub     esp, STACK_SIZE
683
        fld     dword ptr [STACK_SIZE+8+esp]
684
        fstp    tbyte ptr [NUMER+esp]
685
        fld     dword ptr [STACK_SIZE+12+esp]
686
        fstp    tbyte ptr [DENOM+esp]
687
        mov     edx, 0                  ; dx = 1 if denormal extended divisor
688
        call    fprem1_common
689
        fxch
690
        fstp    st
691
        add     esp, STACK_SIZE
692
        pop     edx
693
        ret
694
frem1_chk       ENDP
695
; end frem1_chk
696
 
697
;
698
; double drem1_chk (double numer, double denom)
699
;
700
        public  drem1_chk
701
drem1_chk       PROC    NEAR
702
        push    edx
703
        sub     esp, STACK_SIZE
704
        fld     qword ptr [STACK_SIZE+8+esp]
705
        fstp    tbyte ptr [NUMER+esp]
706
        fld     qword ptr [STACK_SIZE+16+esp]
707
        fstp    tbyte ptr [DENOM+esp]
708
        mov     edx, 0                  ; dx = 1 if denormal extended divisor
709
        call    fprem1_common
710
        fxch
711
        fstp    st
712
        add     esp, STACK_SIZE
713
        pop     edx
714
        ret
715
 
716
drem1_chk       ENDP
717
; end drem1_chk
718
 
719
;
720
; long double lrem1_chk(long double number,long double denom)
721
;
722
        public  lrem1_chk
723
lrem1_chk       PROC    NEAR
724
        fld     tbyte ptr [20+esp]
725
        fld     tbyte ptr [4+esp]
726
        call    fprem1_chk
727
        fxch
728
        fstp    st
729
        ret
730
lrem1_chk       ENDP
731
********************************************************************~
732
 
733
;
734
; FPREM1: ST = remainder(ST, ST(1)) - IEEE version of rounding
735
;
736
; Compiler version of the FPREM must preserve the arguments in the floating
737
; point stack.
738
 
739
        public  __fprem1_chk
740
        defpe   __fprem1_chk
741
        push    edx
742
        sub     esp, STACK_SIZE
743
        fstp    tbyte ptr [NUMER+esp]
744
        fstp    tbyte ptr [DENOM+esp]
745
        mov     edx, 0
746
; prem1_main_routine begin
747
        mov     eax,[DENOM+6+esp]       ; exponent and high 16 bits of mantissa
748
        test    eax,07fff0000h          ; check for denormal
749
        jz      denormal1
750
        call    fprem1_common
751
        add     esp, STACK_SIZE
752
        pop     edx
753
        ret
754
 
755
denormal1:
756
        fld     tbyte ptr [DENOM+esp]   ; load the denominator
757
        fld     tbyte ptr [NUMER+esp]   ; load the numerator
758
        mov     eax, [DENOM+esp]        ; test for whole mantissa == 0
759
        or      eax, [DENOM+4+esp]      ; test for whole mantissa == 0
760
        jz      remainder1_hardware_ok_l ; denominator is zero
761
        fxch
762
        fstp    tbyte ptr[esp + DENOM_SAVE]     ; save org denominator
763
        fld     tbyte ptr[esp + DENOM]
764
        fxch
765
        or      edx, 02h
766
;
767
; For this we need pc=80.  Also, mask exceptions so we don't take any
768
; denormal operand exceptions.  It is guaranteed that the descaling
769
; later on will take underflow, which is what the hardware would have done
770
; on a normal fprem.
771
;
772
        fnstcw  [PREV_CW+esp]           ; save caller's control word
773
        mov     eax, [PREV_CW+esp]
774
        or      eax, 0033fh             ; mask exceptions, pc=80
775
        mov     [PATCH_CW+esp], eax
776
        fldcw   [PATCH_CW+esp]          ; mask exceptions & pc=80
777
 
778
; The denominator is a denormal.  For most numerators, scale both numerator
779
; and denominator to get rid of denormals.  Then execute the common code
780
; with the flag set to indicate that the result must be de-scaled.
781
; For large numerators this won't work because the scaling would cause
782
; overflow.  In this case we know the numerator is large, the denominator
783
; is small (denormal), so the exponent difference is also large.  This means
784
; the rem1_large code will be used and this code depends on the difference
785
; in exponents modulo 64.  Adding 64 to the denominators exponent
786
; doesn't change the modulo 64 difference.  So we can scale the denominator
787
; by 64, making it not denormal, and this won't effect the result.
788
;
789
; To start with, figure out if numerator is large
790
 
791
        mov     eax, [esp + NUMER + 8]  ; load numerator exponent
792
        and     eax, 7fffh              ; isolate numerator exponent
793
        cmp     eax, 7fbeh              ; compare Nexp to Maxexp-64
794
        ja      big_numer_rem1_de       ; jif big numerator
795
 
796
; So the numerator is not large scale both numerator and denominator
797
 
798
        or      edx, 1                  ; edx = 1, if denormal extended divisor
799
        fmul    qword ptr one_shl_64    ; make numerator not denormal
800
        fstp    tbyte ptr[esp + NUMER]
801
        fmul    qword ptr one_shl_64    ; make denominator not denormal
802
        fstp    tbyte ptr[esp + DENOM]
803
        jmp     scaling_done1
804
 
805
; The numerator is large.  Scale only the denominator, which will not
806
; change the result which we know will be partial.  Set the scale flag
807
; to false.
808
big_numer_rem1_de:
809
        ; We must do this with pc=80 to avoid rounding to single/double.
810
        ; In this case we do not mask exceptions so that we will take
811
        ; denormal operand, as would the hardware.
812
        fnstcw  [PREV_CW+esp]           ; save caller's control word
813
        mov     eax, [PREV_CW+esp]
814
        or      eax, 00300h             ; pc=80
815
        mov     [PATCH_CW+esp], eax
816
        fldcw   [PATCH_CW+esp]          ;  pc=80
817
 
818
        fstp    st                      ; Toss numerator
819
        fmul    qword ptr one_shl_64    ; make denominator not denormal
820
        fstp    tbyte ptr[esp + DENOM]
821
 
822
; Restore the control word which was fiddled to scale at 80-bit precision.
823
; Then call the common code.
824
scaling_done1:
825
        fldcw   [esp + PREV_CW]         ; restore callers control word
826
        call    fprem1_common
827
        add     esp, STACK_SIZE
828
        pop     edx
829
        ret
830
 
831
remainder1_hardware_ok_l:
832
        fprem                           ; and finally do a remainder
833
 
834
        CHECKSW
835
 
836
        add     esp, STACK_SIZE
837
        pop     edx
838
        ret
839
__fprem1_chk      ENDP
840
; end fprem1_chk
841
 
842
ifdef   DEBUG
843
        public  fpinit
844
fpinit  PROC    NEAR
845
        fninit
846
        ret
847
fpinit  ENDP
848
endif
849
 
850
CODE32 ENDS
851
       END