Subversion Repositories Kolibri OS

Rev

Rev 554 | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
554 serge 1
;*****************************************************************************
2
;*
3
;*                            Open Watcom Project
4
;*
5
;*    Portions Copyright (c) 1983-2002 Sybase, Inc. All Rights Reserved.
6
;*
7
;*  ========================================================================
8
;*
9
;*    This file contains Original Code and/or Modifications of Original
10
;*    Code as defined in and that are subject to the Sybase Open Watcom
11
;*    Public License version 1.0 (the 'License'). You may not use this file
12
;*    except in compliance with the License. BY USING THIS FILE YOU AGREE TO
13
;*    ALL TERMS AND CONDITIONS OF THE LICENSE. A copy of the License is
14
;*    provided with the Original Code and Modifications, and is also
15
;*    available at www.sybase.com/developer/opensource.
16
;*
17
;*    The Original Code and all software distributed under the License are
18
;*    distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
19
;*    EXPRESS OR IMPLIED, AND SYBASE AND ALL CONTRIBUTORS HEREBY DISCLAIM
20
;*    ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF
21
;*    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR
22
;*    NON-INFRINGEMENT. Please see the License for the specific language
23
;*    governing rights and limitations under the License.
24
;*
25
;*  ========================================================================
26
;*
27
;* Description:  WHEN YOU FIGURE OUT WHAT THIS FILE DOES, PLEASE
28
;*               DESCRIBE IT HERE!
29
;*
30
;*****************************************************************************
31
 
32
 
33
; static char sccs_id[] = "@(#)fprem32.asm      1.5  12/22/94  12:48:07";
34
;
35
; This code is being published by Intel to users of the Pentium(tm)
36
; processor.  Recipients are authorized to copy, modify, compile, use and
37
; distribute the code.
38
;
39
; Intel makes no warranty of any kind with regard to this code, including
40
; but not limited to, implied warranties or merchantability and fitness for
41
; a particular purpose. Intel assumes no responsibility for any errors that
42
; may appear in this code.
43
;
44
; No patent licenses are granted, express or implied.
45
;
46
;
47
include mdef.inc
48
 
49
        .386
50
        .387
51
 
52
;
53
;  PRELIMINARY VERSION of the software patch for the floating
54
;  point remainder.
55
;
56
 
57
 
58
CHECKSW MACRO
59
ifdef   DEBUG
60
        fnstsw  [fpsw]
61
        fnstcw  [fpcw]
62
endif
63
ENDM
64
 
65
 
704 serge 66
_DATA  SEGMENT DWORD USE32 PUBLIC 'DATA'
554 serge 67
 
68
;
69
;  Stack variables for remainder routines.
70
;
71
 
72
FLT_SIZE        EQU     12
73
DENOM           EQU     0
74
DENOM_SAVE      EQU     DENOM + FLT_SIZE
75
NUMER           EQU     DENOM_SAVE + FLT_SIZE
76
PREV_CW         EQU     NUMER + FLT_SIZE
77
PATCH_CW        EQU     PREV_CW + 4
78
FPREM_SW        EQU     PATCH_CW + 4
79
STACK_SIZE      EQU     FPREM_SW + 4
80
RET_SIZE        EQU     4
81
PUSH_SIZE       EQU     4
82
 
83
MAIN_FUDGE      EQU     RET_SIZE + PUSH_SIZE + PUSH_SIZE + PUSH_SIZE
84
 
85
MAIN_DENOM              EQU     DENOM + MAIN_FUDGE
86
MAIN_DENOM_SAVE         EQU     DENOM_SAVE + MAIN_FUDGE
87
MAIN_NUMER              EQU     NUMER + MAIN_FUDGE
88
MAIN_PREV_CW            EQU     PREV_CW + MAIN_FUDGE
89
MAIN_PATCH_CW           EQU     PATCH_CW + MAIN_FUDGE
90
MAIN_FPREM_SW           EQU     FPREM_SW + MAIN_FUDGE
91
 
92
ONESMASK        EQU     700h
93
 
94
fprem_risc_table        DB      0, 1, 0, 0, 4, 0, 0, 7, 0, 0, 10, 0, 0, 13, 0, 0
95
fprem_scale             DB      0, 0, 0, 0, 0, 0, 0eeh, 03fh
96
one_shl_64              DB      0, 0, 0, 0, 0, 0, 0f0h, 043h
97
one_shr_64              DB      0, 0, 0, 0, 0, 0, 0f0h, 03bh
98
one                     DB      0, 0, 0, 0, 0, 0, 0f0h, 03fh
99
half                    DB      0, 0, 0, 0, 0, 0, 0e0h, 03fh
100
big_number              DB      0, 0, 0, 0, 0, 0, 0ffh, 0ffh, 0feh, 07fh
101
 
102
ifdef   DEBUG
103
        public  fpcw
104
        public  fpsw
105
fpcw    dw      0
106
fpsw    dw      0
107
endif
108
 
109
FPU_STATE       STRUC
110
        CONTROL_WORD    DW      ?
111
        reserved_1      DW      ?
112
        STATUS_WORD     DD      ?
113
        TAG_WORD        DW      ?
114
        reserved_3      DW      ?
115
        IP_OFFSET       DD      ?
116
        CS_SLCT         DW      ?
117
        OPCODE          DW      ?
118
        DATA_OFFSET     DD      ?
119
        OPERAND_SLCT    DW      ?
120
        reserved_4      DW      ?
121
FPU_STATE       ENDS
122
 
123
ENV_SIZE        EQU     28
124
 
125
 
704 serge 126
_DATA  ENDS
554 serge 127
 
128
_TEXT  SEGMENT DWORD USE32 PUBLIC 'CODE'
129
_TEXT  ENDS
130
 
131
 
704 serge 132
DGROUP  GROUP _DATA
554 serge 133
 
134
 
135
 
704 serge 136
_TEXT  SEGMENT   DWORD USE32 PUBLIC 'CODE'
554 serge 137
 
138
        assume cs:_TEXT, ds:DGROUP, es:DGROUP, ss:nothing
139
 
140
 
141
fprem_common    PROC    NEAR
142
 
143
        push    eax
144
        push    ebx
145
        push    ecx
146
        mov     eax, [MAIN_DENOM+6+esp] ; exponent and high 16 bits of mantissa
147
        xor     eax, ONESMASK           ; invert bits that have to be one
148
        test    eax, ONESMASK           ; check bits that have to be one
149
        jnz     remainder_hardware_ok
150
        shr     eax, 11
151
        and     eax, 0fh
152
        cmp     byte ptr fprem_risc_table[eax], 0     ; check for (1,4,7,a,d)
153
        jz      remainder_hardware_ok
154
 
155
; The denominator has the bit pattern. Weed out the funny cases like NaNs
156
; before applying the software version. Our caller guarantees that the
157
; denominator is not a denormal. Here we check for:
158
;       denominator     inf, NaN, unnormal
159
;       numerator       inf, NaN, unnormal, denormal
160
 
161
        mov     eax, [MAIN_DENOM+6+esp] ; exponent and high 16 bits of mantissa
162
        and     eax, 07fff0000h         ; mask the exponent only
163
        cmp     eax, 07fff0000h         ; check for INF or NaN
164
        je      remainder_hardware_ok
165
        mov     eax, [MAIN_NUMER+6+esp] ; exponent and high 16 bits of mantissa
166
        and     eax, 07fff0000h         ; mask the exponent only
167
        jz      remainder_hardware_ok   ; jif numerator denormal
168
        cmp     eax, 07fff0000h         ; check for INF or NaN
169
        je      remainder_hardware_ok
170
        mov     eax, [esp + MAIN_NUMER + 4]     ; high mantissa bits - numerator
171
        add     eax, eax                ; set carry if explicit bit set
172
        jnz     remainder_hardware_ok   ; jmp if numerator is unnormal
173
        mov     eax, [esp + MAIN_DENOM + 4] ; high mantissa bits - denominator
174
        add     eax, eax                ; set carry if explicit bit set
175
        jnz     remainder_hardware_ok   ; jmp if denominator is unnormal
176
 
177
rem_patch:
178
        mov     eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
179
        and     eax, 07fffh              ; clear sy
180
        add     eax, 63                  ; evaluate ey + 63
181
        mov     ebx, [MAIN_NUMER+8+esp]  ; sign and exponent of x (numerator)
182
        and     ebx, 07fffh              ; clear sx
183
        sub     ebx, eax                 ; evaluate the exponent difference (ex - ey)
184
        ja      rem_large               ; if ex > ey + 63, case of large arguments
185
rem_patch_loop:
186
        mov     eax, [MAIN_DENOM+8+esp]  ; sign and exponent of y (denominator)
187
        and     eax, 07fffh             ; clear sy
188
        add     eax, 10                 ; evaluate ey + 10
189
        mov     ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
190
        and     ebx, 07fffh             ; clear sx
191
        sub     ebx, eax                ; evaluate the exponent difference (ex - ey)
192
        js      remainder_hardware_ok   ; safe if ey + 10 > ex
193
        fld     tbyte ptr [MAIN_NUMER+esp]   ; load the numerator
194
        mov     eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
195
        mov     ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
196
        and     ebx, 07fffh             ; clear sx
197
        mov     ecx, ebx
198
        sub     ebx, eax
199
        and     ebx, 07h
200
        or      ebx, 04h
201
        sub     ecx, ebx
202
        mov     ebx, eax
203
        and     ebx, 08000h             ; keep sy
204
        or      ecx, ebx                ; merge the sign of y
205
        mov     dword ptr [MAIN_DENOM+8+esp], ecx
206
        fld     tbyte ptr [MAIN_DENOM+esp]   ; load the shifted denominator
207
        mov     dword ptr [MAIN_DENOM+8+esp], eax       ; restore the initial denominator
208
        fxch
209
        fprem                           ; this rem is safe
210
        fstp    tbyte ptr [MAIN_NUMER+esp]      ; update the numerator
211
        fstp    st(0)                   ; pop the stack
212
        jmp rem_patch_loop
213
rem_large:
214
        test    edx, 02h                ; is denominator already saved
215
        jnz     already_saved
216
        fld     tbyte ptr[esp + MAIN_DENOM]
217
        fstp    tbyte ptr[esp + MAIN_DENOM_SAVE]        ; save denominator
218
already_saved:
219
        ; Save user's precision control and institute 80.  The fp ops in
220
        ; rem_large_loop must not round to user's precision (if it is less
221
        ; than 80) because the hardware would not have done so.  We are
222
        ; aping the hardware here, which is all extended.
223
 
224
        fnstcw  [esp+MAIN_PREV_CW]      ; save caller's control word
225
        mov     eax, dword ptr[esp + MAIN_PREV_CW]
226
        or      eax, 033fh              ; mask exceptions, pc=80
227
        mov     [esp + MAIN_PATCH_CW], eax
228
        fldcw   [esp + MAIN_PATCH_CW]
229
 
230
        mov     eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
231
        and     eax, 07fffh             ; clear sy
232
        mov     ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
233
        and     ebx, 07fffh             ; clear sx
234
        sub     ebx, eax                ; evaluate the exponent difference
235
        and     ebx, 03fh
236
        or      ebx, 020h
237
        add     ebx, 1
238
        mov     ecx, ebx
239
        mov     eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
240
        mov     ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
241
        and     ebx, 07fffh             ; clear sx
242
        and     eax, 08000h             ; keep sy
243
        or      ebx, eax                ; merge the sign of y
244
        mov     dword ptr[MAIN_DENOM+8+esp], ebx        ; make ey equal to ex (scaled denominator)
245
        fld     tbyte ptr [MAIN_DENOM+esp]   ; load the scaled denominator
246
        fabs
247
        fld     tbyte ptr [MAIN_NUMER+esp]   ; load the numerator
248
        fabs
249
rem_large_loop:
250
        fcom
251
        fstsw  ax
252
        and     eax, 00100h
253
        jnz     rem_no_sub
254
        fsub    st, st(1)
255
rem_no_sub:
256
        fxch
257
        fmul    qword ptr half
258
        fxch
259
        sub     ecx, 1                  ; decrement the loop counter
260
        jnz     rem_large_loop
261
        mov     ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
262
        fstp    tbyte ptr[esp + MAIN_NUMER]     ; save result
263
        fstp    st                      ; toss modified denom
264
        fld     tbyte ptr[esp + MAIN_DENOM_SAVE]
265
        fld     tbyte ptr[big_number]   ; force C2 to be set
266
        fprem
267
        fstp    st
268
        fld     tbyte ptr[esp + MAIN_NUMER]     ; restore saved result
269
 
270
        fldcw   [esp + MAIN_PREV_CW]    ; restore caller's control word
271
        and     ebx, 08000h             ; keep sx
272
        jz      rem_done
273
        fchs
274
        jmp     rem_done
275
remainder_hardware_ok:
276
        fld     tbyte ptr [MAIN_DENOM+esp]   ; load the denominator
277
        fld     tbyte ptr [MAIN_NUMER+esp]   ; load the numerator
278
        fprem                           ; and finally do a remainder
279
; prem_main_routine end
280
rem_done:
281
        test    edx, 03h
282
        jz      rem_exit
283
        fnstsw  [esp + MAIN_FPREM_SW]   ; save Q0 Q1 and Q2
284
        test    edx, 01h
285
        jz      do_not_de_scale
286
; De-scale the result. Go to pc=80 to prevent from fmul
287
; from user precision (fprem does not round the result).
288
        fnstcw  [esp + MAIN_PREV_CW]    ; save callers control word
289
        mov     eax, [esp + MAIN_PREV_CW]
290
        or      eax, 0300h              ; pc = 80
291
        mov     [esp + MAIN_PATCH_CW], eax
292
        fldcw   [esp + MAIN_PATCH_CW]
293
        fmul    qword ptr one_shr_64
294
        fldcw   [esp + MAIN_PREV_CW]    ; restore callers CW
295
do_not_de_scale:
296
        mov     eax, [esp + MAIN_FPREM_SW]
297
        fxch
298
        fstp    st
299
        fld     tbyte ptr[esp + MAIN_DENOM_SAVE]
300
        fxch
301
        and     eax, 04300h             ; restore saved Q0, Q1, Q2
302
        sub     esp, ENV_SIZE
303
        fnstenv [esp]
304
        and     [esp].STATUS_WORD, 0bcffh
305
        or      [esp].STATUS_WORD, eax
306
        fldenv  [esp]
307
        add     esp, ENV_SIZE
308
rem_exit:
309
        pop     ecx
310
        pop     ebx
311
        pop     eax
312
        CHECKSW                         ; debug only: save status
313
        ret
314
fprem_common    ENDP
315
 
316
comment ~****************************************************************
317
 
318
;
319
; float frem_chk (float numer, float denom)
320
;
321
        public  frem_chk
322
frem_chk        PROC    NEAR
323
        push    edx
324
        sub     esp, STACK_SIZE
325
        fld     dword ptr [STACK_SIZE+8+esp]
326
        fstp    tbyte ptr [NUMER+esp]
327
        fld     dword ptr [STACK_SIZE+12+esp]
328
        fstp    tbyte ptr [DENOM+esp]
329
        mov     edx, 0                  ; dx = 1 if denormal extended divisor
330
        call    fprem_common
331
        fxch
332
        fstp    st
333
        add     esp, STACK_SIZE
334
        pop     edx
335
        ret
336
frem_chk        ENDP
337
; end frem_chk
338
 
339
;
340
; double drem_chk (double numer, double denom)
341
;
342
        public  drem_chk
343
drem_chk        PROC    NEAR
344
        push    edx
345
        sub     esp, STACK_SIZE
346
        fld     qword ptr [STACK_SIZE+8+esp]
347
        fstp    tbyte ptr [NUMER+esp]
348
        fld     qword ptr [STACK_SIZE+16+esp]
349
        fstp    tbyte ptr [DENOM+esp]
350
        mov     edx, 0                  ; dx = 1 if denormal extended divisor
351
        call    fprem_common
352
        fxch
353
        fstp    st
354
        add     esp, STACK_SIZE
355
        pop     edx
356
        ret
357
 
358
drem_chk        ENDP
359
; end drem_chk
360
 
361
;
362
; long double lrem_chk(long double number,long double denom)
363
;
364
        public  lrem_chk
365
lrem_chk        PROC    NEAR
366
        fld     tbyte ptr [20+esp]
367
        fld     tbyte ptr [4+esp]
368
        call    fprem_chk
369
        fxch
370
        fstp    st
371
        ret
372
lrem_chk        ENDP
373
 
374
**********************************************************************~
375
 
376
;
377
; FPREM: ST = remainder(ST, ST(1))
378
;
379
; Compiler version of the FPREM must preserve the arguments in the floating
380
; point stack.
381
 
382
        public  __fprem_chk
383
        defpe   __fprem_chk
384
        push    edx
385
        sub     esp, STACK_SIZE
386
        fstp    tbyte ptr [NUMER+esp]
387
        fstp    tbyte ptr [DENOM+esp]
388
        xor     edx, edx
389
; prem_main_routine begin
390
        mov     eax,[DENOM+6+esp]       ; exponent and high 16 bits of mantissa
391
        test    eax,07fff0000h          ; check for denormal
392
        jz      denormal
393
        call    fprem_common
394
        add     esp, STACK_SIZE
395
        pop     edx
396
        ret
397
 
398
denormal:
399
        fld     tbyte ptr [DENOM+esp]   ; load the denominator
400
        fld     tbyte ptr [NUMER+esp]   ; load the numerator
401
        mov     eax, [DENOM+esp]        ; test for whole mantissa == 0
402
        or      eax, [DENOM+4+esp]      ; test for whole mantissa == 0
403
        jz      remainder_hardware_ok_l ; denominator is zero
404
        fxch
405
        fstp    tbyte ptr[esp + DENOM_SAVE]     ; save org denominator
406
        fld     tbyte ptr[esp + DENOM]
407
        fxch
408
        or      edx, 02h
409
;
410
; For this we need pc=80.  Also, mask exceptions so we don't take any
411
; denormal operand exceptions.  It is guaranteed that the descaling
412
; later on will take underflow, which is what the hardware would have done
413
; on a normal fprem.
414
;
415
        fnstcw  [PREV_CW+esp]           ; save caller's control word
416
        mov     eax, [PREV_CW+esp]
417
        or      eax, 0033fh             ; mask exceptions, pc=80
418
        mov     [PATCH_CW+esp], eax
419
        fldcw   [PATCH_CW+esp]          ; mask exceptions & pc=80
420
 
421
; The denominator is a denormal.  For most numerators, scale both numerator
422
; and denominator to get rid of denormals.  Then execute the common code
423
; with the flag set to indicate that the result must be de-scaled.
424
; For large numerators this won't work because the scaling would cause
425
; overflow.  In this case we know the numerator is large, the denominator
426
; is small (denormal), so the exponent difference is also large.  This means
427
; the rem_large code will be used and this code depends on the difference
428
; in exponents modulo 64.  Adding 64 to the denominators exponent
429
; doesn't change the modulo 64 difference.  So we can scale the denominator
430
; by 64, making it not denormal, and this won't effect the result.
431
;
432
; To start with, figure out if numerator is large
433
 
434
        mov     eax, [esp + NUMER + 8]  ; load numerator exponent
435
        and     eax, 7fffh              ; isolate numerator exponent
436
        cmp     eax, 7fbeh              ; compare Nexp to Maxexp-64
437
        ja      big_numer_rem_de        ; jif big numerator
438
 
439
; So the numerator is not large scale both numerator and denominator
440
 
441
        or      edx, 1                  ; edx = 1, if denormal extended divisor
442
        fmul    qword ptr one_shl_64    ; make numerator not denormal
443
        fstp    tbyte ptr[esp + NUMER]
444
        fmul    qword ptr one_shl_64    ; make denominator not denormal
445
        fstp    tbyte ptr[esp + DENOM]
446
        jmp     scaling_done
447
 
448
; The numerator is large.  Scale only the denominator, which will not
449
; change the result which we know will be partial.  Set the scale flag
450
; to false.
451
big_numer_rem_de:
452
        ; We must do this with pc=80 to avoid rounding to single/double.
453
        ; In this case we do not mask exceptions so that we will take
454
        ; denormal operand, as would the hardware.
455
        fnstcw  [PREV_CW+esp]           ; save caller's control word
456
        mov     eax, [PREV_CW+esp]
457
        or      eax, 00300h             ; pc=80
458
        mov     [PATCH_CW+esp], eax
459
        fldcw   [PATCH_CW+esp]          ;  pc=80
460
 
461
        fstp    st                      ; Toss numerator
462
        fmul    qword ptr one_shl_64    ; make denominator not denormal
463
        fstp    tbyte ptr[esp + DENOM]
464
 
465
; Restore the control word which was fiddled to scale at 80-bit precision.
466
; Then call the common code.
467
scaling_done:
468
        fldcw   [esp + PREV_CW]         ; restore callers control word
469
        call    fprem_common
470
        add     esp, STACK_SIZE
471
        pop     edx
472
        ret
473
 
474
remainder_hardware_ok_l:
475
        fprem                           ; and finally do a remainder
476
 
477
        CHECKSW
478
 
479
        add     esp, STACK_SIZE
480
        pop     edx
481
        ret
482
__fprem_chk       ENDP
483
; end fprem_chk
484
 
485
 
486
;
487
; FPREM1 code begins here
488
;
489
 
490
 
491
fprem1_common   PROC    NEAR
492
 
493
        push    eax
494
        push    ebx
495
        push    ecx
496
        mov     eax, [MAIN_DENOM+6+esp] ; exponent and high 16 bits of mantissa
497
        xor     eax, ONESMASK           ; invert bits that have to be one
498
        test    eax, ONESMASK           ; check bits that have to be one
499
        jnz     remainder1_hardware_ok
500
        shr     eax, 11
501
        and     eax, 0fh
502
        cmp     byte ptr fprem_risc_table[eax], 0     ; check for (1,4,7,a,d)
503
        jz      remainder1_hardware_ok
504
 
505
; The denominator has the bit pattern. Weed out the funny cases like NaNs
506
; before applying the software version. Our caller guarantees that the
507
; denominator is not a denormal. Here we check for:
508
;       denominator     inf, NaN, unnormal
509
;       numerator       inf, NaN, unnormal, denormal
510
 
511
        mov     eax, [MAIN_DENOM+6+esp] ; exponent and high 16 bits of mantissa
512
        and     eax, 07fff0000h         ; mask the exponent only
513
        cmp     eax, 07fff0000h         ; check for INF or NaN
514
        je      remainder1_hardware_ok
515
        mov     eax, [MAIN_NUMER+6+esp] ; exponent and high 16 bits of mantissa
516
        and     eax, 07fff0000h         ; mask the exponent only
517
        jz      remainder1_hardware_ok  ; jif numerator denormal
518
        cmp     eax, 07fff0000h         ; check for INF or NaN
519
        je      remainder1_hardware_ok
520
        mov     eax, [esp + MAIN_NUMER + 4]     ; high mantissa bits - numerator
521
        add     eax, eax                ; set carry if explicit bit set
522
        jnz     remainder1_hardware_ok  ; jmp if numerator is unnormal
523
        mov     eax, [esp + MAIN_DENOM + 4] ; high mantissa bits - denominator
524
        add     eax, eax                ; set carry if explicit bit set
525
        jnz     remainder1_hardware_ok  ; jmp if denominator is unnormal
526
 
527
rem1_patch:
528
        mov     eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
529
        and     eax, 07fffh              ; clear sy
530
        add     eax, 63                  ; evaluate ey + 63
531
        mov     ebx, [MAIN_NUMER+8+esp]  ; sign and exponent of x (numerator)
532
        and     ebx, 07fffh              ; clear sx
533
        sub     ebx, eax                 ; evaluate the exponent difference (ex - ey)
534
        ja      rem1_large              ; if ex > ey + 63, case of large arguments
535
rem1_patch_loop:
536
        mov     eax, [MAIN_DENOM+8+esp]  ; sign and exponent of y (denominator)
537
        and     eax, 07fffh             ; clear sy
538
        add     eax, 10                 ; evaluate ey + 10
539
        mov     ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
540
        and     ebx, 07fffh             ; clear sx
541
        sub     ebx, eax                ; evaluate the exponent difference (ex - ey)
542
        js      remainder1_hardware_ok  ; safe if ey + 10 > ex
543
        fld     tbyte ptr [MAIN_NUMER+esp]   ; load the numerator
544
        mov     eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
545
        mov     ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
546
        and     ebx, 07fffh             ; clear sx
547
        mov     ecx, ebx
548
        sub     ebx, eax
549
        and     ebx, 07h
550
        or      ebx, 04h
551
        sub     ecx, ebx
552
        mov     ebx, eax
553
        and     ebx, 08000h             ; keep sy
554
        or      ecx, ebx                ; merge the sign of y
555
        mov     dword ptr [MAIN_DENOM+8+esp], ecx
556
        fld     tbyte ptr [MAIN_DENOM+esp]   ; load the shifted denominator
557
        mov     dword ptr [MAIN_DENOM+8+esp], eax       ; restore the initial denominator
558
        fxch
559
        fprem                           ; this rem is safe
560
        fstp    tbyte ptr [MAIN_NUMER+esp]      ; update the numerator
561
        fstp    st(0)                   ; pop the stack
562
        jmp rem1_patch_loop
563
rem1_large:
564
        test    ebx, 02h                ; is denominator already saved
565
        jnz     already_saved1
566
        fld     tbyte ptr[esp + MAIN_DENOM]
567
        fstp    tbyte ptr[esp + MAIN_DENOM_SAVE]        ; save denominator
568
already_saved1:
569
        ; Save user's precision control and institute 80.  The fp ops in
570
        ; rem1_large_loop must not round to user's precision (if it is less
571
        ; than 80) because the hardware would not have done so.  We are
572
        ; aping the hardware here, which is all extended.
573
 
574
        fnstcw  [esp+MAIN_PREV_CW]      ; save caller's control word
575
        mov     eax, dword ptr[esp + MAIN_PREV_CW]
576
        or      eax, 033fh              ; mask exceptions, pc=80
577
        mov     [esp + MAIN_PATCH_CW], eax
578
        fldcw   [esp + MAIN_PATCH_CW]
579
 
580
        mov     eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
581
        and     eax, 07fffh             ; clear sy
582
        mov     ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
583
        and     ebx, 07fffh             ; clear sx
584
        sub     ebx, eax                ; evaluate the exponent difference
585
        and     ebx, 03fh
586
        or      ebx, 020h
587
        add     ebx, 1
588
        mov     ecx, ebx
589
        mov     eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
590
        mov     ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
591
        and     ebx, 07fffh             ; clear sx
592
        and     eax, 08000h             ; keep sy
593
        or      ebx, eax                ; merge the sign of y
594
        mov     dword ptr[MAIN_DENOM+8+esp], ebx        ; make ey equal to ex (scaled denominator)
595
        fld     tbyte ptr [MAIN_DENOM+esp]   ; load the scaled denominator
596
        fabs
597
        fld     tbyte ptr [MAIN_NUMER+esp]   ; load the numerator
598
        fabs
599
rem1_large_loop:
600
        fcom
601
        fstsw  ax
602
        and     eax, 00100h
603
        jnz     rem1_no_sub
604
        fsub    st, st(1)
605
rem1_no_sub:
606
        fxch
607
        fmul    qword ptr half
608
        fxch
609
        sub     ecx, 1                  ; decrement the loop counter
610
        jnz     rem1_large_loop
611
        mov     ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
612
        fstp    tbyte ptr[esp + MAIN_NUMER]     ; save result
613
        fstp    st                      ; toss modified denom
614
        fld     tbyte ptr[esp + MAIN_DENOM_SAVE]
615
        fld     tbyte ptr[big_number]   ; force C2 to be set
616
        fprem1
617
        fstp    st
618
        fld     tbyte ptr[esp + MAIN_NUMER]     ; restore saved result
619
 
620
        fldcw   [esp + MAIN_PREV_CW]    ; restore caller's control word
621
        and     ebx, 08000h             ; keep sx
622
        jz      rem1_done
623
        fchs
624
        jmp     rem1_done
625
remainder1_hardware_ok:
626
        fld     tbyte ptr [MAIN_DENOM+esp]   ; load the denominator
627
        fld     tbyte ptr [MAIN_NUMER+esp]   ; load the numerator
628
        fprem1                           ; and finally do a remainder
629
; prem1_main_routine end
630
rem1_done:
631
        test    edx, 03h
632
        jz      rem1_exit
633
        fnstsw  [esp + MAIN_FPREM_SW]   ; save Q0 Q1 and Q2
634
        test    edx, 01h
635
        jz      do_not_de_scale1
636
; De-scale the result. Go to pc=80 to prevent from fmul
637
; from user precision (fprem does not round the result).
638
        fnstcw  [esp + MAIN_PREV_CW]    ; save callers control word
639
        mov     eax, [esp + MAIN_PREV_CW]
640
        or      eax, 0300h              ; pc = 80
641
        mov     [esp + MAIN_PATCH_CW], eax
642
        fldcw   [esp + MAIN_PATCH_CW]
643
        fmul    qword ptr one_shr_64
644
        fldcw   [esp + MAIN_PREV_CW]    ; restore callers CW
645
do_not_de_scale1:
646
        mov     eax, [esp + MAIN_FPREM_SW]
647
        fxch
648
        fstp    st
649
        fld     tbyte ptr[esp + MAIN_DENOM_SAVE]
650
        fxch
651
        and     eax, 04300h             ; restore saved Q0, Q1, Q2
652
        sub     esp, ENV_SIZE
653
        fnstenv [esp]
654
        and     [esp].STATUS_WORD, 0bcffh
655
        or      [esp].STATUS_WORD, eax
656
        fldenv  [esp]
657
        add     esp, ENV_SIZE
658
rem1_exit:
659
        pop     ecx
660
        pop     ebx
661
        pop     eax
662
        CHECKSW                         ; debug only: save status
663
        ret
664
fprem1_common   ENDP
665
 
666
 
667
comment ~***************************************************************
668
;
669
; float frem1_chk (float numer, float denom)
670
;
671
        public  frem1_chk
672
frem1_chk       PROC    NEAR
673
        push    edx
674
        sub     esp, STACK_SIZE
675
        fld     dword ptr [STACK_SIZE+8+esp]
676
        fstp    tbyte ptr [NUMER+esp]
677
        fld     dword ptr [STACK_SIZE+12+esp]
678
        fstp    tbyte ptr [DENOM+esp]
679
        mov     edx, 0                  ; dx = 1 if denormal extended divisor
680
        call    fprem1_common
681
        fxch
682
        fstp    st
683
        add     esp, STACK_SIZE
684
        pop     edx
685
        ret
686
frem1_chk       ENDP
687
; end frem1_chk
688
 
689
;
690
; double drem1_chk (double numer, double denom)
691
;
692
        public  drem1_chk
693
drem1_chk       PROC    NEAR
694
        push    edx
695
        sub     esp, STACK_SIZE
696
        fld     qword ptr [STACK_SIZE+8+esp]
697
        fstp    tbyte ptr [NUMER+esp]
698
        fld     qword ptr [STACK_SIZE+16+esp]
699
        fstp    tbyte ptr [DENOM+esp]
700
        mov     edx, 0                  ; dx = 1 if denormal extended divisor
701
        call    fprem1_common
702
        fxch
703
        fstp    st
704
        add     esp, STACK_SIZE
705
        pop     edx
706
        ret
707
 
708
drem1_chk       ENDP
709
; end drem1_chk
710
 
711
;
712
; long double lrem1_chk(long double number,long double denom)
713
;
714
        public  lrem1_chk
715
lrem1_chk       PROC    NEAR
716
        fld     tbyte ptr [20+esp]
717
        fld     tbyte ptr [4+esp]
718
        call    fprem1_chk
719
        fxch
720
        fstp    st
721
        ret
722
lrem1_chk       ENDP
723
********************************************************************~
724
 
725
;
726
; FPREM1: ST = remainder(ST, ST(1)) - IEEE version of rounding
727
;
728
; Compiler version of the FPREM must preserve the arguments in the floating
729
; point stack.
730
 
731
        public  __fprem1_chk
732
        defpe   __fprem1_chk
733
        push    edx
734
        sub     esp, STACK_SIZE
735
        fstp    tbyte ptr [NUMER+esp]
736
        fstp    tbyte ptr [DENOM+esp]
737
        mov     edx, 0
738
; prem1_main_routine begin
739
        mov     eax,[DENOM+6+esp]       ; exponent and high 16 bits of mantissa
740
        test    eax,07fff0000h          ; check for denormal
741
        jz      denormal1
742
        call    fprem1_common
743
        add     esp, STACK_SIZE
744
        pop     edx
745
        ret
746
 
747
denormal1:
748
        fld     tbyte ptr [DENOM+esp]   ; load the denominator
749
        fld     tbyte ptr [NUMER+esp]   ; load the numerator
750
        mov     eax, [DENOM+esp]        ; test for whole mantissa == 0
751
        or      eax, [DENOM+4+esp]      ; test for whole mantissa == 0
752
        jz      remainder1_hardware_ok_l ; denominator is zero
753
        fxch
754
        fstp    tbyte ptr[esp + DENOM_SAVE]     ; save org denominator
755
        fld     tbyte ptr[esp + DENOM]
756
        fxch
757
        or      edx, 02h
758
;
759
; For this we need pc=80.  Also, mask exceptions so we don't take any
760
; denormal operand exceptions.  It is guaranteed that the descaling
761
; later on will take underflow, which is what the hardware would have done
762
; on a normal fprem.
763
;
764
        fnstcw  [PREV_CW+esp]           ; save caller's control word
765
        mov     eax, [PREV_CW+esp]
766
        or      eax, 0033fh             ; mask exceptions, pc=80
767
        mov     [PATCH_CW+esp], eax
768
        fldcw   [PATCH_CW+esp]          ; mask exceptions & pc=80
769
 
770
; The denominator is a denormal.  For most numerators, scale both numerator
771
; and denominator to get rid of denormals.  Then execute the common code
772
; with the flag set to indicate that the result must be de-scaled.
773
; For large numerators this won't work because the scaling would cause
774
; overflow.  In this case we know the numerator is large, the denominator
775
; is small (denormal), so the exponent difference is also large.  This means
776
; the rem1_large code will be used and this code depends on the difference
777
; in exponents modulo 64.  Adding 64 to the denominators exponent
778
; doesn't change the modulo 64 difference.  So we can scale the denominator
779
; by 64, making it not denormal, and this won't effect the result.
780
;
781
; To start with, figure out if numerator is large
782
 
783
        mov     eax, [esp + NUMER + 8]  ; load numerator exponent
784
        and     eax, 7fffh              ; isolate numerator exponent
785
        cmp     eax, 7fbeh              ; compare Nexp to Maxexp-64
786
        ja      big_numer_rem1_de       ; jif big numerator
787
 
788
; So the numerator is not large scale both numerator and denominator
789
 
790
        or      edx, 1                  ; edx = 1, if denormal extended divisor
791
        fmul    qword ptr one_shl_64    ; make numerator not denormal
792
        fstp    tbyte ptr[esp + NUMER]
793
        fmul    qword ptr one_shl_64    ; make denominator not denormal
794
        fstp    tbyte ptr[esp + DENOM]
795
        jmp     scaling_done1
796
 
797
; The numerator is large.  Scale only the denominator, which will not
798
; change the result which we know will be partial.  Set the scale flag
799
; to false.
800
big_numer_rem1_de:
801
        ; We must do this with pc=80 to avoid rounding to single/double.
802
        ; In this case we do not mask exceptions so that we will take
803
        ; denormal operand, as would the hardware.
804
        fnstcw  [PREV_CW+esp]           ; save caller's control word
805
        mov     eax, [PREV_CW+esp]
806
        or      eax, 00300h             ; pc=80
807
        mov     [PATCH_CW+esp], eax
808
        fldcw   [PATCH_CW+esp]          ;  pc=80
809
 
810
        fstp    st                      ; Toss numerator
811
        fmul    qword ptr one_shl_64    ; make denominator not denormal
812
        fstp    tbyte ptr[esp + DENOM]
813
 
814
; Restore the control word which was fiddled to scale at 80-bit precision.
815
; Then call the common code.
816
scaling_done1:
817
        fldcw   [esp + PREV_CW]         ; restore callers control word
818
        call    fprem1_common
819
        add     esp, STACK_SIZE
820
        pop     edx
821
        ret
822
 
823
remainder1_hardware_ok_l:
824
        fprem                           ; and finally do a remainder
825
 
826
        CHECKSW
827
 
828
        add     esp, STACK_SIZE
829
        pop     edx
830
        ret
831
__fprem1_chk      ENDP
832
; end fprem1_chk
833
 
834
ifdef   DEBUG
835
        public  fpinit
836
fpinit  PROC    NEAR
837
        fninit
838
        ret
839
fpinit  ENDP
840
endif
841
 
704 serge 842
_TEXT  ENDS
554 serge 843
       END