Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | Download | RSS feed

  1. ;*****************************************************************************
  2. ;*
  3. ;*                            Open Watcom Project
  4. ;*
  5. ;*    Portions Copyright (c) 1983-2002 Sybase, Inc. All Rights Reserved.
  6. ;*
  7. ;*  ========================================================================
  8. ;*
  9. ;*    This file contains Original Code and/or Modifications of Original
  10. ;*    Code as defined in and that are subject to the Sybase Open Watcom
  11. ;*    Public License version 1.0 (the 'License'). You may not use this file
  12. ;*    except in compliance with the License. BY USING THIS FILE YOU AGREE TO
  13. ;*    ALL TERMS AND CONDITIONS OF THE LICENSE. A copy of the License is
  14. ;*    provided with the Original Code and Modifications, and is also
  15. ;*    available at www.sybase.com/developer/opensource.
  16. ;*
  17. ;*    The Original Code and all software distributed under the License are
  18. ;*    distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  19. ;*    EXPRESS OR IMPLIED, AND SYBASE AND ALL CONTRIBUTORS HEREBY DISCLAIM
  20. ;*    ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF
  21. ;*    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR
  22. ;*    NON-INFRINGEMENT. Please see the License for the specific language
  23. ;*    governing rights and limitations under the License.
  24. ;*
  25. ;*  ========================================================================
  26. ;*
  27. ;* Description:  WHEN YOU FIGURE OUT WHAT THIS FILE DOES, PLEASE
  28. ;*               DESCRIBE IT HERE!
  29. ;*
  30. ;*****************************************************************************
  31.  
  32.  
  33. ; static char sccs_id[] = "@(#)fprem32.asm      1.5  12/22/94  12:48:07";
  34. ;
  35. ; This code is being published by Intel to users of the Pentium(tm)
  36. ; processor.  Recipients are authorized to copy, modify, compile, use and
  37. ; distribute the code.
  38. ;
  39. ; Intel makes no warranty of any kind with regard to this code, including
  40. ; but not limited to, implied warranties or merchantability and fitness for
  41. ; a particular purpose. Intel assumes no responsibility for any errors that
  42. ; may appear in this code.
  43. ;
  44. ; No patent licenses are granted, express or implied.
  45. ;
  46. ;
  47. include mdef.inc
  48.  
  49.         .386
  50.         .387
  51.  
  52. ;
  53. ;  PRELIMINARY VERSION of the software patch for the floating
  54. ;  point remainder.
  55. ;
  56.  
  57.  
  58. CHECKSW MACRO
  59. ifdef   DEBUG
  60.         fnstsw  [fpsw]
  61.         fnstcw  [fpcw]
  62. endif
  63. ENDM
  64.  
  65.  
  66. DATA32  SEGMENT DWORD USE32 PUBLIC 'DATA'
  67.  
  68. ;
  69. ;  Stack variables for remainder routines.
  70. ;
  71.  
  72. FLT_SIZE        EQU     12
  73. DENOM           EQU     0
  74. DENOM_SAVE      EQU     DENOM + FLT_SIZE
  75. NUMER           EQU     DENOM_SAVE + FLT_SIZE
  76. PREV_CW         EQU     NUMER + FLT_SIZE
  77. PATCH_CW        EQU     PREV_CW + 4
  78. FPREM_SW        EQU     PATCH_CW + 4
  79. STACK_SIZE      EQU     FPREM_SW + 4
  80. RET_SIZE        EQU     4
  81. PUSH_SIZE       EQU     4
  82.  
  83. MAIN_FUDGE      EQU     RET_SIZE + PUSH_SIZE + PUSH_SIZE + PUSH_SIZE
  84.  
  85. MAIN_DENOM              EQU     DENOM + MAIN_FUDGE
  86. MAIN_DENOM_SAVE         EQU     DENOM_SAVE + MAIN_FUDGE
  87. MAIN_NUMER              EQU     NUMER + MAIN_FUDGE
  88. MAIN_PREV_CW            EQU     PREV_CW + MAIN_FUDGE
  89. MAIN_PATCH_CW           EQU     PATCH_CW + MAIN_FUDGE
  90. MAIN_FPREM_SW           EQU     FPREM_SW + MAIN_FUDGE
  91.  
  92. ONESMASK        EQU     700h
  93.  
  94. fprem_risc_table        DB      0, 1, 0, 0, 4, 0, 0, 7, 0, 0, 10, 0, 0, 13, 0, 0
  95. fprem_scale             DB      0, 0, 0, 0, 0, 0, 0eeh, 03fh
  96. one_shl_64              DB      0, 0, 0, 0, 0, 0, 0f0h, 043h
  97. one_shr_64              DB      0, 0, 0, 0, 0, 0, 0f0h, 03bh
  98. one                     DB      0, 0, 0, 0, 0, 0, 0f0h, 03fh
  99. half                    DB      0, 0, 0, 0, 0, 0, 0e0h, 03fh
  100. big_number              DB      0, 0, 0, 0, 0, 0, 0ffh, 0ffh, 0feh, 07fh
  101.  
  102. ifdef   DEBUG
  103.         public  fpcw
  104.         public  fpsw
  105. fpcw    dw      0
  106. fpsw    dw      0
  107. endif
  108.  
  109. FPU_STATE       STRUC
  110.         CONTROL_WORD    DW      ?
  111.         reserved_1      DW      ?
  112.         STATUS_WORD     DD      ?
  113.         TAG_WORD        DW      ?
  114.         reserved_3      DW      ?
  115.         IP_OFFSET       DD      ?
  116.         CS_SLCT         DW      ?
  117.         OPCODE          DW      ?
  118.         DATA_OFFSET     DD      ?
  119.         OPERAND_SLCT    DW      ?
  120.         reserved_4      DW      ?
  121. FPU_STATE       ENDS
  122.  
  123. ENV_SIZE        EQU     28
  124.  
  125.  
  126. DATA32 ENDS
  127.  
  128. _TEXT  SEGMENT DWORD USE32 PUBLIC 'CODE'
  129. _TEXT  ENDS
  130.  
  131. DATA32  SEGMENT DWORD USE32 PUBLIC 'DATA'
  132. DATA32  ENDS
  133.  
  134. CONST32 SEGMENT DWORD USE32 PUBLIC 'CONST'
  135. CONST32 ENDS
  136.  
  137. BSS32   SEGMENT DWORD USE32 PUBLIC 'BSS'
  138. BSS32   ENDS
  139.  
  140. DGROUP  GROUP CONST32, BSS32, DATA32
  141.  
  142.  
  143.  
  144. CODE32  SEGMENT   DWORD USE32 PUBLIC 'CODE'
  145.  
  146.         assume cs:_TEXT, ds:DGROUP, es:DGROUP, ss:nothing
  147.  
  148.  
  149. fprem_common    PROC    NEAR
  150.  
  151.         push    eax
  152.         push    ebx
  153.         push    ecx
  154.         mov     eax, [MAIN_DENOM+6+esp] ; exponent and high 16 bits of mantissa
  155.         xor     eax, ONESMASK           ; invert bits that have to be one
  156.         test    eax, ONESMASK           ; check bits that have to be one
  157.         jnz     remainder_hardware_ok
  158.         shr     eax, 11
  159.         and     eax, 0fh
  160.         cmp     byte ptr fprem_risc_table[eax], 0     ; check for (1,4,7,a,d)
  161.         jz      remainder_hardware_ok
  162.  
  163. ; The denominator has the bit pattern. Weed out the funny cases like NaNs
  164. ; before applying the software version. Our caller guarantees that the
  165. ; denominator is not a denormal. Here we check for:
  166. ;       denominator     inf, NaN, unnormal
  167. ;       numerator       inf, NaN, unnormal, denormal
  168.  
  169.         mov     eax, [MAIN_DENOM+6+esp] ; exponent and high 16 bits of mantissa
  170.         and     eax, 07fff0000h         ; mask the exponent only
  171.         cmp     eax, 07fff0000h         ; check for INF or NaN
  172.         je      remainder_hardware_ok
  173.         mov     eax, [MAIN_NUMER+6+esp] ; exponent and high 16 bits of mantissa
  174.         and     eax, 07fff0000h         ; mask the exponent only
  175.         jz      remainder_hardware_ok   ; jif numerator denormal
  176.         cmp     eax, 07fff0000h         ; check for INF or NaN
  177.         je      remainder_hardware_ok
  178.         mov     eax, [esp + MAIN_NUMER + 4]     ; high mantissa bits - numerator
  179.         add     eax, eax                ; set carry if explicit bit set
  180.         jnz     remainder_hardware_ok   ; jmp if numerator is unnormal
  181.         mov     eax, [esp + MAIN_DENOM + 4] ; high mantissa bits - denominator
  182.         add     eax, eax                ; set carry if explicit bit set
  183.         jnz     remainder_hardware_ok   ; jmp if denominator is unnormal
  184.  
  185. rem_patch:
  186.         mov     eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
  187.         and     eax, 07fffh              ; clear sy
  188.         add     eax, 63                  ; evaluate ey + 63
  189.         mov     ebx, [MAIN_NUMER+8+esp]  ; sign and exponent of x (numerator)
  190.         and     ebx, 07fffh              ; clear sx
  191.         sub     ebx, eax                 ; evaluate the exponent difference (ex - ey)
  192.         ja      rem_large               ; if ex > ey + 63, case of large arguments
  193. rem_patch_loop:
  194.         mov     eax, [MAIN_DENOM+8+esp]  ; sign and exponent of y (denominator)
  195.         and     eax, 07fffh             ; clear sy
  196.         add     eax, 10                 ; evaluate ey + 10
  197.         mov     ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
  198.         and     ebx, 07fffh             ; clear sx
  199.         sub     ebx, eax                ; evaluate the exponent difference (ex - ey)
  200.         js      remainder_hardware_ok   ; safe if ey + 10 > ex
  201.         fld     tbyte ptr [MAIN_NUMER+esp]   ; load the numerator
  202.         mov     eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
  203.         mov     ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
  204.         and     ebx, 07fffh             ; clear sx
  205.         mov     ecx, ebx
  206.         sub     ebx, eax
  207.         and     ebx, 07h
  208.         or      ebx, 04h
  209.         sub     ecx, ebx
  210.         mov     ebx, eax
  211.         and     ebx, 08000h             ; keep sy
  212.         or      ecx, ebx                ; merge the sign of y
  213.         mov     dword ptr [MAIN_DENOM+8+esp], ecx
  214.         fld     tbyte ptr [MAIN_DENOM+esp]   ; load the shifted denominator
  215.         mov     dword ptr [MAIN_DENOM+8+esp], eax       ; restore the initial denominator
  216.         fxch
  217.         fprem                           ; this rem is safe
  218.         fstp    tbyte ptr [MAIN_NUMER+esp]      ; update the numerator
  219.         fstp    st(0)                   ; pop the stack
  220.         jmp rem_patch_loop
  221. rem_large:
  222.         test    edx, 02h                ; is denominator already saved
  223.         jnz     already_saved
  224.         fld     tbyte ptr[esp + MAIN_DENOM]
  225.         fstp    tbyte ptr[esp + MAIN_DENOM_SAVE]        ; save denominator
  226. already_saved:
  227.         ; Save user's precision control and institute 80.  The fp ops in
  228.         ; rem_large_loop must not round to user's precision (if it is less
  229.         ; than 80) because the hardware would not have done so.  We are
  230.         ; aping the hardware here, which is all extended.
  231.  
  232.         fnstcw  [esp+MAIN_PREV_CW]      ; save caller's control word
  233.         mov     eax, dword ptr[esp + MAIN_PREV_CW]
  234.         or      eax, 033fh              ; mask exceptions, pc=80
  235.         mov     [esp + MAIN_PATCH_CW], eax
  236.         fldcw   [esp + MAIN_PATCH_CW]
  237.  
  238.         mov     eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
  239.         and     eax, 07fffh             ; clear sy
  240.         mov     ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
  241.         and     ebx, 07fffh             ; clear sx
  242.         sub     ebx, eax                ; evaluate the exponent difference
  243.         and     ebx, 03fh
  244.         or      ebx, 020h
  245.         add     ebx, 1
  246.         mov     ecx, ebx
  247.         mov     eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
  248.         mov     ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
  249.         and     ebx, 07fffh             ; clear sx
  250.         and     eax, 08000h             ; keep sy
  251.         or      ebx, eax                ; merge the sign of y
  252.         mov     dword ptr[MAIN_DENOM+8+esp], ebx        ; make ey equal to ex (scaled denominator)
  253.         fld     tbyte ptr [MAIN_DENOM+esp]   ; load the scaled denominator
  254.         fabs
  255.         fld     tbyte ptr [MAIN_NUMER+esp]   ; load the numerator
  256.         fabs
  257. rem_large_loop:
  258.         fcom
  259.         fstsw  ax
  260.         and     eax, 00100h
  261.         jnz     rem_no_sub
  262.         fsub    st, st(1)
  263. rem_no_sub:
  264.         fxch
  265.         fmul    qword ptr half
  266.         fxch
  267.         sub     ecx, 1                  ; decrement the loop counter
  268.         jnz     rem_large_loop
  269.         mov     ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
  270.         fstp    tbyte ptr[esp + MAIN_NUMER]     ; save result
  271.         fstp    st                      ; toss modified denom
  272.         fld     tbyte ptr[esp + MAIN_DENOM_SAVE]
  273.         fld     tbyte ptr[big_number]   ; force C2 to be set
  274.         fprem
  275.         fstp    st
  276.         fld     tbyte ptr[esp + MAIN_NUMER]     ; restore saved result
  277.  
  278.         fldcw   [esp + MAIN_PREV_CW]    ; restore caller's control word
  279.         and     ebx, 08000h             ; keep sx
  280.         jz      rem_done
  281.         fchs
  282.         jmp     rem_done
  283. remainder_hardware_ok:
  284.         fld     tbyte ptr [MAIN_DENOM+esp]   ; load the denominator
  285.         fld     tbyte ptr [MAIN_NUMER+esp]   ; load the numerator
  286.         fprem                           ; and finally do a remainder
  287. ; prem_main_routine end
  288. rem_done:
  289.         test    edx, 03h
  290.         jz      rem_exit
  291.         fnstsw  [esp + MAIN_FPREM_SW]   ; save Q0 Q1 and Q2
  292.         test    edx, 01h
  293.         jz      do_not_de_scale
  294. ; De-scale the result. Go to pc=80 to prevent from fmul
  295. ; from user precision (fprem does not round the result).
  296.         fnstcw  [esp + MAIN_PREV_CW]    ; save callers control word
  297.         mov     eax, [esp + MAIN_PREV_CW]
  298.         or      eax, 0300h              ; pc = 80
  299.         mov     [esp + MAIN_PATCH_CW], eax
  300.         fldcw   [esp + MAIN_PATCH_CW]
  301.         fmul    qword ptr one_shr_64
  302.         fldcw   [esp + MAIN_PREV_CW]    ; restore callers CW
  303. do_not_de_scale:
  304.         mov     eax, [esp + MAIN_FPREM_SW]
  305.         fxch
  306.         fstp    st
  307.         fld     tbyte ptr[esp + MAIN_DENOM_SAVE]
  308.         fxch
  309.         and     eax, 04300h             ; restore saved Q0, Q1, Q2
  310.         sub     esp, ENV_SIZE
  311.         fnstenv [esp]
  312.         and     [esp].STATUS_WORD, 0bcffh
  313.         or      [esp].STATUS_WORD, eax
  314.         fldenv  [esp]
  315.         add     esp, ENV_SIZE
  316. rem_exit:
  317.         pop     ecx
  318.         pop     ebx
  319.         pop     eax
  320.         CHECKSW                         ; debug only: save status
  321.         ret
  322. fprem_common    ENDP
  323.  
  324. comment ~****************************************************************
  325.  
  326. ;
  327. ; float frem_chk (float numer, float denom)
  328. ;
  329.         public  frem_chk
  330. frem_chk        PROC    NEAR
  331.         push    edx
  332.         sub     esp, STACK_SIZE
  333.         fld     dword ptr [STACK_SIZE+8+esp]
  334.         fstp    tbyte ptr [NUMER+esp]
  335.         fld     dword ptr [STACK_SIZE+12+esp]
  336.         fstp    tbyte ptr [DENOM+esp]
  337.         mov     edx, 0                  ; dx = 1 if denormal extended divisor
  338.         call    fprem_common
  339.         fxch
  340.         fstp    st
  341.         add     esp, STACK_SIZE
  342.         pop     edx
  343.         ret
  344. frem_chk        ENDP
  345. ; end frem_chk
  346.  
  347. ;
  348. ; double drem_chk (double numer, double denom)
  349. ;
  350.         public  drem_chk
  351. drem_chk        PROC    NEAR
  352.         push    edx
  353.         sub     esp, STACK_SIZE
  354.         fld     qword ptr [STACK_SIZE+8+esp]
  355.         fstp    tbyte ptr [NUMER+esp]
  356.         fld     qword ptr [STACK_SIZE+16+esp]
  357.         fstp    tbyte ptr [DENOM+esp]
  358.         mov     edx, 0                  ; dx = 1 if denormal extended divisor
  359.         call    fprem_common
  360.         fxch
  361.         fstp    st
  362.         add     esp, STACK_SIZE
  363.         pop     edx
  364.         ret
  365.  
  366. drem_chk        ENDP
  367. ; end drem_chk
  368.  
  369. ;
  370. ; long double lrem_chk(long double number,long double denom)
  371. ;
  372.         public  lrem_chk
  373. lrem_chk        PROC    NEAR
  374.         fld     tbyte ptr [20+esp]
  375.         fld     tbyte ptr [4+esp]
  376.         call    fprem_chk
  377.         fxch
  378.         fstp    st
  379.         ret
  380. lrem_chk        ENDP
  381.  
  382. **********************************************************************~
  383.  
  384. ;
  385. ; FPREM: ST = remainder(ST, ST(1))
  386. ;
  387. ; Compiler version of the FPREM must preserve the arguments in the floating
  388. ; point stack.
  389.  
  390.         public  __fprem_chk
  391.         defpe   __fprem_chk
  392.         push    edx
  393.         sub     esp, STACK_SIZE
  394.         fstp    tbyte ptr [NUMER+esp]
  395.         fstp    tbyte ptr [DENOM+esp]
  396.         xor     edx, edx
  397. ; prem_main_routine begin
  398.         mov     eax,[DENOM+6+esp]       ; exponent and high 16 bits of mantissa
  399.         test    eax,07fff0000h          ; check for denormal
  400.         jz      denormal
  401.         call    fprem_common
  402.         add     esp, STACK_SIZE
  403.         pop     edx
  404.         ret
  405.  
  406. denormal:
  407.         fld     tbyte ptr [DENOM+esp]   ; load the denominator
  408.         fld     tbyte ptr [NUMER+esp]   ; load the numerator
  409.         mov     eax, [DENOM+esp]        ; test for whole mantissa == 0
  410.         or      eax, [DENOM+4+esp]      ; test for whole mantissa == 0
  411.         jz      remainder_hardware_ok_l ; denominator is zero
  412.         fxch
  413.         fstp    tbyte ptr[esp + DENOM_SAVE]     ; save org denominator
  414.         fld     tbyte ptr[esp + DENOM]
  415.         fxch
  416.         or      edx, 02h
  417. ;
  418. ; For this we need pc=80.  Also, mask exceptions so we don't take any
  419. ; denormal operand exceptions.  It is guaranteed that the descaling
  420. ; later on will take underflow, which is what the hardware would have done
  421. ; on a normal fprem.
  422. ;
  423.         fnstcw  [PREV_CW+esp]           ; save caller's control word
  424.         mov     eax, [PREV_CW+esp]
  425.         or      eax, 0033fh             ; mask exceptions, pc=80
  426.         mov     [PATCH_CW+esp], eax
  427.         fldcw   [PATCH_CW+esp]          ; mask exceptions & pc=80
  428.  
  429. ; The denominator is a denormal.  For most numerators, scale both numerator
  430. ; and denominator to get rid of denormals.  Then execute the common code
  431. ; with the flag set to indicate that the result must be de-scaled.
  432. ; For large numerators this won't work because the scaling would cause
  433. ; overflow.  In this case we know the numerator is large, the denominator
  434. ; is small (denormal), so the exponent difference is also large.  This means
  435. ; the rem_large code will be used and this code depends on the difference
  436. ; in exponents modulo 64.  Adding 64 to the denominators exponent
  437. ; doesn't change the modulo 64 difference.  So we can scale the denominator
  438. ; by 64, making it not denormal, and this won't effect the result.
  439. ;
  440. ; To start with, figure out if numerator is large
  441.  
  442.         mov     eax, [esp + NUMER + 8]  ; load numerator exponent
  443.         and     eax, 7fffh              ; isolate numerator exponent
  444.         cmp     eax, 7fbeh              ; compare Nexp to Maxexp-64
  445.         ja      big_numer_rem_de        ; jif big numerator
  446.  
  447. ; So the numerator is not large scale both numerator and denominator
  448.  
  449.         or      edx, 1                  ; edx = 1, if denormal extended divisor
  450.         fmul    qword ptr one_shl_64    ; make numerator not denormal
  451.         fstp    tbyte ptr[esp + NUMER]
  452.         fmul    qword ptr one_shl_64    ; make denominator not denormal
  453.         fstp    tbyte ptr[esp + DENOM]
  454.         jmp     scaling_done
  455.  
  456. ; The numerator is large.  Scale only the denominator, which will not
  457. ; change the result which we know will be partial.  Set the scale flag
  458. ; to false.
  459. big_numer_rem_de:
  460.         ; We must do this with pc=80 to avoid rounding to single/double.
  461.         ; In this case we do not mask exceptions so that we will take
  462.         ; denormal operand, as would the hardware.
  463.         fnstcw  [PREV_CW+esp]           ; save caller's control word
  464.         mov     eax, [PREV_CW+esp]
  465.         or      eax, 00300h             ; pc=80
  466.         mov     [PATCH_CW+esp], eax
  467.         fldcw   [PATCH_CW+esp]          ;  pc=80
  468.  
  469.         fstp    st                      ; Toss numerator
  470.         fmul    qword ptr one_shl_64    ; make denominator not denormal
  471.         fstp    tbyte ptr[esp + DENOM]
  472.  
  473. ; Restore the control word which was fiddled to scale at 80-bit precision.
  474. ; Then call the common code.
  475. scaling_done:
  476.         fldcw   [esp + PREV_CW]         ; restore callers control word
  477.         call    fprem_common
  478.         add     esp, STACK_SIZE
  479.         pop     edx
  480.         ret
  481.  
  482. remainder_hardware_ok_l:
  483.         fprem                           ; and finally do a remainder
  484.  
  485.         CHECKSW
  486.  
  487.         add     esp, STACK_SIZE
  488.         pop     edx
  489.         ret
  490. __fprem_chk       ENDP
  491. ; end fprem_chk
  492.  
  493.  
  494. ;
  495. ; FPREM1 code begins here
  496. ;
  497.  
  498.  
  499. fprem1_common   PROC    NEAR
  500.  
  501.         push    eax
  502.         push    ebx
  503.         push    ecx
  504.         mov     eax, [MAIN_DENOM+6+esp] ; exponent and high 16 bits of mantissa
  505.         xor     eax, ONESMASK           ; invert bits that have to be one
  506.         test    eax, ONESMASK           ; check bits that have to be one
  507.         jnz     remainder1_hardware_ok
  508.         shr     eax, 11
  509.         and     eax, 0fh
  510.         cmp     byte ptr fprem_risc_table[eax], 0     ; check for (1,4,7,a,d)
  511.         jz      remainder1_hardware_ok
  512.  
  513. ; The denominator has the bit pattern. Weed out the funny cases like NaNs
  514. ; before applying the software version. Our caller guarantees that the
  515. ; denominator is not a denormal. Here we check for:
  516. ;       denominator     inf, NaN, unnormal
  517. ;       numerator       inf, NaN, unnormal, denormal
  518.  
  519.         mov     eax, [MAIN_DENOM+6+esp] ; exponent and high 16 bits of mantissa
  520.         and     eax, 07fff0000h         ; mask the exponent only
  521.         cmp     eax, 07fff0000h         ; check for INF or NaN
  522.         je      remainder1_hardware_ok
  523.         mov     eax, [MAIN_NUMER+6+esp] ; exponent and high 16 bits of mantissa
  524.         and     eax, 07fff0000h         ; mask the exponent only
  525.         jz      remainder1_hardware_ok  ; jif numerator denormal
  526.         cmp     eax, 07fff0000h         ; check for INF or NaN
  527.         je      remainder1_hardware_ok
  528.         mov     eax, [esp + MAIN_NUMER + 4]     ; high mantissa bits - numerator
  529.         add     eax, eax                ; set carry if explicit bit set
  530.         jnz     remainder1_hardware_ok  ; jmp if numerator is unnormal
  531.         mov     eax, [esp + MAIN_DENOM + 4] ; high mantissa bits - denominator
  532.         add     eax, eax                ; set carry if explicit bit set
  533.         jnz     remainder1_hardware_ok  ; jmp if denominator is unnormal
  534.  
  535. rem1_patch:
  536.         mov     eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
  537.         and     eax, 07fffh              ; clear sy
  538.         add     eax, 63                  ; evaluate ey + 63
  539.         mov     ebx, [MAIN_NUMER+8+esp]  ; sign and exponent of x (numerator)
  540.         and     ebx, 07fffh              ; clear sx
  541.         sub     ebx, eax                 ; evaluate the exponent difference (ex - ey)
  542.         ja      rem1_large              ; if ex > ey + 63, case of large arguments
  543. rem1_patch_loop:
  544.         mov     eax, [MAIN_DENOM+8+esp]  ; sign and exponent of y (denominator)
  545.         and     eax, 07fffh             ; clear sy
  546.         add     eax, 10                 ; evaluate ey + 10
  547.         mov     ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
  548.         and     ebx, 07fffh             ; clear sx
  549.         sub     ebx, eax                ; evaluate the exponent difference (ex - ey)
  550.         js      remainder1_hardware_ok  ; safe if ey + 10 > ex
  551.         fld     tbyte ptr [MAIN_NUMER+esp]   ; load the numerator
  552.         mov     eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
  553.         mov     ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
  554.         and     ebx, 07fffh             ; clear sx
  555.         mov     ecx, ebx
  556.         sub     ebx, eax
  557.         and     ebx, 07h
  558.         or      ebx, 04h
  559.         sub     ecx, ebx
  560.         mov     ebx, eax
  561.         and     ebx, 08000h             ; keep sy
  562.         or      ecx, ebx                ; merge the sign of y
  563.         mov     dword ptr [MAIN_DENOM+8+esp], ecx
  564.         fld     tbyte ptr [MAIN_DENOM+esp]   ; load the shifted denominator
  565.         mov     dword ptr [MAIN_DENOM+8+esp], eax       ; restore the initial denominator
  566.         fxch
  567.         fprem                           ; this rem is safe
  568.         fstp    tbyte ptr [MAIN_NUMER+esp]      ; update the numerator
  569.         fstp    st(0)                   ; pop the stack
  570.         jmp rem1_patch_loop
  571. rem1_large:
  572.         test    ebx, 02h                ; is denominator already saved
  573.         jnz     already_saved1
  574.         fld     tbyte ptr[esp + MAIN_DENOM]
  575.         fstp    tbyte ptr[esp + MAIN_DENOM_SAVE]        ; save denominator
  576. already_saved1:
  577.         ; Save user's precision control and institute 80.  The fp ops in
  578.         ; rem1_large_loop must not round to user's precision (if it is less
  579.         ; than 80) because the hardware would not have done so.  We are
  580.         ; aping the hardware here, which is all extended.
  581.  
  582.         fnstcw  [esp+MAIN_PREV_CW]      ; save caller's control word
  583.         mov     eax, dword ptr[esp + MAIN_PREV_CW]
  584.         or      eax, 033fh              ; mask exceptions, pc=80
  585.         mov     [esp + MAIN_PATCH_CW], eax
  586.         fldcw   [esp + MAIN_PATCH_CW]
  587.  
  588.         mov     eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
  589.         and     eax, 07fffh             ; clear sy
  590.         mov     ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
  591.         and     ebx, 07fffh             ; clear sx
  592.         sub     ebx, eax                ; evaluate the exponent difference
  593.         and     ebx, 03fh
  594.         or      ebx, 020h
  595.         add     ebx, 1
  596.         mov     ecx, ebx
  597.         mov     eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
  598.         mov     ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
  599.         and     ebx, 07fffh             ; clear sx
  600.         and     eax, 08000h             ; keep sy
  601.         or      ebx, eax                ; merge the sign of y
  602.         mov     dword ptr[MAIN_DENOM+8+esp], ebx        ; make ey equal to ex (scaled denominator)
  603.         fld     tbyte ptr [MAIN_DENOM+esp]   ; load the scaled denominator
  604.         fabs
  605.         fld     tbyte ptr [MAIN_NUMER+esp]   ; load the numerator
  606.         fabs
  607. rem1_large_loop:
  608.         fcom
  609.         fstsw  ax
  610.         and     eax, 00100h
  611.         jnz     rem1_no_sub
  612.         fsub    st, st(1)
  613. rem1_no_sub:
  614.         fxch
  615.         fmul    qword ptr half
  616.         fxch
  617.         sub     ecx, 1                  ; decrement the loop counter
  618.         jnz     rem1_large_loop
  619.         mov     ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
  620.         fstp    tbyte ptr[esp + MAIN_NUMER]     ; save result
  621.         fstp    st                      ; toss modified denom
  622.         fld     tbyte ptr[esp + MAIN_DENOM_SAVE]
  623.         fld     tbyte ptr[big_number]   ; force C2 to be set
  624.         fprem1
  625.         fstp    st
  626.         fld     tbyte ptr[esp + MAIN_NUMER]     ; restore saved result
  627.  
  628.         fldcw   [esp + MAIN_PREV_CW]    ; restore caller's control word
  629.         and     ebx, 08000h             ; keep sx
  630.         jz      rem1_done
  631.         fchs
  632.         jmp     rem1_done
  633. remainder1_hardware_ok:
  634.         fld     tbyte ptr [MAIN_DENOM+esp]   ; load the denominator
  635.         fld     tbyte ptr [MAIN_NUMER+esp]   ; load the numerator
  636.         fprem1                           ; and finally do a remainder
  637. ; prem1_main_routine end
  638. rem1_done:
  639.         test    edx, 03h
  640.         jz      rem1_exit
  641.         fnstsw  [esp + MAIN_FPREM_SW]   ; save Q0 Q1 and Q2
  642.         test    edx, 01h
  643.         jz      do_not_de_scale1
  644. ; De-scale the result. Go to pc=80 to prevent from fmul
  645. ; from user precision (fprem does not round the result).
  646.         fnstcw  [esp + MAIN_PREV_CW]    ; save callers control word
  647.         mov     eax, [esp + MAIN_PREV_CW]
  648.         or      eax, 0300h              ; pc = 80
  649.         mov     [esp + MAIN_PATCH_CW], eax
  650.         fldcw   [esp + MAIN_PATCH_CW]
  651.         fmul    qword ptr one_shr_64
  652.         fldcw   [esp + MAIN_PREV_CW]    ; restore callers CW
  653. do_not_de_scale1:
  654.         mov     eax, [esp + MAIN_FPREM_SW]
  655.         fxch
  656.         fstp    st
  657.         fld     tbyte ptr[esp + MAIN_DENOM_SAVE]
  658.         fxch
  659.         and     eax, 04300h             ; restore saved Q0, Q1, Q2
  660.         sub     esp, ENV_SIZE
  661.         fnstenv [esp]
  662.         and     [esp].STATUS_WORD, 0bcffh
  663.         or      [esp].STATUS_WORD, eax
  664.         fldenv  [esp]
  665.         add     esp, ENV_SIZE
  666. rem1_exit:
  667.         pop     ecx
  668.         pop     ebx
  669.         pop     eax
  670.         CHECKSW                         ; debug only: save status
  671.         ret
  672. fprem1_common   ENDP
  673.  
  674.  
  675. comment ~***************************************************************
  676. ;
  677. ; float frem1_chk (float numer, float denom)
  678. ;
  679.         public  frem1_chk
  680. frem1_chk       PROC    NEAR
  681.         push    edx
  682.         sub     esp, STACK_SIZE
  683.         fld     dword ptr [STACK_SIZE+8+esp]
  684.         fstp    tbyte ptr [NUMER+esp]
  685.         fld     dword ptr [STACK_SIZE+12+esp]
  686.         fstp    tbyte ptr [DENOM+esp]
  687.         mov     edx, 0                  ; dx = 1 if denormal extended divisor
  688.         call    fprem1_common
  689.         fxch
  690.         fstp    st
  691.         add     esp, STACK_SIZE
  692.         pop     edx
  693.         ret
  694. frem1_chk       ENDP
  695. ; end frem1_chk
  696.  
  697. ;
  698. ; double drem1_chk (double numer, double denom)
  699. ;
  700.         public  drem1_chk
  701. drem1_chk       PROC    NEAR
  702.         push    edx
  703.         sub     esp, STACK_SIZE
  704.         fld     qword ptr [STACK_SIZE+8+esp]
  705.         fstp    tbyte ptr [NUMER+esp]
  706.         fld     qword ptr [STACK_SIZE+16+esp]
  707.         fstp    tbyte ptr [DENOM+esp]
  708.         mov     edx, 0                  ; dx = 1 if denormal extended divisor
  709.         call    fprem1_common
  710.         fxch
  711.         fstp    st
  712.         add     esp, STACK_SIZE
  713.         pop     edx
  714.         ret
  715.  
  716. drem1_chk       ENDP
  717. ; end drem1_chk
  718.  
  719. ;
  720. ; long double lrem1_chk(long double number,long double denom)
  721. ;
  722.         public  lrem1_chk
  723. lrem1_chk       PROC    NEAR
  724.         fld     tbyte ptr [20+esp]
  725.         fld     tbyte ptr [4+esp]
  726.         call    fprem1_chk
  727.         fxch
  728.         fstp    st
  729.         ret
  730. lrem1_chk       ENDP
  731. ********************************************************************~
  732.  
  733. ;
  734. ; FPREM1: ST = remainder(ST, ST(1)) - IEEE version of rounding
  735. ;
  736. ; Compiler version of the FPREM must preserve the arguments in the floating
  737. ; point stack.
  738.  
  739.         public  __fprem1_chk
  740.         defpe   __fprem1_chk
  741.         push    edx
  742.         sub     esp, STACK_SIZE
  743.         fstp    tbyte ptr [NUMER+esp]
  744.         fstp    tbyte ptr [DENOM+esp]
  745.         mov     edx, 0
  746. ; prem1_main_routine begin
  747.         mov     eax,[DENOM+6+esp]       ; exponent and high 16 bits of mantissa
  748.         test    eax,07fff0000h          ; check for denormal
  749.         jz      denormal1
  750.         call    fprem1_common
  751.         add     esp, STACK_SIZE
  752.         pop     edx
  753.         ret
  754.  
  755. denormal1:
  756.         fld     tbyte ptr [DENOM+esp]   ; load the denominator
  757.         fld     tbyte ptr [NUMER+esp]   ; load the numerator
  758.         mov     eax, [DENOM+esp]        ; test for whole mantissa == 0
  759.         or      eax, [DENOM+4+esp]      ; test for whole mantissa == 0
  760.         jz      remainder1_hardware_ok_l ; denominator is zero
  761.         fxch
  762.         fstp    tbyte ptr[esp + DENOM_SAVE]     ; save org denominator
  763.         fld     tbyte ptr[esp + DENOM]
  764.         fxch
  765.         or      edx, 02h
  766. ;
  767. ; For this we need pc=80.  Also, mask exceptions so we don't take any
  768. ; denormal operand exceptions.  It is guaranteed that the descaling
  769. ; later on will take underflow, which is what the hardware would have done
  770. ; on a normal fprem.
  771. ;
  772.         fnstcw  [PREV_CW+esp]           ; save caller's control word
  773.         mov     eax, [PREV_CW+esp]
  774.         or      eax, 0033fh             ; mask exceptions, pc=80
  775.         mov     [PATCH_CW+esp], eax
  776.         fldcw   [PATCH_CW+esp]          ; mask exceptions & pc=80
  777.  
  778. ; The denominator is a denormal.  For most numerators, scale both numerator
  779. ; and denominator to get rid of denormals.  Then execute the common code
  780. ; with the flag set to indicate that the result must be de-scaled.
  781. ; For large numerators this won't work because the scaling would cause
  782. ; overflow.  In this case we know the numerator is large, the denominator
  783. ; is small (denormal), so the exponent difference is also large.  This means
  784. ; the rem1_large code will be used and this code depends on the difference
  785. ; in exponents modulo 64.  Adding 64 to the denominators exponent
  786. ; doesn't change the modulo 64 difference.  So we can scale the denominator
  787. ; by 64, making it not denormal, and this won't effect the result.
  788. ;
  789. ; To start with, figure out if numerator is large
  790.  
  791.         mov     eax, [esp + NUMER + 8]  ; load numerator exponent
  792.         and     eax, 7fffh              ; isolate numerator exponent
  793.         cmp     eax, 7fbeh              ; compare Nexp to Maxexp-64
  794.         ja      big_numer_rem1_de       ; jif big numerator
  795.  
  796. ; So the numerator is not large scale both numerator and denominator
  797.  
  798.         or      edx, 1                  ; edx = 1, if denormal extended divisor
  799.         fmul    qword ptr one_shl_64    ; make numerator not denormal
  800.         fstp    tbyte ptr[esp + NUMER]
  801.         fmul    qword ptr one_shl_64    ; make denominator not denormal
  802.         fstp    tbyte ptr[esp + DENOM]
  803.         jmp     scaling_done1
  804.  
  805. ; The numerator is large.  Scale only the denominator, which will not
  806. ; change the result which we know will be partial.  Set the scale flag
  807. ; to false.
  808. big_numer_rem1_de:
  809.         ; We must do this with pc=80 to avoid rounding to single/double.
  810.         ; In this case we do not mask exceptions so that we will take
  811.         ; denormal operand, as would the hardware.
  812.         fnstcw  [PREV_CW+esp]           ; save caller's control word
  813.         mov     eax, [PREV_CW+esp]
  814.         or      eax, 00300h             ; pc=80
  815.         mov     [PATCH_CW+esp], eax
  816.         fldcw   [PATCH_CW+esp]          ;  pc=80
  817.  
  818.         fstp    st                      ; Toss numerator
  819.         fmul    qword ptr one_shl_64    ; make denominator not denormal
  820.         fstp    tbyte ptr[esp + DENOM]
  821.  
  822. ; Restore the control word which was fiddled to scale at 80-bit precision.
  823. ; Then call the common code.
  824. scaling_done1:
  825.         fldcw   [esp + PREV_CW]         ; restore callers control word
  826.         call    fprem1_common
  827.         add     esp, STACK_SIZE
  828.         pop     edx
  829.         ret
  830.  
  831. remainder1_hardware_ok_l:
  832.         fprem                           ; and finally do a remainder
  833.  
  834.         CHECKSW
  835.  
  836.         add     esp, STACK_SIZE
  837.         pop     edx
  838.         ret
  839. __fprem1_chk      ENDP
  840. ; end fprem1_chk
  841.  
  842. ifdef   DEBUG
  843.         public  fpinit
  844. fpinit  PROC    NEAR
  845.         fninit
  846.         ret
  847. fpinit  ENDP
  848. endif
  849.  
  850. CODE32 ENDS
  851.        END
  852.