Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | Download | RSS feed

  1. ;*****************************************************************************
  2. ;*
  3. ;*                            Open Watcom Project
  4. ;*
  5. ;*    Portions Copyright (c) 1983-2002 Sybase, Inc. All Rights Reserved.
  6. ;*
  7. ;*  ========================================================================
  8. ;*
  9. ;*    This file contains Original Code and/or Modifications of Original
  10. ;*    Code as defined in and that are subject to the Sybase Open Watcom
  11. ;*    Public License version 1.0 (the 'License'). You may not use this file
  12. ;*    except in compliance with the License. BY USING THIS FILE YOU AGREE TO
  13. ;*    ALL TERMS AND CONDITIONS OF THE LICENSE. A copy of the License is
  14. ;*    provided with the Original Code and Modifications, and is also
  15. ;*    available at www.sybase.com/developer/opensource.
  16. ;*
  17. ;*    The Original Code and all software distributed under the License are
  18. ;*    distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  19. ;*    EXPRESS OR IMPLIED, AND SYBASE AND ALL CONTRIBUTORS HEREBY DISCLAIM
  20. ;*    ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF
  21. ;*    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR
  22. ;*    NON-INFRINGEMENT. Please see the License for the specific language
  23. ;*    governing rights and limitations under the License.
  24. ;*
  25. ;*  ========================================================================
  26. ;*
  27. ;* Description:  __fptan_chk, a software replacement for the FPTAN instruction:
  28. ;*               forms sin(x)/cos(x) via FSINCOS with a divide guarded by fdiv_risk_table.
  29. ;*
  30. ;*****************************************************************************
  31.  
  32.  
  33. ; static char sccs_id[] = "@(#)fptan32.asm      1.4  12/20/94  16:51:51";
  34. ;
  35. ; This code is being published by Intel to users of the Pentium(tm)
  36. ; processor.  Recipients are authorized to copy, modify, compile, use and
  37. ; distribute the code.
  38. ;
  39. ; Intel makes no warranty of any kind with regard to this code, including
  40. ; but not limited to, implied warranties or merchantability and fitness for
  41. ; a particular purpose. Intel assumes no responsibility for any errors that
  42. ; may appear in this code.
  43. ;
  44. ; No patent licenses are granted, express or implied.
  45. ;
  46. ;
  47. ;  The following code is a PRELIMINARY IMPLEMENTATION of a
  48. ;  software patch for the floating point divide instructions.
  49. ;
  50. ;
  51. include mdef.inc
  52.  
  53. .386
  54. .387
  55.  
  56. PATCH_CW        EQU     00ch            ; frame offset of the control word loaded while scaling
  57. PREV_CW         EQU     010h            ; frame offset where the caller's control word is saved
  58. COSINE          EQU     0               ; These two are overlaid because they
  59. ANGLE           EQU     0               ; are not live at the same time.
  60. ; ANGLE/COSINE name the tbyte temporary at [esp+0]; the cosine overwrites the angle after its last reload.
  61.  
  62. STACK_SIZE      EQU     014h            ; bytes of scratch frame subtracted from ESP
  63.  
  64. ONESMASK        EQU     0e000000h       ; bits 27..25 of the left-shifted upper mantissa dword
  65.  
  66.  
  67. DATA32  SEGMENT DWORD USE32 PUBLIC 'DATA'
  68. ; fdiv_risk_table is indexed by the four mantissa bits after the binary point; nonzero = scale before dividing.
  69. fdiv_risk_table DB      0, 1, 0, 0, 4, 0, 0, 7, 0, 0, 10, 0, 0, 13, 0, 0
  70. fdiv_scale      DD      03f700000h              ; 0.9375 (15/16) as a single-precision bit pattern
  71. one_shl_63      DD      05f000000h              ; 2^63 as a single-precision bit pattern
  72.  
  73. DATA32  ENDS
  74.  
  75. DGROUP GROUP DATA32
  76.  
  77. _TEXT  SEGMENT   DWORD USE32 PUBLIC 'CODE'
  78.  
  79.  
  80.         assume cs:_TEXT, ds:DGROUP, es:DGROUP, ss:nothing
  81.         public  __fptan_chk
  82.  
  83. ; __fptan_chk: software stand-in for FPTAN.  In: st(0) = angle.
  84. ; Out: st(1) = sin/cos taken from FSINCOS, st(0) = 1.0; special operands go to the hardware FPTAN instead.
  85. ; PRELIMINARY VERSION.  The divide runs on operands pre-scaled by fdiv_scale when the
  86. ; cosine's leading mantissa bits are flagged by fdiv_risk_table.
  87. ; EAX is preserved, EFLAGS are not; 4 + STACK_SIZE bytes of stack are used.
  88.         defpe   __fptan_chk
  89. ; NOTE(review): defpe is a macro from mdef.inc (not visible here); presumably it opens the PROC closed by ENDP below.
  90.         push    eax                     ; EAX is the only integer register touched; restored before every ret
  91.         sub     esp, STACK_SIZE         ; reserve the scratch frame (tbyte temp, PATCH_CW, PREV_CW)
  92.         fstp    tbyte ptr [esp+ANGLE]   ; pop the operand to memory so its fields can be inspected
  93.         mov     eax, [esp+ANGLE+8]      ; sign/exponent word sits in the low 16 bits of this dword
  94.         and     eax, 07fffh             ; isolate the 15-bit exponent field
  95.         jz      use_hardware            ; exponent field zero: zeros, denormals, ...
  96.         cmp     eax, 07fffh
  97.         je      use_hardware            ; NaNs, infinities, ...
  98.         mov     eax, [esp+ANGLE+4]      ; upper 32 bits of the mantissa
  99.         add     eax, eax                ; shift the explicit integer bit into carry
  100.         jnc     use_hardware            ; unnormals (explicit 1 missing)
  101.         fld     tbyte ptr [esp+ANGLE]   ; reload the operand for the range check
  102.  
  103. ;
  104. ; Check for proper parameter range ( |<angle>| < 2^63)
  105. ;
  106.         fabs
  107.         fcomp   one_shl_63              ; compare |angle| with 2^63 and pop it again
  108.         fstsw  ax                       ; FPU condition codes -> AX
  109.         sahf                            ; AH -> EFLAGS, so the compare result can drive jae
  110.         jae     use_hardware            ; |angle| >= 2^63: outside the range handled here
  111.  
  112.         fld     tbyte ptr [esp+ANGLE]   ; reload the angle (last use of the ANGLE slot)
  113.         fsincos                         ; st(0) = cos(x), st(1) = sin(x)
  114.         fstp    tbyte ptr [esp+COSINE]  ; spill the cosine so its mantissa can be read
  115.         fld     tbyte ptr [esp+COSINE]  ; load the denominator (cos(x))
  116.         mov     eax, [esp+COSINE+4]     ; get mantissa bits 32-64
  117.         add     eax,eax                 ; shift the one's bit onto carry
  118.         xor     eax, ONESMASK           ; invert the bits that must be ones
  119.         test    eax, ONESMASK           ; and make sure they are all ones
  120.         jz      scale_if_needed         ; all ONESMASK bits were ones: consult the risk table
  121.         fdivp   st(1), st               ; use of hardware is OK: st(0) = sin/cos
  122.         fld1                            ; push 1.0 onto FPU stack
  123.         add     esp, STACK_SIZE         ; release the scratch frame
  124.         pop     eax
  125.         ret
  126.  
  127. scale_if_needed:
  128.         shr     eax, 28                 ; keep first 4 bits after point (EAX = 0..15, hence AH = 0)
  129.         cmp     fdiv_risk_table[eax], ah        ; check for (1,4,7,a,d): table entry compared with AH = 0
  130.         jnz     divide_scaled           ; nonzero entry: are in potential problem area
  131.         fdivp   st(1), st               ; use of hardware is OK.
  132.         fld1                            ; push 1.0 onto FPU stack
  133.         add     esp, STACK_SIZE         ; release the scratch frame
  134.         pop     eax
  135.         ret
  136.  
  137. divide_scaled:
  138.         fwait                           ; catch preceding exceptions
  139.         fstcw   [esp+PREV_CW]           ; save caller's control word
  140.         mov     eax, [esp+PREV_CW]      ; dword load; only the low word is the control word
  141.         or      eax, 033fh              ; mask exceptions, pc=80
  142.         mov     [esp+PATCH_CW], eax
  143.         fldcw   [esp+PATCH_CW]          ; mask exceptions & pc=80
  144.         fmul    fdiv_scale              ; scale denominator by 15/16
  145.         fxch                            ; bring the numerator (sine) to st(0)
  146.         fmul    fdiv_scale              ; scale numerator by 15/16
  147.         fxch                            ; denominator back in st(0) for the divide
  148.  
  149. ; This assures correctly rounded result if pc=64 as well
  150.  
  151.         fldcw   [esp+PREV_CW]           ; restore caller's control word
  152.         fdivp   st(1), st               ; divide the scaled operands under the caller's control word
  153.         fld1                            ; push 1.0 onto FPU stack
  154.         add     esp, STACK_SIZE         ; release the scratch frame
  155.         pop     eax
  156.         ret
  157.  
  158. use_hardware:
  159.         fld     tbyte ptr [esp+ANGLE]   ; put the unmodified operand back on the FPU stack
  160.         fptan                           ; let the hardware instruction handle this operand
  161.         add     esp, STACK_SIZE         ; release the scratch frame
  162.         pop     eax
  163.         ret
  164. __fptan_chk       ENDP
  165.  
  166. _TEXT  ENDS
  167.         end
  168.