Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /* Copyright (C) 2007-2015 Free Software Foundation, Inc.
  2.  
  3. This file is part of GCC.
  4.  
  5. GCC is free software; you can redistribute it and/or modify it under
  6. the terms of the GNU General Public License as published by the Free
  7. Software Foundation; either version 3, or (at your option) any later
  8. version.
  9.  
  10. GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  11. WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12. FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13. for more details.
  14.  
  15. Under Section 7 of GPL version 3, you are granted additional
  16. permissions described in the GCC Runtime Library Exception, version
  17. 3.1, as published by the Free Software Foundation.
  18.  
  19. You should have received a copy of the GNU General Public License and
  20. a copy of the GCC Runtime Library Exception along with this program;
  21. see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
  22. <http://www.gnu.org/licenses/>.  */
  23.  
  24. /*****************************************************************************
  25.  *    BID64 fma
  26.  *****************************************************************************
  27.  *
  28.  *  Algorithm description:
  29.  *
  30.  *  if multiplication is guranteed exact (short coefficients)
  31.  *     call the unpacked arg. equivalent of bid64_add(x*y, z)
  32.  *  else
  33.  *     get full coefficient_x*coefficient_y product
  34.  *     call subroutine to perform addition of 64-bit argument
  35.  *                                         to 128-bit product
  36.  *
  37.  ****************************************************************************/
  38.  
  39. #include "bid_inline_add.h"
  40.  
  41. #if DECIMAL_CALL_BY_REFERENCE
  42. extern void bid64_mul (UINT64 * pres, UINT64 * px,
  43.                        UINT64 *
  44.                        py _RND_MODE_PARAM _EXC_FLAGS_PARAM
  45.                        _EXC_MASKS_PARAM _EXC_INFO_PARAM);
  46. #else
  47.  
  48. extern UINT64 bid64_mul (UINT64 x,
  49.                          UINT64 y _RND_MODE_PARAM
  50.                          _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
  51.                          _EXC_INFO_PARAM);
  52. #endif
  53.  
  54. #if DECIMAL_CALL_BY_REFERENCE
  55.  
  56. void
  57. bid64_fma (UINT64 * pres, UINT64 * px, UINT64 * py,
  58.            UINT64 *
  59.            pz _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
  60.            _EXC_INFO_PARAM) {
  61.   UINT64 x, y, z;
  62. #else
  63.  
  64. UINT64
  65. bid64_fma (UINT64 x, UINT64 y,
  66.            UINT64 z _RND_MODE_PARAM _EXC_FLAGS_PARAM
  67.            _EXC_MASKS_PARAM _EXC_INFO_PARAM) {
  68. #endif
  69.   UINT128 P, PU, CT, CZ;
  70.   UINT64 sign_x, sign_y, coefficient_x, coefficient_y, sign_z,
  71.     coefficient_z;
  72.   UINT64 C64, remainder_y, res;
  73.   UINT64 CYh, CY0L, T, valid_x, valid_y, valid_z;
  74.   int_double tempx, tempy;
  75.   int extra_digits, exponent_x, exponent_y, bin_expon_cx, bin_expon_cy,
  76.     bin_expon_product, rmode;
  77.   int digits_p, bp, final_exponent, exponent_z, digits_z, ez, ey,
  78.     scale_z, uf_status;
  79.  
  80. #if DECIMAL_CALL_BY_REFERENCE
  81. #if !DECIMAL_GLOBAL_ROUNDING
  82.   _IDEC_round rnd_mode = *prnd_mode;
  83. #endif
  84.   x = *px;
  85.   y = *py;
  86.   z = *pz;
  87. #endif
  88.  
  89.   valid_x = unpack_BID64 (&sign_x, &exponent_x, &coefficient_x, x);
  90.   valid_y = unpack_BID64 (&sign_y, &exponent_y, &coefficient_y, y);
  91.   valid_z = unpack_BID64 (&sign_z, &exponent_z, &coefficient_z, z);
  92.  
  93.   // unpack arguments, check for NaN, Infinity, or 0
  94.   if (!valid_x || !valid_y || !valid_z) {
  95.  
  96.     if ((y & MASK_NAN) == MASK_NAN) {   // y is NAN
  97.       // if x = {0, f, inf, NaN}, y = NaN, z = {0, f, inf, NaN} then res = Q (y)
  98.       // check first for non-canonical NaN payload
  99.       y = y & 0xfe03ffffffffffffull;    // clear G6-G12
  100.       if ((y & 0x0003ffffffffffffull) > 999999999999999ull) {
  101.         y = y & 0xfe00000000000000ull;  // clear G6-G12 and the payload bits
  102.       }
  103.       if ((y & MASK_SNAN) == MASK_SNAN) {       // y is SNAN
  104.         // set invalid flag
  105.         *pfpsf |= INVALID_EXCEPTION;
  106.         // return quiet (y)
  107.         res = y & 0xfdffffffffffffffull;
  108.       } else {  // y is QNaN
  109.         // return y
  110.         res = y;
  111.         // if z = SNaN or x = SNaN signal invalid exception
  112.         if ((z & MASK_SNAN) == MASK_SNAN
  113.             || (x & MASK_SNAN) == MASK_SNAN) {
  114.           // set invalid flag
  115.           *pfpsf |= INVALID_EXCEPTION;
  116.         }
  117.       }
  118.       BID_RETURN (res)
  119.     } else if ((z & MASK_NAN) == MASK_NAN) {    // z is NAN
  120.       // if x = {0, f, inf, NaN}, y = {0, f, inf}, z = NaN then res = Q (z)
  121.       // check first for non-canonical NaN payload
  122.       z = z & 0xfe03ffffffffffffull;    // clear G6-G12
  123.       if ((z & 0x0003ffffffffffffull) > 999999999999999ull) {
  124.         z = z & 0xfe00000000000000ull;  // clear G6-G12 and the payload bits
  125.       }
  126.       if ((z & MASK_SNAN) == MASK_SNAN) {       // z is SNAN
  127.         // set invalid flag
  128.         *pfpsf |= INVALID_EXCEPTION;
  129.         // return quiet (z)
  130.         res = z & 0xfdffffffffffffffull;
  131.       } else {  // z is QNaN
  132.         // return z
  133.         res = z;
  134.         // if x = SNaN signal invalid exception
  135.         if ((x & MASK_SNAN) == MASK_SNAN) {
  136.           // set invalid flag
  137.           *pfpsf |= INVALID_EXCEPTION;
  138.         }
  139.       }
  140.       BID_RETURN (res)
  141.     } else if ((x & MASK_NAN) == MASK_NAN) {    // x is NAN
  142.       // if x = NaN, y = {0, f, inf}, z = {0, f, inf} then res = Q (x)
  143.       // check first for non-canonical NaN payload
  144.       x = x & 0xfe03ffffffffffffull;    // clear G6-G12
  145.       if ((x & 0x0003ffffffffffffull) > 999999999999999ull) {
  146.         x = x & 0xfe00000000000000ull;  // clear G6-G12 and the payload bits
  147.       }
  148.       if ((x & MASK_SNAN) == MASK_SNAN) {       // x is SNAN
  149.         // set invalid flag
  150.         *pfpsf |= INVALID_EXCEPTION;
  151.         // return quiet (x)
  152.         res = x & 0xfdffffffffffffffull;
  153.       } else {  // x is QNaN
  154.         // return x
  155.         res = x;        // clear out G[6]-G[16]
  156.       }
  157.       BID_RETURN (res)
  158.     }
  159.  
  160.     if (!valid_x) {
  161.       // x is Inf. or 0
  162.  
  163.       // x is Infinity?
  164.       if ((x & 0x7800000000000000ull) == 0x7800000000000000ull) {
  165.         // check if y is 0
  166.         if (!coefficient_y) {
  167.           // y==0, return NaN
  168. #ifdef SET_STATUS_FLAGS
  169.           if ((z & 0x7e00000000000000ull) != 0x7c00000000000000ull)
  170.             __set_status_flags (pfpsf, INVALID_EXCEPTION);
  171. #endif
  172.           BID_RETURN (0x7c00000000000000ull);
  173.         }
  174.         // test if z is Inf of oposite sign
  175.         if (((z & 0x7c00000000000000ull) == 0x7800000000000000ull)
  176.             && (((x ^ y) ^ z) & 0x8000000000000000ull)) {
  177.           // return NaN
  178. #ifdef SET_STATUS_FLAGS
  179.           __set_status_flags (pfpsf, INVALID_EXCEPTION);
  180. #endif
  181.           BID_RETURN (0x7c00000000000000ull);
  182.         }
  183.         // otherwise return +/-Inf
  184.         BID_RETURN (((x ^ y) & 0x8000000000000000ull) |
  185.                     0x7800000000000000ull);
  186.       }
  187.       // x is 0
  188.       if (((y & 0x7800000000000000ull) != 0x7800000000000000ull)
  189.           && ((z & 0x7800000000000000ull) != 0x7800000000000000ull)) {
  190.  
  191.         if (coefficient_z) {
  192.           exponent_y = exponent_x - DECIMAL_EXPONENT_BIAS + exponent_y;
  193.  
  194.           sign_z = z & 0x8000000000000000ull;
  195.  
  196.           if (exponent_y >= exponent_z)
  197.             BID_RETURN (z);
  198.           res =
  199.             add_zero64 (exponent_y, sign_z, exponent_z, coefficient_z,
  200.                         &rnd_mode, pfpsf);
  201.           BID_RETURN (res);
  202.         }
  203.       }
  204.     }
  205.     if (!valid_y) {
  206.       // y is Inf. or 0
  207.  
  208.       // y is Infinity?
  209.       if ((y & 0x7800000000000000ull) == 0x7800000000000000ull) {
  210.         // check if x is 0
  211.         if (!coefficient_x) {
  212.           // y==0, return NaN
  213. #ifdef SET_STATUS_FLAGS
  214.           __set_status_flags (pfpsf, INVALID_EXCEPTION);
  215. #endif
  216.           BID_RETURN (0x7c00000000000000ull);
  217.         }
  218.         // test if z is Inf of oposite sign
  219.         if (((z & 0x7c00000000000000ull) == 0x7800000000000000ull)
  220.             && (((x ^ y) ^ z) & 0x8000000000000000ull)) {
  221. #ifdef SET_STATUS_FLAGS
  222.           __set_status_flags (pfpsf, INVALID_EXCEPTION);
  223. #endif
  224.           // return NaN
  225.           BID_RETURN (0x7c00000000000000ull);
  226.         }
  227.         // otherwise return +/-Inf
  228.         BID_RETURN (((x ^ y) & 0x8000000000000000ull) |
  229.                     0x7800000000000000ull);
  230.       }
  231.       // y is 0
  232.       if (((z & 0x7800000000000000ull) != 0x7800000000000000ull)) {
  233.  
  234.         if (coefficient_z) {
  235.           exponent_y += exponent_x - DECIMAL_EXPONENT_BIAS;
  236.  
  237.           sign_z = z & 0x8000000000000000ull;
  238.  
  239.           if (exponent_y >= exponent_z)
  240.             BID_RETURN (z);
  241.           res =
  242.             add_zero64 (exponent_y, sign_z, exponent_z, coefficient_z,
  243.                         &rnd_mode, pfpsf);
  244.           BID_RETURN (res);
  245.         }
  246.       }
  247.     }
  248.  
  249.     if (!valid_z) {
  250.       // y is Inf. or 0
  251.  
  252.       // test if y is NaN/Inf
  253.       if ((z & 0x7800000000000000ull) == 0x7800000000000000ull) {
  254.         BID_RETURN (coefficient_z & QUIET_MASK64);
  255.       }
  256.       // z is 0, return x*y
  257.       if ((!coefficient_x) || (!coefficient_y)) {
  258.         //0+/-0
  259.         exponent_x += exponent_y - DECIMAL_EXPONENT_BIAS;
  260.         if (exponent_x > DECIMAL_MAX_EXPON_64)
  261.           exponent_x = DECIMAL_MAX_EXPON_64;
  262.         else if (exponent_x < 0)
  263.           exponent_x = 0;
  264.         if (exponent_x <= exponent_z)
  265.           res = ((UINT64) exponent_x) << 53;
  266.         else
  267.           res = ((UINT64) exponent_z) << 53;
  268.         if ((sign_x ^ sign_y) == sign_z)
  269.           res |= sign_z;
  270. #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
  271. #ifndef IEEE_ROUND_NEAREST
  272.         else if (rnd_mode == ROUNDING_DOWN)
  273.           res |= 0x8000000000000000ull;
  274. #endif
  275. #endif
  276.         BID_RETURN (res);
  277.       }
  278.     }
  279.   }
  280.  
  281.   /* get binary coefficients of x and y */
  282.  
  283.   //--- get number of bits in the coefficients of x and y ---
  284.   // version 2 (original)
  285.   tempx.d = (double) coefficient_x;
  286.   bin_expon_cx = ((tempx.i & MASK_BINARY_EXPONENT) >> 52);
  287.  
  288.   tempy.d = (double) coefficient_y;
  289.   bin_expon_cy = ((tempy.i & MASK_BINARY_EXPONENT) >> 52);
  290.  
  291.   // magnitude estimate for coefficient_x*coefficient_y is
  292.   //        2^(unbiased_bin_expon_cx + unbiased_bin_expon_cx)
  293.   bin_expon_product = bin_expon_cx + bin_expon_cy;
  294.  
  295.   // check if coefficient_x*coefficient_y<2^(10*k+3)
  296.   // equivalent to unbiased_bin_expon_cx + unbiased_bin_expon_cx < 10*k+1
  297.   if (bin_expon_product < UPPER_EXPON_LIMIT + 2 * BINARY_EXPONENT_BIAS) {
  298.     //  easy multiply
  299.     C64 = coefficient_x * coefficient_y;
  300.     final_exponent = exponent_x + exponent_y - DECIMAL_EXPONENT_BIAS;
  301.     if ((final_exponent > 0) || (!coefficient_z)) {
  302.       res =
  303.         get_add64 (sign_x ^ sign_y,
  304.                    final_exponent, C64, sign_z, exponent_z, coefficient_z, rnd_mode, pfpsf);
  305.       BID_RETURN (res);
  306.     } else {
  307.       P.w[0] = C64;
  308.       P.w[1] = 0;
  309.       extra_digits = 0;
  310.     }
  311.   } else {
  312.     if (!coefficient_z) {
  313. #if DECIMAL_CALL_BY_REFERENCE
  314.       bid64_mul (&res, px,
  315.                  py _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
  316.                  _EXC_INFO_ARG);
  317. #else
  318.       res =
  319.         bid64_mul (x,
  320.                    y _RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
  321.                    _EXC_INFO_ARG);
  322. #endif
  323.       BID_RETURN (res);
  324.     }
  325.     // get 128-bit product: coefficient_x*coefficient_y
  326.     __mul_64x64_to_128 (P, coefficient_x, coefficient_y);
  327.  
  328.     // tighten binary range of P:  leading bit is 2^bp
  329.     // unbiased_bin_expon_product <= bp <= unbiased_bin_expon_product+1
  330.     bin_expon_product -= 2 * BINARY_EXPONENT_BIAS;
  331.     __tight_bin_range_128 (bp, P, bin_expon_product);
  332.  
  333.     // get number of decimal digits in the product
  334.     digits_p = estimate_decimal_digits[bp];
  335.     if (!(__unsigned_compare_gt_128 (power10_table_128[digits_p], P)))
  336.       digits_p++;       // if power10_table_128[digits_p] <= P
  337.  
  338.     // determine number of decimal digits to be rounded out
  339.     extra_digits = digits_p - MAX_FORMAT_DIGITS;
  340.     final_exponent =
  341.       exponent_x + exponent_y + extra_digits - DECIMAL_EXPONENT_BIAS;
  342.   }
  343.  
  344.   if (((unsigned) final_exponent) >= 3 * 256) {
  345.     if (final_exponent < 0) {
  346.       //--- get number of bits in the coefficients of z  ---
  347.       tempx.d = (double) coefficient_z;
  348.       bin_expon_cx = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff;
  349.       // get number of decimal digits in the coeff_x
  350.       digits_z = estimate_decimal_digits[bin_expon_cx];
  351.       if (coefficient_z >= power10_table_128[digits_z].w[0])
  352.         digits_z++;
  353.       // underflow
  354.       if ((final_exponent + 16 < 0)
  355.           || (exponent_z + digits_z > 33 + final_exponent)) {
  356.         res =
  357.           BID_normalize (sign_z, exponent_z, coefficient_z,
  358.                          sign_x ^ sign_y, 1, rnd_mode, pfpsf);
  359.         BID_RETURN (res);
  360.       }
  361.  
  362.       ez = exponent_z + digits_z - 16;
  363.       if (ez < 0)
  364.         ez = 0;
  365.       scale_z = exponent_z - ez;
  366.       coefficient_z *= power10_table_128[scale_z].w[0];
  367.       ey = final_exponent - extra_digits;
  368.       extra_digits = ez - ey;
  369.       if (extra_digits > 33) {
  370.         res =
  371.           BID_normalize (sign_z, exponent_z, coefficient_z,
  372.                          sign_x ^ sign_y, 1, rnd_mode, pfpsf);
  373.         BID_RETURN (res);
  374.       }
  375.       //else  // extra_digits<=32
  376.  
  377.       if (extra_digits > 17) {
  378.         CYh = __truncate (P, 16);
  379.         // get remainder
  380.         T = power10_table_128[16].w[0];
  381.         __mul_64x64_to_64 (CY0L, CYh, T);
  382.         remainder_y = P.w[0] - CY0L;
  383.  
  384.         extra_digits -= 16;
  385.         P.w[0] = CYh;
  386.         P.w[1] = 0;
  387.       } else
  388.         remainder_y = 0;
  389.  
  390.       // align coeff_x, CYh
  391.       __mul_64x64_to_128 (CZ, coefficient_z,
  392.                           power10_table_128[extra_digits].w[0]);
  393.  
  394.       if (sign_z == (sign_y ^ sign_x)) {
  395.         __add_128_128 (CT, CZ, P);
  396.         if (__unsigned_compare_ge_128
  397.             (CT, power10_table_128[16 + extra_digits])) {
  398.           extra_digits++;
  399.           ez++;
  400.         }
  401.       } else {
  402.         if (remainder_y && (__unsigned_compare_ge_128 (CZ, P))) {
  403.           P.w[0]++;
  404.           if (!P.w[0])
  405.             P.w[1]++;
  406.         }
  407.         __sub_128_128 (CT, CZ, P);
  408.         if (((SINT64) CT.w[1]) < 0) {
  409.           sign_z = sign_y ^ sign_x;
  410.           CT.w[0] = 0 - CT.w[0];
  411.           CT.w[1] = 0 - CT.w[1];
  412.           if (CT.w[0])
  413.             CT.w[1]--;
  414.         } else if(!(CT.w[1]|CT.w[0]))
  415.                 sign_z = (rnd_mode!=ROUNDING_DOWN)? 0: 0x8000000000000000ull;
  416.         if (ez
  417.             &&
  418.             (__unsigned_compare_gt_128
  419.              (power10_table_128[15 + extra_digits], CT))) {
  420.           extra_digits--;
  421.           ez--;
  422.         }
  423.       }
  424.  
  425. #ifdef SET_STATUS_FLAGS
  426.       uf_status = 0;
  427.       if ((!ez)
  428.           &&
  429.           __unsigned_compare_gt_128 (power10_table_128
  430.                                      [extra_digits + 15], CT)) {
  431.         rmode = rnd_mode;
  432.         if (sign_z && (unsigned) (rmode - 1) < 2)
  433.           rmode = 3 - rmode;
  434.         //__add_128_64(PU, CT, round_const_table[rmode][extra_digits]);
  435.         PU = power10_table_128[extra_digits + 15];
  436.         PU.w[0]--;
  437.         if (__unsigned_compare_gt_128 (PU, CT)
  438.             || (rmode == ROUNDING_DOWN)
  439.             || (rmode == ROUNDING_TO_ZERO))
  440.           uf_status = UNDERFLOW_EXCEPTION;
  441.         else if (extra_digits < 2) {
  442.           if ((rmode == ROUNDING_UP)) {
  443.             if (!extra_digits)
  444.               uf_status = UNDERFLOW_EXCEPTION;
  445.             else {
  446.               if (remainder_y && (sign_z != (sign_y ^ sign_x)))
  447.                 remainder_y = power10_table_128[16].w[0] - remainder_y;
  448.  
  449.               if (power10_table_128[15].w[0] > remainder_y)
  450.                 uf_status = UNDERFLOW_EXCEPTION;
  451.             }
  452.           } else        // RN or RN_away
  453.           {
  454.             if (remainder_y && (sign_z != (sign_y ^ sign_x)))
  455.               remainder_y = power10_table_128[16].w[0] - remainder_y;
  456.  
  457.             if (!extra_digits) {
  458.               remainder_y += round_const_table[rmode][15];
  459.               if (remainder_y < power10_table_128[16].w[0])
  460.                 uf_status = UNDERFLOW_EXCEPTION;
  461.             } else {
  462.               if (remainder_y < round_const_table[rmode][16])
  463.                 uf_status = UNDERFLOW_EXCEPTION;
  464.             }
  465.           }
  466.           //__set_status_flags (pfpsf, uf_status);
  467.         }
  468.       }
  469. #endif
  470.       res =
  471.         __bid_full_round64_remainder (sign_z, ez - extra_digits, CT,
  472.                                       extra_digits, remainder_y,
  473.                                       rnd_mode, pfpsf, uf_status);
  474.       BID_RETURN (res);
  475.  
  476.     } else {
  477.       if ((sign_z == (sign_x ^ sign_y))
  478.           || (final_exponent > 3 * 256 + 15)) {
  479.         res =
  480.           fast_get_BID64_check_OF (sign_x ^ sign_y, final_exponent,
  481.                                    1000000000000000ull, rnd_mode,
  482.                                    pfpsf);
  483.         BID_RETURN (res);
  484.       }
  485.     }
  486.   }
  487.  
  488.  
  489.   if (extra_digits > 0) {
  490.     res =
  491.       get_add128 (sign_z, exponent_z, coefficient_z, sign_x ^ sign_y,
  492.                   final_exponent, P, extra_digits, rnd_mode, pfpsf);
  493.     BID_RETURN (res);
  494.   }
  495.   // go to convert_format and exit
  496.   else {
  497.     C64 = __low_64 (P);
  498.  
  499.     res =
  500.       get_add64 (sign_x ^ sign_y,
  501.                  exponent_x + exponent_y - DECIMAL_EXPONENT_BIAS, C64,
  502.                  sign_z, exponent_z, coefficient_z,
  503.                  rnd_mode, pfpsf);
  504.     BID_RETURN (res);
  505.   }
  506. }
  507.