Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | RSS feed

  1. /* Copyright (C) 2007-2015 Free Software Foundation, Inc.
  2.  
  3. This file is part of GCC.
  4.  
  5. GCC is free software; you can redistribute it and/or modify it under
  6. the terms of the GNU General Public License as published by the Free
  7. Software Foundation; either version 3, or (at your option) any later
  8. version.
  9.  
  10. GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  11. WARRANTY; without even the implied warranty of MERCHANTABILITY or
  12. FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13. for more details.
  14.  
  15. Under Section 7 of GPL version 3, you are granted additional
  16. permissions described in the GCC Runtime Library Exception, version
  17. 3.1, as published by the Free Software Foundation.
  18.  
  19. You should have received a copy of the GNU General Public License and
  20. a copy of the GCC Runtime Library Exception along with this program;
  21. see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
  22. <http://www.gnu.org/licenses/>.  */
  23.  
  24. /*****************************************************************************
  25.  *
  26.  *    Helper add functions (for fma)
  27.  *
  28.  *    __BID_INLINE__ UINT64 get_add64(
  29.  *        UINT64 sign_x, int exponent_x, UINT64 coefficient_x,
  30.  *        UINT64 sign_y, int exponent_y, UINT64 coefficient_y,
  31.  *                                       int rounding_mode)
  32.  *
  33.  *   __BID_INLINE__ UINT64 get_add128(
  34.  *                       UINT64 sign_x, int exponent_x, UINT64 coefficient_x,
  35.  *                       UINT64 sign_y, int final_exponent_y, UINT128 CY,
  36.  *                       int extra_digits, int rounding_mode)
  37.  *
  38.  *****************************************************************************
  39.  *
  40.  *  Algorithm description:
  41.  *
  42.  *  get_add64:  same as BID64 add, but arguments are unpacked and there
  43.  *                                 are no special case checks
  44.  *
  45.  *  get_add128: add 64-bit coefficient to 128-bit product (which contains
  46.  *                                        16+extra_digits decimal digits),
  47.  *                         return BID64 result
  48.  *              - the exponents are compared and the two coefficients are
  49.  *                properly aligned for addition/subtraction
  50.  *              - multiple paths are needed
  51.  *              - final result exponent is calculated and the lower term is
  52.  *                      rounded first if necessary, to avoid manipulating
  53.  *                      coefficients longer than 128 bits
  54.  *
  55.  ****************************************************************************/
  56.  
  57. #ifndef _INLINE_BID_ADD_H_
  58. #define _INLINE_BID_ADD_H_
  59.  
  60. #include "bid_internal.h"
  61.  
  62. #define MAX_FORMAT_DIGITS     16
  63. #define DECIMAL_EXPONENT_BIAS 398
  64. #define MASK_BINARY_EXPONENT  0x7ff0000000000000ull
  65. #define BINARY_EXPONENT_BIAS  0x3ff
  66. #define UPPER_EXPON_LIMIT     51
  67.  
  68. ///////////////////////////////////////////////////////////////////////
  69. //
  70. // get_add64() is essentially the same as bid_add(), except that
  71. //             the arguments are unpacked
  72. //
  73. //////////////////////////////////////////////////////////////////////
  74. __BID_INLINE__ UINT64
  75. get_add64 (UINT64 sign_x, int exponent_x, UINT64 coefficient_x,
  76.            UINT64 sign_y, int exponent_y, UINT64 coefficient_y,
  77.            int rounding_mode, unsigned *fpsc) {
  78.   UINT128 CA, CT, CT_new;
  79.   UINT64 sign_a, sign_b, coefficient_a, coefficient_b, sign_s, sign_ab,
  80.     rem_a;
  81.   UINT64 saved_ca, saved_cb, C0_64, C64, remainder_h, T1, carry, tmp,
  82.     C64_new;
  83.   int_double tempx;
  84.   int exponent_a, exponent_b, diff_dec_expon;
  85.   int bin_expon_ca, extra_digits, amount, scale_k, scale_ca;
  86.   unsigned rmode, status;
  87.  
  88.   // sort arguments by exponent
  89.   if (exponent_x <= exponent_y) {
  90.     sign_a = sign_y;
  91.     exponent_a = exponent_y;
  92.     coefficient_a = coefficient_y;
  93.     sign_b = sign_x;
  94.     exponent_b = exponent_x;
  95.     coefficient_b = coefficient_x;
  96.   } else {
  97.     sign_a = sign_x;
  98.     exponent_a = exponent_x;
  99.     coefficient_a = coefficient_x;
  100.     sign_b = sign_y;
  101.     exponent_b = exponent_y;
  102.     coefficient_b = coefficient_y;
  103.   }
  104.  
  105.   // exponent difference
  106.   diff_dec_expon = exponent_a - exponent_b;
  107.  
  108.   /* get binary coefficients of x and y */
  109.  
  110.   //--- get number of bits in the coefficients of x and y ---
  111.  
  112.   tempx.d = (double) coefficient_a;
  113.   bin_expon_ca = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff;
  114.  
  115.   if (!coefficient_a) {
  116.     return get_BID64 (sign_b, exponent_b, coefficient_b, rounding_mode,
  117.                       fpsc);
  118.   }
  119.   if (diff_dec_expon > MAX_FORMAT_DIGITS) {
  120.     // normalize a to a 16-digit coefficient
  121.  
  122.     scale_ca = estimate_decimal_digits[bin_expon_ca];
  123.     if (coefficient_a >= power10_table_128[scale_ca].w[0])
  124.       scale_ca++;
  125.  
  126.     scale_k = 16 - scale_ca;
  127.  
  128.     coefficient_a *= power10_table_128[scale_k].w[0];
  129.  
  130.     diff_dec_expon -= scale_k;
  131.     exponent_a -= scale_k;
  132.  
  133.     /* get binary coefficients of x and y */
  134.  
  135.     //--- get number of bits in the coefficients of x and y ---
  136.     tempx.d = (double) coefficient_a;
  137.     bin_expon_ca = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff;
  138.  
  139.     if (diff_dec_expon > MAX_FORMAT_DIGITS) {
  140. #ifdef SET_STATUS_FLAGS
  141.       if (coefficient_b) {
  142.         __set_status_flags (fpsc, INEXACT_EXCEPTION);
  143.       }
  144. #endif
  145.  
  146. #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
  147. #ifndef IEEE_ROUND_NEAREST
  148.       if (((rounding_mode) & 3) && coefficient_b)       // not ROUNDING_TO_NEAREST
  149.       {
  150.         switch (rounding_mode) {
  151.         case ROUNDING_DOWN:
  152.           if (sign_b) {
  153.             coefficient_a -= ((((SINT64) sign_a) >> 63) | 1);
  154.             if (coefficient_a < 1000000000000000ull) {
  155.               exponent_a--;
  156.               coefficient_a = 9999999999999999ull;
  157.             } else if (coefficient_a >= 10000000000000000ull) {
  158.               exponent_a++;
  159.               coefficient_a = 1000000000000000ull;
  160.             }
  161.           }
  162.           break;
  163.         case ROUNDING_UP:
  164.           if (!sign_b) {
  165.             coefficient_a += ((((SINT64) sign_a) >> 63) | 1);
  166.             if (coefficient_a < 1000000000000000ull) {
  167.               exponent_a--;
  168.               coefficient_a = 9999999999999999ull;
  169.             } else if (coefficient_a >= 10000000000000000ull) {
  170.               exponent_a++;
  171.               coefficient_a = 1000000000000000ull;
  172.             }
  173.           }
  174.           break;
  175.         default:        // RZ
  176.           if (sign_a != sign_b) {
  177.             coefficient_a--;
  178.             if (coefficient_a < 1000000000000000ull) {
  179.               exponent_a--;
  180.               coefficient_a = 9999999999999999ull;
  181.             }
  182.           }
  183.           break;
  184.         }
  185.       } else
  186. #endif
  187. #endif
  188.         // check special case here
  189.         if ((coefficient_a == 1000000000000000ull)
  190.             && (diff_dec_expon == MAX_FORMAT_DIGITS + 1)
  191.             && (sign_a ^ sign_b)
  192.             && (coefficient_b > 5000000000000000ull)) {
  193.         coefficient_a = 9999999999999999ull;
  194.         exponent_a--;
  195.       }
  196.  
  197.       return get_BID64 (sign_a, exponent_a, coefficient_a,
  198.                         rounding_mode, fpsc);
  199.     }
  200.   }
  201.   // test whether coefficient_a*10^(exponent_a-exponent_b)  may exceed 2^62
  202.   if (bin_expon_ca + estimate_bin_expon[diff_dec_expon] < 60) {
  203.     // coefficient_a*10^(exponent_a-exponent_b)<2^63
  204.  
  205.     // multiply by 10^(exponent_a-exponent_b)
  206.     coefficient_a *= power10_table_128[diff_dec_expon].w[0];
  207.  
  208.     // sign mask
  209.     sign_b = ((SINT64) sign_b) >> 63;
  210.     // apply sign to coeff. of b
  211.     coefficient_b = (coefficient_b + sign_b) ^ sign_b;
  212.  
  213.     // apply sign to coefficient a
  214.     sign_a = ((SINT64) sign_a) >> 63;
  215.     coefficient_a = (coefficient_a + sign_a) ^ sign_a;
  216.  
  217.     coefficient_a += coefficient_b;
  218.     // get sign
  219.     sign_s = ((SINT64) coefficient_a) >> 63;
  220.     coefficient_a = (coefficient_a + sign_s) ^ sign_s;
  221.     sign_s &= 0x8000000000000000ull;
  222.  
  223.     // coefficient_a < 10^16 ?
  224.     if (coefficient_a < power10_table_128[MAX_FORMAT_DIGITS].w[0]) {
  225. #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
  226. #ifndef IEEE_ROUND_NEAREST
  227.       if (rounding_mode == ROUNDING_DOWN && (!coefficient_a)
  228.           && sign_a != sign_b)
  229.         sign_s = 0x8000000000000000ull;
  230. #endif
  231. #endif
  232.       return get_BID64 (sign_s, exponent_b, coefficient_a,
  233.                         rounding_mode, fpsc);
  234.     }
  235.     // otherwise rounding is necessary
  236.  
  237.     // already know coefficient_a<10^19
  238.     // coefficient_a < 10^17 ?
  239.     if (coefficient_a < power10_table_128[17].w[0])
  240.       extra_digits = 1;
  241.     else if (coefficient_a < power10_table_128[18].w[0])
  242.       extra_digits = 2;
  243.     else
  244.       extra_digits = 3;
  245.  
  246. #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
  247. #ifndef IEEE_ROUND_NEAREST
  248.     rmode = rounding_mode;
  249.     if (sign_s && (unsigned) (rmode - 1) < 2)
  250.       rmode = 3 - rmode;
  251. #else
  252.     rmode = 0;
  253. #endif
  254. #else
  255.     rmode = 0;
  256. #endif
  257.     coefficient_a += round_const_table[rmode][extra_digits];
  258.  
  259.     // get P*(2^M[extra_digits])/10^extra_digits
  260.     __mul_64x64_to_128 (CT, coefficient_a,
  261.                         reciprocals10_64[extra_digits]);
  262.  
  263.     // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128
  264.     amount = short_recip_scale[extra_digits];
  265.     C64 = CT.w[1] >> amount;
  266.  
  267.   } else {
  268.     // coefficient_a*10^(exponent_a-exponent_b) is large
  269.     sign_s = sign_a;
  270.  
  271. #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
  272. #ifndef IEEE_ROUND_NEAREST
  273.     rmode = rounding_mode;
  274.     if (sign_s && (unsigned) (rmode - 1) < 2)
  275.       rmode = 3 - rmode;
  276. #else
  277.     rmode = 0;
  278. #endif
  279. #else
  280.     rmode = 0;
  281. #endif
  282.  
  283.     // check whether we can take faster path
  284.     scale_ca = estimate_decimal_digits[bin_expon_ca];
  285.  
  286.     sign_ab = sign_a ^ sign_b;
  287.     sign_ab = ((SINT64) sign_ab) >> 63;
  288.  
  289.     // T1 = 10^(16-diff_dec_expon)
  290.     T1 = power10_table_128[16 - diff_dec_expon].w[0];
  291.  
  292.     // get number of digits in coefficient_a
  293.     //P_ca = power10_table_128[scale_ca].w[0];
  294.     //P_ca_m1 = power10_table_128[scale_ca-1].w[0];
  295.     if (coefficient_a >= power10_table_128[scale_ca].w[0]) {
  296.       scale_ca++;
  297.       //P_ca_m1 = P_ca;
  298.       //P_ca = power10_table_128[scale_ca].w[0];
  299.     }
  300.  
  301.     scale_k = 16 - scale_ca;
  302.  
  303.     // apply sign
  304.     //Ts = (T1 + sign_ab) ^ sign_ab;
  305.  
  306.     // test range of ca
  307.     //X = coefficient_a + Ts - P_ca_m1;
  308.  
  309.     // addition
  310.     saved_ca = coefficient_a - T1;
  311.     coefficient_a =
  312.       (SINT64) saved_ca *(SINT64) power10_table_128[scale_k].w[0];
  313.     extra_digits = diff_dec_expon - scale_k;
  314.  
  315.     // apply sign
  316.     saved_cb = (coefficient_b + sign_ab) ^ sign_ab;
  317.     // add 10^16 and rounding constant
  318.     coefficient_b =
  319.       saved_cb + 10000000000000000ull +
  320.       round_const_table[rmode][extra_digits];
  321.  
  322.     // get P*(2^M[extra_digits])/10^extra_digits
  323.     __mul_64x64_to_128 (CT, coefficient_b,
  324.                         reciprocals10_64[extra_digits]);
  325.  
  326.     // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128
  327.     amount = short_recip_scale[extra_digits];
  328.     C0_64 = CT.w[1] >> amount;
  329.  
  330.     // result coefficient
  331.     C64 = C0_64 + coefficient_a;
  332.     // filter out difficult (corner) cases
  333.     // the following test is equivalent to
  334.     // ( (initial_coefficient_a + Ts) < P_ca &&
  335.     //     (initial_coefficient_a + Ts) > P_ca_m1 ),
  336.     // which ensures the number of digits in coefficient_a does not change
  337.     // after adding (the appropriately scaled and rounded) coefficient_b
  338.     if ((UINT64) (C64 - 1000000000000000ull - 1) >
  339.         9000000000000000ull - 2) {
  340.       if (C64 >= 10000000000000000ull) {
  341.         // result has more than 16 digits
  342.         if (!scale_k) {
  343.           // must divide coeff_a by 10
  344.           saved_ca = saved_ca + T1;
  345.           __mul_64x64_to_128 (CA, saved_ca, 0x3333333333333334ull);
  346.           //reciprocals10_64[1]);
  347.           coefficient_a = CA.w[1] >> 1;
  348.           rem_a =
  349.             saved_ca - (coefficient_a << 3) - (coefficient_a << 1);
  350.           coefficient_a = coefficient_a - T1;
  351.  
  352.           saved_cb +=
  353.             /*90000000000000000 */ +rem_a *
  354.             power10_table_128[diff_dec_expon].w[0];
  355.         } else
  356.           coefficient_a =
  357.             (SINT64) (saved_ca - T1 -
  358.                       (T1 << 3)) * (SINT64) power10_table_128[scale_k -
  359.                                                               1].w[0];
  360.  
  361.         extra_digits++;
  362.         coefficient_b =
  363.           saved_cb + 100000000000000000ull +
  364.           round_const_table[rmode][extra_digits];
  365.  
  366.         // get P*(2^M[extra_digits])/10^extra_digits
  367.         __mul_64x64_to_128 (CT, coefficient_b,
  368.                             reciprocals10_64[extra_digits]);
  369.  
  370.         // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128
  371.         amount = short_recip_scale[extra_digits];
  372.         C0_64 = CT.w[1] >> amount;
  373.  
  374.         // result coefficient
  375.         C64 = C0_64 + coefficient_a;
  376.       } else if (C64 <= 1000000000000000ull) {
  377.         // less than 16 digits in result
  378.         coefficient_a =
  379.           (SINT64) saved_ca *(SINT64) power10_table_128[scale_k +
  380.                                                         1].w[0];
  381.         //extra_digits --;
  382.         exponent_b--;
  383.         coefficient_b =
  384.           (saved_cb << 3) + (saved_cb << 1) + 100000000000000000ull +
  385.           round_const_table[rmode][extra_digits];
  386.  
  387.         // get P*(2^M[extra_digits])/10^extra_digits
  388.         __mul_64x64_to_128 (CT_new, coefficient_b,
  389.                             reciprocals10_64[extra_digits]);
  390.  
  391.         // now get P/10^extra_digits: shift C64 right by M[extra_digits]-128
  392.         amount = short_recip_scale[extra_digits];
  393.         C0_64 = CT_new.w[1] >> amount;
  394.  
  395.         // result coefficient
  396.         C64_new = C0_64 + coefficient_a;
  397.         if (C64_new < 10000000000000000ull) {
  398.           C64 = C64_new;
  399. #ifdef SET_STATUS_FLAGS
  400.           CT = CT_new;
  401. #endif
  402.         } else
  403.           exponent_b++;
  404.       }
  405.  
  406.     }
  407.  
  408.   }
  409.  
  410. #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
  411. #ifndef IEEE_ROUND_NEAREST
  412.   if (rmode == 0)       //ROUNDING_TO_NEAREST
  413. #endif
  414.     if (C64 & 1) {
  415.       // check whether fractional part of initial_P/10^extra_digits
  416.       // is exactly .5
  417.       // this is the same as fractional part of
  418.       //      (initial_P + 0.5*10^extra_digits)/10^extra_digits is exactly zero
  419.  
  420.       // get remainder
  421.       remainder_h = CT.w[1] << (64 - amount);
  422.  
  423.       // test whether fractional part is 0
  424.       if (!remainder_h && (CT.w[0] < reciprocals10_64[extra_digits])) {
  425.         C64--;
  426.       }
  427.     }
  428. #endif
  429.  
  430. #ifdef SET_STATUS_FLAGS
  431.   status = INEXACT_EXCEPTION;
  432.  
  433.   // get remainder
  434.   remainder_h = CT.w[1] << (64 - amount);
  435.  
  436.   switch (rmode) {
  437.   case ROUNDING_TO_NEAREST:
  438.   case ROUNDING_TIES_AWAY:
  439.     // test whether fractional part is 0
  440.     if ((remainder_h == 0x8000000000000000ull)
  441.         && (CT.w[0] < reciprocals10_64[extra_digits]))
  442.       status = EXACT_STATUS;
  443.     break;
  444.   case ROUNDING_DOWN:
  445.   case ROUNDING_TO_ZERO:
  446.     if (!remainder_h && (CT.w[0] < reciprocals10_64[extra_digits]))
  447.       status = EXACT_STATUS;
  448.     break;
  449.   default:
  450.     // round up
  451.     __add_carry_out (tmp, carry, CT.w[0],
  452.                      reciprocals10_64[extra_digits]);
  453.     if ((remainder_h >> (64 - amount)) + carry >=
  454.         (((UINT64) 1) << amount))
  455.       status = EXACT_STATUS;
  456.     break;
  457.   }
  458.   __set_status_flags (fpsc, status);
  459.  
  460. #endif
  461.  
  462.   return get_BID64 (sign_s, exponent_b + extra_digits, C64,
  463.                     rounding_mode, fpsc);
  464. }
  465.  
  466.  
  467. ///////////////////////////////////////////////////////////////////
  468. // round 128-bit coefficient and return result in BID64 format
  469. // do not worry about midpoint cases
  470. //////////////////////////////////////////////////////////////////
  471. static UINT64
  472. __bid_simple_round64_sticky (UINT64 sign, int exponent, UINT128 P,
  473.                              int extra_digits, int rounding_mode,
  474.                              unsigned *fpsc) {
  475.   UINT128 Q_high, Q_low, C128;
  476.   UINT64 C64;
  477.   int amount, rmode;
  478.  
  479.   rmode = rounding_mode;
  480. #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
  481. #ifndef IEEE_ROUND_NEAREST
  482.   if (sign && (unsigned) (rmode - 1) < 2)
  483.     rmode = 3 - rmode;
  484. #endif
  485. #endif
  486.   __add_128_64 (P, P, round_const_table[rmode][extra_digits]);
  487.  
  488.   // get P*(2^M[extra_digits])/10^extra_digits
  489.   __mul_128x128_full (Q_high, Q_low, P,
  490.                       reciprocals10_128[extra_digits]);
  491.  
  492.   // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
  493.   amount = recip_scale[extra_digits];
  494.   __shr_128 (C128, Q_high, amount);
  495.  
  496.   C64 = __low_64 (C128);
  497.  
  498. #ifdef SET_STATUS_FLAGS
  499.  
  500.   __set_status_flags (fpsc, INEXACT_EXCEPTION);
  501.  
  502. #endif
  503.  
  504.   return get_BID64 (sign, exponent, C64, rounding_mode, fpsc);
  505. }
  506.  
  507. ///////////////////////////////////////////////////////////////////
  508. // round 128-bit coefficient and return result in BID64 format
  509. ///////////////////////////////////////////////////////////////////
  510. static UINT64
  511. __bid_full_round64 (UINT64 sign, int exponent, UINT128 P,
  512.                     int extra_digits, int rounding_mode,
  513.                     unsigned *fpsc) {
  514.   UINT128 Q_high, Q_low, C128, Stemp, PU;
  515.   UINT64 remainder_h, C64, carry, CY;
  516.   int amount, amount2, rmode, status = 0;
  517.  
  518.   if (exponent < 0) {
  519.     if (exponent >= -16 && (extra_digits + exponent < 0)) {
  520.       extra_digits = -exponent;
  521. #ifdef SET_STATUS_FLAGS
  522.       if (extra_digits > 0) {
  523.         rmode = rounding_mode;
  524.         if (sign && (unsigned) (rmode - 1) < 2)
  525.           rmode = 3 - rmode;
  526.         __add_128_128 (PU, P,
  527.                        round_const_table_128[rmode][extra_digits]);
  528.         if (__unsigned_compare_gt_128
  529.             (power10_table_128[extra_digits + 15], PU))
  530.           status = UNDERFLOW_EXCEPTION;
  531.       }
  532. #endif
  533.     }
  534.   }
  535.  
  536.   if (extra_digits > 0) {
  537.     exponent += extra_digits;
  538.     rmode = rounding_mode;
  539.     if (sign && (unsigned) (rmode - 1) < 2)
  540.       rmode = 3 - rmode;
  541.     __add_128_128 (P, P, round_const_table_128[rmode][extra_digits]);
  542.  
  543.     // get P*(2^M[extra_digits])/10^extra_digits
  544.     __mul_128x128_full (Q_high, Q_low, P,
  545.                         reciprocals10_128[extra_digits]);
  546.  
  547.     // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
  548.     amount = recip_scale[extra_digits];
  549.     __shr_128_long (C128, Q_high, amount);
  550.  
  551.     C64 = __low_64 (C128);
  552.  
  553. #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
  554. #ifndef IEEE_ROUND_NEAREST
  555.     if (rmode == 0)     //ROUNDING_TO_NEAREST
  556. #endif
  557.       if (C64 & 1) {
  558.         // check whether fractional part of initial_P/10^extra_digits
  559.         // is exactly .5
  560.  
  561.         // get remainder
  562.         amount2 = 64 - amount;
  563.         remainder_h = 0;
  564.         remainder_h--;
  565.         remainder_h >>= amount2;
  566.         remainder_h = remainder_h & Q_high.w[0];
  567.  
  568.         if (!remainder_h
  569.             && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
  570.                 || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
  571.                     && Q_low.w[0] <
  572.                     reciprocals10_128[extra_digits].w[0]))) {
  573.           C64--;
  574.         }
  575.       }
  576. #endif
  577.  
  578. #ifdef SET_STATUS_FLAGS
  579.     status |= INEXACT_EXCEPTION;
  580.  
  581.     // get remainder
  582.     remainder_h = Q_high.w[0] << (64 - amount);
  583.  
  584.     switch (rmode) {
  585.     case ROUNDING_TO_NEAREST:
  586.     case ROUNDING_TIES_AWAY:
  587.       // test whether fractional part is 0
  588.       if (remainder_h == 0x8000000000000000ull
  589.           && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
  590.               || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
  591.                   && Q_low.w[0] <
  592.                   reciprocals10_128[extra_digits].w[0])))
  593.         status = EXACT_STATUS;
  594.       break;
  595.     case ROUNDING_DOWN:
  596.     case ROUNDING_TO_ZERO:
  597.       if (!remainder_h
  598.           && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
  599.               || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
  600.                   && Q_low.w[0] <
  601.                   reciprocals10_128[extra_digits].w[0])))
  602.         status = EXACT_STATUS;
  603.       break;
  604.     default:
  605.       // round up
  606.       __add_carry_out (Stemp.w[0], CY, Q_low.w[0],
  607.                        reciprocals10_128[extra_digits].w[0]);
  608.       __add_carry_in_out (Stemp.w[1], carry, Q_low.w[1],
  609.                           reciprocals10_128[extra_digits].w[1], CY);
  610.       if ((remainder_h >> (64 - amount)) + carry >=
  611.           (((UINT64) 1) << amount))
  612.         status = EXACT_STATUS;
  613.     }
  614.  
  615.     __set_status_flags (fpsc, status);
  616.  
  617. #endif
  618.   } else {
  619.     C64 = P.w[0];
  620.     if (!C64) {
  621.       sign = 0;
  622.       if (rounding_mode == ROUNDING_DOWN)
  623.         sign = 0x8000000000000000ull;
  624.     }
  625.   }
  626.   return get_BID64 (sign, exponent, C64, rounding_mode, fpsc);
  627. }
  628.  
  629. /////////////////////////////////////////////////////////////////////////////////
  630. // round 192-bit coefficient (P, remainder_P) and return result in BID64 format
  631. // the lowest 64 bits (remainder_P) are used for midpoint checking only
  632. ////////////////////////////////////////////////////////////////////////////////
  633. static UINT64
  634. __bid_full_round64_remainder (UINT64 sign, int exponent, UINT128 P,
  635.                               int extra_digits, UINT64 remainder_P,
  636.                               int rounding_mode, unsigned *fpsc,
  637.                               unsigned uf_status) {
  638.   UINT128 Q_high, Q_low, C128, Stemp;
  639.   UINT64 remainder_h, C64, carry, CY;
  640.   int amount, amount2, rmode, status = uf_status;
  641.  
  642.   rmode = rounding_mode;
  643.   if (sign && (unsigned) (rmode - 1) < 2)
  644.     rmode = 3 - rmode;
  645.   if (rmode == ROUNDING_UP && remainder_P) {
  646.     P.w[0]++;
  647.     if (!P.w[0])
  648.       P.w[1]++;
  649.   }
  650.  
  651.   if (extra_digits) {
  652.     __add_128_64 (P, P, round_const_table[rmode][extra_digits]);
  653.  
  654.     // get P*(2^M[extra_digits])/10^extra_digits
  655.     __mul_128x128_full (Q_high, Q_low, P,
  656.                         reciprocals10_128[extra_digits]);
  657.  
  658.     // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
  659.     amount = recip_scale[extra_digits];
  660.     __shr_128 (C128, Q_high, amount);
  661.  
  662.     C64 = __low_64 (C128);
  663.  
  664. #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
  665. #ifndef IEEE_ROUND_NEAREST
  666.     if (rmode == 0)     //ROUNDING_TO_NEAREST
  667. #endif
  668.       if (!remainder_P && (C64 & 1)) {
  669.         // check whether fractional part of initial_P/10^extra_digits
  670.         // is exactly .5
  671.  
  672.         // get remainder
  673.         amount2 = 64 - amount;
  674.         remainder_h = 0;
  675.         remainder_h--;
  676.         remainder_h >>= amount2;
  677.         remainder_h = remainder_h & Q_high.w[0];
  678.  
  679.         if (!remainder_h
  680.             && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
  681.                 || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
  682.                     && Q_low.w[0] <
  683.                     reciprocals10_128[extra_digits].w[0]))) {
  684.           C64--;
  685.         }
  686.       }
  687. #endif
  688.  
  689. #ifdef SET_STATUS_FLAGS
  690.     status |= INEXACT_EXCEPTION;
  691.  
  692.     if (!remainder_P) {
  693.       // get remainder
  694.       remainder_h = Q_high.w[0] << (64 - amount);
  695.  
  696.       switch (rmode) {
  697.       case ROUNDING_TO_NEAREST:
  698.       case ROUNDING_TIES_AWAY:
  699.         // test whether fractional part is 0
  700.         if (remainder_h == 0x8000000000000000ull
  701.             && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
  702.                 || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
  703.                     && Q_low.w[0] <
  704.                     reciprocals10_128[extra_digits].w[0])))
  705.           status = EXACT_STATUS;
  706.         break;
  707.       case ROUNDING_DOWN:
  708.       case ROUNDING_TO_ZERO:
  709.         if (!remainder_h
  710.             && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
  711.                 || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
  712.                     && Q_low.w[0] <
  713.                     reciprocals10_128[extra_digits].w[0])))
  714.           status = EXACT_STATUS;
  715.         break;
  716.       default:
  717.         // round up
  718.         __add_carry_out (Stemp.w[0], CY, Q_low.w[0],
  719.                          reciprocals10_128[extra_digits].w[0]);
  720.         __add_carry_in_out (Stemp.w[1], carry, Q_low.w[1],
  721.                             reciprocals10_128[extra_digits].w[1], CY);
  722.         if ((remainder_h >> (64 - amount)) + carry >=
  723.             (((UINT64) 1) << amount))
  724.           status = EXACT_STATUS;
  725.       }
  726.     }
  727.     __set_status_flags (fpsc, status);
  728.  
  729. #endif
  730.   } else {
  731.     C64 = P.w[0];
  732. #ifdef SET_STATUS_FLAGS
  733.     if (remainder_P) {
  734.       __set_status_flags (fpsc, uf_status | INEXACT_EXCEPTION);
  735.     }
  736. #endif
  737.   }
  738.  
  739.   return get_BID64 (sign, exponent + extra_digits, C64, rounding_mode,
  740.                     fpsc);
  741. }
  742.  
  743.  
  744. ///////////////////////////////////////////////////////////////////
  745. // get P/10^extra_digits
  746. // result fits in 64 bits
  747. ///////////////////////////////////////////////////////////////////
  748. __BID_INLINE__ UINT64
  749. __truncate (UINT128 P, int extra_digits)
  750. // extra_digits <= 16
  751. {
  752.   UINT128 Q_high, Q_low, C128;
  753.   UINT64 C64;
  754.   int amount;
  755.  
  756.   // get P*(2^M[extra_digits])/10^extra_digits
  757.   __mul_128x128_full (Q_high, Q_low, P,
  758.                       reciprocals10_128[extra_digits]);
  759.  
  760.   // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
  761.   amount = recip_scale[extra_digits];
  762.   __shr_128 (C128, Q_high, amount);
  763.  
  764.   C64 = __low_64 (C128);
  765.  
  766.   return C64;
  767. }
  768.  
  769.  
  770. ///////////////////////////////////////////////////////////////////
  771. // return number of decimal digits in 128-bit value X
  772. ///////////////////////////////////////////////////////////////////
  773. __BID_INLINE__ int
  774. __get_dec_digits64 (UINT128 X) {
  775.   int_double tempx;
  776.   int digits_x, bin_expon_cx;
  777.  
  778.   if (!X.w[1]) {
  779.     //--- get number of bits in the coefficients of x and y ---
  780.     tempx.d = (double) X.w[0];
  781.     bin_expon_cx = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff;
  782.     // get number of decimal digits in the coeff_x
  783.     digits_x = estimate_decimal_digits[bin_expon_cx];
  784.     if (X.w[0] >= power10_table_128[digits_x].w[0])
  785.       digits_x++;
  786.     return digits_x;
  787.   }
  788.   tempx.d = (double) X.w[1];
  789.   bin_expon_cx = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff;
  790.   // get number of decimal digits in the coeff_x
  791.   digits_x = estimate_decimal_digits[bin_expon_cx + 64];
  792.   if (__unsigned_compare_ge_128 (X, power10_table_128[digits_x]))
  793.     digits_x++;
  794.  
  795.   return digits_x;
  796. }
  797.  
  798.  
  799. ////////////////////////////////////////////////////////////////////////////////
  800. //
  801. // add 64-bit coefficient to 128-bit coefficient, return result in BID64 format
  802. //
  803. ////////////////////////////////////////////////////////////////////////////////
  804. __BID_INLINE__ UINT64
  805. get_add128 (UINT64 sign_x, int exponent_x, UINT64 coefficient_x,
  806.             UINT64 sign_y, int final_exponent_y, UINT128 CY,
  807.             int extra_digits, int rounding_mode, unsigned *fpsc) {
  808.   UINT128 CY_L, CX, FS, F, CT, ST, T2;
  809.   UINT64 CYh, CY0L, T, S, coefficient_y, remainder_y;
  810.   SINT64 D = 0;
  811.   int_double tempx;
  812.   int diff_dec_expon, extra_digits2, exponent_y, status;
  813.   int extra_dx, diff_dec2, bin_expon_cx, digits_x, rmode;
  814.  
  815.   // CY has more than 16 decimal digits
  816.  
  817.   exponent_y = final_exponent_y - extra_digits;
  818.  
  819. #ifdef IEEE_ROUND_NEAREST_TIES_AWAY
  820.   rounding_mode = 0;
  821. #endif
  822. #ifdef IEEE_ROUND_NEAREST
  823.   rounding_mode = 0;
  824. #endif
  825.  
  826.   if (exponent_x > exponent_y) {
  827.     // normalize x
  828.     //--- get number of bits in the coefficients of x and y ---
  829.     tempx.d = (double) coefficient_x;
  830.     bin_expon_cx = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff;
  831.     // get number of decimal digits in the coeff_x
  832.     digits_x = estimate_decimal_digits[bin_expon_cx];
  833.     if (coefficient_x >= power10_table_128[digits_x].w[0])
  834.       digits_x++;
  835.  
  836.     extra_dx = 16 - digits_x;
  837.     coefficient_x *= power10_table_128[extra_dx].w[0];
  838.     if ((sign_x ^ sign_y) && (coefficient_x == 1000000000000000ull)) {
  839.       extra_dx++;
  840.       coefficient_x = 10000000000000000ull;
  841.     }
  842.     exponent_x -= extra_dx;
  843.  
  844.     if (exponent_x > exponent_y) {
  845.  
  846.       // exponent_x > exponent_y
  847.       diff_dec_expon = exponent_x - exponent_y;
  848.  
  849.       if (exponent_x <= final_exponent_y + 1) {
  850.         __mul_64x64_to_128 (CX, coefficient_x,
  851.                             power10_table_128[diff_dec_expon].w[0]);
  852.  
  853.         if (sign_x == sign_y) {
  854.           __add_128_128 (CT, CY, CX);
  855.           if ((exponent_x >
  856.                final_exponent_y) /*&& (final_exponent_y>0) */ )
  857.             extra_digits++;
  858.           if (__unsigned_compare_ge_128
  859.               (CT, power10_table_128[16 + extra_digits]))
  860.             extra_digits++;
  861.         } else {
  862.           __sub_128_128 (CT, CY, CX);
  863.           if (((SINT64) CT.w[1]) < 0) {
  864.             CT.w[0] = 0 - CT.w[0];
  865.             CT.w[1] = 0 - CT.w[1];
  866.             if (CT.w[0])
  867.               CT.w[1]--;
  868.             sign_y = sign_x;
  869.           } else if (!(CT.w[1] | CT.w[0])) {
  870.             sign_y =
  871.               (rounding_mode !=
  872.                ROUNDING_DOWN) ? 0 : 0x8000000000000000ull;
  873.           }
  874.           if ((exponent_x + 1 >=
  875.                final_exponent_y) /*&& (final_exponent_y>=0) */ ) {
  876.             extra_digits = __get_dec_digits64 (CT) - 16;
  877.             if (extra_digits <= 0) {
  878.               if (!CT.w[0] && rounding_mode == ROUNDING_DOWN)
  879.                 sign_y = 0x8000000000000000ull;
  880.               return get_BID64 (sign_y, exponent_y, CT.w[0],
  881.                                 rounding_mode, fpsc);
  882.             }
  883.           } else
  884.             if (__unsigned_compare_gt_128
  885.                 (power10_table_128[15 + extra_digits], CT))
  886.             extra_digits--;
  887.         }
  888.  
  889.         return __bid_full_round64 (sign_y, exponent_y, CT, extra_digits,
  890.                                    rounding_mode, fpsc);
  891.       }
  892.       // diff_dec2+extra_digits is the number of digits to eliminate from
  893.       //                           argument CY
  894.       diff_dec2 = exponent_x - final_exponent_y;
  895.  
  896.       if (diff_dec2 >= 17) {
  897. #ifndef IEEE_ROUND_NEAREST
  898. #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
  899.         if ((rounding_mode) & 3) {
  900.           switch (rounding_mode) {
  901.           case ROUNDING_UP:
  902.             if (!sign_y) {
  903.               D = ((SINT64) (sign_x ^ sign_y)) >> 63;
  904.               D = D + D + 1;
  905.               coefficient_x += D;
  906.             }
  907.             break;
  908.           case ROUNDING_DOWN:
  909.             if (sign_y) {
  910.               D = ((SINT64) (sign_x ^ sign_y)) >> 63;
  911.               D = D + D + 1;
  912.               coefficient_x += D;
  913.             }
  914.             break;
  915.           case ROUNDING_TO_ZERO:
  916.             if (sign_y != sign_x) {
  917.               D = 0 - 1;
  918.               coefficient_x += D;
  919.             }
  920.             break;
  921.           }
  922.           if (coefficient_x < 1000000000000000ull) {
  923.             coefficient_x -= D;
  924.             coefficient_x =
  925.               D + (coefficient_x << 1) + (coefficient_x << 3);
  926.             exponent_x--;
  927.           }
  928.         }
  929. #endif
  930. #endif
  931. #ifdef SET_STATUS_FLAGS
  932.         if (CY.w[1] | CY.w[0])
  933.           __set_status_flags (fpsc, INEXACT_EXCEPTION);
  934. #endif
  935.         return get_BID64 (sign_x, exponent_x, coefficient_x,
  936.                           rounding_mode, fpsc);
  937.       }
  938.       // here exponent_x <= 16+final_exponent_y
  939.  
  940.       // truncate CY to 16 dec. digits
  941.       CYh = __truncate (CY, extra_digits);
  942.  
  943.       // get remainder
  944.       T = power10_table_128[extra_digits].w[0];
  945.       __mul_64x64_to_64 (CY0L, CYh, T);
  946.  
  947.       remainder_y = CY.w[0] - CY0L;
  948.  
  949.       // align coeff_x, CYh
  950.       __mul_64x64_to_128 (CX, coefficient_x,
  951.                           power10_table_128[diff_dec2].w[0]);
  952.  
  953.       if (sign_x == sign_y) {
  954.         __add_128_64 (CT, CX, CYh);
  955.         if (__unsigned_compare_ge_128
  956.             (CT, power10_table_128[16 + diff_dec2]))
  957.           diff_dec2++;
  958.       } else {
  959.         if (remainder_y)
  960.           CYh++;
  961.         __sub_128_64 (CT, CX, CYh);
  962.         if (__unsigned_compare_gt_128
  963.             (power10_table_128[15 + diff_dec2], CT))
  964.           diff_dec2--;
  965.       }
  966.  
  967.       return __bid_full_round64_remainder (sign_x, final_exponent_y, CT,
  968.                                            diff_dec2, remainder_y,
  969.                                            rounding_mode, fpsc, 0);
  970.     }
  971.   }
  972.   // Here (exponent_x <= exponent_y)
  973.   {
  974.     diff_dec_expon = exponent_y - exponent_x;
  975.  
  976.     if (diff_dec_expon > MAX_FORMAT_DIGITS) {
  977.       rmode = rounding_mode;
  978.  
  979.       if ((sign_x ^ sign_y)) {
  980.         if (!CY.w[0])
  981.           CY.w[1]--;
  982.         CY.w[0]--;
  983.         if (__unsigned_compare_gt_128
  984.             (power10_table_128[15 + extra_digits], CY)) {
  985.           if (rmode & 3) {
  986.             extra_digits--;
  987.             final_exponent_y--;
  988.           } else {
  989.             CY.w[0] = 1000000000000000ull;
  990.             CY.w[1] = 0;
  991.             extra_digits = 0;
  992.           }
  993.         }
  994.       }
  995.       __scale128_10 (CY, CY);
  996.       extra_digits++;
  997.       CY.w[0] |= 1;
  998.  
  999.       return __bid_simple_round64_sticky (sign_y, final_exponent_y, CY,
  1000.                                           extra_digits, rmode, fpsc);
  1001.     }
  1002.     // apply sign to coeff_x
  1003.     sign_x ^= sign_y;
  1004.     sign_x = ((SINT64) sign_x) >> 63;
  1005.     CX.w[0] = (coefficient_x + sign_x) ^ sign_x;
  1006.     CX.w[1] = sign_x;
  1007.  
  1008.     // check whether CY (rounded to 16 digits) and CX have
  1009.     //                     any digits in the same position
  1010.     diff_dec2 = final_exponent_y - exponent_x;
  1011.  
  1012.     if (diff_dec2 <= 17) {
  1013.       // align CY to 10^ex
  1014.       S = power10_table_128[diff_dec_expon].w[0];
  1015.       __mul_64x128_short (CY_L, S, CY);
  1016.  
  1017.       __add_128_128 (ST, CY_L, CX);
  1018.       extra_digits2 = __get_dec_digits64 (ST) - 16;
  1019.       return __bid_full_round64 (sign_y, exponent_x, ST, extra_digits2,
  1020.                                  rounding_mode, fpsc);
  1021.     }
  1022.     // truncate CY to 16 dec. digits
  1023.     CYh = __truncate (CY, extra_digits);
  1024.  
  1025.     // get remainder
  1026.     T = power10_table_128[extra_digits].w[0];
  1027.     __mul_64x64_to_64 (CY0L, CYh, T);
  1028.  
  1029.     coefficient_y = CY.w[0] - CY0L;
  1030.     // add rounding constant
  1031.     rmode = rounding_mode;
  1032.     if (sign_y && (unsigned) (rmode - 1) < 2)
  1033.       rmode = 3 - rmode;
  1034. #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
  1035. #ifndef IEEE_ROUND_NEAREST
  1036.     if (!(rmode & 3))   //ROUNDING_TO_NEAREST
  1037. #endif
  1038. #endif
  1039.     {
  1040.       coefficient_y += round_const_table[rmode][extra_digits];
  1041.     }
  1042.     // align coefficient_y,  coefficient_x
  1043.     S = power10_table_128[diff_dec_expon].w[0];
  1044.     __mul_64x64_to_128 (F, coefficient_y, S);
  1045.  
  1046.     // fraction
  1047.     __add_128_128 (FS, F, CX);
  1048.  
  1049. #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
  1050. #ifndef IEEE_ROUND_NEAREST
  1051.     if (rmode == 0)     //ROUNDING_TO_NEAREST
  1052. #endif
  1053.     {
  1054.       // rounding code, here RN_EVEN
  1055.       // 10^(extra_digits+diff_dec_expon)
  1056.       T2 = power10_table_128[diff_dec_expon + extra_digits];
  1057.       if (__unsigned_compare_gt_128 (FS, T2)
  1058.           || ((CYh & 1) && __test_equal_128 (FS, T2))) {
  1059.         CYh++;
  1060.         __sub_128_128 (FS, FS, T2);
  1061.       }
  1062.     }
  1063. #endif
  1064. #ifndef IEEE_ROUND_NEAREST
  1065. #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
  1066.     if (rmode == 4)     //ROUNDING_TO_NEAREST
  1067. #endif
  1068.     {
  1069.       // rounding code, here RN_AWAY
  1070.       // 10^(extra_digits+diff_dec_expon)
  1071.       T2 = power10_table_128[diff_dec_expon + extra_digits];
  1072.       if (__unsigned_compare_ge_128 (FS, T2)) {
  1073.         CYh++;
  1074.         __sub_128_128 (FS, FS, T2);
  1075.       }
  1076.     }
  1077. #endif
  1078. #ifndef IEEE_ROUND_NEAREST
  1079. #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
  1080.     switch (rmode) {
  1081.     case ROUNDING_DOWN:
  1082.     case ROUNDING_TO_ZERO:
  1083.       if ((SINT64) FS.w[1] < 0) {
  1084.         CYh--;
  1085.         if (CYh < 1000000000000000ull) {
  1086.           CYh = 9999999999999999ull;
  1087.           final_exponent_y--;
  1088.         }
  1089.       } else {
  1090.         T2 = power10_table_128[diff_dec_expon + extra_digits];
  1091.         if (__unsigned_compare_ge_128 (FS, T2)) {
  1092.           CYh++;
  1093.           __sub_128_128 (FS, FS, T2);
  1094.         }
  1095.       }
  1096.       break;
  1097.     case ROUNDING_UP:
  1098.       if ((SINT64) FS.w[1] < 0)
  1099.         break;
  1100.       T2 = power10_table_128[diff_dec_expon + extra_digits];
  1101.       if (__unsigned_compare_gt_128 (FS, T2)) {
  1102.         CYh += 2;
  1103.         __sub_128_128 (FS, FS, T2);
  1104.       } else if ((FS.w[1] == T2.w[1]) && (FS.w[0] == T2.w[0])) {
  1105.         CYh++;
  1106.         FS.w[1] = FS.w[0] = 0;
  1107.       } else if (FS.w[1] | FS.w[0])
  1108.         CYh++;
  1109.       break;
  1110.     }
  1111. #endif
  1112. #endif
  1113.  
  1114. #ifdef SET_STATUS_FLAGS
  1115.     status = INEXACT_EXCEPTION;
  1116. #ifndef IEEE_ROUND_NEAREST
  1117. #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
  1118.     if (!(rmode & 3))
  1119. #endif
  1120. #endif
  1121.     {
  1122.       // RN modes
  1123.       if ((FS.w[1] ==
  1124.            round_const_table_128[0][diff_dec_expon + extra_digits].w[1])
  1125.           && (FS.w[0] ==
  1126.               round_const_table_128[0][diff_dec_expon +
  1127.                                        extra_digits].w[0]))
  1128.         status = EXACT_STATUS;
  1129.     }
  1130. #ifndef IEEE_ROUND_NEAREST
  1131. #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
  1132.     else if (!FS.w[1] && !FS.w[0])
  1133.       status = EXACT_STATUS;
  1134. #endif
  1135. #endif
  1136.  
  1137.     __set_status_flags (fpsc, status);
  1138. #endif
  1139.  
  1140.     return get_BID64 (sign_y, final_exponent_y, CYh, rounding_mode,
  1141.                       fpsc);
  1142.   }
  1143.  
  1144. }
  1145.  
  1146. //////////////////////////////////////////////////////////////////////////
  1147. //
  1148. //  If coefficient_z is less than 16 digits long, normalize to 16 digits
  1149. //
  1150. /////////////////////////////////////////////////////////////////////////
  1151. static UINT64
  1152. BID_normalize (UINT64 sign_z, int exponent_z,
  1153.                UINT64 coefficient_z, UINT64 round_dir, int round_flag,
  1154.                int rounding_mode, unsigned *fpsc) {
  1155.   SINT64 D;
  1156.   int_double tempx;
  1157.   int digits_z, bin_expon, scale, rmode;
  1158.  
  1159. #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
  1160. #ifndef IEEE_ROUND_NEAREST
  1161.   rmode = rounding_mode;
  1162.   if (sign_z && (unsigned) (rmode - 1) < 2)
  1163.     rmode = 3 - rmode;
  1164. #else
  1165.   if (coefficient_z >= power10_table_128[15].w[0])
  1166.     return z;
  1167. #endif
  1168. #endif
  1169.  
  1170.   //--- get number of bits in the coefficients of x and y ---
  1171.   tempx.d = (double) coefficient_z;
  1172.   bin_expon = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff;
  1173.   // get number of decimal digits in the coeff_x
  1174.   digits_z = estimate_decimal_digits[bin_expon];
  1175.   if (coefficient_z >= power10_table_128[digits_z].w[0])
  1176.     digits_z++;
  1177.  
  1178.   scale = 16 - digits_z;
  1179.   exponent_z -= scale;
  1180.   if (exponent_z < 0) {
  1181.     scale += exponent_z;
  1182.     exponent_z = 0;
  1183.   }
  1184.   coefficient_z *= power10_table_128[scale].w[0];
  1185.  
  1186. #ifdef SET_STATUS_FLAGS
  1187.   if (round_flag) {
  1188.     __set_status_flags (fpsc, INEXACT_EXCEPTION);
  1189.     if (coefficient_z < 1000000000000000ull)
  1190.       __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
  1191.     else if ((coefficient_z == 1000000000000000ull) && !exponent_z
  1192.              && ((SINT64) (round_dir ^ sign_z) < 0) && round_flag
  1193.              && (rmode == ROUNDING_DOWN || rmode == ROUNDING_TO_ZERO))
  1194.       __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
  1195.   }
  1196. #endif
  1197.  
  1198. #ifndef IEEE_ROUND_NEAREST_TIES_AWAY
  1199. #ifndef IEEE_ROUND_NEAREST
  1200.   if (round_flag && (rmode & 3)) {
  1201.     D = round_dir ^ sign_z;
  1202.  
  1203.     if (rmode == ROUNDING_UP) {
  1204.       if (D >= 0)
  1205.         coefficient_z++;
  1206.     } else {
  1207.       if (D < 0)
  1208.         coefficient_z--;
  1209.       if (coefficient_z < 1000000000000000ull && exponent_z) {
  1210.         coefficient_z = 9999999999999999ull;
  1211.         exponent_z--;
  1212.       }
  1213.     }
  1214.   }
  1215. #endif
  1216. #endif
  1217.  
  1218.   return get_BID64 (sign_z, exponent_z, coefficient_z, rounding_mode,
  1219.                     fpsc);
  1220. }
  1221.  
  1222.  
  1223. //////////////////////////////////////////////////////////////////////////
  1224. //
  1225. //    0*10^ey + cz*10^ez,   ey<ez  
  1226. //
  1227. //////////////////////////////////////////////////////////////////////////
  1228.  
  1229. __BID_INLINE__ UINT64
  1230. add_zero64 (int exponent_y, UINT64 sign_z, int exponent_z,
  1231.             UINT64 coefficient_z, unsigned *prounding_mode,
  1232.             unsigned *fpsc) {
  1233.   int_double tempx;
  1234.   int bin_expon, scale_k, scale_cz;
  1235.   int diff_expon;
  1236.  
  1237.   diff_expon = exponent_z - exponent_y;
  1238.  
  1239.   tempx.d = (double) coefficient_z;
  1240.   bin_expon = ((tempx.i & MASK_BINARY_EXPONENT) >> 52) - 0x3ff;
  1241.   scale_cz = estimate_decimal_digits[bin_expon];
  1242.   if (coefficient_z >= power10_table_128[scale_cz].w[0])
  1243.     scale_cz++;
  1244.  
  1245.   scale_k = 16 - scale_cz;
  1246.   if (diff_expon < scale_k)
  1247.     scale_k = diff_expon;
  1248.   coefficient_z *= power10_table_128[scale_k].w[0];
  1249.  
  1250.   return get_BID64 (sign_z, exponent_z - scale_k, coefficient_z,
  1251.                     *prounding_mode, fpsc);
  1252. }
  1253. #endif
  1254.