0,0 → 1,1795 |
/* Copyright (C) 2007-2015 Free Software Foundation, Inc. |
|
This file is part of GCC. |
|
GCC is free software; you can redistribute it and/or modify it under |
the terms of the GNU General Public License as published by the Free |
Software Foundation; either version 3, or (at your option) any later |
version. |
|
GCC is distributed in the hope that it will be useful, but WITHOUT ANY |
WARRANTY; without even the implied warranty of MERCHANTABILITY or |
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
for more details. |
|
Under Section 7 of GPL version 3, you are granted additional |
permissions described in the GCC Runtime Library Exception, version |
3.1, as published by the Free Software Foundation. |
|
You should have received a copy of the GNU General Public License and |
a copy of the GCC Runtime Library Exception along with this program; |
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see |
<http://www.gnu.org/licenses/>. */ |
|
/***************************************************************************** |
* BID64 divide |
***************************************************************************** |
* |
* Algorithm description: |
* |
* if(coefficient_x<coefficient_y) |
* p = number_digits(coefficient_y) - number_digits(coefficient_x) |
* A = coefficient_x*10^p |
* B = coefficient_y |
* CA= A*10^(15+j), j=0 for A>=B, 1 otherwise |
* Q = 0 |
* else |
* get Q=(int)(coefficient_x/coefficient_y) |
* (based on double precision divide) |
* check for exact divide case |
* Let R = coefficient_x - Q*coefficient_y |
* Let m=16-number_digits(Q) |
* CA=R*10^m, Q=Q*10^m |
* B = coefficient_y |
* endif |
* if (CA<2^64) |
* Q += CA/B (64-bit unsigned divide) |
* else |
* get final Q using double precision divide, followed by 3 integer |
* iterations |
* if exact result, eliminate trailing zeros |
* check for underflow |
* round coefficient to nearest |
* |
****************************************************************************/ |
|
#include "bid_internal.h" |
#include "bid_div_macros.h" |
#ifdef UNCHANGED_BINARY_STATUS_FLAGS |
#include <fenv.h> |
|
#define FE_ALL_FLAGS FE_INVALID|FE_DIVBYZERO|FE_OVERFLOW|FE_UNDERFLOW|FE_INEXACT |
#endif |
|
extern UINT32 convert_table[5][128][2]; |
extern SINT8 factors[][2]; |
extern UINT8 packed_10000_zeros[]; |
|
|
#if DECIMAL_CALL_BY_REFERENCE |
|
void |
bid64_div (UINT64 * pres, UINT64 * px, |
UINT64 * |
py _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM |
_EXC_INFO_PARAM) { |
UINT64 x, y; |
#else |
|
UINT64 |
bid64_div (UINT64 x, |
UINT64 y _RND_MODE_PARAM _EXC_FLAGS_PARAM |
_EXC_MASKS_PARAM _EXC_INFO_PARAM) { |
#endif |
UINT128 CA, CT; |
UINT64 sign_x, sign_y, coefficient_x, coefficient_y, A, B, QX, PD; |
UINT64 A2, Q, Q2, B2, B4, B5, R, T, DU, res; |
UINT64 valid_x, valid_y; |
SINT64 D; |
int_double t_scale, tempq, temp_b; |
int_float tempx, tempy; |
double da, db, dq, da_h, da_l; |
int exponent_x, exponent_y, bin_expon_cx; |
int diff_expon, ed1, ed2, bin_index; |
int rmode, amount; |
int nzeros, i, j, k, d5; |
UINT32 QX32, tdigit[3], digit, digit_h, digit_low; |
#ifdef UNCHANGED_BINARY_STATUS_FLAGS |
fexcept_t binaryflags = 0; |
#endif |
|
#if DECIMAL_CALL_BY_REFERENCE |
#if !DECIMAL_GLOBAL_ROUNDING |
_IDEC_round rnd_mode = *prnd_mode; |
#endif |
x = *px; |
y = *py; |
#endif |
|
valid_x = unpack_BID64 (&sign_x, &exponent_x, &coefficient_x, x); |
valid_y = unpack_BID64 (&sign_y, &exponent_y, &coefficient_y, y); |
|
// unpack arguments, check for NaN or Infinity |
if (!valid_x) { |
// x is Inf. or NaN |
#ifdef SET_STATUS_FLAGS |
if ((y & SNAN_MASK64) == SNAN_MASK64) // y is sNaN |
__set_status_flags (pfpsf, INVALID_EXCEPTION); |
#endif |
|
// test if x is NaN |
if ((x & NAN_MASK64) == NAN_MASK64) { |
#ifdef SET_STATUS_FLAGS |
if ((x & SNAN_MASK64) == SNAN_MASK64) // sNaN |
__set_status_flags (pfpsf, INVALID_EXCEPTION); |
#endif |
BID_RETURN (coefficient_x & QUIET_MASK64); |
} |
// x is Infinity? |
if ((x & INFINITY_MASK64) == INFINITY_MASK64) { |
// check if y is Inf or NaN |
if ((y & INFINITY_MASK64) == INFINITY_MASK64) { |
// y==Inf, return NaN |
if ((y & NAN_MASK64) == INFINITY_MASK64) { // Inf/Inf |
#ifdef SET_STATUS_FLAGS |
__set_status_flags (pfpsf, INVALID_EXCEPTION); |
#endif |
BID_RETURN (NAN_MASK64); |
} |
} else { |
// otherwise return +/-Inf |
BID_RETURN (((x ^ y) & 0x8000000000000000ull) | |
INFINITY_MASK64); |
} |
} |
// x==0 |
if (((y & INFINITY_MASK64) != INFINITY_MASK64) |
&& !(coefficient_y)) { |
// y==0 , return NaN |
#ifdef SET_STATUS_FLAGS |
__set_status_flags (pfpsf, INVALID_EXCEPTION); |
#endif |
BID_RETURN (NAN_MASK64); |
} |
if (((y & INFINITY_MASK64) != INFINITY_MASK64)) { |
if ((y & SPECIAL_ENCODING_MASK64) == SPECIAL_ENCODING_MASK64) |
exponent_y = ((UINT32) (y >> 51)) & 0x3ff; |
else |
exponent_y = ((UINT32) (y >> 53)) & 0x3ff; |
sign_y = y & 0x8000000000000000ull; |
|
exponent_x = exponent_x - exponent_y + DECIMAL_EXPONENT_BIAS; |
if (exponent_x > DECIMAL_MAX_EXPON_64) |
exponent_x = DECIMAL_MAX_EXPON_64; |
else if (exponent_x < 0) |
exponent_x = 0; |
BID_RETURN ((sign_x ^ sign_y) | (((UINT64) exponent_x) << 53)); |
} |
|
} |
if (!valid_y) { |
// y is Inf. or NaN |
|
// test if y is NaN |
if ((y & NAN_MASK64) == NAN_MASK64) { |
#ifdef SET_STATUS_FLAGS |
if ((y & SNAN_MASK64) == SNAN_MASK64) // sNaN |
__set_status_flags (pfpsf, INVALID_EXCEPTION); |
#endif |
BID_RETURN (coefficient_y & QUIET_MASK64); |
} |
// y is Infinity? |
if ((y & INFINITY_MASK64) == INFINITY_MASK64) { |
// return +/-0 |
BID_RETURN (((x ^ y) & 0x8000000000000000ull)); |
} |
// y is 0 |
#ifdef SET_STATUS_FLAGS |
__set_status_flags (pfpsf, ZERO_DIVIDE_EXCEPTION); |
#endif |
BID_RETURN ((sign_x ^ sign_y) | INFINITY_MASK64); |
} |
#ifdef UNCHANGED_BINARY_STATUS_FLAGS |
(void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS); |
#endif |
diff_expon = exponent_x - exponent_y + DECIMAL_EXPONENT_BIAS; |
|
if (coefficient_x < coefficient_y) { |
// get number of decimal digits for c_x, c_y |
|
//--- get number of bits in the coefficients of x and y --- |
tempx.d = (float) coefficient_x; |
tempy.d = (float) coefficient_y; |
bin_index = (tempy.i - tempx.i) >> 23; |
|
A = coefficient_x * power10_index_binexp[bin_index]; |
B = coefficient_y; |
|
temp_b.d = (double) B; |
|
// compare A, B |
DU = (A - B) >> 63; |
ed1 = 15 + (int) DU; |
ed2 = estimate_decimal_digits[bin_index] + ed1; |
T = power10_table_128[ed1].w[0]; |
__mul_64x64_to_128 (CA, A, T); |
|
Q = 0; |
diff_expon = diff_expon - ed2; |
|
// adjust double precision db, to ensure that later A/B - (int)(da/db) > -1 |
if (coefficient_y < 0x0020000000000000ull) { |
temp_b.i += 1; |
db = temp_b.d; |
} else |
db = (double) (B + 2 + (B & 1)); |
|
} else { |
// get c_x/c_y |
|
// set last bit before conversion to DP |
A2 = coefficient_x | 1; |
da = (double) A2; |
|
db = (double) coefficient_y; |
|
tempq.d = da / db; |
Q = (UINT64) tempq.d; |
|
R = coefficient_x - coefficient_y * Q; |
|
// will use to get number of dec. digits of Q |
bin_expon_cx = (tempq.i >> 52) - 0x3ff; |
|
// R<0 ? |
D = ((SINT64) R) >> 63; |
Q += D; |
R += (coefficient_y & D); |
|
// exact result ? |
if (((SINT64) R) <= 0) { |
// can have R==-1 for coeff_y==1 |
res = |
get_BID64 (sign_x ^ sign_y, diff_expon, (Q + R), rnd_mode, |
pfpsf); |
#ifdef UNCHANGED_BINARY_STATUS_FLAGS |
(void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); |
#endif |
BID_RETURN (res); |
} |
// get decimal digits of Q |
DU = power10_index_binexp[bin_expon_cx] - Q - 1; |
DU >>= 63; |
|
ed2 = 16 - estimate_decimal_digits[bin_expon_cx] - (int) DU; |
|
T = power10_table_128[ed2].w[0]; |
__mul_64x64_to_128 (CA, R, T); |
B = coefficient_y; |
|
Q *= power10_table_128[ed2].w[0]; |
diff_expon -= ed2; |
|
} |
|
if (!CA.w[1]) { |
Q2 = CA.w[0] / B; |
B2 = B + B; |
B4 = B2 + B2; |
R = CA.w[0] - Q2 * B; |
Q += Q2; |
} else { |
|
// 2^64 |
t_scale.i = 0x43f0000000000000ull; |
// convert CA to DP |
da_h = CA.w[1]; |
da_l = CA.w[0]; |
da = da_h * t_scale.d + da_l; |
|
// quotient |
dq = da / db; |
Q2 = (UINT64) dq; |
|
// get w[0] remainder |
R = CA.w[0] - Q2 * B; |
|
// R<0 ? |
D = ((SINT64) R) >> 63; |
Q2 += D; |
R += (B & D); |
|
// now R<6*B |
|
// quick divide |
|
// 4*B |
B2 = B + B; |
B4 = B2 + B2; |
|
R = R - B4; |
// R<0 ? |
D = ((SINT64) R) >> 63; |
// restore R if negative |
R += (B4 & D); |
Q2 += ((~D) & 4); |
|
R = R - B2; |
// R<0 ? |
D = ((SINT64) R) >> 63; |
// restore R if negative |
R += (B2 & D); |
Q2 += ((~D) & 2); |
|
R = R - B; |
// R<0 ? |
D = ((SINT64) R) >> 63; |
// restore R if negative |
R += (B & D); |
Q2 += ((~D) & 1); |
|
Q += Q2; |
} |
|
#ifdef SET_STATUS_FLAGS |
if (R) { |
// set status flags |
__set_status_flags (pfpsf, INEXACT_EXCEPTION); |
} |
#ifndef LEAVE_TRAILING_ZEROS |
else |
#endif |
#else |
#ifndef LEAVE_TRAILING_ZEROS |
if (!R) |
#endif |
#endif |
#ifndef LEAVE_TRAILING_ZEROS |
{ |
// eliminate trailing zeros |
|
// check whether CX, CY are short |
if ((coefficient_x <= 1024) && (coefficient_y <= 1024)) { |
i = (int) coefficient_y - 1; |
j = (int) coefficient_x - 1; |
// difference in powers of 2 factors for Y and X |
nzeros = ed2 - factors[i][0] + factors[j][0]; |
// difference in powers of 5 factors |
d5 = ed2 - factors[i][1] + factors[j][1]; |
if (d5 < nzeros) |
nzeros = d5; |
|
__mul_64x64_to_128 (CT, Q, reciprocals10_64[nzeros]); |
|
// now get P/10^extra_digits: shift C64 right by M[extra_digits]-128 |
amount = short_recip_scale[nzeros]; |
Q = CT.w[1] >> amount; |
|
diff_expon += nzeros; |
} else { |
tdigit[0] = Q & 0x3ffffff; |
tdigit[1] = 0; |
QX = Q >> 26; |
QX32 = QX; |
nzeros = 0; |
|
for (j = 0; QX32; j++, QX32 >>= 7) { |
k = (QX32 & 127); |
tdigit[0] += convert_table[j][k][0]; |
tdigit[1] += convert_table[j][k][1]; |
if (tdigit[0] >= 100000000) { |
tdigit[0] -= 100000000; |
tdigit[1]++; |
} |
} |
|
digit = tdigit[0]; |
if (!digit && !tdigit[1]) |
nzeros += 16; |
else { |
if (!digit) { |
nzeros += 8; |
digit = tdigit[1]; |
} |
// decompose digit |
PD = (UINT64) digit *0x068DB8BBull; |
digit_h = (UINT32) (PD >> 40); |
digit_low = digit - digit_h * 10000; |
|
if (!digit_low) |
nzeros += 4; |
else |
digit_h = digit_low; |
|
if (!(digit_h & 1)) |
nzeros += |
3 & (UINT32) (packed_10000_zeros[digit_h >> 3] >> |
(digit_h & 7)); |
} |
|
if (nzeros) { |
__mul_64x64_to_128 (CT, Q, reciprocals10_64[nzeros]); |
|
// now get P/10^extra_digits: shift C64 right by M[extra_digits]-128 |
amount = short_recip_scale[nzeros]; |
Q = CT.w[1] >> amount; |
} |
diff_expon += nzeros; |
|
} |
if (diff_expon >= 0) { |
res = |
fast_get_BID64_check_OF (sign_x ^ sign_y, diff_expon, Q, |
rnd_mode, pfpsf); |
#ifdef UNCHANGED_BINARY_STATUS_FLAGS |
(void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); |
#endif |
BID_RETURN (res); |
} |
} |
#endif |
|
if (diff_expon >= 0) { |
#ifdef IEEE_ROUND_NEAREST |
// round to nearest code |
// R*10 |
R += R; |
R = (R << 2) + R; |
B5 = B4 + B; |
|
// compare 10*R to 5*B |
R = B5 - R; |
// correction for (R==0 && (Q&1)) |
R -= (Q & 1); |
// R<0 ? |
D = ((UINT64) R) >> 63; |
Q += D; |
#else |
#ifdef IEEE_ROUND_NEAREST_TIES_AWAY |
// round to nearest code |
// R*10 |
R += R; |
R = (R << 2) + R; |
B5 = B4 + B; |
|
// compare 10*R to 5*B |
R = B5 - R; |
// correction for (R==0 && (Q&1)) |
R -= (Q & 1); |
// R<0 ? |
D = ((UINT64) R) >> 63; |
Q += D; |
#else |
rmode = rnd_mode; |
if (sign_x ^ sign_y && (unsigned) (rmode - 1) < 2) |
rmode = 3 - rmode; |
switch (rmode) { |
case 0: // round to nearest code |
case ROUNDING_TIES_AWAY: |
// R*10 |
R += R; |
R = (R << 2) + R; |
B5 = B4 + B; |
// compare 10*R to 5*B |
R = B5 - R; |
// correction for (R==0 && (Q&1)) |
R -= ((Q | (rmode >> 2)) & 1); |
// R<0 ? |
D = ((UINT64) R) >> 63; |
Q += D; |
break; |
case ROUNDING_DOWN: |
case ROUNDING_TO_ZERO: |
break; |
default: // rounding up |
Q++; |
break; |
} |
#endif |
#endif |
|
res = |
fast_get_BID64_check_OF (sign_x ^ sign_y, diff_expon, Q, rnd_mode, |
pfpsf); |
#ifdef UNCHANGED_BINARY_STATUS_FLAGS |
(void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); |
#endif |
BID_RETURN (res); |
} else { |
// UF occurs |
|
#ifdef SET_STATUS_FLAGS |
if ((diff_expon + 16 < 0)) { |
// set status flags |
__set_status_flags (pfpsf, INEXACT_EXCEPTION); |
} |
#endif |
rmode = rnd_mode; |
res = |
get_BID64_UF (sign_x ^ sign_y, diff_expon, Q, R, rmode, pfpsf); |
#ifdef UNCHANGED_BINARY_STATUS_FLAGS |
(void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); |
#endif |
BID_RETURN (res); |
|
} |
} |
|
|
|
TYPE0_FUNCTION_ARGTYPE1_ARG128 (UINT64, bid64dq_div, UINT64, x, y) |
UINT256 CA4 = |
{ {0x0ull, 0x0ull, 0x0ull, 0x0ull} }, CA4r, P256, QB256; |
UINT128 CX, CY, T128, CQ, CQ2, CR, CA, TP128, Qh, Tmp; |
UINT64 sign_x, sign_y, T, carry64, D, Q_low, QX, valid_y, PD, res; |
int_float fx, fy, f64; |
UINT32 QX32, tdigit[3], digit, digit_h, digit_low; |
int exponent_x, exponent_y, bin_index, bin_expon, diff_expon, ed2, |
digits_q, amount; |
int nzeros, i, j, k, d5, done = 0; |
unsigned rmode; |
#ifdef UNCHANGED_BINARY_STATUS_FLAGS |
fexcept_t binaryflags = 0; |
#endif |
|
valid_y = unpack_BID128_value (&sign_y, &exponent_y, &CY, y); |
|
// unpack arguments, check for NaN or Infinity |
CX.w[1] = 0; |
if (!unpack_BID64 (&sign_x, &exponent_x, &CX.w[0], (x))) { |
#ifdef SET_STATUS_FLAGS |
if (((y.w[1] & SNAN_MASK64) == SNAN_MASK64) || // y is sNaN |
((x & SNAN_MASK64) == SNAN_MASK64)) |
__set_status_flags (pfpsf, INVALID_EXCEPTION); |
#endif |
// test if x is NaN |
if (((x) & 0x7c00000000000000ull) == 0x7c00000000000000ull) { |
res = CX.w[0]; |
BID_RETURN (res & QUIET_MASK64); |
} |
// x is Infinity? |
if (((x) & 0x7800000000000000ull) == 0x7800000000000000ull) { |
// check if y is Inf. |
if (((y.w[1] & 0x7c00000000000000ull) == 0x7800000000000000ull)) |
// return NaN |
{ |
#ifdef SET_STATUS_FLAGS |
__set_status_flags (pfpsf, INVALID_EXCEPTION); |
#endif |
res = 0x7c00000000000000ull; |
BID_RETURN (res); |
} |
if (((y.w[1] & 0x7c00000000000000ull) != 0x7c00000000000000ull)) { |
// otherwise return +/-Inf |
res = |
(((x) ^ y.w[1]) & 0x8000000000000000ull) | 0x7800000000000000ull; |
BID_RETURN (res); |
} |
} |
// x is 0 |
if ((y.w[1] & INFINITY_MASK64) != INFINITY_MASK64) { |
if ((!CY.w[0]) && !(CY.w[1] & 0x0001ffffffffffffull)) { |
#ifdef SET_STATUS_FLAGS |
__set_status_flags (pfpsf, INVALID_EXCEPTION); |
#endif |
// x=y=0, return NaN |
res = 0x7c00000000000000ull; |
BID_RETURN (res); |
} |
// return 0 |
res = ((x) ^ y.w[1]) & 0x8000000000000000ull; |
exponent_x = exponent_x - exponent_y + DECIMAL_EXPONENT_BIAS_128; |
if (exponent_x > DECIMAL_MAX_EXPON_64) |
exponent_x = DECIMAL_MAX_EXPON_64; |
else if (exponent_x < 0) |
exponent_x = 0; |
res |= (((UINT64) exponent_x) << 53); |
BID_RETURN (res); |
} |
} |
exponent_x += (DECIMAL_EXPONENT_BIAS_128 - DECIMAL_EXPONENT_BIAS); |
if (!valid_y) { |
// y is Inf. or NaN |
|
// test if y is NaN |
if ((y.w[1] & 0x7c00000000000000ull) == 0x7c00000000000000ull) { |
#ifdef SET_STATUS_FLAGS |
if ((y.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull) // sNaN |
__set_status_flags (pfpsf, INVALID_EXCEPTION); |
#endif |
Tmp.w[1] = (CY.w[1] & 0x00003fffffffffffull); |
Tmp.w[0] = CY.w[0]; |
TP128 = reciprocals10_128[18]; |
__mul_128x128_high (Qh, Tmp, TP128); |
amount = recip_scale[18]; |
__shr_128 (Tmp, Qh, amount); |
res = (CY.w[1] & 0xfc00000000000000ull) | Tmp.w[0]; |
BID_RETURN (res); |
} |
// y is Infinity? |
if ((y.w[1] & 0x7800000000000000ull) == 0x7800000000000000ull) { |
// return +/-0 |
res = sign_x ^ sign_y; |
BID_RETURN (res); |
} |
// y is 0, return +/-Inf |
res = |
(((x) ^ y.w[1]) & 0x8000000000000000ull) | 0x7800000000000000ull; |
#ifdef SET_STATUS_FLAGS |
__set_status_flags (pfpsf, ZERO_DIVIDE_EXCEPTION); |
#endif |
BID_RETURN (res); |
} |
#ifdef UNCHANGED_BINARY_STATUS_FLAGS |
(void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS); |
#endif |
diff_expon = exponent_x - exponent_y + DECIMAL_EXPONENT_BIAS; |
|
if (__unsigned_compare_gt_128 (CY, CX)) { |
// CX < CY |
|
// 2^64 |
f64.i = 0x5f800000; |
|
// fx ~ CX, fy ~ CY |
fx.d = (float) CX.w[1] * f64.d + (float) CX.w[0]; |
fy.d = (float) CY.w[1] * f64.d + (float) CY.w[0]; |
// expon_cy - expon_cx |
bin_index = (fy.i - fx.i) >> 23; |
|
if (CX.w[1]) { |
T = power10_index_binexp_128[bin_index].w[0]; |
__mul_64x128_short (CA, T, CX); |
} else { |
T128 = power10_index_binexp_128[bin_index]; |
__mul_64x128_short (CA, CX.w[0], T128); |
} |
|
ed2 = 15; |
if (__unsigned_compare_gt_128 (CY, CA)) |
ed2++; |
|
T128 = power10_table_128[ed2]; |
__mul_128x128_to_256 (CA4, CA, T128); |
|
ed2 += estimate_decimal_digits[bin_index]; |
CQ.w[0] = CQ.w[1] = 0; |
diff_expon = diff_expon - ed2; |
|
} else { |
// get CQ = CX/CY |
__div_128_by_128 (&CQ, &CR, CX, CY); |
|
// get number of decimal digits in CQ |
// 2^64 |
f64.i = 0x5f800000; |
fx.d = (float) CQ.w[1] * f64.d + (float) CQ.w[0]; |
// binary expon. of CQ |
bin_expon = (fx.i - 0x3f800000) >> 23; |
|
digits_q = estimate_decimal_digits[bin_expon]; |
TP128.w[0] = power10_index_binexp_128[bin_expon].w[0]; |
TP128.w[1] = power10_index_binexp_128[bin_expon].w[1]; |
if (__unsigned_compare_ge_128 (CQ, TP128)) |
digits_q++; |
|
if (digits_q <= 16) { |
if (!CR.w[1] && !CR.w[0]) { |
res = get_BID64 (sign_x ^ sign_y, diff_expon, |
CQ.w[0], rnd_mode, pfpsf); |
#ifdef UNCHANGED_BINARY_STATUS_FLAGS |
(void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); |
#endif |
BID_RETURN (res); |
} |
|
ed2 = 16 - digits_q; |
T128.w[0] = power10_table_128[ed2].w[0]; |
__mul_64x128_to_192 (CA4, (T128.w[0]), CR); |
diff_expon = diff_expon - ed2; |
CQ.w[0] *= T128.w[0]; |
} else { |
ed2 = digits_q - 16; |
diff_expon += ed2; |
T128 = reciprocals10_128[ed2]; |
__mul_128x128_to_256 (P256, CQ, T128); |
amount = recip_scale[ed2]; |
CQ.w[0] = (P256.w[2] >> amount) | (P256.w[3] << (64 - amount)); |
CQ.w[1] = 0; |
|
__mul_64x64_to_128 (CQ2, CQ.w[0], (power10_table_128[ed2].w[0])); |
|
__mul_64x64_to_128 (QB256, CQ2.w[0], CY.w[0]); |
QB256.w[1] += CQ2.w[0] * CY.w[1] + CQ2.w[1] * CY.w[0]; |
|
CA4.w[1] = CX.w[1] - QB256.w[1]; |
CA4.w[0] = CX.w[0] - QB256.w[0]; |
if (CX.w[0] < QB256.w[0]) |
CA4.w[1]--; |
if (CR.w[0] || CR.w[1]) |
CA4.w[0] |= 1; |
done = 1; |
|
} |
|
} |
if (!done) { |
__div_256_by_128 (&CQ, &CA4, CY); |
} |
|
|
|
#ifdef SET_STATUS_FLAGS |
if (CA4.w[0] || CA4.w[1]) { |
// set status flags |
__set_status_flags (pfpsf, INEXACT_EXCEPTION); |
} |
#ifndef LEAVE_TRAILING_ZEROS |
else |
#endif |
#else |
#ifndef LEAVE_TRAILING_ZEROS |
if (!CA4.w[0] && !CA4.w[1]) |
#endif |
#endif |
#ifndef LEAVE_TRAILING_ZEROS |
// check whether result is exact |
{ |
// check whether CX, CY are short |
if (!CX.w[1] && !CY.w[1] && (CX.w[0] <= 1024) && (CY.w[0] <= 1024)) { |
i = (int) CY.w[0] - 1; |
j = (int) CX.w[0] - 1; |
// difference in powers of 2 factors for Y and X |
nzeros = ed2 - factors[i][0] + factors[j][0]; |
// difference in powers of 5 factors |
d5 = ed2 - factors[i][1] + factors[j][1]; |
if (d5 < nzeros) |
nzeros = d5; |
// get P*(2^M[extra_digits])/10^extra_digits |
__mul_128x128_high (Qh, CQ, reciprocals10_128[nzeros]); |
|
// now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128 |
amount = recip_scale[nzeros]; |
__shr_128_long (CQ, Qh, amount); |
|
diff_expon += nzeros; |
} else { |
// decompose Q as Qh*10^17 + Ql |
Q_low = CQ.w[0]; |
|
{ |
tdigit[0] = Q_low & 0x3ffffff; |
tdigit[1] = 0; |
QX = Q_low >> 26; |
QX32 = QX; |
nzeros = 0; |
|
for (j = 0; QX32; j++, QX32 >>= 7) { |
k = (QX32 & 127); |
tdigit[0] += convert_table[j][k][0]; |
tdigit[1] += convert_table[j][k][1]; |
if (tdigit[0] >= 100000000) { |
tdigit[0] -= 100000000; |
tdigit[1]++; |
} |
} |
|
if (tdigit[1] >= 100000000) { |
tdigit[1] -= 100000000; |
if (tdigit[1] >= 100000000) |
tdigit[1] -= 100000000; |
} |
|
digit = tdigit[0]; |
if (!digit && !tdigit[1]) |
nzeros += 16; |
else { |
if (!digit) { |
nzeros += 8; |
digit = tdigit[1]; |
} |
// decompose digit |
PD = (UINT64) digit *0x068DB8BBull; |
digit_h = (UINT32) (PD >> 40); |
digit_low = digit - digit_h * 10000; |
|
if (!digit_low) |
nzeros += 4; |
else |
digit_h = digit_low; |
|
if (!(digit_h & 1)) |
nzeros += |
3 & (UINT32) (packed_10000_zeros[digit_h >> 3] >> |
(digit_h & 7)); |
} |
|
if (nzeros) { |
// get P*(2^M[extra_digits])/10^extra_digits |
__mul_128x128_high (Qh, CQ, reciprocals10_128[nzeros]); |
|
// now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128 |
amount = recip_scale[nzeros]; |
__shr_128 (CQ, Qh, amount); |
} |
diff_expon += nzeros; |
|
} |
} |
if(diff_expon>=0){ |
res = |
fast_get_BID64_check_OF (sign_x ^ sign_y, diff_expon, CQ.w[0], |
rnd_mode, pfpsf); |
#ifdef UNCHANGED_BINARY_STATUS_FLAGS |
(void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); |
#endif |
BID_RETURN (res); |
} |
} |
#endif |
|
if (diff_expon >= 0) { |
#ifdef IEEE_ROUND_NEAREST |
// rounding |
// 2*CA4 - CY |
CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); |
CA4r.w[0] = CA4.w[0] + CA4.w[0]; |
__sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); |
CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; |
|
D = (CA4r.w[1] | CA4r.w[0]) ? 1 : 0; |
carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) & ((CQ.w[0]) | D); |
|
CQ.w[0] += carry64; |
#else |
#ifdef IEEE_ROUND_NEAREST_TIES_AWAY |
// rounding |
// 2*CA4 - CY |
CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); |
CA4r.w[0] = CA4.w[0] + CA4.w[0]; |
__sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); |
CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; |
|
D = (CA4r.w[1] | CA4r.w[0]) ? 0 : 1; |
carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) | D; |
|
CQ.w[0] += carry64; |
if (CQ.w[0] < carry64) |
CQ.w[1]++; |
#else |
rmode = rnd_mode; |
if (sign_x ^ sign_y && (unsigned) (rmode - 1) < 2) |
rmode = 3 - rmode; |
switch (rmode) { |
case ROUNDING_TO_NEAREST: // round to nearest code |
// rounding |
// 2*CA4 - CY |
CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); |
CA4r.w[0] = CA4.w[0] + CA4.w[0]; |
__sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); |
CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; |
D = (CA4r.w[1] | CA4r.w[0]) ? 1 : 0; |
carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) & ((CQ.w[0]) | D); |
CQ.w[0] += carry64; |
if (CQ.w[0] < carry64) |
CQ.w[1]++; |
break; |
case ROUNDING_TIES_AWAY: |
// rounding |
// 2*CA4 - CY |
CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); |
CA4r.w[0] = CA4.w[0] + CA4.w[0]; |
__sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); |
CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; |
D = (CA4r.w[1] | CA4r.w[0]) ? 0 : 1; |
carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) | D; |
CQ.w[0] += carry64; |
if (CQ.w[0] < carry64) |
CQ.w[1]++; |
break; |
case ROUNDING_DOWN: |
case ROUNDING_TO_ZERO: |
break; |
default: // rounding up |
CQ.w[0]++; |
if (!CQ.w[0]) |
CQ.w[1]++; |
break; |
} |
#endif |
#endif |
|
res = |
fast_get_BID64_check_OF (sign_x ^ sign_y, diff_expon, CQ.w[0], rnd_mode, |
pfpsf); |
#ifdef UNCHANGED_BINARY_STATUS_FLAGS |
(void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); |
#endif |
BID_RETURN (res); |
} else { |
// UF occurs |
|
#ifdef SET_STATUS_FLAGS |
if ((diff_expon + 16 < 0)) { |
// set status flags |
__set_status_flags (pfpsf, INEXACT_EXCEPTION); |
} |
#endif |
rmode = rnd_mode; |
res = |
get_BID64_UF (sign_x ^ sign_y, diff_expon, CQ.w[0], CA4.w[1] | CA4.w[0], rmode, pfpsf); |
#ifdef UNCHANGED_BINARY_STATUS_FLAGS |
(void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); |
#endif |
BID_RETURN (res); |
|
} |
|
} |
|
|
//#define LEAVE_TRAILING_ZEROS |
|
TYPE0_FUNCTION_ARG128_ARGTYPE2 (UINT64, bid64qd_div, x, UINT64, y) |
|
UINT256 CA4 = |
{ {0x0ull, 0x0ull, 0x0ull, 0x0ull} }, CA4r, P256, QB256; |
UINT128 CX, CY, T128, CQ, CQ2, CR, CA, TP128, Qh, Tmp; |
UINT64 sign_x, sign_y, T, carry64, D, Q_low, QX, PD, res, valid_y; |
int_float fx, fy, f64; |
UINT32 QX32, tdigit[3], digit, digit_h, digit_low; |
int exponent_x, exponent_y, bin_index, bin_expon, diff_expon, ed2, |
digits_q, amount; |
int nzeros, i, j, k, d5, done = 0; |
unsigned rmode; |
#ifdef UNCHANGED_BINARY_STATUS_FLAGS |
fexcept_t binaryflags = 0; |
#endif |
|
valid_y = unpack_BID64 (&sign_y, &exponent_y, &CY.w[0], (y)); |
|
// unpack arguments, check for NaN or Infinity |
if (!unpack_BID128_value (&sign_x, &exponent_x, &CX, x)) { |
// test if x is NaN |
if ((x.w[1] & 0x7c00000000000000ull) == 0x7c00000000000000ull) { |
#ifdef SET_STATUS_FLAGS |
if ((x.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull || // sNaN |
(y & 0x7e00000000000000ull) == 0x7e00000000000000ull) |
__set_status_flags (pfpsf, INVALID_EXCEPTION); |
#endif |
Tmp.w[1] = (CX.w[1] & 0x00003fffffffffffull); |
Tmp.w[0] = CX.w[0]; |
TP128 = reciprocals10_128[18]; |
__mul_128x128_high (Qh, Tmp, TP128); |
amount = recip_scale[18]; |
__shr_128 (Tmp, Qh, amount); |
res = (CX.w[1] & 0xfc00000000000000ull) | Tmp.w[0]; |
BID_RETURN (res); |
} |
// x is Infinity? |
if ((x.w[1] & 0x7800000000000000ull) == 0x7800000000000000ull) { |
// check if y is Inf. |
if (((y & 0x7c00000000000000ull) == 0x7800000000000000ull)) |
// return NaN |
{ |
#ifdef SET_STATUS_FLAGS |
__set_status_flags (pfpsf, INVALID_EXCEPTION); |
#endif |
res = 0x7c00000000000000ull; |
BID_RETURN (res); |
} |
if (((y & 0x7c00000000000000ull) != 0x7c00000000000000ull)) { |
// otherwise return +/-Inf |
res = |
((x.w[1] ^ (y)) & 0x8000000000000000ull) | 0x7800000000000000ull; |
BID_RETURN (res); |
} |
} |
// x is 0 |
if (((y & INFINITY_MASK64) != INFINITY_MASK64) && |
!(CY.w[0])) { |
#ifdef SET_STATUS_FLAGS |
__set_status_flags (pfpsf, INVALID_EXCEPTION); |
#endif |
// x=y=0, return NaN |
res = 0x7c00000000000000ull; |
BID_RETURN (res); |
} |
// return 0 |
if (((y & 0x7800000000000000ull) != 0x7800000000000000ull)) { |
if (!CY.w[0]) { |
#ifdef SET_STATUS_FLAGS |
__set_status_flags (pfpsf, INVALID_EXCEPTION); |
#endif |
res = 0x7c00000000000000ull; |
BID_RETURN (res); |
} |
exponent_x = |
exponent_x - exponent_y - DECIMAL_EXPONENT_BIAS_128 + |
(DECIMAL_EXPONENT_BIAS << 1); |
if (exponent_x > DECIMAL_MAX_EXPON_64) |
exponent_x = DECIMAL_MAX_EXPON_64; |
else if (exponent_x < 0) |
exponent_x = 0; |
res = (sign_x ^ sign_y) | (((UINT64) exponent_x) << 53); |
BID_RETURN (res); |
} |
} |
CY.w[1] = 0; |
if (!valid_y) { |
// y is Inf. or NaN |
|
// test if y is NaN |
if ((y & NAN_MASK64) == NAN_MASK64) { |
#ifdef SET_STATUS_FLAGS |
if ((y & SNAN_MASK64) == SNAN_MASK64) // sNaN |
__set_status_flags (pfpsf, INVALID_EXCEPTION); |
#endif |
BID_RETURN (CY.w[0] & QUIET_MASK64); |
} |
// y is Infinity? |
if (((y) & 0x7800000000000000ull) == 0x7800000000000000ull) { |
// return +/-0 |
res = sign_x ^ sign_y; |
BID_RETURN (res); |
} |
// y is 0, return +/-Inf |
res = |
((x.w[1] ^ (y)) & 0x8000000000000000ull) | 0x7800000000000000ull; |
#ifdef SET_STATUS_FLAGS |
__set_status_flags (pfpsf, ZERO_DIVIDE_EXCEPTION); |
#endif |
BID_RETURN (res); |
} |
#ifdef UNCHANGED_BINARY_STATUS_FLAGS |
(void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS); |
#endif |
diff_expon = |
exponent_x - exponent_y - DECIMAL_EXPONENT_BIAS_128 + |
(DECIMAL_EXPONENT_BIAS << 1); |
|
if (__unsigned_compare_gt_128 (CY, CX)) { |
// CX < CY |
|
// 2^64 |
f64.i = 0x5f800000; |
|
// fx ~ CX, fy ~ CY |
fx.d = (float) CX.w[1] * f64.d + (float) CX.w[0]; |
fy.d = (float) CY.w[1] * f64.d + (float) CY.w[0]; |
// expon_cy - expon_cx |
bin_index = (fy.i - fx.i) >> 23; |
|
if (CX.w[1]) { |
T = power10_index_binexp_128[bin_index].w[0]; |
__mul_64x128_short (CA, T, CX); |
} else { |
T128 = power10_index_binexp_128[bin_index]; |
__mul_64x128_short (CA, CX.w[0], T128); |
} |
|
ed2 = 15; |
if (__unsigned_compare_gt_128 (CY, CA)) |
ed2++; |
|
T128 = power10_table_128[ed2]; |
__mul_128x128_to_256 (CA4, CA, T128); |
|
ed2 += estimate_decimal_digits[bin_index]; |
CQ.w[0] = CQ.w[1] = 0; |
diff_expon = diff_expon - ed2; |
|
} else { |
// get CQ = CX/CY |
__div_128_by_128 (&CQ, &CR, CX, CY); |
|
// get number of decimal digits in CQ |
// 2^64 |
f64.i = 0x5f800000; |
fx.d = (float) CQ.w[1] * f64.d + (float) CQ.w[0]; |
// binary expon. of CQ |
bin_expon = (fx.i - 0x3f800000) >> 23; |
|
digits_q = estimate_decimal_digits[bin_expon]; |
TP128.w[0] = power10_index_binexp_128[bin_expon].w[0]; |
TP128.w[1] = power10_index_binexp_128[bin_expon].w[1]; |
if (__unsigned_compare_ge_128 (CQ, TP128)) |
digits_q++; |
|
if (digits_q <= 16) { |
if (!CR.w[1] && !CR.w[0]) { |
res = get_BID64 (sign_x ^ sign_y, diff_expon, |
CQ.w[0], rnd_mode, pfpsf); |
#ifdef UNCHANGED_BINARY_STATUS_FLAGS |
(void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); |
#endif |
BID_RETURN (res); |
} |
|
ed2 = 16 - digits_q; |
T128.w[0] = power10_table_128[ed2].w[0]; |
__mul_64x128_to_192 (CA4, (T128.w[0]), CR); |
diff_expon = diff_expon - ed2; |
CQ.w[0] *= T128.w[0]; |
} else { |
ed2 = digits_q - 16; |
diff_expon += ed2; |
T128 = reciprocals10_128[ed2]; |
__mul_128x128_to_256 (P256, CQ, T128); |
amount = recip_scale[ed2]; |
CQ.w[0] = (P256.w[2] >> amount) | (P256.w[3] << (64 - amount)); |
CQ.w[1] = 0; |
|
__mul_64x64_to_128 (CQ2, CQ.w[0], (power10_table_128[ed2].w[0])); |
|
__mul_64x64_to_128 (QB256, CQ2.w[0], CY.w[0]); |
QB256.w[1] += CQ2.w[0] * CY.w[1] + CQ2.w[1] * CY.w[0]; |
|
CA4.w[1] = CX.w[1] - QB256.w[1]; |
CA4.w[0] = CX.w[0] - QB256.w[0]; |
if (CX.w[0] < QB256.w[0]) |
CA4.w[1]--; |
if (CR.w[0] || CR.w[1]) |
CA4.w[0] |= 1; |
done = 1; |
if(CA4.w[1]|CA4.w[0]) { |
__mul_64x128_low(CY, (power10_table_128[ed2].w[0]),CY); |
} |
|
} |
|
} |
|
if (!done) { |
__div_256_by_128 (&CQ, &CA4, CY); |
} |
|
#ifdef SET_STATUS_FLAGS |
if (CA4.w[0] || CA4.w[1]) { |
// set status flags |
__set_status_flags (pfpsf, INEXACT_EXCEPTION); |
} |
#ifndef LEAVE_TRAILING_ZEROS |
else |
#endif |
#else |
#ifndef LEAVE_TRAILING_ZEROS |
if (!CA4.w[0] && !CA4.w[1]) |
#endif |
#endif |
#ifndef LEAVE_TRAILING_ZEROS |
// check whether result is exact |
{ |
if(!done) { |
// check whether CX, CY are short |
if (!CX.w[1] && !CY.w[1] && (CX.w[0] <= 1024) && (CY.w[0] <= 1024)) { |
i = (int) CY.w[0] - 1; |
j = (int) CX.w[0] - 1; |
// difference in powers of 2 factors for Y and X |
nzeros = ed2 - factors[i][0] + factors[j][0]; |
// difference in powers of 5 factors |
d5 = ed2 - factors[i][1] + factors[j][1]; |
if (d5 < nzeros) |
nzeros = d5; |
// get P*(2^M[extra_digits])/10^extra_digits |
__mul_128x128_high (Qh, CQ, reciprocals10_128[nzeros]); |
//__mul_128x128_to_256(P256, CQ, reciprocals10_128[nzeros]);Qh.w[1]=P256.w[3];Qh.w[0]=P256.w[2]; |
|
// now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128 |
amount = recip_scale[nzeros]; |
__shr_128_long (CQ, Qh, amount); |
|
diff_expon += nzeros; |
} else { |
// decompose Q as Qh*10^17 + Ql |
//T128 = reciprocals10_128[17]; |
Q_low = CQ.w[0]; |
|
{ |
tdigit[0] = Q_low & 0x3ffffff; |
tdigit[1] = 0; |
QX = Q_low >> 26; |
QX32 = QX; |
nzeros = 0; |
|
for (j = 0; QX32; j++, QX32 >>= 7) { |
k = (QX32 & 127); |
tdigit[0] += convert_table[j][k][0]; |
tdigit[1] += convert_table[j][k][1]; |
if (tdigit[0] >= 100000000) { |
tdigit[0] -= 100000000; |
tdigit[1]++; |
} |
} |
|
if (tdigit[1] >= 100000000) { |
tdigit[1] -= 100000000; |
if (tdigit[1] >= 100000000) |
tdigit[1] -= 100000000; |
} |
|
digit = tdigit[0]; |
if (!digit && !tdigit[1]) |
nzeros += 16; |
else { |
if (!digit) { |
nzeros += 8; |
digit = tdigit[1]; |
} |
// decompose digit |
PD = (UINT64) digit *0x068DB8BBull; |
digit_h = (UINT32) (PD >> 40); |
digit_low = digit - digit_h * 10000; |
|
if (!digit_low) |
nzeros += 4; |
else |
digit_h = digit_low; |
|
if (!(digit_h & 1)) |
nzeros += |
3 & (UINT32) (packed_10000_zeros[digit_h >> 3] >> |
(digit_h & 7)); |
} |
|
if (nzeros) { |
// get P*(2^M[extra_digits])/10^extra_digits |
__mul_128x128_high (Qh, CQ, reciprocals10_128[nzeros]); |
|
// now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128 |
amount = recip_scale[nzeros]; |
__shr_128 (CQ, Qh, amount); |
} |
diff_expon += nzeros; |
|
} |
} |
} |
if(diff_expon>=0){ |
res = |
fast_get_BID64_check_OF (sign_x ^ sign_y, diff_expon, CQ.w[0], |
rnd_mode, pfpsf); |
#ifdef UNCHANGED_BINARY_STATUS_FLAGS |
(void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); |
#endif |
BID_RETURN (res); |
} |
} |
#endif |
|
if (diff_expon >= 0) { |
#ifdef IEEE_ROUND_NEAREST |
// rounding |
// 2*CA4 - CY |
CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); |
CA4r.w[0] = CA4.w[0] + CA4.w[0]; |
__sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); |
CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; |
|
D = (CA4r.w[1] | CA4r.w[0]) ? 1 : 0; |
carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) & ((CQ.w[0]) | D); |
|
CQ.w[0] += carry64; |
//if(CQ.w[0]<carry64) |
//CQ.w[1] ++; |
#else |
#ifdef IEEE_ROUND_NEAREST_TIES_AWAY |
// rounding |
// 2*CA4 - CY |
CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); |
CA4r.w[0] = CA4.w[0] + CA4.w[0]; |
__sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); |
CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; |
|
D = (CA4r.w[1] | CA4r.w[0]) ? 0 : 1; |
carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) | D; |
|
CQ.w[0] += carry64; |
if (CQ.w[0] < carry64) |
CQ.w[1]++; |
#else |
rmode = rnd_mode; |
if (sign_x ^ sign_y && (unsigned) (rmode - 1) < 2) |
rmode = 3 - rmode; |
switch (rmode) { |
case ROUNDING_TO_NEAREST: // round to nearest code |
// rounding |
// 2*CA4 - CY |
CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); |
CA4r.w[0] = CA4.w[0] + CA4.w[0]; |
__sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); |
CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; |
D = (CA4r.w[1] | CA4r.w[0]) ? 1 : 0; |
carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) & ((CQ.w[0]) | D); |
CQ.w[0] += carry64; |
if (CQ.w[0] < carry64) |
CQ.w[1]++; |
break; |
case ROUNDING_TIES_AWAY: |
// rounding |
// 2*CA4 - CY |
CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); |
CA4r.w[0] = CA4.w[0] + CA4.w[0]; |
__sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); |
CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; |
D = (CA4r.w[1] | CA4r.w[0]) ? 0 : 1; |
carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) | D; |
CQ.w[0] += carry64; |
if (CQ.w[0] < carry64) |
CQ.w[1]++; |
break; |
case ROUNDING_DOWN: |
case ROUNDING_TO_ZERO: |
break; |
default: // rounding up |
CQ.w[0]++; |
if (!CQ.w[0]) |
CQ.w[1]++; |
break; |
} |
#endif |
#endif |
|
|
res = |
fast_get_BID64_check_OF (sign_x ^ sign_y, diff_expon, CQ.w[0], rnd_mode, |
pfpsf); |
#ifdef UNCHANGED_BINARY_STATUS_FLAGS |
(void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); |
#endif |
BID_RETURN (res); |
} else { |
// UF occurs |
|
#ifdef SET_STATUS_FLAGS |
if ((diff_expon + 16 < 0)) { |
// set status flags |
__set_status_flags (pfpsf, INEXACT_EXCEPTION); |
} |
#endif |
rmode = rnd_mode; |
res = |
get_BID64_UF (sign_x ^ sign_y, diff_expon, CQ.w[0], CA4.w[1] | CA4.w[0], rmode, pfpsf); |
#ifdef UNCHANGED_BINARY_STATUS_FLAGS |
(void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); |
#endif |
BID_RETURN (res); |
|
} |
|
} |
|
//#define LEAVE_TRAILING_ZEROS |
|
extern UINT32 convert_table[5][128][2]; |
extern SINT8 factors[][2]; |
extern UINT8 packed_10000_zeros[]; |
|
|
//UINT64* bid64_div128x128(UINT64 res, UINT128 *px, UINT128 *py, unsigned rnd_mode, unsigned *pfpsf) |
|
TYPE0_FUNCTION_ARG128_ARG128 (UINT64, bid64qq_div, x, y) |
UINT256 CA4 = |
{ {0x0ull, 0x0ull, 0x0ull, 0x0ull} }, CA4r, P256, QB256; |
UINT128 CX, CY, T128, CQ, CQ2, CR, CA, TP128, Qh, Tmp; |
UINT64 sign_x, sign_y, T, carry64, D, Q_low, QX, valid_y, PD, res; |
int_float fx, fy, f64; |
UINT32 QX32, tdigit[3], digit, digit_h, digit_low; |
int exponent_x, exponent_y, bin_index, bin_expon, diff_expon, ed2, |
digits_q, amount; |
int nzeros, i, j, k, d5, done = 0; |
unsigned rmode; |
#ifdef UNCHANGED_BINARY_STATUS_FLAGS |
fexcept_t binaryflags = 0; |
#endif |
|
valid_y = unpack_BID128_value (&sign_y, &exponent_y, &CY, y); |
|
// unpack arguments, check for NaN or Infinity |
if (!unpack_BID128_value (&sign_x, &exponent_x, &CX, x)) { |
// test if x is NaN |
if ((x.w[1] & 0x7c00000000000000ull) == 0x7c00000000000000ull) { |
#ifdef SET_STATUS_FLAGS |
if ((x.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull || // sNaN |
(y.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull) |
__set_status_flags (pfpsf, INVALID_EXCEPTION); |
#endif |
Tmp.w[1] = (CX.w[1] & 0x00003fffffffffffull); |
Tmp.w[0] = CX.w[0]; |
TP128 = reciprocals10_128[18]; |
__mul_128x128_high (Qh, Tmp, TP128); |
amount = recip_scale[18]; |
__shr_128 (Tmp, Qh, amount); |
res = (CX.w[1] & 0xfc00000000000000ull) | Tmp.w[0]; |
BID_RETURN (res); |
} |
// x is Infinity? |
if ((x.w[1] & 0x7800000000000000ull) == 0x7800000000000000ull) { |
// check if y is Inf. |
if (((y.w[1] & 0x7c00000000000000ull) == 0x7800000000000000ull)) |
// return NaN |
{ |
#ifdef SET_STATUS_FLAGS |
__set_status_flags (pfpsf, INVALID_EXCEPTION); |
#endif |
res = 0x7c00000000000000ull; |
BID_RETURN (res); |
} |
if (((y.w[1] & 0x7c00000000000000ull) != 0x7c00000000000000ull)) { |
// otherwise return +/-Inf |
res = |
((x.w[1] ^ y. |
w[1]) & 0x8000000000000000ull) | 0x7800000000000000ull; |
BID_RETURN (res); |
} |
} |
// x is 0 |
if (((y.w[1] & 0x7800000000000000ull) != 0x7800000000000000ull)) { |
if ((!CY.w[0]) && !(CY.w[1] & 0x0001ffffffffffffull)) { |
#ifdef SET_STATUS_FLAGS |
__set_status_flags (pfpsf, INVALID_EXCEPTION); |
#endif |
// x=y=0, return NaN |
res = 0x7c00000000000000ull; |
BID_RETURN (res); |
} |
// return 0 |
res = (x.w[1] ^ y.w[1]) & 0x8000000000000000ull; |
exponent_x = exponent_x - exponent_y + DECIMAL_EXPONENT_BIAS; |
if (exponent_x > DECIMAL_MAX_EXPON_64) |
exponent_x = DECIMAL_MAX_EXPON_64; |
else if (exponent_x < 0) |
exponent_x = 0; |
res |= (((UINT64) exponent_x) << 53); |
BID_RETURN (res); |
} |
} |
if (!valid_y) { |
// y is Inf. or NaN |
|
// test if y is NaN |
if ((y.w[1] & 0x7c00000000000000ull) == 0x7c00000000000000ull) { |
#ifdef SET_STATUS_FLAGS |
if ((y.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull) // sNaN |
__set_status_flags (pfpsf, INVALID_EXCEPTION); |
#endif |
Tmp.w[1] = (CY.w[1] & 0x00003fffffffffffull); |
Tmp.w[0] = CY.w[0]; |
TP128 = reciprocals10_128[18]; |
__mul_128x128_high (Qh, Tmp, TP128); |
amount = recip_scale[18]; |
__shr_128 (Tmp, Qh, amount); |
res = (CY.w[1] & 0xfc00000000000000ull) | Tmp.w[0]; |
BID_RETURN (res); |
} |
// y is Infinity? |
if ((y.w[1] & 0x7800000000000000ull) == 0x7800000000000000ull) { |
// return +/-0 |
res = sign_x ^ sign_y; |
BID_RETURN (res); |
} |
// y is 0, return +/-Inf |
res = |
((x.w[1] ^ y.w[1]) & 0x8000000000000000ull) | 0x7800000000000000ull; |
#ifdef SET_STATUS_FLAGS |
__set_status_flags (pfpsf, ZERO_DIVIDE_EXCEPTION); |
#endif |
BID_RETURN (res); |
} |
#ifdef UNCHANGED_BINARY_STATUS_FLAGS |
(void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS); |
#endif |
diff_expon = exponent_x - exponent_y + DECIMAL_EXPONENT_BIAS; |
|
if (__unsigned_compare_gt_128 (CY, CX)) { |
// CX < CY |
|
// 2^64 |
f64.i = 0x5f800000; |
|
// fx ~ CX, fy ~ CY |
fx.d = (float) CX.w[1] * f64.d + (float) CX.w[0]; |
fy.d = (float) CY.w[1] * f64.d + (float) CY.w[0]; |
// expon_cy - expon_cx |
bin_index = (fy.i - fx.i) >> 23; |
|
if (CX.w[1]) { |
T = power10_index_binexp_128[bin_index].w[0]; |
__mul_64x128_short (CA, T, CX); |
} else { |
T128 = power10_index_binexp_128[bin_index]; |
__mul_64x128_short (CA, CX.w[0], T128); |
} |
|
ed2 = 15; |
if (__unsigned_compare_gt_128 (CY, CA)) |
ed2++; |
|
T128 = power10_table_128[ed2]; |
__mul_128x128_to_256 (CA4, CA, T128); |
|
ed2 += estimate_decimal_digits[bin_index]; |
CQ.w[0] = CQ.w[1] = 0; |
diff_expon = diff_expon - ed2; |
|
} else { |
// get CQ = CX/CY |
__div_128_by_128 (&CQ, &CR, CX, CY); |
|
// get number of decimal digits in CQ |
// 2^64 |
f64.i = 0x5f800000; |
fx.d = (float) CQ.w[1] * f64.d + (float) CQ.w[0]; |
// binary expon. of CQ |
bin_expon = (fx.i - 0x3f800000) >> 23; |
|
digits_q = estimate_decimal_digits[bin_expon]; |
TP128.w[0] = power10_index_binexp_128[bin_expon].w[0]; |
TP128.w[1] = power10_index_binexp_128[bin_expon].w[1]; |
if (__unsigned_compare_ge_128 (CQ, TP128)) |
digits_q++; |
|
if (digits_q <= 16) { |
if (!CR.w[1] && !CR.w[0]) { |
res = get_BID64 (sign_x ^ sign_y, diff_expon, |
CQ.w[0], rnd_mode, pfpsf); |
#ifdef UNCHANGED_BINARY_STATUS_FLAGS |
(void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); |
#endif |
BID_RETURN (res); |
} |
|
ed2 = 16 - digits_q; |
T128.w[0] = power10_table_128[ed2].w[0]; |
__mul_64x128_to_192 (CA4, (T128.w[0]), CR); |
diff_expon = diff_expon - ed2; |
CQ.w[0] *= T128.w[0]; |
} else { |
ed2 = digits_q - 16; |
diff_expon += ed2; |
T128 = reciprocals10_128[ed2]; |
__mul_128x128_to_256 (P256, CQ, T128); |
amount = recip_scale[ed2]; |
CQ.w[0] = (P256.w[2] >> amount) | (P256.w[3] << (64 - amount)); |
CQ.w[1] = 0; |
|
__mul_64x64_to_128 (CQ2, CQ.w[0], (power10_table_128[ed2].w[0])); |
|
__mul_64x64_to_128 (QB256, CQ2.w[0], CY.w[0]); |
QB256.w[1] += CQ2.w[0] * CY.w[1] + CQ2.w[1] * CY.w[0]; |
|
CA4.w[1] = CX.w[1] - QB256.w[1]; |
CA4.w[0] = CX.w[0] - QB256.w[0]; |
if (CX.w[0] < QB256.w[0]) |
CA4.w[1]--; |
if (CR.w[0] || CR.w[1]) |
CA4.w[0] |= 1; |
done = 1; |
if(CA4.w[1]|CA4.w[0]) { |
__mul_64x128_low(CY, (power10_table_128[ed2].w[0]),CY); |
} |
} |
|
} |
|
if (!done) { |
__div_256_by_128 (&CQ, &CA4, CY); |
} |
|
|
|
#ifdef SET_STATUS_FLAGS |
if (CA4.w[0] || CA4.w[1]) { |
// set status flags |
__set_status_flags (pfpsf, INEXACT_EXCEPTION); |
} |
#ifndef LEAVE_TRAILING_ZEROS |
else |
#endif |
#else |
#ifndef LEAVE_TRAILING_ZEROS |
if (!CA4.w[0] && !CA4.w[1]) |
#endif |
#endif |
#ifndef LEAVE_TRAILING_ZEROS |
// check whether result is exact |
{ |
if(!done) { |
// check whether CX, CY are short |
if (!CX.w[1] && !CY.w[1] && (CX.w[0] <= 1024) && (CY.w[0] <= 1024)) { |
i = (int) CY.w[0] - 1; |
j = (int) CX.w[0] - 1; |
// difference in powers of 2 factors for Y and X |
nzeros = ed2 - factors[i][0] + factors[j][0]; |
// difference in powers of 5 factors |
d5 = ed2 - factors[i][1] + factors[j][1]; |
if (d5 < nzeros) |
nzeros = d5; |
// get P*(2^M[extra_digits])/10^extra_digits |
__mul_128x128_high (Qh, CQ, reciprocals10_128[nzeros]); |
//__mul_128x128_to_256(P256, CQ, reciprocals10_128[nzeros]);Qh.w[1]=P256.w[3];Qh.w[0]=P256.w[2]; |
|
// now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128 |
amount = recip_scale[nzeros]; |
__shr_128_long (CQ, Qh, amount); |
|
diff_expon += nzeros; |
} else { |
// decompose Q as Qh*10^17 + Ql |
//T128 = reciprocals10_128[17]; |
Q_low = CQ.w[0]; |
|
{ |
tdigit[0] = Q_low & 0x3ffffff; |
tdigit[1] = 0; |
QX = Q_low >> 26; |
QX32 = QX; |
nzeros = 0; |
|
for (j = 0; QX32; j++, QX32 >>= 7) { |
k = (QX32 & 127); |
tdigit[0] += convert_table[j][k][0]; |
tdigit[1] += convert_table[j][k][1]; |
if (tdigit[0] >= 100000000) { |
tdigit[0] -= 100000000; |
tdigit[1]++; |
} |
} |
|
if (tdigit[1] >= 100000000) { |
tdigit[1] -= 100000000; |
if (tdigit[1] >= 100000000) |
tdigit[1] -= 100000000; |
} |
|
digit = tdigit[0]; |
if (!digit && !tdigit[1]) |
nzeros += 16; |
else { |
if (!digit) { |
nzeros += 8; |
digit = tdigit[1]; |
} |
// decompose digit |
PD = (UINT64) digit *0x068DB8BBull; |
digit_h = (UINT32) (PD >> 40); |
digit_low = digit - digit_h * 10000; |
|
if (!digit_low) |
nzeros += 4; |
else |
digit_h = digit_low; |
|
if (!(digit_h & 1)) |
nzeros += |
3 & (UINT32) (packed_10000_zeros[digit_h >> 3] >> |
(digit_h & 7)); |
} |
|
if (nzeros) { |
// get P*(2^M[extra_digits])/10^extra_digits |
__mul_128x128_high (Qh, CQ, reciprocals10_128[nzeros]); |
|
// now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128 |
amount = recip_scale[nzeros]; |
__shr_128 (CQ, Qh, amount); |
} |
diff_expon += nzeros; |
|
} |
} |
} |
if(diff_expon>=0){ |
res = |
fast_get_BID64_check_OF (sign_x ^ sign_y, diff_expon, CQ.w[0], |
rnd_mode, pfpsf); |
#ifdef UNCHANGED_BINARY_STATUS_FLAGS |
(void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); |
#endif |
BID_RETURN (res); |
} |
} |
#endif |
|
if(diff_expon>=0) { |
|
#ifdef IEEE_ROUND_NEAREST |
// rounding |
// 2*CA4 - CY |
CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); |
CA4r.w[0] = CA4.w[0] + CA4.w[0]; |
__sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); |
CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; |
|
D = (CA4r.w[1] | CA4r.w[0]) ? 1 : 0; |
carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) & ((CQ.w[0]) | D); |
|
CQ.w[0] += carry64; |
//if(CQ.w[0]<carry64) |
//CQ.w[1] ++; |
#else |
#ifdef IEEE_ROUND_NEAREST_TIES_AWAY |
// rounding |
// 2*CA4 - CY |
CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); |
CA4r.w[0] = CA4.w[0] + CA4.w[0]; |
__sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); |
CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; |
|
D = (CA4r.w[1] | CA4r.w[0]) ? 0 : 1; |
carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) | D; |
|
CQ.w[0] += carry64; |
if (CQ.w[0] < carry64) |
CQ.w[1]++; |
#else |
rmode = rnd_mode; |
if (sign_x ^ sign_y && (unsigned) (rmode - 1) < 2) |
rmode = 3 - rmode; |
switch (rmode) { |
case ROUNDING_TO_NEAREST: // round to nearest code |
// rounding |
// 2*CA4 - CY |
CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); |
CA4r.w[0] = CA4.w[0] + CA4.w[0]; |
__sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); |
CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; |
D = (CA4r.w[1] | CA4r.w[0]) ? 1 : 0; |
carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) & ((CQ.w[0]) | D); |
CQ.w[0] += carry64; |
if (CQ.w[0] < carry64) |
CQ.w[1]++; |
break; |
case ROUNDING_TIES_AWAY: |
// rounding |
// 2*CA4 - CY |
CA4r.w[1] = (CA4.w[1] + CA4.w[1]) | (CA4.w[0] >> 63); |
CA4r.w[0] = CA4.w[0] + CA4.w[0]; |
__sub_borrow_out (CA4r.w[0], carry64, CA4r.w[0], CY.w[0]); |
CA4r.w[1] = CA4r.w[1] - CY.w[1] - carry64; |
D = (CA4r.w[1] | CA4r.w[0]) ? 0 : 1; |
carry64 = (1 + (((SINT64) CA4r.w[1]) >> 63)) | D; |
CQ.w[0] += carry64; |
if (CQ.w[0] < carry64) |
CQ.w[1]++; |
break; |
case ROUNDING_DOWN: |
case ROUNDING_TO_ZERO: |
break; |
default: // rounding up |
CQ.w[0]++; |
if (!CQ.w[0]) |
CQ.w[1]++; |
break; |
} |
#endif |
#endif |
|
|
res = |
fast_get_BID64_check_OF (sign_x ^ sign_y, diff_expon, CQ.w[0], rnd_mode, |
pfpsf); |
#ifdef UNCHANGED_BINARY_STATUS_FLAGS |
(void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); |
#endif |
BID_RETURN (res); |
} else { |
// UF occurs |
|
#ifdef SET_STATUS_FLAGS |
if ((diff_expon + 16 < 0)) { |
// set status flags |
__set_status_flags (pfpsf, INEXACT_EXCEPTION); |
} |
#endif |
rmode = rnd_mode; |
res = |
get_BID64_UF (sign_x ^ sign_y, diff_expon, CQ.w[0], CA4.w[1] | CA4.w[0], rmode, pfpsf); |
#ifdef UNCHANGED_BINARY_STATUS_FLAGS |
(void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS); |
#endif |
BID_RETURN (res); |
|
} |
|
} |