Subversion Repositories Kolibri OS


Rev Author Line No. Line
6515 serge 1
/* Copyright (C) 2007-2015 Free Software Foundation, Inc.
2
 
3
This file is part of GCC.
4
 
5
GCC is free software; you can redistribute it and/or modify it under
6
the terms of the GNU General Public License as published by the Free
7
Software Foundation; either version 3, or (at your option) any later
8
version.
9
 
10
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
11
WARRANTY; without even the implied warranty of MERCHANTABILITY or
12
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
13
for more details.
14
 
15
Under Section 7 of GPL version 3, you are granted additional
16
permissions described in the GCC Runtime Library Exception, version
17
3.1, as published by the Free Software Foundation.
18
 
19
You should have received a copy of the GNU General Public License and
20
a copy of the GCC Runtime Library Exception along with this program;
21
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
22
<http://www.gnu.org/licenses/>.  */
23
 
24
#ifndef __BIDECIMAL_H
25
#define __BIDECIMAL_H
26
 
27
#include "bid_conf.h"
28
#include "bid_functions.h"
29
 
30
#define __BID_INLINE__ static __inline
31
 
32
/*********************************************************************
33
 *
34
 *      Logical Shift Macros
35
 *
36
 *********************************************************************/
37
 
38
#define __shr_128(Q, A, k)                        \
39
{                                                 \
40
     (Q).w[0] = (A).w[0] >> k;                      \
41
	 (Q).w[0] |= (A).w[1] << (64-k);               \
42
	 (Q).w[1] = (A).w[1] >> k;                    \
43
}
44
 
45
#define __shr_128_long(Q, A, k)                   \
46
{                                                 \
47
	if((k)<64) {                                  \
48
     (Q).w[0] = (A).w[0] >> k;                    \
49
	 (Q).w[0] |= (A).w[1] << (64-k);              \
50
	 (Q).w[1] = (A).w[1] >> k;                    \
51
	}                                             \
52
	else {                                        \
53
	 (Q).w[0] = (A).w[1]>>((k)-64);               \
54
	 (Q).w[1] = 0;                                \
55
	}                                             \
56
}
57
 
58
#define __shl_128_long(Q, A, k)                   \
59
{                                                 \
60
	if((k)<64) {                                  \
61
     (Q).w[1] = (A).w[1] << k;                    \
62
	 (Q).w[1] |= (A).w[0] >> (64-k);              \
63
	 (Q).w[0] = (A).w[0] << k;                    \
64
	}                                             \
65
	else {                                        \
66
	 (Q).w[1] = (A).w[0]<<((k)-64);               \
67
	 (Q).w[0] = 0;                                \
68
	}                                             \
69
}
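// Added usage sketch (not from the original header): UINT128 comes from
// bid_functions.h and, in the layout assumed throughout these macros, stores
// the low 64 bits in w[0] and the high 64 bits in w[1].  The "_long" variants
// above also accept shift counts of 64..127, while __shr_128 expects 0 < k < 64.
#if 0
static void shift_sketch (void) {
  UINT128 a, q;
  a.w[1] = 1ull;                /* a = 2^64 */
  a.w[0] = 0ull;
  __shr_128_long (q, a, 4);     /* q = 2^60:  q.w[1] == 0, q.w[0] == 1ull << 60 */
  __shl_128_long (q, a, 60);    /* q = 2^124: q.w[1] == 1ull << 60, q.w[0] == 0 */
}
#endif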
70
 
71
#define __low_64(Q)  (Q).w[0]
72
/*********************************************************************
73
 *
74
 *      String Macros
75
 *
76
 *********************************************************************/
77
#define tolower_macro(x) (((unsigned char)((x)-'A')<=('Z'-'A'))?((x)-'A'+'a'):(x))
78
/*********************************************************************
79
 *
80
 *      Compare Macros
81
 *
82
 *********************************************************************/
83
// greater than
84
//  return 0 if A<=B
85
//  non-zero if A>B
86
#define __unsigned_compare_gt_128(A, B)  \
87
    ((A.w[1]>B.w[1]) || ((A.w[1]==B.w[1]) && (A.w[0]>B.w[0])))
88
// greater-or-equal
89
#define __unsigned_compare_ge_128(A, B)  \
90
    ((A.w[1]>B.w[1]) || ((A.w[1]==B.w[1]) && (A.w[0]>=B.w[0])))
91
#define __test_equal_128(A, B)  (((A).w[1]==(B).w[1]) && ((A).w[0]==(B).w[0]))
92
// tighten exponent range
93
#define __tight_bin_range_128(bp, P, bin_expon)  \
94
{                                                \
95
UINT64 M;                                        \
96
	M = 1;                                       \
97
	(bp) = (bin_expon);                          \
98
	if((bp)<63) {                                \
99
	  M <<= ((bp)+1);                            \
100
	  if((P).w[0] >= M) (bp)++; }                 \
101
	else if((bp)>64) {                           \
102
	  M <<= ((bp)+1-64);                         \
103
	  if(((P).w[1]>M) ||((P).w[1]==M && (P).w[0]))\
104
	      (bp)++; }                              \
105
	else if((P).w[1]) (bp)++;                    \
106
}
107
/*********************************************************************
108
 *
109
 *      Add/Subtract Macros
110
 *
111
 *********************************************************************/
112
// add 64-bit value to 128-bit
113
#define __add_128_64(R128, A128, B64)    \
114
{                                        \
115
UINT64 R64H;                             \
116
	R64H = (A128).w[1];                 \
117
	(R128).w[0] = (B64) + (A128).w[0];     \
118
	if((R128).w[0] < (B64))               \
119
	  R64H ++;                           \
120
    (R128).w[1] = R64H;                  \
121
}
122
// subtract 64-bit value from 128-bit
123
#define __sub_128_64(R128, A128, B64)    \
124
{                                        \
125
UINT64 R64H;                             \
126
	R64H = (A128).w[1];                  \
127
	if((A128).w[0] < (B64))               \
128
	  R64H --;                           \
129
    (R128).w[1] = R64H;                  \
130
	(R128).w[0] = (A128).w[0] - (B64);     \
131
}
132
// add 128-bit value to 128-bit
133
// assume no carry-out
134
#define __add_128_128(R128, A128, B128)  \
135
{                                        \
136
UINT128 Q128;                            \
137
	Q128.w[1] = (A128).w[1]+(B128).w[1]; \
138
	Q128.w[0] = (B128).w[0] + (A128).w[0];  \
139
	if(Q128.w[0] < (B128).w[0])            \
140
	  Q128.w[1] ++;                      \
141
    (R128).w[1] = Q128.w[1];             \
142
    (R128).w[0] = Q128.w[0];               \
143
}
144
#define __sub_128_128(R128, A128, B128)  \
145
{                                        \
146
UINT128 Q128;                            \
147
	Q128.w[1] = (A128).w[1]-(B128).w[1]; \
148
	Q128.w[0] = (A128).w[0] - (B128).w[0];  \
149
	if((A128).w[0] < (B128).w[0])          \
150
	  Q128.w[1] --;                      \
151
    (R128).w[1] = Q128.w[1];             \
152
    (R128).w[0] = Q128.w[0];               \
153
}
154
#define __add_carry_out(S, CY, X, Y)    \
155
{                                      \
156
UINT64 X1=X;                           \
157
	S = X + Y;                         \
158
	CY = (S<X1) ? 1 : 0;               \
159
}
160
#define __add_carry_in_out(S, CY, X, Y, CI)    \
161
{                                             \
162
UINT64 X1;                                    \
163
	X1 = X + CI;                              \
164
	S = X1 + Y;                               \
165
	CY = ((S<X1) || (X1<CI)) ? 1 : 0;         \
166
}
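// Added note: as used in this header, CI is a previous carry (0 or 1), so a
// carry out of __add_carry_in_out comes either from X + CI wrapping past 2^64
// (X1 < CI) or from X1 + Y wrapping (S < X1); the two cases are mutually
// exclusive, hence CY is always 0 or 1 and can feed the next limb's CI.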
167
#define __sub_borrow_out(S, CY, X, Y)    \
168
{                                      \
169
UINT64 X1=X;                           \
170
	S = X - Y;                         \
171
	CY = (S>X1) ? 1 : 0;                \
172
}
173
#define __sub_borrow_in_out(S, CY, X, Y, CI)    \
174
{                                             \
175
UINT64 X1, X0=X;                              \
176
	X1 = X - CI;                              \
177
	S = X1 - Y;                               \
178
	CY = ((S>X1) || (X1>X0)) ? 1 : 0;          \
179
}
180
// increment C128 and check for rounding overflow:
181
// if (C_128) = 10^34 then (C_128) = 10^33 and increment the exponent
182
#define INCREMENT(C_128, exp)                                           \
183
{                                                                       \
184
  C_128.w[0]++;                                                         \
185
  if (C_128.w[0] == 0) C_128.w[1]++;                                    \
186
  if (C_128.w[1] == 0x0001ed09bead87c0ull &&                            \
187
      C_128.w[0] == 0x378d8e6400000000ull) {                            \
188
    exp++;                                                              \
189
    C_128.w[1] = 0x0000314dc6448d93ull;                                 \
190
    C_128.w[0] = 0x38c15b0a00000000ull;                                 \
191
  }                                                                     \
192
}
193
// decrement C128 and check for rounding underflow, but only at the
194
// boundary: if C_128 = 10^33 - 1 and exp > 0 then C_128 = 10^34 - 1
195
// and decrement the exponent
196
#define DECREMENT(C_128, exp)                                           \
197
{                                                                       \
198
  C_128.w[0]--;                                                         \
199
  if (C_128.w[0] == 0xffffffffffffffffull) C_128.w[1]--;                \
200
  if (C_128.w[1] == 0x0000314dc6448d93ull &&                            \
201
      C_128.w[0] == 0x38c15b09ffffffffull && exp > 0) {                 \
202
    exp--;                                                              \
203
    C_128.w[1] = 0x0001ed09bead87c0ull;                                 \
204
    C_128.w[0] = 0x378d8e63ffffffffull;                                 \
205
  }                                                                     \
206
}
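// Added note: the constants above are 10^34 and 10^33 split into 64-bit halves,
//   10^34 = 0x0001ed09bead87c0 378d8e6400000000
//   10^33 = 0x0000314dc6448d93 38c15b0a00000000
// INCREMENT rewrites a coefficient that reached 10^34 as the equal value
// 10^33 * 10^(exp+1); DECREMENT handles the opposite boundary, rewriting a
// coefficient that fell to 10^33 - 1 as 10^34 - 1 at the next smaller
// exponent so the full 34-digit precision is kept.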
207
 
208
 /*********************************************************************
209
 *
210
 *      Multiply Macros
211
 *
212
 *********************************************************************/
213
#define __mul_64x64_to_64(P64, CX, CY)  (P64) = (CX) * (CY)
214
/***************************************
215
 *  Signed, Full 64x64-bit Multiply
216
 ***************************************/
217
#define __imul_64x64_to_128(P, CX, CY)  \
218
{                                       \
219
UINT64 SX, SY;                          \
220
   __mul_64x64_to_128(P, CX, CY);       \
221
                                        \
222
   SX = ((SINT64)(CX))>>63;             \
223
   SY = ((SINT64)(CY))>>63;             \
224
   SX &= CY;   SY &= CX;                \
225
                                        \
226
   (P).w[1] = (P).w[1] - SX - SY;       \
227
}
228
/***************************************
229
 *  Signed, Full 64x128-bit Multiply
230
 ***************************************/
231
#define __imul_64x128_full(Ph, Ql, A, B)          \
232
{                                                 \
233
UINT128 ALBL, ALBH, QM2, QM;                      \
234
                                                  \
235
	__imul_64x64_to_128(ALBH, (A), (B).w[1]);     \
236
	__imul_64x64_to_128(ALBL, (A), (B).w[0]);      \
237
                                                  \
238
	(Ql).w[0] = ALBL.w[0];                          \
239
	QM.w[0] = ALBL.w[1];                           \
240
	QM.w[1] = ((SINT64)ALBL.w[1])>>63;            \
241
    __add_128_128(QM2, ALBH, QM);                 \
242
	(Ql).w[1] = QM2.w[0];                          \
243
    Ph = QM2.w[1];                                \
244
}
245
/*****************************************************
246
 *      Unsigned Multiply Macros
247
 *****************************************************/
248
// get full 64x64bit product
249
//
250
#define __mul_64x64_to_128(P, CX, CY)   \
251
{                                       \
252
UINT64 CXH, CXL, CYH,CYL,PL,PH,PM,PM2;\
253
	CXH = (CX) >> 32;                     \
254
	CXL = (UINT32)(CX);                   \
255
	CYH = (CY) >> 32;                     \
256
	CYL = (UINT32)(CY);                   \
257
	                                      \
258
    PM = CXH*CYL;                         \
259
	PH = CXH*CYH;                         \
260
	PL = CXL*CYL;                         \
261
	PM2 = CXL*CYH;                        \
262
	PH += (PM>>32);                       \
263
	PM = (UINT64)((UINT32)PM)+PM2+(PL>>32); \
264
                                          \
265
	(P).w[1] = PH + (PM>>32);             \
266
	(P).w[0] = (PM<<32)+(UINT32)PL;       \
267
}
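// Added cross-check sketch (not from the original header): on compilers that
// provide a native unsigned __int128 (e.g. GCC on 64-bit targets), the
// schoolbook 32x32 decomposition above can be validated directly.
#if 0
static int mul_64x64_matches_int128 (UINT64 x, UINT64 y) {
  UINT128 p;
  unsigned __int128 ref = (unsigned __int128) x * y;
  __mul_64x64_to_128 (p, x, y);
  return p.w[1] == (UINT64) (ref >> 64) && p.w[0] == (UINT64) ref;
}
#endif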
268
// get full 64x64bit product
269
// Note:
270
// This macro is used for CX < 2^61, CY < 2^61
271
//
272
#define __mul_64x64_to_128_fast(P, CX, CY)   \
273
{                                       \
274
UINT64 CXH, CXL, CYH, CYL, PL, PH, PM;  \
275
	CXH = (CX) >> 32;                   \
276
	CXL = (UINT32)(CX);                 \
277
	CYH = (CY) >> 32;                   \
278
	CYL = (UINT32)(CY);                 \
279
	                                    \
280
    PM = CXH*CYL;                       \
281
	PL = CXL*CYL;                       \
282
	PH = CXH*CYH;                       \
283
	PM += CXL*CYH;                      \
284
	PM += (PL>>32);                     \
285
                                        \
286
	(P).w[1] = PH + (PM>>32);           \
287
	(P).w[0] = (PM<<32)+(UINT32)PL;      \
288
}
289
// used for CX < 2^60
290
#define __sqr64_fast(P, CX)   \
291
{                                       \
292
UINT64 CXH, CXL, PL, PH, PM;            \
293
	CXH = (CX) >> 32;                   \
294
	CXL = (UINT32)(CX);                 \
295
	                                    \
296
    PM = CXH*CXL;                       \
297
	PL = CXL*CXL;                       \
298
	PH = CXH*CXH;                       \
299
	PM += PM;                           \
300
	PM += (PL>>32);                     \
301
                                        \
302
	(P).w[1] = PH + (PM>>32);           \
303
	(P).w[0] = (PM<<32)+(UINT32)PL;     \
304
}
305
// get full 64x64bit product
306
// Note:
307
// This implementation is used for CX < 2^61, CY < 2^61
308
//
309
#define __mul_64x64_to_64_high_fast(P, CX, CY)   \
310
{                                       \
311
UINT64 CXH, CXL, CYH, CYL, PL, PH, PM;  \
312
	CXH = (CX) >> 32;                   \
313
	CXL = (UINT32)(CX);                 \
314
	CYH = (CY) >> 32;                   \
315
	CYL = (UINT32)(CY);                 \
316
	                                    \
317
    PM = CXH*CYL;                       \
318
	PL = CXL*CYL;                       \
319
	PH = CXH*CYH;                       \
320
	PM += CXL*CYH;                      \
321
	PM += (PL>>32);                     \
322
                                        \
323
	(P) = PH + (PM>>32);                \
324
}
325
// get full 64x64bit product
326
//
327
#define __mul_64x64_to_128_full(P, CX, CY)     \
328
{                                         \
329
UINT64 CXH, CXL, CYH,CYL,PL,PH,PM,PM2;\
330
	CXH = (CX) >> 32;                     \
331
	CXL = (UINT32)(CX);                   \
332
	CYH = (CY) >> 32;                     \
333
	CYL = (UINT32)(CY);                   \
334
	                                      \
335
    PM = CXH*CYL;                         \
336
	PH = CXH*CYH;                         \
337
	PL = CXL*CYL;                         \
338
	PM2 = CXL*CYH;                        \
339
	PH += (PM>>32);                       \
340
	PM = (UINT64)((UINT32)PM)+PM2+(PL>>32); \
341
                                          \
342
	(P).w[1] = PH + (PM>>32);             \
343
	(P).w[0] = (PM<<32)+(UINT32)PL;        \
344
}
345
#define __mul_128x128_high(Q, A, B)               \
346
{                                                 \
347
UINT128 ALBL, ALBH, AHBL, AHBH, QM, QM2;          \
348
                                                  \
349
	__mul_64x64_to_128(ALBH, (A).w[0], (B).w[1]);  \
350
	__mul_64x64_to_128(AHBL, (B).w[0], (A).w[1]);  \
351
	__mul_64x64_to_128(ALBL, (A).w[0], (B).w[0]);   \
352
	__mul_64x64_to_128(AHBH, (A).w[1],(B).w[1]);  \
353
                                                  \
354
    __add_128_128(QM, ALBH, AHBL);                \
355
    __add_128_64(QM2, QM, ALBL.w[1]);             \
356
    __add_128_64((Q), AHBH, QM2.w[1]);            \
357
}
358
#define __mul_128x128_full(Qh, Ql, A, B)          \
359
{                                                 \
360
UINT128 ALBL, ALBH, AHBL, AHBH, QM, QM2;          \
361
                                                  \
362
	__mul_64x64_to_128(ALBH, (A).w[0], (B).w[1]);  \
363
	__mul_64x64_to_128(AHBL, (B).w[0], (A).w[1]);  \
364
	__mul_64x64_to_128(ALBL, (A).w[0], (B).w[0]);   \
365
	__mul_64x64_to_128(AHBH, (A).w[1],(B).w[1]);  \
366
                                                  \
367
    __add_128_128(QM, ALBH, AHBL);                \
368
	(Ql).w[0] = ALBL.w[0];                          \
369
    __add_128_64(QM2, QM, ALBL.w[1]);             \
370
    __add_128_64((Qh), AHBH, QM2.w[1]);           \
371
	(Ql).w[1] = QM2.w[0];                          \
372
}
373
#define __mul_128x128_low(Ql, A, B)               \
374
{                                                 \
375
UINT128 ALBL;                                     \
376
UINT64 QM64;                                      \
377
                                                  \
378
	__mul_64x64_to_128(ALBL, (A).w[0], (B).w[0]);   \
379
	QM64 = (B).w[0]*(A).w[1] + (A).w[0]*(B).w[1];   \
380
                                                  \
381
	(Ql).w[0] = ALBL.w[0];                          \
382
	(Ql).w[1] = QM64 + ALBL.w[1];                 \
383
}
384
#define __mul_64x128_low(Ql, A, B)                \
385
{                                                 \
386
  UINT128 ALBL, ALBH, QM2;                        \
387
  __mul_64x64_to_128(ALBH, (A), (B).w[1]);        \
388
  __mul_64x64_to_128(ALBL, (A), (B).w[0]);        \
389
  (Ql).w[0] = ALBL.w[0];                          \
390
  __add_128_64(QM2, ALBH, ALBL.w[1]);             \
391
  (Ql).w[1] = QM2.w[0];                           \
392
}
393
#define __mul_64x128_full(Ph, Ql, A, B)           \
394
{                                                 \
395
UINT128 ALBL, ALBH, QM2;                          \
396
                                                  \
397
	__mul_64x64_to_128(ALBH, (A), (B).w[1]);      \
398
	__mul_64x64_to_128(ALBL, (A), (B).w[0]);       \
399
                                                  \
400
	(Ql).w[0] = ALBL.w[0];                          \
401
    __add_128_64(QM2, ALBH, ALBL.w[1]);           \
402
	(Ql).w[1] = QM2.w[0];                          \
403
    Ph = QM2.w[1];                                \
404
}
405
#define __mul_64x128_to_192(Q, A, B)              \
406
{                                                 \
407
UINT128 ALBL, ALBH, QM2;                          \
408
                                                  \
409
	__mul_64x64_to_128(ALBH, (A), (B).w[1]);      \
410
	__mul_64x64_to_128(ALBL, (A), (B).w[0]);      \
411
                                                  \
412
	(Q).w[0] = ALBL.w[0];                         \
413
    __add_128_64(QM2, ALBH, ALBL.w[1]);           \
414
	(Q).w[1] = QM2.w[0];                          \
415
    (Q).w[2] = QM2.w[1];                          \
416
}
417
#define __mul_64x128_to192(Q, A, B)          \
418
{                                             \
419
UINT128 ALBL, ALBH, QM2;                      \
420
                                              \
421
    __mul_64x64_to_128(ALBH, (A), (B).w[1]);  \
422
    __mul_64x64_to_128(ALBL, (A), (B).w[0]);  \
423
                                              \
424
    (Q).w[0] = ALBL.w[0];                    \
425
    __add_128_64(QM2, ALBH, ALBL.w[1]);       \
426
    (Q).w[1] = QM2.w[0];                     \
427
    (Q).w[2] = QM2.w[1];                     \
428
}
429
#define __mul_128x128_to_256(P256, A, B)                         \
430
{                                                                \
431
UINT128 Qll, Qlh;                                                \
432
UINT64 Phl, Phh, CY1, CY2;                                         \
433
                                                                 \
434
   __mul_64x128_full(Phl, Qll, A.w[0], B);                       \
435
   __mul_64x128_full(Phh, Qlh, A.w[1], B);                       \
436
  (P256).w[0] = Qll.w[0];                                        \
437
	   __add_carry_out((P256).w[1],CY1, Qlh.w[0], Qll.w[1]);      \
438
	   __add_carry_in_out((P256).w[2],CY2, Qlh.w[1], Phl, CY1);    \
439
	   (P256).w[3] = Phh + CY2;                                   \
440
}
441
//
442
// For better performance, this checks A.w[1] against 0,
443
//                         but not B.w[1]
444
// Use this macro accordingly
445
#define __mul_128x128_to_256_check_A(P256, A, B)                   \
446
{                                                                  \
447
UINT128 Qll, Qlh;                                                  \
448
UINT64 Phl, Phh, CY1, CY2;                                           \
449
                                                                   \
450
   __mul_64x128_full(Phl, Qll, A.w[0], B);                          \
451
  (P256).w[0] = Qll.w[0];                                        \
452
   if(A.w[1])  {                                                   \
453
	   __mul_64x128_full(Phh, Qlh, A.w[1], B);                     \
454
	   __add_carry_out((P256).w[1],CY1, Qlh.w[0], Qll.w[1]);      \
455
	   __add_carry_in_out((P256).w[2],CY2, Qlh.w[1], Phl, CY1);   \
456
	   (P256).w[3] = Phh + CY2;   }                              \
457
   else  {                                                         \
458
	   (P256).w[1] = Qll.w[1];                                  \
459
	   (P256).w[2] = Phl;                                       \
460
	   (P256).w[3] = 0;  }                                      \
461
}
462
#define __mul_64x192_to_256(lP, lA, lB)                      \
463
{                                                         \
464
UINT128 lP0,lP1,lP2;                                      \
465
UINT64 lC;                                                 \
466
	__mul_64x64_to_128(lP0, lA, (lB).w[0]);              \
467
	__mul_64x64_to_128(lP1, lA, (lB).w[1]);              \
468
	__mul_64x64_to_128(lP2, lA, (lB).w[2]);              \
469
	(lP).w[0] = lP0.w[0];                                \
470
	__add_carry_out((lP).w[1],lC,lP1.w[0],lP0.w[1]);      \
471
	__add_carry_in_out((lP).w[2],lC,lP2.w[0],lP1.w[1],lC); \
472
	(lP).w[3] = lP2.w[1] + lC;                           \
473
}
474
#define __mul_64x256_to_320(P, A, B)                    \
475
{                                                       \
476
UINT128 lP0,lP1,lP2,lP3;                                \
477
UINT64 lC;                                               \
478
	__mul_64x64_to_128(lP0, A, (B).w[0]);             \
479
	__mul_64x64_to_128(lP1, A, (B).w[1]);             \
480
	__mul_64x64_to_128(lP2, A, (B).w[2]);             \
481
	__mul_64x64_to_128(lP3, A, (B).w[3]);             \
482
	(P).w[0] = lP0.w[0];                               \
483
	__add_carry_out((P).w[1],lC,lP1.w[0],lP0.w[1]);      \
484
	__add_carry_in_out((P).w[2],lC,lP2.w[0],lP1.w[1],lC); \
485
	__add_carry_in_out((P).w[3],lC,lP3.w[0],lP2.w[1],lC); \
486
	(P).w[4] = lP3.w[1] + lC;                          \
487
}
488
#define __mul_192x192_to_384(P, A, B)                          \
489
{                                                              \
490
UINT256 P0,P1,P2;                                              \
491
UINT64 CY;                                                      \
492
	__mul_64x192_to_256(P0, (A).w[0], B);                   \
493
	__mul_64x192_to_256(P1, (A).w[1], B);                   \
494
	__mul_64x192_to_256(P2, (A).w[2], B);                   \
495
	(P).w[0] = P0.w[0];                                  \
496
	__add_carry_out((P).w[1],CY,P1.w[0],P0.w[1]);      \
497
	__add_carry_in_out((P).w[2],CY,P1.w[1],P0.w[2],CY); \
498
	__add_carry_in_out((P).w[3],CY,P1.w[2],P0.w[3],CY); \
499
	(P).w[4] = P1.w[3] + CY;                              \
500
	__add_carry_out((P).w[2],CY,P2.w[0],(P).w[2]);     \
501
	__add_carry_in_out((P).w[3],CY,P2.w[1],(P).w[3],CY);   \
502
	__add_carry_in_out((P).w[4],CY,P2.w[2],(P).w[4],CY);   \
503
	(P).w[5] = P2.w[3] + CY;                              \
504
}
505
#define __mul_64x320_to_384(P, A, B)                    \
506
{                                                       \
507
UINT128 lP0,lP1,lP2,lP3,lP4;                            \
508
UINT64 lC;                                               \
509
	__mul_64x64_to_128(lP0, A, (B).w[0]);             \
510
	__mul_64x64_to_128(lP1, A, (B).w[1]);             \
511
	__mul_64x64_to_128(lP2, A, (B).w[2]);             \
512
	__mul_64x64_to_128(lP3, A, (B).w[3]);             \
513
	__mul_64x64_to_128(lP4, A, (B).w[4]);             \
514
	(P).w[0] = lP0.w[0];                               \
515
	__add_carry_out((P).w[1],lC,lP1.w[0],lP0.w[1]);      \
516
	__add_carry_in_out((P).w[2],lC,lP2.w[0],lP1.w[1],lC); \
517
	__add_carry_in_out((P).w[3],lC,lP3.w[0],lP2.w[1],lC); \
518
	__add_carry_in_out((P).w[4],lC,lP4.w[0],lP3.w[1],lC); \
519
	(P).w[5] = lP4.w[1] + lC;                          \
520
}
521
// A*A
522
// Full 128x128-bit product
523
#define __sqr128_to_256(P256, A)                                 \
524
{                                                                \
525
UINT128 Qll, Qlh, Qhh;                                           \
526
UINT64 TMP_C1, TMP_C2;                                 \
527
                                                                 \
528
   __mul_64x64_to_128(Qhh, A.w[1], A.w[1]);                      \
529
   __mul_64x64_to_128(Qlh, A.w[0], A.w[1]);                      \
530
   Qhh.w[1] += (Qlh.w[1]>>63);                                   \
531
   Qlh.w[1] = (Qlh.w[1]+Qlh.w[1])|(Qlh.w[0]>>63);                \
532
   Qlh.w[0] += Qlh.w[0];                                         \
533
   __mul_64x64_to_128(Qll, A.w[0], A.w[0]);                      \
534
                                                                 \
535
   __add_carry_out((P256).w[1],TMP_C1, Qlh.w[0], Qll.w[1]);      \
536
   (P256).w[0] = Qll.w[0];                                       \
537
   __add_carry_in_out((P256).w[2],TMP_C2, Qlh.w[1], Qhh.w[0], TMP_C1);    \
538
   (P256).w[3] = Qhh.w[1]+TMP_C2;                                         \
539
}
540
#define __mul_128x128_to_256_low_high(PQh, PQl, A, B)            \
541
{                                                                \
542
UINT128 Qll, Qlh;                                                \
543
UINT64 Phl, Phh, C1, C2;                                         \
544
                                                                 \
545
   __mul_64x128_full(Phl, Qll, A.w[0], B);                       \
546
   __mul_64x128_full(Phh, Qlh, A.w[1], B);                       \
547
  (PQl).w[0] = Qll.w[0];                                        \
548
	   __add_carry_out((PQl).w[1],C1, Qlh.w[0], Qll.w[1]);      \
549
	   __add_carry_in_out((PQh).w[0],C2, Qlh.w[1], Phl, C1);    \
550
	   (PQh).w[1] = Phh + C2;                                   \
551
}
552
#define __mul_256x256_to_512(P, A, B)                          \
553
{                                                              \
554
UINT512 P0,P1,P2,P3;                                           \
555
UINT64 CY;                                                      \
556
	__mul_64x256_to_320(P0, (A).w[0], B);                   \
557
	__mul_64x256_to_320(P1, (A).w[1], B);                   \
558
	__mul_64x256_to_320(P2, (A).w[2], B);                   \
559
	__mul_64x256_to_320(P3, (A).w[3], B);                   \
560
	(P).w[0] = P0.w[0];                                  \
561
	__add_carry_out((P).w[1],CY,P1.w[0],P0.w[1]);      \
562
	__add_carry_in_out((P).w[2],CY,P1.w[1],P0.w[2],CY); \
563
	__add_carry_in_out((P).w[3],CY,P1.w[2],P0.w[3],CY); \
564
	__add_carry_in_out((P).w[4],CY,P1.w[3],P0.w[4],CY); \
565
	(P).w[5] = P1.w[4] + CY;                              \
566
	__add_carry_out((P).w[2],CY,P2.w[0],(P).w[2]);     \
567
	__add_carry_in_out((P).w[3],CY,P2.w[1],(P).w[3],CY);   \
568
	__add_carry_in_out((P).w[4],CY,P2.w[2],(P).w[4],CY);   \
569
	__add_carry_in_out((P).w[5],CY,P2.w[3],(P).w[5],CY);   \
570
	(P).w[6] = P2.w[4] + CY;                              \
571
	__add_carry_out((P).w[3],CY,P3.w[0],(P).w[3]);     \
572
	__add_carry_in_out((P).w[4],CY,P3.w[1],(P).w[4],CY);   \
573
	__add_carry_in_out((P).w[5],CY,P3.w[2],(P).w[5],CY);   \
574
	__add_carry_in_out((P).w[6],CY,P3.w[3],(P).w[6],CY);   \
575
	(P).w[7] = P3.w[4] + CY;                              \
576
}
577
#define __mul_192x256_to_448(P, A, B)                          \
578
{                                                              \
579
UINT512 P0,P1,P2;                                           \
580
UINT64 CY;                                                      \
581
	__mul_64x256_to_320(P0, (A).w[0], B);                   \
582
	__mul_64x256_to_320(P1, (A).w[1], B);                   \
583
	__mul_64x256_to_320(P2, (A).w[2], B);                   \
584
	(P).w[0] = P0.w[0];                                  \
585
	__add_carry_out((P).w[1],CY,P1.w[0],P0.w[1]);      \
586
	__add_carry_in_out((P).w[2],CY,P1.w[1],P0.w[2],CY); \
587
	__add_carry_in_out((P).w[3],CY,P1.w[2],P0.w[3],CY); \
588
	__add_carry_in_out((P).w[4],CY,P1.w[3],P0.w[4],CY); \
589
	(P).w[5] = P1.w[4] + CY;                              \
590
	__add_carry_out((P).w[2],CY,P2.w[0],(P).w[2]);     \
591
	__add_carry_in_out((P).w[3],CY,P2.w[1],(P).w[3],CY);   \
592
	__add_carry_in_out((P).w[4],CY,P2.w[2],(P).w[4],CY);   \
593
	__add_carry_in_out((P).w[5],CY,P2.w[3],(P).w[5],CY);   \
594
	(P).w[6] = P2.w[4] + CY;                              \
595
}
596
#define __mul_320x320_to_640(P, A, B)                          \
597
{                                                              \
598
UINT512 P0,P1,P2,P3;                                           \
599
UINT64 CY;                                                     \
600
	__mul_256x256_to_512((P), (A), B);                   \
601
	__mul_64x256_to_320(P1, (A).w[4], B);                   \
602
	__mul_64x256_to_320(P2, (B).w[4], A);                   \
603
	__mul_64x64_to_128(P3, (A).w[4], (B).w[4]);               \
604
	__add_carry_out((P0).w[0],CY,P1.w[0],P2.w[0]);      \
605
	__add_carry_in_out((P0).w[1],CY,P1.w[1],P2.w[1],CY); \
606
	__add_carry_in_out((P0).w[2],CY,P1.w[2],P2.w[2],CY); \
607
	__add_carry_in_out((P0).w[3],CY,P1.w[3],P2.w[3],CY); \
608
	__add_carry_in_out((P0).w[4],CY,P1.w[4],P2.w[4],CY); \
609
	P3.w[1] += CY;                                       \
610
	__add_carry_out((P).w[4],CY,(P).w[4],P0.w[0]);      \
611
	__add_carry_in_out((P).w[5],CY,(P).w[5],P0.w[1],CY); \
612
	__add_carry_in_out((P).w[6],CY,(P).w[6],P0.w[2],CY); \
613
	__add_carry_in_out((P).w[7],CY,(P).w[7],P0.w[3],CY); \
614
	__add_carry_in_out((P).w[8],CY,P3.w[0],P0.w[4],CY); \
615
	(P).w[9] = P3.w[1] + CY;                             \
616
}
617
#define __mul_384x384_to_768(P, A, B)                          \
618
{                                                              \
619
UINT512 P0,P1,P2,P3;                                           \
620
UINT64 CY;                                                     \
621
	__mul_320x320_to_640((P), (A), B);                         \
622
	__mul_64x320_to_384(P1, (A).w[5], B);                   \
623
	__mul_64x320_to_384(P2, (B).w[5], A);                   \
624
	__mul_64x64_to_128(P3, (A).w[5], (B).w[5]);               \
625
	__add_carry_out((P0).w[0],CY,P1.w[0],P2.w[0]);      \
626
	__add_carry_in_out((P0).w[1],CY,P1.w[1],P2.w[1],CY); \
627
	__add_carry_in_out((P0).w[2],CY,P1.w[2],P2.w[2],CY); \
628
	__add_carry_in_out((P0).w[3],CY,P1.w[3],P2.w[3],CY); \
629
	__add_carry_in_out((P0).w[4],CY,P1.w[4],P2.w[4],CY); \
630
	__add_carry_in_out((P0).w[5],CY,P1.w[5],P2.w[5],CY); \
631
	P3.w[1] += CY;                                       \
632
	__add_carry_out((P).w[5],CY,(P).w[5],P0.w[0]);      \
633
	__add_carry_in_out((P).w[6],CY,(P).w[6],P0.w[1],CY); \
634
	__add_carry_in_out((P).w[7],CY,(P).w[7],P0.w[2],CY); \
635
	__add_carry_in_out((P).w[8],CY,(P).w[8],P0.w[3],CY); \
636
	__add_carry_in_out((P).w[9],CY,(P).w[9],P0.w[4],CY); \
637
	__add_carry_in_out((P).w[10],CY,P3.w[0],P0.w[5],CY); \
638
	(P).w[11] = P3.w[1] + CY;                             \
639
}
640
#define __mul_64x128_short(Ql, A, B)              \
641
{                                                 \
642
UINT64 ALBH_L;                                    \
643
                                                  \
644
	__mul_64x64_to_64(ALBH_L, (A),(B).w[1]);      \
645
	__mul_64x64_to_128((Ql), (A), (B).w[0]);       \
646
                                                  \
647
	(Ql).w[1] += ALBH_L;                          \
648
}
649
#define __scale128_10(D,_TMP)                            \
650
{                                                        \
651
UINT128 _TMP2,_TMP8;                                     \
652
	  _TMP2.w[1] = (_TMP.w[1]<<1)|(_TMP.w[0]>>63);       \
653
	  _TMP2.w[0] = _TMP.w[0]<<1;                         \
654
	  _TMP8.w[1] = (_TMP.w[1]<<3)|(_TMP.w[0]>>61);       \
655
	  _TMP8.w[0] = _TMP.w[0]<<3;                         \
656
	  __add_128_128(D, _TMP2, _TMP8);                    \
657
}
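// Added note: __scale128_10 forms D = 10 * _TMP as (_TMP << 1) + (_TMP << 3),
// i.e. 2x + 8x, avoiding a 128-bit multiply; the shifts assume 10 * _TMP
// still fits in 128 bits.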
658
// 64x64-bit product
659
#define __mul_64x64_to_128MACH(P128, CX64, CY64)  \
660
{                                                  \
661
  UINT64 CXH,CXL,CYH,CYL,PL,PH,PM,PM2;     \
662
  CXH = (CX64) >> 32;                              \
663
  CXL = (UINT32)(CX64);                            \
664
  CYH = (CY64) >> 32;                              \
665
  CYL = (UINT32)(CY64);                            \
666
  PM = CXH*CYL;                                    \
667
  PH = CXH*CYH;                                    \
668
  PL = CXL*CYL;                                    \
669
  PM2 = CXL*CYH;                                   \
670
  PH += (PM>>32);                                  \
671
  PM = (UINT64)((UINT32)PM)+PM2+(PL>>32);          \
672
  (P128).w[1] = PH + (PM>>32);                     \
673
  (P128).w[0] = (PM<<32)+(UINT32)PL;                \
674
}
675
// 64x64-bit product
676
#define __mul_64x64_to_128HIGH(P64, CX64, CY64)  \
677
{                                                  \
678
  UINT64 CXH,CXL,CYH,CYL,PL,PH,PM,PM2;     \
679
  CXH = (CX64) >> 32;                              \
680
  CXL = (UINT32)(CX64);                            \
681
  CYH = (CY64) >> 32;                              \
682
  CYL = (UINT32)(CY64);                            \
683
  PM = CXH*CYL;                                    \
684
  PH = CXH*CYH;                                    \
685
  PL = CXL*CYL;                                    \
686
  PM2 = CXL*CYH;                                   \
687
  PH += (PM>>32);                                  \
688
  PM = (UINT64)((UINT32)PM)+PM2+(PL>>32);          \
689
  P64 = PH + (PM>>32);                     \
690
}
691
#define __mul_128x64_to_128(Q128, A64, B128)        \
692
{                                                  \
693
  UINT64 ALBH_L;                                   \
694
  ALBH_L = (A64) * (B128).w[1];                    \
695
  __mul_64x64_to_128MACH((Q128), (A64), (B128).w[0]);   \
696
  (Q128).w[1] += ALBH_L;                           \
697
}
698
// might simplify by calculating just QM2.w[0]
699
#define __mul_64x128_to_128(Ql, A, B)           \
700
{                                                 \
701
  UINT128 ALBL, ALBH, QM2;                        \
702
  __mul_64x64_to_128(ALBH, (A), (B).w[1]);        \
703
  __mul_64x64_to_128(ALBL, (A), (B).w[0]);        \
704
  (Ql).w[0] = ALBL.w[0];                          \
705
  __add_128_64(QM2, ALBH, ALBL.w[1]);             \
706
  (Ql).w[1] = QM2.w[0];                           \
707
}
708
/*********************************************************************
709
 *
710
 *      BID Pack/Unpack Macros
711
 *
712
 *********************************************************************/
713
/////////////////////////////////////////
714
// BID64 definitions
715
////////////////////////////////////////
716
#define DECIMAL_MAX_EXPON_64  767
717
#define DECIMAL_EXPONENT_BIAS 398
718
#define MAX_FORMAT_DIGITS     16
719
/////////////////////////////////////////
720
// BID128 definitions
721
////////////////////////////////////////
722
#define DECIMAL_MAX_EXPON_128  12287
723
#define DECIMAL_EXPONENT_BIAS_128  6176
724
#define MAX_FORMAT_DIGITS_128      34
725
/////////////////////////////////////////
726
// BID32 definitions
727
////////////////////////////////////////
728
#define DECIMAL_MAX_EXPON_32  191
729
#define DECIMAL_EXPONENT_BIAS_32  101
730
#define MAX_FORMAT_DIGITS_32      7
731
////////////////////////////////////////
732
// Constant Definitions
733
///////////////////////////////////////
734
#define SPECIAL_ENCODING_MASK64 0x6000000000000000ull
735
#define INFINITY_MASK64         0x7800000000000000ull
736
#define SINFINITY_MASK64        0xf800000000000000ull
737
#define SSNAN_MASK64            0xfc00000000000000ull
738
#define NAN_MASK64              0x7c00000000000000ull
739
#define SNAN_MASK64             0x7e00000000000000ull
740
#define QUIET_MASK64            0xfdffffffffffffffull
741
#define LARGE_COEFF_MASK64      0x0007ffffffffffffull
742
#define LARGE_COEFF_HIGH_BIT64  0x0020000000000000ull
743
#define SMALL_COEFF_MASK64      0x001fffffffffffffull
744
#define EXPONENT_MASK64         0x3ff
745
#define EXPONENT_SHIFT_LARGE64  51
746
#define EXPONENT_SHIFT_SMALL64  53
747
#define LARGEST_BID64           0x77fb86f26fc0ffffull
748
#define SMALLEST_BID64          0xf7fb86f26fc0ffffull
749
#define SMALL_COEFF_MASK128     0x0001ffffffffffffull
750
#define LARGE_COEFF_MASK128     0x00007fffffffffffull
751
#define EXPONENT_MASK128        0x3fff
752
#define LARGEST_BID128_HIGH     0x5fffed09bead87c0ull
753
#define LARGEST_BID128_LOW      0x378d8e63ffffffffull
754
#define SPECIAL_ENCODING_MASK32 0x60000000ul
755
#define INFINITY_MASK32         0x78000000ul
756
#define LARGE_COEFF_MASK32      0x007ffffful
757
#define LARGE_COEFF_HIGH_BIT32  0x00800000ul
758
#define SMALL_COEFF_MASK32      0x001ffffful
759
#define EXPONENT_MASK32         0xff
760
#define LARGEST_BID32           0x77f8967f
761
#define NAN_MASK32              0x7c000000
762
#define SNAN_MASK32             0x7e000000
763
#define MASK_BINARY_EXPONENT  0x7ff0000000000000ull
764
#define BINARY_EXPONENT_BIAS  0x3ff
765
#define UPPER_EXPON_LIMIT     51
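// Added note: for BID64 the constants above describe the two encodings used by
// the pack/unpack routines below.  The "small coefficient" form packs
// sign | 10-bit exponent | 53-bit coefficient (EXPONENT_SHIFT_SMALL64 == 53);
// the "special encoding" form, marked by the two bits of
// SPECIAL_ENCODING_MASK64, packs sign | 11 | 10-bit exponent | 51-bit
// coefficient with an implicit leading binary 100 (LARGE_COEFF_HIGH_BIT64),
// so its exponent field sits at EXPONENT_SHIFT_LARGE64 == 51.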
766
// data needed for BID pack/unpack macros
767
extern UINT64 round_const_table[][19];
768
extern UINT128 reciprocals10_128[];
769
extern int recip_scale[];
770
extern UINT128 power10_table_128[];
771
extern int estimate_decimal_digits[];
772
extern int estimate_bin_expon[];
773
extern UINT64 power10_index_binexp[];
774
extern int short_recip_scale[];
775
extern UINT64 reciprocals10_64[];
776
extern UINT128 power10_index_binexp_128[];
777
extern UINT128 round_const_table_128[][36];
778
 
779
 
780
//////////////////////////////////////////////
781
//  Status Flag Handling
782
/////////////////////////////////////////////
783
#define __set_status_flags(fpsc, status)  *(fpsc) |= status
784
#define is_inexact(fpsc)  ((*(fpsc))&INEXACT_EXCEPTION)
785
 
786
__BID_INLINE__ UINT64
787
unpack_BID64 (UINT64 * psign_x, int *pexponent_x,
788
	      UINT64 * pcoefficient_x, UINT64 x) {
789
  UINT64 tmp, coeff;
790
 
791
  *psign_x = x & 0x8000000000000000ull;
792
 
793
  if ((x & SPECIAL_ENCODING_MASK64) == SPECIAL_ENCODING_MASK64) {
794
    // special encodings
795
    // coefficient
796
    coeff = (x & LARGE_COEFF_MASK64) | LARGE_COEFF_HIGH_BIT64;
797
 
798
    if ((x & INFINITY_MASK64) == INFINITY_MASK64) {
799
      *pexponent_x = 0;
800
      *pcoefficient_x = x & 0xfe03ffffffffffffull;
801
      if ((x & 0x0003ffffffffffffull) >= 1000000000000000ull)
802
	*pcoefficient_x = x & 0xfe00000000000000ull;
803
      if ((x & NAN_MASK64) == INFINITY_MASK64)
804
	*pcoefficient_x = x & SINFINITY_MASK64;
805
      return 0;	// NaN or Infinity
806
    }
807
    // check for non-canonical values
808
    if (coeff >= 10000000000000000ull)
809
      coeff = 0;
810
    *pcoefficient_x = coeff;
811
    // get exponent
812
    tmp = x >> EXPONENT_SHIFT_LARGE64;
813
    *pexponent_x = (int) (tmp & EXPONENT_MASK64);
814
    return coeff;
815
  }
816
  // exponent
817
  tmp = x >> EXPONENT_SHIFT_SMALL64;
818
  *pexponent_x = (int) (tmp & EXPONENT_MASK64);
819
  // coefficient
820
  *pcoefficient_x = (x & SMALL_COEFF_MASK64);
821
 
822
  return *pcoefficient_x;
823
}
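// Added usage sketch (not from the original header): decoding 1.5, whose BID64
// small-coefficient encoding is 0x31a000000000000full (sign 0, biased
// exponent 397, coefficient 15).
#if 0
static void unpack_sketch (void) {
  UINT64 sign, coeff, nonzero;
  int expon;
  nonzero = unpack_BID64 (&sign, &expon, &coeff, 0x31a000000000000full);
  /* nonzero != 0, sign == 0, coeff == 15, expon == 397,
     i.e. 15 * 10^(397 - DECIMAL_EXPONENT_BIAS) = 15 * 10^-1 = 1.5 */
}
#endif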
824
 
825
//
826
//   BID64 pack macro (general form)
827
//
828
__BID_INLINE__ UINT64
829
get_BID64 (UINT64 sgn, int expon, UINT64 coeff, int rmode,
830
	   unsigned *fpsc) {
831
  UINT128 Stemp, Q_low;
832
  UINT64 QH, r, mask, C64, remainder_h, CY, carry;
833
  int extra_digits, amount, amount2;
834
  unsigned status;
835
 
836
  if (coeff > 9999999999999999ull) {
837
    expon++;
838
    coeff = 1000000000000000ull;
839
  }
840
  // check for possible underflow/overflow
841
  if (((unsigned) expon) >= 3 * 256) {
842
    if (expon < 0) {
843
      // underflow
844
      if (expon + MAX_FORMAT_DIGITS < 0) {
845
#ifdef SET_STATUS_FLAGS
846
	__set_status_flags (fpsc,
847
			    UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
848
#endif
849
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
850
#ifndef IEEE_ROUND_NEAREST
851
	if (rmode == ROUNDING_DOWN && sgn)
852
	  return 0x8000000000000001ull;
853
	if (rmode == ROUNDING_UP && !sgn)
854
	  return 1ull;
855
#endif
856
#endif
857
	// result is 0
858
	return sgn;
859
      }
860
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
861
#ifndef IEEE_ROUND_NEAREST
862
      if (sgn && (unsigned) (rmode - 1) < 2)
863
	rmode = 3 - rmode;
864
#endif
865
#endif
866
      // get digits to be shifted out
867
      extra_digits = -expon;
868
      coeff += round_const_table[rmode][extra_digits];
869
 
870
      // get coeff*(2^M[extra_digits])/10^extra_digits
871
      __mul_64x128_full (QH, Q_low, coeff,
872
			 reciprocals10_128[extra_digits]);
873
 
874
      // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
875
      amount = recip_scale[extra_digits];
876
 
877
      C64 = QH >> amount;
878
 
879
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
880
#ifndef IEEE_ROUND_NEAREST
881
      if (rmode == 0)	//ROUNDING_TO_NEAREST
882
#endif
883
	if (C64 & 1) {
884
	  // check whether fractional part of initial_P/10^extra_digits is exactly .5
885
 
886
	  // get remainder
887
	  amount2 = 64 - amount;
888
	  remainder_h = 0;
889
	  remainder_h--;
890
	  remainder_h >>= amount2;
891
	  remainder_h = remainder_h & QH;
892
 
893
	  if (!remainder_h
894
	      && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
895
		  || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
896
		      && Q_low.w[0] <
897
		      reciprocals10_128[extra_digits].w[0]))) {
898
	    C64--;
899
	  }
900
	}
901
#endif
902
 
903
#ifdef SET_STATUS_FLAGS
904
 
905
      if (is_inexact (fpsc))
906
	__set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
907
      else {
908
	status = INEXACT_EXCEPTION;
909
	// get remainder
910
	remainder_h = QH << (64 - amount);
911
 
912
	switch (rmode) {
913
	case ROUNDING_TO_NEAREST:
914
	case ROUNDING_TIES_AWAY:
915
	  // test whether fractional part is 0
916
	  if (remainder_h == 0x8000000000000000ull
917
	      && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
918
		  || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
919
		      && Q_low.w[0] <
920
		      reciprocals10_128[extra_digits].w[0])))
921
	    status = EXACT_STATUS;
922
	  break;
923
	case ROUNDING_DOWN:
924
	case ROUNDING_TO_ZERO:
925
	  if (!remainder_h
926
	      && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
927
		  || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
928
		      && Q_low.w[0] <
929
		      reciprocals10_128[extra_digits].w[0])))
930
	    status = EXACT_STATUS;
931
	  break;
932
	default:
933
	  // round up
934
	  __add_carry_out (Stemp.w[0], CY, Q_low.w[0],
935
			   reciprocals10_128[extra_digits].w[0]);
936
	  __add_carry_in_out (Stemp.w[1], carry, Q_low.w[1],
937
			      reciprocals10_128[extra_digits].w[1], CY);
938
	  if ((remainder_h >> (64 - amount)) + carry >=
939
	      (((UINT64) 1) << amount))
940
	    status = EXACT_STATUS;
941
	}
942
 
943
	if (status != EXACT_STATUS)
944
	  __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
945
      }
946
 
947
#endif
948
 
949
      return sgn | C64;
950
    }
951
    while (coeff < 1000000000000000ull && expon >= 3 * 256) {
952
      expon--;
953
      coeff = (coeff << 3) + (coeff << 1);
954
    }
955
    if (expon > DECIMAL_MAX_EXPON_64) {
956
#ifdef SET_STATUS_FLAGS
957
      __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
958
#endif
959
      // overflow
960
      r = sgn | INFINITY_MASK64;
961
      switch (rmode) {
962
      case ROUNDING_DOWN:
963
	if (!sgn)
964
	  r = LARGEST_BID64;
965
	break;
966
      case ROUNDING_TO_ZERO:
967
	r = sgn | LARGEST_BID64;
968
	break;
969
      case ROUNDING_UP:
970
	// round up
971
	if (sgn)
972
	  r = SMALLEST_BID64;
973
      }
974
      return r;
975
    }
976
  }
977
 
978
  mask = 1;
979
  mask <<= EXPONENT_SHIFT_SMALL64;
980
 
981
  // check whether coefficient fits in 10*5+3 bits
982
  if (coeff < mask) {
983
    r = expon;
984
    r <<= EXPONENT_SHIFT_SMALL64;
985
    r |= (coeff | sgn);
986
    return r;
987
  }
988
  // special format
989
 
990
  // eliminate the case coeff==10^16 after rounding
991
  if (coeff == 10000000000000000ull) {
992
    r = expon + 1;
993
    r <<= EXPONENT_SHIFT_SMALL64;
994
    r |= (1000000000000000ull | sgn);
995
    return r;
996
  }
997
 
998
  r = expon;
999
  r <<= EXPONENT_SHIFT_LARGE64;
1000
  r |= (sgn | SPECIAL_ENCODING_MASK64);
1001
  // add coeff, without leading bits
1002
  mask = (mask >> 2) - 1;
1003
  coeff &= mask;
1004
  r |= coeff;
1005
 
1006
  return r;
1007
}
1008
 
1009
 
1010
 
1011
 
1012
//
1013
//   No overflow/underflow checking
1014
//
1015
__BID_INLINE__ UINT64
1016
fast_get_BID64 (UINT64 sgn, int expon, UINT64 coeff) {
1017
  UINT64 r, mask;
1018
 
1019
  mask = 1;
1020
  mask <<= EXPONENT_SHIFT_SMALL64;
1021
 
1022
  // check whether coefficient fits in 10*5+3 bits
1023
  if (coeff < mask) {
1024
    r = expon;
1025
    r <<= EXPONENT_SHIFT_SMALL64;
1026
    r |= (coeff | sgn);
1027
    return r;
1028
  }
1029
  // special format
1030
 
1031
  // eliminate the case coeff==10^16 after rounding
1032
  if (coeff == 10000000000000000ull) {
1033
    r = expon + 1;
1034
    r <<= EXPONENT_SHIFT_SMALL64;
1035
    r |= (1000000000000000ull | sgn);
1036
    return r;
1037
  }
1038
 
1039
  r = expon;
1040
  r <<= EXPONENT_SHIFT_LARGE64;
1041
  r |= (sgn | SPECIAL_ENCODING_MASK64);
1042
  // add coeff, without leading bits
1043
  mask = (mask >> 2) - 1;
1044
  coeff &= mask;
1045
  r |= coeff;
1046
 
1047
  return r;
1048
}
1049
 
1050
 
1051
//
1052
//   no underflow checking
1053
//
1054
__BID_INLINE__ UINT64
1055
fast_get_BID64_check_OF (UINT64 sgn, int expon, UINT64 coeff, int rmode,
1056
			 unsigned *fpsc) {
1057
  UINT64 r, mask;
1058
 
1059
  if (((unsigned) expon) >= 3 * 256 - 1) {
1060
    if ((expon == 3 * 256 - 1) && coeff == 10000000000000000ull) {
1061
      expon = 3 * 256;
1062
      coeff = 1000000000000000ull;
1063
    }
1064
 
1065
    if (((unsigned) expon) >= 3 * 256) {
1066
      while (coeff < 1000000000000000ull && expon >= 3 * 256) {
1067
	expon--;
1068
	coeff = (coeff << 3) + (coeff << 1);
1069
      }
1070
      if (expon > DECIMAL_MAX_EXPON_64) {
1071
#ifdef SET_STATUS_FLAGS
1072
	__set_status_flags (fpsc,
1073
			    OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
1074
#endif
1075
	// overflow
1076
	r = sgn | INFINITY_MASK64;
1077
	switch (rmode) {
1078
	case ROUNDING_DOWN:
1079
	  if (!sgn)
1080
	    r = LARGEST_BID64;
1081
	  break;
1082
	case ROUNDING_TO_ZERO:
1083
	  r = sgn | LARGEST_BID64;
1084
	  break;
1085
	case ROUNDING_UP:
1086
	  // round up
1087
	  if (sgn)
1088
	    r = SMALLEST_BID64;
1089
	}
1090
	return r;
1091
      }
1092
    }
1093
  }
1094
 
1095
  mask = 1;
1096
  mask <<= EXPONENT_SHIFT_SMALL64;
1097
 
1098
  // check whether coefficient fits in 10*5+3 bits
1099
  if (coeff < mask) {
1100
    r = expon;
1101
    r <<= EXPONENT_SHIFT_SMALL64;
1102
    r |= (coeff | sgn);
1103
    return r;
1104
  }
1105
  // special format
1106
 
1107
  // eliminate the case coeff==10^16 after rounding
1108
  if (coeff == 10000000000000000ull) {
1109
    r = expon + 1;
1110
    r <<= EXPONENT_SHIFT_SMALL64;
1111
    r |= (1000000000000000ull | sgn);
1112
    return r;
1113
  }
1114
 
1115
  r = expon;
1116
  r <<= EXPONENT_SHIFT_LARGE64;
1117
  r |= (sgn | SPECIAL_ENCODING_MASK64);
1118
  // add coeff, without leading bits
1119
  mask = (mask >> 2) - 1;
1120
  coeff &= mask;
1121
  r |= coeff;
1122
 
1123
  return r;
1124
}
1125
 
1126
 
1127
//
1128
//   No overflow/underflow checking
1129
//   or checking for coefficients equal to 10^16 (after rounding)
1130
//
1131
__BID_INLINE__ UINT64
1132
very_fast_get_BID64 (UINT64 sgn, int expon, UINT64 coeff) {
1133
  UINT64 r, mask;
1134
 
1135
  mask = 1;
1136
  mask <<= EXPONENT_SHIFT_SMALL64;
1137
 
1138
  // check whether coefficient fits in 10*5+3 bits
1139
  if (coeff < mask) {
1140
    r = expon;
1141
    r <<= EXPONENT_SHIFT_SMALL64;
1142
    r |= (coeff | sgn);
1143
    return r;
1144
  }
1145
  // special format
1146
  r = expon;
1147
  r <<= EXPONENT_SHIFT_LARGE64;
1148
  r |= (sgn | SPECIAL_ENCODING_MASK64);
1149
  // add coeff, without leading bits
1150
  mask = (mask >> 2) - 1;
1151
  coeff &= mask;
1152
  r |= coeff;
1153
 
1154
  return r;
1155
}
1156
 
1157
//
1158
//   No overflow/underflow checking or checking for coefficients above 2^53
1159
//
1160
__BID_INLINE__ UINT64
1161
very_fast_get_BID64_small_mantissa (UINT64 sgn, int expon, UINT64 coeff) {
1162
  // no UF/OF
1163
  UINT64 r;
1164
 
1165
  r = expon;
1166
  r <<= EXPONENT_SHIFT_SMALL64;
1167
  r |= (coeff | sgn);
1168
  return r;
1169
}
1170
 
1171
 
1172
//
1173
// This pack macro is used when underflow is known to occur
1174
//
1175
__BID_INLINE__ UINT64
1176
get_BID64_UF (UINT64 sgn, int expon, UINT64 coeff, UINT64 R, int rmode,
1177
	      unsigned *fpsc) {
1178
  UINT128 C128, Q_low, Stemp;
1179
  UINT64 C64, remainder_h, QH, carry, CY;
1180
  int extra_digits, amount, amount2;
1181
  unsigned status;
1182
 
1183
  // underflow
1184
  if (expon + MAX_FORMAT_DIGITS < 0) {
1185
#ifdef SET_STATUS_FLAGS
1186
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
1187
#endif
1188
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1189
#ifndef IEEE_ROUND_NEAREST
1190
    if (rmode == ROUNDING_DOWN && sgn)
1191
      return 0x8000000000000001ull;
1192
    if (rmode == ROUNDING_UP && !sgn)
1193
      return 1ull;
1194
#endif
1195
#endif
1196
    // result is 0
1197
    return sgn;
1198
  }
1199
  // 10*coeff
1200
  coeff = (coeff << 3) + (coeff << 1);
1201
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1202
#ifndef IEEE_ROUND_NEAREST
1203
  if (sgn && (unsigned) (rmode - 1) < 2)
1204
    rmode = 3 - rmode;
1205
#endif
1206
#endif
1207
  if (R)
1208
    coeff |= 1;
1209
  // get digits to be shifted out
1210
  extra_digits = 1 - expon;
1211
  C128.w[0] = coeff + round_const_table[rmode][extra_digits];
1212
 
1213
  // get coeff*(2^M[extra_digits])/10^extra_digits
1214
  __mul_64x128_full (QH, Q_low, C128.w[0],
1215
		     reciprocals10_128[extra_digits]);
1216
 
1217
  // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
1218
  amount = recip_scale[extra_digits];
1219
 
1220
  C64 = QH >> amount;
1221
  //__shr_128(C128, Q_high, amount);
1222
 
1223
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1224
#ifndef IEEE_ROUND_NEAREST
1225
  if (rmode == 0)	//ROUNDING_TO_NEAREST
1226
#endif
1227
    if (C64 & 1) {
1228
      // check whether fractional part of initial_P/10^extra_digits is exactly .5
1229
 
1230
      // get remainder
1231
      amount2 = 64 - amount;
1232
      remainder_h = 0;
1233
      remainder_h--;
1234
      remainder_h >>= amount2;
1235
      remainder_h = remainder_h & QH;
1236
 
1237
      if (!remainder_h
1238
	  && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
1239
	      || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
1240
		  && Q_low.w[0] <
1241
		  reciprocals10_128[extra_digits].w[0]))) {
1242
	C64--;
1243
      }
1244
    }
1245
#endif
1246
 
1247
#ifdef SET_STATUS_FLAGS
1248
 
1249
  if (is_inexact (fpsc))
1250
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
1251
  else {
1252
    status = INEXACT_EXCEPTION;
1253
    // get remainder
1254
    remainder_h = QH << (64 - amount);
1255
 
1256
    switch (rmode) {
1257
    case ROUNDING_TO_NEAREST:
1258
    case ROUNDING_TIES_AWAY:
1259
      // test whether fractional part is 0
1260
      if (remainder_h == 0x8000000000000000ull
1261
	  && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
1262
	      || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
1263
		  && Q_low.w[0] <
1264
		  reciprocals10_128[extra_digits].w[0])))
1265
	status = EXACT_STATUS;
1266
      break;
1267
    case ROUNDING_DOWN:
1268
    case ROUNDING_TO_ZERO:
1269
      if (!remainder_h
1270
	  && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
1271
	      || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
1272
		  && Q_low.w[0] <
1273
		  reciprocals10_128[extra_digits].w[0])))
1274
	status = EXACT_STATUS;
1275
      break;
1276
    default:
1277
      // round up
1278
      __add_carry_out (Stemp.w[0], CY, Q_low.w[0],
1279
		       reciprocals10_128[extra_digits].w[0]);
1280
      __add_carry_in_out (Stemp.w[1], carry, Q_low.w[1],
1281
			  reciprocals10_128[extra_digits].w[1], CY);
1282
      if ((remainder_h >> (64 - amount)) + carry >=
1283
	  (((UINT64) 1) << amount))
1284
	status = EXACT_STATUS;
1285
    }
1286
 
1287
    if (status != EXACT_STATUS)
1288
      __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
1289
  }
1290
 
1291
#endif
1292
 
1293
  return sgn | C64;
1294
 
1295
}
1296
 
1297
 
1298
 
1299
//
1300
//   This pack macro does not check for coefficients above 2^53
1301
//
1302
__BID_INLINE__ UINT64
1303
get_BID64_small_mantissa (UINT64 sgn, int expon, UINT64 coeff,
1304
			  int rmode, unsigned *fpsc) {
1305
  UINT128 C128, Q_low, Stemp;
1306
  UINT64 r, mask, C64, remainder_h, QH, carry, CY;
1307
  int extra_digits, amount, amount2;
1308
  unsigned status;
1309
 
1310
  // check for possible underflow/overflow
1311
  if (((unsigned) expon) >= 3 * 256) {
1312
    if (expon < 0) {
1313
      // underflow
1314
      if (expon + MAX_FORMAT_DIGITS < 0) {
1315
#ifdef SET_STATUS_FLAGS
1316
	__set_status_flags (fpsc,
1317
			    UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
1318
#endif
1319
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1320
#ifndef IEEE_ROUND_NEAREST
1321
	if (rmode == ROUNDING_DOWN && sgn)
1322
	  return 0x8000000000000001ull;
1323
	if (rmode == ROUNDING_UP && !sgn)
1324
	  return 1ull;
1325
#endif
1326
#endif
1327
	// result is 0
1328
	return sgn;
1329
      }
1330
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1331
#ifndef IEEE_ROUND_NEAREST
1332
      if (sgn && (unsigned) (rmode - 1) < 2)
1333
	rmode = 3 - rmode;
1334
#endif
1335
#endif
1336
      // get digits to be shifted out
1337
      extra_digits = -expon;
1338
      C128.w[0] = coeff + round_const_table[rmode][extra_digits];
1339
 
1340
      // get coeff*(2^M[extra_digits])/10^extra_digits
1341
      __mul_64x128_full (QH, Q_low, C128.w[0],
1342
			 reciprocals10_128[extra_digits]);
1343
 
1344
      // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
1345
      amount = recip_scale[extra_digits];
1346
 
1347
      C64 = QH >> amount;
1348
 
1349
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1350
#ifndef IEEE_ROUND_NEAREST
1351
      if (rmode == 0)	//ROUNDING_TO_NEAREST
1352
#endif
1353
	if (C64 & 1) {
1354
	  // check whether fractional part of initial_P/10^extra_digits is exactly .5
1355
 
1356
	  // get remainder
1357
	  amount2 = 64 - amount;
1358
	  remainder_h = 0;
1359
	  remainder_h--;
1360
	  remainder_h >>= amount2;
1361
	  remainder_h = remainder_h & QH;
1362
 
1363
	  if (!remainder_h
1364
	      && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
1365
		  || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
1366
		      && Q_low.w[0] <
1367
		      reciprocals10_128[extra_digits].w[0]))) {
1368
	    C64--;
1369
	  }
1370
	}
1371
#endif
1372
 
1373
#ifdef SET_STATUS_FLAGS
1374
 
1375
      if (is_inexact (fpsc))
1376
	__set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
1377
      else {
1378
	status = INEXACT_EXCEPTION;
1379
	// get remainder
1380
	remainder_h = QH << (64 - amount);
1381
 
1382
	switch (rmode) {
1383
	case ROUNDING_TO_NEAREST:
1384
	case ROUNDING_TIES_AWAY:
1385
	  // test whether fractional part is 0
1386
	  if (remainder_h == 0x8000000000000000ull
1387
	      && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
1388
		  || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
1389
		      && Q_low.w[0] <
1390
		      reciprocals10_128[extra_digits].w[0])))
1391
	    status = EXACT_STATUS;
1392
	  break;
1393
	case ROUNDING_DOWN:
1394
	case ROUNDING_TO_ZERO:
1395
	  if (!remainder_h
1396
	      && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
1397
		  || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
1398
		      && Q_low.w[0] <
1399
		      reciprocals10_128[extra_digits].w[0])))
1400
	    status = EXACT_STATUS;
1401
	  break;
1402
	default:
1403
	  // round up
1404
	  __add_carry_out (Stemp.w[0], CY, Q_low.w[0],
1405
			   reciprocals10_128[extra_digits].w[0]);
1406
	  __add_carry_in_out (Stemp.w[1], carry, Q_low.w[1],
1407
			      reciprocals10_128[extra_digits].w[1], CY);
1408
	  if ((remainder_h >> (64 - amount)) + carry >=
1409
	      (((UINT64) 1) << amount))
1410
	    status = EXACT_STATUS;
1411
	}
1412
 
1413
	if (status != EXACT_STATUS)
1414
	  __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
1415
      }
1416
 
1417
#endif
1418
 
1419
      return sgn | C64;
1420
    }
1421
 
1422
    while (coeff < 1000000000000000ull && expon >= 3 * 256) {
1423
      expon--;
1424
      coeff = (coeff << 3) + (coeff << 1);
1425
    }
1426
    if (expon > DECIMAL_MAX_EXPON_64) {
1427
#ifdef SET_STATUS_FLAGS
1428
      __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
1429
#endif
1430
      // overflow
1431
      r = sgn | INFINITY_MASK64;
1432
      switch (rmode) {
1433
      case ROUNDING_DOWN:
1434
	if (!sgn)
1435
	  r = LARGEST_BID64;
1436
	break;
1437
      case ROUNDING_TO_ZERO:
1438
	r = sgn | LARGEST_BID64;
1439
	break;
1440
      case ROUNDING_UP:
1441
	// round up
1442
	if (sgn)
1443
	  r = SMALLEST_BID64;
1444
      }
1445
      return r;
1446
    } else {
1447
      mask = 1;
1448
      mask <<= EXPONENT_SHIFT_SMALL64;
1449
      if (coeff >= mask) {
1450
	r = expon;
1451
	r <<= EXPONENT_SHIFT_LARGE64;
1452
	r |= (sgn | SPECIAL_ENCODING_MASK64);
1453
	// add coeff, without leading bits
1454
	mask = (mask >> 2) - 1;
1455
	coeff &= mask;
1456
	r |= coeff;
1457
	return r;
1458
      }
1459
    }
1460
  }
1461
 
1462
  r = expon;
1463
  r <<= EXPONENT_SHIFT_SMALL64;
1464
  r |= (coeff | sgn);
1465
 
1466
  return r;
1467
}
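//
//   Illustrative usage sketch (not part of the original header): packing
//   +1234567 * 10^0 as BID64 via the small-mantissa path.  The biased
//   exponent 398 assumes the standard BID64 exponent bias; the helper
//   name below is hypothetical.
//
__BID_INLINE__ UINT64
example_pack_bid64_small (unsigned *fpsc) {
  // sign field 0 (positive), biased exponent 398, coefficient 1234567
  // (small enough to fit directly in the coefficient field)
  return get_BID64_small_mantissa (0ull, 398, 1234567ull,
				   ROUNDING_TO_NEAREST, fpsc);
}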
1468
 
1469
 
1470
/*****************************************************************************
1471
*
1472
*    BID128 pack/unpack macros
1473
*
1474
*****************************************************************************/
1475
 
1476
//
1477
//   Macro for handling BID128 underflow
1478
//         sticky bit given as additional argument
1479
//
1480
__BID_INLINE__ UINT128 *
1481
handle_UF_128_rem (UINT128 * pres, UINT64 sgn, int expon, UINT128 CQ,
1482
		   UINT64 R, unsigned *prounding_mode, unsigned *fpsc) {
1483
  UINT128 T128, TP128, Qh, Ql, Qh1, Stemp, Tmp, Tmp1, CQ2, CQ8;
1484
  UINT64 carry, CY;
1485
  int ed2, amount;
1486
  unsigned rmode, status;
1487
 
1488
  // UF occurs
1489
  if (expon + MAX_FORMAT_DIGITS_128 < 0) {
1490
#ifdef SET_STATUS_FLAGS
1491
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
1492
#endif
1493
    pres->w[1] = sgn;
1494
    pres->w[0] = 0;
1495
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1496
#ifndef IEEE_ROUND_NEAREST
1497
    if ((sgn && *prounding_mode == ROUNDING_DOWN)
1498
	|| (!sgn && *prounding_mode == ROUNDING_UP))
1499
      pres->w[0] = 1ull;
1500
#endif
1501
#endif
1502
    return pres;
1503
  }
1504
  // CQ *= 10
1505
  CQ2.w[1] = (CQ.w[1] << 1) | (CQ.w[0] >> 63);
1506
  CQ2.w[0] = CQ.w[0] << 1;
1507
  CQ8.w[1] = (CQ.w[1] << 3) | (CQ.w[0] >> 61);
1508
  CQ8.w[0] = CQ.w[0] << 3;
1509
  __add_128_128 (CQ, CQ2, CQ8);
1510
 
1511
  // add remainder
1512
  if (R)
1513
    CQ.w[0] |= 1;
1514
 
1515
  ed2 = 1 - expon;
1516
  // add rounding constant to CQ
1517
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1518
#ifndef IEEE_ROUND_NEAREST
1519
  rmode = *prounding_mode;
1520
  if (sgn && (unsigned) (rmode - 1) < 2)
1521
    rmode = 3 - rmode;
1522
#else
1523
  rmode = 0;
1524
#endif
1525
#else
1526
  rmode = 0;
1527
#endif
1528
  T128 = round_const_table_128[rmode][ed2];
1529
  __add_carry_out (CQ.w[0], carry, T128.w[0], CQ.w[0]);
1530
  CQ.w[1] = CQ.w[1] + T128.w[1] + carry;
1531
 
1532
  TP128 = reciprocals10_128[ed2];
1533
  __mul_128x128_full (Qh, Ql, CQ, TP128);
1534
  amount = recip_scale[ed2];
1535
 
1536
  if (amount >= 64) {
1537
    CQ.w[0] = Qh.w[1] >> (amount - 64);
1538
    CQ.w[1] = 0;
1539
  } else {
1540
    __shr_128 (CQ, Qh, amount);
1541
  }
1542
 
1543
  expon = 0;
1544
 
1545
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1546
#ifndef IEEE_ROUND_NEAREST
1547
  if (!(*prounding_mode))
1548
#endif
1549
    if (CQ.w[0] & 1) {
1550
      // check whether fractional part of initial_P/10^ed2 is exactly .5
1551
 
1552
      // get remainder
1553
      __shl_128_long (Qh1, Qh, (128 - amount));
1554
 
1555
      if (!Qh1.w[1] && !Qh1.w[0]
1556
	  && (Ql.w[1] < reciprocals10_128[ed2].w[1]
1557
	      || (Ql.w[1] == reciprocals10_128[ed2].w[1]
1558
		  && Ql.w[0] < reciprocals10_128[ed2].w[0]))) {
1559
	CQ.w[0]--;
1560
      }
1561
    }
1562
#endif
1563
 
1564
#ifdef SET_STATUS_FLAGS
1565
 
1566
  if (is_inexact (fpsc))
1567
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
1568
  else {
1569
    status = INEXACT_EXCEPTION;
1570
    // get remainder
1571
    __shl_128_long (Qh1, Qh, (128 - amount));
1572
 
1573
    switch (rmode) {
1574
    case ROUNDING_TO_NEAREST:
1575
    case ROUNDING_TIES_AWAY:
1576
      // test whether fractional part is 0
1577
      if (Qh1.w[1] == 0x8000000000000000ull && (!Qh1.w[0])
1578
	  && (Ql.w[1] < reciprocals10_128[ed2].w[1]
1579
	      || (Ql.w[1] == reciprocals10_128[ed2].w[1]
1580
		  && Ql.w[0] < reciprocals10_128[ed2].w[0])))
1581
	status = EXACT_STATUS;
1582
      break;
1583
    case ROUNDING_DOWN:
1584
    case ROUNDING_TO_ZERO:
1585
      if ((!Qh1.w[1]) && (!Qh1.w[0])
1586
	  && (Ql.w[1] < reciprocals10_128[ed2].w[1]
1587
	      || (Ql.w[1] == reciprocals10_128[ed2].w[1]
1588
		  && Ql.w[0] < reciprocals10_128[ed2].w[0])))
1589
	status = EXACT_STATUS;
1590
      break;
1591
    default:
1592
      // round up
1593
      __add_carry_out (Stemp.w[0], CY, Ql.w[0],
1594
		       reciprocals10_128[ed2].w[0]);
1595
      __add_carry_in_out (Stemp.w[1], carry, Ql.w[1],
1596
			  reciprocals10_128[ed2].w[1], CY);
1597
      __shr_128_long (Qh, Qh1, (128 - amount));
1598
      Tmp.w[0] = 1;
1599
      Tmp.w[1] = 0;
1600
      __shl_128_long (Tmp1, Tmp, amount);
1601
      Qh.w[0] += carry;
1602
      if (Qh.w[0] < carry)
1603
	Qh.w[1]++;
1604
      if (__unsigned_compare_ge_128 (Qh, Tmp1))
1605
	status = EXACT_STATUS;
1606
    }
1607
 
1608
    if (status != EXACT_STATUS)
1609
      __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
1610
  }
1611
 
1612
#endif
1613
 
1614
  pres->w[1] = sgn | CQ.w[1];
1615
  pres->w[0] = CQ.w[0];
1616
 
1617
  return pres;
1618
 
1619
}
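//
//   Illustrative usage sketch (not part of the original header): calling
//   handle_UF_128_rem with an underflowed exponent and a non-zero sticky
//   remainder R, which records that digits already discarded by the caller
//   were non-zero.  Names and values are hypothetical.
//
__BID_INLINE__ UINT128 *
example_handle_uf_rem (UINT128 * pres, unsigned *prounding_mode,
		       unsigned *fpsc) {
  UINT128 CQ;
  CQ.w[1] = 0ull;
  CQ.w[0] = 999999999999999ull;	// coefficient left over by the caller
  // expon = -2: the coefficient must still be scaled down by 10^2;
  // R = 1 is the sticky bit for the part discarded earlier
  return handle_UF_128_rem (pres, 0ull, -2, CQ, 1ull, prounding_mode,
			    fpsc);
}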
1620
 
1621
 
1622
//
1623
//   Macro for handling BID128 underflow
1624
//
1625
__BID_INLINE__ UINT128 *
1626
handle_UF_128 (UINT128 * pres, UINT64 sgn, int expon, UINT128 CQ,
1627
	       unsigned *prounding_mode, unsigned *fpsc) {
1628
  UINT128 T128, TP128, Qh, Ql, Qh1, Stemp, Tmp, Tmp1;
1629
  UINT64 carry, CY;
1630
  int ed2, amount;
1631
  unsigned rmode, status;
1632
 
1633
  // UF occurs
1634
  if (expon + MAX_FORMAT_DIGITS_128 < 0) {
1635
#ifdef SET_STATUS_FLAGS
1636
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
1637
#endif
1638
    pres->w[1] = sgn;
1639
    pres->w[0] = 0;
1640
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1641
#ifndef IEEE_ROUND_NEAREST
1642
    if ((sgn && *prounding_mode == ROUNDING_DOWN)
1643
	|| (!sgn && *prounding_mode == ROUNDING_UP))
1644
      pres->w[0] = 1ull;
1645
#endif
1646
#endif
1647
    return pres;
1648
  }
1649
 
1650
  ed2 = 0 - expon;
1651
  // add rounding constant to CQ
1652
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1653
#ifndef IEEE_ROUND_NEAREST
1654
  rmode = *prounding_mode;
1655
  if (sgn && (unsigned) (rmode - 1) < 2)
1656
    rmode = 3 - rmode;
1657
#else
1658
  rmode = 0;
1659
#endif
1660
#else
1661
  rmode = 0;
1662
#endif
1663
 
1664
  T128 = round_const_table_128[rmode][ed2];
1665
  __add_carry_out (CQ.w[0], carry, T128.w[0], CQ.w[0]);
1666
  CQ.w[1] = CQ.w[1] + T128.w[1] + carry;
1667
 
1668
  TP128 = reciprocals10_128[ed2];
1669
  __mul_128x128_full (Qh, Ql, CQ, TP128);
1670
  amount = recip_scale[ed2];
1671
 
1672
  if (amount >= 64) {
1673
    CQ.w[0] = Qh.w[1] >> (amount - 64);
1674
    CQ.w[1] = 0;
1675
  } else {
1676
    __shr_128 (CQ, Qh, amount);
1677
  }
1678
 
1679
  expon = 0;
1680
 
1681
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1682
#ifndef IEEE_ROUND_NEAREST
1683
  if (!(*prounding_mode))
1684
#endif
1685
    if (CQ.w[0] & 1) {
1686
      // check whether fractional part of initial_P/10^ed2 is exactly .5
1687
 
1688
      // get remainder
1689
      __shl_128_long (Qh1, Qh, (128 - amount));
1690
 
1691
      if (!Qh1.w[1] && !Qh1.w[0]
1692
	  && (Ql.w[1] < reciprocals10_128[ed2].w[1]
1693
	      || (Ql.w[1] == reciprocals10_128[ed2].w[1]
1694
		  && Ql.w[0] < reciprocals10_128[ed2].w[0]))) {
1695
	CQ.w[0]--;
1696
      }
1697
    }
1698
#endif
1699
 
1700
#ifdef SET_STATUS_FLAGS
1701
 
1702
  if (is_inexact (fpsc))
1703
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
1704
  else {
1705
    status = INEXACT_EXCEPTION;
1706
    // get remainder
1707
    __shl_128_long (Qh1, Qh, (128 - amount));
1708
 
1709
    switch (rmode) {
1710
    case ROUNDING_TO_NEAREST:
1711
    case ROUNDING_TIES_AWAY:
1712
      // test whether fractional part is 0
1713
      if (Qh1.w[1] == 0x8000000000000000ull && (!Qh1.w[0])
1714
	  && (Ql.w[1] < reciprocals10_128[ed2].w[1]
1715
	      || (Ql.w[1] == reciprocals10_128[ed2].w[1]
1716
		  && Ql.w[0] < reciprocals10_128[ed2].w[0])))
1717
	status = EXACT_STATUS;
1718
      break;
1719
    case ROUNDING_DOWN:
1720
    case ROUNDING_TO_ZERO:
1721
      if ((!Qh1.w[1]) && (!Qh1.w[0])
1722
	  && (Ql.w[1] < reciprocals10_128[ed2].w[1]
1723
	      || (Ql.w[1] == reciprocals10_128[ed2].w[1]
1724
		  && Ql.w[0] < reciprocals10_128[ed2].w[0])))
1725
	status = EXACT_STATUS;
1726
      break;
1727
    default:
1728
      // round up
1729
      __add_carry_out (Stemp.w[0], CY, Ql.w[0],
1730
		       reciprocals10_128[ed2].w[0]);
1731
      __add_carry_in_out (Stemp.w[1], carry, Ql.w[1],
1732
			  reciprocals10_128[ed2].w[1], CY);
1733
      __shr_128_long (Qh, Qh1, (128 - amount));
1734
      Tmp.w[0] = 1;
1735
      Tmp.w[1] = 0;
1736
      __shl_128_long (Tmp1, Tmp, amount);
1737
      Qh.w[0] += carry;
1738
      if (Qh.w[0] < carry)
1739
	Qh.w[1]++;
1740
      if (__unsigned_compare_ge_128 (Qh, Tmp1))
1741
	status = EXACT_STATUS;
1742
    }
1743
 
1744
    if (status != EXACT_STATUS)
1745
      __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
1746
  }
1747
 
1748
#endif
1749
 
1750
  pres->w[1] = sgn | CQ.w[1];
1751
  pres->w[0] = CQ.w[0];
1752
 
1753
  return pres;
1754
 
1755
}
1756
 
1757
 
1758
 
1759
//
1760
//  BID128 unpack, input passed by value
1761
//
1762
__BID_INLINE__ UINT64
1763
unpack_BID128_value (UINT64 * psign_x, int *pexponent_x,
1764
		     UINT128 * pcoefficient_x, UINT128 x) {
1765
  UINT128 coeff, T33, T34;
1766
  UINT64 ex;
1767
 
1768
  *psign_x = (x.w[1]) & 0x8000000000000000ull;
1769
 
1770
  // special encodings
1771
  if ((x.w[1] & INFINITY_MASK64) >= SPECIAL_ENCODING_MASK64) {
1772
    if ((x.w[1] & INFINITY_MASK64) < INFINITY_MASK64) {
1773
      // non-canonical input
1774
      pcoefficient_x->w[0] = 0;
1775
      pcoefficient_x->w[1] = 0;
1776
      ex = (x.w[1]) >> 47;
1777
      *pexponent_x = ((int) ex) & EXPONENT_MASK128;
1778
      return 0;
1779
    }
1780
    // 10^33
1781
    T33 = power10_table_128[33];
1782
    /*coeff.w[0] = x.w[0];
1783
       coeff.w[1] = (x.w[1]) & LARGE_COEFF_MASK128;
1784
       pcoefficient_x->w[0] = x.w[0];
1785
       pcoefficient_x->w[1] = x.w[1];
1786
       if (__unsigned_compare_ge_128 (coeff, T33)) // non-canonical
1787
       pcoefficient_x->w[1] &= (~LARGE_COEFF_MASK128); */
1788
 
1789
    pcoefficient_x->w[0] = x.w[0];
1790
    pcoefficient_x->w[1] = (x.w[1]) & 0x00003fffffffffffull;
1791
    if (__unsigned_compare_ge_128 ((*pcoefficient_x), T33))	// non-canonical
1792
    {
1793
      pcoefficient_x->w[1] = (x.w[1]) & 0xfe00000000000000ull;
1794
      pcoefficient_x->w[0] = 0;
1795
    } else
1796
      pcoefficient_x->w[1] = (x.w[1]) & 0xfe003fffffffffffull;
1797
    if ((x.w[1] & NAN_MASK64) == INFINITY_MASK64) {
1798
      pcoefficient_x->w[0] = 0;
1799
      pcoefficient_x->w[1] = x.w[1] & SINFINITY_MASK64;
1800
    }
1801
    *pexponent_x = 0;
1802
    return 0;	// NaN or Infinity
1803
  }
1804
 
1805
  coeff.w[0] = x.w[0];
1806
  coeff.w[1] = (x.w[1]) & SMALL_COEFF_MASK128;
1807
 
1808
  // 10^34
1809
  T34 = power10_table_128[34];
1810
  // check for non-canonical values
1811
  if (__unsigned_compare_ge_128 (coeff, T34))
1812
    coeff.w[0] = coeff.w[1] = 0;
1813
 
1814
  pcoefficient_x->w[0] = coeff.w[0];
1815
  pcoefficient_x->w[1] = coeff.w[1];
1816
 
1817
  ex = (x.w[1]) >> 49;
1818
  *pexponent_x = ((int) ex) & EXPONENT_MASK128;
1819
 
1820
  return coeff.w[0] | coeff.w[1];
1821
}
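//
//   Illustrative usage sketch (not part of the original header):
//   decomposing a finite BID128 operand into sign, biased exponent and
//   128-bit coefficient; for finite inputs the return value is non-zero
//   iff the canonical coefficient is non-zero.  The helper name is
//   hypothetical.
//
__BID_INLINE__ UINT64
example_unpack_bid128 (UINT128 x) {
  UINT64 sign_x;
  int exponent_x;
  UINT128 coefficient_x;
  UINT64 nonzero;

  nonzero = unpack_BID128_value (&sign_x, &exponent_x, &coefficient_x, x);
  // sign_x holds the sign bit, exponent_x the biased decimal exponent,
  // coefficient_x the (canonicalized) coefficient
  return nonzero;
}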
1822
 
1823
 
1824
//
1825
//  BID128 unpack, input passed by reference
1826
//
1827
__BID_INLINE__ UINT64
1828
unpack_BID128 (UINT64 * psign_x, int *pexponent_x,
1829
	       UINT128 * pcoefficient_x, UINT128 * px) {
1830
  UINT128 coeff, T33, T34;
1831
  UINT64 ex;
1832
 
1833
  *psign_x = (px->w[1]) & 0x8000000000000000ull;
1834
 
1835
  // special encodings
1836
  if ((px->w[1] & INFINITY_MASK64) >= SPECIAL_ENCODING_MASK64) {
1837
    if ((px->w[1] & INFINITY_MASK64) < INFINITY_MASK64) {
1838
      // non-canonical input
1839
      pcoefficient_x->w[0] = 0;
1840
      pcoefficient_x->w[1] = 0;
1841
      ex = (px->w[1]) >> 47;
1842
      *pexponent_x = ((int) ex) & EXPONENT_MASK128;
1843
      return 0;
1844
    }
1845
    // 10^33
1846
    T33 = power10_table_128[33];
1847
    coeff.w[0] = px->w[0];
1848
    coeff.w[1] = (px->w[1]) & LARGE_COEFF_MASK128;
1849
    pcoefficient_x->w[0] = px->w[0];
1850
    pcoefficient_x->w[1] = px->w[1];
1851
    if (__unsigned_compare_ge_128 (coeff, T33)) {	// non-canonical
1852
      pcoefficient_x->w[1] &= (~LARGE_COEFF_MASK128);
1853
      pcoefficient_x->w[0] = 0;
1854
    }
1855
    *pexponent_x = 0;
1856
    return 0;	// NaN or Infinity
1857
  }
1858
 
1859
  coeff.w[0] = px->w[0];
1860
  coeff.w[1] = (px->w[1]) & SMALL_COEFF_MASK128;
1861
 
1862
  // 10^34
1863
  T34 = power10_table_128[34];
1864
  // check for non-canonical values
1865
  if (__unsigned_compare_ge_128 (coeff, T34))
1866
    coeff.w[0] = coeff.w[1] = 0;
1867
 
1868
  pcoefficient_x->w[0] = coeff.w[0];
1869
  pcoefficient_x->w[1] = coeff.w[1];
1870
 
1871
  ex = (px->w[1]) >> 49;
1872
  *pexponent_x = ((int) ex) & EXPONENT_MASK128;
1873
 
1874
  return coeff.w[0] | coeff.w[1];
1875
}
1876
 
1877
//
1878
//   Pack macro checks for overflow, but not underflow
1879
//
1880
__BID_INLINE__ UINT128 *
1881
get_BID128_very_fast_OF (UINT128 * pres, UINT64 sgn, int expon,
1882
			 UINT128 coeff, unsigned *prounding_mode,
1883
			 unsigned *fpsc) {
1884
  UINT128 T;
1885
  UINT64 tmp, tmp2;
1886
 
1887
  if ((unsigned) expon > DECIMAL_MAX_EXPON_128) {
1888
 
1889
    if (expon - MAX_FORMAT_DIGITS_128 <= DECIMAL_MAX_EXPON_128) {
1890
      T = power10_table_128[MAX_FORMAT_DIGITS_128 - 1];
1891
      while (__unsigned_compare_gt_128 (T, coeff)
1892
	     && expon > DECIMAL_MAX_EXPON_128) {
1893
	coeff.w[1] =
1894
	  (coeff.w[1] << 3) + (coeff.w[1] << 1) + (coeff.w[0] >> 61) +
1895
	  (coeff.w[0] >> 63);
1896
	tmp2 = coeff.w[0] << 3;
1897
	coeff.w[0] = (coeff.w[0] << 1) + tmp2;
1898
	if (coeff.w[0] < tmp2)
1899
	  coeff.w[1]++;
1900
 
1901
	expon--;
1902
      }
1903
    }
1904
    if ((unsigned) expon > DECIMAL_MAX_EXPON_128) {
1905
      // OF
1906
#ifdef SET_STATUS_FLAGS
1907
      __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
1908
#endif
1909
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1910
#ifndef IEEE_ROUND_NEAREST
1911
      if (*prounding_mode == ROUNDING_TO_ZERO
1912
	  || (sgn && *prounding_mode == ROUNDING_UP) || (!sgn
1913
							 &&
1914
							 *prounding_mode
1915
							 ==
1916
							 ROUNDING_DOWN))
1917
      {
1918
	pres->w[1] = sgn | LARGEST_BID128_HIGH;
1919
	pres->w[0] = LARGEST_BID128_LOW;
1920
      } else
1921
#endif
1922
#endif
1923
      {
1924
	pres->w[1] = sgn | INFINITY_MASK64;
1925
	pres->w[0] = 0;
1926
      }
1927
      return pres;
1928
    }
1929
  }
1930
 
1931
  pres->w[0] = coeff.w[0];
1932
  tmp = expon;
1933
  tmp <<= 49;
1934
  pres->w[1] = sgn | tmp | coeff.w[1];
1935
 
1936
  return pres;
1937
}
1938
 
1939
 
1940
//
1941
//   No overflow/underflow checks
1942
//   No checking for coefficient == 10^34 (rounding artifact)
1943
//
1944
__BID_INLINE__ UINT128 *
1945
get_BID128_very_fast (UINT128 * pres, UINT64 sgn, int expon,
1946
		      UINT128 coeff) {
1947
  UINT64 tmp;
1948
 
1949
  pres->w[0] = coeff.w[0];
1950
  tmp = expon;
1951
  tmp <<= 49;
1952
  pres->w[1] = sgn | tmp | coeff.w[1];
1953
 
1954
  return pres;
1955
}
1956
 
1957
//
1958
//   No overflow/underflow checks
1959
//
1960
__BID_INLINE__ UINT128 *
1961
get_BID128_fast (UINT128 * pres, UINT64 sgn, int expon, UINT128 coeff) {
1962
  UINT64 tmp;
1963
 
1964
  // coeff==10^34?
1965
  if (coeff.w[1] == 0x0001ed09bead87c0ull
1966
      && coeff.w[0] == 0x378d8e6400000000ull) {
1967
    expon++;
1968
    // set coefficient to 10^33
1969
    coeff.w[1] = 0x0000314dc6448d93ull;
1970
    coeff.w[0] = 0x38c15b0a00000000ull;
1971
  }
1972
 
1973
  pres->w[0] = coeff.w[0];
1974
  tmp = expon;
1975
  tmp <<= 49;
1976
  pres->w[1] = sgn | tmp | coeff.w[1];
1977
 
1978
  return pres;
1979
}
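//
//   Illustrative sketch (not part of the original header): a coefficient of
//   exactly 10^34, which can only arise as a rounding artifact, is
//   renormalized by get_BID128_fast to 10^33 with the exponent incremented.
//   The helper name and the biased exponent 6176 (true exponent 0, assuming
//   the standard BID128 bias) are illustrative.
//
__BID_INLINE__ UINT128 *
example_pack_bid128_artifact (UINT128 * pres) {
  UINT128 c;
  c.w[1] = 0x0001ed09bead87c0ull;	// high 64 bits of 10^34
  c.w[0] = 0x378d8e6400000000ull;	// low 64 bits of 10^34
  // the packed result carries coefficient 10^33 and biased exponent 6177
  return get_BID128_fast (pres, 0ull, 6176, c);
}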
1980
 
1981
//
1982
//   General BID128 pack macro
1983
//
1984
__BID_INLINE__ UINT128 *
1985
get_BID128 (UINT128 * pres, UINT64 sgn, int expon, UINT128 coeff,
1986
	    unsigned *prounding_mode, unsigned *fpsc) {
1987
  UINT128 T;
1988
  UINT64 tmp, tmp2;
1989
 
1990
  // coeff==10^34?
1991
  if (coeff.w[1] == 0x0001ed09bead87c0ull
1992
      && coeff.w[0] == 0x378d8e6400000000ull) {
1993
    expon++;
1994
    // set coefficient to 10^33
1995
    coeff.w[1] = 0x0000314dc6448d93ull;
1996
    coeff.w[0] = 0x38c15b0a00000000ull;
1997
  }
1998
  // check OF, UF
1999
  if (expon < 0 || expon > DECIMAL_MAX_EXPON_128) {
2000
    // check UF
2001
    if (expon < 0) {
2002
      return handle_UF_128 (pres, sgn, expon, coeff, prounding_mode,
2003
			    fpsc);
2004
    }
2005
 
2006
    if (expon - MAX_FORMAT_DIGITS_128 <= DECIMAL_MAX_EXPON_128) {
2007
      T = power10_table_128[MAX_FORMAT_DIGITS_128 - 1];
2008
      while (__unsigned_compare_gt_128 (T, coeff)
2009
	     && expon > DECIMAL_MAX_EXPON_128) {
2010
	coeff.w[1] =
2011
	  (coeff.w[1] << 3) + (coeff.w[1] << 1) + (coeff.w[0] >> 61) +
2012
	  (coeff.w[0] >> 63);
2013
	tmp2 = coeff.w[0] << 3;
2014
	coeff.w[0] = (coeff.w[0] << 1) + tmp2;
2015
	if (coeff.w[0] < tmp2)
2016
	  coeff.w[1]++;
2017
 
2018
	expon--;
2019
      }
2020
    }
2021
    if (expon > DECIMAL_MAX_EXPON_128) {
2022
      if (!(coeff.w[1] | coeff.w[0])) {
2023
	pres->w[1] = sgn | (((UINT64) DECIMAL_MAX_EXPON_128) << 49);
2024
	pres->w[0] = 0;
2025
	return pres;
2026
      }
2027
      // OF
2028
#ifdef SET_STATUS_FLAGS
2029
      __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
2030
#endif
2031
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
2032
#ifndef IEEE_ROUND_NEAREST
2033
      if (*prounding_mode == ROUNDING_TO_ZERO
2034
	  || (sgn && *prounding_mode == ROUNDING_UP) || (!sgn
2035
							 &&
2036
							 *prounding_mode
2037
							 ==
2038
							 ROUNDING_DOWN))
2039
      {
2040
	pres->w[1] = sgn | LARGEST_BID128_HIGH;
2041
	pres->w[0] = LARGEST_BID128_LOW;
2042
      } else
2043
#endif
2044
#endif
2045
      {
2046
	pres->w[1] = sgn | INFINITY_MASK64;
2047
	pres->w[0] = 0;
2048
      }
2049
      return pres;
2050
    }
2051
  }
2052
 
2053
  pres->w[0] = coeff.w[0];
2054
  tmp = expon;
2055
  tmp <<= 49;
2056
  pres->w[1] = sgn | tmp | coeff.w[1];
2057
 
2058
  return pres;
2059
}
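//
//   Illustrative usage sketch (not part of the original header): packing a
//   small coefficient with the general BID128 pack macro and reading it
//   back with unpack_BID128_value.  The biased exponent 6176 assumes the
//   standard BID128 bias; the helper name is hypothetical.
//
__BID_INLINE__ UINT64
example_bid128_roundtrip (unsigned *prounding_mode, unsigned *fpsc) {
  UINT128 res, coeff, coeff_out;
  UINT64 sign_out;
  int exp_out;

  coeff.w[1] = 0ull;
  coeff.w[0] = 42ull;
  get_BID128 (&res, 0ull, 6176, coeff, prounding_mode, fpsc);
  unpack_BID128_value (&sign_out, &exp_out, &coeff_out, res);
  // expect sign_out == 0, exp_out == 6176, coeff_out.w[0] == 42
  return coeff_out.w[0];
}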
2060
 
2061
 
2062
//
2063
//  Macro used for conversions from string
2064
//        (no additional arguments given for rounding mode, status flags)
2065
//
2066
__BID_INLINE__ UINT128 *
2067
get_BID128_string (UINT128 * pres, UINT64 sgn, int expon, UINT128 coeff) {
2068
  UINT128 D2, D8;
2069
  UINT64 tmp;
2070
  unsigned rmode = 0, status;
2071
 
2072
  // coeff==10^34?
2073
  if (coeff.w[1] == 0x0001ed09bead87c0ull
2074
      && coeff.w[0] == 0x378d8e6400000000ull) {
2075
    expon++;
2076
    // set coefficient to 10^33
2077
    coeff.w[1] = 0x0000314dc6448d93ull;
2078
    coeff.w[0] = 0x38c15b0a00000000ull;
2079
  }
2080
  // check OF, UF
2081
  if ((unsigned) expon > DECIMAL_MAX_EXPON_128) {
2082
    // check UF
2083
    if (expon < 0)
2084
      return handle_UF_128 (pres, sgn, expon, coeff, &rmode, &status);
2085
 
2086
    // OF
2087
 
2088
    if (expon < DECIMAL_MAX_EXPON_128 + 34) {
2089
      while (expon > DECIMAL_MAX_EXPON_128 &&
2090
	     (coeff.w[1] < power10_table_128[33].w[1] ||
2091
	      (coeff.w[1] == power10_table_128[33].w[1]
2092
	       && coeff.w[0] < power10_table_128[33].w[0]))) {
2093
	D2.w[1] = (coeff.w[1] << 1) | (coeff.w[0] >> 63);
2094
	D2.w[0] = coeff.w[0] << 1;
2095
	D8.w[1] = (coeff.w[1] << 3) | (coeff.w[0] >> 61);
2096
	D8.w[0] = coeff.w[0] << 3;
2097
 
2098
	__add_128_128 (coeff, D2, D8);
2099
	expon--;
2100
      }
2101
    } else if (!(coeff.w[0] | coeff.w[1]))
2102
      expon = DECIMAL_MAX_EXPON_128;
2103
 
2104
    if (expon > DECIMAL_MAX_EXPON_128) {
2105
      pres->w[1] = sgn | INFINITY_MASK64;
2106
      pres->w[0] = 0;
2107
      switch (rmode) {
2108
      case ROUNDING_DOWN:
2109
	if (!sgn) {
2110
	  pres->w[1] = LARGEST_BID128_HIGH;
2111
	  pres->w[0] = LARGEST_BID128_LOW;
2112
	}
2113
	break;
2114
      case ROUNDING_TO_ZERO:
2115
	pres->w[1] = sgn | LARGEST_BID128_HIGH;
2116
	pres->w[0] = LARGEST_BID128_LOW;
2117
	break;
2118
      case ROUNDING_UP:
2119
	// round up
2120
	if (sgn) {
2121
	  pres->w[1] = sgn | LARGEST_BID128_HIGH;
2122
	  pres->w[0] = LARGEST_BID128_LOW;
2123
	}
2124
	break;
2125
      }
2126
 
2127
      return pres;
2128
    }
2129
  }
2130
 
2131
  pres->w[0] = coeff.w[0];
2132
  tmp = expon;
2133
  tmp <<= 49;
2134
  pres->w[1] = sgn | tmp | coeff.w[1];
2135
 
2136
  return pres;
2137
}
2138
 
2139
 
2140
 
2141
/*****************************************************************************
2142
*
2143
*    BID32 pack/unpack macros
2144
*
2145
*****************************************************************************/
2146
 
2147
 
2148
__BID_INLINE__ UINT32
2149
unpack_BID32 (UINT32 * psign_x, int *pexponent_x,
2150
	      UINT32 * pcoefficient_x, UINT32 x) {
2151
  UINT32 tmp;
2152
 
2153
  *psign_x = x & 0x80000000;
2154
 
2155
  if ((x & SPECIAL_ENCODING_MASK32) == SPECIAL_ENCODING_MASK32) {
2156
    // special encodings
2157
    if ((x & INFINITY_MASK32) == INFINITY_MASK32) {
2158
      *pcoefficient_x = x & 0xfe0fffff;
2159
      if ((x & 0x000fffff) >= 1000000)
2160
	*pcoefficient_x = x & 0xfe000000;
2161
      if ((x & NAN_MASK32) == INFINITY_MASK32)
2162
	*pcoefficient_x = x & 0xf8000000;
2163
      *pexponent_x = 0;
2164
      return 0;	// NaN or Infinity
2165
    }
2166
    // coefficient
2167
    *pcoefficient_x = (x & SMALL_COEFF_MASK32) | LARGE_COEFF_HIGH_BIT32;
2168
    // check for non-canonical value
2169
    if (*pcoefficient_x >= 10000000)
2170
      *pcoefficient_x = 0;
2171
    // get exponent
2172
    tmp = x >> 21;
2173
    *pexponent_x = tmp & EXPONENT_MASK32;
2174
    return 1;
2175
  }
2176
  // exponent
2177
  tmp = x >> 23;
2178
  *pexponent_x = tmp & EXPONENT_MASK32;
2179
  // coefficient
2180
  *pcoefficient_x = (x & LARGE_COEFF_MASK32);
2181
 
2182
  return *pcoefficient_x;
2183
}
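//
//   Illustrative usage sketch (not part of the original header): unpacking
//   the BID32 encoding 0x32800001, which represents +1 * 10^0 assuming the
//   standard BID32 exponent bias of 101.  The helper name is hypothetical.
//
__BID_INLINE__ UINT32
example_unpack_bid32 (void) {
  UINT32 sign_x, coefficient_x;
  int exponent_x;

  unpack_BID32 (&sign_x, &exponent_x, &coefficient_x, 0x32800001);
  // expect sign_x == 0, exponent_x == 101, coefficient_x == 1
  return coefficient_x;
}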
2184
 
2185
//
2186
//   General pack macro for BID32
2187
//
2188
__BID_INLINE__ UINT32
2189
get_BID32 (UINT32 sgn, int expon, UINT64 coeff, int rmode,
2190
	   unsigned *fpsc) {
2191
  UINT128 Q;
2192
  UINT64 C64, remainder_h, carry, Stemp;
2193
  UINT32 r, mask;
2194
  int extra_digits, amount, amount2;
2195
  unsigned status;
2196
 
2197
  if (coeff > 9999999ull) {
2198
    expon++;
2199
    coeff = 1000000ull;
2200
  }
2201
  // check for possible underflow/overflow
2202
  if (((unsigned) expon) > DECIMAL_MAX_EXPON_32) {
2203
    if (expon < 0) {
2204
      // underflow
2205
      if (expon + MAX_FORMAT_DIGITS_32 < 0) {
2206
#ifdef SET_STATUS_FLAGS
2207
	__set_status_flags (fpsc,
2208
			    UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
2209
#endif
2210
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
2211
#ifndef IEEE_ROUND_NEAREST
2212
	if (rmode == ROUNDING_DOWN && sgn)
2213
	  return 0x80000001;
2214
	if (rmode == ROUNDING_UP && !sgn)
2215
	  return 1;
2216
#endif
2217
#endif
2218
	// result is 0
2219
	return sgn;
2220
      }
2221
      // get digits to be shifted out
2222
#ifdef IEEE_ROUND_NEAREST_TIES_AWAY
2223
      rmode = 0;
2224
#endif
2225
#ifdef IEEE_ROUND_NEAREST
2226
      rmode = 0;
2227
#endif
2228
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
2229
#ifndef IEEE_ROUND_NEAREST
2230
      if (sgn && (unsigned) (rmode - 1) < 2)
2231
	rmode = 3 - rmode;
2232
#endif
2233
#endif
2234
 
2235
      extra_digits = -expon;
2236
      coeff += round_const_table[rmode][extra_digits];
2237
 
2238
      // get coeff*(2^M[extra_digits])/10^extra_digits
2239
      __mul_64x64_to_128 (Q, coeff, reciprocals10_64[extra_digits]);
2240
 
2241
      // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
2242
      amount = short_recip_scale[extra_digits];
2243
 
2244
      C64 = Q.w[1] >> amount;
2245
 
2246
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
2247
#ifndef IEEE_ROUND_NEAREST
2248
      if (rmode == 0)	//ROUNDING_TO_NEAREST
2249
#endif
2250
	if (C64 & 1) {
2251
	  // check whether fractional part of initial_P/10^extra_digits is exactly .5
2252
 
2253
	  // get remainder
2254
	  amount2 = 64 - amount;
2255
	  remainder_h = 0;
2256
	  remainder_h--;
2257
	  remainder_h >>= amount2;
2258
	  remainder_h = remainder_h & Q.w[1];
2259
 
2260
	  if (!remainder_h && (Q.w[0] < reciprocals10_64[extra_digits])) {
2261
	    C64--;
2262
	  }
2263
	}
2264
#endif
2265
 
2266
#ifdef SET_STATUS_FLAGS
2267
 
2268
      if (is_inexact (fpsc))
2269
	__set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
2270
      else {
2271
	status = INEXACT_EXCEPTION;
2272
	// get remainder
2273
	remainder_h = Q.w[1] << (64 - amount);
2274
 
2275
	switch (rmode) {
2276
	case ROUNDING_TO_NEAREST:
2277
	case ROUNDING_TIES_AWAY:
2278
	  // test whether fractional part is 0
2279
	  if (remainder_h == 0x8000000000000000ull
2280
	      && (Q.w[0] < reciprocals10_64[extra_digits]))
2281
	    status = EXACT_STATUS;
2282
	  break;
2283
	case ROUNDING_DOWN:
2284
	case ROUNDING_TO_ZERO:
2285
	  if (!remainder_h && (Q.w[0] < reciprocals10_64[extra_digits]))
2286
	    status = EXACT_STATUS;
2287
	  break;
2288
	default:
2289
	  // round up
2290
	  __add_carry_out (Stemp, carry, Q.w[0],
2291
			   reciprocals10_64[extra_digits]);
2292
	  if ((remainder_h >> (64 - amount)) + carry >=
2293
	      (((UINT64) 1) << amount))
2294
	    status = EXACT_STATUS;
2295
	}
2296
 
2297
	if (status != EXACT_STATUS)
2298
	  __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
2299
      }
2300
 
2301
#endif
2302
 
2303
      return sgn | (UINT32) C64;
2304
    }
2305
 
2306
    while (coeff < 1000000 && expon > DECIMAL_MAX_EXPON_32) {
2307
      coeff = (coeff << 3) + (coeff << 1);
2308
      expon--;
2309
    }
2310
    if (((unsigned) expon) > DECIMAL_MAX_EXPON_32) {
2311
      __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
2312
      // overflow
2313
      r = sgn | INFINITY_MASK32;
2314
      switch (rmode) {
2315
      case ROUNDING_DOWN:
2316
	if (!sgn)
2317
	  r = LARGEST_BID32;
2318
	break;
2319
      case ROUNDING_TO_ZERO:
2320
	r = sgn | LARGEST_BID32;
2321
	break;
2322
      case ROUNDING_UP:
2323
	// round up
2324
	if (sgn)
2325
	  r = sgn | LARGEST_BID32;
2326
      }
2327
      return r;
2328
    }
2329
  }
2330
 
2331
  mask = 1 << 23;
2332
 
2333
  // check whether coefficient fits in DECIMAL_COEFF_FIT bits
2334
  if (coeff < mask) {
2335
    r = expon;
2336
    r <<= 23;
2337
    r |= ((UINT32) coeff | sgn);
2338
    return r;
2339
  }
2340
  // special format
2341
 
2342
  r = expon;
2343
  r <<= 21;
2344
  r |= (sgn | SPECIAL_ENCODING_MASK32);
2345
  // add coeff, without leading bits
2346
  mask = (1 << 21) - 1;
2347
  r |= (((UINT32) coeff) & mask);
2348
 
2349
  return r;
2350
}
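//
//   Illustrative usage sketch (not part of the original header): packing
//   +1 * 10^0 with the general BID32 pack macro.  With coefficient 1 and
//   biased exponent 101 (the standard BID32 bias) the expected encoding is
//   0x32800001; the helper name is hypothetical.
//
__BID_INLINE__ UINT32
example_pack_bid32_one (unsigned *fpsc) {
  return get_BID32 (0, 101, 1ull, ROUNDING_TO_NEAREST, fpsc);
}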
2351
 
2352
 
2353
 
2354
//
2355
//   no overflow/underflow checks
2356
//
2357
__BID_INLINE__ UINT32
2358
very_fast_get_BID32 (UINT32 sgn, int expon, UINT32 coeff) {
2359
  UINT32 r, mask;
2360
 
2361
  mask = 1 << 23;
2362
 
2363
  // check whether coefficient fits in 10*2+3 bits
2364
  if (coeff < mask) {
2365
    r = expon;
2366
    r <<= 23;
2367
    r |= (coeff | sgn);
2368
    return r;
2369
  }
2370
  // special format
2371
  r = expon;
2372
  r <<= 21;
2373
  r |= (sgn | SPECIAL_ENCODING_MASK32);
2374
  // add coeff, without leading bits
2375
  mask = (1 << 21) - 1;
2376
  coeff &= mask;
2377
  r |= coeff;
2378
 
2379
  return r;
2380
}
2381
 
2382
 
2383
 
2384
/*************************************************************
2385
 *
2386
 *************************************************************/
2387
typedef
2388
ALIGN (16)
2389
     struct {
2390
       UINT64 w[6];
2391
     } UINT384;
2392
     typedef ALIGN (16)
2393
     struct {
2394
       UINT64 w[8];
2395
     } UINT512;
2396
 
2397
// #define P                               34
2398
#define MASK_STEERING_BITS              0x6000000000000000ull
2399
#define MASK_BINARY_EXPONENT1           0x7fe0000000000000ull
2400
#define MASK_BINARY_SIG1                0x001fffffffffffffull
2401
#define MASK_BINARY_EXPONENT2           0x1ff8000000000000ull
2402
    //used to take G[2:w+3] (sec 3.3)
2403
#define MASK_BINARY_SIG2                0x0007ffffffffffffull
2404
    //used to mask out G4:T0 (sec 3.3)
2405
#define MASK_BINARY_OR2                 0x0020000000000000ull
2406
    //used to prefix 8+G4 to T (sec 3.3)
2407
#define UPPER_EXPON_LIMIT               51
2408
#define MASK_EXP                        0x7ffe000000000000ull
2409
#define MASK_SPECIAL                    0x7800000000000000ull
2410
#define MASK_NAN                        0x7c00000000000000ull
2411
#define MASK_SNAN                       0x7e00000000000000ull
2412
#define MASK_ANY_INF                    0x7c00000000000000ull
2413
#define MASK_INF                        0x7800000000000000ull
2414
#define MASK_SIGN                       0x8000000000000000ull
2415
#define MASK_COEFF                      0x0001ffffffffffffull
2416
#define BIN_EXP_BIAS                    (0x1820ull << 49)
2417
 
2418
#define EXP_MIN                         0x0000000000000000ull
2419
   // EXP_MIN = (-6176 + 6176) << 49
2420
#define EXP_MAX                         0x5ffe000000000000ull
2421
  // EXP_MAX = (6111 + 6176) << 49
2422
#define EXP_MAX_P1                      0x6000000000000000ull
2423
  // EXP_MAX + 1 = (6111 + 6176 + 1) << 49
2424
#define EXP_P1                          0x0002000000000000ull
2425
  // EXP_P1 = 1 << 49
2426
#define expmin                            -6176
2427
  // min unbiased exponent
2428
#define expmax                            6111
2429
  // max unbiased exponent
2430
#define expmin16                          -398
2431
  // min unbiased exponent
2432
#define expmax16                          369
2433
  // max unbiased exponent
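  // Worked check (illustrative, not from the original header): the biased
  // decimal exponent is stored in bits 49 and up, with bias 6176 = 0x1820:
  //   EXP_MAX = (6111 + 6176) << 49 = 12287 << 49 = 0x5ffe000000000000ull
  //   EXP_P1  =             1 << 49               = 0x0002000000000000ull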
2434
 
2435
#define SIGNMASK32 0x80000000
2436
#define BID64_SIG_MAX 0x002386F26FC0ffffull
2437
#define SIGNMASK64    0x8000000000000000ull
2438
 
2439
// typedef unsigned int FPSC; // floating-point status and control
2440
	// bit31:
2441
	// bit30:
2442
	// bit29:
2443
	// bit28:
2444
	// bit27:
2445
	// bit26:
2446
	// bit25:
2447
	// bit24:
2448
	// bit23:
2449
	// bit22:
2450
	// bit21:
2451
	// bit20:
2452
	// bit19:
2453
	// bit18:
2454
	// bit17:
2455
	// bit16:
2456
	// bit15:
2457
	// bit14: RC:2
2458
	// bit13: RC:1
2459
	// bit12: RC:0
2460
	// bit11: PM
2461
	// bit10: UM
2462
	// bit9:  OM
2463
	// bit8:  ZM
2464
	// bit7:  DM
2465
	// bit6:  IM
2466
	// bit5:  PE
2467
	// bit4:  UE
2468
	// bit3:  OE
2469
	// bit2:  ZE
2470
	// bit1:  DE
2471
	// bit0:  IE
2472
 
2473
#define ROUNDING_MODE_MASK	0x00007000
2474
 
2475
     typedef struct _DEC_DIGITS {
2476
       unsigned int digits;
2477
       UINT64 threshold_hi;
2478
       UINT64 threshold_lo;
2479
       unsigned int digits1;
2480
     } DEC_DIGITS;
2481
 
2482
     extern DEC_DIGITS nr_digits[];
2483
     extern UINT64 midpoint64[];
2484
     extern UINT128 midpoint128[];
2485
     extern UINT192 midpoint192[];
2486
     extern UINT256 midpoint256[];
2487
     extern UINT64 ten2k64[];
2488
     extern UINT128 ten2k128[];
2489
     extern UINT256 ten2k256[];
2490
     extern UINT128 ten2mk128[];
2491
     extern UINT64 ten2mk64[];
2492
     extern UINT128 ten2mk128trunc[];
2493
     extern int shiftright128[];
2494
     extern UINT64 maskhigh128[];
2495
     extern UINT64 maskhigh128M[];
2496
     extern UINT64 maskhigh192M[];
2497
     extern UINT64 maskhigh256M[];
2498
     extern UINT64 onehalf128[];
2499
     extern UINT64 onehalf128M[];
2500
     extern UINT64 onehalf192M[];
2501
     extern UINT64 onehalf256M[];
2502
     extern UINT128 ten2mk128M[];
2503
     extern UINT128 ten2mk128truncM[];
2504
     extern UINT192 ten2mk192truncM[];
2505
     extern UINT256 ten2mk256truncM[];
2506
     extern int shiftright128M[];
2507
     extern int shiftright192M[];
2508
     extern int shiftright256M[];
2509
     extern UINT192 ten2mk192M[];
2510
     extern UINT256 ten2mk256M[];
2511
     extern unsigned char char_table2[];
2512
     extern unsigned char char_table3[];
2513
 
2514
     extern UINT64 ten2m3k64[];
2515
     extern unsigned int shift_ten2m3k64[];
2516
     extern UINT128 ten2m3k128[];
2517
     extern unsigned int shift_ten2m3k128[];
2518
 
2519
 
2520
 
2521
/***************************************************************************
2522
 *************** TABLES FOR GENERAL ROUNDING FUNCTIONS *********************
2523
 ***************************************************************************/
2524
 
2525
     extern UINT64 Kx64[];
2526
     extern unsigned int Ex64m64[];
2527
     extern UINT64 half64[];
2528
     extern UINT64 mask64[];
2529
     extern UINT64 ten2mxtrunc64[];
2530
 
2531
     extern UINT128 Kx128[];
2532
     extern unsigned int Ex128m128[];
2533
     extern UINT64 half128[];
2534
     extern UINT64 mask128[];
2535
     extern UINT128 ten2mxtrunc128[];
2536
 
2537
     extern UINT192 Kx192[];
2538
     extern unsigned int Ex192m192[];
2539
     extern UINT64 half192[];
2540
     extern UINT64 mask192[];
2541
     extern UINT192 ten2mxtrunc192[];
2542
 
2543
     extern UINT256 Kx256[];
2544
     extern unsigned int Ex256m256[];
2545
     extern UINT64 half256[];
2546
     extern UINT64 mask256[];
2547
     extern UINT256 ten2mxtrunc256[];
2548
 
2549
     typedef union __bid64_128 {
2550
       UINT64 b64;
2551
       UINT128 b128;
2552
     } BID64_128;
2553
 
2554
     BID64_128 bid_fma (unsigned int P0,
2555
			BID64_128 x1, unsigned int P1,
2556
			BID64_128 y1, unsigned int P2,
2557
			BID64_128 z1, unsigned int P3,
2558
			unsigned int rnd_mode, FPSC * fpsc);
2559
 
2560
#define         P16     16
2561
#define         P34     34
2562
 
2563
     union __int_double {
2564
       UINT64 i;
2565
       double d;
2566
     };
2567
     typedef union __int_double int_double;
2568
 
2569
 
2570
     union __int_float {
2571
       UINT32 i;
2572
       float d;
2573
     };
2574
     typedef union __int_float int_float;
2575
 
2576
#define SWAP(A,B,T) {\
2577
        T = A; \
2578
        A = B; \
2579
        B = T; \
2580
}
2581
 
2582
// this macro finds A such that coefficient_x is in [2^A, 2^(A+1) ),
// i.e. A = floor(log2(coefficient_x))
#define NUMBITS(A, coefficient_x, tempx){\
      tempx.d=(float)coefficient_x;\
      A=((tempx.i >>23) & EXPONENT_MASK32) - 0x7f;\
}
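//
//   Illustrative usage sketch (not part of the original header): for
//   coefficient_x = 37 (2^5 <= 37 < 2^6) NUMBITS yields A == 5.  The
//   helper name is hypothetical.
//
__BID_INLINE__ int
example_numbits (void) {
  int_float tmp;
  int A;
  UINT64 coefficient_x = 37ull;

  NUMBITS (A, coefficient_x, tmp);
  return A;	// 5
}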
2588
 
2589
     enum class_types {
2590
       signalingNaN,
2591
       quietNaN,
2592
       negativeInfinity,
2593
       negativeNormal,
2594
       negativeSubnormal,
2595
       negativeZero,
2596
       positiveZero,
2597
       positiveSubnormal,
2598
       positiveNormal,
2599
       positiveInfinity
2600
     };
2601
 
2602
     typedef union {
2603
       UINT64 ui64;
2604
       double d;
2605
     } BID_UI64DOUBLE;
2606
 
2607
#endif