Subversion Repositories Kolibri OS


Rev Author Line No. Line
6515 serge 1
/* Copyright (C) 2007-2015 Free Software Foundation, Inc.
2
 
3
This file is part of GCC.
4
 
5
GCC is free software; you can redistribute it and/or modify it under
6
the terms of the GNU General Public License as published by the Free
7
Software Foundation; either version 3, or (at your option) any later
8
version.
9
 
10
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
11
WARRANTY; without even the implied warranty of MERCHANTABILITY or
12
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
13
for more details.
14
 
15
Under Section 7 of GPL version 3, you are granted additional
16
permissions described in the GCC Runtime Library Exception, version
17
3.1, as published by the Free Software Foundation.
18
 
19
You should have received a copy of the GNU General Public License and
20
a copy of the GCC Runtime Library Exception along with this program;
21
see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
22
<http://www.gnu.org/licenses/>.  */
23
 
24
#ifndef __BIDECIMAL_H
25
#define __BIDECIMAL_H
26
 
27
#include "bid_conf.h"
28
#include "bid_functions.h"
29
 
30
#define __BID_INLINE__ static __inline
31
 
32
/*********************************************************************
33
 *
34
 *      Logical Shift Macros
35
 *
36
 *********************************************************************/
37
 
38
#define __shr_128(Q, A, k)                        \
39
{                                                 \
40
     (Q).w[0] = (A).w[0] >> k;                      \
41
	 (Q).w[0] |= (A).w[1] << (64-k);               \
42
	 (Q).w[1] = (A).w[1] >> k;                    \
43
}
44
 
45
#define __shr_128_long(Q, A, k)                   \
46
{                                                 \
47
	if((k)<64) {                                  \
48
     (Q).w[0] = (A).w[0] >> k;                    \
49
	 (Q).w[0] |= (A).w[1] << (64-k);              \
50
	 (Q).w[1] = (A).w[1] >> k;                    \
51
	}                                             \
52
	else {                                        \
53
	 (Q).w[0] = (A).w[1]>>((k)-64);               \
54
	 (Q).w[1] = 0;                                \
55
	}                                             \
56
}
57
 
58
#define __shl_128_long(Q, A, k)                   \
59
{                                                 \
60
	if((k)<64) {                                  \
61
     (Q).w[1] = (A).w[1] << k;                    \
62
	 (Q).w[1] |= (A).w[0] >> (64-k);              \
63
	 (Q).w[0] = (A).w[0] << k;                    \
64
	}                                             \
65
	else {                                        \
66
	 (Q).w[1] = (A).w[0]<<((k)-64);               \
67
	 (Q).w[0] = 0;                                \
68
	}                                             \
69
}
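// Added usage sketch (not from the original header): UINT128 comes from
// bid_functions.h and, in the layout assumed throughout these macros, stores
// the low 64 bits in w[0] and the high 64 bits in w[1].  The "_long" variants
// above also accept shift counts of 64..127, while __shr_128 expects 0 < k < 64.
#if 0
static void shift_sketch (void) {
  UINT128 a, q;
  a.w[1] = 1ull;                /* a = 2^64 */
  a.w[0] = 0ull;
  __shr_128_long (q, a, 4);     /* q = 2^60:  q.w[1] == 0, q.w[0] == 1ull << 60 */
  __shl_128_long (q, a, 60);    /* q = 2^124: q.w[1] == 1ull << 60, q.w[0] == 0 */
}
#endif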
70
 
71
#define __low_64(Q)  (Q).w[0]
72
/*********************************************************************
73
 *
74
 *      String Macros
75
 *
76
 *********************************************************************/
77
#define tolower_macro(x) (((unsigned char)((x)-'A')<=('Z'-'A'))?((x)-'A'+'a'):(x))
78
/*********************************************************************
79
 *
80
 *      Compare Macros
81
 *
82
 *********************************************************************/
83
// greater than
84
//  return 0 if A<=B
85
//  non-zero if A>B
86
#define __unsigned_compare_gt_128(A, B)  \
87
    ((A.w[1]>B.w[1]) || ((A.w[1]==B.w[1]) && (A.w[0]>B.w[0])))
88
// greater-or-equal
89
#define __unsigned_compare_ge_128(A, B)  \
90
    ((A.w[1]>B.w[1]) || ((A.w[1]==B.w[1]) && (A.w[0]>=B.w[0])))
91
#define __test_equal_128(A, B)  (((A).w[1]==(B).w[1]) && ((A).w[0]==(B).w[0]))
92
// tighten exponent range
93
#define __tight_bin_range_128(bp, P, bin_expon)  \
94
{                                                \
95
UINT64 M;                                        \
96
	M = 1;                                       \
97
	(bp) = (bin_expon);                          \
98
	if((bp)<63) {                                \
99
	  M <<= ((bp)+1);                            \
100
	  if((P).w[0] >= M) (bp)++; }                 \
101
	else if((bp)>64) {                           \
102
	  M <<= ((bp)+1-64);                         \
103
	  if(((P).w[1]>M) ||((P).w[1]==M && (P).w[0]))\
104
	      (bp)++; }                              \
105
	else if((P).w[1]) (bp)++;                    \
106
}
107
/*********************************************************************
108
 *
109
 *      Add/Subtract Macros
110
 *
111
 *********************************************************************/
112
// add 64-bit value to 128-bit
113
#define __add_128_64(R128, A128, B64)    \
114
{                                        \
115
UINT64 R64H;                             \
116
	R64H = (A128).w[1];                 \
117
	(R128).w[0] = (B64) + (A128).w[0];     \
118
	if((R128).w[0] < (B64))               \
119
	  R64H ++;                           \
120
    (R128).w[1] = R64H;                  \
121
}
122
// subtract 64-bit value from 128-bit
123
#define __sub_128_64(R128, A128, B64)    \
124
{                                        \
125
UINT64 R64H;                             \
126
	R64H = (A128).w[1];                  \
127
	if((A128).w[0] < (B64))               \
128
	  R64H --;                           \
129
    (R128).w[1] = R64H;                  \
130
	(R128).w[0] = (A128).w[0] - (B64);     \
131
}
132
// add 128-bit value to 128-bit
133
// assume no carry-out
134
#define __add_128_128(R128, A128, B128)  \
135
{                                        \
136
UINT128 Q128;                            \
137
	Q128.w[1] = (A128).w[1]+(B128).w[1]; \
138
	Q128.w[0] = (B128).w[0] + (A128).w[0];  \
139
	if(Q128.w[0] < (B128).w[0])            \
140
	  Q128.w[1] ++;                      \
141
    (R128).w[1] = Q128.w[1];             \
142
    (R128).w[0] = Q128.w[0];               \
143
}
144
#define __sub_128_128(R128, A128, B128)  \
145
{                                        \
146
UINT128 Q128;                            \
147
	Q128.w[1] = (A128).w[1]-(B128).w[1]; \
148
	Q128.w[0] = (A128).w[0] - (B128).w[0];  \
149
	if((A128).w[0] < (B128).w[0])          \
150
	  Q128.w[1] --;                      \
151
    (R128).w[1] = Q128.w[1];             \
152
    (R128).w[0] = Q128.w[0];               \
153
}
154
#define __add_carry_out(S, CY, X, Y)    \
155
{                                      \
156
UINT64 X1=X;                           \
157
	S = X + Y;                         \
158
	CY = (S<X1) ? 1 : 0;               \
159
}
160
#define __add_carry_in_out(S, CY, X, Y, CI)    \
161
{                                             \
162
UINT64 X1;                                    \
163
	X1 = X + CI;                              \
164
	S = X1 + Y;                               \
165
	CY = ((S<X1) || (X1<CI)) ? 1 : 0;         \
166
}
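// Added note: as used in this header, CI is a previous carry (0 or 1), so a
// carry out of __add_carry_in_out comes either from X + CI wrapping past 2^64
// (X1 < CI) or from X1 + Y wrapping (S < X1); the two cases are mutually
// exclusive, hence CY is always 0 or 1 and can feed the next limb's CI.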
167
#define __sub_borrow_out(S, CY, X, Y)    \
168
{                                      \
169
UINT64 X1=X;                           \
170
	S = X - Y;                         \
171
	CY = (S>X1) ? 1 : 0;                \
172
}
173
#define __sub_borrow_in_out(S, CY, X, Y, CI)    \
174
{                                             \
175
UINT64 X1, X0=X;                              \
176
	X1 = X - CI;                              \
177
	S = X1 - Y;                               \
178
	CY = ((S>X1) || (X1>X0)) ? 1 : 0;          \
179
}
180
// increment C128 and check for rounding overflow:
181
// if (C_128) = 10^34 then (C_128) = 10^33 and increment the exponent
182
#define INCREMENT(C_128, exp)                                           \
183
{                                                                       \
184
  C_128.w[0]++;                                                         \
185
  if (C_128.w[0] == 0) C_128.w[1]++;                                    \
186
  if (C_128.w[1] == 0x0001ed09bead87c0ull &&                            \
187
      C_128.w[0] == 0x378d8e6400000000ull) {                            \
188
    exp++;                                                              \
189
    C_128.w[1] = 0x0000314dc6448d93ull;                                 \
190
    C_128.w[0] = 0x38c15b0a00000000ull;                                 \
191
  }                                                                     \
192
}
193
// decrement C128 and check for rounding underflow, but only at the
194
// boundary: if C_128 = 10^33 - 1 and exp > 0 then C_128 = 10^34 - 1
195
// and decrement the exponent
196
#define DECREMENT(C_128, exp)                                           \
197
{                                                                       \
198
  C_128.w[0]--;                                                         \
199
  if (C_128.w[0] == 0xffffffffffffffffull) C_128.w[1]--;                \
200
  if (C_128.w[1] == 0x0000314dc6448d93ull &&                            \
201
      C_128.w[0] == 0x38c15b09ffffffffull && exp > 0) {                 \
202
    exp--;                                                              \
203
    C_128.w[1] = 0x0001ed09bead87c0ull;                                 \
204
    C_128.w[0] = 0x378d8e63ffffffffull;                                 \
205
  }                                                                     \
206
}
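// Added note: the constants above are 10^34 and 10^33 split into 64-bit halves,
//   10^34 = 0x0001ed09bead87c0 378d8e6400000000
//   10^33 = 0x0000314dc6448d93 38c15b0a00000000
// INCREMENT rewrites a coefficient that reached 10^34 as the equal value
// 10^33 * 10^(exp+1); DECREMENT handles the opposite boundary, rewriting a
// coefficient that fell to 10^33 - 1 as 10^34 - 1 at the next smaller
// exponent so the full 34-digit precision is kept.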
207
 
208
 /*********************************************************************
209
 *
210
 *      Multiply Macros
211
 *
212
 *********************************************************************/
213
#define __mul_64x64_to_64(P64, CX, CY)  (P64) = (CX) * (CY)
214
/***************************************
215
 *  Signed, Full 64x64-bit Multiply
216
 ***************************************/
217
#define __imul_64x64_to_128(P, CX, CY)  \
218
{                                       \
219
UINT64 SX, SY;                          \
220
   __mul_64x64_to_128(P, CX, CY);       \
221
                                        \
222
   SX = ((SINT64)(CX))>>63;             \
223
   SY = ((SINT64)(CY))>>63;             \
224
   SX &= CY;   SY &= CX;                \
225
                                        \
226
   (P).w[1] = (P).w[1] - SX - SY;       \
227
}
228
/***************************************
229
 *  Signed, Full 64x128-bit Multiply
230
 ***************************************/
231
#define __imul_64x128_full(Ph, Ql, A, B)          \
232
{                                                 \
233
UINT128 ALBL, ALBH, QM2, QM;                      \
234
                                                  \
235
	__imul_64x64_to_128(ALBH, (A), (B).w[1]);     \
236
	__imul_64x64_to_128(ALBL, (A), (B).w[0]);      \
237
                                                  \
238
	(Ql).w[0] = ALBL.w[0];                          \
239
	QM.w[0] = ALBL.w[1];                           \
240
	QM.w[1] = ((SINT64)ALBL.w[1])>>63;            \
241
    __add_128_128(QM2, ALBH, QM);                 \
242
	(Ql).w[1] = QM2.w[0];                          \
243
    Ph = QM2.w[1];                                \
244
}
245
/*****************************************************
246
 *      Unsigned Multiply Macros
247
 *****************************************************/
248
// get full 64x64bit product
249
//
250
#define __mul_64x64_to_128(P, CX, CY)   \
251
{                                       \
252
UINT64 CXH, CXL, CYH,CYL,PL,PH,PM,PM2;\
253
	CXH = (CX) >> 32;                     \
254
	CXL = (UINT32)(CX);                   \
255
	CYH = (CY) >> 32;                     \
256
	CYL = (UINT32)(CY);                   \
257
	                                      \
258
    PM = CXH*CYL;                         \
259
	PH = CXH*CYH;                         \
260
	PL = CXL*CYL;                         \
261
	PM2 = CXL*CYH;                        \
262
	PH += (PM>>32);                       \
263
	PM = (UINT64)((UINT32)PM)+PM2+(PL>>32); \
264
                                          \
265
	(P).w[1] = PH + (PM>>32);             \
266
	(P).w[0] = (PM<<32)+(UINT32)PL;       \
267
}
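// Added cross-check sketch (not from the original header): on compilers that
// provide a native unsigned __int128 (e.g. GCC on 64-bit targets), the
// schoolbook 32x32 decomposition above can be validated directly.
#if 0
static int mul_64x64_matches_int128 (UINT64 x, UINT64 y) {
  UINT128 p;
  unsigned __int128 ref = (unsigned __int128) x * y;
  __mul_64x64_to_128 (p, x, y);
  return p.w[1] == (UINT64) (ref >> 64) && p.w[0] == (UINT64) ref;
}
#endif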
268
// get full 64x64bit product
269
// Note:
270
// This macro is used for CX < 2^61, CY < 2^61
271
//
272
#define __mul_64x64_to_128_fast(P, CX, CY)   \
273
{                                       \
274
UINT64 CXH, CXL, CYH, CYL, PL, PH, PM;  \
275
	CXH = (CX) >> 32;                   \
276
	CXL = (UINT32)(CX);                 \
277
	CYH = (CY) >> 32;                   \
278
	CYL = (UINT32)(CY);                 \
279
	                                    \
280
    PM = CXH*CYL;                       \
281
	PL = CXL*CYL;                       \
282
	PH = CXH*CYH;                       \
283
	PM += CXL*CYH;                      \
284
	PM += (PL>>32);                     \
285
                                        \
286
	(P).w[1] = PH + (PM>>32);           \
287
	(P).w[0] = (PM<<32)+(UINT32)PL;      \
288
}
289
// used for CX < 2^60
290
#define __sqr64_fast(P, CX)   \
291
{                                       \
292
UINT64 CXH, CXL, PL, PH, PM;            \
293
	CXH = (CX) >> 32;                   \
294
	CXL = (UINT32)(CX);                 \
295
	                                    \
296
    PM = CXH*CXL;                       \
297
	PL = CXL*CXL;                       \
298
	PH = CXH*CXH;                       \
299
	PM += PM;                           \
300
	PM += (PL>>32);                     \
301
                                        \
302
	(P).w[1] = PH + (PM>>32);           \
303
	(P).w[0] = (PM<<32)+(UINT32)PL;     \
304
}
305
// get full 64x64bit product
306
// Note:
307
// This implementation is used for CX < 2^61, CY < 2^61
308
//
309
#define __mul_64x64_to_64_high_fast(P, CX, CY)   \
310
{                                       \
311
UINT64 CXH, CXL, CYH, CYL, PL, PH, PM;  \
312
	CXH = (CX) >> 32;                   \
313
	CXL = (UINT32)(CX);                 \
314
	CYH = (CY) >> 32;                   \
315
	CYL = (UINT32)(CY);                 \
316
	                                    \
317
    PM = CXH*CYL;                       \
318
	PL = CXL*CYL;                       \
319
	PH = CXH*CYH;                       \
320
	PM += CXL*CYH;                      \
321
	PM += (PL>>32);                     \
322
                                        \
323
	(P) = PH + (PM>>32);                \
324
}
325
// get full 64x64bit product
326
//
327
#define __mul_64x64_to_128_full(P, CX, CY)     \
328
{                                         \
329
UINT64 CXH, CXL, CYH,CYL,PL,PH,PM,PM2;\
330
	CXH = (CX) >> 32;                     \
331
	CXL = (UINT32)(CX);                   \
332
	CYH = (CY) >> 32;                     \
333
	CYL = (UINT32)(CY);                   \
334
	                                      \
335
    PM = CXH*CYL;                         \
336
	PH = CXH*CYH;                         \
337
	PL = CXL*CYL;                         \
338
	PM2 = CXL*CYH;                        \
339
	PH += (PM>>32);                       \
340
	PM = (UINT64)((UINT32)PM)+PM2+(PL>>32); \
341
                                          \
342
	(P).w[1] = PH + (PM>>32);             \
343
	(P).w[0] = (PM<<32)+(UINT32)PL;        \
344
}
345
#define __mul_128x128_high(Q, A, B)               \
346
{                                                 \
347
UINT128 ALBL, ALBH, AHBL, AHBH, QM, QM2;          \
348
                                                  \
349
	__mul_64x64_to_128(ALBH, (A).w[0], (B).w[1]);  \
350
	__mul_64x64_to_128(AHBL, (B).w[0], (A).w[1]);  \
351
	__mul_64x64_to_128(ALBL, (A).w[0], (B).w[0]);   \
352
	__mul_64x64_to_128(AHBH, (A).w[1],(B).w[1]);  \
353
                                                  \
354
    __add_128_128(QM, ALBH, AHBL);                \
355
    __add_128_64(QM2, QM, ALBL.w[1]);             \
356
    __add_128_64((Q), AHBH, QM2.w[1]);            \
357
}
358
#define __mul_128x128_full(Qh, Ql, A, B)          \
359
{                                                 \
360
UINT128 ALBL, ALBH, AHBL, AHBH, QM, QM2;          \
361
                                                  \
362
	__mul_64x64_to_128(ALBH, (A).w[0], (B).w[1]);  \
363
	__mul_64x64_to_128(AHBL, (B).w[0], (A).w[1]);  \
364
	__mul_64x64_to_128(ALBL, (A).w[0], (B).w[0]);   \
365
	__mul_64x64_to_128(AHBH, (A).w[1],(B).w[1]);  \
366
                                                  \
367
    __add_128_128(QM, ALBH, AHBL);                \
368
	(Ql).w[0] = ALBL.w[0];                          \
369
    __add_128_64(QM2, QM, ALBL.w[1]);             \
370
    __add_128_64((Qh), AHBH, QM2.w[1]);           \
371
	(Ql).w[1] = QM2.w[0];                          \
372
}
373
#define __mul_128x128_low(Ql, A, B)               \
374
{                                                 \
375
UINT128 ALBL;                                     \
376
UINT64 QM64;                                      \
377
                                                  \
378
	__mul_64x64_to_128(ALBL, (A).w[0], (B).w[0]);   \
379
	QM64 = (B).w[0]*(A).w[1] + (A).w[0]*(B).w[1];   \
380
                                                  \
381
	(Ql).w[0] = ALBL.w[0];                          \
382
	(Ql).w[1] = QM64 + ALBL.w[1];                 \
383
}
384
#define __mul_64x128_low(Ql, A, B)                \
385
{                                                 \
386
  UINT128 ALBL, ALBH, QM2;                        \
387
  __mul_64x64_to_128(ALBH, (A), (B).w[1]);        \
388
  __mul_64x64_to_128(ALBL, (A), (B).w[0]);        \
389
  (Ql).w[0] = ALBL.w[0];                          \
390
  __add_128_64(QM2, ALBH, ALBL.w[1]);             \
391
  (Ql).w[1] = QM2.w[0];                           \
392
}
393
#define __mul_64x128_full(Ph, Ql, A, B)           \
394
{                                                 \
395
UINT128 ALBL, ALBH, QM2;                          \
396
                                                  \
397
	__mul_64x64_to_128(ALBH, (A), (B).w[1]);      \
398
	__mul_64x64_to_128(ALBL, (A), (B).w[0]);       \
399
                                                  \
400
	(Ql).w[0] = ALBL.w[0];                          \
401
    __add_128_64(QM2, ALBH, ALBL.w[1]);           \
402
	(Ql).w[1] = QM2.w[0];                          \
403
    Ph = QM2.w[1];                                \
404
}
405
#define __mul_64x128_to_192(Q, A, B)              \
406
{                                                 \
407
UINT128 ALBL, ALBH, QM2;                          \
408
                                                  \
409
	__mul_64x64_to_128(ALBH, (A), (B).w[1]);      \
410
	__mul_64x64_to_128(ALBL, (A), (B).w[0]);      \
411
                                                  \
412
	(Q).w[0] = ALBL.w[0];                         \
413
    __add_128_64(QM2, ALBH, ALBL.w[1]);           \
414
	(Q).w[1] = QM2.w[0];                          \
415
    (Q).w[2] = QM2.w[1];                          \
416
}
417
#define __mul_64x128_to192(Q, A, B)          \
418
{                                             \
419
UINT128 ALBL, ALBH, QM2;                      \
420
                                              \
421
    __mul_64x64_to_128(ALBH, (A), (B).w[1]);  \
422
    __mul_64x64_to_128(ALBL, (A), (B).w[0]);  \
423
                                              \
424
    (Q).w[0] = ALBL.w[0];                    \
425
    __add_128_64(QM2, ALBH, ALBL.w[1]);       \
426
    (Q).w[1] = QM2.w[0];                     \
427
    (Q).w[2] = QM2.w[1];                     \
428
}
429
#define __mul_128x128_to_256(P256, A, B)                         \
430
{                                                                \
431
UINT128 Qll, Qlh;                                                \
432
UINT64 Phl, Phh, CY1, CY2;                                         \
433
                                                                 \
434
   __mul_64x128_full(Phl, Qll, A.w[0], B);                       \
435
   __mul_64x128_full(Phh, Qlh, A.w[1], B);                       \
436
  (P256).w[0] = Qll.w[0];                                        \
437
	   __add_carry_out((P256).w[1],CY1, Qlh.w[0], Qll.w[1]);      \
438
	   __add_carry_in_out((P256).w[2],CY2, Qlh.w[1], Phl, CY1);    \
439
	   (P256).w[3] = Phh + CY2;                                   \
440
}
441
//
442
// For better performance, this checks A.w[1] against 0,
443
//                         but not B.w[1]
444
// Use this macro accordingly
445
#define __mul_128x128_to_256_check_A(P256, A, B)                   \
446
{                                                                  \
447
UINT128 Qll, Qlh;                                                  \
448
UINT64 Phl, Phh, CY1, CY2;                                           \
449
                                                                   \
450
   __mul_64x128_full(Phl, Qll, A.w[0], B);                          \
451
  (P256).w[0] = Qll.w[0];                                        \
452
   if(A.w[1])  {                                                   \
453
	   __mul_64x128_full(Phh, Qlh, A.w[1], B);                     \
454
	   __add_carry_out((P256).w[1],CY1, Qlh.w[0], Qll.w[1]);      \
455
	   __add_carry_in_out((P256).w[2],CY2, Qlh.w[1], Phl, CY1);   \
456
	   (P256).w[3] = Phh + CY2;   }                              \
457
   else  {                                                         \
458
	   (P256).w[1] = Qll.w[1];                                  \
459
	   (P256).w[2] = Phl;                                       \
460
	   (P256).w[3] = 0;  }                                      \
461
}
462
#define __mul_64x192_to_256(lP, lA, lB)                      \
463
{                                                         \
464
UINT128 lP0,lP1,lP2;                                      \
465
UINT64 lC;                                                 \
466
	__mul_64x64_to_128(lP0, lA, (lB).w[0]);              \
467
	__mul_64x64_to_128(lP1, lA, (lB).w[1]);              \
468
	__mul_64x64_to_128(lP2, lA, (lB).w[2]);              \
469
	(lP).w[0] = lP0.w[0];                                \
470
	__add_carry_out((lP).w[1],lC,lP1.w[0],lP0.w[1]);      \
471
	__add_carry_in_out((lP).w[2],lC,lP2.w[0],lP1.w[1],lC); \
472
	(lP).w[3] = lP2.w[1] + lC;                           \
473
}
474
#define __mul_64x256_to_320(P, A, B)                    \
475
{                                                       \
476
UINT128 lP0,lP1,lP2,lP3;                                \
477
UINT64 lC;                                               \
478
	__mul_64x64_to_128(lP0, A, (B).w[0]);             \
479
	__mul_64x64_to_128(lP1, A, (B).w[1]);             \
480
	__mul_64x64_to_128(lP2, A, (B).w[2]);             \
481
	__mul_64x64_to_128(lP3, A, (B).w[3]);             \
482
	(P).w[0] = lP0.w[0];                               \
483
	__add_carry_out((P).w[1],lC,lP1.w[0],lP0.w[1]);      \
484
	__add_carry_in_out((P).w[2],lC,lP2.w[0],lP1.w[1],lC); \
485
	__add_carry_in_out((P).w[3],lC,lP3.w[0],lP2.w[1],lC); \
486
	(P).w[4] = lP3.w[1] + lC;                          \
487
}
488
#define __mul_192x192_to_384(P, A, B)                          \
489
{                                                              \
490
UINT256 P0,P1,P2;                                              \
491
UINT64 CY;                                                      \
492
	__mul_64x192_to_256(P0, (A).w[0], B);                   \
493
	__mul_64x192_to_256(P1, (A).w[1], B);                   \
494
	__mul_64x192_to_256(P2, (A).w[2], B);                   \
495
	(P).w[0] = P0.w[0];                                  \
496
	__add_carry_out((P).w[1],CY,P1.w[0],P0.w[1]);      \
497
	__add_carry_in_out((P).w[2],CY,P1.w[1],P0.w[2],CY); \
498
	__add_carry_in_out((P).w[3],CY,P1.w[2],P0.w[3],CY); \
499
	(P).w[4] = P1.w[3] + CY;                              \
500
	__add_carry_out((P).w[2],CY,P2.w[0],(P).w[2]);     \
501
	__add_carry_in_out((P).w[3],CY,P2.w[1],(P).w[3],CY);   \
502
	__add_carry_in_out((P).w[4],CY,P2.w[2],(P).w[4],CY);   \
503
	(P).w[5] = P2.w[3] + CY;                              \
504
}
505
#define __mul_64x320_to_384(P, A, B)                    \
506
{                                                       \
507
UINT128 lP0,lP1,lP2,lP3,lP4;                            \
508
UINT64 lC;                                               \
509
	__mul_64x64_to_128(lP0, A, (B).w[0]);             \
510
	__mul_64x64_to_128(lP1, A, (B).w[1]);             \
511
	__mul_64x64_to_128(lP2, A, (B).w[2]);             \
512
	__mul_64x64_to_128(lP3, A, (B).w[3]);             \
513
	__mul_64x64_to_128(lP4, A, (B).w[4]);             \
514
	(P).w[0] = lP0.w[0];                               \
515
	__add_carry_out((P).w[1],lC,lP1.w[0],lP0.w[1]);      \
516
	__add_carry_in_out((P).w[2],lC,lP2.w[0],lP1.w[1],lC); \
517
	__add_carry_in_out((P).w[3],lC,lP3.w[0],lP2.w[1],lC); \
518
	__add_carry_in_out((P).w[4],lC,lP4.w[0],lP3.w[1],lC); \
519
	(P).w[5] = lP4.w[1] + lC;                          \
520
}
521
// A*A
522
// Full 128x128-bit product
523
#define __sqr128_to_256(P256, A)                                 \
524
{                                                                \
525
UINT128 Qll, Qlh, Qhh;                                           \
526
UINT64 TMP_C1, TMP_C2;                                 \
527
                                                                 \
528
   __mul_64x64_to_128(Qhh, A.w[1], A.w[1]);                      \
529
   __mul_64x64_to_128(Qlh, A.w[0], A.w[1]);                      \
530
   Qhh.w[1] += (Qlh.w[1]>>63);                                   \
531
   Qlh.w[1] = (Qlh.w[1]+Qlh.w[1])|(Qlh.w[0]>>63);                \
532
   Qlh.w[0] += Qlh.w[0];                                         \
533
   __mul_64x64_to_128(Qll, A.w[0], A.w[0]);                      \
534
                                                                 \
535
   __add_carry_out((P256).w[1],TMP_C1, Qlh.w[0], Qll.w[1]);      \
536
   (P256).w[0] = Qll.w[0];                                       \
537
   __add_carry_in_out((P256).w[2],TMP_C2, Qlh.w[1], Qhh.w[0], TMP_C1);    \
538
   (P256).w[3] = Qhh.w[1]+TMP_C2;                                         \
539
}
540
#define __mul_128x128_to_256_low_high(PQh, PQl, A, B)            \
541
{                                                                \
542
UINT128 Qll, Qlh;                                                \
543
UINT64 Phl, Phh, C1, C2;                                         \
544
                                                                 \
545
   __mul_64x128_full(Phl, Qll, A.w[0], B);                       \
546
   __mul_64x128_full(Phh, Qlh, A.w[1], B);                       \
547
  (PQl).w[0] = Qll.w[0];                                        \
548
	   __add_carry_out((PQl).w[1],C1, Qlh.w[0], Qll.w[1]);      \
549
	   __add_carry_in_out((PQh).w[0],C2, Qlh.w[1], Phl, C1);    \
550
	   (PQh).w[1] = Phh + C2;                                   \
551
}
552
#define __mul_256x256_to_512(P, A, B)                          \
553
{                                                              \
554
UINT512 P0,P1,P2,P3;                                           \
555
UINT64 CY;                                                      \
556
	__mul_64x256_to_320(P0, (A).w[0], B);                   \
557
	__mul_64x256_to_320(P1, (A).w[1], B);                   \
558
	__mul_64x256_to_320(P2, (A).w[2], B);                   \
559
	__mul_64x256_to_320(P3, (A).w[3], B);                   \
560
	(P).w[0] = P0.w[0];                                  \
561
	__add_carry_out((P).w[1],CY,P1.w[0],P0.w[1]);      \
562
	__add_carry_in_out((P).w[2],CY,P1.w[1],P0.w[2],CY); \
563
	__add_carry_in_out((P).w[3],CY,P1.w[2],P0.w[3],CY); \
564
	__add_carry_in_out((P).w[4],CY,P1.w[3],P0.w[4],CY); \
565
	(P).w[5] = P1.w[4] + CY;                              \
566
	__add_carry_out((P).w[2],CY,P2.w[0],(P).w[2]);     \
567
	__add_carry_in_out((P).w[3],CY,P2.w[1],(P).w[3],CY);   \
568
	__add_carry_in_out((P).w[4],CY,P2.w[2],(P).w[4],CY);   \
569
	__add_carry_in_out((P).w[5],CY,P2.w[3],(P).w[5],CY);   \
570
	(P).w[6] = P2.w[4] + CY;                              \
571
	__add_carry_out((P).w[3],CY,P3.w[0],(P).w[3]);     \
572
	__add_carry_in_out((P).w[4],CY,P3.w[1],(P).w[4],CY);   \
573
	__add_carry_in_out((P).w[5],CY,P3.w[2],(P).w[5],CY);   \
574
	__add_carry_in_out((P).w[6],CY,P3.w[3],(P).w[6],CY);   \
575
	(P).w[7] = P3.w[4] + CY;                              \
576
}
577
#define __mul_192x256_to_448(P, A, B)                          \
578
{                                                              \
579
UINT512 P0,P1,P2;                                           \
580
UINT64 CY;                                                      \
581
	__mul_64x256_to_320(P0, (A).w[0], B);                   \
582
	__mul_64x256_to_320(P1, (A).w[1], B);                   \
583
	__mul_64x256_to_320(P2, (A).w[2], B);                   \
584
	(P).w[0] = P0.w[0];                                  \
585
	__add_carry_out((P).w[1],CY,P1.w[0],P0.w[1]);      \
586
	__add_carry_in_out((P).w[2],CY,P1.w[1],P0.w[2],CY); \
587
	__add_carry_in_out((P).w[3],CY,P1.w[2],P0.w[3],CY); \
588
	__add_carry_in_out((P).w[4],CY,P1.w[3],P0.w[4],CY); \
589
	(P).w[5] = P1.w[4] + CY;                              \
590
	__add_carry_out((P).w[2],CY,P2.w[0],(P).w[2]);     \
591
	__add_carry_in_out((P).w[3],CY,P2.w[1],(P).w[3],CY);   \
592
	__add_carry_in_out((P).w[4],CY,P2.w[2],(P).w[4],CY);   \
593
	__add_carry_in_out((P).w[5],CY,P2.w[3],(P).w[5],CY);   \
594
	(P).w[6] = P2.w[4] + CY;                              \
595
}
596
#define __mul_320x320_to_640(P, A, B)                          \
597
{                                                              \
598
UINT512 P0,P1,P2,P3;                                           \
599
UINT64 CY;                                                     \
600
	__mul_256x256_to_512((P), (A), B);                   \
601
	__mul_64x256_to_320(P1, (A).w[4], B);                   \
602
	__mul_64x256_to_320(P2, (B).w[4], A);                   \
603
	__mul_64x64_to_128(P3, (A).w[4], (B).w[4]);               \
604
	__add_carry_out((P0).w[0],CY,P1.w[0],P2.w[0]);      \
605
	__add_carry_in_out((P0).w[1],CY,P1.w[1],P2.w[1],CY); \
606
	__add_carry_in_out((P0).w[2],CY,P1.w[2],P2.w[2],CY); \
607
	__add_carry_in_out((P0).w[3],CY,P1.w[3],P2.w[3],CY); \
608
	__add_carry_in_out((P0).w[4],CY,P1.w[4],P2.w[4],CY); \
609
	P3.w[1] += CY;                                       \
610
	__add_carry_out((P).w[4],CY,(P).w[4],P0.w[0]);      \
611
	__add_carry_in_out((P).w[5],CY,(P).w[5],P0.w[1],CY); \
612
	__add_carry_in_out((P).w[6],CY,(P).w[6],P0.w[2],CY); \
613
	__add_carry_in_out((P).w[7],CY,(P).w[7],P0.w[3],CY); \
614
	__add_carry_in_out((P).w[8],CY,P3.w[0],P0.w[4],CY); \
615
	(P).w[9] = P3.w[1] + CY;                             \
616
}
617
#define __mul_384x384_to_768(P, A, B)                          \
618
{                                                              \
619
UINT512 P0,P1,P2,P3;                                           \
620
UINT64 CY;                                                     \
621
	__mul_320x320_to_640((P), (A), B);                         \
622
	__mul_64x320_to_384(P1, (A).w[5], B);                   \
623
	__mul_64x320_to_384(P2, (B).w[5], A);                   \
624
	__mul_64x64_to_128(P3, (A).w[5], (B).w[5]);               \
625
	__add_carry_out((P0).w[0],CY,P1.w[0],P2.w[0]);      \
626
	__add_carry_in_out((P0).w[1],CY,P1.w[1],P2.w[1],CY); \
627
	__add_carry_in_out((P0).w[2],CY,P1.w[2],P2.w[2],CY); \
628
	__add_carry_in_out((P0).w[3],CY,P1.w[3],P2.w[3],CY); \
629
	__add_carry_in_out((P0).w[4],CY,P1.w[4],P2.w[4],CY); \
630
	__add_carry_in_out((P0).w[5],CY,P1.w[5],P2.w[5],CY); \
631
	P3.w[1] += CY;                                       \
632
	__add_carry_out((P).w[5],CY,(P).w[5],P0.w[0]);      \
633
	__add_carry_in_out((P).w[6],CY,(P).w[6],P0.w[1],CY); \
634
	__add_carry_in_out((P).w[7],CY,(P).w[7],P0.w[2],CY); \
635
	__add_carry_in_out((P).w[8],CY,(P).w[8],P0.w[3],CY); \
636
	__add_carry_in_out((P).w[9],CY,(P).w[9],P0.w[4],CY); \
637
	__add_carry_in_out((P).w[10],CY,P3.w[0],P0.w[5],CY); \
638
	(P).w[11] = P3.w[1] + CY;                             \
639
}
640
#define __mul_64x128_short(Ql, A, B)              \
641
{                                                 \
642
UINT64 ALBH_L;                                    \
643
                                                  \
644
	__mul_64x64_to_64(ALBH_L, (A),(B).w[1]);      \
645
	__mul_64x64_to_128((Ql), (A), (B).w[0]);       \
646
                                                  \
647
	(Ql).w[1] += ALBH_L;                          \
648
}
649
#define __scale128_10(D,_TMP)                            \
650
{                                                        \
651
UINT128 _TMP2,_TMP8;                                     \
652
	  _TMP2.w[1] = (_TMP.w[1]<<1)|(_TMP.w[0]>>63);       \
653
	  _TMP2.w[0] = _TMP.w[0]<<1;                         \
654
	  _TMP8.w[1] = (_TMP.w[1]<<3)|(_TMP.w[0]>>61);       \
655
	  _TMP8.w[0] = _TMP.w[0]<<3;                         \
656
	  __add_128_128(D, _TMP2, _TMP8);                    \
657
}
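// Added note: __scale128_10 forms D = 10 * _TMP as (_TMP << 1) + (_TMP << 3),
// i.e. 2x + 8x, avoiding a 128-bit multiply; the shifts assume 10 * _TMP
// still fits in 128 bits.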
658
// 64x64-bit product
659
#define __mul_64x64_to_128MACH(P128, CX64, CY64)  \
660
{                                                  \
661
  UINT64 CXH,CXL,CYH,CYL,PL,PH,PM,PM2;     \
662
  CXH = (CX64) >> 32;                              \
663
  CXL = (UINT32)(CX64);                            \
664
  CYH = (CY64) >> 32;                              \
665
  CYL = (UINT32)(CY64);                            \
666
  PM = CXH*CYL;                                    \
667
  PH = CXH*CYH;                                    \
668
  PL = CXL*CYL;                                    \
669
  PM2 = CXL*CYH;                                   \
670
  PH += (PM>>32);                                  \
671
  PM = (UINT64)((UINT32)PM)+PM2+(PL>>32);          \
672
  (P128).w[1] = PH + (PM>>32);                     \
673
  (P128).w[0] = (PM<<32)+(UINT32)PL;                \
674
}
675
// 64x64-bit product
676
#define __mul_64x64_to_128HIGH(P64, CX64, CY64)  \
677
{                                                  \
678
  UINT64 CXH,CXL,CYH,CYL,PL,PH,PM,PM2;     \
679
  CXH = (CX64) >> 32;                              \
680
  CXL = (UINT32)(CX64);                            \
681
  CYH = (CY64) >> 32;                              \
682
  CYL = (UINT32)(CY64);                            \
683
  PM = CXH*CYL;                                    \
684
  PH = CXH*CYH;                                    \
685
  PL = CXL*CYL;                                    \
686
  PM2 = CXL*CYH;                                   \
687
  PH += (PM>>32);                                  \
688
  PM = (UINT64)((UINT32)PM)+PM2+(PL>>32);          \
689
  P64 = PH + (PM>>32);                     \
690
}
691
#define __mul_128x64_to_128(Q128, A64, B128)        \
692
{                                                  \
693
  UINT64 ALBH_L;                                   \
694
  ALBH_L = (A64) * (B128).w[1];                    \
695
  __mul_64x64_to_128MACH((Q128), (A64), (B128).w[0]);   \
696
  (Q128).w[1] += ALBH_L;                           \
697
}
698
// might simplify by calculating just QM2.w[0]
699
#define __mul_64x128_to_128(Ql, A, B)           \
700
{                                                 \
701
  UINT128 ALBL, ALBH, QM2;                        \
702
  __mul_64x64_to_128(ALBH, (A), (B).w[1]);        \
703
  __mul_64x64_to_128(ALBL, (A), (B).w[0]);        \
704
  (Ql).w[0] = ALBL.w[0];                          \
705
  __add_128_64(QM2, ALBH, ALBL.w[1]);             \
706
  (Ql).w[1] = QM2.w[0];                           \
707
}
708
/*********************************************************************
709
 *
710
 *      BID Pack/Unpack Macros
711
 *
712
 *********************************************************************/
713
/////////////////////////////////////////
714
// BID64 definitions
715
////////////////////////////////////////
716
#define DECIMAL_MAX_EXPON_64  767
717
#define DECIMAL_EXPONENT_BIAS 398
718
#define MAX_FORMAT_DIGITS     16
719
/////////////////////////////////////////
720
// BID128 definitions
721
////////////////////////////////////////
722
#define DECIMAL_MAX_EXPON_128  12287
723
#define DECIMAL_EXPONENT_BIAS_128  6176
724
#define MAX_FORMAT_DIGITS_128      34
725
/////////////////////////////////////////
726
// BID32 definitions
727
////////////////////////////////////////
728
#define DECIMAL_MAX_EXPON_32  191
729
#define DECIMAL_EXPONENT_BIAS_32  101
730
#define MAX_FORMAT_DIGITS_32      7
731
////////////////////////////////////////
732
// Constant Definitions
733
///////////////////////////////////////
734
#define SPECIAL_ENCODING_MASK64 0x6000000000000000ull
735
#define INFINITY_MASK64         0x7800000000000000ull
736
#define SINFINITY_MASK64        0xf800000000000000ull
737
#define SSNAN_MASK64            0xfc00000000000000ull
738
#define NAN_MASK64              0x7c00000000000000ull
739
#define SNAN_MASK64             0x7e00000000000000ull
740
#define QUIET_MASK64            0xfdffffffffffffffull
741
#define LARGE_COEFF_MASK64      0x0007ffffffffffffull
742
#define LARGE_COEFF_HIGH_BIT64  0x0020000000000000ull
743
#define SMALL_COEFF_MASK64      0x001fffffffffffffull
744
#define EXPONENT_MASK64         0x3ff
745
#define EXPONENT_SHIFT_LARGE64  51
746
#define EXPONENT_SHIFT_SMALL64  53
747
#define LARGEST_BID64           0x77fb86f26fc0ffffull
748
#define SMALLEST_BID64          0xf7fb86f26fc0ffffull
749
#define SMALL_COEFF_MASK128     0x0001ffffffffffffull
750
#define LARGE_COEFF_MASK128     0x00007fffffffffffull
751
#define EXPONENT_MASK128        0x3fff
752
#define LARGEST_BID128_HIGH     0x5fffed09bead87c0ull
753
#define LARGEST_BID128_LOW      0x378d8e63ffffffffull
754
#define SPECIAL_ENCODING_MASK32 0x60000000ul
755
#define INFINITY_MASK32         0x78000000ul
756
#define LARGE_COEFF_MASK32      0x007ffffful
757
#define LARGE_COEFF_HIGH_BIT32  0x00800000ul
758
#define SMALL_COEFF_MASK32      0x001ffffful
759
#define EXPONENT_MASK32         0xff
760
#define LARGEST_BID32           0x77f8967f
761
#define NAN_MASK32              0x7c000000
762
#define SNAN_MASK32             0x7e000000
763
#define MASK_BINARY_EXPONENT  0x7ff0000000000000ull
764
#define BINARY_EXPONENT_BIAS  0x3ff
765
#define UPPER_EXPON_LIMIT     51
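// Added note: for BID64 the constants above describe the two encodings used by
// the pack/unpack routines below.  The "small coefficient" form packs
// sign | 10-bit exponent | 53-bit coefficient (EXPONENT_SHIFT_SMALL64 == 53);
// the "special encoding" form, marked by the two bits of
// SPECIAL_ENCODING_MASK64, packs sign | 11 | 10-bit exponent | 51-bit
// coefficient with an implicit leading binary 100 (LARGE_COEFF_HIGH_BIT64),
// so its exponent field sits at EXPONENT_SHIFT_LARGE64 == 51.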
766
// data needed for BID pack/unpack macros
767
extern UINT64 round_const_table[][19];
768
extern UINT128 reciprocals10_128[];
769
extern int recip_scale[];
770
extern UINT128 power10_table_128[];
771
extern int estimate_decimal_digits[];
772
extern int estimate_bin_expon[];
773
extern UINT64 power10_index_binexp[];
774
extern int short_recip_scale[];
775
extern UINT64 reciprocals10_64[];
776
extern UINT128 power10_index_binexp_128[];
777
extern UINT128 round_const_table_128[][36];
778
 
779
 
780
//////////////////////////////////////////////
781
//  Status Flag Handling
782
/////////////////////////////////////////////
783
#define __set_status_flags(fpsc, status)  *(fpsc) |= status
784
#define is_inexact(fpsc)  ((*(fpsc))&INEXACT_EXCEPTION)
785
 
786
__BID_INLINE__ UINT64
787
unpack_BID64 (UINT64 * psign_x, int *pexponent_x,
788
	      UINT64 * pcoefficient_x, UINT64 x) {
789
  UINT64 tmp, coeff;
790
 
791
  *psign_x = x & 0x8000000000000000ull;
792
 
793
  if ((x & SPECIAL_ENCODING_MASK64) == SPECIAL_ENCODING_MASK64) {
794
    // special encodings
795
    // coefficient
796
    coeff = (x & LARGE_COEFF_MASK64) | LARGE_COEFF_HIGH_BIT64;
797
 
798
    if ((x & INFINITY_MASK64) == INFINITY_MASK64) {
799
      *pexponent_x = 0;
800
      *pcoefficient_x = x & 0xfe03ffffffffffffull;
801
      if ((x & 0x0003ffffffffffffull) >= 1000000000000000ull)
802
	*pcoefficient_x = x & 0xfe00000000000000ull;
803
      if ((x & NAN_MASK64) == INFINITY_MASK64)
804
	*pcoefficient_x = x & SINFINITY_MASK64;
805
      return 0;	// NaN or Infinity
806
    }
807
    // check for non-canonical values
808
    if (coeff >= 10000000000000000ull)
809
      coeff = 0;
810
    *pcoefficient_x = coeff;
811
    // get exponent
812
    tmp = x >> EXPONENT_SHIFT_LARGE64;
813
    *pexponent_x = (int) (tmp & EXPONENT_MASK64);
814
    return coeff;
815
  }
816
  // exponent
817
  tmp = x >> EXPONENT_SHIFT_SMALL64;
818
  *pexponent_x = (int) (tmp & EXPONENT_MASK64);
819
  // coefficient
820
  *pcoefficient_x = (x & SMALL_COEFF_MASK64);
821
 
822
  return *pcoefficient_x;
823
}
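// Added usage sketch (not from the original header): decoding 1.5, whose BID64
// small-coefficient encoding is 0x31a000000000000full (sign 0, biased
// exponent 397, coefficient 15).
#if 0
static void unpack_sketch (void) {
  UINT64 sign, coeff, nonzero;
  int expon;
  nonzero = unpack_BID64 (&sign, &expon, &coeff, 0x31a000000000000full);
  /* nonzero != 0, sign == 0, coeff == 15, expon == 397,
     i.e. 15 * 10^(397 - DECIMAL_EXPONENT_BIAS) = 15 * 10^-1 = 1.5 */
}
#endif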
824
 
825
//
826
//   BID64 pack macro (general form)
827
//
828
__BID_INLINE__ UINT64
829
get_BID64 (UINT64 sgn, int expon, UINT64 coeff, int rmode,
830
	   unsigned *fpsc) {
831
  UINT128 Stemp, Q_low;
832
  UINT64 QH, r, mask, C64, remainder_h, CY, carry;
833
  int extra_digits, amount, amount2;
834
  unsigned status;
835
 
836
  if (coeff > 9999999999999999ull) {
837
    expon++;
838
    coeff = 1000000000000000ull;
839
  }
840
  // check for possible underflow/overflow
841
  if (((unsigned) expon) >= 3 * 256) {
842
    if (expon < 0) {
843
      // underflow
844
      if (expon + MAX_FORMAT_DIGITS < 0) {
845
#ifdef SET_STATUS_FLAGS
846
	__set_status_flags (fpsc,
847
			    UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
848
#endif
849
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
850
#ifndef IEEE_ROUND_NEAREST
851
	if (rmode == ROUNDING_DOWN && sgn)
852
	  return 0x8000000000000001ull;
853
	if (rmode == ROUNDING_UP && !sgn)
854
	  return 1ull;
855
#endif
856
#endif
857
	// result is 0
858
	return sgn;
859
      }
860
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
861
#ifndef IEEE_ROUND_NEAREST
862
      if (sgn && (unsigned) (rmode - 1) < 2)
863
	rmode = 3 - rmode;
864
#endif
865
#endif
866
      // get digits to be shifted out
867
      extra_digits = -expon;
868
      coeff += round_const_table[rmode][extra_digits];
869
 
870
      // get coeff*(2^M[extra_digits])/10^extra_digits
871
      __mul_64x128_full (QH, Q_low, coeff,
872
			 reciprocals10_128[extra_digits]);
873
 
874
      // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
875
      amount = recip_scale[extra_digits];
876
 
877
      C64 = QH >> amount;
878
 
879
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
880
#ifndef IEEE_ROUND_NEAREST
881
      if (rmode == 0)	//ROUNDING_TO_NEAREST
882
#endif
883
	if (C64 & 1) {
884
	  // check whether fractional part of initial_P/10^extra_digits is exactly .5
885
 
886
	  // get remainder
887
	  amount2 = 64 - amount;
888
	  remainder_h = 0;
889
	  remainder_h--;
890
	  remainder_h >>= amount2;
891
	  remainder_h = remainder_h & QH;
892
 
893
	  if (!remainder_h
894
	      && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
895
		  || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
896
		      && Q_low.w[0] <
897
		      reciprocals10_128[extra_digits].w[0]))) {
898
	    C64--;
899
	  }
900
	}
901
#endif
902
 
903
#ifdef SET_STATUS_FLAGS
904
 
905
      if (is_inexact (fpsc))
906
	__set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
907
      else {
908
	status = INEXACT_EXCEPTION;
909
	// get remainder
910
	remainder_h = QH << (64 - amount);
911
 
912
	switch (rmode) {
913
	case ROUNDING_TO_NEAREST:
914
	case ROUNDING_TIES_AWAY:
915
	  // test whether fractional part is 0
916
	  if (remainder_h == 0x8000000000000000ull
917
	      && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
918
		  || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
919
		      && Q_low.w[0] <
920
		      reciprocals10_128[extra_digits].w[0])))
921
	    status = EXACT_STATUS;
922
	  break;
923
	case ROUNDING_DOWN:
924
	case ROUNDING_TO_ZERO:
925
	  if (!remainder_h
926
	      && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
927
		  || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
928
		      && Q_low.w[0] <
929
		      reciprocals10_128[extra_digits].w[0])))
930
	    status = EXACT_STATUS;
931
	  break;
932
	default:
933
	  // round up
934
	  __add_carry_out (Stemp.w[0], CY, Q_low.w[0],
935
			   reciprocals10_128[extra_digits].w[0]);
936
	  __add_carry_in_out (Stemp.w[1], carry, Q_low.w[1],
937
			      reciprocals10_128[extra_digits].w[1], CY);
938
	  if ((remainder_h >> (64 - amount)) + carry >=
939
	      (((UINT64) 1) << amount))
940
	    status = EXACT_STATUS;
941
	}
942
 
943
	if (status != EXACT_STATUS)
944
	  __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
945
      }
946
 
947
#endif
948
 
949
      return sgn | C64;
950
    }
951
    while (coeff < 1000000000000000ull && expon >= 3 * 256) {
952
      expon--;
953
      coeff = (coeff << 3) + (coeff << 1);
954
    }
955
    if (expon > DECIMAL_MAX_EXPON_64) {
956
#ifdef SET_STATUS_FLAGS
957
      __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
958
#endif
959
      // overflow
960
      r = sgn | INFINITY_MASK64;
961
      switch (rmode) {
962
      case ROUNDING_DOWN:
963
	if (!sgn)
964
	  r = LARGEST_BID64;
965
	break;
966
      case ROUNDING_TO_ZERO:
967
	r = sgn | LARGEST_BID64;
968
	break;
969
      case ROUNDING_UP:
970
	// round up
971
	if (sgn)
972
	  r = SMALLEST_BID64;
973
      }
974
      return r;
975
    }
976
  }
977
 
978
  mask = 1;
979
  mask <<= EXPONENT_SHIFT_SMALL64;
980
 
981
  // check whether coefficient fits in 10*5+3 bits
982
  if (coeff < mask) {
983
    r = expon;
984
    r <<= EXPONENT_SHIFT_SMALL64;
985
    r |= (coeff | sgn);
986
    return r;
987
  }
988
  // special format
989
 
990
  // eliminate the case coeff==10^16 after rounding
991
  if (coeff == 10000000000000000ull) {
992
    r = expon + 1;
993
    r <<= EXPONENT_SHIFT_SMALL64;
994
    r |= (1000000000000000ull | sgn);
995
    return r;
996
  }
997
 
998
  r = expon;
999
  r <<= EXPONENT_SHIFT_LARGE64;
1000
  r |= (sgn | SPECIAL_ENCODING_MASK64);
1001
  // add coeff, without leading bits
1002
  mask = (mask >> 2) - 1;
1003
  coeff &= mask;
1004
  r |= coeff;
1005
 
1006
  return r;
1007
}
1008
 
1009
 
1010
 
1011
 
1012
//
1013
//   No overflow/underflow checking
1014
//
1015
__BID_INLINE__ UINT64
1016
fast_get_BID64 (UINT64 sgn, int expon, UINT64 coeff) {
1017
  UINT64 r, mask;
1018
 
1019
  mask = 1;
1020
  mask <<= EXPONENT_SHIFT_SMALL64;
1021
 
1022
  // check whether coefficient fits in 10*5+3 bits
1023
  if (coeff < mask) {
1024
    r = expon;
1025
    r <<= EXPONENT_SHIFT_SMALL64;
1026
    r |= (coeff | sgn);
1027
    return r;
1028
  }
1029
  // special format
1030
 
1031
  // eliminate the case coeff==10^16 after rounding
1032
  if (coeff == 10000000000000000ull) {
1033
    r = expon + 1;
1034
    r <<= EXPONENT_SHIFT_SMALL64;
1035
    r |= (1000000000000000ull | sgn);
1036
    return r;
1037
  }
1038
 
1039
  r = expon;
1040
  r <<= EXPONENT_SHIFT_LARGE64;
1041
  r |= (sgn | SPECIAL_ENCODING_MASK64);
1042
  // add coeff, without leading bits
1043
  mask = (mask >> 2) - 1;
1044
  coeff &= mask;
1045
  r |= coeff;
1046
 
1047
  return r;
1048
}
1049
 
1050
 
1051
//
1052
//   no underflow checking
1053
//
1054
__BID_INLINE__ UINT64
1055
fast_get_BID64_check_OF (UINT64 sgn, int expon, UINT64 coeff, int rmode,
1056
			 unsigned *fpsc) {
1057
  UINT64 r, mask;
1058
 
1059
  if (((unsigned) expon) >= 3 * 256 - 1) {
1060
    if ((expon == 3 * 256 - 1) && coeff == 10000000000000000ull) {
1061
      expon = 3 * 256;
1062
      coeff = 1000000000000000ull;
1063
    }
1064
 
1065
    if (((unsigned) expon) >= 3 * 256) {
1066
      while (coeff < 1000000000000000ull && expon >= 3 * 256) {
1067
	expon--;
1068
	coeff = (coeff << 3) + (coeff << 1);
1069
      }
1070
      if (expon > DECIMAL_MAX_EXPON_64) {
1071
#ifdef SET_STATUS_FLAGS
1072
	__set_status_flags (fpsc,
1073
			    OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
1074
#endif
1075
	// overflow
1076
	r = sgn | INFINITY_MASK64;
1077
	switch (rmode) {
1078
	case ROUNDING_DOWN:
1079
	  if (!sgn)
1080
	    r = LARGEST_BID64;
1081
	  break;
1082
	case ROUNDING_TO_ZERO:
1083
	  r = sgn | LARGEST_BID64;
1084
	  break;
1085
	case ROUNDING_UP:
1086
	  // round up
1087
	  if (sgn)
1088
	    r = SMALLEST_BID64;
1089
	}
1090
	return r;
1091
      }
1092
    }
1093
  }
1094
 
1095
  mask = 1;
1096
  mask <<= EXPONENT_SHIFT_SMALL64;
1097
 
1098
  // check whether coefficient fits in 10*5+3 bits
1099
  if (coeff < mask) {
1100
    r = expon;
1101
    r <<= EXPONENT_SHIFT_SMALL64;
1102
    r |= (coeff | sgn);
1103
    return r;
1104
  }
1105
  // special format
1106
 
1107
  // eliminate the case coeff==10^16 after rounding
1108
  if (coeff == 10000000000000000ull) {
1109
    r = expon + 1;
1110
    r <<= EXPONENT_SHIFT_SMALL64;
1111
    r |= (1000000000000000ull | sgn);
1112
    return r;
1113
  }
1114
 
1115
  r = expon;
1116
  r <<= EXPONENT_SHIFT_LARGE64;
1117
  r |= (sgn | SPECIAL_ENCODING_MASK64);
1118
  // add coeff, without leading bits
1119
  mask = (mask >> 2) - 1;
1120
  coeff &= mask;
1121
  r |= coeff;
1122
 
1123
  return r;
1124
}
1125
 
1126
 
1127
//
1128
//   No overflow/underflow checking
1129
//   or checking for coefficients equal to 10^16 (after rounding)
1130
//
1131
__BID_INLINE__ UINT64
1132
very_fast_get_BID64 (UINT64 sgn, int expon, UINT64 coeff) {
1133
  UINT64 r, mask;
1134
 
1135
  mask = 1;
1136
  mask <<= EXPONENT_SHIFT_SMALL64;
1137
 
1138
  // check whether coefficient fits in 10*5+3 bits
1139
  if (coeff < mask) {
1140
    r = expon;
1141
    r <<= EXPONENT_SHIFT_SMALL64;
1142
    r |= (coeff | sgn);
1143
    return r;
1144
  }
1145
  // special format
1146
  r = expon;
1147
  r <<= EXPONENT_SHIFT_LARGE64;
1148
  r |= (sgn | SPECIAL_ENCODING_MASK64);
1149
  // add coeff, without leading bits
1150
  mask = (mask >> 2) - 1;
1151
  coeff &= mask;
1152
  r |= coeff;
1153
 
1154
  return r;
1155
}
1156
 
1157
//
1158
//   No overflow/underflow checking or checking for coefficients above 2^53
1159
//
1160
__BID_INLINE__ UINT64
1161
very_fast_get_BID64_small_mantissa (UINT64 sgn, int expon, UINT64 coeff) {
1162
  // no UF/OF
1163
  UINT64 r;
1164
 
1165
  r = expon;
1166
  r <<= EXPONENT_SHIFT_SMALL64;
1167
  r |= (coeff | sgn);
1168
  return r;
1169
}
1170
 
1171
 
1172
//
1173
// This pack macro is used when underflow is known to occur
1174
//
1175
__BID_INLINE__ UINT64
1176
get_BID64_UF (UINT64 sgn, int expon, UINT64 coeff, UINT64 R, int rmode,
1177
	      unsigned *fpsc) {
1178
  UINT128 C128, Q_low, Stemp;
1179
  UINT64 C64, remainder_h, QH, carry, CY;
1180
  int extra_digits, amount, amount2;
1181
  unsigned status;
1182
 
1183
  // underflow
1184
  if (expon + MAX_FORMAT_DIGITS < 0) {
1185
#ifdef SET_STATUS_FLAGS
1186
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
1187
#endif
1188
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1189
#ifndef IEEE_ROUND_NEAREST
1190
    if (rmode == ROUNDING_DOWN && sgn)
1191
      return 0x8000000000000001ull;
1192
    if (rmode == ROUNDING_UP && !sgn)
1193
      return 1ull;
1194
#endif
1195
#endif
1196
    // result is 0
1197
    return sgn;
1198
  }
1199
  // 10*coeff
1200
  coeff = (coeff << 3) + (coeff << 1);
1201
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1202
#ifndef IEEE_ROUND_NEAREST
1203
  if (sgn && (unsigned) (rmode - 1) < 2)
1204
    rmode = 3 - rmode;
1205
#endif
1206
#endif
1207
  if (R)
1208
    coeff |= 1;
1209
  // get digits to be shifted out
1210
  extra_digits = 1 - expon;
1211
  C128.w[0] = coeff + round_const_table[rmode][extra_digits];
1212
 
1213
  // get coeff*(2^M[extra_digits])/10^extra_digits
1214
  __mul_64x128_full (QH, Q_low, C128.w[0],
1215
		     reciprocals10_128[extra_digits]);
1216
 
1217
  // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
1218
  amount = recip_scale[extra_digits];
1219
 
1220
  C64 = QH >> amount;
1221
  //__shr_128(C128, Q_high, amount);
1222
 
1223
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1224
#ifndef IEEE_ROUND_NEAREST
1225
  if (rmode == 0)	//ROUNDING_TO_NEAREST
1226
#endif
1227
    if (C64 & 1) {
1228
      // check whether fractional part of initial_P/10^extra_digits is exactly .5
1229
 
1230
      // get remainder
1231
      amount2 = 64 - amount;
1232
      remainder_h = 0;
1233
      remainder_h--;
1234
      remainder_h >>= amount2;
1235
      remainder_h = remainder_h & QH;
1236
 
1237
      if (!remainder_h
1238
	  && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
1239
	      || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
1240
		  && Q_low.w[0] <
1241
		  reciprocals10_128[extra_digits].w[0]))) {
1242
	C64--;
1243
      }
1244
    }
1245
#endif
1246
 
1247
#ifdef SET_STATUS_FLAGS
1248
 
1249
  if (is_inexact (fpsc))
1250
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
1251
  else {
1252
    status = INEXACT_EXCEPTION;
1253
    // get remainder
1254
    remainder_h = QH << (64 - amount);
1255
 
1256
    switch (rmode) {
1257
    case ROUNDING_TO_NEAREST:
1258
    case ROUNDING_TIES_AWAY:
1259
      // test whether fractional part is 0
1260
      if (remainder_h == 0x8000000000000000ull
1261
	  && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
1262
	      || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
1263
		  && Q_low.w[0] <
1264
		  reciprocals10_128[extra_digits].w[0])))
1265
	status = EXACT_STATUS;
1266
      break;
1267
    case ROUNDING_DOWN:
1268
    case ROUNDING_TO_ZERO:
1269
      if (!remainder_h
1270
	  && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
1271
	      || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
1272
		  && Q_low.w[0] <
1273
		  reciprocals10_128[extra_digits].w[0])))
1274
	status = EXACT_STATUS;
1275
      break;
1276
    default:
1277
      // round up
1278
      __add_carry_out (Stemp.w[0], CY, Q_low.w[0],
1279
		       reciprocals10_128[extra_digits].w[0]);
1280
      __add_carry_in_out (Stemp.w[1], carry, Q_low.w[1],
1281
			  reciprocals10_128[extra_digits].w[1], CY);
1282
      if ((remainder_h >> (64 - amount)) + carry >=
1283
	  (((UINT64) 1) << amount))
1284
	status = EXACT_STATUS;
1285
    }
1286
 
1287
    if (status != EXACT_STATUS)
1288
      __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
1289
  }
1290
 
1291
#endif
1292
 
1293
  return sgn | C64;
1294
 
1295
}
1296
 
1297
 
1298
 
1299
//
1300
//   This pack macro does not check for coefficients above 2^53
1301
//
1302
__BID_INLINE__ UINT64
1303
get_BID64_small_mantissa (UINT64 sgn, int expon, UINT64 coeff,
1304
			  int rmode, unsigned *fpsc) {
1305
  UINT128 C128, Q_low, Stemp;
1306
  UINT64 r, mask, C64, remainder_h, QH, carry, CY;
1307
  int extra_digits, amount, amount2;
1308
  unsigned status;
1309
 
1310
  // check for possible underflow/overflow
1311
  if (((unsigned) expon) >= 3 * 256) {
1312
    if (expon < 0) {
1313
      // underflow
1314
      if (expon + MAX_FORMAT_DIGITS < 0) {
1315
#ifdef SET_STATUS_FLAGS
1316
	__set_status_flags (fpsc,
1317
			    UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
1318
#endif
1319
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1320
#ifndef IEEE_ROUND_NEAREST
1321
	if (rmode == ROUNDING_DOWN && sgn)
1322
	  return 0x8000000000000001ull;
1323
	if (rmode == ROUNDING_UP && !sgn)
1324
	  return 1ull;
1325
#endif
1326
#endif
1327
	// result is 0
1328
	return sgn;
1329
      }
1330
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1331
#ifndef IEEE_ROUND_NEAREST
1332
      if (sgn && (unsigned) (rmode - 1) < 2)
1333
	rmode = 3 - rmode;
1334
#endif
1335
#endif
1336
      // get digits to be shifted out
1337
      extra_digits = -expon;
1338
      C128.w[0] = coeff + round_const_table[rmode][extra_digits];
1339
 
1340
      // get coeff*(2^M[extra_digits])/10^extra_digits
1341
      __mul_64x128_full (QH, Q_low, C128.w[0],
1342
			 reciprocals10_128[extra_digits]);
1343
 
1344
      // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
1345
      amount = recip_scale[extra_digits];
1346
 
1347
      C64 = QH >> amount;
1348
 
1349
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1350
#ifndef IEEE_ROUND_NEAREST
1351
      if (rmode == 0)	//ROUNDING_TO_NEAREST
1352
#endif
1353
	if (C64 & 1) {
1354
	  // check whether fractional part of initial_P/10^extra_digits is exactly .5
1355
 
1356
	  // get remainder
1357
	  amount2 = 64 - amount;
1358
	  remainder_h = 0;
1359
	  remainder_h--;
1360
	  remainder_h >>= amount2;
1361
	  remainder_h = remainder_h & QH;
1362
 
1363
	  if (!remainder_h
1364
	      && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
1365
		  || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
1366
		      && Q_low.w[0] <
1367
		      reciprocals10_128[extra_digits].w[0]))) {
1368
	    C64--;
1369
	  }
1370
	}
1371
#endif
1372
 
1373
#ifdef SET_STATUS_FLAGS
1374
 
1375
      if (is_inexact (fpsc))
1376
	__set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
1377
      else {
1378
	status = INEXACT_EXCEPTION;
1379
	// get remainder
1380
	remainder_h = QH << (64 - amount);
1381
 
1382
	switch (rmode) {
1383
	case ROUNDING_TO_NEAREST:
1384
	case ROUNDING_TIES_AWAY:
1385
	  // test whether fractional part is 0
1386
	  if (remainder_h == 0x8000000000000000ull
1387
	      && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
1388
		  || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
1389
		      && Q_low.w[0] <
1390
		      reciprocals10_128[extra_digits].w[0])))
1391
	    status = EXACT_STATUS;
1392
	  break;
1393
	case ROUNDING_DOWN:
1394
	case ROUNDING_TO_ZERO:
1395
	  if (!remainder_h
1396
	      && (Q_low.w[1] < reciprocals10_128[extra_digits].w[1]
1397
		  || (Q_low.w[1] == reciprocals10_128[extra_digits].w[1]
1398
		      && Q_low.w[0] <
1399
		      reciprocals10_128[extra_digits].w[0])))
1400
	    status = EXACT_STATUS;
1401
	  break;
1402
	default:
1403
	  // round up
1404
	  __add_carry_out (Stemp.w[0], CY, Q_low.w[0],
1405
			   reciprocals10_128[extra_digits].w[0]);
1406
	  __add_carry_in_out (Stemp.w[1], carry, Q_low.w[1],
1407
			      reciprocals10_128[extra_digits].w[1], CY);
1408
	  if ((remainder_h >> (64 - amount)) + carry >=
1409
	      (((UINT64) 1) << amount))
1410
	    status = EXACT_STATUS;
1411
	}
1412
 
1413
	if (status != EXACT_STATUS)
1414
	  __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
1415
      }
1416
 
1417
#endif
1418
 
1419
      return sgn | C64;
1420
    }
1421
 
1422
    while (coeff < 1000000000000000ull && expon >= 3 * 256) {
1423
      expon--;
1424
      coeff = (coeff << 3) + (coeff << 1);
1425
    }
1426
    if (expon > DECIMAL_MAX_EXPON_64) {
1427
#ifdef SET_STATUS_FLAGS
1428
      __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
1429
#endif
1430
      // overflow
1431
      r = sgn | INFINITY_MASK64;
1432
      switch (rmode) {
1433
      case ROUNDING_DOWN:
1434
	if (!sgn)
1435
	  r = LARGEST_BID64;
1436
	break;
1437
      case ROUNDING_TO_ZERO:
1438
	r = sgn | LARGEST_BID64;
1439
	break;
1440
      case ROUNDING_UP:
1441
	// round up
1442
	if (sgn)
1443
	  r = SMALLEST_BID64;
1444
      }
1445
      return r;
1446
    } else {
1447
      mask = 1;
1448
      mask <<= EXPONENT_SHIFT_SMALL64;
1449
      if (coeff >= mask) {
1450
	r = expon;
1451
	r <<= EXPONENT_SHIFT_LARGE64;
1452
	r |= (sgn | SPECIAL_ENCODING_MASK64);
1453
	// add coeff, without leading bits
1454
	mask = (mask >> 2) - 1;
1455
	coeff &= mask;
1456
	r |= coeff;
1457
	return r;
1458
      }
1459
    }
1460
  }
1461
 
1462
  r = expon;
1463
  r <<= EXPONENT_SHIFT_SMALL64;
1464
  r |= (coeff | sgn);
1465
 
1466
  return r;
1467
}
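//
//   Illustrative usage sketch (not part of the original header): packing
//   +1234567 * 10^0 as BID64 via the small-mantissa path.  The biased
//   exponent 398 assumes the standard BID64 exponent bias; the helper
//   name below is hypothetical.
//
__BID_INLINE__ UINT64
example_pack_bid64_small (unsigned *fpsc) {
  // sign field 0 (positive), biased exponent 398, coefficient 1234567
  // (small enough to fit directly in the coefficient field)
  return get_BID64_small_mantissa (0ull, 398, 1234567ull,
				   ROUNDING_TO_NEAREST, fpsc);
}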
1468
 
1469
 
1470
/*****************************************************************************
1471
*
1472
*    BID128 pack/unpack macros
1473
*
1474
*****************************************************************************/
1475
 
1476
//
1477
//   Macro for handling BID128 underflow
1478
//         sticky bit given as additional argument
1479
//
1480
__BID_INLINE__ UINT128 *
1481
handle_UF_128_rem (UINT128 * pres, UINT64 sgn, int expon, UINT128 CQ,
1482
		   UINT64 R, unsigned *prounding_mode, unsigned *fpsc) {
1483
  UINT128 T128, TP128, Qh, Ql, Qh1, Stemp, Tmp, Tmp1, CQ2, CQ8;
1484
  UINT64 carry, CY;
1485
  int ed2, amount;
1486
  unsigned rmode, status;
1487
 
1488
  // UF occurs
1489
  if (expon + MAX_FORMAT_DIGITS_128 < 0) {
1490
#ifdef SET_STATUS_FLAGS
1491
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
1492
#endif
1493
    pres->w[1] = sgn;
1494
    pres->w[0] = 0;
1495
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1496
#ifndef IEEE_ROUND_NEAREST
1497
    if ((sgn && *prounding_mode == ROUNDING_DOWN)
1498
	|| (!sgn && *prounding_mode == ROUNDING_UP))
1499
      pres->w[0] = 1ull;
1500
#endif
1501
#endif
1502
    return pres;
1503
  }
1504
  // CQ *= 10
1505
  CQ2.w[1] = (CQ.w[1] << 1) | (CQ.w[0] >> 63);
1506
  CQ2.w[0] = CQ.w[0] << 1;
1507
  CQ8.w[1] = (CQ.w[1] << 3) | (CQ.w[0] >> 61);
1508
  CQ8.w[0] = CQ.w[0] << 3;
1509
  __add_128_128 (CQ, CQ2, CQ8);
1510
 
1511
  // add remainder
1512
  if (R)
1513
    CQ.w[0] |= 1;
1514
 
1515
  ed2 = 1 - expon;
1516
  // add rounding constant to CQ
1517
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1518
#ifndef IEEE_ROUND_NEAREST
1519
  rmode = *prounding_mode;
1520
  if (sgn && (unsigned) (rmode - 1) < 2)
1521
    rmode = 3 - rmode;
1522
#else
1523
  rmode = 0;
1524
#endif
1525
#else
1526
  rmode = 0;
1527
#endif
1528
  T128 = round_const_table_128[rmode][ed2];
1529
  __add_carry_out (CQ.w[0], carry, T128.w[0], CQ.w[0]);
1530
  CQ.w[1] = CQ.w[1] + T128.w[1] + carry;
1531
 
1532
  TP128 = reciprocals10_128[ed2];
1533
  __mul_128x128_full (Qh, Ql, CQ, TP128);
1534
  amount = recip_scale[ed2];
1535
 
1536
  if (amount >= 64) {
1537
    CQ.w[0] = Qh.w[1] >> (amount - 64);
1538
    CQ.w[1] = 0;
1539
  } else {
1540
    __shr_128 (CQ, Qh, amount);
1541
  }
1542
 
1543
  expon = 0;
1544
 
1545
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1546
#ifndef IEEE_ROUND_NEAREST
1547
  if (!(*prounding_mode))
1548
#endif
1549
    if (CQ.w[0] & 1) {
1550
      // check whether fractional part of initial_P/10^ed2 is exactly .5
1551
 
1552
      // get remainder
1553
      __shl_128_long (Qh1, Qh, (128 - amount));
1554
 
1555
      if (!Qh1.w[1] && !Qh1.w[0]
1556
	  && (Ql.w[1] < reciprocals10_128[ed2].w[1]
1557
	      || (Ql.w[1] == reciprocals10_128[ed2].w[1]
1558
		  && Ql.w[0] < reciprocals10_128[ed2].w[0]))) {
1559
	CQ.w[0]--;
1560
      }
1561
    }
1562
#endif
1563
 
1564
#ifdef SET_STATUS_FLAGS
1565
 
1566
  if (is_inexact (fpsc))
1567
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
1568
  else {
1569
    status = INEXACT_EXCEPTION;
1570
    // get remainder
1571
    __shl_128_long (Qh1, Qh, (128 - amount));
1572
 
1573
    switch (rmode) {
1574
    case ROUNDING_TO_NEAREST:
1575
    case ROUNDING_TIES_AWAY:
1576
      // test whether fractional part is 0
1577
      if (Qh1.w[1] == 0x8000000000000000ull && (!Qh1.w[0])
1578
	  && (Ql.w[1] < reciprocals10_128[ed2].w[1]
1579
	      || (Ql.w[1] == reciprocals10_128[ed2].w[1]
1580
		  && Ql.w[0] < reciprocals10_128[ed2].w[0])))
1581
	status = EXACT_STATUS;
1582
      break;
1583
    case ROUNDING_DOWN:
1584
    case ROUNDING_TO_ZERO:
1585
      if ((!Qh1.w[1]) && (!Qh1.w[0])
1586
	  && (Ql.w[1] < reciprocals10_128[ed2].w[1]
1587
	      || (Ql.w[1] == reciprocals10_128[ed2].w[1]
1588
		  && Ql.w[0] < reciprocals10_128[ed2].w[0])))
1589
	status = EXACT_STATUS;
1590
      break;
1591
    default:
1592
      // round up
1593
      __add_carry_out (Stemp.w[0], CY, Ql.w[0],
1594
		       reciprocals10_128[ed2].w[0]);
1595
      __add_carry_in_out (Stemp.w[1], carry, Ql.w[1],
1596
			  reciprocals10_128[ed2].w[1], CY);
1597
      __shr_128_long (Qh, Qh1, (128 - amount));
1598
      Tmp.w[0] = 1;
1599
      Tmp.w[1] = 0;
1600
      __shl_128_long (Tmp1, Tmp, amount);
1601
      Qh.w[0] += carry;
1602
      if (Qh.w[0] < carry)
1603
	Qh.w[1]++;
1604
      if (__unsigned_compare_ge_128 (Qh, Tmp1))
1605
	status = EXACT_STATUS;
1606
    }
1607
 
1608
    if (status != EXACT_STATUS)
1609
      __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
1610
  }
1611
 
1612
#endif
1613
 
1614
  pres->w[1] = sgn | CQ.w[1];
1615
  pres->w[0] = CQ.w[0];
1616
 
1617
  return pres;
1618
 
1619
}
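//
//   Illustrative usage sketch (not part of the original header): calling
//   handle_UF_128_rem with an underflowed exponent and a non-zero sticky
//   remainder R, which records that digits already discarded by the caller
//   were non-zero.  Names and values are hypothetical.
//
__BID_INLINE__ UINT128 *
example_handle_uf_rem (UINT128 * pres, unsigned *prounding_mode,
		       unsigned *fpsc) {
  UINT128 CQ;
  CQ.w[1] = 0ull;
  CQ.w[0] = 999999999999999ull;	// coefficient left over by the caller
  // expon = -2: the coefficient must still be scaled down by 10^2;
  // R = 1 is the sticky bit for the part discarded earlier
  return handle_UF_128_rem (pres, 0ull, -2, CQ, 1ull, prounding_mode,
			    fpsc);
}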
1620
 
1621
 
1622
//
1623
//   Macro for handling BID128 underflow
1624
//
1625
__BID_INLINE__ UINT128 *
1626
handle_UF_128 (UINT128 * pres, UINT64 sgn, int expon, UINT128 CQ,
1627
	       unsigned *prounding_mode, unsigned *fpsc) {
1628
  UINT128 T128, TP128, Qh, Ql, Qh1, Stemp, Tmp, Tmp1;
1629
  UINT64 carry, CY;
1630
  int ed2, amount;
1631
  unsigned rmode, status;
1632
 
1633
  // UF occurs
1634
  if (expon + MAX_FORMAT_DIGITS_128 < 0) {
1635
#ifdef SET_STATUS_FLAGS
1636
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
1637
#endif
1638
    pres->w[1] = sgn;
1639
    pres->w[0] = 0;
1640
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1641
#ifndef IEEE_ROUND_NEAREST
1642
    if ((sgn && *prounding_mode == ROUNDING_DOWN)
1643
	|| (!sgn && *prounding_mode == ROUNDING_UP))
1644
      pres->w[0] = 1ull;
1645
#endif
1646
#endif
1647
    return pres;
1648
  }
1649
 
1650
  ed2 = 0 - expon;
1651
  // add rounding constant to CQ
1652
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1653
#ifndef IEEE_ROUND_NEAREST
1654
  rmode = *prounding_mode;
1655
  if (sgn && (unsigned) (rmode - 1) < 2)
1656
    rmode = 3 - rmode;
1657
#else
1658
  rmode = 0;
1659
#endif
1660
#else
1661
  rmode = 0;
1662
#endif
1663
 
1664
  T128 = round_const_table_128[rmode][ed2];
1665
  __add_carry_out (CQ.w[0], carry, T128.w[0], CQ.w[0]);
1666
  CQ.w[1] = CQ.w[1] + T128.w[1] + carry;
1667
 
1668
  TP128 = reciprocals10_128[ed2];
1669
  __mul_128x128_full (Qh, Ql, CQ, TP128);
1670
  amount = recip_scale[ed2];
1671
 
1672
  if (amount >= 64) {
1673
    CQ.w[0] = Qh.w[1] >> (amount - 64);
1674
    CQ.w[1] = 0;
1675
  } else {
1676
    __shr_128 (CQ, Qh, amount);
1677
  }
1678
 
1679
  expon = 0;
1680
 
1681
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1682
#ifndef IEEE_ROUND_NEAREST
1683
  if (!(*prounding_mode))
1684
#endif
1685
    if (CQ.w[0] & 1) {
1686
      // check whether fractional part of initial_P/10^ed2 is exactly .5
1687
 
1688
      // get remainder
1689
      __shl_128_long (Qh1, Qh, (128 - amount));
1690
 
1691
      if (!Qh1.w[1] && !Qh1.w[0]
1692
	  && (Ql.w[1] < reciprocals10_128[ed2].w[1]
1693
	      || (Ql.w[1] == reciprocals10_128[ed2].w[1]
1694
		  && Ql.w[0] < reciprocals10_128[ed2].w[0]))) {
1695
	CQ.w[0]--;
1696
      }
1697
    }
1698
#endif
1699
 
1700
#ifdef SET_STATUS_FLAGS
1701
 
1702
  if (is_inexact (fpsc))
1703
    __set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
1704
  else {
1705
    status = INEXACT_EXCEPTION;
1706
    // get remainder
1707
    __shl_128_long (Qh1, Qh, (128 - amount));
1708
 
1709
    switch (rmode) {
1710
    case ROUNDING_TO_NEAREST:
1711
    case ROUNDING_TIES_AWAY:
1712
      // test whether fractional part is 0
1713
      if (Qh1.w[1] == 0x8000000000000000ull && (!Qh1.w[0])
1714
	  && (Ql.w[1] < reciprocals10_128[ed2].w[1]
1715
	      || (Ql.w[1] == reciprocals10_128[ed2].w[1]
1716
		  && Ql.w[0] < reciprocals10_128[ed2].w[0])))
1717
	status = EXACT_STATUS;
1718
      break;
1719
    case ROUNDING_DOWN:
1720
    case ROUNDING_TO_ZERO:
1721
      if ((!Qh1.w[1]) && (!Qh1.w[0])
1722
	  && (Ql.w[1] < reciprocals10_128[ed2].w[1]
1723
	      || (Ql.w[1] == reciprocals10_128[ed2].w[1]
1724
		  && Ql.w[0] < reciprocals10_128[ed2].w[0])))
1725
	status = EXACT_STATUS;
1726
      break;
1727
    default:
1728
      // round up
1729
      __add_carry_out (Stemp.w[0], CY, Ql.w[0],
1730
		       reciprocals10_128[ed2].w[0]);
1731
      __add_carry_in_out (Stemp.w[1], carry, Ql.w[1],
1732
			  reciprocals10_128[ed2].w[1], CY);
1733
      __shr_128_long (Qh, Qh1, (128 - amount));
1734
      Tmp.w[0] = 1;
1735
      Tmp.w[1] = 0;
1736
      __shl_128_long (Tmp1, Tmp, amount);
1737
      Qh.w[0] += carry;
1738
      if (Qh.w[0] < carry)
1739
	Qh.w[1]++;
1740
      if (__unsigned_compare_ge_128 (Qh, Tmp1))
1741
	status = EXACT_STATUS;
1742
    }
1743
 
1744
    if (status != EXACT_STATUS)
1745
      __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
1746
  }
1747
 
1748
#endif
1749
 
1750
  pres->w[1] = sgn | CQ.w[1];
1751
  pres->w[0] = CQ.w[0];
1752
 
1753
  return pres;
1754
 
1755
}
1756
 
1757
 
1758
 
1759
//
1760
//  BID128 unpack, input passed by value
1761
//
1762
__BID_INLINE__ UINT64
1763
unpack_BID128_value (UINT64 * psign_x, int *pexponent_x,
1764
		     UINT128 * pcoefficient_x, UINT128 x) {
1765
  UINT128 coeff, T33, T34;
1766
  UINT64 ex;
1767
 
1768
  *psign_x = (x.w[1]) & 0x8000000000000000ull;
1769
 
1770
  // special encodings
1771
  if ((x.w[1] & INFINITY_MASK64) >= SPECIAL_ENCODING_MASK64) {
1772
    if ((x.w[1] & INFINITY_MASK64) < INFINITY_MASK64) {
1773
      // non-canonical input
1774
      pcoefficient_x->w[0] = 0;
1775
      pcoefficient_x->w[1] = 0;
1776
      ex = (x.w[1]) >> 47;
1777
      *pexponent_x = ((int) ex) & EXPONENT_MASK128;
1778
      return 0;
1779
    }
1780
    // 10^33
1781
    T33 = power10_table_128[33];
1782
    /*coeff.w[0] = x.w[0];
1783
       coeff.w[1] = (x.w[1]) & LARGE_COEFF_MASK128;
1784
       pcoefficient_x->w[0] = x.w[0];
1785
       pcoefficient_x->w[1] = x.w[1];
1786
       if (__unsigned_compare_ge_128 (coeff, T33)) // non-canonical
1787
       pcoefficient_x->w[1] &= (~LARGE_COEFF_MASK128); */
1788
 
1789
    pcoefficient_x->w[0] = x.w[0];
1790
    pcoefficient_x->w[1] = (x.w[1]) & 0x00003fffffffffffull;
1791
    if (__unsigned_compare_ge_128 ((*pcoefficient_x), T33))	// non-canonical
1792
    {
1793
      pcoefficient_x->w[1] = (x.w[1]) & 0xfe00000000000000ull;
1794
      pcoefficient_x->w[0] = 0;
1795
    } else
1796
      pcoefficient_x->w[1] = (x.w[1]) & 0xfe003fffffffffffull;
1797
    if ((x.w[1] & NAN_MASK64) == INFINITY_MASK64) {
1798
      pcoefficient_x->w[0] = 0;
1799
      pcoefficient_x->w[1] = x.w[1] & SINFINITY_MASK64;
1800
    }
1801
    *pexponent_x = 0;
1802
    return 0;	// NaN or Infinity
1803
  }
1804
 
1805
  coeff.w[0] = x.w[0];
1806
  coeff.w[1] = (x.w[1]) & SMALL_COEFF_MASK128;
1807
 
1808
  // 10^34
1809
  T34 = power10_table_128[34];
1810
  // check for non-canonical values
1811
  if (__unsigned_compare_ge_128 (coeff, T34))
1812
    coeff.w[0] = coeff.w[1] = 0;
1813
 
1814
  pcoefficient_x->w[0] = coeff.w[0];
1815
  pcoefficient_x->w[1] = coeff.w[1];
1816
 
1817
  ex = (x.w[1]) >> 49;
1818
  *pexponent_x = ((int) ex) & EXPONENT_MASK128;
1819
 
1820
  return coeff.w[0] | coeff.w[1];
1821
}
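//
//   Illustrative usage sketch (not part of the original header):
//   decomposing a finite BID128 operand into sign, biased exponent and
//   128-bit coefficient; for finite inputs the return value is non-zero
//   iff the canonical coefficient is non-zero.  The helper name is
//   hypothetical.
//
__BID_INLINE__ UINT64
example_unpack_bid128 (UINT128 x) {
  UINT64 sign_x;
  int exponent_x;
  UINT128 coefficient_x;
  UINT64 nonzero;

  nonzero = unpack_BID128_value (&sign_x, &exponent_x, &coefficient_x, x);
  // sign_x holds the sign bit, exponent_x the biased decimal exponent,
  // coefficient_x the (canonicalized) coefficient
  return nonzero;
}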
1822
 
1823
 
1824
//
1825
//  BID128 unpack, input passed by reference
1826
//
1827
__BID_INLINE__ UINT64
1828
unpack_BID128 (UINT64 * psign_x, int *pexponent_x,
1829
	       UINT128 * pcoefficient_x, UINT128 * px) {
1830
  UINT128 coeff, T33, T34;
1831
  UINT64 ex;
1832
 
1833
  *psign_x = (px->w[1]) & 0x8000000000000000ull;
1834
 
1835
  // special encodings
1836
  if ((px->w[1] & INFINITY_MASK64) >= SPECIAL_ENCODING_MASK64) {
1837
    if ((px->w[1] & INFINITY_MASK64) < INFINITY_MASK64) {
1838
      // non-canonical input
1839
      pcoefficient_x->w[0] = 0;
1840
      pcoefficient_x->w[1] = 0;
1841
      ex = (px->w[1]) >> 47;
1842
      *pexponent_x = ((int) ex) & EXPONENT_MASK128;
1843
      return 0;
1844
    }
1845
    // 10^33
1846
    T33 = power10_table_128[33];
1847
    coeff.w[0] = px->w[0];
1848
    coeff.w[1] = (px->w[1]) & LARGE_COEFF_MASK128;
1849
    pcoefficient_x->w[0] = px->w[0];
1850
    pcoefficient_x->w[1] = px->w[1];
1851
    if (__unsigned_compare_ge_128 (coeff, T33)) {	// non-canonical
1852
      pcoefficient_x->w[1] &= (~LARGE_COEFF_MASK128);
1853
      pcoefficient_x->w[0] = 0;
1854
    }
1855
    *pexponent_x = 0;
1856
    return 0;	// NaN or Infinity
1857
  }
1858
 
1859
  coeff.w[0] = px->w[0];
1860
  coeff.w[1] = (px->w[1]) & SMALL_COEFF_MASK128;
1861
 
1862
  // 10^34
1863
  T34 = power10_table_128[34];
1864
  // check for non-canonical values
1865
  if (__unsigned_compare_ge_128 (coeff, T34))
1866
    coeff.w[0] = coeff.w[1] = 0;
1867
 
1868
  pcoefficient_x->w[0] = coeff.w[0];
1869
  pcoefficient_x->w[1] = coeff.w[1];
1870
 
1871
  ex = (px->w[1]) >> 49;
1872
  *pexponent_x = ((int) ex) & EXPONENT_MASK128;
1873
 
1874
  return coeff.w[0] | coeff.w[1];
1875
}
1876
 
1877
//
1878
//   Pack macro checks for overflow, but not underflow
1879
//
1880
__BID_INLINE__ UINT128 *
1881
get_BID128_very_fast_OF (UINT128 * pres, UINT64 sgn, int expon,
1882
			 UINT128 coeff, unsigned *prounding_mode,
1883
			 unsigned *fpsc) {
1884
  UINT128 T;
1885
  UINT64 tmp, tmp2;
1886
 
1887
  if ((unsigned) expon > DECIMAL_MAX_EXPON_128) {
1888
 
1889
    if (expon - MAX_FORMAT_DIGITS_128 <= DECIMAL_MAX_EXPON_128) {
1890
      T = power10_table_128[MAX_FORMAT_DIGITS_128 - 1];
1891
      while (__unsigned_compare_gt_128 (T, coeff)
1892
	     && expon > DECIMAL_MAX_EXPON_128) {
1893
	coeff.w[1] =
1894
	  (coeff.w[1] << 3) + (coeff.w[1] << 1) + (coeff.w[0] >> 61) +
1895
	  (coeff.w[0] >> 63);
1896
	tmp2 = coeff.w[0] << 3;
1897
	coeff.w[0] = (coeff.w[0] << 1) + tmp2;
1898
	if (coeff.w[0] < tmp2)
1899
	  coeff.w[1]++;
1900
 
1901
	expon--;
1902
      }
1903
    }
1904
    if ((unsigned) expon > DECIMAL_MAX_EXPON_128) {
1905
      // OF
1906
#ifdef SET_STATUS_FLAGS
1907
      __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
1908
#endif
1909
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
1910
#ifndef IEEE_ROUND_NEAREST
1911
      if (*prounding_mode == ROUNDING_TO_ZERO
1912
	  || (sgn && *prounding_mode == ROUNDING_UP) || (!sgn
1913
							 &&
1914
							 *prounding_mode
1915
							 ==
1916
							 ROUNDING_DOWN))
1917
      {
1918
	pres->w[1] = sgn | LARGEST_BID128_HIGH;
1919
	pres->w[0] = LARGEST_BID128_LOW;
1920
      } else
1921
#endif
1922
#endif
1923
      {
1924
	pres->w[1] = sgn | INFINITY_MASK64;
1925
	pres->w[0] = 0;
1926
      }
1927
      return pres;
1928
    }
1929
  }
1930
 
1931
  pres->w[0] = coeff.w[0];
1932
  tmp = expon;
1933
  tmp <<= 49;
1934
  pres->w[1] = sgn | tmp | coeff.w[1];
1935
 
1936
  return pres;
1937
}
1938
 
1939
 
1940
//
1941
//   No overflow/underflow checks
1942
//   No checking for coefficient == 10^34 (rounding artifact)
1943
//
1944
__BID_INLINE__ UINT128 *
1945
get_BID128_very_fast (UINT128 * pres, UINT64 sgn, int expon,
1946
		      UINT128 coeff) {
1947
  UINT64 tmp;
1948
 
1949
  pres->w[0] = coeff.w[0];
1950
  tmp = expon;
1951
  tmp <<= 49;
1952
  pres->w[1] = sgn | tmp | coeff.w[1];
1953
 
1954
  return pres;
1955
}
1956
 
1957
//
1958
//   No overflow/underflow checks
1959
//
1960
__BID_INLINE__ UINT128 *
1961
get_BID128_fast (UINT128 * pres, UINT64 sgn, int expon, UINT128 coeff) {
1962
  UINT64 tmp;
1963
 
1964
  // coeff==10^34?
1965
  if (coeff.w[1] == 0x0001ed09bead87c0ull
1966
      && coeff.w[0] == 0x378d8e6400000000ull) {
1967
    expon++;
1968
    // set coefficient to 10^33
1969
    coeff.w[1] = 0x0000314dc6448d93ull;
1970
    coeff.w[0] = 0x38c15b0a00000000ull;
1971
  }
1972
 
1973
  pres->w[0] = coeff.w[0];
1974
  tmp = expon;
1975
  tmp <<= 49;
1976
  pres->w[1] = sgn | tmp | coeff.w[1];
1977
 
1978
  return pres;
1979
}
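//
//   Illustrative sketch (not part of the original header): a coefficient of
//   exactly 10^34, which can only arise as a rounding artifact, is
//   renormalized by get_BID128_fast to 10^33 with the exponent incremented.
//   The helper name and the biased exponent 6176 (true exponent 0, assuming
//   the standard BID128 bias) are illustrative.
//
__BID_INLINE__ UINT128 *
example_pack_bid128_artifact (UINT128 * pres) {
  UINT128 c;
  c.w[1] = 0x0001ed09bead87c0ull;	// high 64 bits of 10^34
  c.w[0] = 0x378d8e6400000000ull;	// low 64 bits of 10^34
  // the packed result carries coefficient 10^33 and biased exponent 6177
  return get_BID128_fast (pres, 0ull, 6176, c);
}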
1980
 
1981
//
1982
//   General BID128 pack macro
1983
//
1984
__BID_INLINE__ UINT128 *
1985
get_BID128 (UINT128 * pres, UINT64 sgn, int expon, UINT128 coeff,
1986
	    unsigned *prounding_mode, unsigned *fpsc) {
1987
  UINT128 T;
1988
  UINT64 tmp, tmp2;
1989
 
1990
  // coeff==10^34?
1991
  if (coeff.w[1] == 0x0001ed09bead87c0ull
1992
      && coeff.w[0] == 0x378d8e6400000000ull) {
1993
    expon++;
1994
    // set coefficient to 10^33
1995
    coeff.w[1] = 0x0000314dc6448d93ull;
1996
    coeff.w[0] = 0x38c15b0a00000000ull;
1997
  }
1998
  // check OF, UF
1999
  if (expon < 0 || expon > DECIMAL_MAX_EXPON_128) {
2000
    // check UF
2001
    if (expon < 0) {
2002
      return handle_UF_128 (pres, sgn, expon, coeff, prounding_mode,
2003
			    fpsc);
2004
    }
2005
 
2006
    if (expon - MAX_FORMAT_DIGITS_128 <= DECIMAL_MAX_EXPON_128) {
2007
      T = power10_table_128[MAX_FORMAT_DIGITS_128 - 1];
2008
      while (__unsigned_compare_gt_128 (T, coeff)
2009
	     && expon > DECIMAL_MAX_EXPON_128) {
2010
	coeff.w[1] =
2011
	  (coeff.w[1] << 3) + (coeff.w[1] << 1) + (coeff.w[0] >> 61) +
2012
	  (coeff.w[0] >> 63);
2013
	tmp2 = coeff.w[0] << 3;
2014
	coeff.w[0] = (coeff.w[0] << 1) + tmp2;
2015
	if (coeff.w[0] < tmp2)
2016
	  coeff.w[1]++;
2017
 
2018
	expon--;
2019
      }
2020
    }
2021
    if (expon > DECIMAL_MAX_EXPON_128) {
2022
      if (!(coeff.w[1] | coeff.w[0])) {
2023
	pres->w[1] = sgn | (((UINT64) DECIMAL_MAX_EXPON_128) << 49);
2024
	pres->w[0] = 0;
2025
	return pres;
2026
      }
2027
      // OF
2028
#ifdef SET_STATUS_FLAGS
2029
      __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
2030
#endif
2031
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
2032
#ifndef IEEE_ROUND_NEAREST
2033
      if (*prounding_mode == ROUNDING_TO_ZERO
2034
	  || (sgn && *prounding_mode == ROUNDING_UP) || (!sgn
2035
							 &&
2036
							 *prounding_mode
2037
							 ==
2038
							 ROUNDING_DOWN))
2039
      {
2040
	pres->w[1] = sgn | LARGEST_BID128_HIGH;
2041
	pres->w[0] = LARGEST_BID128_LOW;
2042
      } else
2043
#endif
2044
#endif
2045
      {
2046
	pres->w[1] = sgn | INFINITY_MASK64;
2047
	pres->w[0] = 0;
2048
      }
2049
      return pres;
2050
    }
2051
  }
2052
 
2053
  pres->w[0] = coeff.w[0];
2054
  tmp = expon;
2055
  tmp <<= 49;
2056
  pres->w[1] = sgn | tmp | coeff.w[1];
2057
 
2058
  return pres;
2059
}
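//
//   Illustrative usage sketch (not part of the original header): packing a
//   small coefficient with the general BID128 pack macro and reading it
//   back with unpack_BID128_value.  The biased exponent 6176 assumes the
//   standard BID128 bias; the helper name is hypothetical.
//
__BID_INLINE__ UINT64
example_bid128_roundtrip (unsigned *prounding_mode, unsigned *fpsc) {
  UINT128 res, coeff, coeff_out;
  UINT64 sign_out;
  int exp_out;

  coeff.w[1] = 0ull;
  coeff.w[0] = 42ull;
  get_BID128 (&res, 0ull, 6176, coeff, prounding_mode, fpsc);
  unpack_BID128_value (&sign_out, &exp_out, &coeff_out, res);
  // expect sign_out == 0, exp_out == 6176, coeff_out.w[0] == 42
  return coeff_out.w[0];
}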
2060
 
2061
 
2062
//
2063
//  Macro used for conversions from string
2064
//        (no additional arguments given for rounding mode, status flags)
2065
//
2066
__BID_INLINE__ UINT128 *
2067
get_BID128_string (UINT128 * pres, UINT64 sgn, int expon, UINT128 coeff) {
2068
  UINT128 D2, D8;
2069
  UINT64 tmp;
2070
  unsigned rmode = 0, status;
2071
 
2072
  // coeff==10^34?
2073
  if (coeff.w[1] == 0x0001ed09bead87c0ull
2074
      && coeff.w[0] == 0x378d8e6400000000ull) {
2075
    expon++;
2076
    // set coefficient to 10^33
2077
    coeff.w[1] = 0x0000314dc6448d93ull;
2078
    coeff.w[0] = 0x38c15b0a00000000ull;
2079
  }
2080
  // check OF, UF
2081
  if ((unsigned) expon > DECIMAL_MAX_EXPON_128) {
2082
    // check UF
2083
    if (expon < 0)
2084
      return handle_UF_128 (pres, sgn, expon, coeff, &rmode, &status);
2085
 
2086
    // OF
2087
 
2088
    if (expon < DECIMAL_MAX_EXPON_128 + 34) {
2089
      while (expon > DECIMAL_MAX_EXPON_128 &&
2090
	     (coeff.w[1] < power10_table_128[33].w[1] ||
2091
	      (coeff.w[1] == power10_table_128[33].w[1]
2092
	       && coeff.w[0] < power10_table_128[33].w[0]))) {
2093
	D2.w[1] = (coeff.w[1] << 1) | (coeff.w[0] >> 63);
2094
	D2.w[0] = coeff.w[0] << 1;
2095
	D8.w[1] = (coeff.w[1] << 3) | (coeff.w[0] >> 61);
2096
	D8.w[0] = coeff.w[0] << 3;
2097
 
2098
	__add_128_128 (coeff, D2, D8);
2099
	expon--;
2100
      }
2101
    } else if (!(coeff.w[0] | coeff.w[1]))
2102
      expon = DECIMAL_MAX_EXPON_128;
2103
 
2104
    if (expon > DECIMAL_MAX_EXPON_128) {
2105
      pres->w[1] = sgn | INFINITY_MASK64;
2106
      pres->w[0] = 0;
2107
      switch (rmode) {
2108
      case ROUNDING_DOWN:
2109
	if (!sgn) {
2110
	  pres->w[1] = LARGEST_BID128_HIGH;
2111
	  pres->w[0] = LARGEST_BID128_LOW;
2112
	}
2113
	break;
2114
      case ROUNDING_TO_ZERO:
2115
	pres->w[1] = sgn | LARGEST_BID128_HIGH;
2116
	pres->w[0] = LARGEST_BID128_LOW;
2117
	break;
2118
      case ROUNDING_UP:
2119
	// round up
2120
	if (sgn) {
2121
	  pres->w[1] = sgn | LARGEST_BID128_HIGH;
2122
	  pres->w[0] = LARGEST_BID128_LOW;
2123
	}
2124
	break;
2125
      }
2126
 
2127
      return pres;
2128
    }
2129
  }
2130
 
2131
  pres->w[0] = coeff.w[0];
2132
  tmp = expon;
2133
  tmp <<= 49;
2134
  pres->w[1] = sgn | tmp | coeff.w[1];
2135
 
2136
  return pres;
2137
}
2138
 
2139
 
2140
 
2141
/*****************************************************************************
2142
*
2143
*    BID32 pack/unpack macros
2144
*
2145
*****************************************************************************/
2146
 
2147
 
2148
__BID_INLINE__ UINT32
2149
unpack_BID32 (UINT32 * psign_x, int *pexponent_x,
2150
	      UINT32 * pcoefficient_x, UINT32 x) {
2151
  UINT32 tmp;
2152
 
2153
  *psign_x = x & 0x80000000;
2154
 
2155
  if ((x & SPECIAL_ENCODING_MASK32) == SPECIAL_ENCODING_MASK32) {
2156
    // special encodings
2157
    if ((x & INFINITY_MASK32) == INFINITY_MASK32) {
2158
      *pcoefficient_x = x & 0xfe0fffff;
2159
      if ((x & 0x000fffff) >= 1000000)
2160
	*pcoefficient_x = x & 0xfe000000;
2161
      if ((x & NAN_MASK32) == INFINITY_MASK32)
2162
	*pcoefficient_x = x & 0xf8000000;
2163
      *pexponent_x = 0;
2164
      return 0;	// NaN or Infinity
2165
    }
2166
    // coefficient
2167
    *pcoefficient_x = (x & SMALL_COEFF_MASK32) | LARGE_COEFF_HIGH_BIT32;
2168
    // check for non-canonical value
2169
    if (*pcoefficient_x >= 10000000)
2170
      *pcoefficient_x = 0;
2171
    // get exponent
2172
    tmp = x >> 21;
2173
    *pexponent_x = tmp & EXPONENT_MASK32;
2174
    return 1;
2175
  }
2176
  // exponent
2177
  tmp = x >> 23;
2178
  *pexponent_x = tmp & EXPONENT_MASK32;
2179
  // coefficient
2180
  *pcoefficient_x = (x & LARGE_COEFF_MASK32);
2181
 
2182
  return *pcoefficient_x;
2183
}
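//
//   Illustrative usage sketch (not part of the original header): unpacking
//   the BID32 encoding 0x32800001, which represents +1 * 10^0 assuming the
//   standard BID32 exponent bias of 101.  The helper name is hypothetical.
//
__BID_INLINE__ UINT32
example_unpack_bid32 (void) {
  UINT32 sign_x, coefficient_x;
  int exponent_x;

  unpack_BID32 (&sign_x, &exponent_x, &coefficient_x, 0x32800001);
  // expect sign_x == 0, exponent_x == 101, coefficient_x == 1
  return coefficient_x;
}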
2184
 
2185
//
2186
//   General pack macro for BID32
2187
//
2188
__BID_INLINE__ UINT32
2189
get_BID32 (UINT32 sgn, int expon, UINT64 coeff, int rmode,
2190
	   unsigned *fpsc) {
2191
  UINT128 Q;
2192
  UINT64 C64, remainder_h, carry, Stemp;
2193
  UINT32 r, mask;
2194
  int extra_digits, amount, amount2;
2195
  unsigned status;
2196
 
2197
  if (coeff > 9999999ull) {
2198
    expon++;
2199
    coeff = 1000000ull;
2200
  }
2201
  // check for possible underflow/overflow
2202
  if (((unsigned) expon) > DECIMAL_MAX_EXPON_32) {
2203
    if (expon < 0) {
2204
      // underflow
2205
      if (expon + MAX_FORMAT_DIGITS_32 < 0) {
2206
#ifdef SET_STATUS_FLAGS
2207
	__set_status_flags (fpsc,
2208
			    UNDERFLOW_EXCEPTION | INEXACT_EXCEPTION);
2209
#endif
2210
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
2211
#ifndef IEEE_ROUND_NEAREST
2212
	if (rmode == ROUNDING_DOWN && sgn)
2213
	  return 0x80000001;
2214
	if (rmode == ROUNDING_UP && !sgn)
2215
	  return 1;
2216
#endif
2217
#endif
2218
	// result is 0
2219
	return sgn;
2220
      }
2221
      // get digits to be shifted out
2222
#ifdef IEEE_ROUND_NEAREST_TIES_AWAY
2223
      rmode = 0;
2224
#endif
2225
#ifdef IEEE_ROUND_NEAREST
2226
      rmode = 0;
2227
#endif
2228
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
2229
#ifndef IEEE_ROUND_NEAREST
2230
      if (sgn && (unsigned) (rmode - 1) < 2)
2231
	rmode = 3 - rmode;
2232
#endif
2233
#endif
2234
 
2235
      extra_digits = -expon;
2236
      coeff += round_const_table[rmode][extra_digits];
2237
 
2238
      // get coeff*(2^M[extra_digits])/10^extra_digits
2239
      __mul_64x64_to_128 (Q, coeff, reciprocals10_64[extra_digits]);
2240
 
2241
      // now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
2242
      amount = short_recip_scale[extra_digits];
2243
 
2244
      C64 = Q.w[1] >> amount;
2245
 
2246
#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
2247
#ifndef IEEE_ROUND_NEAREST
2248
      if (rmode == 0)	//ROUNDING_TO_NEAREST
2249
#endif
2250
	if (C64 & 1) {
2251
	  // check whether fractional part of initial_P/10^extra_digits is exactly .5
2252
 
2253
	  // get remainder
2254
	  amount2 = 64 - amount;
2255
	  remainder_h = 0;
2256
	  remainder_h--;
2257
	  remainder_h >>= amount2;
2258
	  remainder_h = remainder_h & Q.w[1];
2259
 
2260
	  if (!remainder_h && (Q.w[0] < reciprocals10_64[extra_digits])) {
2261
	    C64--;
2262
	  }
2263
	}
2264
#endif
2265
 
2266
#ifdef SET_STATUS_FLAGS
2267
 
2268
      if (is_inexact (fpsc))
2269
	__set_status_flags (fpsc, UNDERFLOW_EXCEPTION);
2270
      else {
2271
	status = INEXACT_EXCEPTION;
2272
	// get remainder
2273
	remainder_h = Q.w[1] << (64 - amount);
2274
 
2275
	switch (rmode) {
2276
	case ROUNDING_TO_NEAREST:
2277
	case ROUNDING_TIES_AWAY:
2278
	  // test whether fractional part is 0
2279
	  if (remainder_h == 0x8000000000000000ull
2280
	      && (Q.w[0] < reciprocals10_64[extra_digits]))
2281
	    status = EXACT_STATUS;
2282
	  break;
2283
	case ROUNDING_DOWN:
2284
	case ROUNDING_TO_ZERO:
2285
	  if (!remainder_h && (Q.w[0] < reciprocals10_64[extra_digits]))
2286
	    status = EXACT_STATUS;
2287
	  break;
2288
	default:
2289
	  // round up
2290
	  __add_carry_out (Stemp, carry, Q.w[0],
2291
			   reciprocals10_64[extra_digits]);
2292
	  if ((remainder_h >> (64 - amount)) + carry >=
2293
	      (((UINT64) 1) << amount))
2294
	    status = EXACT_STATUS;
2295
	}
2296
 
2297
	if (status != EXACT_STATUS)
2298
	  __set_status_flags (fpsc, UNDERFLOW_EXCEPTION | status);
2299
      }
2300
 
2301
#endif
2302
 
2303
      return sgn | (UINT32) C64;
2304
    }
2305
 
2306
    while (coeff < 1000000 && expon > DECIMAL_MAX_EXPON_32) {
2307
      coeff = (coeff << 3) + (coeff << 1);
2308
      expon--;
2309
    }
2310
    if (((unsigned) expon) > DECIMAL_MAX_EXPON_32) {
2311
      __set_status_flags (fpsc, OVERFLOW_EXCEPTION | INEXACT_EXCEPTION);
2312
      // overflow
2313
      r = sgn | INFINITY_MASK32;
2314
      switch (rmode) {
2315
      case ROUNDING_DOWN:
2316
	if (!sgn)
2317
	  r = LARGEST_BID32;
2318
	break;
2319
      case ROUNDING_TO_ZERO:
2320
	r = sgn | LARGEST_BID32;
2321
	break;
2322
      case ROUNDING_UP:
2323
	// round up
2324
	if (sgn)
2325
	  r = sgn | LARGEST_BID32;
2326
      }
2327
      return r;
2328
    }
2329
  }
2330
 
2331
  mask = 1 << 23;
2332
 
2333
  // check whether coefficient fits in DECIMAL_COEFF_FIT bits
2334
  if (coeff < mask) {
2335
    r = expon;
2336
    r <<= 23;
2337
    r |= ((UINT32) coeff | sgn);
2338
    return r;
2339
  }
2340
  // special format
2341
 
2342
  r = expon;
2343
  r <<= 21;
2344
  r |= (sgn | SPECIAL_ENCODING_MASK32);
2345
  // add coeff, without leading bits
2346
  mask = (1 << 21) - 1;
2347
  r |= (((UINT32) coeff) & mask);
2348
 
2349
  return r;
2350
}
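//
//   Illustrative usage sketch (not part of the original header): packing
//   +1 * 10^0 with the general BID32 pack macro.  With coefficient 1 and
//   biased exponent 101 (the standard BID32 bias) the expected encoding is
//   0x32800001; the helper name is hypothetical.
//
__BID_INLINE__ UINT32
example_pack_bid32_one (unsigned *fpsc) {
  return get_BID32 (0, 101, 1ull, ROUNDING_TO_NEAREST, fpsc);
}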
2351
 
2352
 
2353
 
2354
//
2355
//   no overflow/underflow checks
2356
//
2357
__BID_INLINE__ UINT32
2358
very_fast_get_BID32 (UINT32 sgn, int expon, UINT32 coeff) {
2359
  UINT32 r, mask;
2360
 
2361
  mask = 1 << 23;
2362
 
2363
  // check whether coefficient fits in 10*2+3 bits
2364
  if (coeff < mask) {
2365
    r = expon;
2366
    r <<= 23;
2367
    r |= (coeff | sgn);
2368
    return r;
2369
  }
2370
  // special format
2371
  r = expon;
2372
  r <<= 21;
2373
  r |= (sgn | SPECIAL_ENCODING_MASK32);
2374
  // add coeff, without leading bits
2375
  mask = (1 << 21) - 1;
2376
  coeff &= mask;
2377
  r |= coeff;
2378
 
2379
  return r;
2380
}
2381
 
2382
 
2383
 
2384
/*************************************************************
2385
 *
2386
 *************************************************************/
2387
typedef
2388
ALIGN (16)
2389
     struct {
2390
       UINT64 w[6];
2391
     } UINT384;
2392
     typedef ALIGN (16)
2393
     struct {
2394
       UINT64 w[8];
2395
     } UINT512;
2396
 
2397
// #define P                               34
2398
#define MASK_STEERING_BITS              0x6000000000000000ull
2399
#define MASK_BINARY_EXPONENT1           0x7fe0000000000000ull
2400
#define MASK_BINARY_SIG1                0x001fffffffffffffull
2401
#define MASK_BINARY_EXPONENT2           0x1ff8000000000000ull
2402
    //used to take G[2:w+3] (sec 3.3)
2403
#define MASK_BINARY_SIG2                0x0007ffffffffffffull
2404
    //used to mask out G4:T0 (sec 3.3)
2405
#define MASK_BINARY_OR2                 0x0020000000000000ull
2406
    //used to prefix 8+G4 to T (sec 3.3)
2407
#define UPPER_EXPON_LIMIT               51
2408
#define MASK_EXP                        0x7ffe000000000000ull
2409
#define MASK_SPECIAL                    0x7800000000000000ull
2410
#define MASK_NAN                        0x7c00000000000000ull
2411
#define MASK_SNAN                       0x7e00000000000000ull
2412
#define MASK_ANY_INF                    0x7c00000000000000ull
2413
#define MASK_INF                        0x7800000000000000ull
2414
#define MASK_SIGN                       0x8000000000000000ull
2415
#define MASK_COEFF                      0x0001ffffffffffffull
2416
#define BIN_EXP_BIAS                    (0x1820ull << 49)
2417
 
2418
#define EXP_MIN                         0x0000000000000000ull
2419
   // EXP_MIN = (-6176 + 6176) << 49
2420
#define EXP_MAX                         0x5ffe000000000000ull
2421
  // EXP_MAX = (6111 + 6176) << 49
2422
#define EXP_MAX_P1                      0x6000000000000000ull
2423
  // EXP_MAX + 1 = (6111 + 6176 + 1) << 49
2424
#define EXP_P1                          0x0002000000000000ull
2425
  // EXP_P1 = 1 << 49
2426
#define expmin                            -6176
2427
  // min unbiased exponent
2428
#define expmax                            6111
2429
  // max unbiased exponent
2430
#define expmin16                          -398
2431
  // min unbiased exponent
2432
#define expmax16                          369
2433
  // max unbiased exponent
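  // Worked check (illustrative, not from the original header): the biased
  // decimal exponent is stored in bits 49 and up, with bias 6176 = 0x1820:
  //   EXP_MAX = (6111 + 6176) << 49 = 12287 << 49 = 0x5ffe000000000000ull
  //   EXP_P1  =             1 << 49               = 0x0002000000000000ull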
2434
 
2435
#define SIGNMASK32 0x80000000
2436
#define BID64_SIG_MAX 0x002386F26FC0ffffull
2437
#define SIGNMASK64    0x8000000000000000ull
2438
 
2439
// typedef unsigned int FPSC; // floating-point status and control
2440
	// bit31:
2441
	// bit30:
2442
	// bit29:
2443
	// bit28:
2444
	// bit27:
2445
	// bit26:
2446
	// bit25:
2447
	// bit24:
2448
	// bit23:
2449
	// bit22:
2450
	// bit21:
2451
	// bit20:
2452
	// bit19:
2453
	// bit18:
2454
	// bit17:
2455
	// bit16:
2456
	// bit15:
2457
	// bit14: RC:2
2458
	// bit13: RC:1
2459
	// bit12: RC:0
2460
	// bit11: PM
2461
	// bit10: UM
2462
	// bit9:  OM
2463
	// bit8:  ZM
2464
	// bit7:  DM
2465
	// bit6:  IM
2466
	// bit5:  PE
2467
	// bit4:  UE
2468
	// bit3:  OE
2469
	// bit2:  ZE
2470
	// bit1:  DE
2471
	// bit0:  IE
2472
 
2473
#define ROUNDING_MODE_MASK	0x00007000
2474
 
2475
     typedef struct _DEC_DIGITS {
2476
       unsigned int digits;
2477
       UINT64 threshold_hi;
2478
       UINT64 threshold_lo;
2479
       unsigned int digits1;
2480
     } DEC_DIGITS;
2481
 
2482
     extern DEC_DIGITS nr_digits[];
2483
     extern UINT64 midpoint64[];
2484
     extern UINT128 midpoint128[];
2485
     extern UINT192 midpoint192[];
2486
     extern UINT256 midpoint256[];
2487
     extern UINT64 ten2k64[];
2488
     extern UINT128 ten2k128[];
2489
     extern UINT256 ten2k256[];
2490
     extern UINT128 ten2mk128[];
2491
     extern UINT64 ten2mk64[];
2492
     extern UINT128 ten2mk128trunc[];
2493
     extern int shiftright128[];
2494
     extern UINT64 maskhigh128[];
2495
     extern UINT64 maskhigh128M[];
2496
     extern UINT64 maskhigh192M[];
2497
     extern UINT64 maskhigh256M[];
2498
     extern UINT64 onehalf128[];
2499
     extern UINT64 onehalf128M[];
2500
     extern UINT64 onehalf192M[];
2501
     extern UINT64 onehalf256M[];
2502
     extern UINT128 ten2mk128M[];
2503
     extern UINT128 ten2mk128truncM[];
2504
     extern UINT192 ten2mk192truncM[];
2505
     extern UINT256 ten2mk256truncM[];
2506
     extern int shiftright128M[];
2507
     extern int shiftright192M[];
2508
     extern int shiftright256M[];
2509
     extern UINT192 ten2mk192M[];
2510
     extern UINT256 ten2mk256M[];
2511
     extern unsigned char char_table2[];
2512
     extern unsigned char char_table3[];
2513
 
2514
     extern UINT64 ten2m3k64[];
2515
     extern unsigned int shift_ten2m3k64[];
2516
     extern UINT128 ten2m3k128[];
2517
     extern unsigned int shift_ten2m3k128[];
2518
 
2519
 
2520
 
2521
/***************************************************************************
2522
 *************** TABLES FOR GENERAL ROUNDING FUNCTIONS *********************
2523
 ***************************************************************************/
2524
 
2525
     extern UINT64 Kx64[];
2526
     extern unsigned int Ex64m64[];
2527
     extern UINT64 half64[];
2528
     extern UINT64 mask64[];
2529
     extern UINT64 ten2mxtrunc64[];
2530
 
2531
     extern UINT128 Kx128[];
2532
     extern unsigned int Ex128m128[];
2533
     extern UINT64 half128[];
2534
     extern UINT64 mask128[];
2535
     extern UINT128 ten2mxtrunc128[];
2536
 
2537
     extern UINT192 Kx192[];
2538
     extern unsigned int Ex192m192[];
2539
     extern UINT64 half192[];
2540
     extern UINT64 mask192[];
2541
     extern UINT192 ten2mxtrunc192[];
2542
 
2543
     extern UINT256 Kx256[];
2544
     extern unsigned int Ex256m256[];
2545
     extern UINT64 half256[];
2546
     extern UINT64 mask256[];
2547
     extern UINT256 ten2mxtrunc256[];
2548
 
2549
     typedef union __bid64_128 {
2550
       UINT64 b64;
2551
       UINT128 b128;
2552
     } BID64_128;
2553
 
2554
     BID64_128 bid_fma (unsigned int P0,
2555
			BID64_128 x1, unsigned int P1,
2556
			BID64_128 y1, unsigned int P2,
2557
			BID64_128 z1, unsigned int P3,
2558
			unsigned int rnd_mode, FPSC * fpsc);
2559
 
2560
#define         P16     16
2561
#define         P34     34
2562
 
2563
     union __int_double {
2564
       UINT64 i;
2565
       double d;
2566
     };
2567
     typedef union __int_double int_double;
2568
 
2569
 
2570
     union __int_float {
2571
       UINT32 i;
2572
       float d;
2573
     };
2574
     typedef union __int_float int_float;
2575
 
2576
#define SWAP(A,B,T) {\
2577
        T = A; \
2578
        A = B; \
2579
        B = T; \
2580
}
2581
 
2582
// this macro finds A such that coefficient_x is in [2^A, 2^(A+1) ),
// i.e. A = floor(log2(coefficient_x))
#define NUMBITS(A, coefficient_x, tempx){\
      tempx.d=(float)coefficient_x;\
      A=((tempx.i >>23) & EXPONENT_MASK32) - 0x7f;\
}
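//
//   Illustrative usage sketch (not part of the original header): for
//   coefficient_x = 37 (2^5 <= 37 < 2^6) NUMBITS yields A == 5.  The
//   helper name is hypothetical.
//
__BID_INLINE__ int
example_numbits (void) {
  int_float tmp;
  int A;
  UINT64 coefficient_x = 37ull;

  NUMBITS (A, coefficient_x, tmp);
  return A;	// 5
}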
2588
 
2589
     enum class_types {
2590
       signalingNaN,
2591
       quietNaN,
2592
       negativeInfinity,
2593
       negativeNormal,
2594
       negativeSubnormal,
2595
       negativeZero,
2596
       positiveZero,
2597
       positiveSubnormal,
2598
       positiveNormal,
2599
       positiveInfinity
2600
     };
2601
 
2602
     typedef union {
2603
       UINT64 ui64;
2604
       double d;
2605
     } BID_UI64DOUBLE;
2606
 
2607
#endif