WebSVN – Kolibri OS – Blame – /contrib/toolchain/gcc/5x/libgcc/config/libbid/bid128_add.c

Rev	Author	Line No.	Line
6515	serge	1	/* Copyright (C) 2007-2015 Free Software Foundation, Inc.
		2
		3	This file is part of GCC.
		4
		5	GCC is free software; you can redistribute it and/or modify it under
		6	the terms of the GNU General Public License as published by the Free
		7	Software Foundation; either version 3, or (at your option) any later
		8	version.
		9
		10	GCC is distributed in the hope that it will be useful, but WITHOUT ANY
		11	WARRANTY; without even the implied warranty of MERCHANTABILITY or
		12	FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
		13	for more details.
		14
		15	Under Section 7 of GPL version 3, you are granted additional
		16	permissions described in the GCC Runtime Library Exception, version
		17	3.1, as published by the Free Software Foundation.
		18
		19	You should have received a copy of the GNU General Public License and
		20	a copy of the GCC Runtime Library Exception along with this program;
		21	see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
		22	. */
		23
		24	#include "bid_internal.h"
		25
		26
		27	#if DECIMAL_CALL_BY_REFERENCE
		28	void
		29	bid64dq_add (UINT64 * pres, UINT64 * px, UINT128 * py
		30	_RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
		31	_EXC_INFO_PARAM) {
		32	UINT64 x = *px;
		33	#if !DECIMAL_GLOBAL_ROUNDING
		34	unsigned int rnd_mode = *prnd_mode;
		35	#endif
		36	#else
		37	UINT64
		38	bid64dq_add (UINT64 x, UINT128 y
		39	_RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
		40	_EXC_INFO_PARAM) {
		41	#endif
		42	UINT64 res = 0xbaddbaddbaddbaddull;
		43	UINT128 x1;
		44
		45	#if DECIMAL_CALL_BY_REFERENCE
		46	bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
		47	bid64qq_add (&res, &x1, py
		48	_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
		49	_EXC_INFO_ARG);
		50	#else
		51	x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
		52	res = bid64qq_add (x1, y
		53	_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
		54	_EXC_INFO_ARG);
		55	#endif
		56	BID_RETURN (res);
		57	}
		58
		59
		60	#if DECIMAL_CALL_BY_REFERENCE
		61	void
		62	bid64qd_add (UINT64 * pres, UINT128 * px, UINT64 * py
		63	_RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
		64	_EXC_INFO_PARAM) {
		65	UINT64 y = *py;
		66	#if !DECIMAL_GLOBAL_ROUNDING
		67	unsigned int rnd_mode = *prnd_mode;
		68	#endif
		69	#else
		70	UINT64
		71	bid64qd_add (UINT128 x, UINT64 y
		72	_RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
		73	_EXC_INFO_PARAM) {
		74	#endif
		75	UINT64 res = 0xbaddbaddbaddbaddull;
		76	UINT128 y1;
		77
		78	#if DECIMAL_CALL_BY_REFERENCE
		79	bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
		80	bid64qq_add (&res, px, &y1
		81	_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
		82	_EXC_INFO_ARG);
		83	#else
		84	y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
		85	res = bid64qq_add (x, y1
		86	_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
		87	_EXC_INFO_ARG);
		88	#endif
		89	BID_RETURN (res);
		90	}
		91
		92
		93	#if DECIMAL_CALL_BY_REFERENCE
		94	void
		95	bid64qq_add (UINT64 * pres, UINT128 * px, UINT128 * py
		96	_RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
		97	_EXC_INFO_PARAM) {
		98	UINT128 x = px, y = py;
		99	#if !DECIMAL_GLOBAL_ROUNDING
		100	unsigned int rnd_mode = *prnd_mode;
		101	#endif
		102	#else
		103	UINT64
		104	bid64qq_add (UINT128 x, UINT128 y
		105	_RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
		106	_EXC_INFO_PARAM) {
		107	#endif
		108
		109	UINT128 one = { {0x0000000000000001ull, 0x3040000000000000ull}
		110	};
		111	UINT64 res = 0xbaddbaddbaddbaddull;
		112
		113	BID_SWAP128 (one);
		114	#if DECIMAL_CALL_BY_REFERENCE
		115	bid64qqq_fma (&res, &one, &x, &y
		116	_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
		117	_EXC_INFO_ARG);
		118	#else
		119	res = bid64qqq_fma (one, x, y
		120	_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
		121	_EXC_INFO_ARG);
		122	#endif
		123	BID_RETURN (res);
		124	}
		125
		126
		127	#if DECIMAL_CALL_BY_REFERENCE
		128	void
		129	bid128dd_add (UINT128 * pres, UINT64 * px, UINT64 * py
		130	_RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
		131	_EXC_INFO_PARAM) {
		132	UINT64 x = px, y = py;
		133	#if !DECIMAL_GLOBAL_ROUNDING
		134	unsigned int rnd_mode = *prnd_mode;
		135	#endif
		136	#else
		137	UINT128
		138	bid128dd_add (UINT64 x, UINT64 y
		139	_RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
		140	_EXC_INFO_PARAM) {
		141	#endif
		142	UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull}
		143	};
		144	UINT128 x1, y1;
		145
		146	#if DECIMAL_CALL_BY_REFERENCE
		147	bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
		148	bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
		149	bid128_add (&res, &x1, &y1
		150	_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
		151	_EXC_INFO_ARG);
		152	#else
		153	x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
		154	y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
		155	res = bid128_add (x1, y1
		156	_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
		157	_EXC_INFO_ARG);
		158	#endif
		159	BID_RETURN (res);
		160	}
		161
		162
		163	#if DECIMAL_CALL_BY_REFERENCE
		164	void
		165	bid128dq_add (UINT128 * pres, UINT64 * px, UINT128 * py
		166	_RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
		167	_EXC_INFO_PARAM) {
		168	UINT64 x = *px;
		169	#if !DECIMAL_GLOBAL_ROUNDING
		170	unsigned int rnd_mode = *prnd_mode;
		171	#endif
		172	#else
		173	UINT128
		174	bid128dq_add (UINT64 x, UINT128 y
		175	_RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
		176	_EXC_INFO_PARAM) {
		177	#endif
		178	UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull}
		179	};
		180	UINT128 x1;
		181
		182	#if DECIMAL_CALL_BY_REFERENCE
		183	bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
		184	bid128_add (&res, &x1, py
		185	_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
		186	_EXC_INFO_ARG);
		187	#else
		188	x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
		189	res = bid128_add (x1, y
		190	_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
		191	_EXC_INFO_ARG);
		192	#endif
		193	BID_RETURN (res);
		194	}
		195
		196
		197	#if DECIMAL_CALL_BY_REFERENCE
		198	void
		199	bid128qd_add (UINT128 * pres, UINT128 * px, UINT64 * py
		200	_RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
		201	_EXC_INFO_PARAM) {
		202	UINT64 y = *py;
		203	#if !DECIMAL_GLOBAL_ROUNDING
		204	unsigned int rnd_mode = *prnd_mode;
		205	#endif
		206	#else
		207	UINT128
		208	bid128qd_add (UINT128 x, UINT64 y
		209	_RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
		210	_EXC_INFO_PARAM) {
		211	#endif
		212	UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull}
		213	};
		214	UINT128 y1;
		215
		216	#if DECIMAL_CALL_BY_REFERENCE
		217	bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
		218	bid128_add (&res, px, &y1
		219	_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
		220	_EXC_INFO_ARG);
		221	#else
		222	y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
		223	res = bid128_add (x, y1
		224	_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
		225	_EXC_INFO_ARG);
		226	#endif
		227	BID_RETURN (res);
		228	}
		229
		230
		231	// bid128_add stands for bid128qq_add
		232
		233
		234	/*****************************************************************************
		235	* BID64/BID128 sub
		236	****************************************************************************/
		237
		238	#if DECIMAL_CALL_BY_REFERENCE
		239	void
		240	bid64dq_sub (UINT64 * pres, UINT64 * px, UINT128 * py
		241	_RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
		242	_EXC_INFO_PARAM) {
		243	UINT64 x = *px;
		244	#if !DECIMAL_GLOBAL_ROUNDING
		245	unsigned int rnd_mode = *prnd_mode;
		246	#endif
		247	#else
		248	UINT64
		249	bid64dq_sub (UINT64 x, UINT128 y
		250	_RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
		251	_EXC_INFO_PARAM) {
		252	#endif
		253	UINT64 res = 0xbaddbaddbaddbaddull;
		254	UINT128 x1;
		255
		256	#if DECIMAL_CALL_BY_REFERENCE
		257	bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
		258	bid64qq_sub (&res, &x1, py
		259	_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
		260	_EXC_INFO_ARG);
		261	#else
		262	x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
		263	res = bid64qq_sub (x1, y
		264	_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
		265	_EXC_INFO_ARG);
		266	#endif
		267	BID_RETURN (res);
		268	}
		269
		270
		271	#if DECIMAL_CALL_BY_REFERENCE
		272	void
		273	bid64qd_sub (UINT64 * pres, UINT128 * px, UINT64 * py
		274	_RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
		275	_EXC_INFO_PARAM) {
		276	UINT64 y = *py;
		277	#if !DECIMAL_GLOBAL_ROUNDING
		278	unsigned int rnd_mode = *prnd_mode;
		279	#endif
		280	#else
		281	UINT64
		282	bid64qd_sub (UINT128 x, UINT64 y
		283	_RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
		284	_EXC_INFO_PARAM) {
		285	#endif
		286	UINT64 res = 0xbaddbaddbaddbaddull;
		287	UINT128 y1;
		288
		289	#if DECIMAL_CALL_BY_REFERENCE
		290	bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
		291	bid64qq_sub (&res, px, &y1
		292	_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
		293	_EXC_INFO_ARG);
		294	#else
		295	y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
		296	res = bid64qq_sub (x, y1
		297	_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
		298	_EXC_INFO_ARG);
		299	#endif
		300	BID_RETURN (res);
		301	}
		302
		303
		304	#if DECIMAL_CALL_BY_REFERENCE
		305	void
		306	bid64qq_sub (UINT64 * pres, UINT128 * px, UINT128 * py
		307	_RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
		308	_EXC_INFO_PARAM) {
		309	UINT128 x = px, y = py;
		310	#if !DECIMAL_GLOBAL_ROUNDING
		311	unsigned int rnd_mode = *prnd_mode;
		312	#endif
		313	#else
		314	UINT64
		315	bid64qq_sub (UINT128 x, UINT128 y
		316	_RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
		317	_EXC_INFO_PARAM) {
		318	#endif
		319
		320	UINT128 one = { {0x0000000000000001ull, 0x3040000000000000ull}
		321	};
		322	UINT64 res = 0xbaddbaddbaddbaddull;
		323	UINT64 y_sign;
		324
		325	BID_SWAP128 (one);
		326	if ((y.w[HIGH_128W] & MASK_NAN) != MASK_NAN) { // y is not NAN
		327	// change its sign
		328	y_sign = y.w[HIGH_128W] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
		329	if (y_sign)
		330	y.w[HIGH_128W] = y.w[HIGH_128W] & 0x7fffffffffffffffull;
		331	else
		332	y.w[HIGH_128W] = y.w[HIGH_128W] \| 0x8000000000000000ull;
		333	}
		334	#if DECIMAL_CALL_BY_REFERENCE
		335	bid64qqq_fma (&res, &one, &x, &y
		336	_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
		337	_EXC_INFO_ARG);
		338	#else
		339	res = bid64qqq_fma (one, x, y
		340	_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
		341	_EXC_INFO_ARG);
		342	#endif
		343	BID_RETURN (res);
		344	}
		345
		346
		347	#if DECIMAL_CALL_BY_REFERENCE
		348	void
		349	bid128dd_sub (UINT128 * pres, UINT64 * px, UINT64 * py
		350	_RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
		351	_EXC_INFO_PARAM) {
		352	UINT64 x = px, y = py;
		353	#if !DECIMAL_GLOBAL_ROUNDING
		354	unsigned int rnd_mode = *prnd_mode;
		355	#endif
		356	#else
		357	UINT128
		358	bid128dd_sub (UINT64 x, UINT64 y
		359	_RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
		360	_EXC_INFO_PARAM) {
		361	#endif
		362	UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull}
		363	};
		364	UINT128 x1, y1;
		365
		366	#if DECIMAL_CALL_BY_REFERENCE
		367	bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
		368	bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
		369	bid128_sub (&res, &x1, &y1
		370	_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
		371	_EXC_INFO_ARG);
		372	#else
		373	x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
		374	y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
		375	res = bid128_sub (x1, y1
		376	_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
		377	_EXC_INFO_ARG);
		378	#endif
		379	BID_RETURN (res);
		380	}
		381
		382
		383	#if DECIMAL_CALL_BY_REFERENCE
		384	void
		385	bid128dq_sub (UINT128 * pres, UINT64 * px, UINT128 * py
		386	_RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
		387	_EXC_INFO_PARAM) {
		388	UINT64 x = *px;
		389	#if !DECIMAL_GLOBAL_ROUNDING
		390	unsigned int rnd_mode = *prnd_mode;
		391	#endif
		392	#else
		393	UINT128
		394	bid128dq_sub (UINT64 x, UINT128 y
		395	_RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
		396	_EXC_INFO_PARAM) {
		397	#endif
		398	UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull}
		399	};
		400	UINT128 x1;
		401
		402	#if DECIMAL_CALL_BY_REFERENCE
		403	bid64_to_bid128 (&x1, &x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
		404	bid128_sub (&res, &x1, py
		405	_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
		406	_EXC_INFO_ARG);
		407	#else
		408	x1 = bid64_to_bid128 (x _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
		409	res = bid128_sub (x1, y
		410	_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
		411	_EXC_INFO_ARG);
		412	#endif
		413	BID_RETURN (res);
		414	}
		415
		416
		417	#if DECIMAL_CALL_BY_REFERENCE
		418	void
		419	bid128qd_sub (UINT128 * pres, UINT128 * px, UINT64 * py
		420	_RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
		421	_EXC_INFO_PARAM) {
		422	UINT64 y = *py;
		423	#if !DECIMAL_GLOBAL_ROUNDING
		424	unsigned int rnd_mode = *prnd_mode;
		425	#endif
		426	#else
		427	UINT128
		428	bid128qd_sub (UINT128 x, UINT64 y
		429	_RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
		430	_EXC_INFO_PARAM) {
		431	#endif
		432	UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull}
		433	};
		434	UINT128 y1;
		435
		436	#if DECIMAL_CALL_BY_REFERENCE
		437	bid64_to_bid128 (&y1, &y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
		438	bid128_sub (&res, px, &y1
		439	_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
		440	_EXC_INFO_ARG);
		441	#else
		442	y1 = bid64_to_bid128 (y _EXC_FLAGS_ARG _EXC_MASKS_ARG _EXC_INFO_ARG);
		443	res = bid128_sub (x, y1
		444	_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
		445	_EXC_INFO_ARG);
		446	#endif
		447	BID_RETURN (res);
		448	}
		449
		450	#if DECIMAL_CALL_BY_REFERENCE
		451	void
		452	bid128_add (UINT128 * pres, UINT128 * px, UINT128 * py
		453	_RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
		454	_EXC_INFO_PARAM) {
		455	UINT128 x = px, y = py;
		456	#if !DECIMAL_GLOBAL_ROUNDING
		457	unsigned int rnd_mode = *prnd_mode;
		458	#endif
		459	#else
		460	UINT128
		461	bid128_add (UINT128 x, UINT128 y
		462	_RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
		463	_EXC_INFO_PARAM) {
		464	#endif
		465	UINT128 res = { {0xbaddbaddbaddbaddull, 0xbaddbaddbaddbaddull}
		466	};
		467	UINT64 x_sign, y_sign, tmp_sign;
		468	UINT64 x_exp, y_exp, tmp_exp; // e1 = x_exp, e2 = y_exp
		469	UINT64 C1_hi, C2_hi, tmp_signif_hi;
		470	UINT64 C1_lo, C2_lo, tmp_signif_lo;
		471	// Note: C1.w[1], C1.w[0] represent C1_hi, C1_lo (all UINT64)
		472	// Note: C2.w[1], C2.w[0] represent C2_hi, C2_lo (all UINT64)
		473	UINT64 tmp64, tmp64A, tmp64B;
		474	BID_UI64DOUBLE tmp1, tmp2;
		475	int x_nr_bits, y_nr_bits;
		476	int q1, q2, delta, scale, x1, ind, shift, tmp_inexact = 0;
		477	UINT64 halfulp64;
		478	UINT128 halfulp128;
		479	UINT128 C1, C2;
		480	UINT128 ten2m1;
		481	UINT128 highf2star; // top 128 bits in f2*; low 128 bits in R256[1], R256[0]
		482	UINT256 P256, Q256, R256;
		483	int is_inexact = 0, is_midpoint_lt_even = 0, is_midpoint_gt_even = 0;
		484	int is_inexact_lt_midpoint = 0, is_inexact_gt_midpoint = 0;
		485	int second_pass = 0;
		486
		487	BID_SWAP128 (x);
		488	BID_SWAP128 (y);
		489	x_sign = x.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
		490	y_sign = y.w[1] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
		491
		492	// check for NaN or Infinity
		493	if (((x.w[1] & MASK_SPECIAL) == MASK_SPECIAL)
		494	\|\| ((y.w[1] & MASK_SPECIAL) == MASK_SPECIAL)) {
		495	// x is special or y is special
		496	if ((x.w[1] & MASK_NAN) == MASK_NAN) { // x is NAN
		497	// check first for non-canonical NaN payload
		498	if (((x.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) \|\|
		499	(((x.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull)
		500	&& (x.w[0] > 0x38c15b09ffffffffull))) {
		501	x.w[1] = x.w[1] & 0xffffc00000000000ull;
		502	x.w[0] = 0x0ull;
		503	}
		504	if ((x.w[1] & MASK_SNAN) == MASK_SNAN) { // x is SNAN
		505	// set invalid flag
		506	*pfpsf \|= INVALID_EXCEPTION;
		507	// return quiet (x)
		508	res.w[1] = x.w[1] & 0xfc003fffffffffffull;
		509	// clear out also G[6]-G[16]
		510	res.w[0] = x.w[0];
		511	} else { // x is QNaN
		512	// return x
		513	res.w[1] = x.w[1] & 0xfc003fffffffffffull;
		514	// clear out G[6]-G[16]
		515	res.w[0] = x.w[0];
		516	// if y = SNaN signal invalid exception
		517	if ((y.w[1] & MASK_SNAN) == MASK_SNAN) {
		518	// set invalid flag
		519	*pfpsf \|= INVALID_EXCEPTION;
		520	}
		521	}
		522	BID_SWAP128 (res);
		523	BID_RETURN (res);
		524	} else if ((y.w[1] & MASK_NAN) == MASK_NAN) { // y is NAN
		525	// check first for non-canonical NaN payload
		526	if (((y.w[1] & 0x00003fffffffffffull) > 0x0000314dc6448d93ull) \|\|
		527	(((y.w[1] & 0x00003fffffffffffull) == 0x0000314dc6448d93ull)
		528	&& (y.w[0] > 0x38c15b09ffffffffull))) {
		529	y.w[1] = y.w[1] & 0xffffc00000000000ull;
		530	y.w[0] = 0x0ull;
		531	}
		532	if ((y.w[1] & MASK_SNAN) == MASK_SNAN) { // y is SNAN
		533	// set invalid flag
		534	*pfpsf \|= INVALID_EXCEPTION;
		535	// return quiet (y)
		536	res.w[1] = y.w[1] & 0xfc003fffffffffffull;
		537	// clear out also G[6]-G[16]
		538	res.w[0] = y.w[0];
		539	} else { // y is QNaN
		540	// return y
		541	res.w[1] = y.w[1] & 0xfc003fffffffffffull;
		542	// clear out G[6]-G[16]
		543	res.w[0] = y.w[0];
		544	}
		545	BID_SWAP128 (res);
		546	BID_RETURN (res);
		547	} else { // neither x not y is NaN; at least one is infinity
		548	if ((x.w[1] & MASK_ANY_INF) == MASK_INF) { // x is infinity
		549	if ((y.w[1] & MASK_ANY_INF) == MASK_INF) { // y is infinity
		550	// if same sign, return either of them
		551	if ((x.w[1] & MASK_SIGN) == (y.w[1] & MASK_SIGN)) {
		552	res.w[1] = x_sign \| MASK_INF;
		553	res.w[0] = 0x0ull;
		554	} else { // x and y are infinities of opposite signs
		555	// set invalid flag
		556	*pfpsf \|= INVALID_EXCEPTION;
		557	// return QNaN Indefinite
		558	res.w[1] = 0x7c00000000000000ull;
		559	res.w[0] = 0x0000000000000000ull;
		560	}
		561	} else { // y is 0 or finite
		562	// return x
		563	res.w[1] = x_sign \| MASK_INF;
		564	res.w[0] = 0x0ull;
		565	}
		566	} else { // x is not NaN or infinity, so y must be infinity
		567	res.w[1] = y_sign \| MASK_INF;
		568	res.w[0] = 0x0ull;
		569	}
		570	BID_SWAP128 (res);
		571	BID_RETURN (res);
		572	}
		573	}
		574	// unpack the arguments
		575
		576	// unpack x
		577	C1_hi = x.w[1] & MASK_COEFF;
		578	C1_lo = x.w[0];
		579	// test for non-canonical values:
		580	// - values whose encoding begins with x00, x01, or x10 and whose
		581	// coefficient is larger than 10^34 -1, or
		582	// - values whose encoding begins with x1100, x1101, x1110 (if NaNs
		583	// and infinitis were eliminated already this test is reduced to
		584	// checking for x10x)
		585
		586	// x is not infinity; check for non-canonical values - treated as zero
		587	if ((x.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) {
		588	// G0_G1=11; non-canonical
		589	x_exp = (x.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
		590	C1_hi = 0; // significand high
		591	C1_lo = 0; // significand low
		592	} else { // G0_G1 != 11
		593	x_exp = x.w[1] & MASK_EXP; // biased and shifted left 49 bits
		594	if (C1_hi > 0x0001ed09bead87c0ull \|\|
		595	(C1_hi == 0x0001ed09bead87c0ull
		596	&& C1_lo > 0x378d8e63ffffffffull)) {
		597	// x is non-canonical if coefficient is larger than 10^34 -1
		598	C1_hi = 0;
		599	C1_lo = 0;
		600	} else { // canonical
		601	;
		602	}
		603	}
		604
		605	// unpack y
		606	C2_hi = y.w[1] & MASK_COEFF;
		607	C2_lo = y.w[0];
		608	// y is not infinity; check for non-canonical values - treated as zero
		609	if ((y.w[1] & 0x6000000000000000ull) == 0x6000000000000000ull) {
		610	// G0_G1=11; non-canonical
		611	y_exp = (y.w[1] << 2) & MASK_EXP; // biased and shifted left 49 bits
		612	C2_hi = 0; // significand high
		613	C2_lo = 0; // significand low
		614	} else { // G0_G1 != 11
		615	y_exp = y.w[1] & MASK_EXP; // biased and shifted left 49 bits
		616	if (C2_hi > 0x0001ed09bead87c0ull \|\|
		617	(C2_hi == 0x0001ed09bead87c0ull
		618	&& C2_lo > 0x378d8e63ffffffffull)) {
		619	// y is non-canonical if coefficient is larger than 10^34 -1
		620	C2_hi = 0;
		621	C2_lo = 0;
		622	} else { // canonical
		623	;
		624	}
		625	}
		626
		627	if ((C1_hi == 0x0ull) && (C1_lo == 0x0ull)) {
		628	// x is 0 and y is not special
		629	// if y is 0 return 0 with the smaller exponent
		630	if ((C2_hi == 0x0ull) && (C2_lo == 0x0ull)) {
		631	if (x_exp < y_exp)
		632	res.w[1] = x_exp;
		633	else
		634	res.w[1] = y_exp;
		635	if (x_sign && y_sign)
		636	res.w[1] = res.w[1] \| x_sign; // both negative
		637	else if (rnd_mode == ROUNDING_DOWN && x_sign != y_sign)
		638	res.w[1] = res.w[1] \| 0x8000000000000000ull; // -0
		639	// else; // res = +0
		640	res.w[0] = 0;
		641	} else {
		642	// for 0 + y return y, with the preferred exponent
		643	if (y_exp <= x_exp) {
		644	res.w[1] = y.w[1];
		645	res.w[0] = y.w[0];
		646	} else { // if y_exp > x_exp
		647	// return (C2 * 10^scale) * 10^(y_exp - scale)
		648	// where scale = min (P34-q2, y_exp-x_exp)
		649	// determine q2 = nr. of decimal digits in y
		650	// determine first the nr. of bits in y (y_nr_bits)
		651
		652	if (C2_hi == 0) { // y_bits is the nr. of bits in C2_lo
		653	if (C2_lo >= 0x0020000000000000ull) { // y >= 2^53
		654	// split the 64-bit value in two 32-bit halves to avoid
		655	// rounding errors
		656	if (C2_lo >= 0x0000000100000000ull) { // y >= 2^32
		657	tmp2.d = (double) (C2_lo >> 32); // exact conversion
		658	y_nr_bits =
		659	32 +
		660	((((unsigned int) (tmp2.ui64 >> 52)) & 0x7ff) - 0x3ff);
		661	} else { // y < 2^32
		662	tmp2.d = (double) (C2_lo); // exact conversion
		663	y_nr_bits =
		664	((((unsigned int) (tmp2.ui64 >> 52)) & 0x7ff) - 0x3ff);
		665	}
		666	} else { // if y < 2^53
		667	tmp2.d = (double) C2_lo; // exact conversion
		668	y_nr_bits =
		669	((((unsigned int) (tmp2.ui64 >> 52)) & 0x7ff) - 0x3ff);
		670	}
		671	} else { // C2_hi != 0 => nr. bits = 64 + nr_bits (C2_hi)
		672	tmp2.d = (double) C2_hi; // exact conversion
		673	y_nr_bits =
		674	64 + ((((unsigned int) (tmp2.ui64 >> 52)) & 0x7ff) - 0x3ff);
		675	}
		676	q2 = nr_digits[y_nr_bits].digits;
		677	if (q2 == 0) {
		678	q2 = nr_digits[y_nr_bits].digits1;
		679	if (C2_hi > nr_digits[y_nr_bits].threshold_hi \|\|
		680	(C2_hi == nr_digits[y_nr_bits].threshold_hi &&
		681	C2_lo >= nr_digits[y_nr_bits].threshold_lo))
		682	q2++;
		683	}
		684	// return (C2 * 10^scale) * 10^(y_exp - scale)
		685	// where scale = min (P34-q2, y_exp-x_exp)
		686	scale = P34 - q2;
		687	ind = (y_exp - x_exp) >> 49;
		688	if (ind < scale)
		689	scale = ind;
		690	if (scale == 0) {
		691	res.w[1] = y.w[1];
		692	res.w[0] = y.w[0];
		693	} else if (q2 <= 19) { // y fits in 64 bits
		694	if (scale <= 19) { // 10^scale fits in 64 bits
		695	// 64 x 64 C2_lo * ten2k64[scale]
		696	__mul_64x64_to_128MACH (res, C2_lo, ten2k64[scale]);
		697	} else { // 10^scale fits in 128 bits
		698	// 64 x 128 C2_lo * ten2k128[scale - 20]
		699	__mul_128x64_to_128 (res, C2_lo, ten2k128[scale - 20]);
		700	}
		701	} else { // y fits in 128 bits, but 10^scale must fit in 64 bits
		702	// 64 x 128 ten2k64[scale] * C2
		703	C2.w[1] = C2_hi;
		704	C2.w[0] = C2_lo;
		705	__mul_128x64_to_128 (res, ten2k64[scale], C2);
		706	}
		707	// subtract scale from the exponent
		708	y_exp = y_exp - ((UINT64) scale << 49);
		709	res.w[1] = res.w[1] \| y_sign \| y_exp;
		710	}
		711	}
		712	BID_SWAP128 (res);
		713	BID_RETURN (res);
		714	} else if ((C2_hi == 0x0ull) && (C2_lo == 0x0ull)) {
		715	// y is 0 and x is not special, and not zero
		716	// for x + 0 return x, with the preferred exponent
		717	if (x_exp <= y_exp) {
		718	res.w[1] = x.w[1];
		719	res.w[0] = x.w[0];
		720	} else { // if x_exp > y_exp
		721	// return (C1 * 10^scale) * 10^(x_exp - scale)
		722	// where scale = min (P34-q1, x_exp-y_exp)
		723	// determine q1 = nr. of decimal digits in x
		724	// determine first the nr. of bits in x
		725	if (C1_hi == 0) { // x_bits is the nr. of bits in C1_lo
		726	if (C1_lo >= 0x0020000000000000ull) { // x >= 2^53
		727	// split the 64-bit value in two 32-bit halves to avoid
		728	// rounding errors
		729	if (C1_lo >= 0x0000000100000000ull) { // x >= 2^32
		730	tmp1.d = (double) (C1_lo >> 32); // exact conversion
		731	x_nr_bits =
		732	32 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) -
		733	0x3ff);
		734	} else { // x < 2^32
		735	tmp1.d = (double) (C1_lo); // exact conversion
		736	x_nr_bits =
		737	((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
		738	}
		739	} else { // if x < 2^53
		740	tmp1.d = (double) C1_lo; // exact conversion
		741	x_nr_bits =
		742	((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
		743	}
		744	} else { // C1_hi != 0 => nr. bits = 64 + nr_bits (C1_hi)
		745	tmp1.d = (double) C1_hi; // exact conversion
		746	x_nr_bits =
		747	64 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
		748	}
		749	q1 = nr_digits[x_nr_bits].digits;
		750	if (q1 == 0) {
		751	q1 = nr_digits[x_nr_bits].digits1;
		752	if (C1_hi > nr_digits[x_nr_bits].threshold_hi \|\|
		753	(C1_hi == nr_digits[x_nr_bits].threshold_hi &&
		754	C1_lo >= nr_digits[x_nr_bits].threshold_lo))
		755	q1++;
		756	}
		757	// return (C1 * 10^scale) * 10^(x_exp - scale)
		758	// where scale = min (P34-q1, x_exp-y_exp)
		759	scale = P34 - q1;
		760	ind = (x_exp - y_exp) >> 49;
		761	if (ind < scale)
		762	scale = ind;
		763	if (scale == 0) {
		764	res.w[1] = x.w[1];
		765	res.w[0] = x.w[0];
		766	} else if (q1 <= 19) { // x fits in 64 bits
		767	if (scale <= 19) { // 10^scale fits in 64 bits
		768	// 64 x 64 C1_lo * ten2k64[scale]
		769	__mul_64x64_to_128MACH (res, C1_lo, ten2k64[scale]);
		770	} else { // 10^scale fits in 128 bits
		771	// 64 x 128 C1_lo * ten2k128[scale - 20]
		772	__mul_128x64_to_128 (res, C1_lo, ten2k128[scale - 20]);
		773	}
		774	} else { // x fits in 128 bits, but 10^scale must fit in 64 bits
		775	// 64 x 128 ten2k64[scale] * C1
		776	C1.w[1] = C1_hi;
		777	C1.w[0] = C1_lo;
		778	__mul_128x64_to_128 (res, ten2k64[scale], C1);
		779	}
		780	// subtract scale from the exponent
		781	x_exp = x_exp - ((UINT64) scale << 49);
		782	res.w[1] = res.w[1] \| x_sign \| x_exp;
		783	}
		784	BID_SWAP128 (res);
		785	BID_RETURN (res);
		786	} else { // x and y are not canonical, not special, and are not zero
		787	// note that the result may still be zero, and then it has to have the
		788	// preferred exponent
		789	if (x_exp < y_exp) { // if exp_x < exp_y then swap x and y
		790	tmp_sign = x_sign;
		791	tmp_exp = x_exp;
		792	tmp_signif_hi = C1_hi;
		793	tmp_signif_lo = C1_lo;
		794	x_sign = y_sign;
		795	x_exp = y_exp;
		796	C1_hi = C2_hi;
		797	C1_lo = C2_lo;
		798	y_sign = tmp_sign;
		799	y_exp = tmp_exp;
		800	C2_hi = tmp_signif_hi;
		801	C2_lo = tmp_signif_lo;
		802	}
		803	// q1 = nr. of decimal digits in x
		804	// determine first the nr. of bits in x
		805	if (C1_hi == 0) { // x_bits is the nr. of bits in C1_lo
		806	if (C1_lo >= 0x0020000000000000ull) { // x >= 2^53
		807	//split the 64-bit value in two 32-bit halves to avoid rounding errors
		808	if (C1_lo >= 0x0000000100000000ull) { // x >= 2^32
		809	tmp1.d = (double) (C1_lo >> 32); // exact conversion
		810	x_nr_bits =
		811	32 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
		812	} else { // x < 2^32
		813	tmp1.d = (double) (C1_lo); // exact conversion
		814	x_nr_bits =
		815	((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
		816	}
		817	} else { // if x < 2^53
		818	tmp1.d = (double) C1_lo; // exact conversion
		819	x_nr_bits =
		820	((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
		821	}
		822	} else { // C1_hi != 0 => nr. bits = 64 + nr_bits (C1_hi)
		823	tmp1.d = (double) C1_hi; // exact conversion
		824	x_nr_bits =
		825	64 + ((((unsigned int) (tmp1.ui64 >> 52)) & 0x7ff) - 0x3ff);
		826	}
		827
		828	q1 = nr_digits[x_nr_bits].digits;
		829	if (q1 == 0) {
		830	q1 = nr_digits[x_nr_bits].digits1;
		831	if (C1_hi > nr_digits[x_nr_bits].threshold_hi \|\|
		832	(C1_hi == nr_digits[x_nr_bits].threshold_hi &&
		833	C1_lo >= nr_digits[x_nr_bits].threshold_lo))
		834	q1++;
		835	}
		836	// q2 = nr. of decimal digits in y
		837	// determine first the nr. of bits in y (y_nr_bits)
		838	if (C2_hi == 0) { // y_bits is the nr. of bits in C2_lo
		839	if (C2_lo >= 0x0020000000000000ull) { // y >= 2^53
		840	//split the 64-bit value in two 32-bit halves to avoid rounding errors
		841	if (C2_lo >= 0x0000000100000000ull) { // y >= 2^32
		842	tmp2.d = (double) (C2_lo >> 32); // exact conversion
		843	y_nr_bits =
		844	32 + ((((unsigned int) (tmp2.ui64 >> 52)) & 0x7ff) - 0x3ff);
		845	} else { // y < 2^32
		846	tmp2.d = (double) (C2_lo); // exact conversion
		847	y_nr_bits =
		848	((((unsigned int) (tmp2.ui64 >> 52)) & 0x7ff) - 0x3ff);
		849	}
		850	} else { // if y < 2^53
		851	tmp2.d = (double) C2_lo; // exact conversion
		852	y_nr_bits =
		853	((((unsigned int) (tmp2.ui64 >> 52)) & 0x7ff) - 0x3ff);
		854	}
		855	} else { // C2_hi != 0 => nr. bits = 64 + nr_bits (C2_hi)
		856	tmp2.d = (double) C2_hi; // exact conversion
		857	y_nr_bits =
		858	64 + ((((unsigned int) (tmp2.ui64 >> 52)) & 0x7ff) - 0x3ff);
		859	}
		860
		861	q2 = nr_digits[y_nr_bits].digits;
		862	if (q2 == 0) {
		863	q2 = nr_digits[y_nr_bits].digits1;
		864	if (C2_hi > nr_digits[y_nr_bits].threshold_hi \|\|
		865	(C2_hi == nr_digits[y_nr_bits].threshold_hi &&
		866	C2_lo >= nr_digits[y_nr_bits].threshold_lo))
		867	q2++;
		868	}
		869
		870	delta = q1 + (int) (x_exp >> 49) - q2 - (int) (y_exp >> 49);
		871
		872	if (delta >= P34) {
		873	// round the result directly because 0 < C2 < ulp (C1 * 10^(x_exp-e2))
		874	// n = C1 * 10^e1 or n = C1 +/- 10^(q1-P34)) * 10^e1
		875	// the result is inexact; the preferred exponent is the least possible
		876
		877	if (delta >= P34 + 1) {
		878	// for RN the result is the operand with the larger magnitude,
		879	// possibly scaled up by 10^(P34-q1)
		880	// an overflow cannot occur in this case (rounding to nearest)
		881	if (q1 < P34) { // scale C1 up by 10^(P34-q1)
		882	// Note: because delta >= P34+1 it is certain that
		883	// x_exp - ((UINT64)scale << 49) will stay above e_min
		884	scale = P34 - q1;
		885	if (q1 <= 19) { // C1 fits in 64 bits
		886	// 1 <= q1 <= 19 => 15 <= scale <= 33
		887	if (scale <= 19) { // 10^scale fits in 64 bits
		888	__mul_64x64_to_128MACH (C1, ten2k64[scale], C1_lo);
		889	} else { // if 20 <= scale <= 33
		890	// C1 * 10^scale = (C1 * 10^(scale-19)) * 10^19 where
		891	// (C1 * 10^(scale-19)) fits in 64 bits
		892	C1_lo = C1_lo * ten2k64[scale - 19];
		893	__mul_64x64_to_128MACH (C1, ten2k64[19], C1_lo);
		894	}
		895	} else { //if 20 <= q1 <= 33=P34-1 then C1 fits only in 128 bits
		896	// => 1 <= P34 - q1 <= 14 so 10^(P34-q1) fits in 64 bits
		897	C1.w[1] = C1_hi;
		898	C1.w[0] = C1_lo;
		899	// C1 = ten2k64[P34 - q1] * C1
		900	__mul_128x64_to_128 (C1, ten2k64[P34 - q1], C1);
		901	}
		902	x_exp = x_exp - ((UINT64) scale << 49);
		903	C1_hi = C1.w[1];
		904	C1_lo = C1.w[0];
		905	}
		906	// some special cases arise: if delta = P34 + 1 and C1 = 10^(P34-1)
		907	// (after scaling) and x_sign != y_sign and C2 > 5*10^(q2-1) =>
		908	// subtract 1 ulp
		909	// Note: do this only for rounding to nearest; for other rounding
		910	// modes the correction will be applied next
		911	if ((rnd_mode == ROUNDING_TO_NEAREST
		912	\|\| rnd_mode == ROUNDING_TIES_AWAY) && delta == (P34 + 1)
		913	&& C1_hi == 0x0000314dc6448d93ull
		914	&& C1_lo == 0x38c15b0a00000000ull && x_sign != y_sign
		915	&& ((q2 <= 19 && C2_lo > midpoint64[q2 - 1]) \|\| (q2 >= 20
		916	&& (C2_hi >
		917	midpoint128
		918	[q2 -
		919	20].
		920	w[1]
		921	\|\|
		922	(C2_hi
		923	==
		924	midpoint128
		925	[q2 -
		926	20].
		927	w[1]
		928	&&
		929	C2_lo
		930	>
		931	midpoint128
		932	[q2 -
		933	20].
		934	w
		935	[0])))))
		936	{
		937	// C1 = 10^34 - 1 and decrement x_exp by 1 (no underflow possible)
		938	C1_hi = 0x0001ed09bead87c0ull;
		939	C1_lo = 0x378d8e63ffffffffull;
		940	x_exp = x_exp - EXP_P1;
		941	}
		942	if (rnd_mode != ROUNDING_TO_NEAREST) {
		943	if ((rnd_mode == ROUNDING_DOWN && x_sign && y_sign) \|\|
		944	(rnd_mode == ROUNDING_UP && !x_sign && !y_sign)) {
		945	// add 1 ulp and then check for overflow
		946	C1_lo = C1_lo + 1;
		947	if (C1_lo == 0) { // rounding overflow in the low 64 bits
		948	C1_hi = C1_hi + 1;
		949	}
		950	if (C1_hi == 0x0001ed09bead87c0ull
		951	&& C1_lo == 0x378d8e6400000000ull) {
		952	// C1 = 10^34 => rounding overflow
		953	C1_hi = 0x0000314dc6448d93ull;
		954	C1_lo = 0x38c15b0a00000000ull; // 10^33
		955	x_exp = x_exp + EXP_P1;
		956	if (x_exp == EXP_MAX_P1) { // overflow
		957	C1_hi = 0x7800000000000000ull; // +inf
		958	C1_lo = 0x0ull;
		959	x_exp = 0; // x_sign is preserved
		960	// set overflow flag (the inexact flag was set too)
		961	*pfpsf \|= OVERFLOW_EXCEPTION;
		962	}
		963	}
		964	} else if ((rnd_mode == ROUNDING_DOWN && !x_sign && y_sign) \|\|
		965	(rnd_mode == ROUNDING_UP && x_sign && !y_sign) \|\|
		966	(rnd_mode == ROUNDING_TO_ZERO
		967	&& x_sign != y_sign)) {
		968	// subtract 1 ulp from C1
		969	// Note: because delta >= P34 + 1 the result cannot be zero
		970	C1_lo = C1_lo - 1;
		971	if (C1_lo == 0xffffffffffffffffull)
		972	C1_hi = C1_hi - 1;
		973	// if the coefficient is 10^33 - 1 then make it 10^34 - 1 and
		974	// decrease the exponent by 1 (because delta >= P34 + 1 the
		975	// exponent will not become less than e_min)
		976	// 10^33 - 1 = 0x0000314dc6448d9338c15b09ffffffff
		977	// 10^34 - 1 = 0x0001ed09bead87c0378d8e63ffffffff
		978	if (C1_hi == 0x0000314dc6448d93ull
		979	&& C1_lo == 0x38c15b09ffffffffull) {
		980	// make C1 = 10^34 - 1
		981	C1_hi = 0x0001ed09bead87c0ull;
		982	C1_lo = 0x378d8e63ffffffffull;
		983	x_exp = x_exp - EXP_P1;
		984	}
		985	} else {
		986	; // the result is already correct
		987	}
		988	}
		989	// set the inexact flag
		990	*pfpsf \|= INEXACT_EXCEPTION;
		991	// assemble the result
		992	res.w[1] = x_sign \| x_exp \| C1_hi;
		993	res.w[0] = C1_lo;
		994	} else { // delta = P34
		995	// in most cases, the smaller operand may be < or = or > 1/2 ulp of the
		996	// larger operand
		997	// however, the case C1 = 10^(q1-1) and x_sign != y_sign is special due
		998	// to accuracy loss after subtraction, and will be treated separately
		999	if (x_sign == y_sign \|\| (q1 <= 20
		1000	&& (C1_hi != 0
		1001	\|\| C1_lo != ten2k64[q1 - 1]))
		1002	\|\| (q1 >= 21 && (C1_hi != ten2k128[q1 - 21].w[1]
		1003	\|\| C1_lo != ten2k128[q1 - 21].w[0]))) {
		1004	// if x_sign == y_sign or C1 != 10^(q1-1)
		1005	// compare C2 with 1/2 ulp = 5 * 10^(q2-1), the latter read from table
		1006	// Note: cases q1<=19 and q1>=20 can be coalesced at some latency cost
		1007	if (q2 <= 19) { // C2 and 5*10^(q2-1) both fit in 64 bits
		1008	halfulp64 = midpoint64[q2 - 1]; // 5 * 10^(q2-1)
		1009	if (C2_lo < halfulp64) { // n2 < 1/2 ulp (n1)
		1010	// for RN the result is the operand with the larger magnitude,
		1011	// possibly scaled up by 10^(P34-q1)
		1012	// an overflow cannot occur in this case (rounding to nearest)
		1013	if (q1 < P34) { // scale C1 up by 10^(P34-q1)
		1014	// Note: because delta = P34 it is certain that
		1015	// x_exp - ((UINT64)scale << 49) will stay above e_min
		1016	scale = P34 - q1;
		1017	if (q1 <= 19) { // C1 fits in 64 bits
		1018	// 1 <= q1 <= 19 => 15 <= scale <= 33
		1019	if (scale <= 19) { // 10^scale fits in 64 bits
		1020	__mul_64x64_to_128MACH (C1, ten2k64[scale], C1_lo);
		1021	} else { // if 20 <= scale <= 33
		1022	// C1 * 10^scale = (C1 * 10^(scale-19)) * 10^19 where
		1023	// (C1 * 10^(scale-19)) fits in 64 bits
		1024	C1_lo = C1_lo * ten2k64[scale - 19];
		1025	__mul_64x64_to_128MACH (C1, ten2k64[19], C1_lo);
		1026	}
		1027	} else { //if 20 <= q1 <= 33=P34-1 then C1 fits only in 128 bits
		1028	// => 1 <= P34 - q1 <= 14 so 10^(P34-q1) fits in 64 bits
		1029	C1.w[1] = C1_hi;
		1030	C1.w[0] = C1_lo;
		1031	// C1 = ten2k64[P34 - q1] * C1
		1032	__mul_128x64_to_128 (C1, ten2k64[P34 - q1], C1);
		1033	}
		1034	x_exp = x_exp - ((UINT64) scale << 49);
		1035	C1_hi = C1.w[1];
		1036	C1_lo = C1.w[0];
		1037	}
		1038	if (rnd_mode != ROUNDING_TO_NEAREST) {
		1039	if ((rnd_mode == ROUNDING_DOWN && x_sign && y_sign) \|\|
		1040	(rnd_mode == ROUNDING_UP && !x_sign && !y_sign)) {
		1041	// add 1 ulp and then check for overflow
		1042	C1_lo = C1_lo + 1;
		1043	if (C1_lo == 0) { // rounding overflow in the low 64 bits
		1044	C1_hi = C1_hi + 1;
		1045	}
		1046	if (C1_hi == 0x0001ed09bead87c0ull
		1047	&& C1_lo == 0x378d8e6400000000ull) {
		1048	// C1 = 10^34 => rounding overflow
		1049	C1_hi = 0x0000314dc6448d93ull;
		1050	C1_lo = 0x38c15b0a00000000ull; // 10^33
		1051	x_exp = x_exp + EXP_P1;
		1052	if (x_exp == EXP_MAX_P1) { // overflow
		1053	C1_hi = 0x7800000000000000ull; // +inf
		1054	C1_lo = 0x0ull;
		1055	x_exp = 0; // x_sign is preserved
		1056	// set overflow flag (the inexact flag was set too)
		1057	*pfpsf \|= OVERFLOW_EXCEPTION;
		1058	}
		1059	}
		1060	} else
		1061	if ((rnd_mode == ROUNDING_DOWN && !x_sign && y_sign)
		1062	\|\| (rnd_mode == ROUNDING_UP && x_sign && !y_sign)
		1063	\|\| (rnd_mode == ROUNDING_TO_ZERO
		1064	&& x_sign != y_sign)) {
		1065	// subtract 1 ulp from C1
		1066	// Note: because delta >= P34 + 1 the result cannot be zero
		1067	C1_lo = C1_lo - 1;
		1068	if (C1_lo == 0xffffffffffffffffull)
		1069	C1_hi = C1_hi - 1;
		1070	// if the coefficient is 10^33-1 then make it 10^34-1 and
		1071	// decrease the exponent by 1 (because delta >= P34 + 1 the
		1072	// exponent will not become less than e_min)
		1073	// 10^33 - 1 = 0x0000314dc6448d9338c15b09ffffffff
		1074	// 10^34 - 1 = 0x0001ed09bead87c0378d8e63ffffffff
		1075	if (C1_hi == 0x0000314dc6448d93ull
		1076	&& C1_lo == 0x38c15b09ffffffffull) {
		1077	// make C1 = 10^34 - 1
		1078	C1_hi = 0x0001ed09bead87c0ull;
		1079	C1_lo = 0x378d8e63ffffffffull;
		1080	x_exp = x_exp - EXP_P1;
		1081	}
		1082	} else {
		1083	; // the result is already correct
		1084	}
		1085	}
		1086	// set the inexact flag
		1087	*pfpsf \|= INEXACT_EXCEPTION;
		1088	// assemble the result
		1089	res.w[1] = x_sign \| x_exp \| C1_hi;
		1090	res.w[0] = C1_lo;
		1091	} else if ((C2_lo == halfulp64)
		1092	&& (q1 < P34 \|\| ((C1_lo & 0x1) == 0))) {
		1093	// n2 = 1/2 ulp (n1) and C1 is even
		1094	// the result is the operand with the larger magnitude,
		1095	// possibly scaled up by 10^(P34-q1)
		1096	// an overflow cannot occur in this case (rounding to nearest)
		1097	if (q1 < P34) { // scale C1 up by 10^(P34-q1)
		1098	// Note: because delta = P34 it is certain that
		1099	// x_exp - ((UINT64)scale << 49) will stay above e_min
		1100	scale = P34 - q1;
		1101	if (q1 <= 19) { // C1 fits in 64 bits
		1102	// 1 <= q1 <= 19 => 15 <= scale <= 33
		1103	if (scale <= 19) { // 10^scale fits in 64 bits
		1104	__mul_64x64_to_128MACH (C1, ten2k64[scale], C1_lo);
		1105	} else { // if 20 <= scale <= 33
		1106	// C1 * 10^scale = (C1 * 10^(scale-19)) * 10^19 where
		1107	// (C1 * 10^(scale-19)) fits in 64 bits
		1108	C1_lo = C1_lo * ten2k64[scale - 19];
		1109	__mul_64x64_to_128MACH (C1, ten2k64[19], C1_lo);
		1110	}
		1111	} else { //if 20 <= q1 <= 33=P34-1 then C1 fits only in 128 bits
		1112	// => 1 <= P34 - q1 <= 14 so 10^(P34-q1) fits in 64 bits
		1113	C1.w[1] = C1_hi;
		1114	C1.w[0] = C1_lo;
		1115	// C1 = ten2k64[P34 - q1] * C1
		1116	__mul_128x64_to_128 (C1, ten2k64[P34 - q1], C1);
		1117	}
		1118	x_exp = x_exp - ((UINT64) scale << 49);
		1119	C1_hi = C1.w[1];
		1120	C1_lo = C1.w[0];
		1121	}
		1122	if ((rnd_mode == ROUNDING_TO_NEAREST && x_sign == y_sign
		1123	&& (C1_lo & 0x01)) \|\| (rnd_mode == ROUNDING_TIES_AWAY
		1124	&& x_sign == y_sign)
		1125	\|\| (rnd_mode == ROUNDING_UP && !x_sign && !y_sign)
		1126	\|\| (rnd_mode == ROUNDING_DOWN && x_sign && y_sign)) {
		1127	// add 1 ulp and then check for overflow
		1128	C1_lo = C1_lo + 1;
		1129	if (C1_lo == 0) { // rounding overflow in the low 64 bits
		1130	C1_hi = C1_hi + 1;
		1131	}
		1132	if (C1_hi == 0x0001ed09bead87c0ull
		1133	&& C1_lo == 0x378d8e6400000000ull) {
		1134	// C1 = 10^34 => rounding overflow
		1135	C1_hi = 0x0000314dc6448d93ull;
		1136	C1_lo = 0x38c15b0a00000000ull; // 10^33
		1137	x_exp = x_exp + EXP_P1;
		1138	if (x_exp == EXP_MAX_P1) { // overflow
		1139	C1_hi = 0x7800000000000000ull; // +inf
		1140	C1_lo = 0x0ull;
		1141	x_exp = 0; // x_sign is preserved
		1142	// set overflow flag (the inexact flag was set too)
		1143	*pfpsf \|= OVERFLOW_EXCEPTION;
		1144	}
		1145	}
		1146	} else
		1147	if ((rnd_mode == ROUNDING_TO_NEAREST && x_sign != y_sign
		1148	&& (C1_lo & 0x01)) \|\| (rnd_mode == ROUNDING_DOWN
		1149	&& !x_sign && y_sign)
		1150	\|\| (rnd_mode == ROUNDING_UP && x_sign && !y_sign)
		1151	\|\| (rnd_mode == ROUNDING_TO_ZERO
		1152	&& x_sign != y_sign)) {
		1153	// subtract 1 ulp from C1
		1154	// Note: because delta >= P34 + 1 the result cannot be zero
		1155	C1_lo = C1_lo - 1;
		1156	if (C1_lo == 0xffffffffffffffffull)
		1157	C1_hi = C1_hi - 1;
		1158	// if the coefficient is 10^33 - 1 then make it 10^34 - 1
		1159	// and decrease the exponent by 1 (because delta >= P34 + 1
		1160	// the exponent will not become less than e_min)
		1161	// 10^33 - 1 = 0x0000314dc6448d9338c15b09ffffffff
		1162	// 10^34 - 1 = 0x0001ed09bead87c0378d8e63ffffffff
		1163	if (C1_hi == 0x0000314dc6448d93ull
		1164	&& C1_lo == 0x38c15b09ffffffffull) {
		1165	// make C1 = 10^34 - 1
		1166	C1_hi = 0x0001ed09bead87c0ull;
		1167	C1_lo = 0x378d8e63ffffffffull;
		1168	x_exp = x_exp - EXP_P1;
		1169	}
		1170	} else {
		1171	; // the result is already correct
		1172	}
		1173	// set the inexact flag
		1174	*pfpsf \|= INEXACT_EXCEPTION;
		1175	// assemble the result
		1176	res.w[1] = x_sign \| x_exp \| C1_hi;
		1177	res.w[0] = C1_lo;
		1178	} else { // if C2_lo > halfulp64 \|\|
		1179	// (C2_lo == halfulp64 && q1 == P34 && ((C1_lo & 0x1) == 1)), i.e.
		1180	// 1/2 ulp(n1) < n2 < 1 ulp(n1) or n2 = 1/2 ulp(n1) and C1 odd
		1181	// res = x+1 ulp if n1n2 > 0 and res = x-1 ulp if n1n2 < 0
		1182	if (q1 < P34) { // then 1 ulp = 10^(e1+q1-P34) < 10^e1
		1183	// Note: if (q1 == P34) then 1 ulp = 10^(e1+q1-P34) = 10^e1
		1184	// because q1 < P34 we must first replace C1 by
		1185	// C1 * 10^(P34-q1), and must decrease the exponent by
		1186	// (P34-q1) (it will still be at least e_min)
		1187	scale = P34 - q1;
		1188	if (q1 <= 19) { // C1 fits in 64 bits
		1189	// 1 <= q1 <= 19 => 15 <= scale <= 33
		1190	if (scale <= 19) { // 10^scale fits in 64 bits
		1191	__mul_64x64_to_128MACH (C1, ten2k64[scale], C1_lo);
		1192	} else { // if 20 <= scale <= 33
		1193	// C1 * 10^scale = (C1 * 10^(scale-19)) * 10^19 where
		1194	// (C1 * 10^(scale-19)) fits in 64 bits
		1195	C1_lo = C1_lo * ten2k64[scale - 19];
		1196	__mul_64x64_to_128MACH (C1, ten2k64[19], C1_lo);
		1197	}
		1198	} else { //if 20 <= q1 <= 33=P34-1 then C1 fits only in 128 bits
		1199	// => 1 <= P34 - q1 <= 14 so 10^(P34-q1) fits in 64 bits
		1200	C1.w[1] = C1_hi;
		1201	C1.w[0] = C1_lo;
		1202	// C1 = ten2k64[P34 - q1] * C1
		1203	__mul_128x64_to_128 (C1, ten2k64[P34 - q1], C1);
		1204	}
		1205	x_exp = x_exp - ((UINT64) scale << 49);
		1206	C1_hi = C1.w[1];
		1207	C1_lo = C1.w[0];
		1208	// check for rounding overflow
		1209	if (C1_hi == 0x0001ed09bead87c0ull
		1210	&& C1_lo == 0x378d8e6400000000ull) {
		1211	// C1 = 10^34 => rounding overflow
		1212	C1_hi = 0x0000314dc6448d93ull;
		1213	C1_lo = 0x38c15b0a00000000ull; // 10^33
		1214	x_exp = x_exp + EXP_P1;
		1215	}
		1216	}
		1217	if ((rnd_mode == ROUNDING_TO_NEAREST && x_sign != y_sign)
		1218	\|\| (rnd_mode == ROUNDING_TIES_AWAY && x_sign != y_sign
		1219	&& C2_lo != halfulp64)
		1220	\|\| (rnd_mode == ROUNDING_DOWN && !x_sign && y_sign)
		1221	\|\| (rnd_mode == ROUNDING_UP && x_sign && !y_sign)
		1222	\|\| (rnd_mode == ROUNDING_TO_ZERO
		1223	&& x_sign != y_sign)) {
		1224	// the result is x - 1
		1225	// for RN n1 * n2 < 0; underflow not possible
		1226	C1_lo = C1_lo - 1;
		1227	if (C1_lo == 0xffffffffffffffffull)
		1228	C1_hi--;
		1229	// check if we crossed into the lower decade
		1230	if (C1_hi == 0x0000314dc6448d93ull && C1_lo == 0x38c15b09ffffffffull) { // 10^33 - 1
		1231	C1_hi = 0x0001ed09bead87c0ull; // 10^34 - 1
		1232	C1_lo = 0x378d8e63ffffffffull;
		1233	x_exp = x_exp - EXP_P1; // no underflow, because n1 >> n2
		1234	}
		1235	} else
		1236	if ((rnd_mode == ROUNDING_TO_NEAREST
		1237	&& x_sign == y_sign)
		1238	\|\| (rnd_mode == ROUNDING_TIES_AWAY
		1239	&& x_sign == y_sign)
		1240	\|\| (rnd_mode == ROUNDING_DOWN && x_sign && y_sign)
		1241	\|\| (rnd_mode == ROUNDING_UP && !x_sign
		1242	&& !y_sign)) {
		1243	// the result is x + 1
		1244	// for RN x_sign = y_sign, i.e. n1*n2 > 0
		1245	C1_lo = C1_lo + 1;
		1246	if (C1_lo == 0) { // rounding overflow in the low 64 bits
		1247	C1_hi = C1_hi + 1;
		1248	}
		1249	if (C1_hi == 0x0001ed09bead87c0ull
		1250	&& C1_lo == 0x378d8e6400000000ull) {
		1251	// C1 = 10^34 => rounding overflow
		1252	C1_hi = 0x0000314dc6448d93ull;
		1253	C1_lo = 0x38c15b0a00000000ull; // 10^33
		1254	x_exp = x_exp + EXP_P1;
		1255	if (x_exp == EXP_MAX_P1) { // overflow
		1256	C1_hi = 0x7800000000000000ull; // +inf
		1257	C1_lo = 0x0ull;
		1258	x_exp = 0; // x_sign is preserved
		1259	// set the overflow flag
		1260	*pfpsf \|= OVERFLOW_EXCEPTION;
		1261	}
		1262	}
		1263	} else {
		1264	; // the result is x
		1265	}
		1266	// set the inexact flag
		1267	*pfpsf \|= INEXACT_EXCEPTION;
		1268	// assemble the result
		1269	res.w[1] = x_sign \| x_exp \| C1_hi;
		1270	res.w[0] = C1_lo;
		1271	}
		1272	} else { // if q2 >= 20 then 5*10^(q2-1) and C2 (the latter in
		1273	// most cases) fit only in more than 64 bits
		1274	halfulp128 = midpoint128[q2 - 20]; // 5 * 10^(q2-1)
		1275	if ((C2_hi < halfulp128.w[1])
		1276	\|\| (C2_hi == halfulp128.w[1]
		1277	&& C2_lo < halfulp128.w[0])) {
		1278	// n2 < 1/2 ulp (n1)
		1279	// the result is the operand with the larger magnitude,
		1280	// possibly scaled up by 10^(P34-q1)
		1281	// an overflow cannot occur in this case (rounding to nearest)
		1282	if (q1 < P34) { // scale C1 up by 10^(P34-q1)
		1283	// Note: because delta = P34 it is certain that
		1284	// x_exp - ((UINT64)scale << 49) will stay above e_min
		1285	scale = P34 - q1;
		1286	if (q1 <= 19) { // C1 fits in 64 bits
		1287	// 1 <= q1 <= 19 => 15 <= scale <= 33
		1288	if (scale <= 19) { // 10^scale fits in 64 bits
		1289	__mul_64x64_to_128MACH (C1, ten2k64[scale], C1_lo);
		1290	} else { // if 20 <= scale <= 33
		1291	// C1 * 10^scale = (C1 * 10^(scale-19)) * 10^19 where
		1292	// (C1 * 10^(scale-19)) fits in 64 bits
		1293	C1_lo = C1_lo * ten2k64[scale - 19];
		1294	__mul_64x64_to_128MACH (C1, ten2k64[19], C1_lo);
		1295	}
		1296	} else { //if 20 <= q1 <= 33=P34-1 then C1 fits only in 128 bits
		1297	// => 1 <= P34 - q1 <= 14 so 10^(P34-q1) fits in 64 bits
		1298	C1.w[1] = C1_hi;
		1299	C1.w[0] = C1_lo;
		1300	// C1 = ten2k64[P34 - q1] * C1
		1301	__mul_128x64_to_128 (C1, ten2k64[P34 - q1], C1);
		1302	}
		1303	C1_hi = C1.w[1];
		1304	C1_lo = C1.w[0];
		1305	x_exp = x_exp - ((UINT64) scale << 49);
		1306	}
		1307	if (rnd_mode != ROUNDING_TO_NEAREST) {
		1308	if ((rnd_mode == ROUNDING_DOWN && x_sign && y_sign) \|\|
		1309	(rnd_mode == ROUNDING_UP && !x_sign && !y_sign)) {
		1310	// add 1 ulp and then check for overflow
		1311	C1_lo = C1_lo + 1;
		1312	if (C1_lo == 0) { // rounding overflow in the low 64 bits
		1313	C1_hi = C1_hi + 1;
		1314	}
		1315	if (C1_hi == 0x0001ed09bead87c0ull
		1316	&& C1_lo == 0x378d8e6400000000ull) {
		1317	// C1 = 10^34 => rounding overflow
		1318	C1_hi = 0x0000314dc6448d93ull;
		1319	C1_lo = 0x38c15b0a00000000ull; // 10^33
		1320	x_exp = x_exp + EXP_P1;
		1321	if (x_exp == EXP_MAX_P1) { // overflow
		1322	C1_hi = 0x7800000000000000ull; // +inf
		1323	C1_lo = 0x0ull;
		1324	x_exp = 0; // x_sign is preserved
		1325	// set overflow flag (the inexact flag was set too)
		1326	*pfpsf \|= OVERFLOW_EXCEPTION;
		1327	}
		1328	}
		1329	} else
		1330	if ((rnd_mode == ROUNDING_DOWN && !x_sign && y_sign)
		1331	\|\| (rnd_mode == ROUNDING_UP && x_sign && !y_sign)
		1332	\|\| (rnd_mode == ROUNDING_TO_ZERO
		1333	&& x_sign != y_sign)) {
		1334	// subtract 1 ulp from C1
		1335	// Note: because delta >= P34 + 1 the result cannot be zero
		1336	C1_lo = C1_lo - 1;
		1337	if (C1_lo == 0xffffffffffffffffull)
		1338	C1_hi = C1_hi - 1;
		1339	// if the coefficient is 10^33-1 then make it 10^34-1 and
		1340	// decrease the exponent by 1 (because delta >= P34 + 1 the
		1341	// exponent will not become less than e_min)
		1342	// 10^33 - 1 = 0x0000314dc6448d9338c15b09ffffffff
		1343	// 10^34 - 1 = 0x0001ed09bead87c0378d8e63ffffffff
		1344	if (C1_hi == 0x0000314dc6448d93ull
		1345	&& C1_lo == 0x38c15b09ffffffffull) {
		1346	// make C1 = 10^34 - 1
		1347	C1_hi = 0x0001ed09bead87c0ull;
		1348	C1_lo = 0x378d8e63ffffffffull;
		1349	x_exp = x_exp - EXP_P1;
		1350	}
		1351	} else {
		1352	; // the result is already correct
		1353	}
		1354	}
		1355	// set the inexact flag
		1356	*pfpsf \|= INEXACT_EXCEPTION;
		1357	// assemble the result
		1358	res.w[1] = x_sign \| x_exp \| C1_hi;
		1359	res.w[0] = C1_lo;
		1360	} else if ((C2_hi == halfulp128.w[1]
		1361	&& C2_lo == halfulp128.w[0])
		1362	&& (q1 < P34 \|\| ((C1_lo & 0x1) == 0))) {
		1363	// midpoint & lsb in C1 is 0
		1364	// n2 = 1/2 ulp (n1) and C1 is even
		1365	// the result is the operand with the larger magnitude,
		1366	// possibly scaled up by 10^(P34-q1)
		1367	// an overflow cannot occur in this case (rounding to nearest)
		1368	if (q1 < P34) { // scale C1 up by 10^(P34-q1)
		1369	// Note: because delta = P34 it is certain that
		1370	// x_exp - ((UINT64)scale << 49) will stay above e_min
		1371	scale = P34 - q1;
		1372	if (q1 <= 19) { // C1 fits in 64 bits
		1373	// 1 <= q1 <= 19 => 15 <= scale <= 33
		1374	if (scale <= 19) { // 10^scale fits in 64 bits
		1375	__mul_64x64_to_128MACH (C1, ten2k64[scale], C1_lo);
		1376	} else { // if 20 <= scale <= 33
		1377	// C1 * 10^scale = (C1 * 10^(scale-19)) * 10^19 where
		1378	// (C1 * 10^(scale-19)) fits in 64 bits
		1379	C1_lo = C1_lo * ten2k64[scale - 19];
		1380	__mul_64x64_to_128MACH (C1, ten2k64[19], C1_lo);
		1381	}
		1382	} else { //if 20 <= q1 <= 33=P34-1 then C1 fits only in 128 bits
		1383	// => 1 <= P34 - q1 <= 14 so 10^(P34-q1) fits in 64 bits
		1384	C1.w[1] = C1_hi;
		1385	C1.w[0] = C1_lo;
		1386	// C1 = ten2k64[P34 - q1] * C1
		1387	__mul_128x64_to_128 (C1, ten2k64[P34 - q1], C1);
		1388	}
		1389	x_exp = x_exp - ((UINT64) scale << 49);
		1390	C1_hi = C1.w[1];
		1391	C1_lo = C1.w[0];
		1392	}
		1393	if (rnd_mode != ROUNDING_TO_NEAREST) {
		1394	if ((rnd_mode == ROUNDING_TIES_AWAY && x_sign == y_sign)
		1395	\|\| (rnd_mode == ROUNDING_UP && !y_sign)) {
		1396	// add 1 ulp and then check for overflow
		1397	C1_lo = C1_lo + 1;
		1398	if (C1_lo == 0) { // rounding overflow in the low 64 bits
		1399	C1_hi = C1_hi + 1;
		1400	}
		1401	if (C1_hi == 0x0001ed09bead87c0ull
		1402	&& C1_lo == 0x378d8e6400000000ull) {
		1403	// C1 = 10^34 => rounding overflow
		1404	C1_hi = 0x0000314dc6448d93ull;
		1405	C1_lo = 0x38c15b0a00000000ull; // 10^33
		1406	x_exp = x_exp + EXP_P1;
		1407	if (x_exp == EXP_MAX_P1) { // overflow
		1408	C1_hi = 0x7800000000000000ull; // +inf
		1409	C1_lo = 0x0ull;
		1410	x_exp = 0; // x_sign is preserved
		1411	// set overflow flag (the inexact flag was set too)
		1412	*pfpsf \|= OVERFLOW_EXCEPTION;
		1413	}
		1414	}
		1415	} else if ((rnd_mode == ROUNDING_DOWN && y_sign)
		1416	\|\| (rnd_mode == ROUNDING_TO_ZERO
		1417	&& x_sign != y_sign)) {
		1418	// subtract 1 ulp from C1
		1419	// Note: because delta >= P34 + 1 the result cannot be zero
		1420	C1_lo = C1_lo - 1;
		1421	if (C1_lo == 0xffffffffffffffffull)
		1422	C1_hi = C1_hi - 1;
		1423	// if the coefficient is 10^33 - 1 then make it 10^34 - 1
		1424	// and decrease the exponent by 1 (because delta >= P34 + 1
		1425	// the exponent will not become less than e_min)
		1426	// 10^33 - 1 = 0x0000314dc6448d9338c15b09ffffffff
		1427	// 10^34 - 1 = 0x0001ed09bead87c0378d8e63ffffffff
		1428	if (C1_hi == 0x0000314dc6448d93ull
		1429	&& C1_lo == 0x38c15b09ffffffffull) {
		1430	// make C1 = 10^34 - 1
		1431	C1_hi = 0x0001ed09bead87c0ull;
		1432	C1_lo = 0x378d8e63ffffffffull;
		1433	x_exp = x_exp - EXP_P1;
		1434	}
		1435	} else {
		1436	; // the result is already correct
		1437	}
		1438	}
		1439	// set the inexact flag
		1440	*pfpsf \|= INEXACT_EXCEPTION;
		1441	// assemble the result
		1442	res.w[1] = x_sign \| x_exp \| C1_hi;
		1443	res.w[0] = C1_lo;
		1444	} else { // if C2 > halfulp128 \|\|
		1445	// (C2 == halfulp128 && q1 == P34 && ((C1 & 0x1) == 1)), i.e.
		1446	// 1/2 ulp(n1) < n2 < 1 ulp(n1) or n2 = 1/2 ulp(n1) and C1 odd
		1447	// res = x+1 ulp if n1n2 > 0 and res = x-1 ulp if n1n2 < 0
		1448	if (q1 < P34) { // then 1 ulp = 10^(e1+q1-P34) < 10^e1
		1449	// Note: if (q1 == P34) then 1 ulp = 10^(e1+q1-P34) = 10^e1
		1450	// because q1 < P34 we must first replace C1 by C1*10^(P34-q1),
		1451	// and must decrease the exponent by (P34-q1) (it will still be
		1452	// at least e_min)
		1453	scale = P34 - q1;
		1454	if (q1 <= 19) { // C1 fits in 64 bits
		1455	// 1 <= q1 <= 19 => 15 <= scale <= 33
		1456	if (scale <= 19) { // 10^scale fits in 64 bits
		1457	__mul_64x64_to_128MACH (C1, ten2k64[scale], C1_lo);
		1458	} else { // if 20 <= scale <= 33
		1459	// C1 * 10^scale = (C1 * 10^(scale-19)) * 10^19 where
		1460	// (C1 * 10^(scale-19)) fits in 64 bits
		1461	C1_lo = C1_lo * ten2k64[scale - 19];
		1462	__mul_64x64_to_128MACH (C1, ten2k64[19], C1_lo);
		1463	}
		1464	} else { //if 20 <= q1 <= 33=P34-1 then C1 fits only in 128 bits
		1465	// => 1 <= P34 - q1 <= 14 so 10^(P34-q1) fits in 64 bits
		1466	C1.w[1] = C1_hi;
		1467	C1.w[0] = C1_lo;
		1468	// C1 = ten2k64[P34 - q1] * C1
		1469	__mul_128x64_to_128 (C1, ten2k64[P34 - q1], C1);
		1470	}
		1471	C1_hi = C1.w[1];
		1472	C1_lo = C1.w[0];
		1473	x_exp = x_exp - ((UINT64) scale << 49);
		1474	}
		1475	if ((rnd_mode == ROUNDING_TO_NEAREST && x_sign != y_sign)
		1476	\|\| (rnd_mode == ROUNDING_TIES_AWAY && x_sign != y_sign
		1477	&& (C2_hi != halfulp128.w[1]
		1478	\|\| C2_lo != halfulp128.w[0]))
		1479	\|\| (rnd_mode == ROUNDING_DOWN && !x_sign && y_sign)
		1480	\|\| (rnd_mode == ROUNDING_UP && x_sign && !y_sign)
		1481	\|\| (rnd_mode == ROUNDING_TO_ZERO
		1482	&& x_sign != y_sign)) {
		1483	// the result is x - 1
		1484	// for RN n1 * n2 < 0; underflow not possible
		1485	C1_lo = C1_lo - 1;
		1486	if (C1_lo == 0xffffffffffffffffull)
		1487	C1_hi--;
		1488	// check if we crossed into the lower decade
		1489	if (C1_hi == 0x0000314dc6448d93ull && C1_lo == 0x38c15b09ffffffffull) { // 10^33 - 1
		1490	C1_hi = 0x0001ed09bead87c0ull; // 10^34 - 1
		1491	C1_lo = 0x378d8e63ffffffffull;
		1492	x_exp = x_exp - EXP_P1; // no underflow, because n1 >> n2
		1493	}
		1494	} else
		1495	if ((rnd_mode == ROUNDING_TO_NEAREST
		1496	&& x_sign == y_sign)
		1497	\|\| (rnd_mode == ROUNDING_TIES_AWAY
		1498	&& x_sign == y_sign)
		1499	\|\| (rnd_mode == ROUNDING_DOWN && x_sign && y_sign)
		1500	\|\| (rnd_mode == ROUNDING_UP && !x_sign
		1501	&& !y_sign)) {
		1502	// the result is x + 1
		1503	// for RN x_sign = y_sign, i.e. n1*n2 > 0
		1504	C1_lo = C1_lo + 1;
		1505	if (C1_lo == 0) { // rounding overflow in the low 64 bits
		1506	C1_hi = C1_hi + 1;
		1507	}
		1508	if (C1_hi == 0x0001ed09bead87c0ull
		1509	&& C1_lo == 0x378d8e6400000000ull) {
		1510	// C1 = 10^34 => rounding overflow
		1511	C1_hi = 0x0000314dc6448d93ull;
		1512	C1_lo = 0x38c15b0a00000000ull; // 10^33
		1513	x_exp = x_exp + EXP_P1;
		1514	if (x_exp == EXP_MAX_P1) { // overflow
		1515	C1_hi = 0x7800000000000000ull; // +inf
		1516	C1_lo = 0x0ull;
		1517	x_exp = 0; // x_sign is preserved
		1518	// set the overflow flag
		1519	*pfpsf \|= OVERFLOW_EXCEPTION;
		1520	}
		1521	}
		1522	} else {
		1523	; // the result is x
		1524	}
		1525	// set the inexact flag
		1526	*pfpsf \|= INEXACT_EXCEPTION;
		1527	// assemble the result
		1528	res.w[1] = x_sign \| x_exp \| C1_hi;
		1529	res.w[0] = C1_lo;
		1530	}
		1531	} // end q1 >= 20
		1532	// end case where C1 != 10^(q1-1)
		1533	} else { // C1 = 10^(q1-1) and x_sign != y_sign
		1534	// instead of C' = (C1 * 10^(e1-e2) + C2)rnd,P34
		1535	// calculate C' = C1 * 10^(e1-e2-x1) + (C2 * 10^(-x1))rnd,P34
		1536	// where x1 = q2 - 1, 0 <= x1 <= P34 - 1
		1537	// Because C1 = 10^(q1-1) and x_sign != y_sign, C' will have P34
		1538	// digits and n = C' * 10^(e2+x1)
		1539	// If the result has P34+1 digits, redo the steps above with x1+1
		1540	// If the result has P34-1 digits or less, redo the steps above with
		1541	// x1-1 but only if initially x1 >= 1
		1542	// NOTE: these two steps can be improved, e.g we could guess if
		1543	// P34+1 or P34-1 digits will be obtained by adding/subtracting
		1544	// just the top 64 bits of the two operands
		1545	// The result cannot be zero, and it cannot overflow
		1546	x1 = q2 - 1; // 0 <= x1 <= P34-1
		1547	// Calculate C1 * 10^(e1-e2-x1) where 1 <= e1-e2-x1 <= P34
		1548	// scale = (int)(e1 >> 49) - (int)(e2 >> 49) - x1; 0 <= scale <= P34-1
		1549	scale = P34 - q1 + 1; // scale=e1-e2-x1 = P34+1-q1; 1<=scale<=P34
		1550	// either C1 or 10^(e1-e2-x1) may not fit is 64 bits,
		1551	// but their product fits with certainty in 128 bits
		1552	if (scale >= 20) { //10^(e1-e2-x1) doesn't fit in 64 bits, but C1 does
		1553	__mul_128x64_to_128 (C1, C1_lo, ten2k128[scale - 20]);
		1554	} else { // if (scale >= 1
		1555	// if 1 <= scale <= 19 then 10^(e1-e2-x1) fits in 64 bits
		1556	if (q1 <= 19) { // C1 fits in 64 bits
		1557	__mul_64x64_to_128MACH (C1, C1_lo, ten2k64[scale]);
		1558	} else { // q1 >= 20
		1559	C1.w[1] = C1_hi;
		1560	C1.w[0] = C1_lo;
		1561	__mul_128x64_to_128 (C1, ten2k64[scale], C1);
		1562	}
		1563	}
		1564	tmp64 = C1.w[0]; // C1.w[1], C1.w[0] contains C1 * 10^(e1-e2-x1)
		1565
		1566	// now round C2 to q2-x1 = 1 decimal digit
		1567	// C2' = C2 + 1/2 * 10^x1 = C2 + 5 * 10^(x1-1)
		1568	ind = x1 - 1; // -1 <= ind <= P34 - 2
		1569	if (ind >= 0) { // if (x1 >= 1)
		1570	C2.w[0] = C2_lo;
		1571	C2.w[1] = C2_hi;
		1572	if (ind <= 18) {
		1573	C2.w[0] = C2.w[0] + midpoint64[ind];
		1574	if (C2.w[0] < C2_lo)
		1575	C2.w[1]++;
		1576	} else { // 19 <= ind <= 32
		1577	C2.w[0] = C2.w[0] + midpoint128[ind - 19].w[0];
		1578	C2.w[1] = C2.w[1] + midpoint128[ind - 19].w[1];
		1579	if (C2.w[0] < C2_lo)
		1580	C2.w[1]++;
		1581	}
		1582	// the approximation of 10^(-x1) was rounded up to 118 bits
		1583	__mul_128x128_to_256 (R256, C2, ten2mk128[ind]); // R256 = C2, f2
		1584	// calculate C2* and f2*
		1585	// C2* is actually floor(C2*) in this case
		1586	// C2* and f2* need shifting and masking, as shown by
		1587	// shiftright128[] and maskhigh128[]
		1588	// the top Ex bits of 10^(-x1) are T* = ten2mk128trunc[ind], e.g.
		1589	// if x1=1, T*=ten2mk128trunc[0]=0x19999999999999999999999999999999
		1590	// if (0 < f2* < 10^(-x1)) then
		1591	// if floor(C1+C2) is even then C2 = floor(C2*) - logical right
		1592	// shift; C2* has p decimal digits, correct by Prop. 1)
		1593	// else if floor(C1+C2) is odd C2 = floor(C2*)-1 (logical right
		1594	// shift; C2* has p decimal digits, correct by Pr. 1)
		1595	// else
		1596	// C2* = floor(C2*) (logical right shift; C has p decimal digits,
		1597	// correct by Property 1)
		1598	// n = C2* * 10^(e2+x1)
		1599
		1600	if (ind <= 2) {
		1601	highf2star.w[1] = 0x0;
		1602	highf2star.w[0] = 0x0; // low f2* ok
		1603	} else if (ind <= 21) {
		1604	highf2star.w[1] = 0x0;
		1605	highf2star.w[0] = R256.w[2] & maskhigh128[ind]; // low f2* ok
		1606	} else {
		1607	highf2star.w[1] = R256.w[3] & maskhigh128[ind];
		1608	highf2star.w[0] = R256.w[2]; // low f2* is ok
		1609	}
		1610	// shift right C2* by Ex-128 = shiftright128[ind]
		1611	if (ind >= 3) {
		1612	shift = shiftright128[ind];
		1613	if (shift < 64) { // 3 <= shift <= 63
		1614	R256.w[2] =
		1615	(R256.w[2] >> shift) \| (R256.w[3] << (64 - shift));
		1616	R256.w[3] = (R256.w[3] >> shift);
		1617	} else { // 66 <= shift <= 102
		1618	R256.w[2] = (R256.w[3] >> (shift - 64));
		1619	R256.w[3] = 0x0ULL;
		1620	}
		1621	}
		1622	// redundant
		1623	is_inexact_lt_midpoint = 0;
		1624	is_inexact_gt_midpoint = 0;
		1625	is_midpoint_lt_even = 0;
		1626	is_midpoint_gt_even = 0;
		1627	// determine inexactness of the rounding of C2*
		1628	// (cannot be followed by a second rounding)
		1629	// if (0 < f2* - 1/2 < 10^(-x1)) then
		1630	// the result is exact
		1631	// else (if f2* - 1/2 > T* then)
		1632	// the result of is inexact
		1633	if (ind <= 2) {
		1634	if (R256.w[1] > 0x8000000000000000ull \|\|
		1635	(R256.w[1] == 0x8000000000000000ull
		1636	&& R256.w[0] > 0x0ull)) {
		1637	// f2* > 1/2 and the result may be exact
		1638	tmp64A = R256.w[1] - 0x8000000000000000ull; // f* - 1/2
		1639	if ((tmp64A > ten2mk128trunc[ind].w[1]
		1640	\|\| (tmp64A == ten2mk128trunc[ind].w[1]
		1641	&& R256.w[0] >= ten2mk128trunc[ind].w[0]))) {
		1642	// set the inexact flag
		1643	*pfpsf \|= INEXACT_EXCEPTION;
		1644	// this rounding is applied to C2 only!
		1645	// x_sign != y_sign
		1646	is_inexact_gt_midpoint = 1;
		1647	} // else the result is exact
		1648	// rounding down, unless a midpoint in [ODD, EVEN]
		1649	} else { // the result is inexact; f2* <= 1/2
		1650	// set the inexact flag
		1651	*pfpsf \|= INEXACT_EXCEPTION;
		1652	// this rounding is applied to C2 only!
		1653	// x_sign != y_sign
		1654	is_inexact_lt_midpoint = 1;
		1655	}
		1656	} else if (ind <= 21) { // if 3 <= ind <= 21
		1657	if (highf2star.w[1] > 0x0 \|\| (highf2star.w[1] == 0x0
		1658	&& highf2star.w[0] >
		1659	onehalf128[ind])
		1660	\|\| (highf2star.w[1] == 0x0
		1661	&& highf2star.w[0] == onehalf128[ind]
		1662	&& (R256.w[1] \|\| R256.w[0]))) {
		1663	// f2* > 1/2 and the result may be exact
		1664	// Calculate f2* - 1/2
		1665	tmp64A = highf2star.w[0] - onehalf128[ind];
		1666	tmp64B = highf2star.w[1];
		1667	if (tmp64A > highf2star.w[0])
		1668	tmp64B--;
		1669	if (tmp64B \|\| tmp64A
		1670	\|\| R256.w[1] > ten2mk128trunc[ind].w[1]
		1671	\|\| (R256.w[1] == ten2mk128trunc[ind].w[1]
		1672	&& R256.w[0] > ten2mk128trunc[ind].w[0])) {
		1673	// set the inexact flag
		1674	*pfpsf \|= INEXACT_EXCEPTION;
		1675	// this rounding is applied to C2 only!
		1676	// x_sign != y_sign
		1677	is_inexact_gt_midpoint = 1;
		1678	} // else the result is exact
		1679	} else { // the result is inexact; f2* <= 1/2
		1680	// set the inexact flag
		1681	*pfpsf \|= INEXACT_EXCEPTION;
		1682	// this rounding is applied to C2 only!
		1683	// x_sign != y_sign
		1684	is_inexact_lt_midpoint = 1;
		1685	}
		1686	} else { // if 22 <= ind <= 33
		1687	if (highf2star.w[1] > onehalf128[ind]
		1688	\|\| (highf2star.w[1] == onehalf128[ind]
		1689	&& (highf2star.w[0] \|\| R256.w[1]
		1690	\|\| R256.w[0]))) {
		1691	// f2* > 1/2 and the result may be exact
		1692	// Calculate f2* - 1/2
		1693	// tmp64A = highf2star.w[0];
		1694	tmp64B = highf2star.w[1] - onehalf128[ind];
		1695	if (tmp64B \|\| highf2star.w[0]
		1696	\|\| R256.w[1] > ten2mk128trunc[ind].w[1]
		1697	\|\| (R256.w[1] == ten2mk128trunc[ind].w[1]
		1698	&& R256.w[0] > ten2mk128trunc[ind].w[0])) {
		1699	// set the inexact flag
		1700	*pfpsf \|= INEXACT_EXCEPTION;
		1701	// this rounding is applied to C2 only!
		1702	// x_sign != y_sign
		1703	is_inexact_gt_midpoint = 1;
		1704	} // else the result is exact
		1705	} else { // the result is inexact; f2* <= 1/2
		1706	// set the inexact flag
		1707	*pfpsf \|= INEXACT_EXCEPTION;
		1708	// this rounding is applied to C2 only!
		1709	// x_sign != y_sign
		1710	is_inexact_lt_midpoint = 1;
		1711	}
		1712	}
		1713	// check for midpoints after determining inexactness
		1714	if ((R256.w[1] \|\| R256.w[0]) && (highf2star.w[1] == 0)
		1715	&& (highf2star.w[0] == 0)
		1716	&& (R256.w[1] < ten2mk128trunc[ind].w[1]
		1717	\|\| (R256.w[1] == ten2mk128trunc[ind].w[1]
		1718	&& R256.w[0] <= ten2mk128trunc[ind].w[0]))) {
		1719	// the result is a midpoint
		1720	if ((tmp64 + R256.w[2]) & 0x01) { // MP in [EVEN, ODD]
		1721	// if floor(C2) is odd C = floor(C2) - 1; the result may be 0
		1722	R256.w[2]--;
		1723	if (R256.w[2] == 0xffffffffffffffffull)
		1724	R256.w[3]--;
		1725	// this rounding is applied to C2 only!
		1726	// x_sign != y_sign
		1727	is_midpoint_lt_even = 1;
		1728	is_inexact_lt_midpoint = 0;
		1729	is_inexact_gt_midpoint = 0;
		1730	} else {
		1731	// else MP in [ODD, EVEN]
		1732	// this rounding is applied to C2 only!
		1733	// x_sign != y_sign
		1734	is_midpoint_gt_even = 1;
		1735	is_inexact_lt_midpoint = 0;
		1736	is_inexact_gt_midpoint = 0;
		1737	}
		1738	}
		1739	} else { // if (ind == -1) only when x1 = 0
		1740	R256.w[2] = C2_lo;
		1741	R256.w[3] = C2_hi;
		1742	is_midpoint_lt_even = 0;
		1743	is_midpoint_gt_even = 0;
		1744	is_inexact_lt_midpoint = 0;
		1745	is_inexact_gt_midpoint = 0;
		1746	}
		1747	// and now subtract C1 * 10^(e1-e2-x1) - (C2 * 10^(-x1))rnd,P34
		1748	// because x_sign != y_sign this last operation is exact
		1749	C1.w[0] = C1.w[0] - R256.w[2];
		1750	C1.w[1] = C1.w[1] - R256.w[3];
		1751	if (C1.w[0] > tmp64)
		1752	C1.w[1]--; // borrow
		1753	if (C1.w[1] >= 0x8000000000000000ull) { // negative coefficient!
		1754	C1.w[0] = ~C1.w[0];
		1755	C1.w[0]++;
		1756	C1.w[1] = ~C1.w[1];
		1757	if (C1.w[0] == 0x0)
		1758	C1.w[1]++;
		1759	tmp_sign = y_sign; // the result will have the sign of y
		1760	} else {
		1761	tmp_sign = x_sign;
		1762	}
		1763	// the difference has exactly P34 digits
		1764	x_sign = tmp_sign;
		1765	if (x1 >= 1)
		1766	y_exp = y_exp + ((UINT64) x1 << 49);
		1767	C1_hi = C1.w[1];
		1768	C1_lo = C1.w[0];
		1769	// general correction from RN to RA, RM, RP, RZ; result uses y_exp
		1770	if (rnd_mode != ROUNDING_TO_NEAREST) {
		1771	if ((!x_sign
		1772	&& ((rnd_mode == ROUNDING_UP && is_inexact_lt_midpoint)
		1773	\|\|
		1774	((rnd_mode == ROUNDING_TIES_AWAY
		1775	\|\| rnd_mode == ROUNDING_UP)
		1776	&& is_midpoint_gt_even))) \|\| (x_sign
		1777	&&
		1778	((rnd_mode ==
		1779	ROUNDING_DOWN
		1780	&&
		1781	is_inexact_lt_midpoint)
		1782	\|\|
		1783	((rnd_mode ==
		1784	ROUNDING_TIES_AWAY
		1785	\|\| rnd_mode ==
		1786	ROUNDING_DOWN)
		1787	&&
		1788	is_midpoint_gt_even))))
		1789	{
		1790	// C1 = C1 + 1
		1791	C1_lo = C1_lo + 1;
		1792	if (C1_lo == 0) { // rounding overflow in the low 64 bits
		1793	C1_hi = C1_hi + 1;
		1794	}
		1795	if (C1_hi == 0x0001ed09bead87c0ull
		1796	&& C1_lo == 0x378d8e6400000000ull) {
		1797	// C1 = 10^34 => rounding overflow
		1798	C1_hi = 0x0000314dc6448d93ull;
		1799	C1_lo = 0x38c15b0a00000000ull; // 10^33
		1800	y_exp = y_exp + EXP_P1;
		1801	}
		1802	} else if ((is_midpoint_lt_even \|\| is_inexact_gt_midpoint)
		1803	&&
		1804	((x_sign
		1805	&& (rnd_mode == ROUNDING_UP
		1806	\|\| rnd_mode == ROUNDING_TO_ZERO))
		1807	\|\| (!x_sign
		1808	&& (rnd_mode == ROUNDING_DOWN
		1809	\|\| rnd_mode == ROUNDING_TO_ZERO)))) {
		1810	// C1 = C1 - 1
		1811	C1_lo = C1_lo - 1;
		1812	if (C1_lo == 0xffffffffffffffffull)
		1813	C1_hi--;
		1814	// check if we crossed into the lower decade
		1815	if (C1_hi == 0x0000314dc6448d93ull && C1_lo == 0x38c15b09ffffffffull) { // 10^33 - 1
		1816	C1_hi = 0x0001ed09bead87c0ull; // 10^34 - 1
		1817	C1_lo = 0x378d8e63ffffffffull;
		1818	y_exp = y_exp - EXP_P1;
		1819	// no underflow, because delta + q2 >= P34 + 1
		1820	}
		1821	} else {
		1822	; // exact, the result is already correct
		1823	}
		1824	}
		1825	// assemble the result
		1826	res.w[1] = x_sign \| y_exp \| C1_hi;
		1827	res.w[0] = C1_lo;
		1828	}
		1829	} // end delta = P34
		1830	} else { // if (\|delta\| <= P34 - 1)
		1831	if (delta >= 0) { // if (0 <= delta <= P34 - 1)
		1832	if (delta <= P34 - 1 - q2) {
		1833	// calculate C' directly; the result is exact
		1834	// in this case 1<=q1<=P34-1, 1<=q2<=P34-1 and 0 <= e1-e2 <= P34-2
		1835	// The coefficient of the result is C1 * 10^(e1-e2) + C2 and the
		1836	// exponent is e2; either C1 or 10^(e1-e2) may not fit is 64 bits,
		1837	// but their product fits with certainty in 128 bits (actually in 113)
		1838	scale = delta - q1 + q2; // scale = (int)(e1 >> 49) - (int)(e2 >> 49)
		1839
		1840	if (scale >= 20) { // 10^(e1-e2) does not fit in 64 bits, but C1 does
		1841	__mul_128x64_to_128 (C1, C1_lo, ten2k128[scale - 20]);
		1842	C1_hi = C1.w[1];
		1843	C1_lo = C1.w[0];
		1844	} else if (scale >= 1) {
		1845	// if 1 <= scale <= 19 then 10^(e1-e2) fits in 64 bits
		1846	if (q1 <= 19) { // C1 fits in 64 bits
		1847	__mul_64x64_to_128MACH (C1, C1_lo, ten2k64[scale]);
		1848	} else { // q1 >= 20
		1849	C1.w[1] = C1_hi;
		1850	C1.w[0] = C1_lo;
		1851	__mul_128x64_to_128 (C1, ten2k64[scale], C1);
		1852	}
		1853	C1_hi = C1.w[1];
		1854	C1_lo = C1.w[0];
		1855	} else { // if (scale == 0) C1 is unchanged
		1856	C1.w[0] = C1_lo; // C1.w[1] = C1_hi;
		1857	}
		1858	// now add C2
		1859	if (x_sign == y_sign) {
		1860	// the result cannot overflow
		1861	C1_lo = C1_lo + C2_lo;
		1862	C1_hi = C1_hi + C2_hi;
		1863	if (C1_lo < C1.w[0])
		1864	C1_hi++;
		1865	} else { // if x_sign != y_sign
		1866	C1_lo = C1_lo - C2_lo;
		1867	C1_hi = C1_hi - C2_hi;
		1868	if (C1_lo > C1.w[0])
		1869	C1_hi--;
		1870	// the result can be zero, but it cannot overflow
		1871	if (C1_lo == 0 && C1_hi == 0) {
		1872	// assemble the result
		1873	if (x_exp < y_exp)
		1874	res.w[1] = x_exp;
		1875	else
		1876	res.w[1] = y_exp;
		1877	res.w[0] = 0;
		1878	if (rnd_mode == ROUNDING_DOWN) {
		1879	res.w[1] \|= 0x8000000000000000ull;
		1880	}
		1881	BID_SWAP128 (res);
		1882	BID_RETURN (res);
		1883	}
		1884	if (C1_hi >= 0x8000000000000000ull) { // negative coefficient!
		1885	C1_lo = ~C1_lo;
		1886	C1_lo++;
		1887	C1_hi = ~C1_hi;
		1888	if (C1_lo == 0x0)
		1889	C1_hi++;
		1890	x_sign = y_sign; // the result will have the sign of y
		1891	}
		1892	}
		1893	// assemble the result
		1894	res.w[1] = x_sign \| y_exp \| C1_hi;
		1895	res.w[0] = C1_lo;
		1896	} else if (delta == P34 - q2) {
		1897	// calculate C' directly; the result may be inexact if it requires
		1898	// P34+1 decimal digits; in this case the 'cutoff' point for addition
		1899	// is at the position of the lsb of C2, so 0 <= e1-e2 <= P34-1
		1900	// The coefficient of the result is C1 * 10^(e1-e2) + C2 and the
		1901	// exponent is e2; either C1 or 10^(e1-e2) may not fit is 64 bits,
		1902	// but their product fits with certainty in 128 bits (actually in 113)
		1903	scale = delta - q1 + q2; // scale = (int)(e1 >> 49) - (int)(e2 >> 49)
		1904	if (scale >= 20) { // 10^(e1-e2) does not fit in 64 bits, but C1 does
		1905	__mul_128x64_to_128 (C1, C1_lo, ten2k128[scale - 20]);
		1906	} else if (scale >= 1) {
		1907	// if 1 <= scale <= 19 then 10^(e1-e2) fits in 64 bits
		1908	if (q1 <= 19) { // C1 fits in 64 bits
		1909	__mul_64x64_to_128MACH (C1, C1_lo, ten2k64[scale]);
		1910	} else { // q1 >= 20
		1911	C1.w[1] = C1_hi;
		1912	C1.w[0] = C1_lo;
		1913	__mul_128x64_to_128 (C1, ten2k64[scale], C1);
		1914	}
		1915	} else { // if (scale == 0) C1 is unchanged
		1916	C1.w[1] = C1_hi;
		1917	C1.w[0] = C1_lo; // only the low part is necessary
		1918	}
		1919	C1_hi = C1.w[1];
		1920	C1_lo = C1.w[0];
		1921	// now add C2
		1922	if (x_sign == y_sign) {
		1923	// the result can overflow!
		1924	C1_lo = C1_lo + C2_lo;
		1925	C1_hi = C1_hi + C2_hi;
		1926	if (C1_lo < C1.w[0])
		1927	C1_hi++;
		1928	// test for overflow, possible only when C1 >= 10^34
		1929	if (C1_hi > 0x0001ed09bead87c0ull \|\| (C1_hi == 0x0001ed09bead87c0ull && C1_lo >= 0x378d8e6400000000ull)) { // C1 >= 10^34
		1930	// in this case q = P34 + 1 and x = q - P34 = 1, so multiply
		1931	// C'' = C'+ 5 = C1 + 5 by k1 ~ 10^(-1) calculated for P34 + 1
		1932	// decimal digits
		1933	// Calculate C'' = C' + 1/2 * 10^x
		1934	if (C1_lo >= 0xfffffffffffffffbull) { // low half add has carry
		1935	C1_lo = C1_lo + 5;
		1936	C1_hi = C1_hi + 1;
		1937	} else {
		1938	C1_lo = C1_lo + 5;
		1939	}
		1940	// the approximation of 10^(-1) was rounded up to 118 bits
		1941	// 10^(-1) =~ 33333333333333333333333333333400 * 2^-129
		1942	// 10^(-1) =~ 19999999999999999999999999999a00 * 2^-128
		1943	C1.w[1] = C1_hi;
		1944	C1.w[0] = C1_lo; // C''
		1945	ten2m1.w[1] = 0x1999999999999999ull;
		1946	ten2m1.w[0] = 0x9999999999999a00ull;
		1947	__mul_128x128_to_256 (P256, C1, ten2m1); // P256 = C, f
		1948	// C* is actually floor(C*) in this case
		1949	// the top Ex = 128 bits of 10^(-1) are
		1950	// T* = 0x00199999999999999999999999999999
		1951	// if (0 < f* < 10^(-x)) then
		1952	// if floor(C) is even then C = floor(C) - logical right
		1953	// shift; C has p decimal digits, correct by Prop. 1)
		1954	// else if floor(C) is odd C = floor(C) - 1 (logical right
		1955	// shift; C has p decimal digits, correct by Pr. 1)
		1956	// else
		1957	// C = floor(C*) (logical right shift; C has p decimal digits,
		1958	// correct by Property 1)
		1959	// n = C * 10^(e2+x)
		1960	if ((P256.w[1] \|\| P256.w[0])
		1961	&& (P256.w[1] < 0x1999999999999999ull
		1962	\|\| (P256.w[1] == 0x1999999999999999ull
		1963	&& P256.w[0] <= 0x9999999999999999ull))) {
		1964	// the result is a midpoint
		1965	if (P256.w[2] & 0x01) {
		1966	is_midpoint_gt_even = 1;
		1967	// if floor(C) is odd C = floor(C) - 1; the result is not 0
		1968	P256.w[2]--;
		1969	if (P256.w[2] == 0xffffffffffffffffull)
		1970	P256.w[3]--;
		1971	} else {
		1972	is_midpoint_lt_even = 1;
		1973	}
		1974	}
		1975	// n = Cstar * 10^(e2+1)
		1976	y_exp = y_exp + EXP_P1;
		1977	// C* != 10^P because C* has P34 digits
		1978	// check for overflow
		1979	if (y_exp == EXP_MAX_P1
		1980	&& (rnd_mode == ROUNDING_TO_NEAREST
		1981	\|\| rnd_mode == ROUNDING_TIES_AWAY)) {
		1982	// overflow for RN
		1983	res.w[1] = x_sign \| 0x7800000000000000ull; // +/-inf
		1984	res.w[0] = 0x0ull;
		1985	// set the inexact flag
		1986	*pfpsf \|= INEXACT_EXCEPTION;
		1987	// set the overflow flag
		1988	*pfpsf \|= OVERFLOW_EXCEPTION;
		1989	BID_SWAP128 (res);
		1990	BID_RETURN (res);
		1991	}
		1992	// if (0 < f* - 1/2 < 10^(-x)) then
		1993	// the result of the addition is exact
		1994	// else
		1995	// the result of the addition is inexact
		1996	if (P256.w[1] > 0x8000000000000000ull \|\| (P256.w[1] == 0x8000000000000000ull && P256.w[0] > 0x0ull)) { // the result may be exact
		1997	tmp64 = P256.w[1] - 0x8000000000000000ull; // f* - 1/2
		1998	if ((tmp64 > 0x1999999999999999ull
		1999	\|\| (tmp64 == 0x1999999999999999ull
		2000	&& P256.w[0] >= 0x9999999999999999ull))) {
		2001	// set the inexact flag
		2002	*pfpsf \|= INEXACT_EXCEPTION;
		2003	is_inexact = 1;
		2004	} // else the result is exact
		2005	} else { // the result is inexact
		2006	// set the inexact flag
		2007	*pfpsf \|= INEXACT_EXCEPTION;
		2008	is_inexact = 1;
		2009	}
		2010	C1_hi = P256.w[3];
		2011	C1_lo = P256.w[2];
		2012	if (!is_midpoint_gt_even && !is_midpoint_lt_even) {
		2013	is_inexact_lt_midpoint = is_inexact
		2014	&& (P256.w[1] & 0x8000000000000000ull);
		2015	is_inexact_gt_midpoint = is_inexact
		2016	&& !(P256.w[1] & 0x8000000000000000ull);
		2017	}
		2018	// general correction from RN to RA, RM, RP, RZ;
		2019	// result uses y_exp
		2020	if (rnd_mode != ROUNDING_TO_NEAREST) {
		2021	if ((!x_sign
		2022	&&
		2023	((rnd_mode == ROUNDING_UP
		2024	&& is_inexact_lt_midpoint)
		2025	\|\|
		2026	((rnd_mode == ROUNDING_TIES_AWAY
		2027	\|\| rnd_mode == ROUNDING_UP)
		2028	&& is_midpoint_gt_even))) \|\| (x_sign
		2029	&&
		2030	((rnd_mode ==
		2031	ROUNDING_DOWN
		2032	&&
		2033	is_inexact_lt_midpoint)
		2034	\|\|
		2035	((rnd_mode ==
		2036	ROUNDING_TIES_AWAY
		2037	\|\| rnd_mode ==
		2038	ROUNDING_DOWN)
		2039	&&
		2040	is_midpoint_gt_even))))
		2041	{
		2042	// C1 = C1 + 1
		2043	C1_lo = C1_lo + 1;
		2044	if (C1_lo == 0) { // rounding overflow in the low 64 bits
		2045	C1_hi = C1_hi + 1;
		2046	}
		2047	if (C1_hi == 0x0001ed09bead87c0ull
		2048	&& C1_lo == 0x378d8e6400000000ull) {
		2049	// C1 = 10^34 => rounding overflow
		2050	C1_hi = 0x0000314dc6448d93ull;
		2051	C1_lo = 0x38c15b0a00000000ull; // 10^33
		2052	y_exp = y_exp + EXP_P1;
		2053	}
		2054	} else
		2055	if ((is_midpoint_lt_even \|\| is_inexact_gt_midpoint)
		2056	&&
		2057	((x_sign
		2058	&& (rnd_mode == ROUNDING_UP
		2059	\|\| rnd_mode == ROUNDING_TO_ZERO))
		2060	\|\| (!x_sign
		2061	&& (rnd_mode == ROUNDING_DOWN
		2062	\|\| rnd_mode == ROUNDING_TO_ZERO)))) {
		2063	// C1 = C1 - 1
		2064	C1_lo = C1_lo - 1;
		2065	if (C1_lo == 0xffffffffffffffffull)
		2066	C1_hi--;
		2067	// check if we crossed into the lower decade
		2068	if (C1_hi == 0x0000314dc6448d93ull && C1_lo == 0x38c15b09ffffffffull) { // 10^33 - 1
		2069	C1_hi = 0x0001ed09bead87c0ull; // 10^34 - 1
		2070	C1_lo = 0x378d8e63ffffffffull;
		2071	y_exp = y_exp - EXP_P1;
		2072	// no underflow, because delta + q2 >= P34 + 1
		2073	}
		2074	} else {
		2075	; // exact, the result is already correct
		2076	}
		2077	// in all cases check for overflow (RN and RA solved already)
		2078	if (y_exp == EXP_MAX_P1) { // overflow
		2079	if ((rnd_mode == ROUNDING_DOWN && x_sign) \|\| // RM and res < 0
		2080	(rnd_mode == ROUNDING_UP && !x_sign)) { // RP and res > 0
		2081	C1_hi = 0x7800000000000000ull; // +inf
		2082	C1_lo = 0x0ull;
		2083	} else { // RM and res > 0, RP and res < 0, or RZ
		2084	C1_hi = 0x5fffed09bead87c0ull;
		2085	C1_lo = 0x378d8e63ffffffffull;
		2086	}
		2087	y_exp = 0; // x_sign is preserved
		2088	// set the inexact flag (in case the exact addition was exact)
		2089	*pfpsf \|= INEXACT_EXCEPTION;
		2090	// set the overflow flag
		2091	*pfpsf \|= OVERFLOW_EXCEPTION;
		2092	}
		2093	}
		2094	} // else if (C1 < 10^34) then C1 is the coeff.; the result is exact
		2095	} else { // if x_sign != y_sign the result is exact
		2096	C1_lo = C1_lo - C2_lo;
		2097	C1_hi = C1_hi - C2_hi;
		2098	if (C1_lo > C1.w[0])
		2099	C1_hi--;
		2100	// the result can be zero, but it cannot overflow
		2101	if (C1_lo == 0 && C1_hi == 0) {
		2102	// assemble the result
		2103	if (x_exp < y_exp)
		2104	res.w[1] = x_exp;
		2105	else
		2106	res.w[1] = y_exp;
		2107	res.w[0] = 0;
		2108	if (rnd_mode == ROUNDING_DOWN) {
		2109	res.w[1] \|= 0x8000000000000000ull;
		2110	}
		2111	BID_SWAP128 (res);
		2112	BID_RETURN (res);
		2113	}
		2114	if (C1_hi >= 0x8000000000000000ull) { // negative coefficient!
		2115	C1_lo = ~C1_lo;
		2116	C1_lo++;
		2117	C1_hi = ~C1_hi;
		2118	if (C1_lo == 0x0)
		2119	C1_hi++;
		2120	x_sign = y_sign; // the result will have the sign of y
		2121	}
		2122	}
		2123	// assemble the result
		2124	res.w[1] = x_sign \| y_exp \| C1_hi;
		2125	res.w[0] = C1_lo;
		2126	} else { // if (delta >= P34 + 1 - q2)
		2127	// instead of C' = (C1 * 10^(e1-e2) + C2)rnd,P34
		2128	// calculate C' = C1 * 10^(e1-e2-x1) + (C2 * 10^(-x1))rnd,P34
		2129	// where x1 = q1 + e1 - e2 - P34, 1 <= x1 <= P34 - 1
		2130	// In most cases C' will have P34 digits, and n = C' * 10^(e2+x1)
		2131	// If the result has P34+1 digits, redo the steps above with x1+1
		2132	// If the result has P34-1 digits or less, redo the steps above with
		2133	// x1-1 but only if initially x1 >= 1
		2134	// NOTE: these two steps can be improved, e.g we could guess if
		2135	// P34+1 or P34-1 digits will be obtained by adding/subtracting just
		2136	// the top 64 bits of the two operands
		2137	// The result cannot be zero, but it can overflow
		2138	x1 = delta + q2 - P34; // 1 <= x1 <= P34-1
		2139	roundC2:
		2140	// Calculate C1 * 10^(e1-e2-x1) where 0 <= e1-e2-x1 <= P34 - 1
		2141	// scale = (int)(e1 >> 49) - (int)(e2 >> 49) - x1; 0 <= scale <= P34-1
		2142	scale = delta - q1 + q2 - x1; // scale = e1 - e2 - x1 = P34 - q1
		2143	// either C1 or 10^(e1-e2-x1) may not fit is 64 bits,
		2144	// but their product fits with certainty in 128 bits (actually in 113)
		2145	if (scale >= 20) { //10^(e1-e2-x1) doesn't fit in 64 bits, but C1 does
		2146	__mul_128x64_to_128 (C1, C1_lo, ten2k128[scale - 20]);
		2147	} else if (scale >= 1) {
		2148	// if 1 <= scale <= 19 then 10^(e1-e2-x1) fits in 64 bits
		2149	if (q1 <= 19) { // C1 fits in 64 bits
		2150	__mul_64x64_to_128MACH (C1, C1_lo, ten2k64[scale]);
		2151	} else { // q1 >= 20
		2152	C1.w[1] = C1_hi;
		2153	C1.w[0] = C1_lo;
		2154	__mul_128x64_to_128 (C1, ten2k64[scale], C1);
		2155	}
		2156	} else { // if (scale == 0) C1 is unchanged
		2157	C1.w[1] = C1_hi;
		2158	C1.w[0] = C1_lo;
		2159	}
		2160	tmp64 = C1.w[0]; // C1.w[1], C1.w[0] contains C1 * 10^(e1-e2-x1)
		2161
		2162	// now round C2 to q2-x1 decimal digits, where 1<=x1<=q2-1<=P34-1
		2163	// (but if we got here a second time after x1 = x1 - 1, then
		2164	// x1 >= 0; note that for x1 = 0 C2 is unchanged)
		2165	// C2' = C2 + 1/2 * 10^x1 = C2 + 5 * 10^(x1-1)
		2166	ind = x1 - 1; // 0 <= ind <= q2-2<=P34-2=32; but note that if x1 = 0
		2167	// during a second pass, then ind = -1
		2168	if (ind >= 0) { // if (x1 >= 1)
		2169	C2.w[0] = C2_lo;
		2170	C2.w[1] = C2_hi;
		2171	if (ind <= 18) {
		2172	C2.w[0] = C2.w[0] + midpoint64[ind];
		2173	if (C2.w[0] < C2_lo)
		2174	C2.w[1]++;
		2175	} else { // 19 <= ind <= 32
		2176	C2.w[0] = C2.w[0] + midpoint128[ind - 19].w[0];
		2177	C2.w[1] = C2.w[1] + midpoint128[ind - 19].w[1];
		2178	if (C2.w[0] < C2_lo)
		2179	C2.w[1]++;
		2180	}
		2181	// the approximation of 10^(-x1) was rounded up to 118 bits
		2182	__mul_128x128_to_256 (R256, C2, ten2mk128[ind]); // R256 = C2, f2
		2183	// calculate C2* and f2*
		2184	// C2* is actually floor(C2*) in this case
		2185	// C2* and f2* need shifting and masking, as shown by
		2186	// shiftright128[] and maskhigh128[]
		2187	// the top Ex bits of 10^(-x1) are T* = ten2mk128trunc[ind], e.g.
		2188	// if x1=1, T*=ten2mk128trunc[0]=0x19999999999999999999999999999999
		2189	// if (0 < f2* < 10^(-x1)) then
		2190	// if floor(C1+C2) is even then C2 = floor(C2*) - logical right
		2191	// shift; C2* has p decimal digits, correct by Prop. 1)
		2192	// else if floor(C1+C2) is odd C2 = floor(C2*)-1 (logical right
		2193	// shift; C2* has p decimal digits, correct by Pr. 1)
		2194	// else
		2195	// C2* = floor(C2*) (logical right shift; C has p decimal digits,
		2196	// correct by Property 1)
		2197	// n = C2* * 10^(e2+x1)
		2198
		2199	if (ind <= 2) {
		2200	highf2star.w[1] = 0x0;
		2201	highf2star.w[0] = 0x0; // low f2* ok
		2202	} else if (ind <= 21) {
		2203	highf2star.w[1] = 0x0;
		2204	highf2star.w[0] = R256.w[2] & maskhigh128[ind]; // low f2* ok
		2205	} else {
		2206	highf2star.w[1] = R256.w[3] & maskhigh128[ind];
		2207	highf2star.w[0] = R256.w[2]; // low f2* is ok
		2208	}
		2209	// shift right C2* by Ex-128 = shiftright128[ind]
		2210	if (ind >= 3) {
		2211	shift = shiftright128[ind];
		2212	if (shift < 64) { // 3 <= shift <= 63
		2213	R256.w[2] =
		2214	(R256.w[2] >> shift) \| (R256.w[3] << (64 - shift));
		2215	R256.w[3] = (R256.w[3] >> shift);
		2216	} else { // 66 <= shift <= 102
		2217	R256.w[2] = (R256.w[3] >> (shift - 64));
		2218	R256.w[3] = 0x0ULL;
		2219	}
		2220	}
		2221	if (second_pass) {
		2222	is_inexact_lt_midpoint = 0;
		2223	is_inexact_gt_midpoint = 0;
		2224	is_midpoint_lt_even = 0;
		2225	is_midpoint_gt_even = 0;
		2226	}
		2227	// determine inexactness of the rounding of C2* (this may be
		2228	// followed by a second rounding only if we get P34+1
		2229	// decimal digits)
		2230	// if (0 < f2* - 1/2 < 10^(-x1)) then
		2231	// the result is exact
		2232	// else (if f2* - 1/2 > T* then)
		2233	// the result of is inexact
		2234	if (ind <= 2) {
		2235	if (R256.w[1] > 0x8000000000000000ull \|\|
		2236	(R256.w[1] == 0x8000000000000000ull
		2237	&& R256.w[0] > 0x0ull)) {
		2238	// f2* > 1/2 and the result may be exact
		2239	tmp64A = R256.w[1] - 0x8000000000000000ull; // f* - 1/2
		2240	if ((tmp64A > ten2mk128trunc[ind].w[1]
		2241	\|\| (tmp64A == ten2mk128trunc[ind].w[1]
		2242	&& R256.w[0] >= ten2mk128trunc[ind].w[0]))) {
		2243	// set the inexact flag
		2244	// *pfpsf \|= INEXACT_EXCEPTION;
		2245	tmp_inexact = 1; // may be set again during a second pass
		2246	// this rounding is applied to C2 only!
		2247	if (x_sign == y_sign)
		2248	is_inexact_lt_midpoint = 1;
		2249	else // if (x_sign != y_sign)
		2250	is_inexact_gt_midpoint = 1;
		2251	} // else the result is exact
		2252	// rounding down, unless a midpoint in [ODD, EVEN]
		2253	} else { // the result is inexact; f2* <= 1/2
		2254	// set the inexact flag
		2255	// *pfpsf \|= INEXACT_EXCEPTION;
		2256	tmp_inexact = 1; // just in case we will round a second time
		2257	// rounding up, unless a midpoint in [EVEN, ODD]
		2258	// this rounding is applied to C2 only!
		2259	if (x_sign == y_sign)
		2260	is_inexact_gt_midpoint = 1;
		2261	else // if (x_sign != y_sign)
		2262	is_inexact_lt_midpoint = 1;
		2263	}
		2264	} else if (ind <= 21) { // if 3 <= ind <= 21
		2265	if (highf2star.w[1] > 0x0 \|\| (highf2star.w[1] == 0x0
		2266	&& highf2star.w[0] >
		2267	onehalf128[ind])
		2268	\|\| (highf2star.w[1] == 0x0
		2269	&& highf2star.w[0] == onehalf128[ind]
		2270	&& (R256.w[1] \|\| R256.w[0]))) {
		2271	// f2* > 1/2 and the result may be exact
		2272	// Calculate f2* - 1/2
		2273	tmp64A = highf2star.w[0] - onehalf128[ind];
		2274	tmp64B = highf2star.w[1];
		2275	if (tmp64A > highf2star.w[0])
		2276	tmp64B--;
		2277	if (tmp64B \|\| tmp64A
		2278	\|\| R256.w[1] > ten2mk128trunc[ind].w[1]
		2279	\|\| (R256.w[1] == ten2mk128trunc[ind].w[1]
		2280	&& R256.w[0] > ten2mk128trunc[ind].w[0])) {
		2281	// set the inexact flag
		2282	// *pfpsf \|= INEXACT_EXCEPTION;
		2283	tmp_inexact = 1; // may be set again during a second pass
		2284	// this rounding is applied to C2 only!
		2285	if (x_sign == y_sign)
		2286	is_inexact_lt_midpoint = 1;
		2287	else // if (x_sign != y_sign)
		2288	is_inexact_gt_midpoint = 1;
		2289	} // else the result is exact
		2290	} else { // the result is inexact; f2* <= 1/2
		2291	// set the inexact flag
		2292	// *pfpsf \|= INEXACT_EXCEPTION;
		2293	tmp_inexact = 1; // may be set again during a second pass
		2294	// rounding up, unless a midpoint in [EVEN, ODD]
		2295	// this rounding is applied to C2 only!
		2296	if (x_sign == y_sign)
		2297	is_inexact_gt_midpoint = 1;
		2298	else // if (x_sign != y_sign)
		2299	is_inexact_lt_midpoint = 1;
		2300	}
		2301	} else { // if 22 <= ind <= 33
		2302	if (highf2star.w[1] > onehalf128[ind]
		2303	\|\| (highf2star.w[1] == onehalf128[ind]
		2304	&& (highf2star.w[0] \|\| R256.w[1]
		2305	\|\| R256.w[0]))) {
		2306	// f2* > 1/2 and the result may be exact
		2307	// Calculate f2* - 1/2
		2308	// tmp64A = highf2star.w[0];
		2309	tmp64B = highf2star.w[1] - onehalf128[ind];
		2310	if (tmp64B \|\| highf2star.w[0]
		2311	\|\| R256.w[1] > ten2mk128trunc[ind].w[1]
		2312	\|\| (R256.w[1] == ten2mk128trunc[ind].w[1]
		2313	&& R256.w[0] > ten2mk128trunc[ind].w[0])) {
		2314	// set the inexact flag
		2315	// *pfpsf \|= INEXACT_EXCEPTION;
		2316	tmp_inexact = 1; // may be set again during a second pass
		2317	// this rounding is applied to C2 only!
		2318	if (x_sign == y_sign)
		2319	is_inexact_lt_midpoint = 1;
		2320	else // if (x_sign != y_sign)
		2321	is_inexact_gt_midpoint = 1;
		2322	} // else the result is exact
		2323	} else { // the result is inexact; f2* <= 1/2
		2324	// set the inexact flag
		2325	// *pfpsf \|= INEXACT_EXCEPTION;
		2326	tmp_inexact = 1; // may be set again during a second pass
		2327	// rounding up, unless a midpoint in [EVEN, ODD]
		2328	// this rounding is applied to C2 only!
		2329	if (x_sign == y_sign)
		2330	is_inexact_gt_midpoint = 1;
		2331	else // if (x_sign != y_sign)
		2332	is_inexact_lt_midpoint = 1;
		2333	}
		2334	}
		2335	// check for midpoints
		2336	if ((R256.w[1] \|\| R256.w[0]) && (highf2star.w[1] == 0)
		2337	&& (highf2star.w[0] == 0)
		2338	&& (R256.w[1] < ten2mk128trunc[ind].w[1]
		2339	\|\| (R256.w[1] == ten2mk128trunc[ind].w[1]
		2340	&& R256.w[0] <= ten2mk128trunc[ind].w[0]))) {
		2341	// the result is a midpoint
		2342	if ((tmp64 + R256.w[2]) & 0x01) { // MP in [EVEN, ODD]
		2343	// if floor(C2) is odd C = floor(C2) - 1; the result may be 0
		2344	R256.w[2]--;
		2345	if (R256.w[2] == 0xffffffffffffffffull)
		2346	R256.w[3]--;
		2347	// this rounding is applied to C2 only!
		2348	if (x_sign == y_sign)
		2349	is_midpoint_gt_even = 1;
		2350	else // if (x_sign != y_sign)
		2351	is_midpoint_lt_even = 1;
		2352	is_inexact_lt_midpoint = 0;
		2353	is_inexact_gt_midpoint = 0;
		2354	} else {
		2355	// else MP in [ODD, EVEN]
		2356	// this rounding is applied to C2 only!
		2357	if (x_sign == y_sign)
		2358	is_midpoint_lt_even = 1;
		2359	else // if (x_sign != y_sign)
		2360	is_midpoint_gt_even = 1;
		2361	is_inexact_lt_midpoint = 0;
		2362	is_inexact_gt_midpoint = 0;
		2363	}
		2364	}
		2365	// end if (ind >= 0)
		2366	} else { // if (ind == -1); only during a 2nd pass, and when x1 = 0
		2367	R256.w[2] = C2_lo;
		2368	R256.w[3] = C2_hi;
		2369	tmp_inexact = 0;
		2370	// to correct a possible setting to 1 from 1st pass
		2371	if (second_pass) {
		2372	is_midpoint_lt_even = 0;
		2373	is_midpoint_gt_even = 0;
		2374	is_inexact_lt_midpoint = 0;
		2375	is_inexact_gt_midpoint = 0;
		2376	}
		2377	}
		2378	// and now add/subtract C1 * 10^(e1-e2-x1) +/- (C2 * 10^(-x1))rnd,P34
		2379	if (x_sign == y_sign) { // addition; could overflow
		2380	// no second pass is possible this way (only for x_sign != y_sign)
		2381	C1.w[0] = C1.w[0] + R256.w[2];
		2382	C1.w[1] = C1.w[1] + R256.w[3];
		2383	if (C1.w[0] < tmp64)
		2384	C1.w[1]++; // carry
		2385	// if the sum has P34+1 digits, i.e. C1>=10^34 redo the calculation
		2386	// with x1=x1+1
		2387	if (C1.w[1] > 0x0001ed09bead87c0ull \|\| (C1.w[1] == 0x0001ed09bead87c0ull && C1.w[0] >= 0x378d8e6400000000ull)) { // C1 >= 10^34
		2388	// chop off one more digit from the sum, but make sure there is
		2389	// no double-rounding error (see table - double rounding logic)
		2390	// now round C1 from P34+1 to P34 decimal digits
		2391	// C1' = C1 + 1/2 * 10 = C1 + 5
		2392	if (C1.w[0] >= 0xfffffffffffffffbull) { // low half add has carry
		2393	C1.w[0] = C1.w[0] + 5;
		2394	C1.w[1] = C1.w[1] + 1;
		2395	} else {
		2396	C1.w[0] = C1.w[0] + 5;
		2397	}
		2398	// the approximation of 10^(-1) was rounded up to 118 bits
		2399	__mul_128x128_to_256 (Q256, C1, ten2mk128[0]); // Q256 = C1, f1
		2400	// C1* is actually floor(C1*) in this case
		2401	// the top 128 bits of 10^(-1) are
		2402	// T* = ten2mk128trunc[0]=0x19999999999999999999999999999999
		2403	// if (0 < f1* < 10^(-1)) then
		2404	// if floor(C1) is even then C1 = floor(C1*) - logical right
		2405	// shift; C1* has p decimal digits, correct by Prop. 1)
		2406	// else if floor(C1) is odd C1 = floor(C1*) - 1 (logical right
		2407	// shift; C1* has p decimal digits, correct by Pr. 1)
		2408	// else
		2409	// C1* = floor(C1*) (logical right shift; C has p decimal digits
		2410	// correct by Property 1)
		2411	// n = C1* * 10^(e2+x1+1)
		2412	if ((Q256.w[1] \|\| Q256.w[0])
		2413	&& (Q256.w[1] < ten2mk128trunc[0].w[1]
		2414	\|\| (Q256.w[1] == ten2mk128trunc[0].w[1]
		2415	&& Q256.w[0] <= ten2mk128trunc[0].w[0]))) {
		2416	// the result is a midpoint
		2417	if (is_inexact_lt_midpoint) { // for the 1st rounding
		2418	is_inexact_gt_midpoint = 1;
		2419	is_inexact_lt_midpoint = 0;
		2420	is_midpoint_gt_even = 0;
		2421	is_midpoint_lt_even = 0;
		2422	} else if (is_inexact_gt_midpoint) { // for the 1st rounding
		2423	Q256.w[2]--;
		2424	if (Q256.w[2] == 0xffffffffffffffffull)
		2425	Q256.w[3]--;
		2426	is_inexact_gt_midpoint = 0;
		2427	is_inexact_lt_midpoint = 1;
		2428	is_midpoint_gt_even = 0;
		2429	is_midpoint_lt_even = 0;
		2430	} else if (is_midpoint_gt_even) { // for the 1st rounding
		2431	// Note: cannot have is_midpoint_lt_even
		2432	is_inexact_gt_midpoint = 0;
		2433	is_inexact_lt_midpoint = 1;
		2434	is_midpoint_gt_even = 0;
		2435	is_midpoint_lt_even = 0;
		2436	} else { // the first rounding must have been exact
		2437	if (Q256.w[2] & 0x01) { // MP in [EVEN, ODD]
		2438	// the truncated result is correct
		2439	Q256.w[2]--;
		2440	if (Q256.w[2] == 0xffffffffffffffffull)
		2441	Q256.w[3]--;
		2442	is_inexact_gt_midpoint = 0;
		2443	is_inexact_lt_midpoint = 0;
		2444	is_midpoint_gt_even = 1;
		2445	is_midpoint_lt_even = 0;
		2446	} else { // MP in [ODD, EVEN]
		2447	is_inexact_gt_midpoint = 0;
		2448	is_inexact_lt_midpoint = 0;
		2449	is_midpoint_gt_even = 0;
		2450	is_midpoint_lt_even = 1;
		2451	}
		2452	}
		2453	tmp_inexact = 1; // in all cases
		2454	} else { // the result is not a midpoint
		2455	// determine inexactness of the rounding of C1 (the sum C1+C2*)
		2456	// if (0 < f1* - 1/2 < 10^(-1)) then
		2457	// the result is exact
		2458	// else (if f1* - 1/2 > T* then)
		2459	// the result of is inexact
		2460	// ind = 0
		2461	if (Q256.w[1] > 0x8000000000000000ull
		2462	\|\| (Q256.w[1] == 0x8000000000000000ull
		2463	&& Q256.w[0] > 0x0ull)) {
		2464	// f1* > 1/2 and the result may be exact
		2465	Q256.w[1] = Q256.w[1] - 0x8000000000000000ull; // f1* - 1/2
		2466	if ((Q256.w[1] > ten2mk128trunc[0].w[1]
		2467	\|\| (Q256.w[1] == ten2mk128trunc[0].w[1]
		2468	&& Q256.w[0] > ten2mk128trunc[0].w[0]))) {
		2469	is_inexact_gt_midpoint = 0;
		2470	is_inexact_lt_midpoint = 1;
		2471	is_midpoint_gt_even = 0;
		2472	is_midpoint_lt_even = 0;
		2473	// set the inexact flag
		2474	tmp_inexact = 1;
		2475	// *pfpsf \|= INEXACT_EXCEPTION;
		2476	} else { // else the result is exact for the 2nd rounding
		2477	if (tmp_inexact) { // if the previous rounding was inexact
		2478	if (is_midpoint_lt_even) {
		2479	is_inexact_gt_midpoint = 1;
		2480	is_midpoint_lt_even = 0;
		2481	} else if (is_midpoint_gt_even) {
		2482	is_inexact_lt_midpoint = 1;
		2483	is_midpoint_gt_even = 0;
		2484	} else {
		2485	; // no change
		2486	}
		2487	}
		2488	}
		2489	// rounding down, unless a midpoint in [ODD, EVEN]
		2490	} else { // the result is inexact; f1* <= 1/2
		2491	is_inexact_gt_midpoint = 1;
		2492	is_inexact_lt_midpoint = 0;
		2493	is_midpoint_gt_even = 0;
		2494	is_midpoint_lt_even = 0;
		2495	// set the inexact flag
		2496	tmp_inexact = 1;
		2497	// *pfpsf \|= INEXACT_EXCEPTION;
		2498	}
		2499	} // end 'the result is not a midpoint'
		2500	// n = C1 * 10^(e2+x1)
		2501	C1.w[1] = Q256.w[3];
		2502	C1.w[0] = Q256.w[2];
		2503	y_exp = y_exp + ((UINT64) (x1 + 1) << 49);
		2504	} else { // C1 < 10^34
		2505	// C1.w[1] and C1.w[0] already set
		2506	// n = C1 * 10^(e2+x1)
		2507	y_exp = y_exp + ((UINT64) x1 << 49);
		2508	}
		2509	// check for overflow
		2510	if (y_exp == EXP_MAX_P1
		2511	&& (rnd_mode == ROUNDING_TO_NEAREST
		2512	\|\| rnd_mode == ROUNDING_TIES_AWAY)) {
		2513	res.w[1] = 0x7800000000000000ull \| x_sign; // +/-inf
		2514	res.w[0] = 0x0ull;
		2515	// set the inexact flag
		2516	*pfpsf \|= INEXACT_EXCEPTION;
		2517	// set the overflow flag
		2518	*pfpsf \|= OVERFLOW_EXCEPTION;
		2519	BID_SWAP128 (res);
		2520	BID_RETURN (res);
		2521	} // else no overflow
		2522	} else { // if x_sign != y_sign the result of this subtract. is exact
		2523	C1.w[0] = C1.w[0] - R256.w[2];
		2524	C1.w[1] = C1.w[1] - R256.w[3];
		2525	if (C1.w[0] > tmp64)
		2526	C1.w[1]--; // borrow
		2527	if (C1.w[1] >= 0x8000000000000000ull) { // negative coefficient!
		2528	C1.w[0] = ~C1.w[0];
		2529	C1.w[0]++;
		2530	C1.w[1] = ~C1.w[1];
		2531	if (C1.w[0] == 0x0)
		2532	C1.w[1]++;
		2533	tmp_sign = y_sign;
		2534	// the result will have the sign of y if last rnd
		2535	} else {
		2536	tmp_sign = x_sign;
		2537	}
		2538	// if the difference has P34-1 digits or less, i.e. C1 < 10^33 then
		2539	// redo the calculation with x1=x1-1;
		2540	// redo the calculation also if C1 = 10^33 and
		2541	// (is_inexact_gt_midpoint or is_midpoint_lt_even);
		2542	// (the last part should have really been
		2543	// (is_inexact_lt_midpoint or is_midpoint_gt_even) from
		2544	// the rounding of C2, but the position flags have been reversed)
		2545	// 10^33 = 0x0000314dc6448d93 0x38c15b0a00000000
		2546	if ((C1.w[1] < 0x0000314dc6448d93ull \|\| (C1.w[1] == 0x0000314dc6448d93ull && C1.w[0] < 0x38c15b0a00000000ull)) \|\| (C1.w[1] == 0x0000314dc6448d93ull && C1.w[0] == 0x38c15b0a00000000ull && (is_inexact_gt_midpoint \|\| is_midpoint_lt_even))) { // C1=10^33
		2547	x1 = x1 - 1; // x1 >= 0
		2548	if (x1 >= 0) {
		2549	// clear position flags and tmp_inexact
		2550	is_midpoint_lt_even = 0;
		2551	is_midpoint_gt_even = 0;
		2552	is_inexact_lt_midpoint = 0;
		2553	is_inexact_gt_midpoint = 0;
		2554	tmp_inexact = 0;
		2555	second_pass = 1;
		2556	goto roundC2; // else result has less than P34 digits
		2557	}
		2558	}
		2559	// if the coefficient of the result is 10^34 it means that this
		2560	// must be the second pass, and we are done
		2561	if (C1.w[1] == 0x0001ed09bead87c0ull && C1.w[0] == 0x378d8e6400000000ull) { // if C1 = 10^34
		2562	C1.w[1] = 0x0000314dc6448d93ull; // C1 = 10^33
		2563	C1.w[0] = 0x38c15b0a00000000ull;
		2564	y_exp = y_exp + ((UINT64) 1 << 49);
		2565	}
		2566	x_sign = tmp_sign;
		2567	if (x1 >= 1)
		2568	y_exp = y_exp + ((UINT64) x1 << 49);
		2569	// x1 = -1 is possible at the end of a second pass when the
		2570	// first pass started with x1 = 1
		2571	}
		2572	C1_hi = C1.w[1];
		2573	C1_lo = C1.w[0];
		2574	// general correction from RN to RA, RM, RP, RZ; result uses y_exp
		2575	if (rnd_mode != ROUNDING_TO_NEAREST) {
		2576	if ((!x_sign
		2577	&& ((rnd_mode == ROUNDING_UP && is_inexact_lt_midpoint)
		2578	\|\|
		2579	((rnd_mode == ROUNDING_TIES_AWAY
		2580	\|\| rnd_mode == ROUNDING_UP)
		2581	&& is_midpoint_gt_even))) \|\| (x_sign
		2582	&&
		2583	((rnd_mode ==
		2584	ROUNDING_DOWN
		2585	&&
		2586	is_inexact_lt_midpoint)
		2587	\|\|
		2588	((rnd_mode ==
		2589	ROUNDING_TIES_AWAY
		2590	\|\| rnd_mode ==
		2591	ROUNDING_DOWN)
		2592	&&
		2593	is_midpoint_gt_even))))
		2594	{
		2595	// C1 = C1 + 1
		2596	C1_lo = C1_lo + 1;
		2597	if (C1_lo == 0) { // rounding overflow in the low 64 bits
		2598	C1_hi = C1_hi + 1;
		2599	}
		2600	if (C1_hi == 0x0001ed09bead87c0ull
		2601	&& C1_lo == 0x378d8e6400000000ull) {
		2602	// C1 = 10^34 => rounding overflow
		2603	C1_hi = 0x0000314dc6448d93ull;
		2604	C1_lo = 0x38c15b0a00000000ull; // 10^33
		2605	y_exp = y_exp + EXP_P1;
		2606	}
		2607	} else if ((is_midpoint_lt_even \|\| is_inexact_gt_midpoint)
		2608	&&
		2609	((x_sign
		2610	&& (rnd_mode == ROUNDING_UP
		2611	\|\| rnd_mode == ROUNDING_TO_ZERO))
		2612	\|\| (!x_sign
		2613	&& (rnd_mode == ROUNDING_DOWN
		2614	\|\| rnd_mode == ROUNDING_TO_ZERO)))) {
		2615	// C1 = C1 - 1
		2616	C1_lo = C1_lo - 1;
		2617	if (C1_lo == 0xffffffffffffffffull)
		2618	C1_hi--;
		2619	// check if we crossed into the lower decade
		2620	if (C1_hi == 0x0000314dc6448d93ull && C1_lo == 0x38c15b09ffffffffull) { // 10^33 - 1
		2621	C1_hi = 0x0001ed09bead87c0ull; // 10^34 - 1
		2622	C1_lo = 0x378d8e63ffffffffull;
		2623	y_exp = y_exp - EXP_P1;
		2624	// no underflow, because delta + q2 >= P34 + 1
		2625	}
		2626	} else {
		2627	; // exact, the result is already correct
		2628	}
		2629	// in all cases check for overflow (RN and RA solved already)
		2630	if (y_exp == EXP_MAX_P1) { // overflow
		2631	if ((rnd_mode == ROUNDING_DOWN && x_sign) \|\| // RM and res < 0
		2632	(rnd_mode == ROUNDING_UP && !x_sign)) { // RP and res > 0
		2633	C1_hi = 0x7800000000000000ull; // +inf
		2634	C1_lo = 0x0ull;
		2635	} else { // RM and res > 0, RP and res < 0, or RZ
		2636	C1_hi = 0x5fffed09bead87c0ull;
		2637	C1_lo = 0x378d8e63ffffffffull;
		2638	}
		2639	y_exp = 0; // x_sign is preserved
		2640	// set the inexact flag (in case the exact addition was exact)
		2641	*pfpsf \|= INEXACT_EXCEPTION;
		2642	// set the overflow flag
		2643	*pfpsf \|= OVERFLOW_EXCEPTION;
		2644	}
		2645	}
		2646	// assemble the result
		2647	res.w[1] = x_sign \| y_exp \| C1_hi;
		2648	res.w[0] = C1_lo;
		2649	if (tmp_inexact)
		2650	*pfpsf \|= INEXACT_EXCEPTION;
		2651	}
		2652	} else { // if (-P34 + 1 <= delta <= -1) <=> 1 <= -delta <= P34 - 1
		2653	// NOTE: the following, up to "} else { // if x_sign != y_sign
		2654	// the result is exact" is identical to "else if (delta == P34 - q2) {"
		2655	// from above; also, the code is not symmetric: a+b and b+a may take
		2656	// different paths (need to unify eventually!)
		2657	// calculate C' = C2 + C1 * 10^(e1-e2) directly; the result may be
		2658	// inexact if it requires P34 + 1 decimal digits; in either case the
		2659	// 'cutoff' point for addition is at the position of the lsb of C2
		2660	// The coefficient of the result is C1 * 10^(e1-e2) + C2 and the
		2661	// exponent is e2; either C1 or 10^(e1-e2) may not fit is 64 bits,
		2662	// but their product fits with certainty in 128 bits (actually in 113)
		2663	// Note that 0 <= e1 - e2 <= P34 - 2
		2664	// -P34 + 1 <= delta <= -1 <=> -P34 + 1 <= delta <= -1 <=>
		2665	// -P34 + 1 <= q1 + e1 - q2 - e2 <= -1 <=>
		2666	// q2 - q1 - P34 + 1 <= e1 - e2 <= q2 - q1 - 1 <=>
		2667	// 1 - P34 - P34 + 1 <= e1-e2 <= P34 - 1 - 1 => 0 <= e1-e2 <= P34 - 2
		2668	scale = delta - q1 + q2; // scale = (int)(e1 >> 49) - (int)(e2 >> 49)
		2669	if (scale >= 20) { // 10^(e1-e2) does not fit in 64 bits, but C1 does
		2670	__mul_128x64_to_128 (C1, C1_lo, ten2k128[scale - 20]);
		2671	} else if (scale >= 1) {
		2672	// if 1 <= scale <= 19 then 10^(e1-e2) fits in 64 bits
		2673	if (q1 <= 19) { // C1 fits in 64 bits
		2674	__mul_64x64_to_128MACH (C1, C1_lo, ten2k64[scale]);
		2675	} else { // q1 >= 20
		2676	C1.w[1] = C1_hi;
		2677	C1.w[0] = C1_lo;
		2678	__mul_128x64_to_128 (C1, ten2k64[scale], C1);
		2679	}
		2680	} else { // if (scale == 0) C1 is unchanged
		2681	C1.w[1] = C1_hi;
		2682	C1.w[0] = C1_lo; // only the low part is necessary
		2683	}
		2684	C1_hi = C1.w[1];
		2685	C1_lo = C1.w[0];
		2686	// now add C2
		2687	if (x_sign == y_sign) {
		2688	// the result can overflow!
		2689	C1_lo = C1_lo + C2_lo;
		2690	C1_hi = C1_hi + C2_hi;
		2691	if (C1_lo < C1.w[0])
		2692	C1_hi++;
		2693	// test for overflow, possible only when C1 >= 10^34
		2694	if (C1_hi > 0x0001ed09bead87c0ull \|\| (C1_hi == 0x0001ed09bead87c0ull && C1_lo >= 0x378d8e6400000000ull)) { // C1 >= 10^34
		2695	// in this case q = P34 + 1 and x = q - P34 = 1, so multiply
		2696	// C'' = C'+ 5 = C1 + 5 by k1 ~ 10^(-1) calculated for P34 + 1
		2697	// decimal digits
		2698	// Calculate C'' = C' + 1/2 * 10^x
		2699	if (C1_lo >= 0xfffffffffffffffbull) { // low half add has carry
		2700	C1_lo = C1_lo + 5;
		2701	C1_hi = C1_hi + 1;
		2702	} else {
		2703	C1_lo = C1_lo + 5;
		2704	}
		2705	// the approximation of 10^(-1) was rounded up to 118 bits
		2706	// 10^(-1) =~ 33333333333333333333333333333400 * 2^-129
		2707	// 10^(-1) =~ 19999999999999999999999999999a00 * 2^-128
		2708	C1.w[1] = C1_hi;
		2709	C1.w[0] = C1_lo; // C''
		2710	ten2m1.w[1] = 0x1999999999999999ull;
		2711	ten2m1.w[0] = 0x9999999999999a00ull;
		2712	__mul_128x128_to_256 (P256, C1, ten2m1); // P256 = C, f
		2713	// C* is actually floor(C*) in this case
		2714	// the top Ex = 128 bits of 10^(-1) are
		2715	// T* = 0x00199999999999999999999999999999
		2716	// if (0 < f* < 10^(-x)) then
		2717	// if floor(C) is even then C = floor(C) - logical right
		2718	// shift; C has p decimal digits, correct by Prop. 1)
		2719	// else if floor(C) is odd C = floor(C) - 1 (logical right
		2720	// shift; C has p decimal digits, correct by Pr. 1)
		2721	// else
		2722	// C = floor(C*) (logical right shift; C has p decimal digits,
		2723	// correct by Property 1)
		2724	// n = C * 10^(e2+x)
		2725	if ((P256.w[1] \|\| P256.w[0])
		2726	&& (P256.w[1] < 0x1999999999999999ull
		2727	\|\| (P256.w[1] == 0x1999999999999999ull
		2728	&& P256.w[0] <= 0x9999999999999999ull))) {
		2729	// the result is a midpoint
		2730	if (P256.w[2] & 0x01) {
		2731	is_midpoint_gt_even = 1;
		2732	// if floor(C) is odd C = floor(C) - 1; the result is not 0
		2733	P256.w[2]--;
		2734	if (P256.w[2] == 0xffffffffffffffffull)
		2735	P256.w[3]--;
		2736	} else {
		2737	is_midpoint_lt_even = 1;
		2738	}
		2739	}
		2740	// n = Cstar * 10^(e2+1)
		2741	y_exp = y_exp + EXP_P1;
		2742	// C* != 10^P34 because C* has P34 digits
		2743	// check for overflow
		2744	if (y_exp == EXP_MAX_P1
		2745	&& (rnd_mode == ROUNDING_TO_NEAREST
		2746	\|\| rnd_mode == ROUNDING_TIES_AWAY)) {
		2747	// overflow for RN
		2748	res.w[1] = x_sign \| 0x7800000000000000ull; // +/-inf
		2749	res.w[0] = 0x0ull;
		2750	// set the inexact flag
		2751	*pfpsf \|= INEXACT_EXCEPTION;
		2752	// set the overflow flag
		2753	*pfpsf \|= OVERFLOW_EXCEPTION;
		2754	BID_SWAP128 (res);
		2755	BID_RETURN (res);
		2756	}
		2757	// if (0 < f* - 1/2 < 10^(-x)) then
		2758	// the result of the addition is exact
		2759	// else
		2760	// the result of the addition is inexact
		2761	if (P256.w[1] > 0x8000000000000000ull \|\| (P256.w[1] == 0x8000000000000000ull && P256.w[0] > 0x0ull)) { // the result may be exact
		2762	tmp64 = P256.w[1] - 0x8000000000000000ull; // f* - 1/2
		2763	if ((tmp64 > 0x1999999999999999ull
		2764	\|\| (tmp64 == 0x1999999999999999ull
		2765	&& P256.w[0] >= 0x9999999999999999ull))) {
		2766	// set the inexact flag
		2767	*pfpsf \|= INEXACT_EXCEPTION;
		2768	is_inexact = 1;
		2769	} // else the result is exact
		2770	} else { // the result is inexact
		2771	// set the inexact flag
		2772	*pfpsf \|= INEXACT_EXCEPTION;
		2773	is_inexact = 1;
		2774	}
		2775	C1_hi = P256.w[3];
		2776	C1_lo = P256.w[2];
		2777	if (!is_midpoint_gt_even && !is_midpoint_lt_even) {
		2778	is_inexact_lt_midpoint = is_inexact
		2779	&& (P256.w[1] & 0x8000000000000000ull);
		2780	is_inexact_gt_midpoint = is_inexact
		2781	&& !(P256.w[1] & 0x8000000000000000ull);
		2782	}
		2783	// general correction from RN to RA, RM, RP, RZ; result uses y_exp
		2784	if (rnd_mode != ROUNDING_TO_NEAREST) {
		2785	if ((!x_sign
		2786	&& ((rnd_mode == ROUNDING_UP
		2787	&& is_inexact_lt_midpoint)
		2788	\|\| ((rnd_mode == ROUNDING_TIES_AWAY
		2789	\|\| rnd_mode == ROUNDING_UP)
		2790	&& is_midpoint_gt_even))) \|\| (x_sign
		2791	&&
		2792	((rnd_mode ==
		2793	ROUNDING_DOWN
		2794	&&
		2795	is_inexact_lt_midpoint)
		2796	\|\|
		2797	((rnd_mode ==
		2798	ROUNDING_TIES_AWAY
		2799	\|\| rnd_mode
		2800	==
		2801	ROUNDING_DOWN)
		2802	&&
		2803	is_midpoint_gt_even))))
		2804	{
		2805	// C1 = C1 + 1
		2806	C1_lo = C1_lo + 1;
		2807	if (C1_lo == 0) { // rounding overflow in the low 64 bits
		2808	C1_hi = C1_hi + 1;
		2809	}
		2810	if (C1_hi == 0x0001ed09bead87c0ull
		2811	&& C1_lo == 0x378d8e6400000000ull) {
		2812	// C1 = 10^34 => rounding overflow
		2813	C1_hi = 0x0000314dc6448d93ull;
		2814	C1_lo = 0x38c15b0a00000000ull; // 10^33
		2815	y_exp = y_exp + EXP_P1;
		2816	}
		2817	} else
		2818	if ((is_midpoint_lt_even \|\| is_inexact_gt_midpoint) &&
		2819	((x_sign && (rnd_mode == ROUNDING_UP \|\|
		2820	rnd_mode == ROUNDING_TO_ZERO)) \|\|
		2821	(!x_sign && (rnd_mode == ROUNDING_DOWN \|\|
		2822	rnd_mode == ROUNDING_TO_ZERO)))) {
		2823	// C1 = C1 - 1
		2824	C1_lo = C1_lo - 1;
		2825	if (C1_lo == 0xffffffffffffffffull)
		2826	C1_hi--;
		2827	// check if we crossed into the lower decade
		2828	if (C1_hi == 0x0000314dc6448d93ull && C1_lo == 0x38c15b09ffffffffull) { // 10^33 - 1
		2829	C1_hi = 0x0001ed09bead87c0ull; // 10^34 - 1
		2830	C1_lo = 0x378d8e63ffffffffull;
		2831	y_exp = y_exp - EXP_P1;
		2832	// no underflow, because delta + q2 >= P34 + 1
		2833	}
		2834	} else {
		2835	; // exact, the result is already correct
		2836	}
		2837	// in all cases check for overflow (RN and RA solved already)
		2838	if (y_exp == EXP_MAX_P1) { // overflow
		2839	if ((rnd_mode == ROUNDING_DOWN && x_sign) \|\| // RM and res < 0
		2840	(rnd_mode == ROUNDING_UP && !x_sign)) { // RP and res > 0
		2841	C1_hi = 0x7800000000000000ull; // +inf
		2842	C1_lo = 0x0ull;
		2843	} else { // RM and res > 0, RP and res < 0, or RZ
		2844	C1_hi = 0x5fffed09bead87c0ull;
		2845	C1_lo = 0x378d8e63ffffffffull;
		2846	}
		2847	y_exp = 0; // x_sign is preserved
		2848	// set the inexact flag (in case the exact addition was exact)
		2849	*pfpsf \|= INEXACT_EXCEPTION;
		2850	// set the overflow flag
		2851	*pfpsf \|= OVERFLOW_EXCEPTION;
		2852	}
		2853	}
		2854	} // else if (C1 < 10^34) then C1 is the coeff.; the result is exact
		2855	// assemble the result
		2856	res.w[1] = x_sign \| y_exp \| C1_hi;
		2857	res.w[0] = C1_lo;
		2858	} else { // if x_sign != y_sign the result is exact
		2859	C1_lo = C2_lo - C1_lo;
		2860	C1_hi = C2_hi - C1_hi;
		2861	if (C1_lo > C2_lo)
		2862	C1_hi--;
		2863	if (C1_hi >= 0x8000000000000000ull) { // negative coefficient!
		2864	C1_lo = ~C1_lo;
		2865	C1_lo++;
		2866	C1_hi = ~C1_hi;
		2867	if (C1_lo == 0x0)
		2868	C1_hi++;
		2869	x_sign = y_sign; // the result will have the sign of y
		2870	}
		2871	// the result can be zero, but it cannot overflow
		2872	if (C1_lo == 0 && C1_hi == 0) {
		2873	// assemble the result
		2874	if (x_exp < y_exp)
		2875	res.w[1] = x_exp;
		2876	else
		2877	res.w[1] = y_exp;
		2878	res.w[0] = 0;
		2879	if (rnd_mode == ROUNDING_DOWN) {
		2880	res.w[1] \|= 0x8000000000000000ull;
		2881	}
		2882	BID_SWAP128 (res);
		2883	BID_RETURN (res);
		2884	}
		2885	// assemble the result
		2886	res.w[1] = y_sign \| y_exp \| C1_hi;
		2887	res.w[0] = C1_lo;
		2888	}
		2889	}
		2890	}
		2891	BID_SWAP128 (res);
		2892	BID_RETURN (res)
		2893	}
		2894	}
		2895
		2896
		2897
		2898	// bid128_sub stands for bid128qq_sub
		2899
		2900	/*****************************************************************************
		2901	* BID128 sub
		2902	****************************************************************************/
		2903
		2904	#if DECIMAL_CALL_BY_REFERENCE
		2905	void
		2906	bid128_sub (UINT128 * pres, UINT128 * px, UINT128 * py
		2907	_RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
		2908	_EXC_INFO_PARAM) {
		2909	UINT128 x = px, y = py;
		2910	#if !DECIMAL_GLOBAL_ROUNDING
		2911	unsigned int rnd_mode = *prnd_mode;
		2912	#endif
		2913	#else
		2914	UINT128
		2915	bid128_sub (UINT128 x, UINT128 y
		2916	_RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
		2917	_EXC_INFO_PARAM) {
		2918	#endif
		2919
		2920	UINT128 res;
		2921	UINT64 y_sign;
		2922
		2923	if ((y.w[HIGH_128W] & MASK_NAN) != MASK_NAN) { // y is not NAN
		2924	// change its sign
		2925	y_sign = y.w[HIGH_128W] & MASK_SIGN; // 0 for positive, MASK_SIGN for negative
		2926	if (y_sign)
		2927	y.w[HIGH_128W] = y.w[HIGH_128W] & 0x7fffffffffffffffull;
		2928	else
		2929	y.w[HIGH_128W] = y.w[HIGH_128W] \| 0x8000000000000000ull;
		2930	}
		2931	#if DECIMAL_CALL_BY_REFERENCE
		2932	bid128_add (&res, &x, &y
		2933	_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
		2934	_EXC_INFO_ARG);
		2935	#else
		2936	res = bid128_add (x, y
		2937	_RND_MODE_ARG _EXC_FLAGS_ARG _EXC_MASKS_ARG
		2938	_EXC_INFO_ARG);
		2939	#endif
		2940	BID_RETURN (res);
		2941	}

Subversion Repositories Kolibri OS

(root)/contrib/toolchain/gcc/5x/libgcc/config/libbid/bid128_add.c – Rev 6515