WebSVN – Kolibri OS – Blame – /contrib/toolchain/gcc/5x/libgcc/config/libbid/bid64_sqrt.c

Rev	Author	Line No.	Line
6515	serge	1	/* Copyright (C) 2007-2015 Free Software Foundation, Inc.
		2
		3	This file is part of GCC.
		4
		5	GCC is free software; you can redistribute it and/or modify it under
		6	the terms of the GNU General Public License as published by the Free
		7	Software Foundation; either version 3, or (at your option) any later
		8	version.
		9
		10	GCC is distributed in the hope that it will be useful, but WITHOUT ANY
		11	WARRANTY; without even the implied warranty of MERCHANTABILITY or
		12	FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
		13	for more details.
		14
		15	Under Section 7 of GPL version 3, you are granted additional
		16	permissions described in the GCC Runtime Library Exception, version
		17	3.1, as published by the Free Software Foundation.
		18
		19	You should have received a copy of the GNU General Public License and
		20	a copy of the GCC Runtime Library Exception along with this program;
		21	see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
		22	. */
		23
		24	/*****************************************************************************
		25	* BID64 square root
		26	*****************************************************************************
		27	*
		28	* Algorithm description:
		29	*
		30	* if(exponent_x is odd)
		31	* scale coefficient_x by 10, adjust exponent
		32	* - get lower estimate for number of digits in coefficient_x
		33	* - scale coefficient x to between 31 and 33 decimal digits
		34	* - in parallel, check for exact case and return if true
		35	* - get high part of result coefficient using double precision sqrt
		36	* - compute remainder and refine coefficient in one iteration (which
		37	* modifies it by at most 1)
		38	* - result exponent is easy to compute from the adjusted arg. exponent
		39	*
		40	****************************************************************************/
		41
		42	#include "bid_internal.h"
		43	#include "bid_sqrt_macros.h"
		44	#ifdef UNCHANGED_BINARY_STATUS_FLAGS
		45	#include
		46
		47	#define FE_ALL_FLAGS FE_INVALID\|FE_DIVBYZERO\|FE_OVERFLOW\|FE_UNDERFLOW\|FE_INEXACT
		48	#endif
		49
		50	extern double sqrt (double);
		51
		52	#if DECIMAL_CALL_BY_REFERENCE
		53
		54	void
		55	bid64_sqrt (UINT64 * pres,
		56	UINT64 *
		57	px _RND_MODE_PARAM _EXC_FLAGS_PARAM _EXC_MASKS_PARAM
		58	_EXC_INFO_PARAM) {
		59	UINT64 x;
		60	#else
		61
		62	UINT64
		63	bid64_sqrt (UINT64 x _RND_MODE_PARAM _EXC_FLAGS_PARAM
		64	_EXC_MASKS_PARAM _EXC_INFO_PARAM) {
		65	#endif
		66	UINT128 CA, CT;
		67	UINT64 sign_x, coefficient_x;
		68	UINT64 Q, Q2, A10, C4, R, R2, QE, res;
		69	SINT64 D;
		70	int_double t_scale;
		71	int_float tempx;
		72	double da, dq, da_h, da_l, dqe;
		73	int exponent_x, exponent_q, bin_expon_cx;
		74	int digits_x;
		75	int scale;
		76	#ifdef UNCHANGED_BINARY_STATUS_FLAGS
		77	fexcept_t binaryflags = 0;
		78	#endif
		79
		80	#if DECIMAL_CALL_BY_REFERENCE
		81	#if !DECIMAL_GLOBAL_ROUNDING
		82	_IDEC_round rnd_mode = *prnd_mode;
		83	#endif
		84	x = *px;
		85	#endif
		86
		87	// unpack arguments, check for NaN or Infinity
		88	if (!unpack_BID64 (&sign_x, &exponent_x, &coefficient_x, x)) {
		89	// x is Inf. or NaN or 0
		90	if ((x & INFINITY_MASK64) == INFINITY_MASK64) {
		91	res = coefficient_x;
		92	if ((coefficient_x & SSNAN_MASK64) == SINFINITY_MASK64) // -Infinity
		93	{
		94	res = NAN_MASK64;
		95	#ifdef SET_STATUS_FLAGS
		96	__set_status_flags (pfpsf, INVALID_EXCEPTION);
		97	#endif
		98	}
		99	#ifdef SET_STATUS_FLAGS
		100	if ((x & SNAN_MASK64) == SNAN_MASK64) // sNaN
		101	__set_status_flags (pfpsf, INVALID_EXCEPTION);
		102	#endif
		103	BID_RETURN (res & QUIET_MASK64);
		104	}
		105	// x is 0
		106	exponent_x = (exponent_x + DECIMAL_EXPONENT_BIAS) >> 1;
		107	res = sign_x \| (((UINT64) exponent_x) << 53);
		108	BID_RETURN (res);
		109	}
		110	// x<0?
		111	if (sign_x && coefficient_x) {
		112	res = NAN_MASK64;
		113	#ifdef SET_STATUS_FLAGS
		114	__set_status_flags (pfpsf, INVALID_EXCEPTION);
		115	#endif
		116	BID_RETURN (res);
		117	}
		118	#ifdef UNCHANGED_BINARY_STATUS_FLAGS
		119	(void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS);
		120	#endif
		121	//--- get number of bits in the coefficient of x ---
		122	tempx.d = (float) coefficient_x;
		123	bin_expon_cx = ((tempx.i >> 23) & 0xff) - 0x7f;
		124	digits_x = estimate_decimal_digits[bin_expon_cx];
		125	// add test for range
		126	if (coefficient_x >= power10_index_binexp[bin_expon_cx])
		127	digits_x++;
		128
		129	A10 = coefficient_x;
		130	if (exponent_x & 1) {
		131	A10 = (A10 << 2) + A10;
		132	A10 += A10;
		133	}
		134
		135	dqe = sqrt ((double) A10);
		136	QE = (UINT32) dqe;
		137	if (QE * QE == A10) {
		138	res =
		139	very_fast_get_BID64 (0, (exponent_x + DECIMAL_EXPONENT_BIAS) >> 1,
		140	QE);
		141	#ifdef UNCHANGED_BINARY_STATUS_FLAGS
		142	(void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
		143	#endif
		144	BID_RETURN (res);
		145	}
		146	// if exponent is odd, scale coefficient by 10
		147	scale = 31 - digits_x;
		148	exponent_q = exponent_x - scale;
		149	scale += (exponent_q & 1); // exp. bias is even
		150
		151	CT = power10_table_128[scale];
		152	__mul_64x128_short (CA, coefficient_x, CT);
		153
		154	// 2^64
		155	t_scale.i = 0x43f0000000000000ull;
		156	// convert CA to DP
		157	da_h = CA.w[1];
		158	da_l = CA.w[0];
		159	da = da_h * t_scale.d + da_l;
		160
		161	dq = sqrt (da);
		162
		163	Q = (UINT64) dq;
		164
		165	// get sign(sqrt(CA)-Q)
		166	R = CA.w[0] - Q * Q;
		167	R = ((SINT64) R) >> 63;
		168	D = R + R + 1;
		169
		170	exponent_q = (exponent_q + DECIMAL_EXPONENT_BIAS) >> 1;
		171
		172	#ifdef SET_STATUS_FLAGS
		173	__set_status_flags (pfpsf, INEXACT_EXCEPTION);
		174	#endif
		175
		176	#ifndef IEEE_ROUND_NEAREST
		177	#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
		178	if (!((rnd_mode) & 3)) {
		179	#endif
		180	#endif
		181
		182	// midpoint to check
		183	Q2 = Q + Q + D;
		184	C4 = CA.w[0] << 2;
		185
		186	// get sign(-sqrt(CA)+Midpoint)
		187	R2 = Q2 * Q2 - C4;
		188	R2 = ((SINT64) R2) >> 63;
		189
		190	// adjust Q if R!=R2
		191	Q += (D & (R ^ R2));
		192	#ifndef IEEE_ROUND_NEAREST
		193	#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
		194	} else {
		195	C4 = CA.w[0];
		196	Q += D;
		197	if ((SINT64) (Q * Q - C4) > 0)
		198	Q--;
		199	if (rnd_mode == ROUNDING_UP)
		200	Q++;
		201	}
		202	#endif
		203	#endif
		204
		205	res = fast_get_BID64 (0, exponent_q, Q);
		206	#ifdef UNCHANGED_BINARY_STATUS_FLAGS
		207	(void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
		208	#endif
		209	BID_RETURN (res);
		210	}
		211
		212
		213	TYPE0_FUNCTION_ARG1 (UINT64, bid64q_sqrt, x)
		214
		215	UINT256 M256, C4, C8;
		216	UINT128 CX, CX2, A10, S2, T128, CS, CSM, CS2, C256, CS1,
		217	mul_factor2_long = { {0x0ull, 0x0ull} }, QH, Tmp, TP128, Qh, Ql;
		218	UINT64 sign_x, Carry, B10, res, mul_factor, mul_factor2 = 0x0ull, CS0;
		219	SINT64 D;
		220	int_float fx, f64;
		221	int exponent_x, bin_expon_cx, done = 0;
		222	int digits, scale, exponent_q = 0, exact = 1, amount, extra_digits;
		223	#ifdef UNCHANGED_BINARY_STATUS_FLAGS
		224	fexcept_t binaryflags = 0;
		225	#endif
		226
		227	// unpack arguments, check for NaN or Infinity
		228	if (!unpack_BID128_value (&sign_x, &exponent_x, &CX, x)) {
		229	res = CX.w[1];
		230	// NaN ?
		231	if ((x.w[1] & 0x7c00000000000000ull) == 0x7c00000000000000ull) {
		232	#ifdef SET_STATUS_FLAGS
		233	if ((x.w[1] & 0x7e00000000000000ull) == 0x7e00000000000000ull) // sNaN
		234	__set_status_flags (pfpsf, INVALID_EXCEPTION);
		235	#endif
		236	Tmp.w[1] = (CX.w[1] & 0x00003fffffffffffull);
		237	Tmp.w[0] = CX.w[0];
		238	TP128 = reciprocals10_128[18];
		239	__mul_128x128_full (Qh, Ql, Tmp, TP128);
		240	amount = recip_scale[18];
		241	__shr_128 (Tmp, Qh, amount);
		242	res = (CX.w[1] & 0xfc00000000000000ull) \| Tmp.w[0];
		243	BID_RETURN (res);
		244	}
		245	// x is Infinity?
		246	if ((x.w[1] & 0x7800000000000000ull) == 0x7800000000000000ull) {
		247	if (sign_x) {
		248	// -Inf, return NaN
		249	res = 0x7c00000000000000ull;
		250	#ifdef SET_STATUS_FLAGS
		251	__set_status_flags (pfpsf, INVALID_EXCEPTION);
		252	#endif
		253	}
		254	BID_RETURN (res);
		255	}
		256	// x is 0 otherwise
		257
		258	exponent_x =
		259	((exponent_x - DECIMAL_EXPONENT_BIAS_128) >> 1) +
		260	DECIMAL_EXPONENT_BIAS;
		261	if (exponent_x < 0)
		262	exponent_x = 0;
		263	if (exponent_x > DECIMAL_MAX_EXPON_64)
		264	exponent_x = DECIMAL_MAX_EXPON_64;
		265	//res= sign_x \| (((UINT64)exponent_x)<<53);
		266	res = get_BID64 (sign_x, exponent_x, 0, rnd_mode, pfpsf);
		267	BID_RETURN (res);
		268	}
		269	if (sign_x) {
		270	res = 0x7c00000000000000ull;
		271	#ifdef SET_STATUS_FLAGS
		272	__set_status_flags (pfpsf, INVALID_EXCEPTION);
		273	#endif
		274	BID_RETURN (res);
		275	}
		276	#ifdef UNCHANGED_BINARY_STATUS_FLAGS
		277	(void) fegetexceptflag (&binaryflags, FE_ALL_FLAGS);
		278	#endif
		279
		280	// 2^64
		281	f64.i = 0x5f800000;
		282
		283	// fx ~ CX
		284	fx.d = (float) CX.w[1] * f64.d + (float) CX.w[0];
		285	bin_expon_cx = ((fx.i >> 23) & 0xff) - 0x7f;
		286	digits = estimate_decimal_digits[bin_expon_cx];
		287
		288	A10 = CX;
		289	if (exponent_x & 1) {
		290	A10.w[1] = (CX.w[1] << 3) \| (CX.w[0] >> 61);
		291	A10.w[0] = CX.w[0] << 3;
		292	CX2.w[1] = (CX.w[1] << 1) \| (CX.w[0] >> 63);
		293	CX2.w[0] = CX.w[0] << 1;
		294	__add_128_128 (A10, A10, CX2);
		295	}
		296
		297	C256.w[1] = A10.w[1];
		298	C256.w[0] = A10.w[0];
		299	CS.w[0] = short_sqrt128 (A10);
		300	CS.w[1] = 0;
		301	mul_factor = 0;
		302	// check for exact result
		303	if (CS.w[0] < 10000000000000000ull) {
		304	if (CS.w[0] * CS.w[0] == A10.w[0]) {
		305	__sqr64_fast (S2, CS.w[0]);
		306	if (S2.w[1] == A10.w[1]) // && S2.w[0]==A10.w[0])
		307	{
		308	res =
		309	get_BID64 (0,
		310	((exponent_x - DECIMAL_EXPONENT_BIAS_128) >> 1) +
		311	DECIMAL_EXPONENT_BIAS, CS.w[0], rnd_mode, pfpsf);
		312	#ifdef UNCHANGED_BINARY_STATUS_FLAGS
		313	(void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
		314	#endif
		315	BID_RETURN (res);
		316	}
		317	}
		318	if (CS.w[0] >= 1000000000000000ull) {
		319	done = 1;
		320	exponent_q = exponent_x;
		321	C256.w[1] = A10.w[1];
		322	C256.w[0] = A10.w[0];
		323	}
		324	#ifdef SET_STATUS_FLAGS
		325	__set_status_flags (pfpsf, INEXACT_EXCEPTION);
		326	#endif
		327	exact = 0;
		328	} else {
		329	B10 = 0x3333333333333334ull;
		330	__mul_64x64_to_128_full (CS2, CS.w[0], B10);
		331	CS0 = CS2.w[1] >> 1;
		332	if (CS.w[0] != ((CS0 << 3) + (CS0 << 1))) {
		333	#ifdef SET_STATUS_FLAGS
		334	__set_status_flags (pfpsf, INEXACT_EXCEPTION);
		335	#endif
		336	exact = 0;
		337	}
		338	done = 1;
		339	CS.w[0] = CS0;
		340	exponent_q = exponent_x + 2;
		341	mul_factor = 10;
		342	mul_factor2 = 100;
		343	if (CS.w[0] >= 10000000000000000ull) {
		344	__mul_64x64_to_128_full (CS2, CS.w[0], B10);
		345	CS0 = CS2.w[1] >> 1;
		346	if (CS.w[0] != ((CS0 << 3) + (CS0 << 1))) {
		347	#ifdef SET_STATUS_FLAGS
		348	__set_status_flags (pfpsf, INEXACT_EXCEPTION);
		349	#endif
		350	exact = 0;
		351	}
		352	exponent_q += 2;
		353	CS.w[0] = CS0;
		354	mul_factor = 100;
		355	mul_factor2 = 10000;
		356	}
		357	if (exact) {
		358	CS0 = CS.w[0] * mul_factor;
		359	__sqr64_fast (CS1, CS0)
		360	if ((CS1.w[0] != A10.w[0]) \|\| (CS1.w[1] != A10.w[1])) {
		361	#ifdef SET_STATUS_FLAGS
		362	__set_status_flags (pfpsf, INEXACT_EXCEPTION);
		363	#endif
		364	exact = 0;
		365	}
		366	}
		367	}
		368
		369	if (!done) {
		370	// get number of digits in CX
		371	D = CX.w[1] - power10_index_binexp_128[bin_expon_cx].w[1];
		372	if (D > 0
		373	\|\| (!D && CX.w[0] >= power10_index_binexp_128[bin_expon_cx].w[0]))
		374	digits++;
		375
		376	// if exponent is odd, scale coefficient by 10
		377	scale = 31 - digits;
		378	exponent_q = exponent_x - scale;
		379	scale += (exponent_q & 1); // exp. bias is even
		380
		381	T128 = power10_table_128[scale];
		382	__mul_128x128_low (C256, CX, T128);
		383
		384
		385	CS.w[0] = short_sqrt128 (C256);
		386	}
		387	//printf("CS=%016I64x\n",CS.w[0]);
		388
		389	exponent_q =
		390	((exponent_q - DECIMAL_EXPONENT_BIAS_128) >> 1) +
		391	DECIMAL_EXPONENT_BIAS;
		392	if ((exponent_q < 0) && (exponent_q + MAX_FORMAT_DIGITS >= 0)) {
		393	extra_digits = -exponent_q;
		394	exponent_q = 0;
		395
		396	// get coeff*(2^M[extra_digits])/10^extra_digits
		397	__mul_64x64_to_128 (QH, CS.w[0], reciprocals10_64[extra_digits]);
		398
		399	// now get P/10^extra_digits: shift Q_high right by M[extra_digits]-128
		400	amount = short_recip_scale[extra_digits];
		401
		402	CS0 = QH.w[1] >> amount;
		403
		404	#ifdef SET_STATUS_FLAGS
		405	if (exact) {
		406	if (CS.w[0] != CS0 * power10_table_128[extra_digits].w[0])
		407	exact = 0;
		408	}
		409	if (!exact)
		410	__set_status_flags (pfpsf, UNDERFLOW_EXCEPTION \| INEXACT_EXCEPTION);
		411	#endif
		412
		413	CS.w[0] = CS0;
		414	if (!mul_factor)
		415	mul_factor = 1;
		416	mul_factor *= power10_table_128[extra_digits].w[0];
		417	__mul_64x64_to_128 (mul_factor2_long, mul_factor, mul_factor);
		418	if (mul_factor2_long.w[1])
		419	mul_factor2 = 0;
		420	else
		421	mul_factor2 = mul_factor2_long.w[1];
		422	}
		423	// 4*C256
		424	C4.w[1] = (C256.w[1] << 2) \| (C256.w[0] >> 62);
		425	C4.w[0] = C256.w[0] << 2;
		426
		427	#ifndef IEEE_ROUND_NEAREST
		428	#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
		429	if (!((rnd_mode) & 3)) {
		430	#endif
		431	#endif
		432	// compare to midpoints
		433	CSM.w[0] = (CS.w[0] + CS.w[0]) \| 1;
		434	//printf("C256=%016I64x %016I64x, CSM=%016I64x %016I64x %016I64x\n",C4.w[1],C4.w[0],CSM.w[1],CSM.w[0], CS.w[0]);
		435	if (mul_factor)
		436	CSM.w[0] *= mul_factor;
		437	// CSM^2
		438	__mul_64x64_to_128 (M256, CSM.w[0], CSM.w[0]);
		439	//__mul_128x128_to_256(M256, CSM, CSM);
		440
		441	if (C4.w[1] > M256.w[1] \|\|
		442	(C4.w[1] == M256.w[1] && C4.w[0] > M256.w[0])) {
		443	// round up
		444	CS.w[0]++;
		445	} else {
		446	C8.w[0] = CS.w[0] << 3;
		447	C8.w[1] = 0;
		448	if (mul_factor) {
		449	if (mul_factor2) {
		450	__mul_64x64_to_128 (C8, C8.w[0], mul_factor2);
		451	} else {
		452	__mul_64x128_low (C8, C8.w[0], mul_factor2_long);
		453	}
		454	}
		455	// M256 - 8*CSM
		456	__sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]);
		457	M256.w[1] = M256.w[1] - C8.w[1] - Carry;
		458
		459	// if CSM' > C256, round up
		460	if (M256.w[1] > C4.w[1] \|\|
		461	(M256.w[1] == C4.w[1] && M256.w[0] > C4.w[0])) {
		462	// round down
		463	if (CS.w[0])
		464	CS.w[0]--;
		465	}
		466	}
		467	#ifndef IEEE_ROUND_NEAREST
		468	#ifndef IEEE_ROUND_NEAREST_TIES_AWAY
		469	} else {
		470	CS.w[0]++;
		471	CSM.w[0] = CS.w[0];
		472	C8.w[0] = CSM.w[0] << 1;
		473	if (mul_factor)
		474	CSM.w[0] *= mul_factor;
		475	__mul_64x64_to_128 (M256, CSM.w[0], CSM.w[0]);
		476	C8.w[1] = 0;
		477	if (mul_factor) {
		478	if (mul_factor2) {
		479	__mul_64x64_to_128 (C8, C8.w[0], mul_factor2);
		480	} else {
		481	__mul_64x128_low (C8, C8.w[0], mul_factor2_long);
		482	}
		483	}
		484	//printf("C256=%016I64x %016I64x, CSM=%016I64x %016I64x %016I64x\n",C256.w[1],C256.w[0],M256.w[1],M256.w[0], CS.w[0]);
		485
		486	if (M256.w[1] > C256.w[1] \|\|
		487	(M256.w[1] == C256.w[1] && M256.w[0] > C256.w[0])) {
		488	__sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]);
		489	M256.w[1] = M256.w[1] - Carry - C8.w[1];
		490	M256.w[0]++;
		491	if (!M256.w[0]) {
		492	M256.w[1]++;
		493
		494	}
		495
		496	if ((M256.w[1] > C256.w[1] \|\|
		497	(M256.w[1] == C256.w[1] && M256.w[0] > C256.w[0]))
		498	&& (CS.w[0] > 1)) {
		499
		500	CS.w[0]--;
		501
		502	if (CS.w[0] > 1) {
		503	__sub_borrow_out (M256.w[0], Carry, M256.w[0], C8.w[0]);
		504	M256.w[1] = M256.w[1] - Carry - C8.w[1];
		505	M256.w[0]++;
		506	if (!M256.w[0]) {
		507	M256.w[1]++;
		508	}
		509
		510	if (M256.w[1] > C256.w[1] \|\|
		511	(M256.w[1] == C256.w[1] && M256.w[0] > C256.w[0]))
		512	CS.w[0]--;
		513	}
		514	}
		515	}
		516
		517	else {
		518	/*__add_carry_out(M256.w[0], Carry, M256.w[0], C8.w[0]);
		519	M256.w[1] = M256.w[1] + Carry + C8.w[1];
		520	M256.w[0]++;
		521	if(!M256.w[0])
		522	{
		523	M256.w[1]++;
		524	}
		525	CS.w[0]++;
		526	if(M256.w[1]
		527	(M256.w[1]==C256.w[1] && M256.w[0]<=C256.w[0]))
		528	{
		529	CS.w[0]++;
		530	}*/
		531	CS.w[0]++;
		532	}
		533	//printf("C256=%016I64x %016I64x, CSM=%016I64x %016I64x %016I64x %d\n",C4.w[1],C4.w[0],M256.w[1],M256.w[0], CS.w[0], exact);
		534	// RU?
		535	if (((rnd_mode) != ROUNDING_UP) \|\| exact) {
		536	if (CS.w[0])
		537	CS.w[0]--;
		538	}
		539
		540	}
		541	#endif
		542	#endif
		543	//printf("C256=%016I64x %016I64x, CSM=%016I64x %016I64x %016I64x %d\n",C4.w[1],C4.w[0],M256.w[1],M256.w[0], CS.w[0], exact);
		544
		545	res = get_BID64 (0, exponent_q, CS.w[0], rnd_mode, pfpsf);
		546	#ifdef UNCHANGED_BINARY_STATUS_FLAGS
		547	(void) fesetexceptflag (&binaryflags, FE_ALL_FLAGS);
		548	#endif
		549	BID_RETURN (res);
		550
		551
		552	}

Subversion Repositories Kolibri OS

(root)/contrib/toolchain/gcc/5x/libgcc/config/libbid/bid64_sqrt.c – Rev 6515