Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
1906 serge 1
/* Software floating-point emulation.
2
   Basic four-word fraction declaration and manipulation.
3
   Copyright (C) 1997,1998,1999,2006,2007 Free Software Foundation, Inc.
4
   This file is part of the GNU C Library.
5
   Contributed by Richard Henderson (rth@cygnus.com),
6
		  Jakub Jelinek (jj@ultra.linux.cz),
7
		  David S. Miller (davem@redhat.com) and
8
		  Peter Maydell (pmaydell@chiark.greenend.org.uk).
9
 
10
   The GNU C Library is free software; you can redistribute it and/or
11
   modify it under the terms of the GNU Lesser General Public
12
   License as published by the Free Software Foundation; either
13
   version 2.1 of the License, or (at your option) any later version.
14
 
15
   In addition to the permissions in the GNU Lesser General Public
16
   License, the Free Software Foundation gives you unlimited
17
   permission to link the compiled version of this file into
18
   combinations with other programs, and to distribute those
19
   combinations without any restriction coming from the use of this
20
   file.  (The Lesser General Public License restrictions do apply in
21
   other respects; for example, they cover modification of the file,
22
   and distribution when not linked into a combine executable.)
23
 
24
   The GNU C Library is distributed in the hope that it will be useful,
25
   but WITHOUT ANY WARRANTY; without even the implied warranty of
26
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
27
   Lesser General Public License for more details.
28
 
29
   You should have received a copy of the GNU Lesser General Public
30
   License along with the GNU C Library; if not, write to the Free
31
   Software Foundation, 51 Franklin Street, Fifth Floor, Boston,
32
   MA 02110-1301, USA.  */
33
 
34
#define _FP_FRAC_DECL_4(X)	_FP_W_TYPE X##_f[4]
35
#define _FP_FRAC_COPY_4(D,S)			\
36
  (D##_f[0] = S##_f[0], D##_f[1] = S##_f[1],	\
37
   D##_f[2] = S##_f[2], D##_f[3] = S##_f[3])
38
#define _FP_FRAC_SET_4(X,I)	__FP_FRAC_SET_4(X, I)
39
#define _FP_FRAC_HIGH_4(X)	(X##_f[3])
40
#define _FP_FRAC_LOW_4(X)	(X##_f[0])
41
#define _FP_FRAC_WORD_4(X,w)	(X##_f[w])
42
 
43
#define _FP_FRAC_SLL_4(X,N)						\
44
  do {									\
45
    _FP_I_TYPE _up, _down, _skip, _i;					\
46
    _skip = (N) / _FP_W_TYPE_SIZE;					\
47
    _up = (N) % _FP_W_TYPE_SIZE;					\
48
    _down = _FP_W_TYPE_SIZE - _up;					\
49
    if (!_up)								\
50
      for (_i = 3; _i >= _skip; --_i)					\
51
	X##_f[_i] = X##_f[_i-_skip];					\
52
    else								\
53
      {									\
54
	for (_i = 3; _i > _skip; --_i)					\
55
	  X##_f[_i] = X##_f[_i-_skip] << _up				\
56
		      | X##_f[_i-_skip-1] >> _down;			\
57
	X##_f[_i--] = X##_f[0] << _up; 					\
58
      }									\
59
    for (; _i >= 0; --_i)						\
60
      X##_f[_i] = 0;							\
61
  } while (0)
62
 
63
/* This one was broken too */
64
#define _FP_FRAC_SRL_4(X,N)						\
65
  do {									\
66
    _FP_I_TYPE _up, _down, _skip, _i;					\
67
    _skip = (N) / _FP_W_TYPE_SIZE;					\
68
    _down = (N) % _FP_W_TYPE_SIZE;					\
69
    _up = _FP_W_TYPE_SIZE - _down;					\
70
    if (!_down)								\
71
      for (_i = 0; _i <= 3-_skip; ++_i)					\
72
	X##_f[_i] = X##_f[_i+_skip];					\
73
    else								\
74
      {									\
75
	for (_i = 0; _i < 3-_skip; ++_i)				\
76
	  X##_f[_i] = X##_f[_i+_skip] >> _down				\
77
		      | X##_f[_i+_skip+1] << _up;			\
78
	X##_f[_i++] = X##_f[3] >> _down;				\
79
      }									\
80
    for (; _i < 4; ++_i)						\
81
      X##_f[_i] = 0;							\
82
  } while (0)
83
 
84
 
85
/* Right shift with sticky-lsb.
86
 * What this actually means is that we do a standard right-shift,
87
 * but that if any of the bits that fall off the right hand side
88
 * were one then we always set the LSbit.
89
 */
90
#define _FP_FRAC_SRST_4(X,S,N,size)			\
91
  do {							\
92
    _FP_I_TYPE _up, _down, _skip, _i;			\
93
    _FP_W_TYPE _s;					\
94
    _skip = (N) / _FP_W_TYPE_SIZE;			\
95
    _down = (N) % _FP_W_TYPE_SIZE;			\
96
    _up = _FP_W_TYPE_SIZE - _down;			\
97
    for (_s = _i = 0; _i < _skip; ++_i)			\
98
      _s |= X##_f[_i];					\
99
    if (!_down)						\
100
      for (_i = 0; _i <= 3-_skip; ++_i)			\
101
	X##_f[_i] = X##_f[_i+_skip];			\
102
    else						\
103
      {							\
104
	_s |= X##_f[_i] << _up;				\
105
	for (_i = 0; _i < 3-_skip; ++_i)		\
106
	  X##_f[_i] = X##_f[_i+_skip] >> _down		\
107
		      | X##_f[_i+_skip+1] << _up;	\
108
	X##_f[_i++] = X##_f[3] >> _down;		\
109
      }							\
110
    for (; _i < 4; ++_i)				\
111
      X##_f[_i] = 0;					\
112
    S = (_s != 0);					\
113
  } while (0)
114
 
115
#define _FP_FRAC_SRS_4(X,N,size)		\
116
  do {						\
117
    int _sticky;				\
118
    _FP_FRAC_SRST_4(X, _sticky, N, size);	\
119
    X##_f[0] |= _sticky;			\
120
  } while (0)
121
 
122
#define _FP_FRAC_ADD_4(R,X,Y)						\
123
  __FP_FRAC_ADD_4(R##_f[3], R##_f[2], R##_f[1], R##_f[0],		\
124
		  X##_f[3], X##_f[2], X##_f[1], X##_f[0],		\
125
		  Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
126
 
127
#define _FP_FRAC_SUB_4(R,X,Y)						\
128
  __FP_FRAC_SUB_4(R##_f[3], R##_f[2], R##_f[1], R##_f[0],		\
129
		  X##_f[3], X##_f[2], X##_f[1], X##_f[0],		\
130
		  Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
131
 
132
#define _FP_FRAC_DEC_4(X,Y)						\
133
  __FP_FRAC_DEC_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0],		\
134
		  Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
135
 
136
#define _FP_FRAC_ADDI_4(X,I)						\
137
  __FP_FRAC_ADDI_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0], I)
138
 
139
#define _FP_ZEROFRAC_4  0,0,0,0
140
#define _FP_MINFRAC_4   0,0,0,1
141
#define _FP_MAXFRAC_4	(~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0)
142
 
143
#define _FP_FRAC_ZEROP_4(X)     ((X##_f[0] | X##_f[1] | X##_f[2] | X##_f[3]) == 0)
144
#define _FP_FRAC_NEGP_4(X)      ((_FP_WS_TYPE)X##_f[3] < 0)
145
#define _FP_FRAC_OVERP_4(fs,X)  (_FP_FRAC_HIGH_##fs(X) & _FP_OVERFLOW_##fs)
146
#define _FP_FRAC_CLEAR_OVERP_4(fs,X)  (_FP_FRAC_HIGH_##fs(X) &= ~_FP_OVERFLOW_##fs)
147
 
148
#define _FP_FRAC_EQ_4(X,Y)				\
149
 (X##_f[0] == Y##_f[0] && X##_f[1] == Y##_f[1]		\
150
  && X##_f[2] == Y##_f[2] && X##_f[3] == Y##_f[3])
151
 
152
#define _FP_FRAC_GT_4(X,Y)				\
153
 (X##_f[3] > Y##_f[3] ||				\
154
  (X##_f[3] == Y##_f[3] && (X##_f[2] > Y##_f[2] ||	\
155
   (X##_f[2] == Y##_f[2] && (X##_f[1] > Y##_f[1] ||	\
156
    (X##_f[1] == Y##_f[1] && X##_f[0] > Y##_f[0])	\
157
   ))							\
158
  ))							\
159
 )
160
 
161
#define _FP_FRAC_GE_4(X,Y)				\
162
 (X##_f[3] > Y##_f[3] ||				\
163
  (X##_f[3] == Y##_f[3] && (X##_f[2] > Y##_f[2] ||	\
164
   (X##_f[2] == Y##_f[2] && (X##_f[1] > Y##_f[1] ||	\
165
    (X##_f[1] == Y##_f[1] && X##_f[0] >= Y##_f[0])	\
166
   ))							\
167
  ))							\
168
 )
169
 
170
 
171
#define _FP_FRAC_CLZ_4(R,X)		\
172
  do {					\
173
    if (X##_f[3])			\
174
    {					\
175
	__FP_CLZ(R,X##_f[3]);		\
176
    }					\
177
    else if (X##_f[2])			\
178
    {					\
179
	__FP_CLZ(R,X##_f[2]);		\
180
	R += _FP_W_TYPE_SIZE;		\
181
    }					\
182
    else if (X##_f[1])			\
183
    {					\
184
	__FP_CLZ(R,X##_f[1]);		\
185
	R += _FP_W_TYPE_SIZE*2;		\
186
    }					\
187
    else				\
188
    {					\
189
	__FP_CLZ(R,X##_f[0]);		\
190
	R += _FP_W_TYPE_SIZE*3;		\
191
    }					\
192
  } while(0)
193
 
194
 
195
#define _FP_UNPACK_RAW_4(fs, X, val)				\
196
  do {								\
197
    union _FP_UNION_##fs _flo; _flo.flt = (val);		\
198
    X##_f[0] = _flo.bits.frac0;					\
199
    X##_f[1] = _flo.bits.frac1;					\
200
    X##_f[2] = _flo.bits.frac2;					\
201
    X##_f[3] = _flo.bits.frac3;					\
202
    X##_e  = _flo.bits.exp;					\
203
    X##_s  = _flo.bits.sign;					\
204
  } while (0)
205
 
206
#define _FP_UNPACK_RAW_4_P(fs, X, val)				\
207
  do {								\
208
    union _FP_UNION_##fs *_flo =				\
209
      (union _FP_UNION_##fs *)(val);				\
210
								\
211
    X##_f[0] = _flo->bits.frac0;				\
212
    X##_f[1] = _flo->bits.frac1;				\
213
    X##_f[2] = _flo->bits.frac2;				\
214
    X##_f[3] = _flo->bits.frac3;				\
215
    X##_e  = _flo->bits.exp;					\
216
    X##_s  = _flo->bits.sign;					\
217
  } while (0)
218
 
219
#define _FP_PACK_RAW_4(fs, val, X)				\
220
  do {								\
221
    union _FP_UNION_##fs _flo;					\
222
    _flo.bits.frac0 = X##_f[0];					\
223
    _flo.bits.frac1 = X##_f[1];					\
224
    _flo.bits.frac2 = X##_f[2];					\
225
    _flo.bits.frac3 = X##_f[3];					\
226
    _flo.bits.exp   = X##_e;					\
227
    _flo.bits.sign  = X##_s;					\
228
    (val) = _flo.flt;				   		\
229
  } while (0)
230
 
231
#define _FP_PACK_RAW_4_P(fs, val, X)				\
232
  do {								\
233
    union _FP_UNION_##fs *_flo =				\
234
      (union _FP_UNION_##fs *)(val);				\
235
								\
236
    _flo->bits.frac0 = X##_f[0];				\
237
    _flo->bits.frac1 = X##_f[1];				\
238
    _flo->bits.frac2 = X##_f[2];				\
239
    _flo->bits.frac3 = X##_f[3];				\
240
    _flo->bits.exp   = X##_e;					\
241
    _flo->bits.sign  = X##_s;					\
242
  } while (0)
243
 
244
/*
245
 * Multiplication algorithms:
246
 */
247
 
248
/* Given a 1W * 1W => 2W primitive, do the extended multiplication.  */
249
 
250
#define _FP_MUL_MEAT_4_wide(wfracbits, R, X, Y, doit)			    \
251
  do {									    \
252
    _FP_FRAC_DECL_8(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c);	    \
253
    _FP_FRAC_DECL_2(_d); _FP_FRAC_DECL_2(_e); _FP_FRAC_DECL_2(_f);	    \
254
									    \
255
    doit(_FP_FRAC_WORD_8(_z,1), _FP_FRAC_WORD_8(_z,0), X##_f[0], Y##_f[0]); \
256
    doit(_b_f1, _b_f0, X##_f[0], Y##_f[1]);				    \
257
    doit(_c_f1, _c_f0, X##_f[1], Y##_f[0]);				    \
258
    doit(_d_f1, _d_f0, X##_f[1], Y##_f[1]);				    \
259
    doit(_e_f1, _e_f0, X##_f[0], Y##_f[2]);				    \
260
    doit(_f_f1, _f_f0, X##_f[2], Y##_f[0]);				    \
261
    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2),	    \
262
		    _FP_FRAC_WORD_8(_z,1), 0,_b_f1,_b_f0,		    \
263
		    0,0,_FP_FRAC_WORD_8(_z,1));				    \
264
    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2),	    \
265
		    _FP_FRAC_WORD_8(_z,1), 0,_c_f1,_c_f0,		    \
266
		    _FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2),	    \
267
		    _FP_FRAC_WORD_8(_z,1));				    \
268
    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3),	    \
269
		    _FP_FRAC_WORD_8(_z,2), 0,_d_f1,_d_f0,		    \
270
		    0,_FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2));	    \
271
    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3),	    \
272
		    _FP_FRAC_WORD_8(_z,2), 0,_e_f1,_e_f0,		    \
273
		    _FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3),	    \
274
		    _FP_FRAC_WORD_8(_z,2));				    \
275
    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3),	    \
276
		    _FP_FRAC_WORD_8(_z,2), 0,_f_f1,_f_f0,		    \
277
		    _FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3),	    \
278
		    _FP_FRAC_WORD_8(_z,2));				    \
279
    doit(_b_f1, _b_f0, X##_f[0], Y##_f[3]);				    \
280
    doit(_c_f1, _c_f0, X##_f[3], Y##_f[0]);				    \
281
    doit(_d_f1, _d_f0, X##_f[1], Y##_f[2]);				    \
282
    doit(_e_f1, _e_f0, X##_f[2], Y##_f[1]);				    \
283
    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),	    \
284
		    _FP_FRAC_WORD_8(_z,3), 0,_b_f1,_b_f0,		    \
285
		    0,_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3));	    \
286
    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),	    \
287
		    _FP_FRAC_WORD_8(_z,3), 0,_c_f1,_c_f0,		    \
288
		    _FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),	    \
289
		    _FP_FRAC_WORD_8(_z,3));				    \
290
    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),	    \
291
		    _FP_FRAC_WORD_8(_z,3), 0,_d_f1,_d_f0,		    \
292
		    _FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),	    \
293
		    _FP_FRAC_WORD_8(_z,3));				    \
294
    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),	    \
295
		    _FP_FRAC_WORD_8(_z,3), 0,_e_f1,_e_f0,		    \
296
		    _FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4),	    \
297
		    _FP_FRAC_WORD_8(_z,3));				    \
298
    doit(_b_f1, _b_f0, X##_f[2], Y##_f[2]);				    \
299
    doit(_c_f1, _c_f0, X##_f[1], Y##_f[3]);				    \
300
    doit(_d_f1, _d_f0, X##_f[3], Y##_f[1]);				    \
301
    doit(_e_f1, _e_f0, X##_f[2], Y##_f[3]);				    \
302
    doit(_f_f1, _f_f0, X##_f[3], Y##_f[2]);				    \
303
    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5),	    \
304
		    _FP_FRAC_WORD_8(_z,4), 0,_b_f1,_b_f0,		    \
305
		    0,_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4));	    \
306
    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5),	    \
307
		    _FP_FRAC_WORD_8(_z,4), 0,_c_f1,_c_f0,		    \
308
		    _FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5),	    \
309
		    _FP_FRAC_WORD_8(_z,4));				    \
310
    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5),	    \
311
		    _FP_FRAC_WORD_8(_z,4), 0,_d_f1,_d_f0,		    \
312
		    _FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5),	    \
313
		    _FP_FRAC_WORD_8(_z,4));				    \
314
    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6),	    \
315
		    _FP_FRAC_WORD_8(_z,5), 0,_e_f1,_e_f0,		    \
316
		    0,_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5));	    \
317
    __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6),	    \
318
		    _FP_FRAC_WORD_8(_z,5), 0,_f_f1,_f_f0,		    \
319
		    _FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6),	    \
320
		    _FP_FRAC_WORD_8(_z,5));				    \
321
    doit(_b_f1, _b_f0, X##_f[3], Y##_f[3]);				    \
322
    __FP_FRAC_ADD_2(_FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6),	    \
323
		    _b_f1,_b_f0,					    \
324
		    _FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6));	    \
325
									    \
326
    /* Normalize since we know where the msb of the multiplicands	    \
327
       were (bit B), we know that the msb of the of the product is	    \
328
       at either 2B or 2B-1.  */					    \
329
    _FP_FRAC_SRS_8(_z, wfracbits-1, 2*wfracbits);			    \
330
    __FP_FRAC_SET_4(R, _FP_FRAC_WORD_8(_z,3), _FP_FRAC_WORD_8(_z,2),	    \
331
		    _FP_FRAC_WORD_8(_z,1), _FP_FRAC_WORD_8(_z,0));	    \
332
  } while (0)
333
 
334
#define _FP_MUL_MEAT_4_gmp(wfracbits, R, X, Y)				    \
335
  do {									    \
336
    _FP_FRAC_DECL_8(_z);						    \
337
									    \
338
    mpn_mul_n(_z_f, _x_f, _y_f, 4);					    \
339
									    \
340
    /* Normalize since we know where the msb of the multiplicands	    \
341
       were (bit B), we know that the msb of the of the product is	    \
342
       at either 2B or 2B-1.  */					    \
343
    _FP_FRAC_SRS_8(_z, wfracbits-1, 2*wfracbits);	 		    \
344
    __FP_FRAC_SET_4(R, _FP_FRAC_WORD_8(_z,3), _FP_FRAC_WORD_8(_z,2),	    \
345
		    _FP_FRAC_WORD_8(_z,1), _FP_FRAC_WORD_8(_z,0));	    \
346
  } while (0)
347
 
348
/*
349
 * Helper utility for _FP_DIV_MEAT_4_udiv:
350
 * pppp = m * nnn
351
 */
352
#define umul_ppppmnnn(p3,p2,p1,p0,m,n2,n1,n0)				    \
353
  do {									    \
354
    UWtype _t;								    \
355
    umul_ppmm(p1,p0,m,n0);						    \
356
    umul_ppmm(p2,_t,m,n1);						    \
357
    __FP_FRAC_ADDI_2(p2,p1,_t);						    \
358
    umul_ppmm(p3,_t,m,n2);						    \
359
    __FP_FRAC_ADDI_2(p3,p2,_t);						    \
360
  } while (0)
361
 
362
/*
363
 * Division algorithms:
364
 */
365
 
366
#define _FP_DIV_MEAT_4_udiv(fs, R, X, Y)				    \
367
  do {									    \
368
    int _i;								    \
369
    _FP_FRAC_DECL_4(_n); _FP_FRAC_DECL_4(_m);				    \
370
    _FP_FRAC_SET_4(_n, _FP_ZEROFRAC_4);					    \
371
    if (_FP_FRAC_GT_4(X, Y))						    \
372
      {									    \
373
	_n_f[3] = X##_f[0] << (_FP_W_TYPE_SIZE - 1);			    \
374
	_FP_FRAC_SRL_4(X, 1);						    \
375
      }									    \
376
    else								    \
377
      R##_e--;								    \
378
									    \
379
    /* Normalize, i.e. make the most significant bit of the 		    \
380
       denominator set. */						    \
381
    _FP_FRAC_SLL_4(Y, _FP_WFRACXBITS_##fs);				    \
382
									    \
383
    for (_i = 3; ; _i--)						    \
384
      {									    \
385
        if (X##_f[3] == Y##_f[3])					    \
386
          {								    \
387
            /* This is a special case, not an optimization		    \
388
               (X##_f[3]/Y##_f[3] would not fit into UWtype).		    \
389
               As X## is guaranteed to be < Y,  R##_f[_i] can be either	    \
390
               (UWtype)-1 or (UWtype)-2.  */				    \
391
            R##_f[_i] = -1;						    \
392
            if (!_i)							    \
393
	      break;							    \
394
            __FP_FRAC_SUB_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0],	    \
395
			    Y##_f[2], Y##_f[1], Y##_f[0], 0,		    \
396
			    X##_f[2], X##_f[1], X##_f[0], _n_f[_i]);	    \
397
            _FP_FRAC_SUB_4(X, Y, X);					    \
398
            if (X##_f[3] > Y##_f[3])					    \
399
              {								    \
400
                R##_f[_i] = -2;						    \
401
                _FP_FRAC_ADD_4(X, Y, X);				    \
402
              }								    \
403
          }								    \
404
        else								    \
405
          {								    \
406
            udiv_qrnnd(R##_f[_i], X##_f[3], X##_f[3], X##_f[2], Y##_f[3]);  \
407
            umul_ppppmnnn(_m_f[3], _m_f[2], _m_f[1], _m_f[0],		    \
408
			  R##_f[_i], Y##_f[2], Y##_f[1], Y##_f[0]);	    \
409
            X##_f[2] = X##_f[1];					    \
410
            X##_f[1] = X##_f[0];					    \
411
            X##_f[0] = _n_f[_i];					    \
412
            if (_FP_FRAC_GT_4(_m, X))					    \
413
              {								    \
414
                R##_f[_i]--;						    \
415
                _FP_FRAC_ADD_4(X, Y, X);				    \
416
                if (_FP_FRAC_GE_4(X, Y) && _FP_FRAC_GT_4(_m, X))	    \
417
                  {							    \
418
		    R##_f[_i]--;					    \
419
		    _FP_FRAC_ADD_4(X, Y, X);				    \
420
                  }							    \
421
              }								    \
422
            _FP_FRAC_DEC_4(X, _m);					    \
423
            if (!_i)							    \
424
	      {								    \
425
		if (!_FP_FRAC_EQ_4(X, _m))				    \
426
		  R##_f[0] |= _FP_WORK_STICKY;				    \
427
		break;							    \
428
	      }								    \
429
          }								    \
430
      }									    \
431
  } while (0)
432
 
433
 
434
/*
435
 * Square root algorithms:
436
 * We have just one right now, maybe Newton approximation
437
 * should be added for those machines where division is fast.
438
 */
439
 
440
#define _FP_SQRT_MEAT_4(R, S, T, X, q)				\
441
  do {								\
442
    while (q)							\
443
      {								\
444
	T##_f[3] = S##_f[3] + q;				\
445
	if (T##_f[3] <= X##_f[3])				\
446
	  {							\
447
	    S##_f[3] = T##_f[3] + q;				\
448
	    X##_f[3] -= T##_f[3];				\
449
	    R##_f[3] += q;					\
450
	  }							\
451
	_FP_FRAC_SLL_4(X, 1);					\
452
	q >>= 1;						\
453
      }								\
454
    q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1);			\
455
    while (q)							\
456
      {								\
457
	T##_f[2] = S##_f[2] + q;				\
458
	T##_f[3] = S##_f[3];					\
459
	if (T##_f[3] < X##_f[3] || 				\
460
	    (T##_f[3] == X##_f[3] && T##_f[2] <= X##_f[2]))	\
461
	  {							\
462
	    S##_f[2] = T##_f[2] + q;				\
463
	    S##_f[3] += (T##_f[2] > S##_f[2]);			\
464
	    __FP_FRAC_DEC_2(X##_f[3], X##_f[2],			\
465
			    T##_f[3], T##_f[2]);		\
466
	    R##_f[2] += q;					\
467
	  }							\
468
	_FP_FRAC_SLL_4(X, 1);					\
469
	q >>= 1;						\
470
      }								\
471
    q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1);			\
472
    while (q)							\
473
      {								\
474
	T##_f[1] = S##_f[1] + q;				\
475
	T##_f[2] = S##_f[2];					\
476
	T##_f[3] = S##_f[3];					\
477
	if (T##_f[3] < X##_f[3] || 				\
478
	    (T##_f[3] == X##_f[3] && (T##_f[2] < X##_f[2] ||	\
479
	     (T##_f[2] == X##_f[2] && T##_f[1] <= X##_f[1]))))	\
480
	  {							\
481
	    S##_f[1] = T##_f[1] + q;				\
482
	    S##_f[2] += (T##_f[1] > S##_f[1]);			\
483
	    S##_f[3] += (T##_f[2] > S##_f[2]);			\
484
	    __FP_FRAC_DEC_3(X##_f[3], X##_f[2], X##_f[1],	\
485
	    		    T##_f[3], T##_f[2], T##_f[1]);	\
486
	    R##_f[1] += q;					\
487
	  }							\
488
	_FP_FRAC_SLL_4(X, 1);					\
489
	q >>= 1;						\
490
      }								\
491
    q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1);			\
492
    while (q != _FP_WORK_ROUND)					\
493
      {								\
494
	T##_f[0] = S##_f[0] + q;				\
495
	T##_f[1] = S##_f[1];					\
496
	T##_f[2] = S##_f[2];					\
497
	T##_f[3] = S##_f[3];					\
498
	if (_FP_FRAC_GE_4(X,T))					\
499
	  {							\
500
	    S##_f[0] = T##_f[0] + q;				\
501
	    S##_f[1] += (T##_f[0] > S##_f[0]);			\
502
	    S##_f[2] += (T##_f[1] > S##_f[1]);			\
503
	    S##_f[3] += (T##_f[2] > S##_f[2]);			\
504
	    _FP_FRAC_DEC_4(X, T);				\
505
	    R##_f[0] += q;					\
506
	  }							\
507
	_FP_FRAC_SLL_4(X, 1);					\
508
	q >>= 1;						\
509
      }								\
510
    if (!_FP_FRAC_ZEROP_4(X))					\
511
      {								\
512
	if (_FP_FRAC_GT_4(X,S))					\
513
	  R##_f[0] |= _FP_WORK_ROUND;				\
514
	R##_f[0] |= _FP_WORK_STICKY;				\
515
      }								\
516
  } while (0)
517
 
518
 
519
/*
520
 * Internals
521
 */
522
 
523
#define __FP_FRAC_SET_4(X,I3,I2,I1,I0)					\
524
  (X##_f[3] = I3, X##_f[2] = I2, X##_f[1] = I1, X##_f[0] = I0)
525
 
526
#ifndef __FP_FRAC_ADD_3
527
#define __FP_FRAC_ADD_3(r2,r1,r0,x2,x1,x0,y2,y1,y0)		\
528
  do {								\
529
    _FP_W_TYPE _c1, _c2;					\
530
    r0 = x0 + y0;						\
531
    _c1 = r0 < x0;						\
532
    r1 = x1 + y1;						\
533
    _c2 = r1 < x1;						\
534
    r1 += _c1;							\
535
    _c2 |= r1 < _c1;						\
536
    r2 = x2 + y2 + _c2;						\
537
  } while (0)
538
#endif
539
 
540
#ifndef __FP_FRAC_ADD_4
541
#define __FP_FRAC_ADD_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0)	\
542
  do {								\
543
    _FP_W_TYPE _c1, _c2, _c3;					\
544
    r0 = x0 + y0;						\
545
    _c1 = r0 < x0;						\
546
    r1 = x1 + y1;						\
547
    _c2 = r1 < x1;						\
548
    r1 += _c1;							\
549
    _c2 |= r1 < _c1;						\
550
    r2 = x2 + y2;						\
551
    _c3 = r2 < x2;						\
552
    r2 += _c2;							\
553
    _c3 |= r2 < _c2;						\
554
    r3 = x3 + y3 + _c3;						\
555
  } while (0)
556
#endif
557
 
558
#ifndef __FP_FRAC_SUB_3
559
#define __FP_FRAC_SUB_3(r2,r1,r0,x2,x1,x0,y2,y1,y0)		\
560
  do {								\
561
    _FP_W_TYPE _c1, _c2;					\
562
    r0 = x0 - y0;						\
563
    _c1 = r0 > x0;						\
564
    r1 = x1 - y1;						\
565
    _c2 = r1 > x1;						\
566
    r1 -= _c1;							\
567
    _c2 |= _c1 && (y1 == x1);					\
568
    r2 = x2 - y2 - _c2;						\
569
  } while (0)
570
#endif
571
 
572
#ifndef __FP_FRAC_SUB_4
573
#define __FP_FRAC_SUB_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0)	\
574
  do {								\
575
    _FP_W_TYPE _c1, _c2, _c3;					\
576
    r0 = x0 - y0;						\
577
    _c1 = r0 > x0;						\
578
    r1 = x1 - y1;						\
579
    _c2 = r1 > x1;						\
580
    r1 -= _c1;							\
581
    _c2 |= _c1 && (y1 == x1);					\
582
    r2 = x2 - y2;						\
583
    _c3 = r2 > x2;						\
584
    r2 -= _c2;							\
585
    _c3 |= _c2 && (y2 == x2);					\
586
    r3 = x3 - y3 - _c3;						\
587
  } while (0)
588
#endif
589
 
590
#ifndef __FP_FRAC_DEC_3
591
#define __FP_FRAC_DEC_3(x2,x1,x0,y2,y1,y0)				\
592
  do {									\
593
    UWtype _t0, _t1, _t2;						\
594
    _t0 = x0, _t1 = x1, _t2 = x2;					\
595
    __FP_FRAC_SUB_3 (x2, x1, x0, _t2, _t1, _t0, y2, y1, y0);		\
596
  } while (0)
597
#endif
598
 
599
#ifndef __FP_FRAC_DEC_4
600
#define __FP_FRAC_DEC_4(x3,x2,x1,x0,y3,y2,y1,y0)			\
601
  do {									\
602
    UWtype _t0, _t1, _t2, _t3;						\
603
    _t0 = x0, _t1 = x1, _t2 = x2, _t3 = x3;				\
604
    __FP_FRAC_SUB_4 (x3,x2,x1,x0,_t3,_t2,_t1,_t0, y3,y2,y1,y0);		\
605
  } while (0)
606
#endif
607
 
608
#ifndef __FP_FRAC_ADDI_4
609
#define __FP_FRAC_ADDI_4(x3,x2,x1,x0,i)					\
610
  do {									\
611
    UWtype _t;								\
612
    _t = ((x0 += i) < i);						\
613
    x1 += _t; _t = (x1 < _t);						\
614
    x2 += _t; _t = (x2 < _t);						\
615
    x3 += _t;								\
616
  } while (0)
617
#endif
618
 
619
/* Convert FP values between word sizes. This appears to be more
620
 * complicated than I'd have expected it to be, so these might be
621
 * wrong... These macros are in any case somewhat bogus because they
622
 * use information about what various FRAC_n variables look like
623
 * internally [eg, that 2 word vars are X_f0 and x_f1]. But so do
624
 * the ones in op-2.h and op-1.h.
625
 */
626
#define _FP_FRAC_COPY_1_4(D, S)		(D##_f = S##_f[0])
627
 
628
#define _FP_FRAC_COPY_2_4(D, S)			\
629
do {						\
630
  D##_f0 = S##_f[0];				\
631
  D##_f1 = S##_f[1];				\
632
} while (0)
633
 
634
/* Assembly/disassembly for converting to/from integral types.
635
 * No shifting or overflow handled here.
636
 */
637
/* Put the FP value X into r, which is an integer of size rsize. */
638
#define _FP_FRAC_ASSEMBLE_4(r, X, rsize)				\
639
  do {									\
640
    if (rsize <= _FP_W_TYPE_SIZE)					\
641
      r = X##_f[0];							\
642
    else if (rsize <= 2*_FP_W_TYPE_SIZE)				\
643
    {									\
644
      r = X##_f[1];							\
645
      r <<= _FP_W_TYPE_SIZE;						\
646
      r += X##_f[0];							\
647
    }									\
648
    else								\
649
    {									\
650
      /* I'm feeling lazy so we deal with int == 3words (implausible)*/	\
651
      /* and int == 4words as a single case.			 */	\
652
      r = X##_f[3];							\
653
      r <<= _FP_W_TYPE_SIZE;						\
654
      r += X##_f[2];							\
655
      r <<= _FP_W_TYPE_SIZE;						\
656
      r += X##_f[1];							\
657
      r <<= _FP_W_TYPE_SIZE;						\
658
      r += X##_f[0];							\
659
    }									\
660
  } while (0)
661
 
662
/* "No disassemble Number Five!" */
663
/* move an integer of size rsize into X's fractional part. We rely on
664
 * the _f[] array consisting of words of size _FP_W_TYPE_SIZE to avoid
665
 * having to mask the values we store into it.
666
 */
667
#define _FP_FRAC_DISASSEMBLE_4(X, r, rsize)				\
668
  do {									\
669
    X##_f[0] = r;							\
670
    X##_f[1] = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE);	\
671
    X##_f[2] = (rsize <= 2*_FP_W_TYPE_SIZE ? 0 : r >> 2*_FP_W_TYPE_SIZE); \
672
    X##_f[3] = (rsize <= 3*_FP_W_TYPE_SIZE ? 0 : r >> 3*_FP_W_TYPE_SIZE); \
673
  } while (0);
674
 
675
#define _FP_FRAC_COPY_4_1(D, S)			\
676
do {						\
677
  D##_f[0] = S##_f;				\
678
  D##_f[1] = D##_f[2] = D##_f[3] = 0;		\
679
} while (0)
680
 
681
#define _FP_FRAC_COPY_4_2(D, S)			\
682
do {						\
683
  D##_f[0] = S##_f0;				\
684
  D##_f[1] = S##_f1;				\
685
  D##_f[2] = D##_f[3] = 0;			\
686
} while (0)
687
 
688
#define _FP_FRAC_COPY_4_4(D,S)	_FP_FRAC_COPY_4(D,S)