Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
1906 | serge | 1 | /* Software floating-point emulation. |
2 | Basic four-word fraction declaration and manipulation. |
||
3 | Copyright (C) 1997,1998,1999,2006,2007 Free Software Foundation, Inc. |
||
4 | This file is part of the GNU C Library. |
||
5 | Contributed by Richard Henderson (rth@cygnus.com), |
||
6 | Jakub Jelinek (jj@ultra.linux.cz), |
||
7 | David S. Miller (davem@redhat.com) and |
||
8 | Peter Maydell (pmaydell@chiark.greenend.org.uk). |
||
9 | |||
10 | The GNU C Library is free software; you can redistribute it and/or |
||
11 | modify it under the terms of the GNU Lesser General Public |
||
12 | License as published by the Free Software Foundation; either |
||
13 | version 2.1 of the License, or (at your option) any later version. |
||
14 | |||
15 | In addition to the permissions in the GNU Lesser General Public |
||
16 | License, the Free Software Foundation gives you unlimited |
||
17 | permission to link the compiled version of this file into |
||
18 | combinations with other programs, and to distribute those |
||
19 | combinations without any restriction coming from the use of this |
||
20 | file. (The Lesser General Public License restrictions do apply in |
||
21 | other respects; for example, they cover modification of the file, |
||
22 | and distribution when not linked into a combine executable.) |
||
23 | |||
24 | The GNU C Library is distributed in the hope that it will be useful, |
||
25 | but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
26 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||
27 | Lesser General Public License for more details. |
||
28 | |||
29 | You should have received a copy of the GNU Lesser General Public |
||
30 | License along with the GNU C Library; if not, write to the Free |
||
31 | Software Foundation, 51 Franklin Street, Fifth Floor, Boston, |
||
32 | MA 02110-1301, USA. */ |
||
33 | |||
34 | #define _FP_FRAC_DECL_4(X) _FP_W_TYPE X##_f[4] |
||
35 | #define _FP_FRAC_COPY_4(D,S) \ |
||
36 | (D##_f[0] = S##_f[0], D##_f[1] = S##_f[1], \ |
||
37 | D##_f[2] = S##_f[2], D##_f[3] = S##_f[3]) |
||
38 | #define _FP_FRAC_SET_4(X,I) __FP_FRAC_SET_4(X, I) |
||
39 | #define _FP_FRAC_HIGH_4(X) (X##_f[3]) |
||
40 | #define _FP_FRAC_LOW_4(X) (X##_f[0]) |
||
41 | #define _FP_FRAC_WORD_4(X,w) (X##_f[w]) |
||
42 | |||
43 | #define _FP_FRAC_SLL_4(X,N) \ |
||
44 | do { \ |
||
45 | _FP_I_TYPE _up, _down, _skip, _i; \ |
||
46 | _skip = (N) / _FP_W_TYPE_SIZE; \ |
||
47 | _up = (N) % _FP_W_TYPE_SIZE; \ |
||
48 | _down = _FP_W_TYPE_SIZE - _up; \ |
||
49 | if (!_up) \ |
||
50 | for (_i = 3; _i >= _skip; --_i) \ |
||
51 | X##_f[_i] = X##_f[_i-_skip]; \ |
||
52 | else \ |
||
53 | { \ |
||
54 | for (_i = 3; _i > _skip; --_i) \ |
||
55 | X##_f[_i] = X##_f[_i-_skip] << _up \ |
||
56 | | X##_f[_i-_skip-1] >> _down; \ |
||
57 | X##_f[_i--] = X##_f[0] << _up; \ |
||
58 | } \ |
||
59 | for (; _i >= 0; --_i) \ |
||
60 | X##_f[_i] = 0; \ |
||
61 | } while (0) |
||
62 | |||
63 | /* This one was broken too */ |
||
64 | #define _FP_FRAC_SRL_4(X,N) \ |
||
65 | do { \ |
||
66 | _FP_I_TYPE _up, _down, _skip, _i; \ |
||
67 | _skip = (N) / _FP_W_TYPE_SIZE; \ |
||
68 | _down = (N) % _FP_W_TYPE_SIZE; \ |
||
69 | _up = _FP_W_TYPE_SIZE - _down; \ |
||
70 | if (!_down) \ |
||
71 | for (_i = 0; _i <= 3-_skip; ++_i) \ |
||
72 | X##_f[_i] = X##_f[_i+_skip]; \ |
||
73 | else \ |
||
74 | { \ |
||
75 | for (_i = 0; _i < 3-_skip; ++_i) \ |
||
76 | X##_f[_i] = X##_f[_i+_skip] >> _down \ |
||
77 | | X##_f[_i+_skip+1] << _up; \ |
||
78 | X##_f[_i++] = X##_f[3] >> _down; \ |
||
79 | } \ |
||
80 | for (; _i < 4; ++_i) \ |
||
81 | X##_f[_i] = 0; \ |
||
82 | } while (0) |
||
83 | |||
84 | |||
85 | /* Right shift with sticky-lsb. |
||
86 | * What this actually means is that we do a standard right-shift, |
||
87 | * but that if any of the bits that fall off the right hand side |
||
88 | * were one then we always set the LSbit. |
||
89 | */ |
||
90 | #define _FP_FRAC_SRST_4(X,S,N,size) \ |
||
91 | do { \ |
||
92 | _FP_I_TYPE _up, _down, _skip, _i; \ |
||
93 | _FP_W_TYPE _s; \ |
||
94 | _skip = (N) / _FP_W_TYPE_SIZE; \ |
||
95 | _down = (N) % _FP_W_TYPE_SIZE; \ |
||
96 | _up = _FP_W_TYPE_SIZE - _down; \ |
||
97 | for (_s = _i = 0; _i < _skip; ++_i) \ |
||
98 | _s |= X##_f[_i]; \ |
||
99 | if (!_down) \ |
||
100 | for (_i = 0; _i <= 3-_skip; ++_i) \ |
||
101 | X##_f[_i] = X##_f[_i+_skip]; \ |
||
102 | else \ |
||
103 | { \ |
||
104 | _s |= X##_f[_i] << _up; \ |
||
105 | for (_i = 0; _i < 3-_skip; ++_i) \ |
||
106 | X##_f[_i] = X##_f[_i+_skip] >> _down \ |
||
107 | | X##_f[_i+_skip+1] << _up; \ |
||
108 | X##_f[_i++] = X##_f[3] >> _down; \ |
||
109 | } \ |
||
110 | for (; _i < 4; ++_i) \ |
||
111 | X##_f[_i] = 0; \ |
||
112 | S = (_s != 0); \ |
||
113 | } while (0) |
||
114 | |||
115 | #define _FP_FRAC_SRS_4(X,N,size) \ |
||
116 | do { \ |
||
117 | int _sticky; \ |
||
118 | _FP_FRAC_SRST_4(X, _sticky, N, size); \ |
||
119 | X##_f[0] |= _sticky; \ |
||
120 | } while (0) |
||
121 | |||
122 | #define _FP_FRAC_ADD_4(R,X,Y) \ |
||
123 | __FP_FRAC_ADD_4(R##_f[3], R##_f[2], R##_f[1], R##_f[0], \ |
||
124 | X##_f[3], X##_f[2], X##_f[1], X##_f[0], \ |
||
125 | Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0]) |
||
126 | |||
127 | #define _FP_FRAC_SUB_4(R,X,Y) \ |
||
128 | __FP_FRAC_SUB_4(R##_f[3], R##_f[2], R##_f[1], R##_f[0], \ |
||
129 | X##_f[3], X##_f[2], X##_f[1], X##_f[0], \ |
||
130 | Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0]) |
||
131 | |||
132 | #define _FP_FRAC_DEC_4(X,Y) \ |
||
133 | __FP_FRAC_DEC_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0], \ |
||
134 | Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0]) |
||
135 | |||
136 | #define _FP_FRAC_ADDI_4(X,I) \ |
||
137 | __FP_FRAC_ADDI_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0], I) |
||
138 | |||
139 | #define _FP_ZEROFRAC_4 0,0,0,0 |
||
140 | #define _FP_MINFRAC_4 0,0,0,1 |
||
141 | #define _FP_MAXFRAC_4 (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0), (~(_FP_WS_TYPE)0) |
||
142 | |||
143 | #define _FP_FRAC_ZEROP_4(X) ((X##_f[0] | X##_f[1] | X##_f[2] | X##_f[3]) == 0) |
||
144 | #define _FP_FRAC_NEGP_4(X) ((_FP_WS_TYPE)X##_f[3] < 0) |
||
145 | #define _FP_FRAC_OVERP_4(fs,X) (_FP_FRAC_HIGH_##fs(X) & _FP_OVERFLOW_##fs) |
||
146 | #define _FP_FRAC_CLEAR_OVERP_4(fs,X) (_FP_FRAC_HIGH_##fs(X) &= ~_FP_OVERFLOW_##fs) |
||
147 | |||
148 | #define _FP_FRAC_EQ_4(X,Y) \ |
||
149 | (X##_f[0] == Y##_f[0] && X##_f[1] == Y##_f[1] \ |
||
150 | && X##_f[2] == Y##_f[2] && X##_f[3] == Y##_f[3]) |
||
151 | |||
152 | #define _FP_FRAC_GT_4(X,Y) \ |
||
153 | (X##_f[3] > Y##_f[3] || \ |
||
154 | (X##_f[3] == Y##_f[3] && (X##_f[2] > Y##_f[2] || \ |
||
155 | (X##_f[2] == Y##_f[2] && (X##_f[1] > Y##_f[1] || \ |
||
156 | (X##_f[1] == Y##_f[1] && X##_f[0] > Y##_f[0]) \ |
||
157 | )) \ |
||
158 | )) \ |
||
159 | ) |
||
160 | |||
161 | #define _FP_FRAC_GE_4(X,Y) \ |
||
162 | (X##_f[3] > Y##_f[3] || \ |
||
163 | (X##_f[3] == Y##_f[3] && (X##_f[2] > Y##_f[2] || \ |
||
164 | (X##_f[2] == Y##_f[2] && (X##_f[1] > Y##_f[1] || \ |
||
165 | (X##_f[1] == Y##_f[1] && X##_f[0] >= Y##_f[0]) \ |
||
166 | )) \ |
||
167 | )) \ |
||
168 | ) |
||
169 | |||
170 | |||
171 | #define _FP_FRAC_CLZ_4(R,X) \ |
||
172 | do { \ |
||
173 | if (X##_f[3]) \ |
||
174 | { \ |
||
175 | __FP_CLZ(R,X##_f[3]); \ |
||
176 | } \ |
||
177 | else if (X##_f[2]) \ |
||
178 | { \ |
||
179 | __FP_CLZ(R,X##_f[2]); \ |
||
180 | R += _FP_W_TYPE_SIZE; \ |
||
181 | } \ |
||
182 | else if (X##_f[1]) \ |
||
183 | { \ |
||
184 | __FP_CLZ(R,X##_f[1]); \ |
||
185 | R += _FP_W_TYPE_SIZE*2; \ |
||
186 | } \ |
||
187 | else \ |
||
188 | { \ |
||
189 | __FP_CLZ(R,X##_f[0]); \ |
||
190 | R += _FP_W_TYPE_SIZE*3; \ |
||
191 | } \ |
||
192 | } while(0) |
||
193 | |||
194 | |||
195 | #define _FP_UNPACK_RAW_4(fs, X, val) \ |
||
196 | do { \ |
||
197 | union _FP_UNION_##fs _flo; _flo.flt = (val); \ |
||
198 | X##_f[0] = _flo.bits.frac0; \ |
||
199 | X##_f[1] = _flo.bits.frac1; \ |
||
200 | X##_f[2] = _flo.bits.frac2; \ |
||
201 | X##_f[3] = _flo.bits.frac3; \ |
||
202 | X##_e = _flo.bits.exp; \ |
||
203 | X##_s = _flo.bits.sign; \ |
||
204 | } while (0) |
||
205 | |||
206 | #define _FP_UNPACK_RAW_4_P(fs, X, val) \ |
||
207 | do { \ |
||
208 | union _FP_UNION_##fs *_flo = \ |
||
209 | (union _FP_UNION_##fs *)(val); \ |
||
210 | \ |
||
211 | X##_f[0] = _flo->bits.frac0; \ |
||
212 | X##_f[1] = _flo->bits.frac1; \ |
||
213 | X##_f[2] = _flo->bits.frac2; \ |
||
214 | X##_f[3] = _flo->bits.frac3; \ |
||
215 | X##_e = _flo->bits.exp; \ |
||
216 | X##_s = _flo->bits.sign; \ |
||
217 | } while (0) |
||
218 | |||
219 | #define _FP_PACK_RAW_4(fs, val, X) \ |
||
220 | do { \ |
||
221 | union _FP_UNION_##fs _flo; \ |
||
222 | _flo.bits.frac0 = X##_f[0]; \ |
||
223 | _flo.bits.frac1 = X##_f[1]; \ |
||
224 | _flo.bits.frac2 = X##_f[2]; \ |
||
225 | _flo.bits.frac3 = X##_f[3]; \ |
||
226 | _flo.bits.exp = X##_e; \ |
||
227 | _flo.bits.sign = X##_s; \ |
||
228 | (val) = _flo.flt; \ |
||
229 | } while (0) |
||
230 | |||
231 | #define _FP_PACK_RAW_4_P(fs, val, X) \ |
||
232 | do { \ |
||
233 | union _FP_UNION_##fs *_flo = \ |
||
234 | (union _FP_UNION_##fs *)(val); \ |
||
235 | \ |
||
236 | _flo->bits.frac0 = X##_f[0]; \ |
||
237 | _flo->bits.frac1 = X##_f[1]; \ |
||
238 | _flo->bits.frac2 = X##_f[2]; \ |
||
239 | _flo->bits.frac3 = X##_f[3]; \ |
||
240 | _flo->bits.exp = X##_e; \ |
||
241 | _flo->bits.sign = X##_s; \ |
||
242 | } while (0) |
||
243 | |||
244 | /* |
||
245 | * Multiplication algorithms: |
||
246 | */ |
||
247 | |||
248 | /* Given a 1W * 1W => 2W primitive, do the extended multiplication. */ |
||
249 | |||
250 | #define _FP_MUL_MEAT_4_wide(wfracbits, R, X, Y, doit) \ |
||
251 | do { \ |
||
252 | _FP_FRAC_DECL_8(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c); \ |
||
253 | _FP_FRAC_DECL_2(_d); _FP_FRAC_DECL_2(_e); _FP_FRAC_DECL_2(_f); \ |
||
254 | \ |
||
255 | doit(_FP_FRAC_WORD_8(_z,1), _FP_FRAC_WORD_8(_z,0), X##_f[0], Y##_f[0]); \ |
||
256 | doit(_b_f1, _b_f0, X##_f[0], Y##_f[1]); \ |
||
257 | doit(_c_f1, _c_f0, X##_f[1], Y##_f[0]); \ |
||
258 | doit(_d_f1, _d_f0, X##_f[1], Y##_f[1]); \ |
||
259 | doit(_e_f1, _e_f0, X##_f[0], Y##_f[2]); \ |
||
260 | doit(_f_f1, _f_f0, X##_f[2], Y##_f[0]); \ |
||
261 | __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2), \ |
||
262 | _FP_FRAC_WORD_8(_z,1), 0,_b_f1,_b_f0, \ |
||
263 | 0,0,_FP_FRAC_WORD_8(_z,1)); \ |
||
264 | __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2), \ |
||
265 | _FP_FRAC_WORD_8(_z,1), 0,_c_f1,_c_f0, \ |
||
266 | _FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2), \ |
||
267 | _FP_FRAC_WORD_8(_z,1)); \ |
||
268 | __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3), \ |
||
269 | _FP_FRAC_WORD_8(_z,2), 0,_d_f1,_d_f0, \ |
||
270 | 0,_FP_FRAC_WORD_8(_z,3),_FP_FRAC_WORD_8(_z,2)); \ |
||
271 | __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3), \ |
||
272 | _FP_FRAC_WORD_8(_z,2), 0,_e_f1,_e_f0, \ |
||
273 | _FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3), \ |
||
274 | _FP_FRAC_WORD_8(_z,2)); \ |
||
275 | __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3), \ |
||
276 | _FP_FRAC_WORD_8(_z,2), 0,_f_f1,_f_f0, \ |
||
277 | _FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3), \ |
||
278 | _FP_FRAC_WORD_8(_z,2)); \ |
||
279 | doit(_b_f1, _b_f0, X##_f[0], Y##_f[3]); \ |
||
280 | doit(_c_f1, _c_f0, X##_f[3], Y##_f[0]); \ |
||
281 | doit(_d_f1, _d_f0, X##_f[1], Y##_f[2]); \ |
||
282 | doit(_e_f1, _e_f0, X##_f[2], Y##_f[1]); \ |
||
283 | __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4), \ |
||
284 | _FP_FRAC_WORD_8(_z,3), 0,_b_f1,_b_f0, \ |
||
285 | 0,_FP_FRAC_WORD_8(_z,4),_FP_FRAC_WORD_8(_z,3)); \ |
||
286 | __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4), \ |
||
287 | _FP_FRAC_WORD_8(_z,3), 0,_c_f1,_c_f0, \ |
||
288 | _FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4), \ |
||
289 | _FP_FRAC_WORD_8(_z,3)); \ |
||
290 | __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4), \ |
||
291 | _FP_FRAC_WORD_8(_z,3), 0,_d_f1,_d_f0, \ |
||
292 | _FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4), \ |
||
293 | _FP_FRAC_WORD_8(_z,3)); \ |
||
294 | __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4), \ |
||
295 | _FP_FRAC_WORD_8(_z,3), 0,_e_f1,_e_f0, \ |
||
296 | _FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4), \ |
||
297 | _FP_FRAC_WORD_8(_z,3)); \ |
||
298 | doit(_b_f1, _b_f0, X##_f[2], Y##_f[2]); \ |
||
299 | doit(_c_f1, _c_f0, X##_f[1], Y##_f[3]); \ |
||
300 | doit(_d_f1, _d_f0, X##_f[3], Y##_f[1]); \ |
||
301 | doit(_e_f1, _e_f0, X##_f[2], Y##_f[3]); \ |
||
302 | doit(_f_f1, _f_f0, X##_f[3], Y##_f[2]); \ |
||
303 | __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5), \ |
||
304 | _FP_FRAC_WORD_8(_z,4), 0,_b_f1,_b_f0, \ |
||
305 | 0,_FP_FRAC_WORD_8(_z,5),_FP_FRAC_WORD_8(_z,4)); \ |
||
306 | __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5), \ |
||
307 | _FP_FRAC_WORD_8(_z,4), 0,_c_f1,_c_f0, \ |
||
308 | _FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5), \ |
||
309 | _FP_FRAC_WORD_8(_z,4)); \ |
||
310 | __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5), \ |
||
311 | _FP_FRAC_WORD_8(_z,4), 0,_d_f1,_d_f0, \ |
||
312 | _FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5), \ |
||
313 | _FP_FRAC_WORD_8(_z,4)); \ |
||
314 | __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6), \ |
||
315 | _FP_FRAC_WORD_8(_z,5), 0,_e_f1,_e_f0, \ |
||
316 | 0,_FP_FRAC_WORD_8(_z,6),_FP_FRAC_WORD_8(_z,5)); \ |
||
317 | __FP_FRAC_ADD_3(_FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6), \ |
||
318 | _FP_FRAC_WORD_8(_z,5), 0,_f_f1,_f_f0, \ |
||
319 | _FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6), \ |
||
320 | _FP_FRAC_WORD_8(_z,5)); \ |
||
321 | doit(_b_f1, _b_f0, X##_f[3], Y##_f[3]); \ |
||
322 | __FP_FRAC_ADD_2(_FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6), \ |
||
323 | _b_f1,_b_f0, \ |
||
324 | _FP_FRAC_WORD_8(_z,7),_FP_FRAC_WORD_8(_z,6)); \ |
||
325 | \ |
||
326 | /* Normalize since we know where the msb of the multiplicands \ |
||
327 | were (bit B), we know that the msb of the of the product is \ |
||
328 | at either 2B or 2B-1. */ \ |
||
329 | _FP_FRAC_SRS_8(_z, wfracbits-1, 2*wfracbits); \ |
||
330 | __FP_FRAC_SET_4(R, _FP_FRAC_WORD_8(_z,3), _FP_FRAC_WORD_8(_z,2), \ |
||
331 | _FP_FRAC_WORD_8(_z,1), _FP_FRAC_WORD_8(_z,0)); \ |
||
332 | } while (0) |
||
333 | |||
334 | #define _FP_MUL_MEAT_4_gmp(wfracbits, R, X, Y) \ |
||
335 | do { \ |
||
336 | _FP_FRAC_DECL_8(_z); \ |
||
337 | \ |
||
338 | mpn_mul_n(_z_f, _x_f, _y_f, 4); \ |
||
339 | \ |
||
340 | /* Normalize since we know where the msb of the multiplicands \ |
||
341 | were (bit B), we know that the msb of the of the product is \ |
||
342 | at either 2B or 2B-1. */ \ |
||
343 | _FP_FRAC_SRS_8(_z, wfracbits-1, 2*wfracbits); \ |
||
344 | __FP_FRAC_SET_4(R, _FP_FRAC_WORD_8(_z,3), _FP_FRAC_WORD_8(_z,2), \ |
||
345 | _FP_FRAC_WORD_8(_z,1), _FP_FRAC_WORD_8(_z,0)); \ |
||
346 | } while (0) |
||
347 | |||
348 | /* |
||
349 | * Helper utility for _FP_DIV_MEAT_4_udiv: |
||
350 | * pppp = m * nnn |
||
351 | */ |
||
352 | #define umul_ppppmnnn(p3,p2,p1,p0,m,n2,n1,n0) \ |
||
353 | do { \ |
||
354 | UWtype _t; \ |
||
355 | umul_ppmm(p1,p0,m,n0); \ |
||
356 | umul_ppmm(p2,_t,m,n1); \ |
||
357 | __FP_FRAC_ADDI_2(p2,p1,_t); \ |
||
358 | umul_ppmm(p3,_t,m,n2); \ |
||
359 | __FP_FRAC_ADDI_2(p3,p2,_t); \ |
||
360 | } while (0) |
||
361 | |||
362 | /* |
||
363 | * Division algorithms: |
||
364 | */ |
||
365 | |||
366 | #define _FP_DIV_MEAT_4_udiv(fs, R, X, Y) \ |
||
367 | do { \ |
||
368 | int _i; \ |
||
369 | _FP_FRAC_DECL_4(_n); _FP_FRAC_DECL_4(_m); \ |
||
370 | _FP_FRAC_SET_4(_n, _FP_ZEROFRAC_4); \ |
||
371 | if (_FP_FRAC_GT_4(X, Y)) \ |
||
372 | { \ |
||
373 | _n_f[3] = X##_f[0] << (_FP_W_TYPE_SIZE - 1); \ |
||
374 | _FP_FRAC_SRL_4(X, 1); \ |
||
375 | } \ |
||
376 | else \ |
||
377 | R##_e--; \ |
||
378 | \ |
||
379 | /* Normalize, i.e. make the most significant bit of the \ |
||
380 | denominator set. */ \ |
||
381 | _FP_FRAC_SLL_4(Y, _FP_WFRACXBITS_##fs); \ |
||
382 | \ |
||
383 | for (_i = 3; ; _i--) \ |
||
384 | { \ |
||
385 | if (X##_f[3] == Y##_f[3]) \ |
||
386 | { \ |
||
387 | /* This is a special case, not an optimization \ |
||
388 | (X##_f[3]/Y##_f[3] would not fit into UWtype). \ |
||
389 | As X## is guaranteed to be < Y, R##_f[_i] can be either \ |
||
390 | (UWtype)-1 or (UWtype)-2. */ \ |
||
391 | R##_f[_i] = -1; \ |
||
392 | if (!_i) \ |
||
393 | break; \ |
||
394 | __FP_FRAC_SUB_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0], \ |
||
395 | Y##_f[2], Y##_f[1], Y##_f[0], 0, \ |
||
396 | X##_f[2], X##_f[1], X##_f[0], _n_f[_i]); \ |
||
397 | _FP_FRAC_SUB_4(X, Y, X); \ |
||
398 | if (X##_f[3] > Y##_f[3]) \ |
||
399 | { \ |
||
400 | R##_f[_i] = -2; \ |
||
401 | _FP_FRAC_ADD_4(X, Y, X); \ |
||
402 | } \ |
||
403 | } \ |
||
404 | else \ |
||
405 | { \ |
||
406 | udiv_qrnnd(R##_f[_i], X##_f[3], X##_f[3], X##_f[2], Y##_f[3]); \ |
||
407 | umul_ppppmnnn(_m_f[3], _m_f[2], _m_f[1], _m_f[0], \ |
||
408 | R##_f[_i], Y##_f[2], Y##_f[1], Y##_f[0]); \ |
||
409 | X##_f[2] = X##_f[1]; \ |
||
410 | X##_f[1] = X##_f[0]; \ |
||
411 | X##_f[0] = _n_f[_i]; \ |
||
412 | if (_FP_FRAC_GT_4(_m, X)) \ |
||
413 | { \ |
||
414 | R##_f[_i]--; \ |
||
415 | _FP_FRAC_ADD_4(X, Y, X); \ |
||
416 | if (_FP_FRAC_GE_4(X, Y) && _FP_FRAC_GT_4(_m, X)) \ |
||
417 | { \ |
||
418 | R##_f[_i]--; \ |
||
419 | _FP_FRAC_ADD_4(X, Y, X); \ |
||
420 | } \ |
||
421 | } \ |
||
422 | _FP_FRAC_DEC_4(X, _m); \ |
||
423 | if (!_i) \ |
||
424 | { \ |
||
425 | if (!_FP_FRAC_EQ_4(X, _m)) \ |
||
426 | R##_f[0] |= _FP_WORK_STICKY; \ |
||
427 | break; \ |
||
428 | } \ |
||
429 | } \ |
||
430 | } \ |
||
431 | } while (0) |
||
432 | |||
433 | |||
434 | /* |
||
435 | * Square root algorithms: |
||
436 | * We have just one right now, maybe Newton approximation |
||
437 | * should be added for those machines where division is fast. |
||
438 | */ |
||
439 | |||
440 | #define _FP_SQRT_MEAT_4(R, S, T, X, q) \ |
||
441 | do { \ |
||
442 | while (q) \ |
||
443 | { \ |
||
444 | T##_f[3] = S##_f[3] + q; \ |
||
445 | if (T##_f[3] <= X##_f[3]) \ |
||
446 | { \ |
||
447 | S##_f[3] = T##_f[3] + q; \ |
||
448 | X##_f[3] -= T##_f[3]; \ |
||
449 | R##_f[3] += q; \ |
||
450 | } \ |
||
451 | _FP_FRAC_SLL_4(X, 1); \ |
||
452 | q >>= 1; \ |
||
453 | } \ |
||
454 | q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1); \ |
||
455 | while (q) \ |
||
456 | { \ |
||
457 | T##_f[2] = S##_f[2] + q; \ |
||
458 | T##_f[3] = S##_f[3]; \ |
||
459 | if (T##_f[3] < X##_f[3] || \ |
||
460 | (T##_f[3] == X##_f[3] && T##_f[2] <= X##_f[2])) \ |
||
461 | { \ |
||
462 | S##_f[2] = T##_f[2] + q; \ |
||
463 | S##_f[3] += (T##_f[2] > S##_f[2]); \ |
||
464 | __FP_FRAC_DEC_2(X##_f[3], X##_f[2], \ |
||
465 | T##_f[3], T##_f[2]); \ |
||
466 | R##_f[2] += q; \ |
||
467 | } \ |
||
468 | _FP_FRAC_SLL_4(X, 1); \ |
||
469 | q >>= 1; \ |
||
470 | } \ |
||
471 | q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1); \ |
||
472 | while (q) \ |
||
473 | { \ |
||
474 | T##_f[1] = S##_f[1] + q; \ |
||
475 | T##_f[2] = S##_f[2]; \ |
||
476 | T##_f[3] = S##_f[3]; \ |
||
477 | if (T##_f[3] < X##_f[3] || \ |
||
478 | (T##_f[3] == X##_f[3] && (T##_f[2] < X##_f[2] || \ |
||
479 | (T##_f[2] == X##_f[2] && T##_f[1] <= X##_f[1])))) \ |
||
480 | { \ |
||
481 | S##_f[1] = T##_f[1] + q; \ |
||
482 | S##_f[2] += (T##_f[1] > S##_f[1]); \ |
||
483 | S##_f[3] += (T##_f[2] > S##_f[2]); \ |
||
484 | __FP_FRAC_DEC_3(X##_f[3], X##_f[2], X##_f[1], \ |
||
485 | T##_f[3], T##_f[2], T##_f[1]); \ |
||
486 | R##_f[1] += q; \ |
||
487 | } \ |
||
488 | _FP_FRAC_SLL_4(X, 1); \ |
||
489 | q >>= 1; \ |
||
490 | } \ |
||
491 | q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1); \ |
||
492 | while (q != _FP_WORK_ROUND) \ |
||
493 | { \ |
||
494 | T##_f[0] = S##_f[0] + q; \ |
||
495 | T##_f[1] = S##_f[1]; \ |
||
496 | T##_f[2] = S##_f[2]; \ |
||
497 | T##_f[3] = S##_f[3]; \ |
||
498 | if (_FP_FRAC_GE_4(X,T)) \ |
||
499 | { \ |
||
500 | S##_f[0] = T##_f[0] + q; \ |
||
501 | S##_f[1] += (T##_f[0] > S##_f[0]); \ |
||
502 | S##_f[2] += (T##_f[1] > S##_f[1]); \ |
||
503 | S##_f[3] += (T##_f[2] > S##_f[2]); \ |
||
504 | _FP_FRAC_DEC_4(X, T); \ |
||
505 | R##_f[0] += q; \ |
||
506 | } \ |
||
507 | _FP_FRAC_SLL_4(X, 1); \ |
||
508 | q >>= 1; \ |
||
509 | } \ |
||
510 | if (!_FP_FRAC_ZEROP_4(X)) \ |
||
511 | { \ |
||
512 | if (_FP_FRAC_GT_4(X,S)) \ |
||
513 | R##_f[0] |= _FP_WORK_ROUND; \ |
||
514 | R##_f[0] |= _FP_WORK_STICKY; \ |
||
515 | } \ |
||
516 | } while (0) |
||
517 | |||
518 | |||
519 | /* |
||
520 | * Internals |
||
521 | */ |
||
522 | |||
523 | #define __FP_FRAC_SET_4(X,I3,I2,I1,I0) \ |
||
524 | (X##_f[3] = I3, X##_f[2] = I2, X##_f[1] = I1, X##_f[0] = I0) |
||
525 | |||
526 | #ifndef __FP_FRAC_ADD_3 |
||
527 | #define __FP_FRAC_ADD_3(r2,r1,r0,x2,x1,x0,y2,y1,y0) \ |
||
528 | do { \ |
||
529 | _FP_W_TYPE _c1, _c2; \ |
||
530 | r0 = x0 + y0; \ |
||
531 | _c1 = r0 < x0; \ |
||
532 | r1 = x1 + y1; \ |
||
533 | _c2 = r1 < x1; \ |
||
534 | r1 += _c1; \ |
||
535 | _c2 |= r1 < _c1; \ |
||
536 | r2 = x2 + y2 + _c2; \ |
||
537 | } while (0) |
||
538 | #endif |
||
539 | |||
540 | #ifndef __FP_FRAC_ADD_4 |
||
541 | #define __FP_FRAC_ADD_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0) \ |
||
542 | do { \ |
||
543 | _FP_W_TYPE _c1, _c2, _c3; \ |
||
544 | r0 = x0 + y0; \ |
||
545 | _c1 = r0 < x0; \ |
||
546 | r1 = x1 + y1; \ |
||
547 | _c2 = r1 < x1; \ |
||
548 | r1 += _c1; \ |
||
549 | _c2 |= r1 < _c1; \ |
||
550 | r2 = x2 + y2; \ |
||
551 | _c3 = r2 < x2; \ |
||
552 | r2 += _c2; \ |
||
553 | _c3 |= r2 < _c2; \ |
||
554 | r3 = x3 + y3 + _c3; \ |
||
555 | } while (0) |
||
556 | #endif |
||
557 | |||
558 | #ifndef __FP_FRAC_SUB_3 |
||
559 | #define __FP_FRAC_SUB_3(r2,r1,r0,x2,x1,x0,y2,y1,y0) \ |
||
560 | do { \ |
||
561 | _FP_W_TYPE _c1, _c2; \ |
||
562 | r0 = x0 - y0; \ |
||
563 | _c1 = r0 > x0; \ |
||
564 | r1 = x1 - y1; \ |
||
565 | _c2 = r1 > x1; \ |
||
566 | r1 -= _c1; \ |
||
567 | _c2 |= _c1 && (y1 == x1); \ |
||
568 | r2 = x2 - y2 - _c2; \ |
||
569 | } while (0) |
||
570 | #endif |
||
571 | |||
572 | #ifndef __FP_FRAC_SUB_4 |
||
573 | #define __FP_FRAC_SUB_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0) \ |
||
574 | do { \ |
||
575 | _FP_W_TYPE _c1, _c2, _c3; \ |
||
576 | r0 = x0 - y0; \ |
||
577 | _c1 = r0 > x0; \ |
||
578 | r1 = x1 - y1; \ |
||
579 | _c2 = r1 > x1; \ |
||
580 | r1 -= _c1; \ |
||
581 | _c2 |= _c1 && (y1 == x1); \ |
||
582 | r2 = x2 - y2; \ |
||
583 | _c3 = r2 > x2; \ |
||
584 | r2 -= _c2; \ |
||
585 | _c3 |= _c2 && (y2 == x2); \ |
||
586 | r3 = x3 - y3 - _c3; \ |
||
587 | } while (0) |
||
588 | #endif |
||
589 | |||
590 | #ifndef __FP_FRAC_DEC_3 |
||
591 | #define __FP_FRAC_DEC_3(x2,x1,x0,y2,y1,y0) \ |
||
592 | do { \ |
||
593 | UWtype _t0, _t1, _t2; \ |
||
594 | _t0 = x0, _t1 = x1, _t2 = x2; \ |
||
595 | __FP_FRAC_SUB_3 (x2, x1, x0, _t2, _t1, _t0, y2, y1, y0); \ |
||
596 | } while (0) |
||
597 | #endif |
||
598 | |||
599 | #ifndef __FP_FRAC_DEC_4 |
||
600 | #define __FP_FRAC_DEC_4(x3,x2,x1,x0,y3,y2,y1,y0) \ |
||
601 | do { \ |
||
602 | UWtype _t0, _t1, _t2, _t3; \ |
||
603 | _t0 = x0, _t1 = x1, _t2 = x2, _t3 = x3; \ |
||
604 | __FP_FRAC_SUB_4 (x3,x2,x1,x0,_t3,_t2,_t1,_t0, y3,y2,y1,y0); \ |
||
605 | } while (0) |
||
606 | #endif |
||
607 | |||
608 | #ifndef __FP_FRAC_ADDI_4 |
||
609 | #define __FP_FRAC_ADDI_4(x3,x2,x1,x0,i) \ |
||
610 | do { \ |
||
611 | UWtype _t; \ |
||
612 | _t = ((x0 += i) < i); \ |
||
613 | x1 += _t; _t = (x1 < _t); \ |
||
614 | x2 += _t; _t = (x2 < _t); \ |
||
615 | x3 += _t; \ |
||
616 | } while (0) |
||
617 | #endif |
||
618 | |||
619 | /* Convert FP values between word sizes. This appears to be more |
||
620 | * complicated than I'd have expected it to be, so these might be |
||
621 | * wrong... These macros are in any case somewhat bogus because they |
||
622 | * use information about what various FRAC_n variables look like |
||
623 | * internally [eg, that 2 word vars are X_f0 and x_f1]. But so do |
||
624 | * the ones in op-2.h and op-1.h. |
||
625 | */ |
||
626 | #define _FP_FRAC_COPY_1_4(D, S) (D##_f = S##_f[0]) |
||
627 | |||
628 | #define _FP_FRAC_COPY_2_4(D, S) \ |
||
629 | do { \ |
||
630 | D##_f0 = S##_f[0]; \ |
||
631 | D##_f1 = S##_f[1]; \ |
||
632 | } while (0) |
||
633 | |||
634 | /* Assembly/disassembly for converting to/from integral types. |
||
635 | * No shifting or overflow handled here. |
||
636 | */ |
||
637 | /* Put the FP value X into r, which is an integer of size rsize. */ |
||
638 | #define _FP_FRAC_ASSEMBLE_4(r, X, rsize) \ |
||
639 | do { \ |
||
640 | if (rsize <= _FP_W_TYPE_SIZE) \ |
||
641 | r = X##_f[0]; \ |
||
642 | else if (rsize <= 2*_FP_W_TYPE_SIZE) \ |
||
643 | { \ |
||
644 | r = X##_f[1]; \ |
||
645 | r <<= _FP_W_TYPE_SIZE; \ |
||
646 | r += X##_f[0]; \ |
||
647 | } \ |
||
648 | else \ |
||
649 | { \ |
||
650 | /* I'm feeling lazy so we deal with int == 3words (implausible)*/ \ |
||
651 | /* and int == 4words as a single case. */ \ |
||
652 | r = X##_f[3]; \ |
||
653 | r <<= _FP_W_TYPE_SIZE; \ |
||
654 | r += X##_f[2]; \ |
||
655 | r <<= _FP_W_TYPE_SIZE; \ |
||
656 | r += X##_f[1]; \ |
||
657 | r <<= _FP_W_TYPE_SIZE; \ |
||
658 | r += X##_f[0]; \ |
||
659 | } \ |
||
660 | } while (0) |
||
661 | |||
662 | /* "No disassemble Number Five!" */ |
||
663 | /* move an integer of size rsize into X's fractional part. We rely on |
||
664 | * the _f[] array consisting of words of size _FP_W_TYPE_SIZE to avoid |
||
665 | * having to mask the values we store into it. |
||
666 | */ |
||
667 | #define _FP_FRAC_DISASSEMBLE_4(X, r, rsize) \ |
||
668 | do { \ |
||
669 | X##_f[0] = r; \ |
||
670 | X##_f[1] = (rsize <= _FP_W_TYPE_SIZE ? 0 : r >> _FP_W_TYPE_SIZE); \ |
||
671 | X##_f[2] = (rsize <= 2*_FP_W_TYPE_SIZE ? 0 : r >> 2*_FP_W_TYPE_SIZE); \ |
||
672 | X##_f[3] = (rsize <= 3*_FP_W_TYPE_SIZE ? 0 : r >> 3*_FP_W_TYPE_SIZE); \ |
||
673 | } while (0); |
||
674 | |||
675 | #define _FP_FRAC_COPY_4_1(D, S) \ |
||
676 | do { \ |
||
677 | D##_f[0] = S##_f; \ |
||
678 | D##_f[1] = D##_f[2] = D##_f[3] = 0; \ |
||
679 | } while (0) |
||
680 | |||
681 | #define _FP_FRAC_COPY_4_2(D, S) \ |
||
682 | do { \ |
||
683 | D##_f[0] = S##_f0; \ |
||
684 | D##_f[1] = S##_f1; \ |
||
685 | D##_f[2] = D##_f[3] = 0; \ |
||
686 | } while (0) |
||
687 | |||
688 | #define _FP_FRAC_COPY_4_4(D,S) _FP_FRAC_COPY_4(D,S)=>=>=>=><=>=><=>=><=>=><=>=>=>>>>>>>>>>>>><>=>>>><>=>>><>=>>><>>>><>>><>=>>>><>>=>><>><> |