Rev 554 | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
554 | serge | 1 | ;***************************************************************************** |
2 | ;* |
||
3 | ;* Open Watcom Project |
||
4 | ;* |
||
5 | ;* Portions Copyright (c) 1983-2002 Sybase, Inc. All Rights Reserved. |
||
6 | ;* |
||
7 | ;* ======================================================================== |
||
8 | ;* |
||
9 | ;* This file contains Original Code and/or Modifications of Original |
||
10 | ;* Code as defined in and that are subject to the Sybase Open Watcom |
||
11 | ;* Public License version 1.0 (the 'License'). You may not use this file |
||
12 | ;* except in compliance with the License. BY USING THIS FILE YOU AGREE TO |
||
13 | ;* ALL TERMS AND CONDITIONS OF THE LICENSE. A copy of the License is |
||
14 | ;* provided with the Original Code and Modifications, and is also |
||
15 | ;* available at www.sybase.com/developer/opensource. |
||
16 | ;* |
||
17 | ;* The Original Code and all software distributed under the License are |
||
18 | ;* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
||
19 | ;* EXPRESS OR IMPLIED, AND SYBASE AND ALL CONTRIBUTORS HEREBY DISCLAIM |
||
20 | ;* ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF |
||
21 | ;* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR |
||
22 | ;* NON-INFRINGEMENT. Please see the License for the specific language |
||
23 | ;* governing rights and limitations under the License. |
||
24 | ;* |
||
25 | ;* ======================================================================== |
||
26 | ;* |
||
27 | ;* Description: WHEN YOU FIGURE OUT WHAT THIS FILE DOES, PLEASE |
||
28 | ;* DESCRIBE IT HERE! |
||
29 | ;* |
||
30 | ;***************************************************************************** |
||
31 | |||
32 | |||
33 | ; static char sccs_id[] = "@(#)fprem32.asm 1.5 12/22/94 12:48:07"; |
||
34 | ; |
||
35 | ; This code is being published by Intel to users of the Pentium(tm) |
||
36 | ; processor. Recipients are authorized to copy, modify, compile, use and |
||
37 | ; distribute the code. |
||
38 | ; |
||
39 | ; Intel makes no warranty of any kind with regard to this code, including |
||
40 | ; but not limited to, implied warranties or merchantability and fitness for |
||
41 | ; a particular purpose. Intel assumes no responsibility for any errors that |
||
42 | ; may appear in this code. |
||
43 | ; |
||
44 | ; No patent licenses are granted, express or implied. |
||
45 | ; |
||
46 | ; |
||
47 | include mdef.inc |
||
48 | |||
49 | .386 |
||
50 | .387 |
||
51 | |||
52 | ; |
||
53 | ; PRELIMINARY VERSION of the software patch for the floating |
||
54 | ; point remainder. |
||
55 | ; |
||
56 | |||
57 | |||
58 | CHECKSW MACRO |
||
59 | ifdef DEBUG |
||
60 | fnstsw [fpsw] |
||
61 | fnstcw [fpcw] |
||
62 | endif |
||
63 | ENDM |
||
64 | |||
65 | |||
704 | serge | 66 | _DATA SEGMENT DWORD USE32 PUBLIC 'DATA' |
554 | serge | 67 | |
68 | ; |
||
69 | ; Stack variables for remainder routines. |
||
70 | ; |
||
71 | |||
72 | FLT_SIZE EQU 12 |
||
73 | DENOM EQU 0 |
||
74 | DENOM_SAVE EQU DENOM + FLT_SIZE |
||
75 | NUMER EQU DENOM_SAVE + FLT_SIZE |
||
76 | PREV_CW EQU NUMER + FLT_SIZE |
||
77 | PATCH_CW EQU PREV_CW + 4 |
||
78 | FPREM_SW EQU PATCH_CW + 4 |
||
79 | STACK_SIZE EQU FPREM_SW + 4 |
||
80 | RET_SIZE EQU 4 |
||
81 | PUSH_SIZE EQU 4 |
||
82 | |||
83 | MAIN_FUDGE EQU RET_SIZE + PUSH_SIZE + PUSH_SIZE + PUSH_SIZE |
||
84 | |||
85 | MAIN_DENOM EQU DENOM + MAIN_FUDGE |
||
86 | MAIN_DENOM_SAVE EQU DENOM_SAVE + MAIN_FUDGE |
||
87 | MAIN_NUMER EQU NUMER + MAIN_FUDGE |
||
88 | MAIN_PREV_CW EQU PREV_CW + MAIN_FUDGE |
||
89 | MAIN_PATCH_CW EQU PATCH_CW + MAIN_FUDGE |
||
90 | MAIN_FPREM_SW EQU FPREM_SW + MAIN_FUDGE |
||
91 | |||
92 | ONESMASK EQU 700h |
||
93 | |||
94 | fprem_risc_table DB 0, 1, 0, 0, 4, 0, 0, 7, 0, 0, 10, 0, 0, 13, 0, 0 |
||
95 | fprem_scale DB 0, 0, 0, 0, 0, 0, 0eeh, 03fh |
||
96 | one_shl_64 DB 0, 0, 0, 0, 0, 0, 0f0h, 043h |
||
97 | one_shr_64 DB 0, 0, 0, 0, 0, 0, 0f0h, 03bh |
||
98 | one DB 0, 0, 0, 0, 0, 0, 0f0h, 03fh |
||
99 | half DB 0, 0, 0, 0, 0, 0, 0e0h, 03fh |
||
100 | big_number DB 0, 0, 0, 0, 0, 0, 0ffh, 0ffh, 0feh, 07fh |
||
101 | |||
102 | ifdef DEBUG |
||
103 | public fpcw |
||
104 | public fpsw |
||
105 | fpcw dw 0 |
||
106 | fpsw dw 0 |
||
107 | endif |
||
108 | |||
109 | FPU_STATE STRUC |
||
110 | CONTROL_WORD DW ? |
||
111 | reserved_1 DW ? |
||
112 | STATUS_WORD DD ? |
||
113 | TAG_WORD DW ? |
||
114 | reserved_3 DW ? |
||
115 | IP_OFFSET DD ? |
||
116 | CS_SLCT DW ? |
||
117 | OPCODE DW ? |
||
118 | DATA_OFFSET DD ? |
||
119 | OPERAND_SLCT DW ? |
||
120 | reserved_4 DW ? |
||
121 | FPU_STATE ENDS |
||
122 | |||
123 | ENV_SIZE EQU 28 |
||
124 | |||
125 | |||
704 | serge | 126 | _DATA ENDS |
554 | serge | 127 | |
128 | _TEXT SEGMENT DWORD USE32 PUBLIC 'CODE' |
||
129 | _TEXT ENDS |
||
130 | |||
131 | |||
704 | serge | 132 | DGROUP GROUP _DATA |
554 | serge | 133 | |
134 | |||
135 | |||
704 | serge | 136 | _TEXT SEGMENT DWORD USE32 PUBLIC 'CODE' |
554 | serge | 137 | |
138 | assume cs:_TEXT, ds:DGROUP, es:DGROUP, ss:nothing |
||
139 | |||
140 | |||
141 | fprem_common PROC NEAR |
||
142 | |||
143 | push eax |
||
144 | push ebx |
||
145 | push ecx |
||
146 | mov eax, [MAIN_DENOM+6+esp] ; exponent and high 16 bits of mantissa |
||
147 | xor eax, ONESMASK ; invert bits that have to be one |
||
148 | test eax, ONESMASK ; check bits that have to be one |
||
149 | jnz remainder_hardware_ok |
||
150 | shr eax, 11 |
||
151 | and eax, 0fh |
||
152 | cmp byte ptr fprem_risc_table[eax], 0 ; check for (1,4,7,a,d) |
||
153 | jz remainder_hardware_ok |
||
154 | |||
155 | ; The denominator has the bit pattern. Weed out the funny cases like NaNs |
||
156 | ; before applying the software version. Our caller guarantees that the |
||
157 | ; denominator is not a denormal. Here we check for: |
||
158 | ; denominator inf, NaN, unnormal |
||
159 | ; numerator inf, NaN, unnormal, denormal |
||
160 | |||
161 | mov eax, [MAIN_DENOM+6+esp] ; exponent and high 16 bits of mantissa |
||
162 | and eax, 07fff0000h ; mask the exponent only |
||
163 | cmp eax, 07fff0000h ; check for INF or NaN |
||
164 | je remainder_hardware_ok |
||
165 | mov eax, [MAIN_NUMER+6+esp] ; exponent and high 16 bits of mantissa |
||
166 | and eax, 07fff0000h ; mask the exponent only |
||
167 | jz remainder_hardware_ok ; jif numerator denormal |
||
168 | cmp eax, 07fff0000h ; check for INF or NaN |
||
169 | je remainder_hardware_ok |
||
170 | mov eax, [esp + MAIN_NUMER + 4] ; high mantissa bits - numerator |
||
171 | add eax, eax ; set carry if explicit bit set |
||
172 | jnz remainder_hardware_ok ; jmp if numerator is unnormal |
||
173 | mov eax, [esp + MAIN_DENOM + 4] ; high mantissa bits - denominator |
||
174 | add eax, eax ; set carry if explicit bit set |
||
175 | jnz remainder_hardware_ok ; jmp if denominator is unnormal |
||
176 | |||
177 | rem_patch: |
||
178 | mov eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator) |
||
179 | and eax, 07fffh ; clear sy |
||
180 | add eax, 63 ; evaluate ey + 63 |
||
181 | mov ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator) |
||
182 | and ebx, 07fffh ; clear sx |
||
183 | sub ebx, eax ; evaluate the exponent difference (ex - ey) |
||
184 | ja rem_large ; if ex > ey + 63, case of large arguments |
||
185 | rem_patch_loop: |
||
186 | mov eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator) |
||
187 | and eax, 07fffh ; clear sy |
||
188 | add eax, 10 ; evaluate ey + 10 |
||
189 | mov ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator) |
||
190 | and ebx, 07fffh ; clear sx |
||
191 | sub ebx, eax ; evaluate the exponent difference (ex - ey) |
||
192 | js remainder_hardware_ok ; safe if ey + 10 > ex |
||
193 | fld tbyte ptr [MAIN_NUMER+esp] ; load the numerator |
||
194 | mov eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator) |
||
195 | mov ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator) |
||
196 | and ebx, 07fffh ; clear sx |
||
197 | mov ecx, ebx |
||
198 | sub ebx, eax |
||
199 | and ebx, 07h |
||
200 | or ebx, 04h |
||
201 | sub ecx, ebx |
||
202 | mov ebx, eax |
||
203 | and ebx, 08000h ; keep sy |
||
204 | or ecx, ebx ; merge the sign of y |
||
205 | mov dword ptr [MAIN_DENOM+8+esp], ecx |
||
206 | fld tbyte ptr [MAIN_DENOM+esp] ; load the shifted denominator |
||
207 | mov dword ptr [MAIN_DENOM+8+esp], eax ; restore the initial denominator |
||
208 | fxch |
||
209 | fprem ; this rem is safe |
||
210 | fstp tbyte ptr [MAIN_NUMER+esp] ; update the numerator |
||
211 | fstp st(0) ; pop the stack |
||
212 | jmp rem_patch_loop |
||
213 | rem_large: |
||
214 | test edx, 02h ; is denominator already saved |
||
215 | jnz already_saved |
||
216 | fld tbyte ptr[esp + MAIN_DENOM] |
||
217 | fstp tbyte ptr[esp + MAIN_DENOM_SAVE] ; save denominator |
||
218 | already_saved: |
||
219 | ; Save user's precision control and institute 80. The fp ops in |
||
220 | ; rem_large_loop must not round to user's precision (if it is less |
||
221 | ; than 80) because the hardware would not have done so. We are |
||
222 | ; aping the hardware here, which is all extended. |
||
223 | |||
224 | fnstcw [esp+MAIN_PREV_CW] ; save caller's control word |
||
225 | mov eax, dword ptr[esp + MAIN_PREV_CW] |
||
226 | or eax, 033fh ; mask exceptions, pc=80 |
||
227 | mov [esp + MAIN_PATCH_CW], eax |
||
228 | fldcw [esp + MAIN_PATCH_CW] |
||
229 | |||
230 | mov eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator) |
||
231 | and eax, 07fffh ; clear sy |
||
232 | mov ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator) |
||
233 | and ebx, 07fffh ; clear sx |
||
234 | sub ebx, eax ; evaluate the exponent difference |
||
235 | and ebx, 03fh |
||
236 | or ebx, 020h |
||
237 | add ebx, 1 |
||
238 | mov ecx, ebx |
||
239 | mov eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator) |
||
240 | mov ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator) |
||
241 | and ebx, 07fffh ; clear sx |
||
242 | and eax, 08000h ; keep sy |
||
243 | or ebx, eax ; merge the sign of y |
||
244 | mov dword ptr[MAIN_DENOM+8+esp], ebx ; make ey equal to ex (scaled denominator) |
||
245 | fld tbyte ptr [MAIN_DENOM+esp] ; load the scaled denominator |
||
246 | fabs |
||
247 | fld tbyte ptr [MAIN_NUMER+esp] ; load the numerator |
||
248 | fabs |
||
249 | rem_large_loop: |
||
250 | fcom |
||
251 | fstsw ax |
||
252 | and eax, 00100h |
||
253 | jnz rem_no_sub |
||
254 | fsub st, st(1) |
||
255 | rem_no_sub: |
||
256 | fxch |
||
257 | fmul qword ptr half |
||
258 | fxch |
||
259 | sub ecx, 1 ; decrement the loop counter |
||
260 | jnz rem_large_loop |
||
261 | mov ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator) |
||
262 | fstp tbyte ptr[esp + MAIN_NUMER] ; save result |
||
263 | fstp st ; toss modified denom |
||
264 | fld tbyte ptr[esp + MAIN_DENOM_SAVE] |
||
265 | fld tbyte ptr[big_number] ; force C2 to be set |
||
266 | fprem |
||
267 | fstp st |
||
268 | fld tbyte ptr[esp + MAIN_NUMER] ; restore saved result |
||
269 | |||
270 | fldcw [esp + MAIN_PREV_CW] ; restore caller's control word |
||
271 | and ebx, 08000h ; keep sx |
||
272 | jz rem_done |
||
273 | fchs |
||
274 | jmp rem_done |
||
275 | remainder_hardware_ok: |
||
276 | fld tbyte ptr [MAIN_DENOM+esp] ; load the denominator |
||
277 | fld tbyte ptr [MAIN_NUMER+esp] ; load the numerator |
||
278 | fprem ; and finally do a remainder |
||
279 | ; prem_main_routine end |
||
280 | rem_done: |
||
281 | test edx, 03h |
||
282 | jz rem_exit |
||
283 | fnstsw [esp + MAIN_FPREM_SW] ; save Q0 Q1 and Q2 |
||
284 | test edx, 01h |
||
285 | jz do_not_de_scale |
||
286 | ; De-scale the result. Go to pc=80 to prevent from fmul |
||
287 | ; from user precision (fprem does not round the result). |
||
288 | fnstcw [esp + MAIN_PREV_CW] ; save callers control word |
||
289 | mov eax, [esp + MAIN_PREV_CW] |
||
290 | or eax, 0300h ; pc = 80 |
||
291 | mov [esp + MAIN_PATCH_CW], eax |
||
292 | fldcw [esp + MAIN_PATCH_CW] |
||
293 | fmul qword ptr one_shr_64 |
||
294 | fldcw [esp + MAIN_PREV_CW] ; restore callers CW |
||
295 | do_not_de_scale: |
||
296 | mov eax, [esp + MAIN_FPREM_SW] |
||
297 | fxch |
||
298 | fstp st |
||
299 | fld tbyte ptr[esp + MAIN_DENOM_SAVE] |
||
300 | fxch |
||
301 | and eax, 04300h ; restore saved Q0, Q1, Q2 |
||
302 | sub esp, ENV_SIZE |
||
303 | fnstenv [esp] |
||
304 | and [esp].STATUS_WORD, 0bcffh |
||
305 | or [esp].STATUS_WORD, eax |
||
306 | fldenv [esp] |
||
307 | add esp, ENV_SIZE |
||
308 | rem_exit: |
||
309 | pop ecx |
||
310 | pop ebx |
||
311 | pop eax |
||
312 | CHECKSW ; debug only: save status |
||
313 | ret |
||
314 | fprem_common ENDP |
||
315 | |||
316 | comment ~**************************************************************** |
||
317 | |||
318 | ; |
||
319 | ; float frem_chk (float numer, float denom) |
||
320 | ; |
||
321 | public frem_chk |
||
322 | frem_chk PROC NEAR |
||
323 | push edx |
||
324 | sub esp, STACK_SIZE |
||
325 | fld dword ptr [STACK_SIZE+8+esp] |
||
326 | fstp tbyte ptr [NUMER+esp] |
||
327 | fld dword ptr [STACK_SIZE+12+esp] |
||
328 | fstp tbyte ptr [DENOM+esp] |
||
329 | mov edx, 0 ; dx = 1 if denormal extended divisor |
||
330 | call fprem_common |
||
331 | fxch |
||
332 | fstp st |
||
333 | add esp, STACK_SIZE |
||
334 | pop edx |
||
335 | ret |
||
336 | frem_chk ENDP |
||
337 | ; end frem_chk |
||
338 | |||
339 | ; |
||
340 | ; double drem_chk (double numer, double denom) |
||
341 | ; |
||
342 | public drem_chk |
||
343 | drem_chk PROC NEAR |
||
344 | push edx |
||
345 | sub esp, STACK_SIZE |
||
346 | fld qword ptr [STACK_SIZE+8+esp] |
||
347 | fstp tbyte ptr [NUMER+esp] |
||
348 | fld qword ptr [STACK_SIZE+16+esp] |
||
349 | fstp tbyte ptr [DENOM+esp] |
||
350 | mov edx, 0 ; dx = 1 if denormal extended divisor |
||
351 | call fprem_common |
||
352 | fxch |
||
353 | fstp st |
||
354 | add esp, STACK_SIZE |
||
355 | pop edx |
||
356 | ret |
||
357 | |||
358 | drem_chk ENDP |
||
359 | ; end drem_chk |
||
360 | |||
361 | ; |
||
362 | ; long double lrem_chk(long double number,long double denom) |
||
363 | ; |
||
364 | public lrem_chk |
||
365 | lrem_chk PROC NEAR |
||
366 | fld tbyte ptr [20+esp] |
||
367 | fld tbyte ptr [4+esp] |
||
368 | call fprem_chk |
||
369 | fxch |
||
370 | fstp st |
||
371 | ret |
||
372 | lrem_chk ENDP |
||
373 | |||
374 | **********************************************************************~ |
||
375 | |||
376 | ; |
||
377 | ; FPREM: ST = remainder(ST, ST(1)) |
||
378 | ; |
||
379 | ; Compiler version of the FPREM must preserve the arguments in the floating |
||
380 | ; point stack. |
||
381 | |||
382 | public __fprem_chk |
||
383 | defpe __fprem_chk |
||
384 | push edx |
||
385 | sub esp, STACK_SIZE |
||
386 | fstp tbyte ptr [NUMER+esp] |
||
387 | fstp tbyte ptr [DENOM+esp] |
||
388 | xor edx, edx |
||
389 | ; prem_main_routine begin |
||
390 | mov eax,[DENOM+6+esp] ; exponent and high 16 bits of mantissa |
||
391 | test eax,07fff0000h ; check for denormal |
||
392 | jz denormal |
||
393 | call fprem_common |
||
394 | add esp, STACK_SIZE |
||
395 | pop edx |
||
396 | ret |
||
397 | |||
398 | denormal: |
||
399 | fld tbyte ptr [DENOM+esp] ; load the denominator |
||
400 | fld tbyte ptr [NUMER+esp] ; load the numerator |
||
401 | mov eax, [DENOM+esp] ; test for whole mantissa == 0 |
||
402 | or eax, [DENOM+4+esp] ; test for whole mantissa == 0 |
||
403 | jz remainder_hardware_ok_l ; denominator is zero |
||
404 | fxch |
||
405 | fstp tbyte ptr[esp + DENOM_SAVE] ; save org denominator |
||
406 | fld tbyte ptr[esp + DENOM] |
||
407 | fxch |
||
408 | or edx, 02h |
||
409 | ; |
||
410 | ; For this we need pc=80. Also, mask exceptions so we don't take any |
||
411 | ; denormal operand exceptions. It is guaranteed that the descaling |
||
412 | ; later on will take underflow, which is what the hardware would have done |
||
413 | ; on a normal fprem. |
||
414 | ; |
||
415 | fnstcw [PREV_CW+esp] ; save caller's control word |
||
416 | mov eax, [PREV_CW+esp] |
||
417 | or eax, 0033fh ; mask exceptions, pc=80 |
||
418 | mov [PATCH_CW+esp], eax |
||
419 | fldcw [PATCH_CW+esp] ; mask exceptions & pc=80 |
||
420 | |||
421 | ; The denominator is a denormal. For most numerators, scale both numerator |
||
422 | ; and denominator to get rid of denormals. Then execute the common code |
||
423 | ; with the flag set to indicate that the result must be de-scaled. |
||
424 | ; For large numerators this won't work because the scaling would cause |
||
425 | ; overflow. In this case we know the numerator is large, the denominator |
||
426 | ; is small (denormal), so the exponent difference is also large. This means |
||
427 | ; the rem_large code will be used and this code depends on the difference |
||
428 | ; in exponents modulo 64. Adding 64 to the denominators exponent |
||
429 | ; doesn't change the modulo 64 difference. So we can scale the denominator |
||
430 | ; by 64, making it not denormal, and this won't effect the result. |
||
431 | ; |
||
432 | ; To start with, figure out if numerator is large |
||
433 | |||
434 | mov eax, [esp + NUMER + 8] ; load numerator exponent |
||
435 | and eax, 7fffh ; isolate numerator exponent |
||
436 | cmp eax, 7fbeh ; compare Nexp to Maxexp-64 |
||
437 | ja big_numer_rem_de ; jif big numerator |
||
438 | |||
439 | ; So the numerator is not large scale both numerator and denominator |
||
440 | |||
441 | or edx, 1 ; edx = 1, if denormal extended divisor |
||
442 | fmul qword ptr one_shl_64 ; make numerator not denormal |
||
443 | fstp tbyte ptr[esp + NUMER] |
||
444 | fmul qword ptr one_shl_64 ; make denominator not denormal |
||
445 | fstp tbyte ptr[esp + DENOM] |
||
446 | jmp scaling_done |
||
447 | |||
448 | ; The numerator is large. Scale only the denominator, which will not |
||
449 | ; change the result which we know will be partial. Set the scale flag |
||
450 | ; to false. |
||
451 | big_numer_rem_de: |
||
452 | ; We must do this with pc=80 to avoid rounding to single/double. |
||
453 | ; In this case we do not mask exceptions so that we will take |
||
454 | ; denormal operand, as would the hardware. |
||
455 | fnstcw [PREV_CW+esp] ; save caller's control word |
||
456 | mov eax, [PREV_CW+esp] |
||
457 | or eax, 00300h ; pc=80 |
||
458 | mov [PATCH_CW+esp], eax |
||
459 | fldcw [PATCH_CW+esp] ; pc=80 |
||
460 | |||
461 | fstp st ; Toss numerator |
||
462 | fmul qword ptr one_shl_64 ; make denominator not denormal |
||
463 | fstp tbyte ptr[esp + DENOM] |
||
464 | |||
465 | ; Restore the control word which was fiddled to scale at 80-bit precision. |
||
466 | ; Then call the common code. |
||
467 | scaling_done: |
||
468 | fldcw [esp + PREV_CW] ; restore callers control word |
||
469 | call fprem_common |
||
470 | add esp, STACK_SIZE |
||
471 | pop edx |
||
472 | ret |
||
473 | |||
474 | remainder_hardware_ok_l: |
||
475 | fprem ; and finally do a remainder |
||
476 | |||
477 | CHECKSW |
||
478 | |||
479 | add esp, STACK_SIZE |
||
480 | pop edx |
||
481 | ret |
||
482 | __fprem_chk ENDP |
||
483 | ; end fprem_chk |
||
484 | |||
485 | |||
486 | ; |
||
487 | ; FPREM1 code begins here |
||
488 | ; |
||
489 | |||
490 | |||
491 | fprem1_common PROC NEAR |
||
492 | |||
493 | push eax |
||
494 | push ebx |
||
495 | push ecx |
||
496 | mov eax, [MAIN_DENOM+6+esp] ; exponent and high 16 bits of mantissa |
||
497 | xor eax, ONESMASK ; invert bits that have to be one |
||
498 | test eax, ONESMASK ; check bits that have to be one |
||
499 | jnz remainder1_hardware_ok |
||
500 | shr eax, 11 |
||
501 | and eax, 0fh |
||
502 | cmp byte ptr fprem_risc_table[eax], 0 ; check for (1,4,7,a,d) |
||
503 | jz remainder1_hardware_ok |
||
504 | |||
505 | ; The denominator has the bit pattern. Weed out the funny cases like NaNs |
||
506 | ; before applying the software version. Our caller guarantees that the |
||
507 | ; denominator is not a denormal. Here we check for: |
||
508 | ; denominator inf, NaN, unnormal |
||
509 | ; numerator inf, NaN, unnormal, denormal |
||
510 | |||
511 | mov eax, [MAIN_DENOM+6+esp] ; exponent and high 16 bits of mantissa |
||
512 | and eax, 07fff0000h ; mask the exponent only |
||
513 | cmp eax, 07fff0000h ; check for INF or NaN |
||
514 | je remainder1_hardware_ok |
||
515 | mov eax, [MAIN_NUMER+6+esp] ; exponent and high 16 bits of mantissa |
||
516 | and eax, 07fff0000h ; mask the exponent only |
||
517 | jz remainder1_hardware_ok ; jif numerator denormal |
||
518 | cmp eax, 07fff0000h ; check for INF or NaN |
||
519 | je remainder1_hardware_ok |
||
520 | mov eax, [esp + MAIN_NUMER + 4] ; high mantissa bits - numerator |
||
521 | add eax, eax ; set carry if explicit bit set |
||
522 | jnz remainder1_hardware_ok ; jmp if numerator is unnormal |
||
523 | mov eax, [esp + MAIN_DENOM + 4] ; high mantissa bits - denominator |
||
524 | add eax, eax ; set carry if explicit bit set |
||
525 | jnz remainder1_hardware_ok ; jmp if denominator is unnormal |
||
526 | |||
527 | rem1_patch: |
||
528 | mov eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator) |
||
529 | and eax, 07fffh ; clear sy |
||
530 | add eax, 63 ; evaluate ey + 63 |
||
531 | mov ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator) |
||
532 | and ebx, 07fffh ; clear sx |
||
533 | sub ebx, eax ; evaluate the exponent difference (ex - ey) |
||
534 | ja rem1_large ; if ex > ey + 63, case of large arguments |
||
535 | rem1_patch_loop: |
||
536 | mov eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator) |
||
537 | and eax, 07fffh ; clear sy |
||
538 | add eax, 10 ; evaluate ey + 10 |
||
539 | mov ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator) |
||
540 | and ebx, 07fffh ; clear sx |
||
541 | sub ebx, eax ; evaluate the exponent difference (ex - ey) |
||
542 | js remainder1_hardware_ok ; safe if ey + 10 > ex |
||
543 | fld tbyte ptr [MAIN_NUMER+esp] ; load the numerator |
||
544 | mov eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator) |
||
545 | mov ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator) |
||
546 | and ebx, 07fffh ; clear sx |
||
547 | mov ecx, ebx |
||
548 | sub ebx, eax |
||
549 | and ebx, 07h |
||
550 | or ebx, 04h |
||
551 | sub ecx, ebx |
||
552 | mov ebx, eax |
||
553 | and ebx, 08000h ; keep sy |
||
554 | or ecx, ebx ; merge the sign of y |
||
555 | mov dword ptr [MAIN_DENOM+8+esp], ecx |
||
556 | fld tbyte ptr [MAIN_DENOM+esp] ; load the shifted denominator |
||
557 | mov dword ptr [MAIN_DENOM+8+esp], eax ; restore the initial denominator |
||
558 | fxch |
||
559 | fprem ; this rem is safe |
||
560 | fstp tbyte ptr [MAIN_NUMER+esp] ; update the numerator |
||
561 | fstp st(0) ; pop the stack |
||
562 | jmp rem1_patch_loop |
||
563 | rem1_large: |
||
564 | test ebx, 02h ; is denominator already saved |
||
565 | jnz already_saved1 |
||
566 | fld tbyte ptr[esp + MAIN_DENOM] |
||
567 | fstp tbyte ptr[esp + MAIN_DENOM_SAVE] ; save denominator |
||
568 | already_saved1: |
||
569 | ; Save user's precision control and institute 80. The fp ops in |
||
570 | ; rem1_large_loop must not round to user's precision (if it is less |
||
571 | ; than 80) because the hardware would not have done so. We are |
||
572 | ; aping the hardware here, which is all extended. |
||
573 | |||
574 | fnstcw [esp+MAIN_PREV_CW] ; save caller's control word |
||
575 | mov eax, dword ptr[esp + MAIN_PREV_CW] |
||
576 | or eax, 033fh ; mask exceptions, pc=80 |
||
577 | mov [esp + MAIN_PATCH_CW], eax |
||
578 | fldcw [esp + MAIN_PATCH_CW] |
||
579 | |||
580 | mov eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator) |
||
581 | and eax, 07fffh ; clear sy |
||
582 | mov ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator) |
||
583 | and ebx, 07fffh ; clear sx |
||
584 | sub ebx, eax ; evaluate the exponent difference |
||
585 | and ebx, 03fh |
||
586 | or ebx, 020h |
||
587 | add ebx, 1 |
||
588 | mov ecx, ebx |
||
589 | mov eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator) |
||
590 | mov ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator) |
||
591 | and ebx, 07fffh ; clear sx |
||
592 | and eax, 08000h ; keep sy |
||
593 | or ebx, eax ; merge the sign of y |
||
594 | mov dword ptr[MAIN_DENOM+8+esp], ebx ; make ey equal to ex (scaled denominator) |
||
595 | fld tbyte ptr [MAIN_DENOM+esp] ; load the scaled denominator |
||
596 | fabs |
||
597 | fld tbyte ptr [MAIN_NUMER+esp] ; load the numerator |
||
598 | fabs |
||
599 | rem1_large_loop: |
||
600 | fcom |
||
601 | fstsw ax |
||
602 | and eax, 00100h |
||
603 | jnz rem1_no_sub |
||
604 | fsub st, st(1) |
||
605 | rem1_no_sub: |
||
606 | fxch |
||
607 | fmul qword ptr half |
||
608 | fxch |
||
609 | sub ecx, 1 ; decrement the loop counter |
||
610 | jnz rem1_large_loop |
||
611 | mov ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator) |
||
612 | fstp tbyte ptr[esp + MAIN_NUMER] ; save result |
||
613 | fstp st ; toss modified denom |
||
614 | fld tbyte ptr[esp + MAIN_DENOM_SAVE] |
||
615 | fld tbyte ptr[big_number] ; force C2 to be set |
||
616 | fprem1 |
||
617 | fstp st |
||
618 | fld tbyte ptr[esp + MAIN_NUMER] ; restore saved result |
||
619 | |||
620 | fldcw [esp + MAIN_PREV_CW] ; restore caller's control word |
||
621 | and ebx, 08000h ; keep sx |
||
622 | jz rem1_done |
||
623 | fchs |
||
624 | jmp rem1_done |
||
625 | remainder1_hardware_ok: |
||
626 | fld tbyte ptr [MAIN_DENOM+esp] ; load the denominator |
||
627 | fld tbyte ptr [MAIN_NUMER+esp] ; load the numerator |
||
628 | fprem1 ; and finally do a remainder |
||
629 | ; prem1_main_routine end |
||
630 | rem1_done: |
||
631 | test edx, 03h |
||
632 | jz rem1_exit |
||
633 | fnstsw [esp + MAIN_FPREM_SW] ; save Q0 Q1 and Q2 |
||
634 | test edx, 01h |
||
635 | jz do_not_de_scale1 |
||
636 | ; De-scale the result. Go to pc=80 to prevent from fmul |
||
637 | ; from user precision (fprem does not round the result). |
||
638 | fnstcw [esp + MAIN_PREV_CW] ; save callers control word |
||
639 | mov eax, [esp + MAIN_PREV_CW] |
||
640 | or eax, 0300h ; pc = 80 |
||
641 | mov [esp + MAIN_PATCH_CW], eax |
||
642 | fldcw [esp + MAIN_PATCH_CW] |
||
643 | fmul qword ptr one_shr_64 |
||
644 | fldcw [esp + MAIN_PREV_CW] ; restore callers CW |
||
645 | do_not_de_scale1: |
||
646 | mov eax, [esp + MAIN_FPREM_SW] |
||
647 | fxch |
||
648 | fstp st |
||
649 | fld tbyte ptr[esp + MAIN_DENOM_SAVE] |
||
650 | fxch |
||
651 | and eax, 04300h ; restore saved Q0, Q1, Q2 |
||
652 | sub esp, ENV_SIZE |
||
653 | fnstenv [esp] |
||
654 | and [esp].STATUS_WORD, 0bcffh |
||
655 | or [esp].STATUS_WORD, eax |
||
656 | fldenv [esp] |
||
657 | add esp, ENV_SIZE |
||
658 | rem1_exit: |
||
659 | pop ecx |
||
660 | pop ebx |
||
661 | pop eax |
||
662 | CHECKSW ; debug only: save status |
||
663 | ret |
||
664 | fprem1_common ENDP |
||
665 | |||
666 | |||
667 | comment ~*************************************************************** |
||
668 | ; |
||
669 | ; float frem1_chk (float numer, float denom) |
||
670 | ; |
||
671 | public frem1_chk |
||
672 | frem1_chk PROC NEAR |
||
673 | push edx |
||
674 | sub esp, STACK_SIZE |
||
675 | fld dword ptr [STACK_SIZE+8+esp] |
||
676 | fstp tbyte ptr [NUMER+esp] |
||
677 | fld dword ptr [STACK_SIZE+12+esp] |
||
678 | fstp tbyte ptr [DENOM+esp] |
||
679 | mov edx, 0 ; dx = 1 if denormal extended divisor |
||
680 | call fprem1_common |
||
681 | fxch |
||
682 | fstp st |
||
683 | add esp, STACK_SIZE |
||
684 | pop edx |
||
685 | ret |
||
686 | frem1_chk ENDP |
||
687 | ; end frem1_chk |
||
688 | |||
689 | ; |
||
690 | ; double drem1_chk (double numer, double denom) |
||
691 | ; |
||
692 | public drem1_chk |
||
693 | drem1_chk PROC NEAR |
||
694 | push edx |
||
695 | sub esp, STACK_SIZE |
||
696 | fld qword ptr [STACK_SIZE+8+esp] |
||
697 | fstp tbyte ptr [NUMER+esp] |
||
698 | fld qword ptr [STACK_SIZE+16+esp] |
||
699 | fstp tbyte ptr [DENOM+esp] |
||
700 | mov edx, 0 ; dx = 1 if denormal extended divisor |
||
701 | call fprem1_common |
||
702 | fxch |
||
703 | fstp st |
||
704 | add esp, STACK_SIZE |
||
705 | pop edx |
||
706 | ret |
||
707 | |||
708 | drem1_chk ENDP |
||
709 | ; end drem1_chk |
||
710 | |||
711 | ; |
||
712 | ; long double lrem1_chk(long double number,long double denom) |
||
713 | ; |
||
714 | public lrem1_chk |
||
715 | lrem1_chk PROC NEAR |
||
716 | fld tbyte ptr [20+esp] |
||
717 | fld tbyte ptr [4+esp] |
||
718 | call fprem1_chk |
||
719 | fxch |
||
720 | fstp st |
||
721 | ret |
||
722 | lrem1_chk ENDP |
||
723 | ********************************************************************~ |
||
724 | |||
725 | ; |
||
726 | ; FPREM1: ST = remainder(ST, ST(1)) - IEEE version of rounding |
||
727 | ; |
||
728 | ; Compiler version of the FPREM must preserve the arguments in the floating |
||
729 | ; point stack. |
||
730 | |||
731 | public __fprem1_chk |
||
732 | defpe __fprem1_chk |
||
733 | push edx |
||
734 | sub esp, STACK_SIZE |
||
735 | fstp tbyte ptr [NUMER+esp] |
||
736 | fstp tbyte ptr [DENOM+esp] |
||
737 | mov edx, 0 |
||
738 | ; prem1_main_routine begin |
||
739 | mov eax,[DENOM+6+esp] ; exponent and high 16 bits of mantissa |
||
740 | test eax,07fff0000h ; check for denormal |
||
741 | jz denormal1 |
||
742 | call fprem1_common |
||
743 | add esp, STACK_SIZE |
||
744 | pop edx |
||
745 | ret |
||
746 | |||
747 | denormal1: |
||
748 | fld tbyte ptr [DENOM+esp] ; load the denominator |
||
749 | fld tbyte ptr [NUMER+esp] ; load the numerator |
||
750 | mov eax, [DENOM+esp] ; test for whole mantissa == 0 |
||
751 | or eax, [DENOM+4+esp] ; test for whole mantissa == 0 |
||
752 | jz remainder1_hardware_ok_l ; denominator is zero |
||
753 | fxch |
||
754 | fstp tbyte ptr[esp + DENOM_SAVE] ; save org denominator |
||
755 | fld tbyte ptr[esp + DENOM] |
||
756 | fxch |
||
757 | or edx, 02h |
||
758 | ; |
||
759 | ; For this we need pc=80. Also, mask exceptions so we don't take any |
||
760 | ; denormal operand exceptions. It is guaranteed that the descaling |
||
761 | ; later on will take underflow, which is what the hardware would have done |
||
762 | ; on a normal fprem. |
||
763 | ; |
||
764 | fnstcw [PREV_CW+esp] ; save caller's control word |
||
765 | mov eax, [PREV_CW+esp] |
||
766 | or eax, 0033fh ; mask exceptions, pc=80 |
||
767 | mov [PATCH_CW+esp], eax |
||
768 | fldcw [PATCH_CW+esp] ; mask exceptions & pc=80 |
||
769 | |||
770 | ; The denominator is a denormal. For most numerators, scale both numerator |
||
771 | ; and denominator to get rid of denormals. Then execute the common code |
||
772 | ; with the flag set to indicate that the result must be de-scaled. |
||
773 | ; For large numerators this won't work because the scaling would cause |
||
774 | ; overflow. In this case we know the numerator is large, the denominator |
||
775 | ; is small (denormal), so the exponent difference is also large. This means |
||
776 | ; the rem1_large code will be used and this code depends on the difference |
||
777 | ; in exponents modulo 64. Adding 64 to the denominators exponent |
||
778 | ; doesn't change the modulo 64 difference. So we can scale the denominator |
||
779 | ; by 64, making it not denormal, and this won't effect the result. |
||
780 | ; |
||
781 | ; To start with, figure out if numerator is large |
||
782 | |||
783 | mov eax, [esp + NUMER + 8] ; load numerator exponent |
||
784 | and eax, 7fffh ; isolate numerator exponent |
||
785 | cmp eax, 7fbeh ; compare Nexp to Maxexp-64 |
||
786 | ja big_numer_rem1_de ; jif big numerator |
||
787 | |||
788 | ; So the numerator is not large scale both numerator and denominator |
||
789 | |||
790 | or edx, 1 ; edx = 1, if denormal extended divisor |
||
791 | fmul qword ptr one_shl_64 ; make numerator not denormal |
||
792 | fstp tbyte ptr[esp + NUMER] |
||
793 | fmul qword ptr one_shl_64 ; make denominator not denormal |
||
794 | fstp tbyte ptr[esp + DENOM] |
||
795 | jmp scaling_done1 |
||
796 | |||
797 | ; The numerator is large. Scale only the denominator, which will not |
||
798 | ; change the result which we know will be partial. Set the scale flag |
||
799 | ; to false. |
||
800 | big_numer_rem1_de: |
||
801 | ; We must do this with pc=80 to avoid rounding to single/double. |
||
802 | ; In this case we do not mask exceptions so that we will take |
||
803 | ; denormal operand, as would the hardware. |
||
804 | fnstcw [PREV_CW+esp] ; save caller's control word |
||
805 | mov eax, [PREV_CW+esp] |
||
806 | or eax, 00300h ; pc=80 |
||
807 | mov [PATCH_CW+esp], eax |
||
808 | fldcw [PATCH_CW+esp] ; pc=80 |
||
809 | |||
810 | fstp st ; Toss numerator |
||
811 | fmul qword ptr one_shl_64 ; make denominator not denormal |
||
812 | fstp tbyte ptr[esp + DENOM] |
||
813 | |||
814 | ; Restore the control word which was fiddled to scale at 80-bit precision. |
||
815 | ; Then call the common code. |
||
816 | scaling_done1: |
||
817 | fldcw [esp + PREV_CW] ; restore callers control word |
||
818 | call fprem1_common |
||
819 | add esp, STACK_SIZE |
||
820 | pop edx |
||
821 | ret |
||
822 | |||
823 | remainder1_hardware_ok_l: |
||
824 | fprem ; and finally do a remainder |
||
825 | |||
826 | CHECKSW |
||
827 | |||
828 | add esp, STACK_SIZE |
||
829 | pop edx |
||
830 | ret |
||
831 | __fprem1_chk ENDP |
||
832 | ; end fprem1_chk |
||
833 | |||
834 | ifdef DEBUG |
||
835 | public fpinit |
||
836 | fpinit PROC NEAR |
||
837 | fninit |
||
838 | ret |
||
839 | fpinit ENDP |
||
840 | endif |
||
841 | |||
704 | serge | 842 | _TEXT ENDS |
554 | serge | 843 | END |