Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
359 | serge | 1 | ;***************************************************************************** |
2 | ;* |
||
3 | ;* Open Watcom Project |
||
4 | ;* |
||
5 | ;* Portions Copyright (c) 1983-2002 Sybase, Inc. All Rights Reserved. |
||
6 | ;* |
||
7 | ;* ======================================================================== |
||
8 | ;* |
||
9 | ;* This file contains Original Code and/or Modifications of Original |
||
10 | ;* Code as defined in and that are subject to the Sybase Open Watcom |
||
11 | ;* Public License version 1.0 (the 'License'). You may not use this file |
||
12 | ;* except in compliance with the License. BY USING THIS FILE YOU AGREE TO |
||
13 | ;* ALL TERMS AND CONDITIONS OF THE LICENSE. A copy of the License is |
||
14 | ;* provided with the Original Code and Modifications, and is also |
||
15 | ;* available at www.sybase.com/developer/opensource. |
||
16 | ;* |
||
17 | ;* The Original Code and all software distributed under the License are |
||
18 | ;* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER |
||
19 | ;* EXPRESS OR IMPLIED, AND SYBASE AND ALL CONTRIBUTORS HEREBY DISCLAIM |
||
20 | ;* ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF |
||
21 | ;* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR |
||
22 | ;* NON-INFRINGEMENT. Please see the License for the specific language |
||
23 | ;* governing rights and limitations under the License. |
||
24 | ;* |
||
25 | ;* ======================================================================== |
||
26 | ;* |
||
27 | ;* Description: WHEN YOU FIGURE OUT WHAT THIS FILE DOES, PLEASE |
||
28 | ;* DESCRIBE IT HERE! |
||
29 | ;* |
||
30 | ;***************************************************************************** |
||
31 | |||
32 | |||
33 | ; static char sccs_id[] = "@(#)fprem32.asm 1.5 12/22/94 12:48:07"; |
||
34 | ; |
||
35 | ; This code is being published by Intel to users of the Pentium(tm) |
||
36 | ; processor. Recipients are authorized to copy, modify, compile, use and |
||
37 | ; distribute the code. |
||
38 | ; |
||
39 | ; Intel makes no warranty of any kind with regard to this code, including |
||
40 | ; but not limited to, implied warranties or merchantability and fitness for |
||
41 | ; a particular purpose. Intel assumes no responsibility for any errors that |
||
42 | ; may appear in this code. |
||
43 | ; |
||
44 | ; No patent licenses are granted, express or implied. |
||
45 | ; |
||
46 | ; |
||
47 | include mdef.inc |
||
48 | |||
49 | .386 |
||
50 | .387 |
||
51 | |||
52 | ; |
||
53 | ; PRELIMINARY VERSION of the software patch for the floating |
||
54 | ; point remainder. |
||
55 | ; |
||
56 | |||
57 | |||
58 | CHECKSW MACRO |
||
59 | ifdef DEBUG |
||
60 | fnstsw [fpsw] |
||
61 | fnstcw [fpcw] |
||
62 | endif |
||
63 | ENDM |
||
64 | |||
65 | |||
66 | DATA32 SEGMENT DWORD USE32 PUBLIC 'DATA' |
||
67 | |||
68 | ; |
||
69 | ; Stack variables for remainder routines. |
||
70 | ; |
||
71 | |||
72 | FLT_SIZE EQU 12 |
||
73 | DENOM EQU 0 |
||
74 | DENOM_SAVE EQU DENOM + FLT_SIZE |
||
75 | NUMER EQU DENOM_SAVE + FLT_SIZE |
||
76 | PREV_CW EQU NUMER + FLT_SIZE |
||
77 | PATCH_CW EQU PREV_CW + 4 |
||
78 | FPREM_SW EQU PATCH_CW + 4 |
||
79 | STACK_SIZE EQU FPREM_SW + 4 |
||
80 | RET_SIZE EQU 4 |
||
81 | PUSH_SIZE EQU 4 |
||
82 | |||
83 | MAIN_FUDGE EQU RET_SIZE + PUSH_SIZE + PUSH_SIZE + PUSH_SIZE |
||
84 | |||
85 | MAIN_DENOM EQU DENOM + MAIN_FUDGE |
||
86 | MAIN_DENOM_SAVE EQU DENOM_SAVE + MAIN_FUDGE |
||
87 | MAIN_NUMER EQU NUMER + MAIN_FUDGE |
||
88 | MAIN_PREV_CW EQU PREV_CW + MAIN_FUDGE |
||
89 | MAIN_PATCH_CW EQU PATCH_CW + MAIN_FUDGE |
||
90 | MAIN_FPREM_SW EQU FPREM_SW + MAIN_FUDGE |
||
91 | |||
92 | ONESMASK EQU 700h |
||
93 | |||
94 | fprem_risc_table DB 0, 1, 0, 0, 4, 0, 0, 7, 0, 0, 10, 0, 0, 13, 0, 0 |
||
95 | fprem_scale DB 0, 0, 0, 0, 0, 0, 0eeh, 03fh |
||
96 | one_shl_64 DB 0, 0, 0, 0, 0, 0, 0f0h, 043h |
||
97 | one_shr_64 DB 0, 0, 0, 0, 0, 0, 0f0h, 03bh |
||
98 | one DB 0, 0, 0, 0, 0, 0, 0f0h, 03fh |
||
99 | half DB 0, 0, 0, 0, 0, 0, 0e0h, 03fh |
||
100 | big_number DB 0, 0, 0, 0, 0, 0, 0ffh, 0ffh, 0feh, 07fh |
||
101 | |||
102 | ifdef DEBUG |
||
103 | public fpcw |
||
104 | public fpsw |
||
105 | fpcw dw 0 |
||
106 | fpsw dw 0 |
||
107 | endif |
||
108 | |||
109 | FPU_STATE STRUC |
||
110 | CONTROL_WORD DW ? |
||
111 | reserved_1 DW ? |
||
112 | STATUS_WORD DD ? |
||
113 | TAG_WORD DW ? |
||
114 | reserved_3 DW ? |
||
115 | IP_OFFSET DD ? |
||
116 | CS_SLCT DW ? |
||
117 | OPCODE DW ? |
||
118 | DATA_OFFSET DD ? |
||
119 | OPERAND_SLCT DW ? |
||
120 | reserved_4 DW ? |
||
121 | FPU_STATE ENDS |
||
122 | |||
123 | ENV_SIZE EQU 28 |
||
124 | |||
125 | |||
126 | DATA32 ENDS |
||
127 | |||
128 | _TEXT SEGMENT DWORD USE32 PUBLIC 'CODE' |
||
129 | _TEXT ENDS |
||
130 | |||
131 | DATA32 SEGMENT DWORD USE32 PUBLIC 'DATA' |
||
132 | DATA32 ENDS |
||
133 | |||
134 | CONST32 SEGMENT DWORD USE32 PUBLIC 'CONST' |
||
135 | CONST32 ENDS |
||
136 | |||
137 | BSS32 SEGMENT DWORD USE32 PUBLIC 'BSS' |
||
138 | BSS32 ENDS |
||
139 | |||
140 | DGROUP GROUP CONST32, BSS32, DATA32 |
||
141 | |||
142 | |||
143 | |||
144 | CODE32 SEGMENT DWORD USE32 PUBLIC 'CODE' |
||
145 | |||
146 | assume cs:_TEXT, ds:DGROUP, es:DGROUP, ss:nothing |
||
147 | |||
148 | |||
149 | fprem_common PROC NEAR |
||
150 | |||
151 | push eax |
||
152 | push ebx |
||
153 | push ecx |
||
154 | mov eax, [MAIN_DENOM+6+esp] ; exponent and high 16 bits of mantissa |
||
155 | xor eax, ONESMASK ; invert bits that have to be one |
||
156 | test eax, ONESMASK ; check bits that have to be one |
||
157 | jnz remainder_hardware_ok |
||
158 | shr eax, 11 |
||
159 | and eax, 0fh |
||
160 | cmp byte ptr fprem_risc_table[eax], 0 ; check for (1,4,7,a,d) |
||
161 | jz remainder_hardware_ok |
||
162 | |||
163 | ; The denominator has the bit pattern. Weed out the funny cases like NaNs |
||
164 | ; before applying the software version. Our caller guarantees that the |
||
165 | ; denominator is not a denormal. Here we check for: |
||
166 | ; denominator inf, NaN, unnormal |
||
167 | ; numerator inf, NaN, unnormal, denormal |
||
168 | |||
169 | mov eax, [MAIN_DENOM+6+esp] ; exponent and high 16 bits of mantissa |
||
170 | and eax, 07fff0000h ; mask the exponent only |
||
171 | cmp eax, 07fff0000h ; check for INF or NaN |
||
172 | je remainder_hardware_ok |
||
173 | mov eax, [MAIN_NUMER+6+esp] ; exponent and high 16 bits of mantissa |
||
174 | and eax, 07fff0000h ; mask the exponent only |
||
175 | jz remainder_hardware_ok ; jif numerator denormal |
||
176 | cmp eax, 07fff0000h ; check for INF or NaN |
||
177 | je remainder_hardware_ok |
||
178 | mov eax, [esp + MAIN_NUMER + 4] ; high mantissa bits - numerator |
||
179 | add eax, eax ; set carry if explicit bit set |
||
180 | jnz remainder_hardware_ok ; jmp if numerator is unnormal |
||
181 | mov eax, [esp + MAIN_DENOM + 4] ; high mantissa bits - denominator |
||
182 | add eax, eax ; set carry if explicit bit set |
||
183 | jnz remainder_hardware_ok ; jmp if denominator is unnormal |
||
184 | |||
185 | rem_patch: |
||
186 | mov eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator) |
||
187 | and eax, 07fffh ; clear sy |
||
188 | add eax, 63 ; evaluate ey + 63 |
||
189 | mov ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator) |
||
190 | and ebx, 07fffh ; clear sx |
||
191 | sub ebx, eax ; evaluate the exponent difference (ex - ey) |
||
192 | ja rem_large ; if ex > ey + 63, case of large arguments |
||
193 | rem_patch_loop: |
||
194 | mov eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator) |
||
195 | and eax, 07fffh ; clear sy |
||
196 | add eax, 10 ; evaluate ey + 10 |
||
197 | mov ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator) |
||
198 | and ebx, 07fffh ; clear sx |
||
199 | sub ebx, eax ; evaluate the exponent difference (ex - ey) |
||
200 | js remainder_hardware_ok ; safe if ey + 10 > ex |
||
201 | fld tbyte ptr [MAIN_NUMER+esp] ; load the numerator |
||
202 | mov eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator) |
||
203 | mov ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator) |
||
204 | and ebx, 07fffh ; clear sx |
||
205 | mov ecx, ebx |
||
206 | sub ebx, eax |
||
207 | and ebx, 07h |
||
208 | or ebx, 04h |
||
209 | sub ecx, ebx |
||
210 | mov ebx, eax |
||
211 | and ebx, 08000h ; keep sy |
||
212 | or ecx, ebx ; merge the sign of y |
||
213 | mov dword ptr [MAIN_DENOM+8+esp], ecx |
||
214 | fld tbyte ptr [MAIN_DENOM+esp] ; load the shifted denominator |
||
215 | mov dword ptr [MAIN_DENOM+8+esp], eax ; restore the initial denominator |
||
216 | fxch |
||
217 | fprem ; this rem is safe |
||
218 | fstp tbyte ptr [MAIN_NUMER+esp] ; update the numerator |
||
219 | fstp st(0) ; pop the stack |
||
220 | jmp rem_patch_loop |
||
221 | rem_large: |
||
222 | test edx, 02h ; is denominator already saved |
||
223 | jnz already_saved |
||
224 | fld tbyte ptr[esp + MAIN_DENOM] |
||
225 | fstp tbyte ptr[esp + MAIN_DENOM_SAVE] ; save denominator |
||
226 | already_saved: |
||
227 | ; Save user's precision control and institute 80. The fp ops in |
||
228 | ; rem_large_loop must not round to user's precision (if it is less |
||
229 | ; than 80) because the hardware would not have done so. We are |
||
230 | ; aping the hardware here, which is all extended. |
||
231 | |||
232 | fnstcw [esp+MAIN_PREV_CW] ; save caller's control word |
||
233 | mov eax, dword ptr[esp + MAIN_PREV_CW] |
||
234 | or eax, 033fh ; mask exceptions, pc=80 |
||
235 | mov [esp + MAIN_PATCH_CW], eax |
||
236 | fldcw [esp + MAIN_PATCH_CW] |
||
237 | |||
238 | mov eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator) |
||
239 | and eax, 07fffh ; clear sy |
||
240 | mov ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator) |
||
241 | and ebx, 07fffh ; clear sx |
||
242 | sub ebx, eax ; evaluate the exponent difference |
||
243 | and ebx, 03fh |
||
244 | or ebx, 020h |
||
245 | add ebx, 1 |
||
246 | mov ecx, ebx |
||
247 | mov eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator) |
||
248 | mov ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator) |
||
249 | and ebx, 07fffh ; clear sx |
||
250 | and eax, 08000h ; keep sy |
||
251 | or ebx, eax ; merge the sign of y |
||
252 | mov dword ptr[MAIN_DENOM+8+esp], ebx ; make ey equal to ex (scaled denominator) |
||
253 | fld tbyte ptr [MAIN_DENOM+esp] ; load the scaled denominator |
||
254 | fabs |
||
255 | fld tbyte ptr [MAIN_NUMER+esp] ; load the numerator |
||
256 | fabs |
||
257 | rem_large_loop: |
||
258 | fcom |
||
259 | fstsw ax |
||
260 | and eax, 00100h |
||
261 | jnz rem_no_sub |
||
262 | fsub st, st(1) |
||
263 | rem_no_sub: |
||
264 | fxch |
||
265 | fmul qword ptr half |
||
266 | fxch |
||
267 | sub ecx, 1 ; decrement the loop counter |
||
268 | jnz rem_large_loop |
||
269 | mov ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator) |
||
270 | fstp tbyte ptr[esp + MAIN_NUMER] ; save result |
||
271 | fstp st ; toss modified denom |
||
272 | fld tbyte ptr[esp + MAIN_DENOM_SAVE] |
||
273 | fld tbyte ptr[big_number] ; force C2 to be set |
||
274 | fprem |
||
275 | fstp st |
||
276 | fld tbyte ptr[esp + MAIN_NUMER] ; restore saved result |
||
277 | |||
278 | fldcw [esp + MAIN_PREV_CW] ; restore caller's control word |
||
279 | and ebx, 08000h ; keep sx |
||
280 | jz rem_done |
||
281 | fchs |
||
282 | jmp rem_done |
||
283 | remainder_hardware_ok: |
||
284 | fld tbyte ptr [MAIN_DENOM+esp] ; load the denominator |
||
285 | fld tbyte ptr [MAIN_NUMER+esp] ; load the numerator |
||
286 | fprem ; and finally do a remainder |
||
287 | ; prem_main_routine end |
||
288 | rem_done: |
||
289 | test edx, 03h |
||
290 | jz rem_exit |
||
291 | fnstsw [esp + MAIN_FPREM_SW] ; save Q0 Q1 and Q2 |
||
292 | test edx, 01h |
||
293 | jz do_not_de_scale |
||
294 | ; De-scale the result. Go to pc=80 to prevent from fmul |
||
295 | ; from user precision (fprem does not round the result). |
||
296 | fnstcw [esp + MAIN_PREV_CW] ; save callers control word |
||
297 | mov eax, [esp + MAIN_PREV_CW] |
||
298 | or eax, 0300h ; pc = 80 |
||
299 | mov [esp + MAIN_PATCH_CW], eax |
||
300 | fldcw [esp + MAIN_PATCH_CW] |
||
301 | fmul qword ptr one_shr_64 |
||
302 | fldcw [esp + MAIN_PREV_CW] ; restore callers CW |
||
303 | do_not_de_scale: |
||
304 | mov eax, [esp + MAIN_FPREM_SW] |
||
305 | fxch |
||
306 | fstp st |
||
307 | fld tbyte ptr[esp + MAIN_DENOM_SAVE] |
||
308 | fxch |
||
309 | and eax, 04300h ; restore saved Q0, Q1, Q2 |
||
310 | sub esp, ENV_SIZE |
||
311 | fnstenv [esp] |
||
312 | and [esp].STATUS_WORD, 0bcffh |
||
313 | or [esp].STATUS_WORD, eax |
||
314 | fldenv [esp] |
||
315 | add esp, ENV_SIZE |
||
316 | rem_exit: |
||
317 | pop ecx |
||
318 | pop ebx |
||
319 | pop eax |
||
320 | CHECKSW ; debug only: save status |
||
321 | ret |
||
322 | fprem_common ENDP |
||
323 | |||
324 | comment ~**************************************************************** |
||
325 | |||
326 | ; |
||
327 | ; float frem_chk (float numer, float denom) |
||
328 | ; |
||
329 | public frem_chk |
||
330 | frem_chk PROC NEAR |
||
331 | push edx |
||
332 | sub esp, STACK_SIZE |
||
333 | fld dword ptr [STACK_SIZE+8+esp] |
||
334 | fstp tbyte ptr [NUMER+esp] |
||
335 | fld dword ptr [STACK_SIZE+12+esp] |
||
336 | fstp tbyte ptr [DENOM+esp] |
||
337 | mov edx, 0 ; dx = 1 if denormal extended divisor |
||
338 | call fprem_common |
||
339 | fxch |
||
340 | fstp st |
||
341 | add esp, STACK_SIZE |
||
342 | pop edx |
||
343 | ret |
||
344 | frem_chk ENDP |
||
345 | ; end frem_chk |
||
346 | |||
347 | ; |
||
348 | ; double drem_chk (double numer, double denom) |
||
349 | ; |
||
350 | public drem_chk |
||
351 | drem_chk PROC NEAR |
||
352 | push edx |
||
353 | sub esp, STACK_SIZE |
||
354 | fld qword ptr [STACK_SIZE+8+esp] |
||
355 | fstp tbyte ptr [NUMER+esp] |
||
356 | fld qword ptr [STACK_SIZE+16+esp] |
||
357 | fstp tbyte ptr [DENOM+esp] |
||
358 | mov edx, 0 ; dx = 1 if denormal extended divisor |
||
359 | call fprem_common |
||
360 | fxch |
||
361 | fstp st |
||
362 | add esp, STACK_SIZE |
||
363 | pop edx |
||
364 | ret |
||
365 | |||
366 | drem_chk ENDP |
||
367 | ; end drem_chk |
||
368 | |||
369 | ; |
||
370 | ; long double lrem_chk(long double number,long double denom) |
||
371 | ; |
||
372 | public lrem_chk |
||
373 | lrem_chk PROC NEAR |
||
374 | fld tbyte ptr [20+esp] |
||
375 | fld tbyte ptr [4+esp] |
||
376 | call fprem_chk |
||
377 | fxch |
||
378 | fstp st |
||
379 | ret |
||
380 | lrem_chk ENDP |
||
381 | |||
382 | **********************************************************************~ |
||
383 | |||
384 | ; |
||
385 | ; FPREM: ST = remainder(ST, ST(1)) |
||
386 | ; |
||
387 | ; Compiler version of the FPREM must preserve the arguments in the floating |
||
388 | ; point stack. |
||
389 | |||
390 | public __fprem_chk |
||
391 | defpe __fprem_chk |
||
392 | push edx |
||
393 | sub esp, STACK_SIZE |
||
394 | fstp tbyte ptr [NUMER+esp] |
||
395 | fstp tbyte ptr [DENOM+esp] |
||
396 | xor edx, edx |
||
397 | ; prem_main_routine begin |
||
398 | mov eax,[DENOM+6+esp] ; exponent and high 16 bits of mantissa |
||
399 | test eax,07fff0000h ; check for denormal |
||
400 | jz denormal |
||
401 | call fprem_common |
||
402 | add esp, STACK_SIZE |
||
403 | pop edx |
||
404 | ret |
||
405 | |||
406 | denormal: |
||
407 | fld tbyte ptr [DENOM+esp] ; load the denominator |
||
408 | fld tbyte ptr [NUMER+esp] ; load the numerator |
||
409 | mov eax, [DENOM+esp] ; test for whole mantissa == 0 |
||
410 | or eax, [DENOM+4+esp] ; test for whole mantissa == 0 |
||
411 | jz remainder_hardware_ok_l ; denominator is zero |
||
412 | fxch |
||
413 | fstp tbyte ptr[esp + DENOM_SAVE] ; save org denominator |
||
414 | fld tbyte ptr[esp + DENOM] |
||
415 | fxch |
||
416 | or edx, 02h |
||
417 | ; |
||
418 | ; For this we need pc=80. Also, mask exceptions so we don't take any |
||
419 | ; denormal operand exceptions. It is guaranteed that the descaling |
||
420 | ; later on will take underflow, which is what the hardware would have done |
||
421 | ; on a normal fprem. |
||
422 | ; |
||
423 | fnstcw [PREV_CW+esp] ; save caller's control word |
||
424 | mov eax, [PREV_CW+esp] |
||
425 | or eax, 0033fh ; mask exceptions, pc=80 |
||
426 | mov [PATCH_CW+esp], eax |
||
427 | fldcw [PATCH_CW+esp] ; mask exceptions & pc=80 |
||
428 | |||
429 | ; The denominator is a denormal. For most numerators, scale both numerator |
||
430 | ; and denominator to get rid of denormals. Then execute the common code |
||
431 | ; with the flag set to indicate that the result must be de-scaled. |
||
432 | ; For large numerators this won't work because the scaling would cause |
||
433 | ; overflow. In this case we know the numerator is large, the denominator |
||
434 | ; is small (denormal), so the exponent difference is also large. This means |
||
435 | ; the rem_large code will be used and this code depends on the difference |
||
436 | ; in exponents modulo 64. Adding 64 to the denominators exponent |
||
437 | ; doesn't change the modulo 64 difference. So we can scale the denominator |
||
438 | ; by 64, making it not denormal, and this won't effect the result. |
||
439 | ; |
||
440 | ; To start with, figure out if numerator is large |
||
441 | |||
442 | mov eax, [esp + NUMER + 8] ; load numerator exponent |
||
443 | and eax, 7fffh ; isolate numerator exponent |
||
444 | cmp eax, 7fbeh ; compare Nexp to Maxexp-64 |
||
445 | ja big_numer_rem_de ; jif big numerator |
||
446 | |||
447 | ; So the numerator is not large scale both numerator and denominator |
||
448 | |||
449 | or edx, 1 ; edx = 1, if denormal extended divisor |
||
450 | fmul qword ptr one_shl_64 ; make numerator not denormal |
||
451 | fstp tbyte ptr[esp + NUMER] |
||
452 | fmul qword ptr one_shl_64 ; make denominator not denormal |
||
453 | fstp tbyte ptr[esp + DENOM] |
||
454 | jmp scaling_done |
||
455 | |||
456 | ; The numerator is large. Scale only the denominator, which will not |
||
457 | ; change the result which we know will be partial. Set the scale flag |
||
458 | ; to false. |
||
459 | big_numer_rem_de: |
||
460 | ; We must do this with pc=80 to avoid rounding to single/double. |
||
461 | ; In this case we do not mask exceptions so that we will take |
||
462 | ; denormal operand, as would the hardware. |
||
463 | fnstcw [PREV_CW+esp] ; save caller's control word |
||
464 | mov eax, [PREV_CW+esp] |
||
465 | or eax, 00300h ; pc=80 |
||
466 | mov [PATCH_CW+esp], eax |
||
467 | fldcw [PATCH_CW+esp] ; pc=80 |
||
468 | |||
469 | fstp st ; Toss numerator |
||
470 | fmul qword ptr one_shl_64 ; make denominator not denormal |
||
471 | fstp tbyte ptr[esp + DENOM] |
||
472 | |||
473 | ; Restore the control word which was fiddled to scale at 80-bit precision. |
||
474 | ; Then call the common code. |
||
475 | scaling_done: |
||
476 | fldcw [esp + PREV_CW] ; restore callers control word |
||
477 | call fprem_common |
||
478 | add esp, STACK_SIZE |
||
479 | pop edx |
||
480 | ret |
||
481 | |||
482 | remainder_hardware_ok_l: |
||
483 | fprem ; and finally do a remainder |
||
484 | |||
485 | CHECKSW |
||
486 | |||
487 | add esp, STACK_SIZE |
||
488 | pop edx |
||
489 | ret |
||
490 | __fprem_chk ENDP |
||
491 | ; end fprem_chk |
||
492 | |||
493 | |||
494 | ; |
||
495 | ; FPREM1 code begins here |
||
496 | ; |
||
497 | |||
498 | |||
499 | fprem1_common PROC NEAR |
||
500 | |||
501 | push eax |
||
502 | push ebx |
||
503 | push ecx |
||
504 | mov eax, [MAIN_DENOM+6+esp] ; exponent and high 16 bits of mantissa |
||
505 | xor eax, ONESMASK ; invert bits that have to be one |
||
506 | test eax, ONESMASK ; check bits that have to be one |
||
507 | jnz remainder1_hardware_ok |
||
508 | shr eax, 11 |
||
509 | and eax, 0fh |
||
510 | cmp byte ptr fprem_risc_table[eax], 0 ; check for (1,4,7,a,d) |
||
511 | jz remainder1_hardware_ok |
||
512 | |||
513 | ; The denominator has the bit pattern. Weed out the funny cases like NaNs |
||
514 | ; before applying the software version. Our caller guarantees that the |
||
515 | ; denominator is not a denormal. Here we check for: |
||
516 | ; denominator inf, NaN, unnormal |
||
517 | ; numerator inf, NaN, unnormal, denormal |
||
518 | |||
519 | mov eax, [MAIN_DENOM+6+esp] ; exponent and high 16 bits of mantissa |
||
520 | and eax, 07fff0000h ; mask the exponent only |
||
521 | cmp eax, 07fff0000h ; check for INF or NaN |
||
522 | je remainder1_hardware_ok |
||
523 | mov eax, [MAIN_NUMER+6+esp] ; exponent and high 16 bits of mantissa |
||
524 | and eax, 07fff0000h ; mask the exponent only |
||
525 | jz remainder1_hardware_ok ; jif numerator denormal |
||
526 | cmp eax, 07fff0000h ; check for INF or NaN |
||
527 | je remainder1_hardware_ok |
||
528 | mov eax, [esp + MAIN_NUMER + 4] ; high mantissa bits - numerator |
||
529 | add eax, eax ; set carry if explicit bit set |
||
530 | jnz remainder1_hardware_ok ; jmp if numerator is unnormal |
||
531 | mov eax, [esp + MAIN_DENOM + 4] ; high mantissa bits - denominator |
||
532 | add eax, eax ; set carry if explicit bit set |
||
533 | jnz remainder1_hardware_ok ; jmp if denominator is unnormal |
||
534 | |||
535 | rem1_patch: |
||
536 | mov eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator) |
||
537 | and eax, 07fffh ; clear sy |
||
538 | add eax, 63 ; evaluate ey + 63 |
||
539 | mov ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator) |
||
540 | and ebx, 07fffh ; clear sx |
||
541 | sub ebx, eax ; evaluate the exponent difference (ex - ey) |
||
542 | ja rem1_large ; if ex > ey + 63, case of large arguments |
||
543 | rem1_patch_loop: |
||
544 | mov eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator) |
||
545 | and eax, 07fffh ; clear sy |
||
546 | add eax, 10 ; evaluate ey + 10 |
||
547 | mov ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator) |
||
548 | and ebx, 07fffh ; clear sx |
||
549 | sub ebx, eax ; evaluate the exponent difference (ex - ey) |
||
550 | js remainder1_hardware_ok ; safe if ey + 10 > ex |
||
551 | fld tbyte ptr [MAIN_NUMER+esp] ; load the numerator |
||
552 | mov eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator) |
||
553 | mov ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator) |
||
554 | and ebx, 07fffh ; clear sx |
||
555 | mov ecx, ebx |
||
556 | sub ebx, eax |
||
557 | and ebx, 07h |
||
558 | or ebx, 04h |
||
559 | sub ecx, ebx |
||
560 | mov ebx, eax |
||
561 | and ebx, 08000h ; keep sy |
||
562 | or ecx, ebx ; merge the sign of y |
||
563 | mov dword ptr [MAIN_DENOM+8+esp], ecx |
||
564 | fld tbyte ptr [MAIN_DENOM+esp] ; load the shifted denominator |
||
565 | mov dword ptr [MAIN_DENOM+8+esp], eax ; restore the initial denominator |
||
566 | fxch |
||
567 | fprem ; this rem is safe |
||
568 | fstp tbyte ptr [MAIN_NUMER+esp] ; update the numerator |
||
569 | fstp st(0) ; pop the stack |
||
570 | jmp rem1_patch_loop |
||
571 | rem1_large: |
||
572 | test ebx, 02h ; is denominator already saved |
||
573 | jnz already_saved1 |
||
574 | fld tbyte ptr[esp + MAIN_DENOM] |
||
575 | fstp tbyte ptr[esp + MAIN_DENOM_SAVE] ; save denominator |
||
576 | already_saved1: |
||
577 | ; Save user's precision control and institute 80. The fp ops in |
||
578 | ; rem1_large_loop must not round to user's precision (if it is less |
||
579 | ; than 80) because the hardware would not have done so. We are |
||
580 | ; aping the hardware here, which is all extended. |
||
581 | |||
582 | fnstcw [esp+MAIN_PREV_CW] ; save caller's control word |
||
583 | mov eax, dword ptr[esp + MAIN_PREV_CW] |
||
584 | or eax, 033fh ; mask exceptions, pc=80 |
||
585 | mov [esp + MAIN_PATCH_CW], eax |
||
586 | fldcw [esp + MAIN_PATCH_CW] |
||
587 | |||
588 | mov eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator) |
||
589 | and eax, 07fffh ; clear sy |
||
590 | mov ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator) |
||
591 | and ebx, 07fffh ; clear sx |
||
592 | sub ebx, eax ; evaluate the exponent difference |
||
593 | and ebx, 03fh |
||
594 | or ebx, 020h |
||
595 | add ebx, 1 |
||
596 | mov ecx, ebx |
||
597 | mov eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator) |
||
598 | mov ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator) |
||
599 | and ebx, 07fffh ; clear sx |
||
600 | and eax, 08000h ; keep sy |
||
601 | or ebx, eax ; merge the sign of y |
||
602 | mov dword ptr[MAIN_DENOM+8+esp], ebx ; make ey equal to ex (scaled denominator) |
||
603 | fld tbyte ptr [MAIN_DENOM+esp] ; load the scaled denominator |
||
604 | fabs |
||
605 | fld tbyte ptr [MAIN_NUMER+esp] ; load the numerator |
||
606 | fabs |
||
607 | rem1_large_loop: |
||
608 | fcom |
||
609 | fstsw ax |
||
610 | and eax, 00100h |
||
611 | jnz rem1_no_sub |
||
612 | fsub st, st(1) |
||
613 | rem1_no_sub: |
||
614 | fxch |
||
615 | fmul qword ptr half |
||
616 | fxch |
||
617 | sub ecx, 1 ; decrement the loop counter |
||
618 | jnz rem1_large_loop |
||
619 | mov ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator) |
||
620 | fstp tbyte ptr[esp + MAIN_NUMER] ; save result |
||
621 | fstp st ; toss modified denom |
||
622 | fld tbyte ptr[esp + MAIN_DENOM_SAVE] |
||
623 | fld tbyte ptr[big_number] ; force C2 to be set |
||
624 | fprem1 |
||
625 | fstp st |
||
626 | fld tbyte ptr[esp + MAIN_NUMER] ; restore saved result |
||
627 | |||
628 | fldcw [esp + MAIN_PREV_CW] ; restore caller's control word |
||
629 | and ebx, 08000h ; keep sx |
||
630 | jz rem1_done |
||
631 | fchs |
||
632 | jmp rem1_done |
||
633 | remainder1_hardware_ok: |
||
634 | fld tbyte ptr [MAIN_DENOM+esp] ; load the denominator |
||
635 | fld tbyte ptr [MAIN_NUMER+esp] ; load the numerator |
||
636 | fprem1 ; and finally do a remainder |
||
637 | ; prem1_main_routine end |
||
638 | rem1_done: |
||
639 | test edx, 03h |
||
640 | jz rem1_exit |
||
641 | fnstsw [esp + MAIN_FPREM_SW] ; save Q0 Q1 and Q2 |
||
642 | test edx, 01h |
||
643 | jz do_not_de_scale1 |
||
644 | ; De-scale the result. Go to pc=80 to prevent from fmul |
||
645 | ; from user precision (fprem does not round the result). |
||
646 | fnstcw [esp + MAIN_PREV_CW] ; save callers control word |
||
647 | mov eax, [esp + MAIN_PREV_CW] |
||
648 | or eax, 0300h ; pc = 80 |
||
649 | mov [esp + MAIN_PATCH_CW], eax |
||
650 | fldcw [esp + MAIN_PATCH_CW] |
||
651 | fmul qword ptr one_shr_64 |
||
652 | fldcw [esp + MAIN_PREV_CW] ; restore callers CW |
||
653 | do_not_de_scale1: |
||
654 | mov eax, [esp + MAIN_FPREM_SW] |
||
655 | fxch |
||
656 | fstp st |
||
657 | fld tbyte ptr[esp + MAIN_DENOM_SAVE] |
||
658 | fxch |
||
659 | and eax, 04300h ; restore saved Q0, Q1, Q2 |
||
660 | sub esp, ENV_SIZE |
||
661 | fnstenv [esp] |
||
662 | and [esp].STATUS_WORD, 0bcffh |
||
663 | or [esp].STATUS_WORD, eax |
||
664 | fldenv [esp] |
||
665 | add esp, ENV_SIZE |
||
666 | rem1_exit: |
||
667 | pop ecx |
||
668 | pop ebx |
||
669 | pop eax |
||
670 | CHECKSW ; debug only: save status |
||
671 | ret |
||
672 | fprem1_common ENDP |
||
673 | |||
674 | |||
675 | comment ~*************************************************************** |
||
676 | ; |
||
677 | ; float frem1_chk (float numer, float denom) |
||
678 | ; |
||
679 | public frem1_chk |
||
680 | frem1_chk PROC NEAR |
||
681 | push edx |
||
682 | sub esp, STACK_SIZE |
||
683 | fld dword ptr [STACK_SIZE+8+esp] |
||
684 | fstp tbyte ptr [NUMER+esp] |
||
685 | fld dword ptr [STACK_SIZE+12+esp] |
||
686 | fstp tbyte ptr [DENOM+esp] |
||
687 | mov edx, 0 ; dx = 1 if denormal extended divisor |
||
688 | call fprem1_common |
||
689 | fxch |
||
690 | fstp st |
||
691 | add esp, STACK_SIZE |
||
692 | pop edx |
||
693 | ret |
||
694 | frem1_chk ENDP |
||
695 | ; end frem1_chk |
||
696 | |||
697 | ; |
||
698 | ; double drem1_chk (double numer, double denom) |
||
699 | ; |
||
700 | public drem1_chk |
||
701 | drem1_chk PROC NEAR |
||
702 | push edx |
||
703 | sub esp, STACK_SIZE |
||
704 | fld qword ptr [STACK_SIZE+8+esp] |
||
705 | fstp tbyte ptr [NUMER+esp] |
||
706 | fld qword ptr [STACK_SIZE+16+esp] |
||
707 | fstp tbyte ptr [DENOM+esp] |
||
708 | mov edx, 0 ; dx = 1 if denormal extended divisor |
||
709 | call fprem1_common |
||
710 | fxch |
||
711 | fstp st |
||
712 | add esp, STACK_SIZE |
||
713 | pop edx |
||
714 | ret |
||
715 | |||
716 | drem1_chk ENDP |
||
717 | ; end drem1_chk |
||
718 | |||
719 | ; |
||
720 | ; long double lrem1_chk(long double number,long double denom) |
||
721 | ; |
||
722 | public lrem1_chk |
||
723 | lrem1_chk PROC NEAR |
||
724 | fld tbyte ptr [20+esp] |
||
725 | fld tbyte ptr [4+esp] |
||
726 | call fprem1_chk |
||
727 | fxch |
||
728 | fstp st |
||
729 | ret |
||
730 | lrem1_chk ENDP |
||
731 | ********************************************************************~ |
||
732 | |||
733 | ; |
||
734 | ; FPREM1: ST = remainder(ST, ST(1)) - IEEE version of rounding |
||
735 | ; |
||
736 | ; Compiler version of the FPREM must preserve the arguments in the floating |
||
737 | ; point stack. |
||
738 | |||
739 | public __fprem1_chk |
||
740 | defpe __fprem1_chk |
||
741 | push edx |
||
742 | sub esp, STACK_SIZE |
||
743 | fstp tbyte ptr [NUMER+esp] |
||
744 | fstp tbyte ptr [DENOM+esp] |
||
745 | mov edx, 0 |
||
746 | ; prem1_main_routine begin |
||
747 | mov eax,[DENOM+6+esp] ; exponent and high 16 bits of mantissa |
||
748 | test eax,07fff0000h ; check for denormal |
||
749 | jz denormal1 |
||
750 | call fprem1_common |
||
751 | add esp, STACK_SIZE |
||
752 | pop edx |
||
753 | ret |
||
754 | |||
755 | denormal1: |
||
756 | fld tbyte ptr [DENOM+esp] ; load the denominator |
||
757 | fld tbyte ptr [NUMER+esp] ; load the numerator |
||
758 | mov eax, [DENOM+esp] ; test for whole mantissa == 0 |
||
759 | or eax, [DENOM+4+esp] ; test for whole mantissa == 0 |
||
760 | jz remainder1_hardware_ok_l ; denominator is zero |
||
761 | fxch |
||
762 | fstp tbyte ptr[esp + DENOM_SAVE] ; save org denominator |
||
763 | fld tbyte ptr[esp + DENOM] |
||
764 | fxch |
||
765 | or edx, 02h |
||
766 | ; |
||
767 | ; For this we need pc=80. Also, mask exceptions so we don't take any |
||
768 | ; denormal operand exceptions. It is guaranteed that the descaling |
||
769 | ; later on will take underflow, which is what the hardware would have done |
||
770 | ; on a normal fprem. |
||
771 | ; |
||
772 | fnstcw [PREV_CW+esp] ; save caller's control word |
||
773 | mov eax, [PREV_CW+esp] |
||
774 | or eax, 0033fh ; mask exceptions, pc=80 |
||
775 | mov [PATCH_CW+esp], eax |
||
776 | fldcw [PATCH_CW+esp] ; mask exceptions & pc=80 |
||
777 | |||
778 | ; The denominator is a denormal. For most numerators, scale both numerator |
||
779 | ; and denominator to get rid of denormals. Then execute the common code |
||
780 | ; with the flag set to indicate that the result must be de-scaled. |
||
781 | ; For large numerators this won't work because the scaling would cause |
||
782 | ; overflow. In this case we know the numerator is large, the denominator |
||
783 | ; is small (denormal), so the exponent difference is also large. This means |
||
784 | ; the rem1_large code will be used and this code depends on the difference |
||
785 | ; in exponents modulo 64. Adding 64 to the denominators exponent |
||
786 | ; doesn't change the modulo 64 difference. So we can scale the denominator |
||
787 | ; by 64, making it not denormal, and this won't effect the result. |
||
788 | ; |
||
789 | ; To start with, figure out if numerator is large |
||
790 | |||
791 | mov eax, [esp + NUMER + 8] ; load numerator exponent |
||
792 | and eax, 7fffh ; isolate numerator exponent |
||
793 | cmp eax, 7fbeh ; compare Nexp to Maxexp-64 |
||
794 | ja big_numer_rem1_de ; jif big numerator |
||
795 | |||
796 | ; So the numerator is not large scale both numerator and denominator |
||
797 | |||
798 | or edx, 1 ; edx = 1, if denormal extended divisor |
||
799 | fmul qword ptr one_shl_64 ; make numerator not denormal |
||
800 | fstp tbyte ptr[esp + NUMER] |
||
801 | fmul qword ptr one_shl_64 ; make denominator not denormal |
||
802 | fstp tbyte ptr[esp + DENOM] |
||
803 | jmp scaling_done1 |
||
804 | |||
805 | ; The numerator is large. Scale only the denominator, which will not |
||
806 | ; change the result which we know will be partial. Set the scale flag |
||
807 | ; to false. |
||
808 | big_numer_rem1_de: |
||
809 | ; We must do this with pc=80 to avoid rounding to single/double. |
||
810 | ; In this case we do not mask exceptions so that we will take |
||
811 | ; denormal operand, as would the hardware. |
||
812 | fnstcw [PREV_CW+esp] ; save caller's control word |
||
813 | mov eax, [PREV_CW+esp] |
||
814 | or eax, 00300h ; pc=80 |
||
815 | mov [PATCH_CW+esp], eax |
||
816 | fldcw [PATCH_CW+esp] ; pc=80 |
||
817 | |||
818 | fstp st ; Toss numerator |
||
819 | fmul qword ptr one_shl_64 ; make denominator not denormal |
||
820 | fstp tbyte ptr[esp + DENOM] |
||
821 | |||
822 | ; Restore the control word which was fiddled to scale at 80-bit precision. |
||
823 | ; Then call the common code. |
||
824 | scaling_done1: |
||
825 | fldcw [esp + PREV_CW] ; restore callers control word |
||
826 | call fprem1_common |
||
827 | add esp, STACK_SIZE |
||
828 | pop edx |
||
829 | ret |
||
830 | |||
831 | remainder1_hardware_ok_l: |
||
832 | fprem ; and finally do a remainder |
||
833 | |||
834 | CHECKSW |
||
835 | |||
836 | add esp, STACK_SIZE |
||
837 | pop edx |
||
838 | ret |
||
839 | __fprem1_chk ENDP |
||
840 | ; end fprem1_chk |
||
841 | |||
842 | ifdef DEBUG |
||
843 | public fpinit |
||
844 | fpinit PROC NEAR |
||
845 | fninit |
||
846 | ret |
||
847 | fpinit ENDP |
||
848 | endif |
||
849 | |||
850 | CODE32 ENDS |
||
851 | END |