WebSVN – Kolibri OS – Blame – /programs/develop/open watcom/trunk/clib/fpu/chipr32.asm

Rev	Author	Line No.	Line
554	serge	1	;*****************************************************************************
		2	;*
		3	;* Open Watcom Project
		4	;*
		5	;* Portions Copyright (c) 1983-2002 Sybase, Inc. All Rights Reserved.
		6	;*
		7	;* ========================================================================
		8	;*
		9	;* This file contains Original Code and/or Modifications of Original
		10	;* Code as defined in and that are subject to the Sybase Open Watcom
		11	;* Public License version 1.0 (the 'License'). You may not use this file
		12	;* except in compliance with the License. BY USING THIS FILE YOU AGREE TO
		13	;* ALL TERMS AND CONDITIONS OF THE LICENSE. A copy of the License is
		14	;* provided with the Original Code and Modifications, and is also
		15	;* available at www.sybase.com/developer/opensource.
		16	;*
		17	;* The Original Code and all software distributed under the License are
		18	;* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
		19	;* EXPRESS OR IMPLIED, AND SYBASE AND ALL CONTRIBUTORS HEREBY DISCLAIM
		20	;* ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF
		21	;* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR
		22	;* NON-INFRINGEMENT. Please see the License for the specific language
		23	;* governing rights and limitations under the License.
		24	;*
		25	;* ========================================================================
		26	;*
		27	;* Description: Software patch for the x87 floating-point remainder
		28	;* instructions: __fprem_chk (FPREM) and __fprem1_chk (FPREM1).
		29	;*
		30	;*****************************************************************************
		31
		32
		33	; static char sccs_id[] = "@(#)fprem32.asm 1.5 12/22/94 12:48:07";
		34	;
		35	; This code is being published by Intel to users of the Pentium(tm)
		36	; processor. Recipients are authorized to copy, modify, compile, use and
		37	; distribute the code.
		38	;
		39	; Intel makes no warranty of any kind with regard to this code, including
		40	; but not limited to, implied warranties or merchantability and fitness for
		41	; a particular purpose. Intel assumes no responsibility for any errors that
		42	; may appear in this code.
		43	;
		44	; No patent licenses are granted, express or implied.
		45	;
		46	;
		47	include mdef.inc
		48
		49	.386
		50	.387
		51
		52	;
		53	; PRELIMINARY VERSION of the software patch for the floating
		54	; point remainder.
		55	;
		56
		57
		58	CHECKSW MACRO
			; Debug aid: when DEBUG is defined, store the FPU status word in
			; fpsw and the FPU control word in fpcw. Without DEBUG the macro
			; expands to no instructions.
		59	ifdef DEBUG
		60	fnstsw [fpsw]
		61	fnstcw [fpcw]
		62	endif
		63	ENDM
		64
		65
704	serge	66	_DATA SEGMENT DWORD USE32 PUBLIC 'DATA'
554	serge	67
		68	;
		69	; Stack variables for remainder routines.
		70	;
			; The offsets below describe the STACK_SIZE-byte scratch frame that
			; __fprem_chk / __fprem1_chk reserve with "sub esp, STACK_SIZE".
			; The MAIN_* variants address the same slots from inside
			; fprem_common / fprem1_common, where esp is MAIN_FUDGE bytes lower.
		71
		72	FLT_SIZE EQU 12 ; bytes reserved per 80-bit (tbyte) value slot
		73	DENOM EQU 0 ; working denominator (its exponent field is rewritten while scaling)
		74	DENOM_SAVE EQU DENOM + FLT_SIZE ; saved copy of the original denominator
		75	NUMER EQU DENOM_SAVE + FLT_SIZE ; working numerator / partial remainder
		76	PREV_CW EQU NUMER + FLT_SIZE ; control word stored by fnstcw
		77	PATCH_CW EQU PREV_CW + 4 ; modified control word loaded by fldcw
		78	FPREM_SW EQU PATCH_CW + 4 ; status word stored after the remainder step
		79	STACK_SIZE EQU FPREM_SW + 4 ; total size of the scratch frame
		80	RET_SIZE EQU 4 ; size of a near return address
		81	PUSH_SIZE EQU 4 ; size of one pushed 32-bit register
		82
		83	MAIN_FUDGE EQU RET_SIZE + PUSH_SIZE + PUSH_SIZE + PUSH_SIZE ; return address + pushed eax, ebx, ecx
		84
		85	MAIN_DENOM EQU DENOM + MAIN_FUDGE
		86	MAIN_DENOM_SAVE EQU DENOM_SAVE + MAIN_FUDGE
		87	MAIN_NUMER EQU NUMER + MAIN_FUDGE
		88	MAIN_PREV_CW EQU PREV_CW + MAIN_FUDGE
		89	MAIN_PATCH_CW EQU PATCH_CW + MAIN_FUDGE
		90	MAIN_FPREM_SW EQU FPREM_SW + MAIN_FUDGE
		91
			; Applied to the dword read at DENOM+6 (low 16 bits = top 16 mantissa
			; bits): the three mantissa bits that must all be 1 for the software
			; path to be considered.
		92	ONESMASK EQU 700h
		93
			; Indexed by the 4 bits ((dword at DENOM+6) shr 11) and 0fh; a nonzero
			; entry (indices 1, 4, 7, 10, 13) selects the software path.
		94	fprem_risc_table DB 0, 1, 0, 0, 4, 0, 0, 7, 0, 0, 10, 0, 0, 13, 0, 0
			; The next five constants are 8-byte values used as qword operands.
		95	fprem_scale DB 0, 0, 0, 0, 0, 0, 0eeh, 03fh ; bit pattern 3FEE000000000000h; not referenced by the code in this file
		96	one_shl_64 DB 0, 0, 0, 0, 0, 0, 0f0h, 043h ; 2^64, multiplier used to scale a denormal denominator
		97	one_shr_64 DB 0, 0, 0, 0, 0, 0, 0f0h, 03bh ; 2^-64, multiplier used to de-scale the result
		98	one DB 0, 0, 0, 0, 0, 0, 0f0h, 03fh ; 1.0; not referenced by the code in this file
		99	half DB 0, 0, 0, 0, 0, 0, 0e0h, 03fh ; 0.5, halves the scaled denominator in the rem_large loops
		100	big_number DB 0, 0, 0, 0, 0, 0, 0ffh, 0ffh, 0feh, 07fh ; 10-byte (tbyte) value used as a dividend to force C2
		101
		102	ifdef DEBUG
			; Debug-only snapshots written by the CHECKSW macro.
		103	public fpcw
		104	public fpsw
		105	fpcw dw 0
		106	fpsw dw 0
		107	endif
		108
			; Field layout applied to the ENV_SIZE-byte buffer written by fnstenv
			; in fprem_common / fprem1_common; only STATUS_WORD is accessed there.
		109	FPU_STATE STRUC
		110	CONTROL_WORD DW ?
		111	reserved_1 DW ?
		112	STATUS_WORD DD ?
		113	TAG_WORD DW ?
		114	reserved_3 DW ?
		115	IP_OFFSET DD ?
		116	CS_SLCT DW ?
		117	OPCODE DW ?
		118	DATA_OFFSET DD ?
		119	OPERAND_SLCT DW ?
		120	reserved_4 DW ?
		121	FPU_STATE ENDS
		122
		123	ENV_SIZE EQU 28 ; bytes reserved on the stack for fnstenv / fldenv
		124
		125
704	serge	126	_DATA ENDS
554	serge	127
		128	_TEXT SEGMENT DWORD USE32 PUBLIC 'CODE'
		129	_TEXT ENDS
		130
		131
704	serge	132	DGROUP GROUP _DATA
554	serge	133
		134
		135
704	serge	136	_TEXT SEGMENT DWORD USE32 PUBLIC 'CODE'
554	serge	137
		138	assume cs:_TEXT, ds:DGROUP, es:DGROUP, ss:nothing
		139
		140
		141	fprem_common PROC NEAR
			; Shared worker for __fprem_chk: leaves ST(0) = FPREM result and
			; ST(1) = denominator.
			;
			; In:  the caller's scratch frame (DENOM / NUMER / ... offsets) holds
			;      the numerator and denominator as 80-bit values;
			;      edx bit 0 = operands were scaled by one_shl_64, so the result
			;                  is multiplied by one_shr_64 before returning;
			;      edx bit 1 = the original denominator is already in DENOM_SAVE.
			; Out: eax, ebx and ecx are restored; edx is not modified.
			;
			; The software path (rem_patch / rem_large) is taken only when the
			; denominator mantissa matches the bit pattern checked below and both
			; operands are finite and normalized; every other case uses the plain
			; fprem at remainder_hardware_ok.
		142
		143	push eax
		144	push ebx
		145	push ecx
		146	mov eax, [MAIN_DENOM+6+esp] ; exponent and high 16 bits of mantissa
		147	xor eax, ONESMASK ; invert bits that have to be one
		148	test eax, ONESMASK ; check bits that have to be one
		149	jnz remainder_hardware_ok
		150	shr eax, 11
		151	and eax, 0fh
		152	cmp byte ptr fprem_risc_table[eax], 0 ; check for (1,4,7,a,d)
		153	jz remainder_hardware_ok
		154
		155	; The denominator has the bit pattern. Weed out the funny cases like NaNs
		156	; before applying the software version. Our caller guarantees that the
		157	; denominator is not a denormal. Here we check for:
		158	; denominator inf, NaN, unnormal
		159	; numerator inf, NaN, unnormal, denormal
		160
		161	mov eax, [MAIN_DENOM+6+esp] ; exponent and high 16 bits of mantissa
		162	and eax, 07fff0000h ; mask the exponent only
		163	cmp eax, 07fff0000h ; check for INF or NaN
		164	je remainder_hardware_ok
		165	mov eax, [MAIN_NUMER+6+esp] ; exponent and high 16 bits of mantissa
		166	and eax, 07fff0000h ; mask the exponent only
		167	jz remainder_hardware_ok ; jif numerator denormal
		168	cmp eax, 07fff0000h ; check for INF or NaN
		169	je remainder_hardware_ok
			; "add eax, eax" moves the explicit integer bit (bit 31 of the high
			; mantissa dword) into CF, so the branch has to test CF. Testing ZF
			; instead would send every pattern-matching denominator (whose
			; ONESMASK mantissa bits are set) to the hardware path and leave the
			; software path unreachable.
		170	mov eax, [esp + MAIN_NUMER + 4] ; high mantissa bits - numerator
		171	add eax, eax ; set carry if explicit bit set
		172	jnc remainder_hardware_ok ; jmp if numerator is unnormal (explicit bit clear)
		173	mov eax, [esp + MAIN_DENOM + 4] ; high mantissa bits - denominator
		174	add eax, eax ; set carry if explicit bit set
		175	jnc remainder_hardware_ok ; jmp if denominator is unnormal (explicit bit clear)
		176
		177	rem_patch:
		178	mov eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
		179	and eax, 07fffh ; clear sy
		180	add eax, 63 ; evaluate ey + 63
		181	mov ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
		182	and ebx, 07fffh ; clear sx
		183	sub ebx, eax ; evaluate the exponent difference (ex - ey)
		184	ja rem_large ; if ex > ey + 63, case of large arguments
			; Small exponent difference: repeatedly run fprem against a copy of
			; the denominator whose exponent is raised to within 4..7 of the
			; numerator's, until ex < ey + 10 and the hardware fprem is safe.
		185	rem_patch_loop:
		186	mov eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
		187	and eax, 07fffh ; clear sy
		188	add eax, 10 ; evaluate ey + 10
		189	mov ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
		190	and ebx, 07fffh ; clear sx
		191	sub ebx, eax ; evaluate the exponent difference (ex - ey)
		192	js remainder_hardware_ok ; safe if ey + 10 > ex
		193	fld tbyte ptr [MAIN_NUMER+esp] ; load the numerator
		194	mov eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
		195	mov ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
		196	and ebx, 07fffh ; clear sx
		197	mov ecx, ebx
		198	sub ebx, eax
		199	and ebx, 07h
		200	or ebx, 04h ; ebx = ((ex - ey) and 7) or 4, i.e. 4..7
		201	sub ecx, ebx ; ecx = exponent for the shifted denominator
		202	mov ebx, eax
		203	and ebx, 08000h ; keep sy
		204	or ecx, ebx ; merge the sign of y
		205	mov dword ptr [MAIN_DENOM+8+esp], ecx
		206	fld tbyte ptr [MAIN_DENOM+esp] ; load the shifted denominator
		207	mov dword ptr [MAIN_DENOM+8+esp], eax ; restore the initial denominator
		208	fxch
		209	fprem ; this rem is safe
		210	fstp tbyte ptr [MAIN_NUMER+esp] ; update the numerator
		211	fstp st(0) ; pop the stack
		212	jmp rem_patch_loop
		213	rem_large:
		214	test edx, 02h ; is denominator already saved
		215	jnz already_saved
		216	fld tbyte ptr[esp + MAIN_DENOM]
		217	fstp tbyte ptr[esp + MAIN_DENOM_SAVE] ; save denominator
		218	already_saved:
		219	; Save user's precision control and institute 80. The fp ops in
		220	; rem_large_loop must not round to user's precision (if it is less
		221	; than 80) because the hardware would not have done so. We are
		222	; aping the hardware here, which is all extended.
		223
		224	fnstcw [esp+MAIN_PREV_CW] ; save caller's control word
		225	mov eax, dword ptr[esp + MAIN_PREV_CW]
		226	or eax, 033fh ; mask exceptions, pc=80
		227	mov [esp + MAIN_PATCH_CW], eax
		228	fldcw [esp + MAIN_PATCH_CW]
		229
		230	mov eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
		231	and eax, 07fffh ; clear sy
		232	mov ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
		233	and ebx, 07fffh ; clear sx
		234	sub ebx, eax ; evaluate the exponent difference
		235	and ebx, 03fh
		236	or ebx, 020h
		237	add ebx, 1
		238	mov ecx, ebx ; ecx = loop count = (((ex - ey) and 3fh) or 20h) + 1
		239	mov eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
		240	mov ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
		241	and ebx, 07fffh ; clear sx
		242	and eax, 08000h ; keep sy
		243	or ebx, eax ; merge the sign of y
		244	mov dword ptr[MAIN_DENOM+8+esp], ebx ; make ey equal to ex (scaled denominator)
		245	fld tbyte ptr [MAIN_DENOM+esp] ; load the scaled denominator
		246	fabs
		247	fld tbyte ptr [MAIN_NUMER+esp] ; load the numerator
		248	fabs
			; Each pass: subtract the scaled denominator from the running
			; remainder unless C0 reports remainder < denominator, then halve
			; the denominator.
		249	rem_large_loop:
		250	fcom
		251	fstsw ax
		252	and eax, 00100h
		253	jnz rem_no_sub
		254	fsub st, st(1)
		255	rem_no_sub:
		256	fxch
		257	fmul qword ptr half
		258	fxch
		259	sub ecx, 1 ; decrement the loop counter
		260	jnz rem_large_loop
		261	mov ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
		262	fstp tbyte ptr[esp + MAIN_NUMER] ; save result
		263	fstp st ; toss modified denom
		264	fld tbyte ptr[esp + MAIN_DENOM_SAVE]
		265	fld tbyte ptr[big_number] ; force C2 to be set
		266	fprem
		267	fstp st
		268	fld tbyte ptr[esp + MAIN_NUMER] ; restore saved result
		269
		270	fldcw [esp + MAIN_PREV_CW] ; restore caller's control word
		271	and ebx, 08000h ; keep sx
		272	jz rem_done
		273	fchs ; result takes the sign of the original numerator
		274	jmp rem_done
		275	remainder_hardware_ok:
		276	fld tbyte ptr [MAIN_DENOM+esp] ; load the denominator
		277	fld tbyte ptr [MAIN_NUMER+esp] ; load the numerator
		278	fprem ; and finally do a remainder
		279	; prem_main_routine end
		280	rem_done:
		281	test edx, 03h
		282	jz rem_exit
		283	fnstsw [esp + MAIN_FPREM_SW] ; save Q0 Q1 and Q2
		284	test edx, 01h
		285	jz do_not_de_scale
		286	; De-scale the result. Go to pc=80 so that the fmul is not rounded
		287	; to the user's precision (fprem does not round the result).
		288	fnstcw [esp + MAIN_PREV_CW] ; save callers control word
		289	mov eax, [esp + MAIN_PREV_CW]
		290	or eax, 0300h ; pc = 80
		291	mov [esp + MAIN_PATCH_CW], eax
		292	fldcw [esp + MAIN_PATCH_CW]
		293	fmul qword ptr one_shr_64
		294	fldcw [esp + MAIN_PREV_CW] ; restore callers CW
		295	do_not_de_scale:
		296	mov eax, [esp + MAIN_FPREM_SW]
		297	fxch
		298	fstp st ; drop the working denominator
		299	fld tbyte ptr[esp + MAIN_DENOM_SAVE] ; put the original denominator back in ST(1)
		300	fxch
		301	and eax, 04300h ; restore saved Q0, Q1, Q2
			; Rewrite status-word bits 04300h through a store/reload of the
			; FPU environment.
		302	sub esp, ENV_SIZE
		303	fnstenv [esp]
		304	and [esp].STATUS_WORD, 0bcffh
		305	or [esp].STATUS_WORD, eax
		306	fldenv [esp]
		307	add esp, ENV_SIZE
		308	rem_exit:
		309	pop ecx
		310	pop ebx
		311	pop eax
		312	CHECKSW ; debug only: save status
		313	ret
		314	fprem_common ENDP
		315
		316	comment ~****************************************************************
		317
		318	;
		319	; float frem_chk (float numer, float denom)
		320	;
		321	public frem_chk
		322	frem_chk PROC NEAR
		323	push edx
		324	sub esp, STACK_SIZE
		325	fld dword ptr [STACK_SIZE+8+esp]
		326	fstp tbyte ptr [NUMER+esp]
		327	fld dword ptr [STACK_SIZE+12+esp]
		328	fstp tbyte ptr [DENOM+esp]
		329	mov edx, 0 ; dx = 1 if denormal extended divisor
		330	call fprem_common
		331	fxch
		332	fstp st
		333	add esp, STACK_SIZE
		334	pop edx
		335	ret
		336	frem_chk ENDP
		337	; end frem_chk
		338
		339	;
		340	; double drem_chk (double numer, double denom)
		341	;
		342	public drem_chk
		343	drem_chk PROC NEAR
		344	push edx
		345	sub esp, STACK_SIZE
		346	fld qword ptr [STACK_SIZE+8+esp]
		347	fstp tbyte ptr [NUMER+esp]
		348	fld qword ptr [STACK_SIZE+16+esp]
		349	fstp tbyte ptr [DENOM+esp]
		350	mov edx, 0 ; dx = 1 if denormal extended divisor
		351	call fprem_common
		352	fxch
		353	fstp st
		354	add esp, STACK_SIZE
		355	pop edx
		356	ret
		357
		358	drem_chk ENDP
		359	; end drem_chk
		360
		361	;
		362	; long double lrem_chk(long double number,long double denom)
		363	;
		364	public lrem_chk
		365	lrem_chk PROC NEAR
		366	fld tbyte ptr [20+esp]
		367	fld tbyte ptr [4+esp]
		368	call fprem_chk
		369	fxch
		370	fstp st
		371	ret
		372	lrem_chk ENDP
		373
		374	**********************************************************************~
		375
		376	;
		377	; FPREM: ST = remainder(ST, ST(1))
		378	;
		379	; Compiler version of the FPREM must preserve the arguments in the floating
		380	; point stack.
		381
		382	public __fprem_chk
		383	defpe __fprem_chk
			; Replacement for the FPREM instruction.
			; In:  ST(0) = numerator, ST(1) = denominator.
			; Out: ST(0) = remainder, ST(1) = denominator.
			; edx is saved and restored; inside this routine it carries the flags
			; passed to fprem_common (bit 0 = operands scaled, bit 1 = original
			; denominator stored in DENOM_SAVE).
			; NOTE(review): eax is overwritten below and is not restored by this
			; routine -- confirm that callers do not expect eax to be preserved.
		384	push edx
		385	sub esp, STACK_SIZE
		386	fstp tbyte ptr [NUMER+esp]
		387	fstp tbyte ptr [DENOM+esp]
		388	xor edx, edx
		389	; prem_main_routine begin
		390	mov eax,[DENOM+6+esp] ; exponent and high 16 bits of mantissa
		391	test eax,07fff0000h ; check for denormal
		392	jz denormal
		393	call fprem_common
		394	add esp, STACK_SIZE
		395	pop edx
		396	ret
		397
		398	denormal:
		399	fld tbyte ptr [DENOM+esp] ; load the denominator
		400	fld tbyte ptr [NUMER+esp] ; load the numerator
		401	mov eax, [DENOM+esp] ; test for whole mantissa == 0
		402	or eax, [DENOM+4+esp] ; test for whole mantissa == 0
		403	jz remainder_hardware_ok_l ; denominator is zero
		404	fxch
		405	fstp tbyte ptr[esp + DENOM_SAVE] ; save org denominator
		406	fld tbyte ptr[esp + DENOM]
		407	fxch
		408	or edx, 02h ; tell fprem_common that DENOM_SAVE is valid
		409	;
		410	; For this we need pc=80. Also, mask exceptions so we don't take any
		411	; denormal operand exceptions. It is guaranteed that the descaling
		412	; later on will take underflow, which is what the hardware would have done
		413	; on a normal fprem.
		414	;
		415	fnstcw [PREV_CW+esp] ; save caller's control word
		416	mov eax, [PREV_CW+esp]
		417	or eax, 0033fh ; mask exceptions, pc=80
		418	mov [PATCH_CW+esp], eax
		419	fldcw [PATCH_CW+esp] ; mask exceptions & pc=80
		420
		421	; The denominator is a denormal. For most numerators, scale both numerator
		422	; and denominator to get rid of denormals. Then execute the common code
		423	; with the flag set to indicate that the result must be de-scaled.
		424	; For large numerators this won't work because the scaling would cause
		425	; overflow. In this case we know the numerator is large, the denominator
		426	; is small (denormal), so the exponent difference is also large. This means
		427	; the rem_large code will be used and this code depends on the difference
		428	; in exponents modulo 64. Adding 64 to the denominators exponent
		429	; doesn't change the modulo 64 difference. So we can scale the denominator
		430	; by 64, making it not denormal, and this won't affect the result.
		431	;
		432	; To start with, figure out if numerator is large
		433
		434	mov eax, [esp + NUMER + 8] ; load numerator exponent
		435	and eax, 7fffh ; isolate numerator exponent
		436	cmp eax, 7fbeh ; compare Nexp to Maxexp-64
		437	ja big_numer_rem_de ; jif big numerator
		438
		439	; So the numerator is not large scale both numerator and denominator
		440
		441	or edx, 1 ; edx = 1, if denormal extended divisor
		442	fmul qword ptr one_shl_64 ; make numerator not denormal
		443	fstp tbyte ptr[esp + NUMER]
		444	fmul qword ptr one_shl_64 ; make denominator not denormal
		445	fstp tbyte ptr[esp + DENOM]
		446	jmp scaling_done
		447
		448	; The numerator is large. Scale only the denominator, which will not
		449	; change the result which we know will be partial. Set the scale flag
		450	; to false.
		451	big_numer_rem_de:
		452	; We must do this with pc=80 to avoid rounding to single/double.
		453	; In this case we do not mask exceptions so that we will take
		454	; denormal operand, as would the hardware.
			; PREV_CW already holds the caller's control word (stored at the
			; top of the denormal path). It must not be stored again here:
			; the FPU is currently running with the patched word (exceptions
			; masked, pc=80), so a second fnstcw would overwrite the saved
			; caller value and scaling_done could no longer restore it.
		455	; (no fnstcw here -- see the note above)
		456	mov eax, [PREV_CW+esp] ; caller's control word
		457	or eax, 00300h ; pc=80
		458	mov [PATCH_CW+esp], eax
		459	fldcw [PATCH_CW+esp] ; pc=80
		460
		461	fstp st ; Toss numerator
		462	fmul qword ptr one_shl_64 ; make denominator not denormal
		463	fstp tbyte ptr[esp + DENOM]
		464
		465	; Restore the control word which was fiddled to scale at 80-bit precision.
		466	; Then call the common code.
		467	scaling_done:
		468	fldcw [esp + PREV_CW] ; restore callers control word
		469	call fprem_common
		470	add esp, STACK_SIZE
		471	pop edx
		472	ret
		473
			; Denominator is zero: both operands are already on the FPU stack,
			; so let the hardware produce the result.
		474	remainder_hardware_ok_l:
		475	fprem ; and finally do a remainder
		476
		477	CHECKSW
		478
		479	add esp, STACK_SIZE
		480	pop edx
		481	ret
		482	__fprem_chk ENDP
		483	; end fprem_chk
		484
		485
		486	;
		487	; FPREM1 code begins here
		488	;
		489
		490
		491	fprem1_common PROC NEAR
			; Shared worker for __fprem1_chk: leaves ST(0) = FPREM1 result and
			; ST(1) = denominator.
			;
			; In:  the caller's scratch frame (DENOM / NUMER / ... offsets) holds
			;      the numerator and denominator as 80-bit values;
			;      edx bit 0 = operands were scaled by one_shl_64, so the result
			;                  is multiplied by one_shr_64 before returning;
			;      edx bit 1 = the original denominator is already in DENOM_SAVE.
			; Out: eax, ebx and ecx are restored; edx is not modified.
			;
			; The software path (rem1_patch / rem1_large) is taken only when the
			; denominator mantissa matches the bit pattern checked below and both
			; operands are finite and normalized; every other case uses the plain
			; fprem1 at remainder1_hardware_ok.
		492
		493	push eax
		494	push ebx
		495	push ecx
		496	mov eax, [MAIN_DENOM+6+esp] ; exponent and high 16 bits of mantissa
		497	xor eax, ONESMASK ; invert bits that have to be one
		498	test eax, ONESMASK ; check bits that have to be one
		499	jnz remainder1_hardware_ok
		500	shr eax, 11
		501	and eax, 0fh
		502	cmp byte ptr fprem_risc_table[eax], 0 ; check for (1,4,7,a,d)
		503	jz remainder1_hardware_ok
		504
		505	; The denominator has the bit pattern. Weed out the funny cases like NaNs
		506	; before applying the software version. Our caller guarantees that the
		507	; denominator is not a denormal. Here we check for:
		508	; denominator inf, NaN, unnormal
		509	; numerator inf, NaN, unnormal, denormal
		510
		511	mov eax, [MAIN_DENOM+6+esp] ; exponent and high 16 bits of mantissa
		512	and eax, 07fff0000h ; mask the exponent only
		513	cmp eax, 07fff0000h ; check for INF or NaN
		514	je remainder1_hardware_ok
		515	mov eax, [MAIN_NUMER+6+esp] ; exponent and high 16 bits of mantissa
		516	and eax, 07fff0000h ; mask the exponent only
		517	jz remainder1_hardware_ok ; jif numerator denormal
		518	cmp eax, 07fff0000h ; check for INF or NaN
		519	je remainder1_hardware_ok
			; "add eax, eax" moves the explicit integer bit (bit 31 of the high
			; mantissa dword) into CF, so the branch has to test CF. Testing ZF
			; instead would send every pattern-matching denominator (whose
			; ONESMASK mantissa bits are set) to the hardware path and leave the
			; software path unreachable.
		520	mov eax, [esp + MAIN_NUMER + 4] ; high mantissa bits - numerator
		521	add eax, eax ; set carry if explicit bit set
		522	jnc remainder1_hardware_ok ; jmp if numerator is unnormal (explicit bit clear)
		523	mov eax, [esp + MAIN_DENOM + 4] ; high mantissa bits - denominator
		524	add eax, eax ; set carry if explicit bit set
		525	jnc remainder1_hardware_ok ; jmp if denominator is unnormal (explicit bit clear)
		526
		527	rem1_patch:
		528	mov eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
		529	and eax, 07fffh ; clear sy
		530	add eax, 63 ; evaluate ey + 63
		531	mov ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
		532	and ebx, 07fffh ; clear sx
		533	sub ebx, eax ; evaluate the exponent difference (ex - ey)
		534	ja rem1_large ; if ex > ey + 63, case of large arguments
			; Small exponent difference: repeatedly run fprem against a copy of
			; the denominator whose exponent is raised to within 4..7 of the
			; numerator's, until ex < ey + 10 and the hardware fprem1 is safe.
		535	rem1_patch_loop:
		536	mov eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
		537	and eax, 07fffh ; clear sy
		538	add eax, 10 ; evaluate ey + 10
		539	mov ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
		540	and ebx, 07fffh ; clear sx
		541	sub ebx, eax ; evaluate the exponent difference (ex - ey)
		542	js remainder1_hardware_ok ; safe if ey + 10 > ex
		543	fld tbyte ptr [MAIN_NUMER+esp] ; load the numerator
		544	mov eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
		545	mov ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
		546	and ebx, 07fffh ; clear sx
		547	mov ecx, ebx
		548	sub ebx, eax
		549	and ebx, 07h
		550	or ebx, 04h ; ebx = ((ex - ey) and 7) or 4, i.e. 4..7
		551	sub ecx, ebx ; ecx = exponent for the shifted denominator
		552	mov ebx, eax
		553	and ebx, 08000h ; keep sy
		554	or ecx, ebx ; merge the sign of y
		555	mov dword ptr [MAIN_DENOM+8+esp], ecx
		556	fld tbyte ptr [MAIN_DENOM+esp] ; load the shifted denominator
		557	mov dword ptr [MAIN_DENOM+8+esp], eax ; restore the initial denominator
		558	fxch
		559	fprem ; this rem is safe
		560	fstp tbyte ptr [MAIN_NUMER+esp] ; update the numerator
		561	fstp st(0) ; pop the stack
		562	jmp rem1_patch_loop
		563	rem1_large:
			; The "denominator already saved" flag is bit 1 of edx (set by
			; __fprem1_chk). ebx holds the exponent difference at this point, so
			; testing ebx could skip the save and leave DENOM_SAVE uninitialized.
		564	test edx, 02h ; is denominator already saved
		565	jnz already_saved1
		566	fld tbyte ptr[esp + MAIN_DENOM]
		567	fstp tbyte ptr[esp + MAIN_DENOM_SAVE] ; save denominator
		568	already_saved1:
		569	; Save user's precision control and institute 80. The fp ops in
		570	; rem1_large_loop must not round to user's precision (if it is less
		571	; than 80) because the hardware would not have done so. We are
		572	; aping the hardware here, which is all extended.
		573
		574	fnstcw [esp+MAIN_PREV_CW] ; save caller's control word
		575	mov eax, dword ptr[esp + MAIN_PREV_CW]
		576	or eax, 033fh ; mask exceptions, pc=80
		577	mov [esp + MAIN_PATCH_CW], eax
		578	fldcw [esp + MAIN_PATCH_CW]
		579
		580	mov eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
		581	and eax, 07fffh ; clear sy
		582	mov ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
		583	and ebx, 07fffh ; clear sx
		584	sub ebx, eax ; evaluate the exponent difference
		585	and ebx, 03fh
		586	or ebx, 020h
		587	add ebx, 1
		588	mov ecx, ebx ; ecx = loop count = (((ex - ey) and 3fh) or 20h) + 1
		589	mov eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
		590	mov ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
		591	and ebx, 07fffh ; clear sx
		592	and eax, 08000h ; keep sy
		593	or ebx, eax ; merge the sign of y
		594	mov dword ptr[MAIN_DENOM+8+esp], ebx ; make ey equal to ex (scaled denominator)
		595	fld tbyte ptr [MAIN_DENOM+esp] ; load the scaled denominator
		596	fabs
		597	fld tbyte ptr [MAIN_NUMER+esp] ; load the numerator
		598	fabs
			; Each pass: subtract the scaled denominator from the running
			; remainder unless C0 reports remainder < denominator, then halve
			; the denominator.
		599	rem1_large_loop:
		600	fcom
		601	fstsw ax
		602	and eax, 00100h
		603	jnz rem1_no_sub
		604	fsub st, st(1)
		605	rem1_no_sub:
		606	fxch
		607	fmul qword ptr half
		608	fxch
		609	sub ecx, 1 ; decrement the loop counter
		610	jnz rem1_large_loop
		611	mov ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
		612	fstp tbyte ptr[esp + MAIN_NUMER] ; save result
		613	fstp st ; toss modified denom
		614	fld tbyte ptr[esp + MAIN_DENOM_SAVE]
		615	fld tbyte ptr[big_number] ; force C2 to be set
		616	fprem1
		617	fstp st
		618	fld tbyte ptr[esp + MAIN_NUMER] ; restore saved result
		619
		620	fldcw [esp + MAIN_PREV_CW] ; restore caller's control word
		621	and ebx, 08000h ; keep sx
		622	jz rem1_done
		623	fchs ; result takes the sign of the original numerator
		624	jmp rem1_done
		625	remainder1_hardware_ok:
		626	fld tbyte ptr [MAIN_DENOM+esp] ; load the denominator
		627	fld tbyte ptr [MAIN_NUMER+esp] ; load the numerator
		628	fprem1 ; and finally do a remainder
		629	; prem1_main_routine end
		630	rem1_done:
		631	test edx, 03h
		632	jz rem1_exit
		633	fnstsw [esp + MAIN_FPREM_SW] ; save Q0 Q1 and Q2
		634	test edx, 01h
		635	jz do_not_de_scale1
		636	; De-scale the result. Go to pc=80 so that the fmul is not rounded
		637	; to the user's precision (fprem1 does not round the result).
		638	fnstcw [esp + MAIN_PREV_CW] ; save callers control word
		639	mov eax, [esp + MAIN_PREV_CW]
		640	or eax, 0300h ; pc = 80
		641	mov [esp + MAIN_PATCH_CW], eax
		642	fldcw [esp + MAIN_PATCH_CW]
		643	fmul qword ptr one_shr_64
		644	fldcw [esp + MAIN_PREV_CW] ; restore callers CW
		645	do_not_de_scale1:
		646	mov eax, [esp + MAIN_FPREM_SW]
		647	fxch
		648	fstp st ; drop the working denominator
		649	fld tbyte ptr[esp + MAIN_DENOM_SAVE] ; put the original denominator back in ST(1)
		650	fxch
		651	and eax, 04300h ; restore saved Q0, Q1, Q2
			; Rewrite status-word bits 04300h through a store/reload of the
			; FPU environment.
		652	sub esp, ENV_SIZE
		653	fnstenv [esp]
		654	and [esp].STATUS_WORD, 0bcffh
		655	or [esp].STATUS_WORD, eax
		656	fldenv [esp]
		657	add esp, ENV_SIZE
		658	rem1_exit:
		659	pop ecx
		660	pop ebx
		661	pop eax
		662	CHECKSW ; debug only: save status
		663	ret
		664	fprem1_common ENDP
		665
		666
		667	comment ~***************************************************************
		668	;
		669	; float frem1_chk (float numer, float denom)
		670	;
		671	public frem1_chk
		672	frem1_chk PROC NEAR
		673	push edx
		674	sub esp, STACK_SIZE
		675	fld dword ptr [STACK_SIZE+8+esp]
		676	fstp tbyte ptr [NUMER+esp]
		677	fld dword ptr [STACK_SIZE+12+esp]
		678	fstp tbyte ptr [DENOM+esp]
		679	mov edx, 0 ; dx = 1 if denormal extended divisor
		680	call fprem1_common
		681	fxch
		682	fstp st
		683	add esp, STACK_SIZE
		684	pop edx
		685	ret
		686	frem1_chk ENDP
		687	; end frem1_chk
		688
		689	;
		690	; double drem1_chk (double numer, double denom)
		691	;
		692	public drem1_chk
		693	drem1_chk PROC NEAR
		694	push edx
		695	sub esp, STACK_SIZE
		696	fld qword ptr [STACK_SIZE+8+esp]
		697	fstp tbyte ptr [NUMER+esp]
		698	fld qword ptr [STACK_SIZE+16+esp]
		699	fstp tbyte ptr [DENOM+esp]
		700	mov edx, 0 ; dx = 1 if denormal extended divisor
		701	call fprem1_common
		702	fxch
		703	fstp st
		704	add esp, STACK_SIZE
		705	pop edx
		706	ret
		707
		708	drem1_chk ENDP
		709	; end drem1_chk
		710
		711	;
		712	; long double lrem1_chk(long double number,long double denom)
		713	;
		714	public lrem1_chk
		715	lrem1_chk PROC NEAR
		716	fld tbyte ptr [20+esp]
		717	fld tbyte ptr [4+esp]
		718	call fprem1_chk
		719	fxch
		720	fstp st
		721	ret
		722	lrem1_chk ENDP
		723	********************************************************************~
		724
		725	;
		726	; FPREM1: ST = remainder(ST, ST(1)) - IEEE version of rounding
		727	;
		728	; Compiler version of the FPREM must preserve the arguments in the floating
		729	; point stack.
		730
		731	public __fprem1_chk
		732	defpe __fprem1_chk
			; Replacement for the FPREM1 instruction.
			; In:  ST(0) = numerator, ST(1) = denominator.
			; Out: ST(0) = remainder, ST(1) = denominator.
			; edx is saved and restored; inside this routine it carries the flags
			; passed to fprem1_common (bit 0 = operands scaled, bit 1 = original
			; denominator stored in DENOM_SAVE).
			; NOTE(review): eax is overwritten below and is not restored by this
			; routine -- confirm that callers do not expect eax to be preserved.
		733	push edx
		734	sub esp, STACK_SIZE
		735	fstp tbyte ptr [NUMER+esp]
		736	fstp tbyte ptr [DENOM+esp]
		737	xor edx, edx ; no flags set yet (same idiom as __fprem_chk)
		738	; prem1_main_routine begin
		739	mov eax,[DENOM+6+esp] ; exponent and high 16 bits of mantissa
		740	test eax,07fff0000h ; check for denormal
		741	jz denormal1
		742	call fprem1_common
		743	add esp, STACK_SIZE
		744	pop edx
		745	ret
		746
		747	denormal1:
		748	fld tbyte ptr [DENOM+esp] ; load the denominator
		749	fld tbyte ptr [NUMER+esp] ; load the numerator
		750	mov eax, [DENOM+esp] ; test for whole mantissa == 0
		751	or eax, [DENOM+4+esp] ; test for whole mantissa == 0
		752	jz remainder1_hardware_ok_l ; denominator is zero
		753	fxch
		754	fstp tbyte ptr[esp + DENOM_SAVE] ; save org denominator
		755	fld tbyte ptr[esp + DENOM]
		756	fxch
		757	or edx, 02h ; tell fprem1_common that DENOM_SAVE is valid
		758	;
		759	; For this we need pc=80. Also, mask exceptions so we don't take any
		760	; denormal operand exceptions. It is guaranteed that the descaling
		761	; later on will take underflow, which is what the hardware would have done
		762	; on a normal fprem.
		763	;
		764	fnstcw [PREV_CW+esp] ; save caller's control word
		765	mov eax, [PREV_CW+esp]
		766	or eax, 0033fh ; mask exceptions, pc=80
		767	mov [PATCH_CW+esp], eax
		768	fldcw [PATCH_CW+esp] ; mask exceptions & pc=80
		769
		770	; The denominator is a denormal. For most numerators, scale both numerator
		771	; and denominator to get rid of denormals. Then execute the common code
		772	; with the flag set to indicate that the result must be de-scaled.
		773	; For large numerators this won't work because the scaling would cause
		774	; overflow. In this case we know the numerator is large, the denominator
		775	; is small (denormal), so the exponent difference is also large. This means
		776	; the rem1_large code will be used and this code depends on the difference
		777	; in exponents modulo 64. Adding 64 to the denominators exponent
		778	; doesn't change the modulo 64 difference. So we can scale the denominator
		779	; by 64, making it not denormal, and this won't affect the result.
		780	;
		781	; To start with, figure out if numerator is large
		782
		783	mov eax, [esp + NUMER + 8] ; load numerator exponent
		784	and eax, 7fffh ; isolate numerator exponent
		785	cmp eax, 7fbeh ; compare Nexp to Maxexp-64
		786	ja big_numer_rem1_de ; jif big numerator
		787
		788	; So the numerator is not large scale both numerator and denominator
		789
		790	or edx, 1 ; edx = 1, if denormal extended divisor
		791	fmul qword ptr one_shl_64 ; make numerator not denormal
		792	fstp tbyte ptr[esp + NUMER]
		793	fmul qword ptr one_shl_64 ; make denominator not denormal
		794	fstp tbyte ptr[esp + DENOM]
		795	jmp scaling_done1
		796
		797	; The numerator is large. Scale only the denominator, which will not
		798	; change the result which we know will be partial. Set the scale flag
		799	; to false.
		800	big_numer_rem1_de:
		801	; We must do this with pc=80 to avoid rounding to single/double.
		802	; In this case we do not mask exceptions so that we will take
		803	; denormal operand, as would the hardware.
			; PREV_CW already holds the caller's control word (stored at the
			; top of the denormal1 path). It must not be stored again here:
			; the FPU is currently running with the patched word (exceptions
			; masked, pc=80), so a second fnstcw would overwrite the saved
			; caller value and scaling_done1 could no longer restore it.
		804	; (no fnstcw here -- see the note above)
		805	mov eax, [PREV_CW+esp] ; caller's control word
		806	or eax, 00300h ; pc=80
		807	mov [PATCH_CW+esp], eax
		808	fldcw [PATCH_CW+esp] ; pc=80
		809
		810	fstp st ; Toss numerator
		811	fmul qword ptr one_shl_64 ; make denominator not denormal
		812	fstp tbyte ptr[esp + DENOM]
		813
		814	; Restore the control word which was fiddled to scale at 80-bit precision.
		815	; Then call the common code.
		816	scaling_done1:
		817	fldcw [esp + PREV_CW] ; restore callers control word
		818	call fprem1_common
		819	add esp, STACK_SIZE
		820	pop edx
		821	ret
		822
			; Denominator is zero: both operands are already on the FPU stack,
			; so let the hardware produce the result. This routine stands in
			; for FPREM1, so the hardware instruction used is fprem1.
		823	remainder1_hardware_ok_l:
		824	fprem1 ; and finally do a remainder
		825
		826	CHECKSW
		827
		828	add esp, STACK_SIZE
		829	pop edx
		830	ret
		831	__fprem1_chk ENDP
		832	; end fprem1_chk
		833
		834	ifdef DEBUG
			; Debug-only helper, assembled only when DEBUG is defined:
			; executes fninit and returns.
		835	public fpinit
		836	fpinit PROC NEAR
		837	fninit
		838	ret
		839	fpinit ENDP
		840	endif
		841
704	serge	842	_TEXT ENDS
554	serge	843	END

Subversion Repositories Kolibri OS

(root)/programs/develop/open watcom/trunk/clib/fpu/chipr32.asm – Rev 704