WebSVN – Kolibri OS – Blame – /programs/develop/open watcom/trunk/clib/src/chipr32.asm

Rev	Author	Line No.	Line
359	serge	1	;*****************************************************************************
		2	;*
		3	;* Open Watcom Project
		4	;*
		5	;* Portions Copyright (c) 1983-2002 Sybase, Inc. All Rights Reserved.
		6	;*
		7	;* ========================================================================
		8	;*
		9	;* This file contains Original Code and/or Modifications of Original
		10	;* Code as defined in and that are subject to the Sybase Open Watcom
		11	;* Public License version 1.0 (the 'License'). You may not use this file
		12	;* except in compliance with the License. BY USING THIS FILE YOU AGREE TO
		13	;* ALL TERMS AND CONDITIONS OF THE LICENSE. A copy of the License is
		14	;* provided with the Original Code and Modifications, and is also
		15	;* available at www.sybase.com/developer/opensource.
		16	;*
		17	;* The Original Code and all software distributed under the License are
		18	;* distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
		19	;* EXPRESS OR IMPLIED, AND SYBASE AND ALL CONTRIBUTORS HEREBY DISCLAIM
		20	;* ALL SUCH WARRANTIES, INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF
		21	;* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR
		22	;* NON-INFRINGEMENT. Please see the License for the specific language
		23	;* governing rights and limitations under the License.
		24	;*
		25	;* ========================================================================
		26	;*
		27	;* Description: WHEN YOU FIGURE OUT WHAT THIS FILE DOES, PLEASE
		28	;* DESCRIBE IT HERE!
		29	;*
		30	;*****************************************************************************
		31
		32
		33	; static char sccs_id[] = "@(#)fprem32.asm 1.5 12/22/94 12:48:07";
		34	;
		35	; This code is being published by Intel to users of the Pentium(tm)
		36	; processor. Recipients are authorized to copy, modify, compile, use and
		37	; distribute the code.
		38	;
		39	; Intel makes no warranty of any kind with regard to this code, including
		40	; but not limited to, implied warranties or merchantability and fitness for
		41	; a particular purpose. Intel assumes no responsibility for any errors that
		42	; may appear in this code.
		43	;
		44	; No patent licenses are granted, express or implied.
		45	;
		46	;
		47	include mdef.inc
		48
		49	.386
		50	.387
		51
		52	;
		53	; PRELIMINARY VERSION of the software patch for the floating
		54	; point remainder.
		55	;
		56
		57
		58	CHECKSW MACRO
		59	ifdef DEBUG
		60	fnstsw [fpsw]
		61	fnstcw [fpcw]
		62	endif
		63	ENDM
		64
		65
		66	DATA32 SEGMENT DWORD USE32 PUBLIC 'DATA'
		67
		68	;
		69	; Stack variables for remainder routines.
		70	;
		71
		72	FLT_SIZE EQU 12
		73	DENOM EQU 0
		74	DENOM_SAVE EQU DENOM + FLT_SIZE
		75	NUMER EQU DENOM_SAVE + FLT_SIZE
		76	PREV_CW EQU NUMER + FLT_SIZE
		77	PATCH_CW EQU PREV_CW + 4
		78	FPREM_SW EQU PATCH_CW + 4
		79	STACK_SIZE EQU FPREM_SW + 4
		80	RET_SIZE EQU 4
		81	PUSH_SIZE EQU 4
		82
		83	MAIN_FUDGE EQU RET_SIZE + PUSH_SIZE + PUSH_SIZE + PUSH_SIZE
		84
		85	MAIN_DENOM EQU DENOM + MAIN_FUDGE
		86	MAIN_DENOM_SAVE EQU DENOM_SAVE + MAIN_FUDGE
		87	MAIN_NUMER EQU NUMER + MAIN_FUDGE
		88	MAIN_PREV_CW EQU PREV_CW + MAIN_FUDGE
		89	MAIN_PATCH_CW EQU PATCH_CW + MAIN_FUDGE
		90	MAIN_FPREM_SW EQU FPREM_SW + MAIN_FUDGE
		91
		92	ONESMASK EQU 700h
		93
		94	fprem_risc_table DB 0, 1, 0, 0, 4, 0, 0, 7, 0, 0, 10, 0, 0, 13, 0, 0
		95	fprem_scale DB 0, 0, 0, 0, 0, 0, 0eeh, 03fh
		96	one_shl_64 DB 0, 0, 0, 0, 0, 0, 0f0h, 043h
		97	one_shr_64 DB 0, 0, 0, 0, 0, 0, 0f0h, 03bh
		98	one DB 0, 0, 0, 0, 0, 0, 0f0h, 03fh
		99	half DB 0, 0, 0, 0, 0, 0, 0e0h, 03fh
		100	big_number DB 0, 0, 0, 0, 0, 0, 0ffh, 0ffh, 0feh, 07fh
		101
		102	ifdef DEBUG
		103	public fpcw
		104	public fpsw
		105	fpcw dw 0
		106	fpsw dw 0
		107	endif
		108
		109	FPU_STATE STRUC
		110	CONTROL_WORD DW ?
		111	reserved_1 DW ?
		112	STATUS_WORD DD ?
		113	TAG_WORD DW ?
		114	reserved_3 DW ?
		115	IP_OFFSET DD ?
		116	CS_SLCT DW ?
		117	OPCODE DW ?
		118	DATA_OFFSET DD ?
		119	OPERAND_SLCT DW ?
		120	reserved_4 DW ?
		121	FPU_STATE ENDS
		122
		123	ENV_SIZE EQU 28
		124
		125
		126	DATA32 ENDS
		127
		128	_TEXT SEGMENT DWORD USE32 PUBLIC 'CODE'
		129	_TEXT ENDS
		130
		131	DATA32 SEGMENT DWORD USE32 PUBLIC 'DATA'
		132	DATA32 ENDS
		133
		134	CONST32 SEGMENT DWORD USE32 PUBLIC 'CONST'
		135	CONST32 ENDS
		136
		137	BSS32 SEGMENT DWORD USE32 PUBLIC 'BSS'
		138	BSS32 ENDS
		139
		140	DGROUP GROUP CONST32, BSS32, DATA32
		141
		142
		143
		144	CODE32 SEGMENT DWORD USE32 PUBLIC 'CODE'
		145
		146	assume cs:_TEXT, ds:DGROUP, es:DGROUP, ss:nothing
		147
		148
		149	fprem_common PROC NEAR
		150
		151	push eax
		152	push ebx
		153	push ecx
		154	mov eax, [MAIN_DENOM+6+esp] ; exponent and high 16 bits of mantissa
		155	xor eax, ONESMASK ; invert bits that have to be one
		156	test eax, ONESMASK ; check bits that have to be one
		157	jnz remainder_hardware_ok
		158	shr eax, 11
		159	and eax, 0fh
		160	cmp byte ptr fprem_risc_table[eax], 0 ; check for (1,4,7,a,d)
		161	jz remainder_hardware_ok
		162
		163	; The denominator has the bit pattern. Weed out the funny cases like NaNs
		164	; before applying the software version. Our caller guarantees that the
		165	; denominator is not a denormal. Here we check for:
		166	; denominator inf, NaN, unnormal
		167	; numerator inf, NaN, unnormal, denormal
		168
		169	mov eax, [MAIN_DENOM+6+esp] ; exponent and high 16 bits of mantissa
		170	and eax, 07fff0000h ; mask the exponent only
		171	cmp eax, 07fff0000h ; check for INF or NaN
		172	je remainder_hardware_ok
		173	mov eax, [MAIN_NUMER+6+esp] ; exponent and high 16 bits of mantissa
		174	and eax, 07fff0000h ; mask the exponent only
		175	jz remainder_hardware_ok ; jif numerator denormal
		176	cmp eax, 07fff0000h ; check for INF or NaN
		177	je remainder_hardware_ok
		178	mov eax, [esp + MAIN_NUMER + 4] ; high mantissa bits - numerator
		179	add eax, eax ; set carry if explicit bit set
		180	jnz remainder_hardware_ok ; jmp if numerator is unnormal
		181	mov eax, [esp + MAIN_DENOM + 4] ; high mantissa bits - denominator
		182	add eax, eax ; set carry if explicit bit set
		183	jnz remainder_hardware_ok ; jmp if denominator is unnormal
		184
		185	rem_patch:
		186	mov eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
		187	and eax, 07fffh ; clear sy
		188	add eax, 63 ; evaluate ey + 63
		189	mov ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
		190	and ebx, 07fffh ; clear sx
		191	sub ebx, eax ; evaluate the exponent difference (ex - ey)
		192	ja rem_large ; if ex > ey + 63, case of large arguments
		193	rem_patch_loop:
		194	mov eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
		195	and eax, 07fffh ; clear sy
		196	add eax, 10 ; evaluate ey + 10
		197	mov ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
		198	and ebx, 07fffh ; clear sx
		199	sub ebx, eax ; evaluate the exponent difference (ex - ey)
		200	js remainder_hardware_ok ; safe if ey + 10 > ex
		201	fld tbyte ptr [MAIN_NUMER+esp] ; load the numerator
		202	mov eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
		203	mov ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
		204	and ebx, 07fffh ; clear sx
		205	mov ecx, ebx
		206	sub ebx, eax
		207	and ebx, 07h
		208	or ebx, 04h
		209	sub ecx, ebx
		210	mov ebx, eax
		211	and ebx, 08000h ; keep sy
		212	or ecx, ebx ; merge the sign of y
		213	mov dword ptr [MAIN_DENOM+8+esp], ecx
		214	fld tbyte ptr [MAIN_DENOM+esp] ; load the shifted denominator
		215	mov dword ptr [MAIN_DENOM+8+esp], eax ; restore the initial denominator
		216	fxch
		217	fprem ; this rem is safe
		218	fstp tbyte ptr [MAIN_NUMER+esp] ; update the numerator
		219	fstp st(0) ; pop the stack
		220	jmp rem_patch_loop
		221	rem_large:
		222	test edx, 02h ; is denominator already saved
		223	jnz already_saved
		224	fld tbyte ptr[esp + MAIN_DENOM]
		225	fstp tbyte ptr[esp + MAIN_DENOM_SAVE] ; save denominator
		226	already_saved:
		227	; Save user's precision control and institute 80. The fp ops in
		228	; rem_large_loop must not round to user's precision (if it is less
		229	; than 80) because the hardware would not have done so. We are
		230	; aping the hardware here, which is all extended.
		231
		232	fnstcw [esp+MAIN_PREV_CW] ; save caller's control word
		233	mov eax, dword ptr[esp + MAIN_PREV_CW]
		234	or eax, 033fh ; mask exceptions, pc=80
		235	mov [esp + MAIN_PATCH_CW], eax
		236	fldcw [esp + MAIN_PATCH_CW]
		237
		238	mov eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
		239	and eax, 07fffh ; clear sy
		240	mov ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
		241	and ebx, 07fffh ; clear sx
		242	sub ebx, eax ; evaluate the exponent difference
		243	and ebx, 03fh
		244	or ebx, 020h
		245	add ebx, 1
		246	mov ecx, ebx
		247	mov eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
		248	mov ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
		249	and ebx, 07fffh ; clear sx
		250	and eax, 08000h ; keep sy
		251	or ebx, eax ; merge the sign of y
		252	mov dword ptr[MAIN_DENOM+8+esp], ebx ; make ey equal to ex (scaled denominator)
		253	fld tbyte ptr [MAIN_DENOM+esp] ; load the scaled denominator
		254	fabs
		255	fld tbyte ptr [MAIN_NUMER+esp] ; load the numerator
		256	fabs
		257	rem_large_loop:
		258	fcom
		259	fstsw ax
		260	and eax, 00100h
		261	jnz rem_no_sub
		262	fsub st, st(1)
		263	rem_no_sub:
		264	fxch
		265	fmul qword ptr half
		266	fxch
		267	sub ecx, 1 ; decrement the loop counter
		268	jnz rem_large_loop
		269	mov ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
		270	fstp tbyte ptr[esp + MAIN_NUMER] ; save result
		271	fstp st ; toss modified denom
		272	fld tbyte ptr[esp + MAIN_DENOM_SAVE]
		273	fld tbyte ptr[big_number] ; force C2 to be set
		274	fprem
		275	fstp st
		276	fld tbyte ptr[esp + MAIN_NUMER] ; restore saved result
		277
		278	fldcw [esp + MAIN_PREV_CW] ; restore caller's control word
		279	and ebx, 08000h ; keep sx
		280	jz rem_done
		281	fchs
		282	jmp rem_done
		283	remainder_hardware_ok:
		284	fld tbyte ptr [MAIN_DENOM+esp] ; load the denominator
		285	fld tbyte ptr [MAIN_NUMER+esp] ; load the numerator
		286	fprem ; and finally do a remainder
		287	; prem_main_routine end
		288	rem_done:
		289	test edx, 03h
		290	jz rem_exit
		291	fnstsw [esp + MAIN_FPREM_SW] ; save Q0 Q1 and Q2
		292	test edx, 01h
		293	jz do_not_de_scale
		294	; De-scale the result. Go to pc=80 to prevent from fmul
		295	; from user precision (fprem does not round the result).
		296	fnstcw [esp + MAIN_PREV_CW] ; save callers control word
		297	mov eax, [esp + MAIN_PREV_CW]
		298	or eax, 0300h ; pc = 80
		299	mov [esp + MAIN_PATCH_CW], eax
		300	fldcw [esp + MAIN_PATCH_CW]
		301	fmul qword ptr one_shr_64
		302	fldcw [esp + MAIN_PREV_CW] ; restore callers CW
		303	do_not_de_scale:
		304	mov eax, [esp + MAIN_FPREM_SW]
		305	fxch
		306	fstp st
		307	fld tbyte ptr[esp + MAIN_DENOM_SAVE]
		308	fxch
		309	and eax, 04300h ; restore saved Q0, Q1, Q2
		310	sub esp, ENV_SIZE
		311	fnstenv [esp]
		312	and [esp].STATUS_WORD, 0bcffh
		313	or [esp].STATUS_WORD, eax
		314	fldenv [esp]
		315	add esp, ENV_SIZE
		316	rem_exit:
		317	pop ecx
		318	pop ebx
		319	pop eax
		320	CHECKSW ; debug only: save status
		321	ret
		322	fprem_common ENDP
		323
		324	comment ~****************************************************************
		325
		326	;
		327	; float frem_chk (float numer, float denom)
		328	;
		329	public frem_chk
		330	frem_chk PROC NEAR
		331	push edx
		332	sub esp, STACK_SIZE
		333	fld dword ptr [STACK_SIZE+8+esp]
		334	fstp tbyte ptr [NUMER+esp]
		335	fld dword ptr [STACK_SIZE+12+esp]
		336	fstp tbyte ptr [DENOM+esp]
		337	mov edx, 0 ; dx = 1 if denormal extended divisor
		338	call fprem_common
		339	fxch
		340	fstp st
		341	add esp, STACK_SIZE
		342	pop edx
		343	ret
		344	frem_chk ENDP
		345	; end frem_chk
		346
		347	;
		348	; double drem_chk (double numer, double denom)
		349	;
		350	public drem_chk
		351	drem_chk PROC NEAR
		352	push edx
		353	sub esp, STACK_SIZE
		354	fld qword ptr [STACK_SIZE+8+esp]
		355	fstp tbyte ptr [NUMER+esp]
		356	fld qword ptr [STACK_SIZE+16+esp]
		357	fstp tbyte ptr [DENOM+esp]
		358	mov edx, 0 ; dx = 1 if denormal extended divisor
		359	call fprem_common
		360	fxch
		361	fstp st
		362	add esp, STACK_SIZE
		363	pop edx
		364	ret
		365
		366	drem_chk ENDP
		367	; end drem_chk
		368
		369	;
		370	; long double lrem_chk(long double number,long double denom)
		371	;
		372	public lrem_chk
		373	lrem_chk PROC NEAR
		374	fld tbyte ptr [20+esp]
		375	fld tbyte ptr [4+esp]
		376	call fprem_chk
		377	fxch
		378	fstp st
		379	ret
		380	lrem_chk ENDP
		381
		382	**********************************************************************~
		383
		384	;
		385	; FPREM: ST = remainder(ST, ST(1))
		386	;
		387	; Compiler version of the FPREM must preserve the arguments in the floating
		388	; point stack.
		389
		390	public __fprem_chk
		391	defpe __fprem_chk
		392	push edx
		393	sub esp, STACK_SIZE
		394	fstp tbyte ptr [NUMER+esp]
		395	fstp tbyte ptr [DENOM+esp]
		396	xor edx, edx
		397	; prem_main_routine begin
		398	mov eax,[DENOM+6+esp] ; exponent and high 16 bits of mantissa
		399	test eax,07fff0000h ; check for denormal
		400	jz denormal
		401	call fprem_common
		402	add esp, STACK_SIZE
		403	pop edx
		404	ret
		405
		406	denormal:
		407	fld tbyte ptr [DENOM+esp] ; load the denominator
		408	fld tbyte ptr [NUMER+esp] ; load the numerator
		409	mov eax, [DENOM+esp] ; test for whole mantissa == 0
		410	or eax, [DENOM+4+esp] ; test for whole mantissa == 0
		411	jz remainder_hardware_ok_l ; denominator is zero
		412	fxch
		413	fstp tbyte ptr[esp + DENOM_SAVE] ; save org denominator
		414	fld tbyte ptr[esp + DENOM]
		415	fxch
		416	or edx, 02h
		417	;
		418	; For this we need pc=80. Also, mask exceptions so we don't take any
		419	; denormal operand exceptions. It is guaranteed that the descaling
		420	; later on will take underflow, which is what the hardware would have done
		421	; on a normal fprem.
		422	;
		423	fnstcw [PREV_CW+esp] ; save caller's control word
		424	mov eax, [PREV_CW+esp]
		425	or eax, 0033fh ; mask exceptions, pc=80
		426	mov [PATCH_CW+esp], eax
		427	fldcw [PATCH_CW+esp] ; mask exceptions & pc=80
		428
		429	; The denominator is a denormal. For most numerators, scale both numerator
		430	; and denominator to get rid of denormals. Then execute the common code
		431	; with the flag set to indicate that the result must be de-scaled.
		432	; For large numerators this won't work because the scaling would cause
		433	; overflow. In this case we know the numerator is large, the denominator
		434	; is small (denormal), so the exponent difference is also large. This means
		435	; the rem_large code will be used and this code depends on the difference
		436	; in exponents modulo 64. Adding 64 to the denominators exponent
		437	; doesn't change the modulo 64 difference. So we can scale the denominator
		438	; by 64, making it not denormal, and this won't effect the result.
		439	;
		440	; To start with, figure out if numerator is large
		441
		442	mov eax, [esp + NUMER + 8] ; load numerator exponent
		443	and eax, 7fffh ; isolate numerator exponent
		444	cmp eax, 7fbeh ; compare Nexp to Maxexp-64
		445	ja big_numer_rem_de ; jif big numerator
		446
		447	; So the numerator is not large scale both numerator and denominator
		448
		449	or edx, 1 ; edx = 1, if denormal extended divisor
		450	fmul qword ptr one_shl_64 ; make numerator not denormal
		451	fstp tbyte ptr[esp + NUMER]
		452	fmul qword ptr one_shl_64 ; make denominator not denormal
		453	fstp tbyte ptr[esp + DENOM]
		454	jmp scaling_done
		455
		456	; The numerator is large. Scale only the denominator, which will not
		457	; change the result which we know will be partial. Set the scale flag
		458	; to false.
		459	big_numer_rem_de:
		460	; We must do this with pc=80 to avoid rounding to single/double.
		461	; In this case we do not mask exceptions so that we will take
		462	; denormal operand, as would the hardware.
		463	fnstcw [PREV_CW+esp] ; save caller's control word
		464	mov eax, [PREV_CW+esp]
		465	or eax, 00300h ; pc=80
		466	mov [PATCH_CW+esp], eax
		467	fldcw [PATCH_CW+esp] ; pc=80
		468
		469	fstp st ; Toss numerator
		470	fmul qword ptr one_shl_64 ; make denominator not denormal
		471	fstp tbyte ptr[esp + DENOM]
		472
		473	; Restore the control word which was fiddled to scale at 80-bit precision.
		474	; Then call the common code.
		475	scaling_done:
		476	fldcw [esp + PREV_CW] ; restore callers control word
		477	call fprem_common
		478	add esp, STACK_SIZE
		479	pop edx
		480	ret
		481
		482	remainder_hardware_ok_l:
		483	fprem ; and finally do a remainder
		484
		485	CHECKSW
		486
		487	add esp, STACK_SIZE
		488	pop edx
		489	ret
		490	__fprem_chk ENDP
		491	; end fprem_chk
		492
		493
		494	;
		495	; FPREM1 code begins here
		496	;
		497
		498
		499	fprem1_common PROC NEAR
		500
		501	push eax
		502	push ebx
		503	push ecx
		504	mov eax, [MAIN_DENOM+6+esp] ; exponent and high 16 bits of mantissa
		505	xor eax, ONESMASK ; invert bits that have to be one
		506	test eax, ONESMASK ; check bits that have to be one
		507	jnz remainder1_hardware_ok
		508	shr eax, 11
		509	and eax, 0fh
		510	cmp byte ptr fprem_risc_table[eax], 0 ; check for (1,4,7,a,d)
		511	jz remainder1_hardware_ok
		512
		513	; The denominator has the bit pattern. Weed out the funny cases like NaNs
		514	; before applying the software version. Our caller guarantees that the
		515	; denominator is not a denormal. Here we check for:
		516	; denominator inf, NaN, unnormal
		517	; numerator inf, NaN, unnormal, denormal
		518
		519	mov eax, [MAIN_DENOM+6+esp] ; exponent and high 16 bits of mantissa
		520	and eax, 07fff0000h ; mask the exponent only
		521	cmp eax, 07fff0000h ; check for INF or NaN
		522	je remainder1_hardware_ok
		523	mov eax, [MAIN_NUMER+6+esp] ; exponent and high 16 bits of mantissa
		524	and eax, 07fff0000h ; mask the exponent only
		525	jz remainder1_hardware_ok ; jif numerator denormal
		526	cmp eax, 07fff0000h ; check for INF or NaN
		527	je remainder1_hardware_ok
		528	mov eax, [esp + MAIN_NUMER + 4] ; high mantissa bits - numerator
		529	add eax, eax ; set carry if explicit bit set
		530	jnz remainder1_hardware_ok ; jmp if numerator is unnormal
		531	mov eax, [esp + MAIN_DENOM + 4] ; high mantissa bits - denominator
		532	add eax, eax ; set carry if explicit bit set
		533	jnz remainder1_hardware_ok ; jmp if denominator is unnormal
		534
		535	rem1_patch:
		536	mov eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
		537	and eax, 07fffh ; clear sy
		538	add eax, 63 ; evaluate ey + 63
		539	mov ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
		540	and ebx, 07fffh ; clear sx
		541	sub ebx, eax ; evaluate the exponent difference (ex - ey)
		542	ja rem1_large ; if ex > ey + 63, case of large arguments
		543	rem1_patch_loop:
		544	mov eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
		545	and eax, 07fffh ; clear sy
		546	add eax, 10 ; evaluate ey + 10
		547	mov ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
		548	and ebx, 07fffh ; clear sx
		549	sub ebx, eax ; evaluate the exponent difference (ex - ey)
		550	js remainder1_hardware_ok ; safe if ey + 10 > ex
		551	fld tbyte ptr [MAIN_NUMER+esp] ; load the numerator
		552	mov eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
		553	mov ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
		554	and ebx, 07fffh ; clear sx
		555	mov ecx, ebx
		556	sub ebx, eax
		557	and ebx, 07h
		558	or ebx, 04h
		559	sub ecx, ebx
		560	mov ebx, eax
		561	and ebx, 08000h ; keep sy
		562	or ecx, ebx ; merge the sign of y
		563	mov dword ptr [MAIN_DENOM+8+esp], ecx
		564	fld tbyte ptr [MAIN_DENOM+esp] ; load the shifted denominator
		565	mov dword ptr [MAIN_DENOM+8+esp], eax ; restore the initial denominator
		566	fxch
		567	fprem ; this rem is safe
		568	fstp tbyte ptr [MAIN_NUMER+esp] ; update the numerator
		569	fstp st(0) ; pop the stack
		570	jmp rem1_patch_loop
		571	rem1_large:
		572	test ebx, 02h ; is denominator already saved
		573	jnz already_saved1
		574	fld tbyte ptr[esp + MAIN_DENOM]
		575	fstp tbyte ptr[esp + MAIN_DENOM_SAVE] ; save denominator
		576	already_saved1:
		577	; Save user's precision control and institute 80. The fp ops in
		578	; rem1_large_loop must not round to user's precision (if it is less
		579	; than 80) because the hardware would not have done so. We are
		580	; aping the hardware here, which is all extended.
		581
		582	fnstcw [esp+MAIN_PREV_CW] ; save caller's control word
		583	mov eax, dword ptr[esp + MAIN_PREV_CW]
		584	or eax, 033fh ; mask exceptions, pc=80
		585	mov [esp + MAIN_PATCH_CW], eax
		586	fldcw [esp + MAIN_PATCH_CW]
		587
		588	mov eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
		589	and eax, 07fffh ; clear sy
		590	mov ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
		591	and ebx, 07fffh ; clear sx
		592	sub ebx, eax ; evaluate the exponent difference
		593	and ebx, 03fh
		594	or ebx, 020h
		595	add ebx, 1
		596	mov ecx, ebx
		597	mov eax, [MAIN_DENOM+8+esp] ; sign and exponent of y (denominator)
		598	mov ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
		599	and ebx, 07fffh ; clear sx
		600	and eax, 08000h ; keep sy
		601	or ebx, eax ; merge the sign of y
		602	mov dword ptr[MAIN_DENOM+8+esp], ebx ; make ey equal to ex (scaled denominator)
		603	fld tbyte ptr [MAIN_DENOM+esp] ; load the scaled denominator
		604	fabs
		605	fld tbyte ptr [MAIN_NUMER+esp] ; load the numerator
		606	fabs
		607	rem1_large_loop:
		608	fcom
		609	fstsw ax
		610	and eax, 00100h
		611	jnz rem1_no_sub
		612	fsub st, st(1)
		613	rem1_no_sub:
		614	fxch
		615	fmul qword ptr half
		616	fxch
		617	sub ecx, 1 ; decrement the loop counter
		618	jnz rem1_large_loop
		619	mov ebx, [MAIN_NUMER+8+esp] ; sign and exponent of x (numerator)
		620	fstp tbyte ptr[esp + MAIN_NUMER] ; save result
		621	fstp st ; toss modified denom
		622	fld tbyte ptr[esp + MAIN_DENOM_SAVE]
		623	fld tbyte ptr[big_number] ; force C2 to be set
		624	fprem1
		625	fstp st
		626	fld tbyte ptr[esp + MAIN_NUMER] ; restore saved result
		627
		628	fldcw [esp + MAIN_PREV_CW] ; restore caller's control word
		629	and ebx, 08000h ; keep sx
		630	jz rem1_done
		631	fchs
		632	jmp rem1_done
		633	remainder1_hardware_ok:
		634	fld tbyte ptr [MAIN_DENOM+esp] ; load the denominator
		635	fld tbyte ptr [MAIN_NUMER+esp] ; load the numerator
		636	fprem1 ; and finally do a remainder
		637	; prem1_main_routine end
		638	rem1_done:
		639	test edx, 03h
		640	jz rem1_exit
		641	fnstsw [esp + MAIN_FPREM_SW] ; save Q0 Q1 and Q2
		642	test edx, 01h
		643	jz do_not_de_scale1
		644	; De-scale the result. Go to pc=80 to prevent from fmul
		645	; from user precision (fprem does not round the result).
		646	fnstcw [esp + MAIN_PREV_CW] ; save callers control word
		647	mov eax, [esp + MAIN_PREV_CW]
		648	or eax, 0300h ; pc = 80
		649	mov [esp + MAIN_PATCH_CW], eax
		650	fldcw [esp + MAIN_PATCH_CW]
		651	fmul qword ptr one_shr_64
		652	fldcw [esp + MAIN_PREV_CW] ; restore callers CW
		653	do_not_de_scale1:
		654	mov eax, [esp + MAIN_FPREM_SW]
		655	fxch
		656	fstp st
		657	fld tbyte ptr[esp + MAIN_DENOM_SAVE]
		658	fxch
		659	and eax, 04300h ; restore saved Q0, Q1, Q2
		660	sub esp, ENV_SIZE
		661	fnstenv [esp]
		662	and [esp].STATUS_WORD, 0bcffh
		663	or [esp].STATUS_WORD, eax
		664	fldenv [esp]
		665	add esp, ENV_SIZE
		666	rem1_exit:
		667	pop ecx
		668	pop ebx
		669	pop eax
		670	CHECKSW ; debug only: save status
		671	ret
		672	fprem1_common ENDP
		673
		674
		675	comment ~***************************************************************
		676	;
		677	; float frem1_chk (float numer, float denom)
		678	;
		679	public frem1_chk
		680	frem1_chk PROC NEAR
		681	push edx
		682	sub esp, STACK_SIZE
		683	fld dword ptr [STACK_SIZE+8+esp]
		684	fstp tbyte ptr [NUMER+esp]
		685	fld dword ptr [STACK_SIZE+12+esp]
		686	fstp tbyte ptr [DENOM+esp]
		687	mov edx, 0 ; dx = 1 if denormal extended divisor
		688	call fprem1_common
		689	fxch
		690	fstp st
		691	add esp, STACK_SIZE
		692	pop edx
		693	ret
		694	frem1_chk ENDP
		695	; end frem1_chk
		696
		697	;
		698	; double drem1_chk (double numer, double denom)
		699	;
		700	public drem1_chk
		701	drem1_chk PROC NEAR
		702	push edx
		703	sub esp, STACK_SIZE
		704	fld qword ptr [STACK_SIZE+8+esp]
		705	fstp tbyte ptr [NUMER+esp]
		706	fld qword ptr [STACK_SIZE+16+esp]
		707	fstp tbyte ptr [DENOM+esp]
		708	mov edx, 0 ; dx = 1 if denormal extended divisor
		709	call fprem1_common
		710	fxch
		711	fstp st
		712	add esp, STACK_SIZE
		713	pop edx
		714	ret
		715
		716	drem1_chk ENDP
		717	; end drem1_chk
		718
		719	;
		720	; long double lrem1_chk(long double number,long double denom)
		721	;
		722	public lrem1_chk
		723	lrem1_chk PROC NEAR
		724	fld tbyte ptr [20+esp]
		725	fld tbyte ptr [4+esp]
		726	call fprem1_chk
		727	fxch
		728	fstp st
		729	ret
		730	lrem1_chk ENDP
		731	********************************************************************~
		732
		733	;
		734	; FPREM1: ST = remainder(ST, ST(1)) - IEEE version of rounding
		735	;
		736	; Compiler version of the FPREM must preserve the arguments in the floating
		737	; point stack.
		738
		739	public __fprem1_chk
		740	defpe __fprem1_chk
		741	push edx
		742	sub esp, STACK_SIZE
		743	fstp tbyte ptr [NUMER+esp]
		744	fstp tbyte ptr [DENOM+esp]
		745	mov edx, 0
		746	; prem1_main_routine begin
		747	mov eax,[DENOM+6+esp] ; exponent and high 16 bits of mantissa
		748	test eax,07fff0000h ; check for denormal
		749	jz denormal1
		750	call fprem1_common
		751	add esp, STACK_SIZE
		752	pop edx
		753	ret
		754
		755	denormal1:
		756	fld tbyte ptr [DENOM+esp] ; load the denominator
		757	fld tbyte ptr [NUMER+esp] ; load the numerator
		758	mov eax, [DENOM+esp] ; test for whole mantissa == 0
		759	or eax, [DENOM+4+esp] ; test for whole mantissa == 0
		760	jz remainder1_hardware_ok_l ; denominator is zero
		761	fxch
		762	fstp tbyte ptr[esp + DENOM_SAVE] ; save org denominator
		763	fld tbyte ptr[esp + DENOM]
		764	fxch
		765	or edx, 02h
		766	;
		767	; For this we need pc=80. Also, mask exceptions so we don't take any
		768	; denormal operand exceptions. It is guaranteed that the descaling
		769	; later on will take underflow, which is what the hardware would have done
		770	; on a normal fprem.
		771	;
		772	fnstcw [PREV_CW+esp] ; save caller's control word
		773	mov eax, [PREV_CW+esp]
		774	or eax, 0033fh ; mask exceptions, pc=80
		775	mov [PATCH_CW+esp], eax
		776	fldcw [PATCH_CW+esp] ; mask exceptions & pc=80
		777
		778	; The denominator is a denormal. For most numerators, scale both numerator
		779	; and denominator to get rid of denormals. Then execute the common code
		780	; with the flag set to indicate that the result must be de-scaled.
		781	; For large numerators this won't work because the scaling would cause
		782	; overflow. In this case we know the numerator is large, the denominator
		783	; is small (denormal), so the exponent difference is also large. This means
		784	; the rem1_large code will be used and this code depends on the difference
		785	; in exponents modulo 64. Adding 64 to the denominators exponent
		786	; doesn't change the modulo 64 difference. So we can scale the denominator
		787	; by 64, making it not denormal, and this won't effect the result.
		788	;
		789	; To start with, figure out if numerator is large
		790
		791	mov eax, [esp + NUMER + 8] ; load numerator exponent
		792	and eax, 7fffh ; isolate numerator exponent
		793	cmp eax, 7fbeh ; compare Nexp to Maxexp-64
		794	ja big_numer_rem1_de ; jif big numerator
		795
		796	; So the numerator is not large scale both numerator and denominator
		797
		798	or edx, 1 ; edx = 1, if denormal extended divisor
		799	fmul qword ptr one_shl_64 ; make numerator not denormal
		800	fstp tbyte ptr[esp + NUMER]
		801	fmul qword ptr one_shl_64 ; make denominator not denormal
		802	fstp tbyte ptr[esp + DENOM]
		803	jmp scaling_done1
		804
		805	; The numerator is large. Scale only the denominator, which will not
		806	; change the result which we know will be partial. Set the scale flag
		807	; to false.
		808	big_numer_rem1_de:
		809	; We must do this with pc=80 to avoid rounding to single/double.
		810	; In this case we do not mask exceptions so that we will take
		811	; denormal operand, as would the hardware.
		812	fnstcw [PREV_CW+esp] ; save caller's control word
		813	mov eax, [PREV_CW+esp]
		814	or eax, 00300h ; pc=80
		815	mov [PATCH_CW+esp], eax
		816	fldcw [PATCH_CW+esp] ; pc=80
		817
		818	fstp st ; Toss numerator
		819	fmul qword ptr one_shl_64 ; make denominator not denormal
		820	fstp tbyte ptr[esp + DENOM]
		821
		822	; Restore the control word which was fiddled to scale at 80-bit precision.
		823	; Then call the common code.
		824	scaling_done1:
		825	fldcw [esp + PREV_CW] ; restore callers control word
		826	call fprem1_common
		827	add esp, STACK_SIZE
		828	pop edx
		829	ret
		830
		831	remainder1_hardware_ok_l:
		832	fprem ; and finally do a remainder
		833
		834	CHECKSW
		835
		836	add esp, STACK_SIZE
		837	pop edx
		838	ret
		839	__fprem1_chk ENDP
		840	; end fprem1_chk
		841
		842	ifdef DEBUG
		843	public fpinit
		844	fpinit PROC NEAR
		845	fninit
		846	ret
		847	fpinit ENDP
		848	endif
		849
		850	CODE32 ENDS
		851	END

Subversion Repositories Kolibri OS

(root)/programs/develop/open watcom/trunk/clib/src/chipr32.asm @ 548 – Rev