WebSVN – Kolibri OS – Blame – /programs/develop/libraries/TinyGL/asm_fork/zmath.asm

Rev	Author	Line No.	Line
5153	IgorA	1	; Some simple mathematical functions. Don't look for some logic in
		2	; the function names :-)
		3
		4	; ***** Gestion des matrices 4x4 ****
		5
		6	align 4
		; gl_M4_Id: write the 4x4 identity matrix to [a].
		; In:  a = pointer to 16 consecutive dwords (one float each)
		; Out: nothing; eax, ecx and edi are restored through 'uses'
		; 'rep stosd' only advances edi upwards while DF is clear.
		proc gl_M4_Id uses eax ecx edi, a:dword
			mov edi,[a]
			mov ecx,16
			mov eax,0.0
			rep stosd		;all 16 elements = 0.0
			mov edi,[a]
			mov eax,1.0
			mov [edi],eax		;element 0  (diagonal)
			mov [edi+20],eax	;element 5  (diagonal)
			mov [edi+40],eax	;element 10 (diagonal)
			mov [edi+60],eax	;element 15 (diagonal)
			ret
		endp
		23
		24	align 4
		; gl_M4_IsId: test whether the 4x4 float matrix at [a] is the identity.
		; In:  a = pointer to 16 consecutive dwords (one float each)
		; Out: eax = 1 when every diagonal element equals 1.0 and every other
		;      element equals 0.0, otherwise eax = 0 (NaN elements give 0)
		; The element pointer is kept in edx, not eax: 'fstsw ax' overwrites the
		; low word of eax, which would corrupt a pointer held there.
		; The x87 stack is left exactly as it was on entry.
		proc gl_M4_IsId uses ebx ecx edx, a:dword
			mov edx,[a]		;edx -> current element
			xor ebx,ebx		;ebx = column index
			xor ecx,ecx		;ecx = row index
		.cycle_01:
			fld dword[edx]
			cmp ecx,ebx
			je .diag
			ftst			;off-diagonal: compare with 0.0
			fstsw ax
			sahf			;ZF=C3, PF=C2, CF=C0
			jp .not_1		;unordered (NaN)
			jne .not_1		;non-zero off the diagonal
			jmp .next
		.diag:
			fld1
			fcomp st1		;diagonal: compare 1.0 with the element, pop the 1.0
			fstsw ax
			and ah,0x45		;keep C3,C2,C0
			cmp ah,0x40		;only C3 set <=> exactly equal
			jne .not_1
		.next:
			ffree st0
			fincstp			;drop the loaded element
			add edx,4
			inc ebx
			btr ebx,2		;column reached 4: reset it to 0 and set CF
			jnc .cycle_01
			inc ecx
			bt ecx,2		;row reached 4: all 16 elements were checked
			jnc .cycle_01

			mov eax,1
			jmp @f
		.not_1:
			ffree st0
			fincstp			;drop the element that failed the test
			xor eax,eax
		@@:
			ret
		endp
		64
		65	align 4
		; gl_M4_Mul: c = a * b for 4x4 float matrices.
		; In:  c = destination pointer, a = left operand, b = right operand
		; Out: 16 dwords written at [c]; all general registers are restored by
		;      pushad/popad
		; M4_reg is a macro defined elsewhere; from its use here it presumably
		; loads its first operand with the address of element [row][col] of the
		; given matrix - confirm against its definition.
		proc gl_M4_Mul, c:dword,a:dword,b:dword
			pushad
			xor eax,eax		;eax = i
			mov edx,[c]		;edx -> c[i][j], advanced after every element
		.row: ;i
			xor ebx,ebx		;ebx = j
		.col: ;j
			fldz			;sum=0
			xor ecx,ecx		;ecx = k
			M4_reg edi,[a],eax,0
		.dot: ;k
			fld dword[edi]
			add edi,4
			M4_reg esi,[b],ecx,ebx
			fmul dword[esi]
			faddp			;sum += a[i][k] * b[k][j]
			inc ecx
			cmp ecx,4
			jl .dot
			fstp dword[edx]		;c[i][j] = sum
			add edx,4
			inc ebx
			cmp ebx,4
			jl .col
			inc eax
			cmp eax,4
			jl .row
		if DEBUG ;gl_M4_Mul
			stdcall dbg_print,f_m4m,txt_nl
			stdcall gl_print_matrix,[c],4
			stdcall dbg_print,txt_sp,txt_nl
		end if
			popad
			ret
		endp
		101
		102	; c=c*b (in place; the local 'a' holds a copy of the original c)
		103	align 4
		; gl_M4_MulLeft: c = c * b for 4x4 float matrices, computed in place.
		; In:  c = matrix that is both left operand and destination
		;      b = right operand
		; Out: 16 dwords rewritten at [c]; all general registers are restored by
		;      pushad/popad
		; The original c is first copied into the local 'a' so that results can
		; be written to c while its old rows are still being read.
		; The running sum is accumulated with 'faddp', so every product is popped
		; as soon as it is added; no 'finit' is needed and the caller's x87 stack
		; and control word are left alone.
		; M4_reg is a macro defined elsewhere; from its use here it presumably
		; loads its first operand with the address of element [row][col] of the
		; given matrix - confirm against its definition.
		proc gl_M4_MulLeft, c:dword,b:dword
		locals
			i dd ?
			a M4
		endl
			pushad
			mov ecx,16
			mov esi,[c]
			mov edi,ebp
			sub edi,sizeof.M4	;edi = ebp-sizeof.M4, used as the address of 'a'
			rep movsd		;[a]=[c]

			mov edx,[c]		;edx -> c[i][j], advanced after every element
			mov dword[i],0
		.cycle_0: ;i
			xor ebx,ebx		;ebx = j
		.cycle_1: ;j
			fldz			;sum=0
			xor ecx,ecx		;ecx = k
			mov eax,ebp
			sub eax,sizeof.M4	;eax = same address as used for the copy above
			M4_reg edi,eax,dword[i],0
		.cycle_2: ;k
			fld dword[edi]
			add edi,4
			M4_reg esi,[b],ecx,ebx
			fmul dword[esi]
			faddp			;sum += a[i][k] * b[k][j]
			inc ecx
			cmp ecx,4
			jl .cycle_2
			fstp dword[edx]		;c[i][j] = sum
			add edx,4
			inc ebx
			cmp ebx,4
			jl .cycle_1
			inc dword[i]
			cmp dword[i],4
			jl .cycle_0
		if DEBUG ;gl_M4_MulLeft
			stdcall dbg_print,f_m4ml,txt_nl
			stdcall gl_print_matrix,[c],4
			stdcall dbg_print,txt_sp,txt_nl
		end if
			popad
			ret
		endp
		154
		155	align 4
		; gl_M4_Move: copy one M4 structure.
		; In:  a = destination pointer, b = source pointer
		; Out: sizeof.M4 bytes copied from [b] to [a]; ecx, edi and esi are
		;      restored through 'uses'
		proc gl_M4_Move uses ecx edi esi, a:dword,b:dword
			mov ecx,sizeof.M4/4	;number of dwords to copy
			mov esi,[b]
			mov edi,[a]
			rep movsd
			ret
		endp
		163
		164	align 4
		; gl_MoveV3: copy three dwords (one 3-component vector).
		; In:  a = destination pointer, b = source pointer
		; Out: 12 bytes copied from [b] to [a]; ecx, edi and esi are restored
		;      through 'uses'
		proc gl_MoveV3 uses ecx edi esi, a:dword,b:dword
			mov esi,[b]
			mov edi,[a]
			mov ecx,3
			rep movsd
			ret
		endp
		173
		174	;void gl_MulM4V3(V3 *a,M4 *b,V3 *c)
		175	;{
		176	; a->X=b->m[0][0]*c->X+b->m[0][1]*c->Y+b->m[0][2]*c->Z+b->m[0][3];
		177	; a->Y=b->m[1][0]*c->X+b->m[1][1]*c->Y+b->m[1][2]*c->Z+b->m[1][3];
		178	; a->Z=b->m[2][0]*c->X+b->m[2][1]*c->Y+b->m[2][2]*c->Z+b->m[2][3];
		179	;}
		180
		181	;void gl_MulM3V3(V3 *a,M4 *b,V3 *c)
		182	;{
		183	; a->X=b->m[0][0]*c->X+b->m[0][1]*c->Y+b->m[0][2]*c->Z;
		184	; a->Y=b->m[1][0]*c->X+b->m[1][1]*c->Y+b->m[1][2]*c->Z;
		185	; a->Z=b->m[2][0]*c->X+b->m[2][1]*c->Y+b->m[2][2]*c->Z;
		186	;}
		187
5256	IgorA	188	align 4
		; gl_M4_MulV4: a = b * c, a 4x4 float matrix times a 4-component vector.
		; In:  a = pointer to the result vector (4 dwords)
		;      b = pointer to the matrix (4 rows of 4 dwords)
		;      c = pointer to the source vector (4 dwords: X, Y, Z, W)
		; Out: 4 dwords written at [a]; ebx, ecx and edx are restored through
		;      'uses'. c is loaded completely before a is written.
		; The x87 stack is left exactly as it was on entry.
		proc gl_M4_MulV4 uses ebx ecx edx, a:dword, b:dword, c:dword ;V4 *a, M4 *b, V4 *c
			mov edx,[c]
			fld dword[edx]
			fld dword[edx+4]
			fld dword[edx+8]
			fld dword[edx+12]	;st0=c.W st1=c.Z st2=c.Y st3=c.X
			mov ebx,[b]		;ebx -> current matrix row
			mov edx,[a]		;edx -> current result component
			mov ecx,4		;rows left
		.row:
			fld dword[ebx]		;st0 = m[_][0]
			fmul st0,st4		;st0 *= c.X
			fld dword[ebx+4]	;st0 = m[_][1]
			fmul st0,st4		;st0 *= c.Y
			faddp
			fld dword[ebx+8]	;st0 = m[_][2]
			fmul st0,st3		;st0 *= c.Z
			faddp
			fld dword[ebx+12]	;st0 = m[_][3]
			fmul st0,st2		;st0 *= c.W
			faddp
			fstp dword[edx]		;a[_] = m[_][0]*c.X +m[_][1]*c.Y +m[_][2]*c.Z +m[_][3]*c.W
			add ebx,16		;next matrix row
			add edx,4		;next result component
			dec ecx
			jnz .row
			repeat 4		;drop c.W, c.Z, c.Y, c.X
				ffree st0
				fincstp
			end repeat
			ret
		endp
5153	IgorA	224
		225	; transposition of a 4x4 matrix
		226	align 4
		; gl_M4_Transpose: a = transpose of b for 4x4 matrices of dwords.
		; In:  a = destination pointer, b = source pointer
		; Out: for i,j in 0..3 the dword at b+16*j+4*i is copied to a+16*i+4*j;
		;      eax, ebx, ecx and edx are restored through 'uses'
		; Elements of b are read after earlier elements of a were written, so
		; calling this with a == b does not produce a transpose.
		proc gl_M4_Transpose uses eax ebx ecx edx, a:dword, b:dword
			mov eax,[a]		;eax -> current row of a
			mov ecx,[b]		;ecx -> top of the current column of b
			mov ebx,4		;rows of a still to fill
		.next_row:
			mov edx,[ecx]
			mov [eax],edx
			mov edx,[ecx+0x10]
			mov [eax+0x4],edx
			mov edx,[ecx+0x20]
			mov [eax+0x8],edx
			mov edx,[ecx+0x30]
			mov [eax+0x0c],edx
			add eax,0x10		;next row of a
			add ecx,0x4		;next column of b
			dec ebx
			jnz .next_row
			ret
		endp
		268
5218	IgorA	269	; inversion of an orthogonal matrix of type Y=M.X+P
5153	IgorA	270	;void gl_M4_InvOrtho(M4 *a,M4 b)
		271	;{
		272	; int i,j;
		273	; float s;
		274	; for(i=0;i<3;i++)
		275	; for(j=0;j<3;j++) a->m[i][j]=b.m[j][i];
		276	; a->m[3][0]=0.0; a->m[3][1]=0.0; a->m[3][2]=0.0; a->m[3][3]=1.0;
		277	; for(i=0;i<3;i++) {
		278	; s=0;
		279	; for(j=0;j<3;j++) s-=b.m[j][i]*b.m[j][3];
		280	; a->m[i][3]=s;
		281	; }
		282	;}
		283
5218	IgorA	284	; Inversion of a general nxn matrix.
		285	; Note : m is destroyed
5153	IgorA	286
		287	align 4
; Matrix_Inv: invert a general n x n float matrix by Gauss-Jordan elimination
; with row pivoting.
; In:  r = pointer to n*n dwords that receive the inverse
;      m = pointer to the n*n source matrix (element [i][j] at m[i*n+j]);
;          its contents are destroyed
;      n = order of the matrix
; Out: eax = 0 on success
;      eax = 1 when the pivot found for some column is 0.0 (matrix cannot be
;            inverted); r and m are left partially processed in that case
;      ebx, ecx, edx, edi and esi are restored through 'uses'
; The x87 stack is left exactly as it was on entry.
; m2m is a macro defined elsewhere; it is used here as a memory-to-memory
; dword move - confirm against its definition.
proc Matrix_Inv uses ebx ecx edx edi esi, r:dword, m:dword, n:dword ;(float *r,float *m,int n)
locals
	max dd ? ;float
	tmp dd ?
endl

	; r = identity matrix
	mov eax,0.0
	mov ecx,[n]
	imul ecx,ecx
	mov edi,[r]
	rep stosd ;for(i=0;i<n*n;i++) r[i]=0
	mov eax,1.0
	xor ebx,ebx
	mov edi,[r]
	mov ecx,[n]
	shl ecx,2 ;ecx = n*4; together with the 4 bytes of stosd the step is (n+1)*4
	@@: ;for(i=0;i<n;i++)
		cmp ebx,[n]
		jge .end_0
		stosd ;r[i*n+i]=1
		add edi,ecx
		inc ebx
		jmp @b
	.end_0:

	; ebx -> n
	; ecx -> j
	; edx -> k
	; edi -> i
	; esi -> l
	mov ebx,[n]
	xor ecx,ecx
	.cycle_0: ;for(j=0;j<n;j++)
		cmp ecx,ebx
		jge .cycle_0_end
		; find the element of largest magnitude in column j, rows j..n-1
		mov eax,ecx
		imul eax,ebx
		add eax,ecx
		shl eax,2
		add eax,[m]
		mov eax,[eax]
		mov [max],eax ;max=m[j*n+j]
		mov edx,ecx ;k=j
		mov edi,ecx
		inc edi
		.cycle_1: ;for(i=j+1;i<n;i++)
			cmp edi,ebx
			jge .cycle_1_end
			mov eax,edi
			imul eax,ebx
			add eax,ecx
			shl eax,2
			add eax,[m]
			fld dword[eax] ;st0 = m[i*n+j]
			fld dword[max]
			fabs
			fld st1
			fabs ;st0 = |m[i*n+j]|, st1 = |max|, st2 = m[i*n+j]
			fcompp ;if (fabs(m[i*n+j])>fabs(max)), both magnitudes are popped
			fstsw ax
			sahf
			jbe @f ;not strictly greater, or unordered
				mov edx,edi ;k=i
				fst dword[max] ;max=m[i*n+j] (signed value)
			@@:
			ffree st0
			fincstp
			inc edi
			jmp .cycle_1
		.cycle_1_end:

		; matrix that cannot be inverted
		fld dword[max]
		ftst ;if (max==0)
		fstsw ax
		ffree st0
		fincstp
		sahf
		jne @f
			xor eax,eax
			inc eax
			jmp .end_f ;return 1
		@@:

		; swap rows j and k
		cmp ecx,edx ;if (j!=k)
		je .cycle_2_end
		xor edi,edi
		.cycle_2: ;for(i=0;i<n;i++)
			cmp edi,ebx
			jge .cycle_2_end
			; esi is only a scratch register here, it does not hold l
			mov eax,ecx
			imul eax,ebx
			add eax,edi
			shl eax,2
			add eax,[m]
			mov esi,[eax]
			mov [tmp],esi ;tmp=m[j*n+i]
			mov esi,edx
			imul esi,ebx
			add esi,edi
			shl esi,2
			add esi,[m]
			m2m dword[eax],dword[esi] ;m[j*n+i]=m[k*n+i]
			mov eax,[tmp]
			mov [esi],eax ;m[k*n+i]=tmp

			mov eax,ecx
			imul eax,ebx
			add eax,edi
			shl eax,2
			add eax,[r]
			mov esi,[eax]
			mov [tmp],esi ;tmp=r[j*n+i]
			mov esi,edx
			imul esi,ebx
			add esi,edi
			shl esi,2
			add esi,[r]
			m2m dword[eax],dword[esi] ;r[j*n+i]=r[k*n+i]
			mov eax,[tmp]
			mov [esi],eax ;r[k*n+i]=tmp
			inc edi
			jmp .cycle_2
		.cycle_2_end:

		; multiply row j by 1/max
		fld1
		fdiv dword[max]
		fst dword[max] ;max=1/max, also kept in st0 for the loop below
		xor edi,edi
		mov eax,ecx
		imul eax,ebx
		shl eax,2 ;eax = byte offset of element [j][0]
		.cycle_3: ;for(i=0;i<n;i++)
			cmp edi,ebx
			jge .cycle_3_end
			add eax,[m]
			fld dword[eax]
			fmul st0,st1
			fstp dword[eax] ;m[j*n+i]*=max
			sub eax,[m]
			add eax,[r]
			fld dword[eax]
			fmul st0,st1
			fstp dword[eax] ;r[j*n+i]*=max
			sub eax,[r]
			add eax,4
			inc edi
			jmp .cycle_3
		.cycle_3_end:
		ffree st0 ;max
		fincstp

		; subtract a multiple of row j from every other row
		xor esi,esi
		.cycle_4: ;for(l=0;l<n;l++)
			cmp esi,ebx
			jge .cycle_4_end
			cmp esi,ecx ;if (l!=j)
			je .cycle_4_next ;t is not loaded for row j, so there is nothing to pop
			mov eax,esi
			imul eax,ebx
			add eax,ecx
			shl eax,2
			add eax,[m]
			fld dword[eax] ;t=m[l*n+j]
			xor edi,edi
			.cycle_5: ;for(i=0;i<n;i++)
				cmp edi,ebx
				jge .cycle_5_end
				mov eax,ecx
				imul eax,ebx
				add eax,edi
				shl eax,2
				add eax,[m]
				fld dword[eax]
				fmul st0,st1 ;st0 = m[j*n+i]*t
				mov eax,esi
				imul eax,ebx
				add eax,edi
				shl eax,2
				add eax,[m]
				fsub dword[eax]
				fchs ;st0 = m[l*n+i] - m[j*n+i]*t
				fstp dword[eax] ;m[l*n+i]-=m[j*n+i]*t
				mov eax,ecx
				imul eax,ebx
				add eax,edi
				shl eax,2
				add eax,[r]
				fld dword[eax]
				fmul st0,st1 ;st0 = r[j*n+i]*t
				mov eax,esi
				imul eax,ebx
				add eax,edi
				shl eax,2
				add eax,[r]
				fsub dword[eax]
				fchs ;st0 = r[l*n+i] - r[j*n+i]*t
				fstp dword[eax] ;r[l*n+i]-=r[j*n+i]*t
				inc edi
				jmp .cycle_5
			.cycle_5_end:
			ffree st0 ;t
			fincstp
			.cycle_4_next:
			inc esi
			jmp .cycle_4
		.cycle_4_end:
		inc ecx
		jmp .cycle_0
	.cycle_0_end:

	xor eax,eax ;return 0
	.end_f:
	ret
endp
		503
		504	; inversion of a 4x4 matrix
		505
		506	align 4
		; gl_M4_Inv: a = inverse of the 4x4 matrix b.
		; In:  a = pointer to the result matrix, b = pointer to the source matrix
		; Out: result written through Matrix_Inv; b itself is not modified,
		;      because Matrix_Inv is given the local copy 'tmp'.
		; eax is listed in 'uses', so the value returned by Matrix_Inv is
		; overwritten when eax is restored and never reaches the caller.
		proc gl_M4_Inv uses eax ecx edi esi, a:dword, b:dword
		locals
			tmp M4
		endl
			mov eax,ebp
			sub eax,sizeof.M4	;eax = ebp-sizeof.M4, used as the address of 'tmp'
			mov edi,eax
			mov esi,[b]
			mov ecx,16
			rep movsd		;tmp = copy of b (16 dwords)
			stdcall Matrix_Inv,[a],eax,4
			ret
		endp
		520
		521	align 4
		; gl_M4_Rotate: build a 4x4 rotation matrix in [a].
		; In:  a = pointer to the matrix to fill
		;      t = angle as a float passed by value (fsin/fcos work in radians)
		;      u = axis index; v and w below wrap to 0 only when they exceed 2,
		;          so u is presumably expected to be 0, 1 or 2 - confirm with callers
		; Out: a = identity with a[v][v]=cos t, a[v][w]=-sin t,
		;                        a[w][v]=sin t, a[w][w]=cos t
		;      where v = u+1 and w = v+1, each replaced by 0 when greater than 2;
		;      eax and ecx are restored through 'uses'
		; M4_reg is a macro defined elsewhere; from its use here it presumably
		; loads its first operand with the address of element [row][col] of the
		; given matrix - confirm against its definition.
		proc gl_M4_Rotate uses eax ecx, a:dword,t:dword,u:dword
		locals
			s dd ? ;float
			c dd ? ;float
			v dd ? ;int
			w dd ? ;int
		endl
			mov eax,[u]
			inc eax
			cmp eax,2
			jle @f
				xor eax,eax
			@@:
			mov dword [v],eax	;v = (u+1 > 2) ? 0 : u+1
			inc eax
			cmp eax,2
			jle @f
				xor eax,eax
			@@:
			mov dword [w],eax	;w = (v+1 > 2) ? 0 : v+1

			fld dword [t]
			fsin
			fstp dword [s]		;s = sin(t)
			fld dword [t]
			fcos
			fstp dword [c]		;c = cos(t)

			stdcall gl_M4_Id,[a]

			M4_reg ecx,[a],[v],[v]
			mov eax,[c]
			mov [ecx],eax		;a[v][v] = c

			M4_reg ecx,[a],[v],[w]
			fld dword [s]
			fchs
			fstp dword [ecx]	;a[v][w] = -s

			M4_reg ecx,[a],[w],[v]
			mov eax,[s]
			mov [ecx],eax		;a[w][v] = s

			M4_reg ecx,[a],[w],[w]
			mov eax,[c]
			mov [ecx],eax		;a[w][w] = c

			ret
		endp
		571
		572	; inverse of a 3x3 matrix
		573	;void gl_M3_Inv(M3 *a,M3 *m)
		574	;{
		575	; float det;
		576
		577	; det = m->m[0][0]*m->m[1][1]*m->m[2][2]-m->m[0][0]*m->m[1][2]*m->m[2][1]-
		578	; m->m[1][0]*m->m[0][1]*m->m[2][2]+m->m[1][0]*m->m[0][2]*m->m[2][1]+
		579	; m->m[2][0]*m->m[0][1]*m->m[1][2]-m->m[2][0]*m->m[0][2]*m->m[1][1];
		580
		581	; a->m[0][0] = (m->m[1][1]*m->m[2][2]-m->m[1][2]*m->m[2][1])/det;
		582	; a->m[0][1] = -(m->m[0][1]*m->m[2][2]-m->m[0][2]*m->m[2][1])/det;
		583	; a->m[0][2] = -(-m->m[0][1]*m->m[1][2]+m->m[0][2]*m->m[1][1])/det;
		584
		585	; a->m[1][0] = -(m->m[1][0]*m->m[2][2]-m->m[1][2]*m->m[2][0])/det;
		586	; a->m[1][1] = (m->m[0][0]*m->m[2][2]-m->m[0][2]*m->m[2][0])/det;
		587	; a->m[1][2] = -(m->m[0][0]*m->m[1][2]-m->m[0][2]*m->m[1][0])/det;
		588
		589	; a->m[2][0] = (m->m[1][0]*m->m[2][1]-m->m[1][1]*m->m[2][0])/det;
		590	; a->m[2][1] = -(m->m[0][0]*m->m[2][1]-m->m[0][1]*m->m[2][0])/det;
		591	; a->m[2][2] = (m->m[0][0]*m->m[1][1]-m->m[0][1]*m->m[1][0])/det;
		592	;}
		593
		594	; vector arithmetic
		595
5218	IgorA	596	align 4
		; gl_V3_Norm: scale a 3-component float vector to unit length, in place.
		; In:  a = pointer to three dwords (X at +0, Y at +4, Z at +8)
		; Out: eax = 0 and each component divided by sqrt(X^2+Y^2+Z^2), or
		;      eax = 1 with the vector untouched when 'ftst' reports that length
		;      as equal to 0.0
		;      ebx is restored through 'uses'
		; The x87 stack is left exactly as it was on entry.
		proc gl_V3_Norm uses ebx, a:dword
			mov ebx,[a]
			fld dword[ebx]
			fmul st0,st0
			fld dword[ebx+4]
			fmul st0,st0
			faddp
			fld dword[ebx+8]
			fmul st0,st0
			faddp
			fsqrt			;st0 = sqrt(a.X^2 +a.Y^2 +a.Z^2)
			ftst
			fstsw ax
			sahf
			mov eax,1		;preset the result; mov leaves the flags untouched
			je .done		;if (sqrt(...)==0) return 1
			fld dword[ebx]
			fdiv st0,st1
			fstp dword[ebx]		;a.X/=sqrt(...)
			fld dword[ebx+4]
			fdiv st0,st1
			fstp dword[ebx+4]	;a.Y/=sqrt(...)
			fld dword[ebx+8]
			fdiv st0,st1
			fstp dword[ebx+8]	;a.Z/=sqrt(...)
			xor eax,eax		;return 0
		.done:
			ffree st0
			fincstp			;drop the length
			ret
		endp
5153	IgorA	631
		; gl_V3_New: store three dword values at consecutive addresses.
		; dest       = address expression, used inside [ ] with +4 and +8 appended
		; vx, vy, vz = source operands of the three 'mov dword[...]' instructions
		macro gl_V3_New dest, vx, vy, vz
		{
			mov dword[dest],vx
			mov dword[dest+4],vy
			mov dword[dest+8],vz
		}
		638
		; gl_V4_New: store four dword values at consecutive addresses.
		; dest           = address expression, used inside [ ] with +4, +8 and +12 appended
		; vx, vy, vz, vw = source operands of the four 'mov dword[...]' instructions
		macro gl_V4_New dest, vx, vy, vz, vw
		{
			mov dword[dest],vx
			mov dword[dest+4],vy
			mov dword[dest+8],vz
			mov dword[dest+12],vw
		}

Subversion Repositories Kolibri OS

(root)/programs/develop/libraries/TinyGL/asm_fork/zmath.asm – Rev 5269