WebSVN – Kolibri OS – Blame – /programs/develop/libraries/TinyGL/asm_fork/zmath.asm

Rev	Author	Line No.	Line
5153	IgorA	1	; Some simple mathematical functions. Don't look for some logic in
		2	; the function names :-)
		3
		4	; ***** 4x4 matrix handling ****
		5
5353	IgorA	6	if DEBUG
			; Zero-terminated function names, assembled only when DEBUG is set.
			; They are passed to dbg_print by the DEBUG traces at the end of gl_M4_Mul and gl_M4_MulLeft.
		7	f_m4m db 'gl_M4_Mul',0
		8	f_m4ml db 'gl_M4_MulLeft',0
		9	end if
		10
5153	IgorA	11	align 4
			; gl_M4_Id: write the 4x4 identity matrix (16 float dwords) to [a].
			; In:  a = pointer to the destination matrix
			; Out: nothing; eax, ecx and edi are restored by the proc frame
			; The string store runs upwards, i.e. the direction flag is expected to be clear.
		12	proc gl_M4_Id uses eax ecx edi, a:dword
		13	mov edi,[a]
			xor eax,eax ;the bit pattern of 0.0 is all zeroes
			mov ecx,16
			rep stosd ;clear the whole matrix, edi ends up at a+64
			mov eax,1.0
			mov [edi-64],eax ;element 0
			mov [edi-44],eax ;element 5
			mov [edi-24],eax ;element 10
			mov [edi-4],eax ;element 15
		26	ret
		27	endp
		28
		29	align 4
			; gl_M4_IsId: test whether the 4x4 float matrix at [a] is the identity matrix.
			; In:  a = pointer to 16 consecutive float dwords
			; Out: eax = 1 when every element with equal indices is 1.0 and every other element is 0.0, otherwise eax = 0
			; ebx, ecx and edx are restored by the proc frame; the FPU stack is left at the depth it was found.
			; The element pointer is kept in edx, not eax: every `fstsw ax` below overwrites ax,
			; so a pointer held in eax would be corrupted after the first comparison.
			proc gl_M4_IsId uses ebx ecx edx, a:dword
			mov edx,[a] ;edx = address of the current element
		32	xor ebx,ebx ;ebx = inner index, 0..3
		33	xor ecx,ecx ;ecx = outer index, 0..3
		34	.cycle_01:
			fld dword[edx]
		36	cmp ecx,ebx
		37	je .once
		38	ftst ;compare the element with 0.0
		39	fstsw ax
		40	sahf
		41	je @f
		42	jmp .not_1 ;an element off the diagonal is not 0.0
		43	.once:
		44	fld1
		45	fcomp st1 ;compare 1.0 with the element, then pop the 1.0
		46	fstsw ax
		47	test ah,0x40 ;C3 is set when the two values compare equal
		48	je .not_1 ;a diagonal element is not 1.0
		49	@@:
5218	IgorA	50	ffree st0 ;drop the element
		51	fincstp
			add edx,4
		53	inc ebx
		54	btr ebx,2 ;CF = (ebx reached 4); the bit is cleared, so ebx wraps to 0
		55	jnc .cycle_01
		56	inc ecx
		57	bt ecx,2 ;ecx==4 -> all 16 elements were checked
		58	jnc .cycle_01
		59
		60	mov eax,1
		61	jmp @f
		62	.not_1:
5218	IgorA	63	ffree st0 ;drop the element that failed the test
		64	fincstp
5153	IgorA	65	xor eax,eax
		66	@@:
		67	ret
		68	endp
		69
		70	align 4
			; gl_M4_Mul: c = a * b for 4x4 float matrices, c[i][j] = sum over k of a[i][k]*b[k][j].
			; In:  c = pointer to the destination, a = pointer to the left operand, b = pointer to the right operand
			; Out: nothing; all general registers are preserved by pushad/popad.
			; Each c[i][j] is stored while a and b are still being read, so c presumably must not
			; overlap a or b - gl_M4_MulLeft copies its operand first for that case.
			; NOTE(review): M4_reg is a macro defined elsewhere; judging by its use here it loads the
			; address of element [row][column] of the given matrix into its first operand - confirm.
		71	proc gl_M4_Mul, c:dword,a:dword,b:dword
		72	pushad
		73	mov edx,[c] ;edx walks over c element by element
		74	xor eax,eax ;i=0
		75	.cycle_0: ;i
		76	xor ebx,ebx ;j=0
		77	.cycle_1: ;j
		78	fldz ;sum=0
		79	xor ecx,ecx ;k=0
		80	M4_reg edi,[a],eax,0 ;edi = address of a[i][0] (see the note on M4_reg above)
		81	.cycle_2: ;k
		82	fld dword[edi] ;a[i][k]
		83	add edi,4 ;step to a[i][k+1]
		84	M4_reg esi,[b],ecx,ebx ;esi = address of b[k][j] (see the note on M4_reg above)
		85	fmul dword[esi]
5256	IgorA	86	faddp ;sum += a[i][k] * b[k][j]
5153	IgorA	87	inc ecx
		88	cmp ecx,4
		89	jl .cycle_2
		90	fstp dword[edx] ;c[i][j] = sum
		91	add edx,4
		92	inc ebx
		93	cmp ebx,4
		94	jl .cycle_1
		95	inc eax
		96	cmp eax,4
		97	jl .cycle_0
		98	if DEBUG ;gl_M4_Mul
		99	stdcall dbg_print,f_m4m,txt_nl
		100	stdcall gl_print_matrix,[c],4
		101	stdcall dbg_print,txt_sp,txt_nl
		102	end if
		103	popad
		104	ret
		105	endp
		106
		107	; c=c*b, computed in place: c is first copied to the local matrix a, then c = a*b
			; gl_M4_MulLeft
			; In:  c = pointer to the 4x4 float matrix that is both left operand and destination,
			;      b = pointer to the right operand
			; Out: nothing; all general registers are preserved by pushad/popad.
			; Locals: i = outer loop counter kept in memory, a = M4-sized copy of the original c.
			; The code takes ebp-sizeof.M4 as the address of the local a.
			; The copy uses rep movsd, so the direction flag is expected to be clear.
			; NOTE(review): M4_reg is a macro defined elsewhere; judging by its use here it loads the
			; address of element [row][column] of the given matrix into its first operand - confirm.
		108	align 4
		109	proc gl_M4_MulLeft, c:dword,b:dword
		110	locals
		111	i dd ?
		112	a M4
		113	endl
		114	pushad
		115	mov ecx,16 ;16 dwords are copied
		116	mov esi,[c]
		117	mov edi,ebp
		118	sub edi,sizeof.M4 ;edi = ebp-sizeof.M4, used as the address of local a
		119	rep movsd ;copy: [a]=[c]
		120
		121	mov edx,[c] ;edx walks over c element by element
		122	mov dword[i],0
5353	IgorA	123	mov eax,ebp
		124	sub eax,sizeof.M4 ;eax = address of local a, kept for the whole loop
5153	IgorA	125	.cycle_0: ;i
5353	IgorA	126	xor ebx,ebx ;j=0
5153	IgorA	127	.cycle_1: ;j
		128	fldz ;sum=0
5353	IgorA	129	xor ecx,ecx ;k=0
5153	IgorA	130	M4_reg edi,eax,dword[i],0 ;edi = address of a[i][0] (see the note on M4_reg above)
		131	.cycle_2: ;k
		132	fld dword[edi] ;a[i][k]
		133	add edi,4 ;step to a[i][k+1]
		134	M4_reg esi,[b],ecx,ebx ;esi = address of b[k][j] (see the note on M4_reg above)
		135	fmul dword[esi]
5353	IgorA	136	faddp ;sum += a[i][k] * b[k][j]
5153	IgorA	137	inc ecx
		138	cmp ecx,4
		139	jl .cycle_2
		140	fstp dword[edx] ;c[i][j] = sum
		141	add edx,4
		142	inc ebx
		143	cmp ebx,4
		144	jl .cycle_1
		145	inc dword[i]
		146	cmp dword[i],4
		147	jl .cycle_0
		148	if DEBUG ;gl_M4_MulLeft
		149	stdcall dbg_print,f_m4ml,txt_nl
		150	stdcall gl_print_matrix,[c],4
		151	stdcall dbg_print,txt_sp,txt_nl
		152	end if
		153	popad
		154	ret
		155	endp
		156
		157	align 4
			; gl_M4_Move: copy one M4 matrix, [a] = [b].
			; In:  a = pointer to the destination, b = pointer to the source
			; Out: nothing; ecx, edi and esi are restored by the proc frame
			; The copy runs upwards, i.e. the direction flag is expected to be clear.
		158	proc gl_M4_Move uses ecx edi esi, a:dword,b:dword
			mov ecx,sizeof.M4 shr 2 ;size of an M4 in dwords
			mov esi,[b] ;source
			mov edi,[a] ;destination
		162	rep movsd
		163	ret
		164	endp
		165
		166	align 4
			; gl_MoveV3: copy three dwords (one V3) from [b] to [a].
			; In:  a = pointer to the destination, b = pointer to the source
			; Out: nothing; ecx, edi and esi are restored by the proc frame
			; The copy runs upwards, i.e. the direction flag is expected to be clear.
			proc gl_MoveV3 uses ecx edi esi, a:dword,b:dword
			mov esi,[b] ;source
			mov edi,[a] ;destination
			mov ecx,3 ;X, Y and Z
			rep movsd
		173	ret
		174	endp
		175
		176	;void gl_MulM4V3(V3 *a,M4 *b,V3 *c)
		177	;{
		178	; a->X=b->m[0][0]*c->X+b->m[0][1]*c->Y+b->m[0][2]*c->Z+b->m[0][3];
		179	; a->Y=b->m[1][0]*c->X+b->m[1][1]*c->Y+b->m[1][2]*c->Z+b->m[1][3];
		180	; a->Z=b->m[2][0]*c->X+b->m[2][1]*c->Y+b->m[2][2]*c->Z+b->m[2][3];
		181	;}
		182
		183	;void gl_MulM3V3(V3 *a,M4 *b,V3 *c)
		184	;{
		185	; a->X=b->m[0][0]*c->X+b->m[0][1]*c->Y+b->m[0][2]*c->Z;
		186	; a->Y=b->m[1][0]*c->X+b->m[1][1]*c->Y+b->m[1][2]*c->Z;
		187	; a->Z=b->m[2][0]*c->X+b->m[2][1]*c->Y+b->m[2][2]*c->Z;
		188	;}
		189
5256	IgorA	190	align 4
			; gl_M4_MulV4: a = b * c, a 4x4 float matrix times a 4-component float vector.
			; In:  a = pointer to the result V4, b = pointer to the matrix (rows are 16 bytes apart),
			;      c = pointer to the source V4
			; Out: nothing; ebx, ecx and edx are restored by the proc frame
			; All four components of c are loaded before anything is stored to a.
			; Four extra FPU registers hold c during the loop and are released before returning.
		191	proc gl_M4_MulV4 uses ebx ecx edx, a:dword, b:dword, c:dword ;V4 *a, M4 *b, V4 *c
			mov edx,[c]
			fld dword[edx] ;c.X
			fld dword[edx+4] ;c.Y
			fld dword[edx+8] ;c.Z
			fld dword[edx+12] ;c.W -> st0=W st1=Z st2=Y st3=X
			mov ebx,[b] ;ebx = current matrix row
			mov edx,[a] ;edx = current result component
			mov ecx,4 ;rows left
			.next_row:
			fld dword[ebx] ;st0 = m[row][0]
			fmul st0,st4 ;st0 *= c.X
			fld dword[ebx+4] ;st0 = m[row][1]
			fmul st0,st4 ;st0 *= c.Y
			faddp
			fld dword[ebx+8] ;st0 = m[row][2]
			fmul st0,st3 ;st0 *= c.Z
			faddp
			fld dword[ebx+12] ;st0 = m[row][3]
			fmul st0,st2 ;st0 *= c.W
			faddp
			fstp dword[edx] ;a[row] = m[row][0]*c.X +m[row][1]*c.Y +m[row][2]*c.Z +m[row][3]*c.W
			add ebx,16 ;next matrix row
			add edx,4 ;next result component
			dec ecx
			jnz .next_row
			fstp st0 ;release c.W
			fstp st0 ;release c.Z
			fstp st0 ;release c.Y
			fstp st0 ;release c.X
		224	ret
		225	endp
5153	IgorA	226
		227	; transposition of a 4x4 matrix
			; gl_M4_Transpose: a[i][j] = b[j][i] for a 4x4 matrix of dwords.
			; In:  a = pointer to the destination matrix, b = pointer to the source matrix
			; Out: nothing; eax, ecx, esi and edi are restored by the proc frame
			; Elements are copied as raw dwords, destination in linear order 0..15.
		228	align 4
			proc gl_M4_Transpose uses eax ecx esi edi, a:dword, b:dword
			mov edi,[a] ;edi = start of the destination row being written
			mov esi,[b] ;esi = top of the source column being read
			mov ecx,4 ;destination rows left
			.row:
			mov eax,[esi]
			mov [edi],eax ;a[i][0] = b[0][i]
			mov eax,[esi+0x10]
			mov [edi+0x4],eax ;a[i][1] = b[1][i]
			mov eax,[esi+0x20]
			mov [edi+0x8],eax ;a[i][2] = b[2][i]
			mov eax,[esi+0x30]
			mov [edi+0x0c],eax ;a[i][3] = b[3][i]
			add esi,4 ;next source column
			add edi,16 ;next destination row
			dec ecx
			jnz .row
		268	ret
		269	endp
		270
5218	IgorA	271	; inversion of an orthogonal matrix of type Y=M.X+P
5153	IgorA	272	;void gl_M4_InvOrtho(M4 *a,M4 b)
		273	;{
		274	; int i,j;
		275	; float s;
		276	; for(i=0;i<3;i++)
		277	; for(j=0;j<3;j++) a->m[i][j]=b.m[j][i];
		278	; a->m[3][0]=0.0; a->m[3][1]=0.0; a->m[3][2]=0.0; a->m[3][3]=1.0;
		279	; for(i=0;i<3;i++) {
		280	; s=0;
		281	; for(j=0;j<3;j++) s-=b.m[j][i]*b.m[j][3];
		282	; a->m[i][3]=s;
		283	; }
		284	;}
		285
5218	IgorA	286	; Inversion of a general nxn matrix.
		287	; Note : m is destroyed
			; Matrix_Inv: Gauss-Jordan elimination with row pivoting on float matrices stored row by row.
			; In:  r = pointer to the n*n result, m = pointer to the n*n input (overwritten while working),
			;      n = matrix dimension
			; Out: eax = 0 on success, eax = 1 when a pivot column contains only zeroes (matrix not invertible)
			; ebx, ecx, edx, edi and esi are restored by the proc frame.
			; The pivot of column j is the entry with the largest absolute value among rows j..n-1;
			; the magnitudes are compared, but max keeps the signed entry because the row is divided by it.
			; Up to three FPU registers are used at a time; the FPU stack depth is unchanged on return.
			; The zero fill uses rep stosd, so the direction flag is expected to be clear.
			; NOTE(review): m2m is a macro defined elsewhere; it is used here as a memory-to-memory dword move - confirm.
5153	IgorA	288
		289	align 4
5256	IgorA	290	proc Matrix_Inv uses ebx ecx edx edi esi, r:dword, m:dword, n:dword ;(float *r,float *m,int n)
		291	locals
		292	max dd ? ;float
		293	tmp dd ?
		294	endl
5153	IgorA	295
5256	IgorA	296	; r = identity
		297	mov eax,0.0
		298	mov ecx,[n]
		299	imul ecx,ecx
		300	mov edi,[r]
		301	rep stosd ;for(i=0;i<n*n;i++) r[i]=0
		302	mov eax,1.0
		303	xor ebx,ebx
		304	mov edi,[r]
		305	mov ecx,[n]
		306	shl ecx,2 ;ecx = size of one row in bytes
		307	@@: ;for(i=0;i<n;i++)
		308	cmp ebx,[n]
		309	jge .end_0
		310	stosd ;r[i*n+i]=1
		311	add edi,ecx ;stosd already advanced 4 bytes, so the step is (n+1) elements
		312	inc ebx
		313	jmp @b
		314	.end_0:
5153	IgorA	315
5256	IgorA	316	; ebx -> n
		317	; ecx -> j
		318	; edx -> k
		319	; edi -> i
		320	; esi -> l
		321	mov ebx,[n]
		322	xor ecx,ecx
		323	.cycle_0: ;for(j=0;j<n;j++)
		324	cmp ecx,ebx
		325	jge .cycle_0_end
		326	; search column j for the entry with the largest absolute value
		327	mov eax,ecx
		328	imul eax,ebx
		329	add eax,ecx
		330	shl eax,2
		331	add eax,[m]
		332	mov eax,[eax]
		333	mov [max],eax ;max=m[j*n+j]
		334	mov edx,ecx ;k=j
		335	mov edi,ecx
		336	inc edi
		337	.cycle_1: ;for(i=j+1;i<n;i++)
		338	cmp edi,ebx
		339	jge .cycle_1_end
		340	mov eax,edi
		341	imul eax,ebx
		342	add eax,ecx
		343	shl eax,2
		344	add eax,[m]
		345	fld dword[eax] ;st0 = m[i*n+j]
			fld dword[max]
			fabs ;st0 = |max|, st1 = m[i*n+j]
			fld st1
			fabs ;st0 = |m[i*n+j]|, st1 = |max|, st2 = m[i*n+j]
			fcompp ;if (fabs(m[i*n+j])>fabs(max)); both magnitudes are popped
		347	fstsw ax
		348	sahf
		349	jbe @f ;not larger (or unordered) -> keep the current pivot
		350	mov edx,edi ;k=i
		351	fst dword[max] ;max=m[i*n+j], sign included
		352	@@:
		353	ffree st0
		354	fincstp
		355	inc edi
		356	jmp .cycle_1
		357	.cycle_1_end:
5153	IgorA	358
5256	IgorA	359	; non-invertible matrix
		360	fld dword[max]
		361	ftst ;if (max==0)
		362	fstsw ax
		363	ffree st0
		364	fincstp
		365	sahf
		366	jne @f
		367	xor eax,eax
		368	inc eax
		369	jmp .end_f ;return 1
		370	@@:
5153	IgorA	371
5256	IgorA	372	; swap rows j and k
		373	cmp ecx,edx ;if (j!=k)
		374	je .cycle_2_end
		375	xor edi,edi
		376	.cycle_2: ;for(i=0;i<n;i++)
		377	cmp edi,ebx
		378	jge .cycle_2_end
		379	;inside this loop esi is a scratch register, not the index l
		380	mov eax,ecx
		381	imul eax,ebx
		382	add eax,edi
		383	shl eax,2
		384	add eax,[m]
		385	mov esi,[eax]
		386	mov [tmp],esi ;tmp=m[j*n+i]
		387	mov esi,edx
		388	imul esi,ebx
		389	add esi,edi
		390	shl esi,2
		391	add esi,[m]
		392	m2m dword[eax],dword[esi] ;m[j*n+i]=m[k*n+i]
		393	mov eax,[tmp]
		394	mov [esi],eax ;m[k*n+i]=tmp
5153	IgorA	395
5256	IgorA	396	mov eax,ecx
		397	imul eax,ebx
		398	add eax,edi
		399	shl eax,2
		400	add eax,[r]
		401	mov esi,[eax]
		402	mov [tmp],esi ;tmp=r[j*n+i]
		403	mov esi,edx
		404	imul esi,ebx
		405	add esi,edi
		406	shl esi,2
		407	add esi,[r]
		408	m2m dword[eax],dword[esi] ;r[j*n+i]=r[k*n+i]
		409	mov eax,[tmp]
		410	mov [esi],eax ;r[k*n+i]=tmp
		411	inc edi
		412	jmp .cycle_2
		413	.cycle_2_end:
5153	IgorA	414
5256	IgorA	415	; multiply row j by 1/max
		416	fld1
		417	fdiv dword[max]
		418	fst dword[max] ;max=1/max, also kept in st0 for the loop below
		419	xor edi,edi
		420	mov eax,ecx
		421	imul eax,ebx
		422	shl eax,2 ;eax = byte offset of row j, applied to m and r in turn
		423	.cycle_3: ;for(i=0;i<n;i++)
		424	cmp edi,ebx
		425	jge .cycle_3_end
		426	add eax,[m]
		427	fld dword[eax]
		428	fmul st0,st1
		429	fstp dword[eax] ;m[j*n+i]*=max
		430	sub eax,[m]
		431	add eax,[r]
		432	fld dword[eax]
		433	fmul st0,st1
		434	fstp dword[eax] ;r[j*n+i]*=max
		435	sub eax,[r]
		436	add eax,4
		437	inc edi
		438	jmp .cycle_3
		439	.cycle_3_end:
		440	ffree st0 ;max
		441	fincstp
		442
		443	xor esi,esi
		444	.cycle_4: ;for(l=0;l<n;l++)
		445	cmp esi,ebx
		446	jge .cycle_4_end
		447	cmp esi,ecx ;if (l!=j)
5418	IgorA	448	je .if_end
5256	IgorA	449	mov eax,esi
		450	imul eax,ebx
		451	add eax,ecx
		452	shl eax,2
		453	add eax,[m]
		454	fld dword[eax] ;t=m[l*n+j]
		455	xor edi,edi
		456	.cycle_5: ;for(i=0;i<n;i++)
		457	cmp edi,ebx
		458	jge .cycle_5_end
		459	mov eax,ecx
		460	imul eax,ebx
		461	add eax,edi
		462	shl eax,2
		463	add eax,[m]
		464	fld dword[eax]
		465	fmul st0,st1 ;m[j*n+i]*t
		466	mov eax,esi
		467	imul eax,ebx
		468	add eax,edi
		469	shl eax,2
		470	add eax,[m]
		471	fsub dword[eax]
		472	fchs ;m[l*n+i]-m[j*n+i]*t
		473	fstp dword[eax] ;m[l*n+i]-=m[j*n+i]*t
		474	mov eax,ecx
		475	imul eax,ebx
		476	add eax,edi
		477	shl eax,2
		478	add eax,[r]
		479	fld dword[eax]
		480	fmul st0,st1 ;r[j*n+i]*t
		481	mov eax,esi
		482	imul eax,ebx
		483	add eax,edi
		484	shl eax,2
		485	add eax,[r]
		486	fsub dword[eax]
		487	fchs ;r[l*n+i]-r[j*n+i]*t
		488	fstp dword[eax] ;r[l*n+i]-=r[j*n+i]*t
		489	inc edi
		490	jmp .cycle_5
		491	.cycle_5_end:
		492	ffree st0 ;t
		493	fincstp
5418	IgorA	494	.if_end:
5256	IgorA	495	inc esi
		496	jmp .cycle_4
		497	.cycle_4_end:
		498	inc ecx
		499	jmp .cycle_0
		500	.cycle_0_end:
		501
		502	xor eax,eax ;return 0
		503	.end_f:
5153	IgorA	504	ret
		505	endp
		506
		507	; inversion of a 4x4 matrix
			; gl_M4_Inv: a = inverse of b, computed by Matrix_Inv on a local copy of b.
			; In:  a = pointer to the result matrix, b = pointer to the source matrix
			; Out: nothing - eax is listed in uses, so the status returned by Matrix_Inv is discarded.
			; b itself is not modified: Matrix_Inv is handed the local copy tmp as its work matrix.
			; The copy uses rep movsd, so the direction flag is expected to be clear.
		508
		509	align 4
		510	proc gl_M4_Inv uses eax ecx edi esi, a:dword, b:dword
		511	locals
		512	tmp M4
		513	endl
			lea eax,[ebp-sizeof.M4] ;eax = &tmp
			mov edi,eax
			mov esi,[b]
			mov ecx,16
			rep movsd ;tmp = b
			stdcall Matrix_Inv,[a],eax,4
		521	ret
		522	endp
		523
		524	align 4
			; gl_M4_Rotate: build a rotation matrix in [a] from the angle t and the axis index u.
			; In:  a = pointer to the destination matrix, t = angle as a float dword passed by value
			;      (it is fed directly to fsin/fcos), u = axis index
			; Out: nothing; eax and ecx are restored by the proc frame
			; v = u+1 and w = v+1, each reset to 0 when it exceeds 2 (signed compare).
			; The matrix is set to the identity, then four elements are overwritten:
			; [v][v]=cos(t), [v][w]=-sin(t), [w][v]=sin(t), [w][w]=cos(t).
			; NOTE(review): M4_reg is a macro defined elsewhere; judging by its use it loads the address
			; of element [row][column] of the matrix into its first operand - confirm the index order.
		525	proc gl_M4_Rotate uses eax ecx, a:dword,t:dword,u:dword
		526	locals
		527	s dd ? ;float
		528	c dd ? ;float
		529	v dd ? ;int
		530	w dd ? ;int
		531	endl
		532	mov eax,[u]
		533	inc eax
5415	IgorA	534	cmp eax,2
5153	IgorA	535	jle @f
5415	IgorA	536	xor eax,eax ;wrap around to 0
5153	IgorA	537	@@:
5415	IgorA	538	mov [v],eax ;v = u+1, or 0 when that exceeds 2
5153	IgorA	539	inc eax
5415	IgorA	540	cmp eax,2
5153	IgorA	541	jle @f
5415	IgorA	542	xor eax,eax ;wrap around to 0
5153	IgorA	543	@@:
5415	IgorA	544	mov [w],eax ;w = v+1, or 0 when that exceeds 2
5153	IgorA	545	fld dword [t]
		546	fsin
		547	fstp dword [s] ;s = sin(t)
		548	fld dword [t]
		549	fcos
		550	fstp dword [c] ;c = cos(t)
		551
		552	stdcall gl_M4_Id,[a] ;start from the identity matrix
		553
		554	M4_reg ecx,[a],[v],[v]
		555	mov eax,[c]
		556	mov [ecx],eax ;a[v][v] = c
		557
		558	M4_reg ecx,[a],[v],[w]
		559	fld dword [s]
		560	fchs
		561	fstp dword [ecx] ;a[v][w] = -s
		562
		563	M4_reg ecx,[a],[w],[v]
		564	mov eax,[s]
		565	mov [ecx],eax ;a[w][v] = s
		566
		567	M4_reg ecx,[a],[w],[w]
		568	mov eax,[c]
		569	mov [ecx],eax ;a[w][w] = c
		570
		571	ret
		572	endp
		573
		574	; inverse of a 3x3 matrix
		575	;void gl_M3_Inv(M3 *a,M3 *m)
		576	;{
		577	; float det;
		578
		579	; det = m->m[0][0]*m->m[1][1]*m->m[2][2]-m->m[0][0]*m->m[1][2]*m->m[2][1]-
		580	; m->m[1][0]*m->m[0][1]*m->m[2][2]+m->m[1][0]*m->m[0][2]*m->m[2][1]+
		581	; m->m[2][0]*m->m[0][1]*m->m[1][2]-m->m[2][0]*m->m[0][2]*m->m[1][1];
		582
		583	; a->m[0][0] = (m->m[1][1]*m->m[2][2]-m->m[1][2]*m->m[2][1])/det;
		584	; a->m[0][1] = -(m->m[0][1]*m->m[2][2]-m->m[0][2]*m->m[2][1])/det;
		585	; a->m[0][2] = -(-m->m[0][1]*m->m[1][2]+m->m[0][2]*m->m[1][1])/det;
		586
		587	; a->m[1][0] = -(m->m[1][0]*m->m[2][2]-m->m[1][2]*m->m[2][0])/det;
		588	; a->m[1][1] = (m->m[0][0]*m->m[2][2]-m->m[0][2]*m->m[2][0])/det;
		589	; a->m[1][2] = -(m->m[0][0]*m->m[1][2]-m->m[0][2]*m->m[1][0])/det;
		590
		591	; a->m[2][0] = (m->m[1][0]*m->m[2][1]-m->m[1][1]*m->m[2][0])/det;
		592	; a->m[2][1] = -(m->m[0][0]*m->m[2][1]-m->m[0][1]*m->m[2][0])/det;
		593	; a->m[2][2] = (m->m[0][0]*m->m[1][1]-m->m[0][1]*m->m[1][0])/det;
		594	;}
		595
		596	; vector arithmetic
		597
5218	IgorA	598	align 4
			; gl_V3_Norm: scale the 3-component float vector at [a] to unit length, in place.
			; In:  a = pointer to X, Y, Z stored as consecutive float dwords
			; Out: eax = 0 when the vector was normalised,
			;      eax = 1 when its length compares equal to 0 (the vector is left untouched)
			; ebx is restored by the proc frame; the FPU stack depth is unchanged on return.
		599	proc gl_V3_Norm uses ebx, a:dword
		600	mov ebx,[a]
		601	fld dword[ebx]
5256	IgorA	602	fmul st0,st0 ;X^2
5218	IgorA	603	fld dword[ebx+4]
5256	IgorA	604	fmul st0,st0 ;Y^2
5218	IgorA	605	faddp
		606	fld dword[ebx+8]
5256	IgorA	607	fmul st0,st0 ;Z^2
5218	IgorA	608	faddp
		609	fsqrt ;st0 = sqrt(a.X^2 +a.Y^2 +a.Z^2)
5269	IgorA	610	ftst
5218	IgorA	611	fstsw ax
		612	sahf
			mov eax,1 ;result for a zero length; mov leaves the flags loaded by sahf intact
			je .done ;if (sqrt(...)==0) return 1
			fld dword[ebx]
			fdiv st0,st1
			fstp dword[ebx] ;a.X/=sqrt(...)
			fld dword[ebx+4]
			fdiv st0,st1
			fstp dword[ebx+4] ;a.Y/=sqrt(...)
			fld dword[ebx+8]
			fdiv st0,st1
			fstp dword[ebx+8] ;a.Z/=sqrt(...)
			xor eax,eax ;return 0
			.done:
			fstp st0 ;release the length
		631	ret
		632	endp
5153	IgorA	633
			; gl_V3_New: store the three dword values x, y, z at p_mem, p_mem+4 and p_mem+8.
			; p_mem is pasted into a memory operand and x, y, z are used as mov sources,
			; so each must be something `mov dword[...],value` accepts.
		634	macro gl_V3_New p_mem, x, y, z
		635	{
		636	mov dword[p_mem],x
		637	mov dword[p_mem+4],y
		638	mov dword[p_mem+8],z
		639	}
		640
		641	macro gl_V4_New p_mem, x, y, z, w
		642	{
		643	mov dword[p_mem],x
		644	mov dword[p_mem+4],y
		645	mov dword[p_mem+8],z
		646	mov dword[p_mem+12],w
		647	}

Subversion Repositories Kolibri OS

(root)/programs/develop/libraries/TinyGL/asm_fork/zmath.asm – Rev 5418