WebSVN – Kolibri OS – Blame – /programs/develop/libraries/TinyGL/asm_fork/zmath.asm

Rev	Author	Line No.	Line
5153	IgorA	1	; Some simple mathematical functions. Don't look for some logic in
		2	; the function names :-)
		3
		4	; ***** 4x4 matrix handling *****
		5
5353	IgorA	6	if DEBUG ; the name strings below are assembled only in DEBUG builds
		7	f_m4m db 'gl_M4_Mul',0 ; zero-terminated name passed to dbg_print by gl_M4_Mul
		8	f_m4ml db 'gl_M4_MulLeft',0 ; zero-terminated name passed to dbg_print by gl_M4_MulLeft
		9	end if
		10
5153	IgorA	11	align 4
		; gl_M4_Id: overwrite the 4x4 float matrix at [a] with the identity matrix.
		; In:  a = pointer to 16 consecutive dwords.
		; Out: nothing; eax, ecx and edi are restored by `uses`.
		; Note: rep stosd is used, so the direction flag is expected to be clear.
		proc gl_M4_Id uses eax ecx edi, a:dword
			mov edi,[a]
			xor eax,eax ; the bit pattern of 0.0 is all zeroes
			mov ecx,16
			rep stosd ; clear all 16 elements
			mov edi,[a]
			mov eax,1.0
			mov [edi],eax ; element 0  (diagonal)
			mov [edi+20],eax ; element 5  (diagonal)
			mov [edi+40],eax ; element 10 (diagonal)
			mov [edi+60],eax ; element 15 (diagonal)
			ret
		endp
		28
		29	align 4
		; gl_M4_IsId: test whether the 4x4 float matrix at [a] is the identity matrix.
		; In:  a = pointer to 16 consecutive floats.
		; Out: eax = 1 if every diagonal element compares equal to 1.0 and every
		;      other element compares equal to 0.0, otherwise eax = 0.
		;      ebx, ecx and edx are restored by `uses`; the FPU stack is left balanced.
		; The element pointer is kept in edx, not eax: `fstsw ax` overwrites the low
		; 16 bits of eax on every element, which would corrupt a pointer held there.
		proc gl_M4_IsId uses ebx ecx edx, a:dword
			mov edx,[a] ; edx -> current element
			xor ebx,ebx ; ebx = inner index, wraps 0..3
			xor ecx,ecx ; ecx = outer index, 0..3
		.cycle_01:
			fld dword[edx]
			cmp ecx,ebx
			je .once ; indices equal -> diagonal element
			ftst ; off-diagonal: compare with 0.0
			fstsw ax
			sahf
			je @f
			jmp .not_1 ; off-diagonal element is not 0.0
		.once:
			fld1
			fcomp st1 ; diagonal: compare 1.0 with the element, pop the 1.0
			fstsw ax
			test ah,0x40 ; C3 set means "equal"
			je .not_1 ; diagonal element is not 1.0
		@@:
			ffree st0 ; discard the element (free + advance top = pop)
			fincstp
			add edx,4
			inc ebx
			btr ebx,2 ; inner index reached 4 -> reset it to 0, CF=1
			jnc .cycle_01
			inc ecx
			bt ecx,2 ; outer index reached 4 -> all 16 elements checked
			jnc .cycle_01

			mov eax,1
			jmp @f
		.not_1:
			ffree st0 ; discard the element that failed the test
			fincstp
			xor eax,eax
		@@:
			ret
		endp
		69
		70	align 4
			; gl_M4_Mul: c = a * b for 4x4 float matrices, c[i][j] = sum over k of a[i][k]*b[k][j].
			; In:  c = destination matrix pointer, a and b = source matrix pointers.
			;      c is written while a and b are still being read, so c must not alias a or b.
			; Out: all general-purpose registers are preserved (pushad/popad).
			; NOTE(review): M4_reg is defined elsewhere; presumably `M4_reg reg,base,row,col`
			;      loads reg with the address of element [row][col] of the matrix at base - confirm.
		71	proc gl_M4_Mul, c:dword,a:dword,b:dword
		72	pushad
		73	mov edx,[c] ;edx -> next element of c to write
		74	xor eax,eax ;i=0
		75	.cycle_0: ;i
		76	xor ebx,ebx ;j=0
		77	.cycle_1: ;j
		78	fldz ;sum=0
		79	xor ecx,ecx ;k=0
		80	M4_reg edi,[a],eax,0 ;presumably edi = &a[i][0]
		81	.cycle_2: ;k
		82	fld dword[edi] ;a[i][k]
		83	add edi,4 ;step to a[i][k+1]
		84	M4_reg esi,[b],ecx,ebx ;presumably esi = &b[k][j]
		85	fmul dword[esi]
5256	IgorA	86	faddp ;sum += a[i][k] * b[k][j]
5153	IgorA	87	inc ecx
		88	cmp ecx,4
		89	jl .cycle_2
		90	fstp dword[edx] ;c[i][j] = sum
		91	add edx,4
		92	inc ebx
		93	cmp ebx,4
		94	jl .cycle_1
		95	inc eax
		96	cmp eax,4
		97	jl .cycle_0
		98	if DEBUG ;gl_M4_Mul: DEBUG-only trace of the result (helpers are defined elsewhere)
		99	stdcall dbg_print,f_m4m,txt_nl
		100	stdcall gl_print_matrix,[c],4
		101	stdcall dbg_print,txt_sp,txt_nl
		102	end if
		103	popad
		104	ret
		105	endp
		106
		107	; c=c*b (the old contents of c are first copied into the local matrix `a`)
		108	align 4
			; gl_M4_MulLeft: in-place product c = c * b for 4x4 float matrices.
			; In:  c = matrix pointer, read and then overwritten; b = right-hand matrix pointer.
			;      b is read while c is being written, so b must not alias c.
			; Out: all general-purpose registers are preserved (pushad/popad).
			; NOTE(review): the code addresses the local copy as ebp-sizeof.M4, i.e. it assumes
			;      `a` is the last local of the frame and that sizeof.M4 is 64 bytes - confirm.
			; NOTE(review): M4_reg is defined elsewhere; presumably `M4_reg reg,base,row,col`
			;      loads reg with the address of element [row][col] of the matrix at base - confirm.
		109	proc gl_M4_MulLeft, c:dword,b:dword
		110	locals
		111	i dd ? ;row counter (no free register is left for it)
		112	a M4 ;snapshot of the original c
		113	endl
		114	pushad
		115	mov ecx,16
		116	mov esi,[c]
		117	mov edi,ebp
		118	sub edi,sizeof.M4 ;edi = ebp-sizeof.M4, used as the address of local a
		119	rep movsd ;copy 16 dwords: [a]=[c]
		120
		121	mov edx,[c] ;edx -> next element of c to write
		122	mov dword[i],0
5353	IgorA	123	mov eax,ebp
		124	sub eax,sizeof.M4 ;eax = base address of the local copy a
5153	IgorA	125	.cycle_0: ;i
5353	IgorA	126	xor ebx,ebx ;j=0
5153	IgorA	127	.cycle_1: ;j
		128	fldz ;sum=0
5353	IgorA	129	xor ecx,ecx ;k=0
5153	IgorA	130	M4_reg edi,eax,dword[i],0 ;presumably edi = &a[i][0]
		131	.cycle_2: ;k
		132	fld dword[edi] ;a[i][k]
		133	add edi,4
		134	M4_reg esi,[b],ecx,ebx ;presumably esi = &b[k][j]
		135	fmul dword[esi]
5353	IgorA	136	faddp ;sum += a[i][k] * b[k][j]
5153	IgorA	137	inc ecx
		138	cmp ecx,4
		139	jl .cycle_2
		140	fstp dword[edx] ;c[i][j] = sum
		141	add edx,4
		142	inc ebx
		143	cmp ebx,4
		144	jl .cycle_1
		145	inc dword[i]
		146	cmp dword[i],4
		147	jl .cycle_0
		148	if DEBUG ;gl_M4_MulLeft: DEBUG-only trace of the result (helpers are defined elsewhere)
		149	stdcall dbg_print,f_m4ml,txt_nl
		150	stdcall gl_print_matrix,[c],4
		151	stdcall dbg_print,txt_sp,txt_nl
		152	end if
		153	popad
		154	ret
		155	endp
		156
		157	align 4
		; gl_M4_Move: copy one M4 structure, [a] = [b].
		; In:  a = destination pointer, b = source pointer.
		; Out: nothing; ecx, edi and esi are restored by `uses`.
		proc gl_M4_Move uses ecx edi esi, a:dword,b:dword
			mov ecx,sizeof.M4 shr 2 ; size of the structure in dwords
			mov esi,[b]
			mov edi,[a]
			rep movsd
			ret
		endp
		165
		166	align 4
		; gl_MoveV3: copy three consecutive dwords (one 3-component vector), [a] = [b].
		; In:  a = destination pointer, b = source pointer.
		; Out: nothing; edi and esi are restored by `uses`.
		proc gl_MoveV3 uses edi esi, a:dword,b:dword
			mov esi,[b]
			mov edi,[a]
			repeat 3 ; assembles to three consecutive movsd instructions
				movsd
			end repeat
			ret
		endp
		175
		176	;void gl_MulM4V3(V3 *a,M4 *b,V3 *c)
		177	;{
		178	; a->X=b->m[0][0]*c->X+b->m[0][1]*c->Y+b->m[0][2]*c->Z+b->m[0][3];
		179	; a->Y=b->m[1][0]*c->X+b->m[1][1]*c->Y+b->m[1][2]*c->Z+b->m[1][3];
		180	; a->Z=b->m[2][0]*c->X+b->m[2][1]*c->Y+b->m[2][2]*c->Z+b->m[2][3];
		181	;}
		182
		183	;void gl_MulM3V3(V3 *a,M4 *b,V3 *c)
		184	;{
		185	; a->X=b->m[0][0]*c->X+b->m[0][1]*c->Y+b->m[0][2]*c->Z;
		186	; a->Y=b->m[1][0]*c->X+b->m[1][1]*c->Y+b->m[1][2]*c->Z;
		187	; a->Z=b->m[2][0]*c->X+b->m[2][1]*c->Y+b->m[2][2]*c->Z;
		188	;}
		189
5256	IgorA	190	align 4
		; gl_M4_MulV4: a = b * c, with b a 4x4 float matrix and a, c 4-component float vectors.
		; In:  a = result vector pointer, b = matrix pointer, c = source vector pointer.
		;      All four components of c are loaded before anything is stored to a.
		; Out: nothing; ebx, ecx and edx are restored by `uses`; the FPU stack is left balanced.
		proc gl_M4_MulV4 uses ebx ecx edx, a:dword, b:dword, c:dword ;V4 *a, M4 *b, V4 *c
			mov edx,[c]
			repeat 4 ; push c.X, c.Y, c.Z, c.W -> st3=X st2=Y st1=Z st0=W
				fld dword[edx+(%-1)*4]
			end repeat
			mov ebx,[b] ; ebx -> current matrix row
			mov edx,[a] ; edx -> current result component
			mov ecx,4 ; rows left
		.row:
			fld dword[ebx] ; m[row][0]
			fmul st0,st4 ; * c.X
			fld dword[ebx+4] ; m[row][1]
			fmul st0,st4 ; * c.Y
			faddp
			fld dword[ebx+8] ; m[row][2]
			fmul st0,st3 ; * c.Z
			faddp
			fld dword[ebx+12] ; m[row][3]
			fmul st0,st2 ; * c.W
			faddp
			fstp dword[edx] ; a[row] = dot(m[row], c)
			add ebx,16 ; next matrix row
			add edx,4 ; next result component
			dec ecx
			jnz .row
			repeat 4 ; drop the four components of c from the FPU stack
				ffree st0
				fincstp
			end repeat
			ret
		endp
5153	IgorA	226
		227	; transposition of a 4x4 matrix
		228	align 4
		; gl_M4_Transpose: a = transpose(b) for 4x4 matrices of dword elements.
		; In:  a = destination matrix pointer, b = source matrix pointer.
		;      Elements are copied one at a time with no temporary matrix, so calling
		;      this with a = b does not produce a transpose.
		; Out: nothing; eax, ecx and edx are restored by `uses`.
		proc gl_M4_Transpose uses eax ecx edx, a:dword, b:dword
			mov eax,[a]
			mov ecx,[b]
			; Destination elements are written in storage order 0..15.
			; For destination index d = %-1 the source element is (d mod 4)*4 + d/4.
			repeat 16
				mov edx,[ecx+((%-1) mod 4)*16+((%-1)/4)*4]
				mov [eax+(%-1)*4],edx
			end repeat
			ret
		endp
		270
5218	IgorA	271	; inversion of an orthogonal matrix of type Y=M.X+P
5153	IgorA	272	;void gl_M4_InvOrtho(M4 *a,M4 b)
		273	;{
		274	; int i,j;
		275	; float s;
		276	; for(i=0;i<3;i++)
		277	; for(j=0;j<3;j++) a->m[i][j]=b.m[j][i];
		278	; a->m[3][0]=0.0; a->m[3][1]=0.0; a->m[3][2]=0.0; a->m[3][3]=1.0;
		279	; for(i=0;i<3;i++) {
		280	; s=0;
		281	; for(j=0;j<3;j++) s-=b.m[j][i]*b.m[j][3];
		282	; a->m[i][3]=s;
		283	; }
		284	;}
		285
5218	IgorA	286	; Inversion of a general nxn matrix.
		287	; Note : m is destroyed
5153	IgorA	288
		289	align 4
			; Matrix_Inv: invert the n x n float matrix m into r (Gauss-Jordan elimination,
			; choosing in each column the remaining entry with the largest absolute value as pivot).
			; In:  r = pointer to n*n floats receiving the inverse,
			;      m = pointer to n*n floats holding the matrix; m is overwritten,
			;      n = matrix dimension.
			; Out: eax = 0 on success, eax = 1 if a pivot equal to 0 is found (matrix not invertible).
			;      ebx, ecx, edx, edi and esi are restored by `uses`.
			; Note: ax is also the target of fstsw, so eax never holds an address across a compare.
5256	IgorA	290	proc Matrix_Inv uses ebx ecx edx edi esi, r:dword, m:dword, n:dword ;(float *r, float *m, int n)
		291	locals
		292	max dd ? ;float: current pivot candidate, later 1/pivot
		293	tmp dd ? ;scratch dword used while swapping rows
		294	endl
5153	IgorA	295
5256	IgorA	296	; set r to the identity matrix
		297	mov eax,0.0
		298	mov ecx,[n]
		299	imul ecx,ecx ;ecx = n*n
		300	mov edi,[r]
		301	rep stosd ;for(i=0;i<n*n;i++) r[i]=0
		302	mov eax,1.0
		303	xor ebx,ebx
		304	mov edi,[r]
		305	mov ecx,[n]
		306	shl ecx,2 ;ecx = n*4 = bytes per row
		307	@@: ;for(i=0;i<n;i++)
		308	cmp ebx,[n]
		309	jge .end_0
		310	stosd ;r[i*n+i]=1
		311	add edi,ecx ;stosd already advanced 4 bytes, so edi moves (n+1)*4 in total: next diagonal element
		312	inc ebx
		313	jmp @b
		314	.end_0:
5153	IgorA	315
5256	IgorA	316	; ebx -> n
		317	; ecx -> j
		318	; edx -> k
		319	; edi -> i
		320	; esi -> l
		321	mov ebx,[n]
		322	xor ecx,ecx
		323	.cycle_0: ;for(j=0;j<n;j++)
		324	cmp ecx,ebx
		325	jge .cycle_0_end
		326	; find the entry with the largest absolute value in column j, rows j..n-1
		327	mov eax,ecx
		328	imul eax,ebx
		329	add eax,ecx
		330	shl eax,2
		331	add eax,[m]
		332	mov eax,[eax]
		333	mov [max],eax ;max=m[j*n+j]
		334	mov edx,ecx ;k=j
		335	mov edi,ecx
		336	inc edi
		337	.cycle_1: ;for(i=j+1;i<n;i++)
		338	cmp edi,ebx
		339	jge .cycle_1_end
		340	mov eax,edi
		341	imul eax,ebx
		342	add eax,ecx
		343	shl eax,2
		344	add eax,[m]
		345	fld dword[eax] ;st0 = m[i*n+j]
6017	IgorA	346	fld st0 ;duplicate it so the signed value survives the compare
		347	fabs
		348	fld dword[max]
		349	fabs
		350	fcompp ;compare fabs(max) with fabs(m[i*n+j]) and pop both
5256	IgorA	351	fstsw ax
		352	sahf
6017	IgorA	353	jae @f ;fabs(max) >= fabs(m[i*n+j]): keep the current pivot
5256	IgorA	354	mov edx,edi ;k=i
		355	fst dword[max] ;max=m[i*n+j] (signed value)
		356	@@:
6017	IgorA	357	ffree st0 ;discard m[i*n+j] (free + advance top = pop)
5256	IgorA	358	fincstp
		359	inc edi
		360	jmp .cycle_1
		361	.cycle_1_end:
5153	IgorA	362
5256	IgorA	363	; non-invertible matrix: the best pivot in this column is zero
		364	fld dword[max]
		365	ftst ;if (max==0)
		366	fstsw ax
		367	ffree st0
		368	fincstp
		369	sahf
		370	jne @f
		371	xor eax,eax
		372	inc eax
		373	jmp .end_f ;return 1
		374	@@:
5153	IgorA	375
5256	IgorA	376	; swap rows j and k (in both m and r)
		377	cmp ecx,edx ;if (j!=k)
		378	je .cycle_2_end
		379	xor edi,edi
		380	.cycle_2: ;for(i=0;i<n;i++)
		381	cmp edi,ebx
		382	jge .cycle_2_end
		383	;note: inside this loop esi is used as scratch, it does not hold l
		384	mov eax,ecx
		385	imul eax,ebx
		386	add eax,edi
		387	shl eax,2
		388	add eax,[m]
		389	mov esi,[eax]
		390	mov [tmp],esi ;tmp=m[j*n+i]
		391	mov esi,edx
		392	imul esi,ebx
		393	add esi,edi
		394	shl esi,2
		395	add esi,[m]
		396	m2m dword[eax],dword[esi] ;m[j*n+i]=m[k*n+i]
		397	mov eax,[tmp]
		398	mov [esi],eax ;m[k*n+i]=tmp
5153	IgorA	399
5256	IgorA	400	mov eax,ecx
		401	imul eax,ebx
		402	add eax,edi
		403	shl eax,2
		404	add eax,[r]
		405	mov esi,[eax]
		406	mov [tmp],esi ;tmp=r[j*n+i]
		407	mov esi,edx
		408	imul esi,ebx
		409	add esi,edi
		410	shl esi,2
		411	add esi,[r]
		412	m2m dword[eax],dword[esi] ;r[j*n+i]=r[k*n+i]
		413	mov eax,[tmp]
		414	mov [esi],eax ;r[k*n+i]=tmp
		415	inc edi
		416	jmp .cycle_2
		417	.cycle_2_end:
5153	IgorA	418
5256	IgorA	419	; multiply row j by 1/max (in both m and r)
		420	fld1
		421	fdiv dword[max]
		422	fst dword[max] ;max=1/max; the value also stays in st0 for the loop below
		423	xor edi,edi
		424	mov eax,ecx
		425	imul eax,ebx
		426	shl eax,2 ;eax = byte offset of row j; rebased onto m and r in turn
		427	.cycle_3: ;for(i=0;i<n;i++)
		428	cmp edi,ebx
		429	jge .cycle_3_end
		430	add eax,[m]
		431	fld dword[eax]
		432	fmul st0,st1
		433	fstp dword[eax] ;m[j*n+i]*=max
		434	sub eax,[m]
		435	add eax,[r]
		436	fld dword[eax]
		437	fmul st0,st1
		438	fstp dword[eax] ;r[j*n+i]*=max
		439	sub eax,[r]
		440	add eax,4
		441	inc edi
		442	jmp .cycle_3
		443	.cycle_3_end:
		444	ffree st0 ;discard max
		445	fincstp
		446
		447	xor esi,esi
		448	.cycle_4: ;for(l=0;l<n;l++)
		449	cmp esi,ebx
		450	jge .cycle_4_end
		451	cmp esi,ecx ;if (l!=j)
5418	IgorA	452	je .if_end
5256	IgorA	453	mov eax,esi
		454	imul eax,ebx
		455	add eax,ecx
		456	shl eax,2
		457	add eax,[m]
		458	fld dword[eax] ;t=m[l*n+j], kept in st0 while row l is updated
		459	xor edi,edi
		460	.cycle_5: ;for(i=0;i<n;i++)
		461	cmp edi,ebx
		462	jge .cycle_5_end
		463	mov eax,ecx
		464	imul eax,ebx
		465	add eax,edi
		466	shl eax,2
		467	add eax,[m]
		468	fld dword[eax]
		469	fmul st0,st1 ;m[j*n+i]*t
		470	mov eax,esi
		471	imul eax,ebx
		472	add eax,edi
		473	shl eax,2
		474	add eax,[m]
		475	fsub dword[eax] ;m[j*n+i]*t - m[l*n+i]
		476	fchs ;negate: m[l*n+i] - m[j*n+i]*t
		477	fstp dword[eax] ;m[l*n+i]-=m[j*n+i]*t
		478	mov eax,ecx
		479	imul eax,ebx
		480	add eax,edi
		481	shl eax,2
		482	add eax,[r]
		483	fld dword[eax]
		484	fmul st0,st1 ;r[j*n+i]*t
		485	mov eax,esi
		486	imul eax,ebx
		487	add eax,edi
		488	shl eax,2
		489	add eax,[r]
		490	fsub dword[eax] ;r[j*n+i]*t - r[l*n+i]
		491	fchs ;negate: r[l*n+i] - r[j*n+i]*t
		492	fstp dword[eax] ;r[l*n+i]-=r[j*n+i]*t
		493	inc edi
		494	jmp .cycle_5
		495	.cycle_5_end:
		496	ffree st0 ;discard t
		497	fincstp
5418	IgorA	498	.if_end:
5256	IgorA	499	inc esi
		500	jmp .cycle_4
		501	.cycle_4_end:
		502	inc ecx
		503	jmp .cycle_0
		504	.cycle_0_end:
		505
		506	xor eax,eax ;return 0
		507	.end_f:
5153	IgorA	508	ret
		509	endp
		510
		511	; inversion of a 4x4 matrix
		512
		513	align 4
		; gl_M4_Inv: invert the 4x4 matrix b into a.
		; In:  a = destination matrix pointer, b = source matrix pointer.
		;      b is copied into the local tmp first, because Matrix_Inv overwrites its
		;      input matrix.
		; Out: nothing. eax is listed in `uses`, so the status returned by Matrix_Inv in
		;      eax is discarded; ecx, edi and esi are restored as well.
		proc gl_M4_Inv uses eax ecx edi esi, a:dword, b:dword
		locals
			tmp M4
		endl
			lea edi,[ebp-sizeof.M4] ; edi = &tmp
			mov esi,[b]
			mov ecx,16
			rep movsd ; tmp = b
			lea edi,[edi-sizeof.M4] ; rewind edi to &tmp
			stdcall Matrix_Inv,[a],edi,4
			ret
		endp
		527
		528	align 4
		; gl_M4_Rotate: set the matrix at [a] to the identity, then store a 2x2 rotation
		; block in it: m[v][v]=cos(t), m[v][w]=-sin(t), m[w][v]=sin(t), m[w][w]=cos(t),
		; where v = u+1 and w = v+1, each wrapped to 0 once it exceeds 2.
		; In:  a = matrix pointer, t = angle as a float (fed to fsin/fcos), u = axis index (int).
		; Out: nothing; eax and ecx are restored by `uses`.
		; M4_reg is defined elsewhere; it is used here exactly as in the original code.
		proc gl_M4_Rotate uses eax ecx, a:dword,t:dword,u:dword
		locals
			s dd ? ;float, sin(t)
			c dd ? ;float, cos(t)
			v dd ? ;int
			w dd ? ;int
		endl
			mov eax,[u]
			inc eax
			cmp eax,3
			jl .v_ready
			xor eax,eax ; wrap around to 0
		.v_ready:
			mov [v],eax
			inc eax
			cmp eax,3
			jl .w_ready
			xor eax,eax ; wrap around to 0
		.w_ready:
			mov [w],eax

			fld dword [t]
			fsin
			fstp dword [s]
			fld dword [t]
			fcos
			fstp dword [c]

			stdcall gl_M4_Id,[a]

			M4_reg ecx,[a],[v],[v]
			mov eax,[c]
			mov [ecx],eax ; m[v][v] = c

			M4_reg ecx,[a],[v],[w]
			mov eax,[s]
			xor eax,0x80000000 ; flip the float sign bit: -s
			mov [ecx],eax ; m[v][w] = -s

			M4_reg ecx,[a],[w],[v]
			mov eax,[s]
			mov [ecx],eax ; m[w][v] = s

			M4_reg ecx,[a],[w],[w]
			mov eax,[c]
			mov [ecx],eax ; m[w][w] = c

			ret
		endp
		577
		578	; inverse of a 3x3 matrix
		579	;void gl_M3_Inv(M3 *a,M3 *m)
		580	;{
		581	; float det;
		582
		583	; det = m->m[0][0]*m->m[1][1]*m->m[2][2]-m->m[0][0]*m->m[1][2]*m->m[2][1]-
		584	; m->m[1][0]*m->m[0][1]*m->m[2][2]+m->m[1][0]*m->m[0][2]*m->m[2][1]+
		585	; m->m[2][0]*m->m[0][1]*m->m[1][2]-m->m[2][0]*m->m[0][2]*m->m[1][1];
		586
		587	; a->m[0][0] = (m->m[1][1]*m->m[2][2]-m->m[1][2]*m->m[2][1])/det;
		588	; a->m[0][1] = -(m->m[0][1]*m->m[2][2]-m->m[0][2]*m->m[2][1])/det;
		589	; a->m[0][2] = -(-m->m[0][1]*m->m[1][2]+m->m[0][2]*m->m[1][1])/det;
		590
		591	; a->m[1][0] = -(m->m[1][0]*m->m[2][2]-m->m[1][2]*m->m[2][0])/det;
		592	; a->m[1][1] = (m->m[0][0]*m->m[2][2]-m->m[0][2]*m->m[2][0])/det;
		593	; a->m[1][2] = -(m->m[0][0]*m->m[1][2]-m->m[0][2]*m->m[1][0])/det;
		594
		595	; a->m[2][0] = (m->m[1][0]*m->m[2][1]-m->m[1][1]*m->m[2][0])/det;
		596	; a->m[2][1] = -(m->m[0][0]*m->m[2][1]-m->m[0][1]*m->m[2][0])/det;
		597	; a->m[2][2] = (m->m[0][0]*m->m[1][1]-m->m[0][1]*m->m[1][0])/det;
		598	;}
		599
		600	; vector arithmetic
		601
5218	IgorA	602	align 4
		; gl_V3_Norm: normalise the 3-component float vector at [a] in place.
		; In:  a = vector pointer; components at +0, +offs_Y and +offs_Z.
		; Out: eax = 0 if the vector was scaled to unit length,
		;      eax = 1 if its length compares equal to 0 (vector left unchanged).
		;      ebx is restored by `uses`; the FPU stack is left balanced.
		proc gl_V3_Norm uses ebx, a:dword
			mov ebx,[a]
			fld dword[ebx]
			fmul st0,st0 ; X^2
			fld dword[ebx+offs_Y]
			fmul st0,st0 ; Y^2
			faddp
			fld dword[ebx+offs_Z]
			fmul st0,st0 ; Z^2
			faddp
			fsqrt ; st0 = length = sqrt(X^2 + Y^2 + Z^2)
			ftst
			fstsw ax
			sahf
			jne .scale
			mov eax,1 ; length == 0: nothing to divide by
			jmp .done
		.scale:
			fld dword[ebx]
			fdiv st0,st1
			fstp dword[ebx] ; X /= length
			fld dword[ebx+offs_Y]
			fdiv st0,st1
			fstp dword[ebx+offs_Y] ; Y /= length
			fld dword[ebx+offs_Z]
			fdiv st0,st1
			fstp dword[ebx+offs_Z] ; Z /= length
			xor eax,eax
		.done:
			ffree st0 ; discard the length
			fincstp
			ret
		endp
5153	IgorA	637
		638	macro gl_V3_New p_mem, x, y, z ; store three dwords x,y,z at p_mem, p_mem+4, p_mem+8
		639	{
		640	mov dword[p_mem],x ; x, y, z must be valid sources for `mov dword[mem],src`
		641	mov dword[p_mem+4],y
		642	mov dword[p_mem+8],z
		643	}
		644
		645	macro gl_V4_New p_mem, x, y, z, w ; store four dwords x,y,z,w at p_mem .. p_mem+12
		646	{
		647	mov dword[p_mem],x ; x, y, z, w must be valid sources for `mov dword[mem],src`
		648	mov dword[p_mem+4],y
		649	mov dword[p_mem+8],z
		650	mov dword[p_mem+12],w
		651	}

Subversion Repositories Kolibri OS

(root)/programs/develop/libraries/TinyGL/asm_fork/zmath.asm – Rev 6017