WebSVN – Kolibri OS – Diff – /programs/develop/libraries/TinyGL/asm_fork/zmath.asm

 	mov edx,[c]
 	xor eax,eax
 	.cycle_0: ;i
 		xor ebx,ebx
 		.cycle_1: ;j
-			finit
 			fldz ;sum=0
 			xor ecx,ecx
 			M4_reg edi,[a],eax,0
 			.cycle_2: ;k
 				fld dword[edi]
 				add edi,4
 				M4_reg esi,[b],ecx,ebx
 				fmul dword[esi]
-				fadd st0,st1 ;sum += a[i][k] * b[k][j]
+				faddp ;sum += a[i][k] * b[k][j]
 				inc ecx
 				cmp ecx,4
 				jl .cycle_2
 			fstp dword[edx] ;c[i][j] = sum
 			add edx,4
 			cmp ebx,4
 			jl .cycle_1
 		inc eax
 		cmp eax,4
 		jl .cycle_0
-	finit
 if DEBUG ;gl_M4_Mul
 	stdcall dbg_print,f_m4m,txt_nl
 	stdcall gl_print_matrix,[c],4
 	stdcall dbg_print,txt_sp,txt_nl
 end if
 ;        a->X=b->m[0][0]*c->X+b->m[0][1]*c->Y+b->m[0][2]*c->Z;
 ;        a->Y=b->m[1][0]*c->X+b->m[1][1]*c->Y+b->m[1][2]*c->Z;
 ;        a->Z=b->m[2][0]*c->X+b->m[2][1]*c->Y+b->m[2][2]*c->Z;
 ;}
+align 4
+proc gl_M4_MulV4 uses ebx ecx edx, a:dword, b:dword, c:dword ;V4 *a, M4 *b, V4 *c; computes a = b * c (4x4 matrix times 4-component vector)
+	mov ebx,[b] ;ebx -> current matrix row (advanced by 16 bytes per iteration)
+	mov edx,[c]
+	fld dword[edx] ;push c.X
+	fld dword[edx+4] ;push c.Y
+	fld dword[edx+8] ;push c.Z
+	fld dword[edx+12] ;push c.W; FPU stack is now st0=c.W st1=c.Z st2=c.Y st3=c.X
+	mov edx,[a] ;edx -> current output component
+	mov ecx,4 ;one iteration per output component
+	.cycle_1:
+		fld dword[ebx]    ;st0 = m[_][0]
+		fmul st0,st4      ;st0 *= c.X
+		fld dword[ebx+4]  ;st0 = m[_][1]
+		fmul st0,st4      ;st0 *= c.Y (stack is one level deeper here, so st4 is c.Y)
+		faddp
+		fld dword[ebx+8]  ;st0 = m[_][2]
-;void gl_M4_MulV4(V4 *a,M4 *b,V4 *c)
+		fmul st0,st3      ;st0 *= c.Z
-;{
+		faddp
+		fld dword[ebx+12] ;st0 = m[_][3]
+		fmul st0,st2      ;st0 *= c.W
-;        a->X=b->m[0][0]*c->X+b->m[0][1]*c->Y+b->m[0][2]*c->Z+b->m[0][3]*c->W;
+		faddp
-;        a->Y=b->m[1][0]*c->X+b->m[1][1]*c->Y+b->m[1][2]*c->Z+b->m[1][3]*c->W;
+		fstp dword[edx]   ;a[_] = b.m[_][0]*c.X +b.m[_][1]*c.Y +b.m[_][2]*c.Z +b.m[_][3]*c.W
-;        a->Z=b->m[2][0]*c->X+b->m[2][1]*c->Y+b->m[2][2]*c->Z+b->m[2][3]*c->W;
+		add ebx,16 ;next matrix row
+		add edx,4  ;next vector coordinate
+	loop .cycle_1
+	ffree st0 ;drop the four cached components of c from the FPU stack (ffree + fincstp acts as a pop)
+	fincstp
+	ffree st0
+	fincstp
+	ffree st0
+	fincstp
+	ffree st0
+	fincstp
-;        a->W=b->m[3][0]*c->X+b->m[3][1]*c->Y+b->m[3][2]*c->Z+b->m[3][3]*c->W;
+	ret
-;}
+endp
 ; transposition of a 4x4 matrix
 align 4
 ; Inversion of a general nxn matrix.
 ; Note : m is destroyed
 align 4
-proc Matrix_Inv uses ecx, r:dword, m:dword, n:dword ;(float *r,float *m,int n)
+proc Matrix_Inv uses ebx ecx edx edi esi, r:dword, m:dword, n:dword ;(float *r,float *m,int n)
-;        int i,j,k,l;
-;        float max,tmp,t;
+locals
-;        /* identitée dans r */
-;        for(i=0;i
-;        for(i=0;i
-;        for(j=0;j
-;                       /* recherche du nombre de plus grand module sur la colonne j */
-;                       max=m[j*n+j];
-;                       k=j;
-;                       for(i=j+1;i
-;                               if (fabs(m[i*n+j])>fabs(max)) {
-;                                        k=i;
-;                                        max=m[i*n+j];
+	max dd ? ;float
-;                               }
-;      /* non intersible matrix */
+	tmp dd ?
+endl
+	; set r to the identity matrix
-;      if (max==0) return 1;
+	mov eax,0.0
+	mov ecx,[n]
+	imul ecx,ecx
-;                       /* permutation des lignes j et k */
+	mov edi,[r]
-;                       if (k!=j) {
+	rep stosd ;for(i=0;i
-;                                for(i=0;i
+	mov eax,1.0
-;                                               tmp=m[j*n+i];
+	xor ebx,ebx
-;                                               m[j*n+i]=m[k*n+i];
+	mov edi,[r]
-;                                               m[k*n+i]=tmp;
+	mov ecx,[n]
+	shl ecx,2
-;
+	@@: ;for(i=0;i
-;                                               tmp=r[j*n+i];
+		cmp ebx,[n]
-;                                               r[j*n+i]=r[k*n+i];
+		jge .end_0
+		stosd ;r[i*n+i]=1
+		add edi,ecx
+		inc ebx
+		jmp @b
+	.end_0:
+	; ebx -> n
+	; ecx -> j
+	; edx -> k
+	; edi -> i
+	; esi -> l
+	mov ebx,[n]
-;                                               r[k*n+i]=tmp;
+	xor ecx,ecx
+	.cycle_0: ;for(j=0;j
+	cmp ecx,ebx
+	jge .cycle_0_end
+		; recherche du nombre de plus grand module sur la colonne j
+		mov eax,ecx
+		imul eax,ebx
-;                                }
+		add eax,ecx
+		shl eax,2
+		add eax,[m]
+		mov eax,[eax]
-;                       }
+		mov [max],eax ;max=m[j*n+j]
+		mov edx,ecx ;k=j
+		mov edi,ecx
+		inc edi
+		.cycle_1: ;for(i=j+1;i
+		cmp edi,ebx
+		jge .cycle_1_end
+			mov eax,edi
+			imul eax,ebx
+			add eax,ecx
+			shl eax,2
+			add eax,[m]
+			fld dword[eax]
+			fcom dword[max] ;if (m[i*n+j] > max) -- NOTE(review): signed compare; the C reference uses fabs() on both operands, no fabs is applied here
+			fstsw ax
+			sahf
+			jbe @f
+				mov edx,edi ;k=i
+				fst dword[max]
+			@@:
+			ffree st0
+			fincstp
-;                       /* multiplication de la ligne j par 1/max */
+		inc edi
+		jmp .cycle_1
+		.cycle_1_end:
+		; non-invertible matrix
+		fld dword[max]
+		ftst ;if (max==0)
+		fstsw ax
+		ffree st0
+		fincstp
-;                       max=1/max;
+		sahf
+		jne @f
-;                       for(i=0;i
+			xor eax,eax
-;                                m[j*n+i]*=max;
+			inc eax
+			jmp .end_f ;return 1
+		@@:
-;                                r[j*n+i]*=max;
+		; permutation des lignes j et k
+		cmp ecx,edx ;if (j!=k)
+		je .cycle_2_end
+			xor edi,edi
+			.cycle_2: ;for(i=0;i
+			cmp edi,ebx
+			jge .cycle_2_end
+				;here esi is not yet l (it is used as a scratch register in this loop)
+				mov eax,ecx
+				imul eax,ebx
+				add eax,edi
+				shl eax,2
+				add eax,[m]
+				mov esi,[eax]
+				mov [tmp],esi ;tmp=m[j*n+i]
-;                       }
+				mov esi,edx
+				imul esi,ebx
+				add esi,edi
+				shl esi,2
+				add esi,[m]
+				m2m dword[eax],dword[esi] ;m[j*n+i]=m[k*n+i]
+				mov eax,[tmp]
+				mov [esi],eax ;m[k*n+i]=tmp
+				mov eax,ecx
+				imul eax,ebx
+				add eax,edi
+				shl eax,2
+				add eax,[r]
+				mov esi,[eax]
+				mov [tmp],esi ;tmp=r[j*n+i]
+				mov esi,edx
+				imul esi,ebx
+				add esi,edi
+				shl esi,2
+				add esi,[r]
+				m2m dword[eax],dword[esi] ;r[j*n+i]=r[k*n+i]
+				mov eax,[tmp]
-;                       for(l=0;l
+				mov [esi],eax ;r[k*n+i]=tmp
+			inc edi
+			jmp .cycle_2
+		.cycle_2_end:
+		; multiplication de la ligne j par 1/max
+		fld1
+		fdiv dword[max]
+		fst dword[max] ;max=1/max
+		xor edi,edi
+		mov eax,ecx
+		imul eax,ebx
+		shl eax,2
+		.cycle_3: ;for(i=0;i
+		cmp edi,ebx
+		jge .cycle_3_end
+			add eax,[m]
+			fld dword[eax]
+			fmul st0,st1
+			fstp dword[eax] ;m[j*n+i]*=max
+			sub eax,[m]
+			add eax,[r]
+			fld dword[eax]
+			fmul st0,st1
+			fstp dword[eax] ;r[j*n+i]*=max
+			sub eax,[r]
+			add eax,4
+		inc edi
+		jmp .cycle_3
+		.cycle_3_end:
+		ffree st0 ;max
+		fincstp
-;                                t=m[l*n+j];
+		xor esi,esi
+		.cycle_4: ;for(l=0;l
+		cmp esi,ebx
+		jge .cycle_4_end
+			cmp esi,ecx ;if (l!=j)
+			je .cycle_5_end
+			mov eax,esi
+			imul eax,ebx
+			add eax,ecx
+			shl eax,2
+			add eax,[m]
+			fld dword[eax] ;t=m[l*n+j]
+			xor edi,edi
+			.cycle_5: ;for(i=0;i
+			cmp edi,ebx
+			jge .cycle_5_end
+				mov eax,ecx
+				imul eax,ebx
+				add eax,edi
+				shl eax,2
+				add eax,[m]
+				fld dword[eax]
+				fmul st0,st1
+				mov eax,esi
+				imul eax,ebx
+				add eax,edi
+				shl eax,2
+				add eax,[m]
+				fsub dword[eax]
+				fchs
+				fstp dword[eax] ;m[l*n+i]-=m[j*n+i]*t
+				mov eax,ecx
+				imul eax,ebx
+				add eax,edi
+				shl eax,2
+				add eax,[r]
+				fld dword[eax]
+				fmul st0,st1
+				mov eax,esi
+				imul eax,ebx
+				add eax,edi
-;                                for(i=0;i
+				shl eax,2
+				add eax,[r]
+				fsub dword[eax]
+				fchs
+				fstp dword[eax] ;r[l*n+i]-=r[j*n+i]*t
+			inc edi
+			jmp .cycle_5
+			.cycle_5_end:
+			ffree st0 ;t
+			fincstp
+		inc esi
-;                                               m[l*n+i]-=m[j*n+i]*t;
+		jmp .cycle_4
+		.cycle_4_end:
-;                                               r[l*n+i]-=r[j*n+i]*t;
+	inc ecx
-;                                }
+	jmp .cycle_0
-;                       }
+	.cycle_0_end:
 align 4
 proc gl_V3_Norm uses ebx, a:dword
 	mov ebx,[a]
 	fld dword[ebx]
-	fmul dword[ebx]
+	fmul st0,st0
 	fld dword[ebx+4]
-	fmul dword[ebx+4]
+	fmul st0,st0
 	faddp
 	fld dword[ebx+8]
-	fmul dword[ebx+8]
+	fmul st0,st0
 	faddp
 	fsqrt ;st0 = sqrt(a.X^2 +a.Y^2 +a.Z^2)
 	fldz
 	fcomp

Subversion Repositories Kolibri OS

(root)/programs/develop/libraries/TinyGL/asm_fork/zmath.asm @ 5218 – Rev 5218 → 5256