WebSVN – Kolibri OS – Path Comparison – / – /programs/develop/libraries/TinyGL/asm_fork/zmath.asm Rev 5255 and /programs/develop/libraries/TinyGL/asm

Regard whitespace Rev 5255 → Rev 5256

 /programs/develop/libraries/TinyGL/asm_fork/zmath.asm
 ,7 → 70,6
         .cycle_0: ;i
                 xor ebx,ebx
                 .cycle_1: ;j
-                        finit
                         fldz ;sum=0
                         xor ecx,ecx
                         M4_reg edi,[a],eax,0
 ,7 → 78,7
                                 add edi,4
                                 M4_reg esi,[b],ecx,ebx
                                 fmul dword[esi]
-                                fadd st0,st1 ;sum += a[i][k] * b[k][j]
+                                faddp ;sum += a[i][k] * b[k][j]
                                 inc ecx
                                 cmp ecx,4
                                 jl .cycle_2
 ,7 → 90,6
                 inc eax
                 cmp eax,4
                 jl .cycle_0
-        finit
 if DEBUG ;gl_M4_Mul
         stdcall dbg_print,f_m4m,txt_nl
         stdcall gl_print_matrix,[c],4
 ,13 → 185,42
 ;        a->Z=b->m[2][0]*c->X+b->m[2][1]*c->Y+b->m[2][2]*c->Z;
 ;}
-;void gl_M4_MulV4(V4 *a,M4 *b,V4 *c)
-;{
-;        a->X=b->m[0][0]*c->X+b->m[0][1]*c->Y+b->m[0][2]*c->Z+b->m[0][3]*c->W;
-;        a->Y=b->m[1][0]*c->X+b->m[1][1]*c->Y+b->m[1][2]*c->Z+b->m[1][3]*c->W;
-;        a->Z=b->m[2][0]*c->X+b->m[2][1]*c->Y+b->m[2][2]*c->Z+b->m[2][3]*c->W;
-;        a->W=b->m[3][0]*c->X+b->m[3][1]*c->Y+b->m[3][2]*c->Z+b->m[3][3]*c->W;
-;}
+align 4
+; void gl_M4_MulV4(V4 *a, M4 *b, V4 *c)
+; Writes a = b*c: component i of a is the dot product of row i of the
+; 4x4 float matrix b with the 4-component float vector c.
+; All four components of c are loaded onto the FPU stack before the first
+; store to a. The routine pushes and pops the same number of FPU registers.
+proc gl_M4_MulV4 uses ebx ecx edx, a:dword, b:dword, c:dword ;V4 *a, M4 *b, V4 *c
+        mov edx,[c]
+        mov ebx,[b]       ;ebx -> current row of b
+        fld dword[edx]
+        fld dword[edx+4]
+        fld dword[edx+8]
+        fld dword[edx+12] ;st0 = c.W, st1 = c.Z, st2 = c.Y, st3 = c.X
+        mov edx,[a]       ;edx -> current component of a
+        mov ecx,4         ;number of rows still to process
+        .next_row:
+                fld st3            ;st0 = c.X
+                fmul dword[ebx]    ;st0 = m[row][0]*c.X
+                fld st3            ;st0 = c.Y (stack is one level deeper here)
+                fmul dword[ebx+4]  ;st0 = m[row][1]*c.Y
+                faddp
+                fld st2            ;st0 = c.Z
+                fmul dword[ebx+8]  ;st0 = m[row][2]*c.Z
+                faddp
+                fld st1            ;st0 = c.W
+                fmul dword[ebx+12] ;st0 = m[row][3]*c.W
+                faddp
+                fstp dword[edx]    ;a[row] = sum of the four products
+                add ebx,16 ;next row of the matrix
+                add edx,4  ;next coordinate of the vector
+                dec ecx
+                jnz .next_row
+        ; drop the four cached components of c from the FPU stack
+        fstp st0
+        fstp st0
+        fstp st0
+        fstp st0
+        ret
+endp
 ; transposition of a 4x4 matrix
 align 4
 ,58 → 285,219
 ; Note : m is destroyed
 align 4
-proc Matrix_Inv uses ecx, r:dword, m:dword, n:dword ;(float *r,float *m,int n)
-;        int i,j,k,l;
-;        float max,tmp,t;
+; int Matrix_Inv(float *r, float *m, int n)
+; Inverts the n x n float matrix m by Gauss-Jordan elimination with row
+; pivoting and stores the inverse in r. m is overwritten during the process.
+; Returns eax = 0 on success, eax = 1 when the pivot of some column is zero
+; (matrix not invertible).
+; NOTE(review): stosd is used to fill r, so the direction flag is assumed
+; to be clear on entry - confirm against callers.
+proc Matrix_Inv uses ebx ecx edx edi esi, r:dword, m:dword, n:dword ;(float *r,float *m,int n)
+locals
+        max dd ? ;float: pivot of the current column, later 1/pivot
+        tmp dd ? ;scratch dword used while swapping rows
+endl
-;        /* identitée dans r */
-;        for(i=0;i<n*n;i++) r[i]=0;
-;        for(i=0;i<n;i++) r[i*n+i]=1;
+        ; r = identity matrix
+        mov eax,0.0
+        mov ecx,[n]
+        imul ecx,ecx
+        mov edi,[r]
+        rep stosd ;for(i=0;i<n*n;i++) r[i]=0
+        mov eax,1.0
+        xor ebx,ebx
+        mov edi,[r]
+        mov ecx,[n]
+        shl ecx,2 ;ecx = n*4; together with the 4 bytes stosd advances this steps to the next diagonal element
+        @@: ;for(i=0;i<n;i++)
+                cmp ebx,[n]
+                jge .end_0
+                stosd ;r[i*n+i]=1
+                add edi,ecx
+                inc ebx
+                jmp @b
+        .end_0:
-;        for(j=0;j<n;j++) {
+        ; ebx -> n
+        ; ecx -> j
+        ; edx -> k
+        ; edi -> i
+        ; esi -> l
+        mov ebx,[n]
+        xor ecx,ecx
+        .cycle_0: ;for(j=0;j<n;j++)
+        cmp ecx,ebx
+        jge .cycle_0_end
+                ; search column j (rows j..n-1) for the entry of largest magnitude
+                mov eax,ecx
+                imul eax,ebx
+                add eax,ecx
+                shl eax,2
+                add eax,[m]
+                mov eax,[eax]
+                mov [max],eax ;max=m[j*n+j]
+                mov edx,ecx ;k=j
+                mov edi,ecx
+                inc edi
+                .cycle_1: ;for(i=j+1;i<n;i++)
+                cmp edi,ebx
+                jge .cycle_1_end
+                        mov eax,edi
+                        imul eax,ebx
+                        add eax,ecx
+                        shl eax,2
+                        add eax,[m]
+                        fld dword[eax]  ;st0 = m[i*n+j]
+                        fld dword[max]
+                        fabs            ;st0 = |max|, st1 = m[i*n+j]
+                        fld st1
+                        fabs            ;st0 = |m[i*n+j]|, st1 = |max|, st2 = m[i*n+j]
+                        fcompp          ;if (fabs(m[i*n+j])>fabs(max)); pops both magnitudes
+                        fstsw ax
+                        sahf
+                        jbe @f
+                                mov edx,edi ;k=i
+                                fst dword[max] ;max=m[i*n+j] (signed value is kept)
+                        @@:
+                        ffree st0
+                        fincstp
+                inc edi
+                jmp .cycle_1
+                .cycle_1_end:
-;                       /* recherche du nombre de plus grand module sur la colonne j */
-;                       max=m[j*n+j];
-;                       k=j;
-;                       for(i=j+1;i<n;i++)
-;                               if (fabs(m[i*n+j])>fabs(max)) {
-;                                        k=i;
-;                                        max=m[i*n+j];
-;                               }
+                ; non-invertible matrix
+                fld dword[max]
+                ftst ;if (max==0)
+                fstsw ax
+                ffree st0
+                fincstp
+                sahf
+                jne @f
+                        xor eax,eax
+                        inc eax
+                        jmp .end_f ;return 1
+                @@:
-;      /* non intersible matrix */
-;      if (max==0) return 1;
+                ; swap rows j and k
+                cmp ecx,edx ;if (j!=k)
+                je .cycle_2_end
+                        xor edi,edi
+                        .cycle_2: ;for(i=0;i<n;i++)
+                        cmp edi,ebx
+                        jge .cycle_2_end
+                                ;esi is not yet l here, it is used as scratch
+                                mov eax,ecx
+                                imul eax,ebx
+                                add eax,edi
+                                shl eax,2
+                                add eax,[m]
+                                mov esi,[eax]
+                                mov [tmp],esi ;tmp=m[j*n+i]
+                                mov esi,edx
+                                imul esi,ebx
+                                add esi,edi
+                                shl esi,2
+                                add esi,[m]
+                                m2m dword[eax],dword[esi] ;m[j*n+i]=m[k*n+i]
+                                mov eax,[tmp]
+                                mov [esi],eax ;m[k*n+i]=tmp
-;                       /* permutation des lignes j et k */
-;                       if (k!=j) {
-;                                for(i=0;i<n;i++) {
-;                                               tmp=m[j*n+i];
-;                                               m[j*n+i]=m[k*n+i];
-;                                               m[k*n+i]=tmp;
-;
-;                                               tmp=r[j*n+i];
-;                                               r[j*n+i]=r[k*n+i];
-;                                               r[k*n+i]=tmp;
-;                                }
-;                       }
+                                mov eax,ecx
+                                imul eax,ebx
+                                add eax,edi
+                                shl eax,2
+                                add eax,[r]
+                                mov esi,[eax]
+                                mov [tmp],esi ;tmp=r[j*n+i]
+                                mov esi,edx
+                                imul esi,ebx
+                                add esi,edi
+                                shl esi,2
+                                add esi,[r]
+                                m2m dword[eax],dword[esi] ;r[j*n+i]=r[k*n+i]
+                                mov eax,[tmp]
+                                mov [esi],eax ;r[k*n+i]=tmp
+                        inc edi
+                        jmp .cycle_2
+                .cycle_2_end:
-;                       /* multiplication de la ligne j par 1/max */
-;                       max=1/max;
-;                       for(i=0;i<n;i++) {
-;                                m[j*n+i]*=max;
-;                                r[j*n+i]*=max;
-;                       }
+                ; multiply row j by 1/max
+                fld1
+                fdiv dword[max]
+                fst dword[max] ;max=1/max, also kept in st0 for the loop below
+                xor edi,edi
+                mov eax,ecx
+                imul eax,ebx
+                shl eax,2 ;eax = byte offset of element [j][0]
+                .cycle_3: ;for(i=0;i<n;i++)
+                cmp edi,ebx
+                jge .cycle_3_end
+                        add eax,[m]
+                        fld dword[eax]
+                        fmul st0,st1
+                        fstp dword[eax] ;m[j*n+i]*=max
+                        sub eax,[m]
+                        add eax,[r]
+                        fld dword[eax]
+                        fmul st0,st1
+                        fstp dword[eax] ;r[j*n+i]*=max
+                        sub eax,[r]
+                        add eax,4
+                inc edi
+                jmp .cycle_3
+                .cycle_3_end:
+                ffree st0 ;max
+                fincstp
-;                       for(l=0;l<n;l++) if (l!=j) {
-;                                t=m[l*n+j];
-;                                for(i=0;i<n;i++) {
-;                                               m[l*n+i]-=m[j*n+i]*t;
-;                                               r[l*n+i]-=r[j*n+i]*t;
-;                                }
-;                       }
-;        }
+                ; subtract row j, scaled by t, from every other row l
+                xor esi,esi
+                .cycle_4: ;for(l=0;l<n;l++)
+                cmp esi,ebx
+                jge .cycle_4_end
+                        cmp esi,ecx ;if (l!=j)
+                        je .cycle_4_next ;t was not pushed, so the pop at .cycle_5_end must be skipped
+                        mov eax,esi
+                        imul eax,ebx
+                        add eax,ecx
+                        shl eax,2
+                        add eax,[m]
+                        fld dword[eax] ;t=m[l*n+j]
+                        xor edi,edi
+                        .cycle_5: ;for(i=0;i<n;i++)
+                        cmp edi,ebx
+                        jge .cycle_5_end
+                                mov eax,ecx
+                                imul eax,ebx
+                                add eax,edi
+                                shl eax,2
+                                add eax,[m]
+                                fld dword[eax]
+                                fmul st0,st1 ;st0 = m[j*n+i]*t
+                                mov eax,esi
+                                imul eax,ebx
+                                add eax,edi
+                                shl eax,2
+                                add eax,[m]
+                                fsub dword[eax]
+                                fchs ;st0 = m[l*n+i] - m[j*n+i]*t
+                                fstp dword[eax] ;m[l*n+i]-=m[j*n+i]*t
+                                mov eax,ecx
+                                imul eax,ebx
+                                add eax,edi
+                                shl eax,2
+                                add eax,[r]
+                                fld dword[eax]
+                                fmul st0,st1 ;st0 = r[j*n+i]*t
+                                mov eax,esi
+                                imul eax,ebx
+                                add eax,edi
+                                shl eax,2
+                                add eax,[r]
+                                fsub dword[eax]
+                                fchs ;st0 = r[l*n+i] - r[j*n+i]*t
+                                fstp dword[eax] ;r[l*n+i]-=r[j*n+i]*t
+                        inc edi
+                        jmp .cycle_5
+                        .cycle_5_end:
+                        ffree st0 ;t
+                        fincstp
+                .cycle_4_next:
+                inc esi
+                jmp .cycle_4
+                .cycle_4_end:
+        inc ecx
+        jmp .cycle_0
+        .cycle_0_end:
-;        return 0;
+        xor eax,eax ;return 0
+        .end_f:
         ret
 endp
 ,12 → 597,12
 proc gl_V3_Norm uses ebx, a:dword
         mov ebx,[a]
         fld dword[ebx]
-        fmul dword[ebx]
+        fmul st0,st0
         fld dword[ebx+4]
-        fmul dword[ebx+4]
+        fmul st0,st0
         faddp
         fld dword[ebx+8]
-        fmul dword[ebx+8]
+        fmul st0,st0
         faddp
         fsqrt ;st0 = sqrt(a.X^2 +a.Y^2 +a.Z^2)
         fldz

Subversion Repositories Kolibri OS

Compare Revisions

Regard whitespace Rev 5255 → Rev 5256