Subversion Repositories Kolibri OS

Compare Revisions

Ignore whitespace Rev 2191 → Rev 2192

/programs/demos/3DS/BUMP_CAT.INC
81,10 → 81,10
.cz2 equ [ebp-108]
.cbx1 equ dword[ebp-112]
.cby1 equ [ebp-116]
.cbx2 equ dword[ebp-120]
.cby2 equ [ebp-124]
.cex1 equ dword[ebp-128]
.cey1 equ [ebp-132]
.cex1 equ dword[ebp-120]
.cey1 equ [ebp-124]
.cbx2 equ dword[ebp-128]
.cby2 equ [ebp-132]
.cex2 equ dword[ebp-136]
.cey2 equ [ebp-140]
 
533,8 → 533,25
jge .loop12_done
.loop12:
call .call_bump_line
if Ext >= SSE2
movups xmm0,.cey2
movups xmm1,.cey1
movups xmm2,.dey12
movups xmm3,.dey13
paddd xmm0,xmm2
paddd xmm1,xmm3
movups .cey2,xmm0
movups .cey1,xmm1
movq mm4,.cz1
movq mm5,.cz2
paddd mm4,.dz13
paddd mm5,.dz12
movq .cz1,mm4
movq .cz2,mm5
end if
 
if Ext >= MMX
 
if (Ext = MMX) | (Ext = SSE)
movq mm0,.cby2
movq mm1,.cby1
movq mm2,.cey2
553,7 → 570,7
movq .cey2,mm2
movq .cz1,mm4
movq .cz2,mm5
else
else if Ext = NON
mov edx,.dbx13
add .cbx1,edx
mov eax,.dbx12
617,8 → 634,23
 
.loop23:
call .call_bump_line
 
if Ext >= MMX
if Ext >= SSE2
movups xmm0,.cey2
movups xmm1,.cey1
movups xmm2,.dey23
movups xmm3,.dey13
paddd xmm0,xmm2
paddd xmm1,xmm3
movups .cey2,xmm0
movups .cey1,xmm1
movq mm4,.cz1
movq mm5,.cz2
paddd mm4,.dz13
paddd mm5,.dz23
movq .cz1,mm4
movq .cz2,mm5
end if
if (Ext = MMX) | (Ext = SSE)
movq mm0,.cby2
movq mm1,.cby1
movq mm2,.cey2
637,7 → 669,7
movq .cey2,mm2
movq .cz1,mm4
movq .cz2,mm5
else
else if Ext = NON
mov eax,.dx13
add .cx1,eax
mov ebx,.dx23
/programs/demos/3DS/BUMP_TEX.INC
292,24 → 292,6
cvtps2pi mm1,xmm1
movq .dty12,mm0
movq .dz12,mm1
;temporaly conversion to keep upside down
; fninit
; fld .dx12
; fistp .dx12
; fld dword .dz12
; fistp dword .dz12
; fld .dbx12
; fistp .dbx12
; fld dword .dby12
; fistp dword .dby12
; fld .dex12
; fistp .dex12
; fld dword .dey12
; fistp dword .dey12
; fld .dtx12
; fistp .dtx12
; fld dword .dty12
; fistp dword .dty12
;----
; mov ax,.z2
; sub ax,.z1
518,25 → 500,6
movq .dty13,mm0
movq .dz13,mm1
 
;temporaly conversion to keep upside down
; fninit
; fld .dx13
; fistp .dx13
; fld dword .dz13
; fistp dword .dz13
; fld .dbx13
; fistp .dbx13
; fld dword .dby13
; fistp dword .dby13
; fld .dex13
; fistp .dex13
; fld dword .dey13
; fistp dword .dey13
; fld .dtx13
; fistp .dtx13
; fld dword .dty13
; fistp dword .dty13
 
else
 
mov ax,.x3
699,24 → 662,6
movq .dz23,mm1
 
 
;temporaly conversion to keep upside down
; fninit
; fld .dx23
; fistp .dx23
; fld dword .dz23
; fistp dword .dz23
; fld .dbx23
; fistp .dbx23
; fld dword .dby23
; fistp dword .dby23
; fld .dex23
; fistp .dex23
; fld dword .dey23
; fistp dword .dey23
; fld .dtx23
; fistp .dtx23
; fld dword .dty23
; fistp dword .dty23
else
mov ax,.x3
sub ax,.x2
794,58 → 739,7
; sub esp,40
.bt_dx23_done:
sub esp,64
;if Ext>=SSE
; movsx eax,.x1
; shl eax,ROUND
; cvtsi2ss xmm0,eax
; movss .cx1,xmm0
; movss .cx2,xmm0
;
; movsx ebx,word[.b_x1]
; shl ebx,ROUND
; cvtsi2ss xmm0,ebx
; movss .cbx1,xmm0
; movss .cbx2,xmm0
;
; movsx ecx,word[.b_y1]
; shl ecx,ROUND
; cvtsi2ss xmm0,ecx
; movss .cby1,xmm0
; movss .cby2,xmm0
 
; movsx edx,word[.e_x1]
; shl edx,ROUND
; cvtsi2ss xmm0,edx
; movss .cex1,xmm0
; movss .cex2,xmm0
;
; movsx eax,word[.e_y1]
; shl eax,ROUND
; cvtsi2ss xmm0,eax
; movss .cey1,xmm0
; movss .cey2,xmm0
;
 
; movsx ebx,.z1
; shl ebx,CATMULL_SHIFT
; cvtsi2ss xmm0,ebx
; movss .cz1,xmm0
; movss .cz2,xmm0
;
; movsx ecx,word[.t_x1]
; shl ecx,ROUND
; cvtsi2ss xmm0,ecx
; movss .ctx1,xmm0
; movss .ctx2,xmm0
 
; movsx edx,word[.t_y1]
; shl edx,ROUND
; cvtsi2ss xmm0,edx
; movss .cty1,xmm0
; movss .cty2,xmm0
 
;else
 
movsx eax,.x1
shl eax,ROUND
mov .cx1,eax
902,14 → 796,8
mov .cty2,edx
; push edx
; push edx
;end if
 
movsx ecx,.y1
cmp cx,.y2
jge .loop12_done
.loop12:
call .call_line
;if Ext >= SSE
;if Ext >= SSE2
; movups xmm0,.cby1
; movups xmm1,.cty1
; movups xmm2,.cby2
918,22 → 806,36
; movups xmm5,.dty13
; movups xmm6,.dby12
; movups xmm7,.dty12
; addps xmm0,xmm4
; addps xmm1,xmm5
; addps xmm2,xmm6
; addps xmm3,xmm7
;
;; addps xmm0,.dby12
;; addps xmm1,.dty12
;; addps xmm2,.dby13
;; addps xmm3,.dty13
; movups .cby1,xmm0
; movups .cty1,xmm1
; movups .cby2,xmm2
; movups .cty2,xmm3
;end if
movsx ecx,.y1
cmp cx,.y2
jge .loop12_done
.loop12:
;if Ext >= SSE2
; fxsave [sse_repository]
;end if
call .call_line
if Ext >= SSE2
; fxrstor [sse_repository]
movups xmm0,.cby1
movups xmm1,.cty1
movups xmm2,.cby2
movups xmm3,.cty2
movups xmm4,.dby13
movups xmm5,.dty13
movups xmm6,.dby12
movups xmm7,.dty12
paddd xmm0,xmm4
paddd xmm1,xmm5
paddd xmm2,xmm6
paddd xmm3,xmm7
movups .cby1,xmm0
movups .cty1,xmm1
movups .cby2,xmm2
movups .cty2,xmm3
end if
 
if Ext >= MMX
if (Ext = MMX) | (Ext = SSE)
movq mm0,.cby2
movq mm1,.cby1
movq mm2,.cey2
1005,7 → 907,6
cmp cx,.y3
jge .loop23_done
 
;if Ext < SSE
 
movsx eax,.z2
shl eax,CATMULL_SHIFT
1038,54 → 939,7
movzx ebx,word[.t_y2]
shl ebx,ROUND
mov .cty2,ebx
;else
; movsx eax,.z2
; shl eax,CATMULL_SHIFT
; cvtsi2ss xmm0,eax
; movss .cz2,xmm0
;
; movsx ebx,.x2
; shl ebx,ROUND
; cvtsi2ss xmm1,ebx
; movss .cx2,xmm1
;
; movzx edx,word[.b_x2]
; shl edx,ROUND
; cvtsi2ss xmm2,edx
; movss .cbx2,xmm2
;
; movzx eax,word[.b_y2]
; shl eax,ROUND
; cvtsi2ss xmm0,eax
; movss .cby2,xmm0
;
; movzx ebx,word[.e_x2]
; shl ebx,ROUND
; cvtsi2ss xmm1,ebx
; movss .cex2,xmm1
;
; movzx edx,word[.e_y2]
; shl edx,ROUND
; cvtsi2ss xmm2,edx
; movss .cey2,xmm2
;
; movzx eax,word[.t_x2]
; shl eax,ROUND
; cvtsi2ss xmm0,eax
; movss .ctx2,xmm0
;
; movzx ebx,word[.t_y2]
; shl ebx,ROUND
; cvtsi2ss xmm1,ebx
; movss .cty2,xmm1
 
;end if
 
.loop23:
call .call_line
 
;if Ext >= SSE
 
;if Ext >= SSE2
; movups xmm0,.cby1
; movups xmm1,.cty1
; movups xmm2,.cby2
1094,21 → 948,34
; movups xmm5,.dty13
; movups xmm6,.dby23
; movups xmm7,.dty23
; addps xmm0,xmm4
; addps xmm1,xmm5
; addps xmm2,xmm6
; addps xmm3,xmm7
; ; addps xmm0,.dby13
; ; addps xmm1,.dty13
; ; addps xmm2,.dby23
; ; addps xmm3,.dty23
; movups .cby1,xmm0
; movups .cty1,xmm1
; movups .cby2,xmm2
; movups .cty2,xmm3
;end if
.loop23:
;if Ext >= SSE2
; fxsave [sse_repository]
;end if
call .call_line
 
if Ext >= SSE2
; fxrstor [sse_repository]
movups xmm0,.cby1
movups xmm1,.cty1
movups xmm2,.cby2
movups xmm3,.cty2
movups xmm4,.dby13
movups xmm5,.dty13
movups xmm6,.dby23
movups xmm7,.dty23
paddd xmm0,xmm4
paddd xmm1,xmm5
paddd xmm2,xmm6
paddd xmm3,xmm7
movups .cby1,xmm0
movups .cty1,xmm1
movups .cby2,xmm2
movups .cty2,xmm3
;
;end if
if Ext >= MMX
end if
if (Ext = MMX) | (Ext = SSE)
movq mm0,.cby2
movq mm1,.cby1
movq mm2,.cey2
1180,90 → 1047,7
ret 50
 
.call_line:
;if Ext >= SSE
; pushad
; push .tex_ptr
; fninit
; fld dword .cty1
; fistp dword [esp-4]
; sub esp,4
;; push dword .cty1
; fld .ctx1
; fistp dword [esp-4]
; sub esp,4
;; push .ctx1
; fld dword .cz1
; fistp dword [esp-4]
; sub esp,4
; ; push dwod .cz1
; fld dword .cty2
; fistp dword [esp-4]
; sub esp,4
; ; push .cty2
; fld .ctx2
; fistp dword [esp-4]
; sub esp,4
; ; push dword .ctx2
; fld dword .cz2
; fistp dword [esp-4]
; sub esp,4
;; push dword .cz2
;
; push .z_buff
; push .t_emap
; push .t_bmap
;--------------------------------------
; fld dword .cey2
; fistp dword [esp-4]
; sub esp,4
;; push dword .cey2
;
; fld .cex2
; fistp dword [esp-4]
; sub esp,4
;; push .cex2
;
; fld dword .cby2
; fistp dword [esp-4]
; sub esp,4
; ; push dword .cby2
 
; fld .cbx2
; fistp dword [esp-4]
; sub esp,4
; push .cbx2
;------------------------------------
; fld dword .cey1
; fistp dword [esp-4]
; sub esp,4
;; push dword .cey1
; fld .cex1
; fistp dword [esp-4]
; sub esp,4
; ; push .cex1
; fld dword .cby1
; fistp dword [esp-4]
; sub esp,4
;; push dword .cby1
; fld .cbx1
; fistp dword [esp-4]
; sub esp,4
;; push .cbx1
; push ecx
 
; fld .cx1
; fistp dword [esp-4]
; mov eax,[esp-4]
; sar eax,ROUND
; fld .cx2
; fistp dword [esp-4]
; mov ebx,[esp-4]
; sar ebx,ROUND
 
; call bump_tex_line_z
;
; popad
;else
pushad
push .tex_ptr
push dword .cty1
1281,8 → 1065,16
push .cbx2
push dword .cey1
push .cex1
;if Ext >= SSE2
; sub esp,8
; shufps xmm0,xmm0,10110100b
; movhps [esp],xmm0 ;================================
;else
 
push dword .cby1
push .cbx1
;end if
 
push ecx
 
mov eax,.cx1
/programs/demos/3DS/DATA.INC
271,7 → 271,7
if Ext=SSE2
db ' (SSE2)'
end if
db ' 0.059',0
db ' 0.060',0
labellen:
STRdata db '-1 '
 
/programs/demos/3DS/GRD_TEX.INC
486,7 → 486,7
; pop ebp ebx eax
popad
 
if Ext >= MMX
if (Ext = MMX)|(Ext=SSE)
movq mm0,.cur1b
movq mm1,.cur1r
movq mm2,.scan_y1
505,7 → 505,25
movq .cur2b,mm3
movq .cur2r,mm4
movq .scan_y2,mm5
else
end if
if Ext >= SSE2
movups xmm0,.cur1b
movups xmm1,.dc13b
movups xmm2,.cur2b
movups xmm3,.dc12b
movq mm2,.scan_y1
movq mm5,.scan_y2
paddd xmm0,xmm1
paddd xmm2,xmm3
paddd mm2,.tex_dy13
paddd mm5,.tex_dy12
movq .scan_y1,mm2
movq .scan_y2,mm5
movups .cur1b,xmm0
movups .cur2b,xmm2
end if
 
if Ext = NON
mov edx,.dc13b
add .cur1b,edx
mov esi,.dc13g
602,7 → 620,7
 
popad
 
if Ext >= MMX
if (Ext = MMX)|(Ext=SSE)
movq mm0,.cur1b
movq mm1,.cur1r
movq mm2,.scan_y1
621,7 → 639,24
movq .cur2b,mm3
movq .cur2r,mm4
movq .scan_y2,mm5
else
end if
if Ext >= SSE2
movups xmm0,.cur1b
movups xmm1,.dc13b
movups xmm2,.cur2b
movups xmm3,.dc23b
movq mm2,.scan_y1
movq mm5,.scan_y2
paddd xmm0,xmm1
paddd xmm2,xmm3
paddd mm2,.tex_dy13
paddd mm5,.tex_dy23
movq .scan_y1,mm2
movq .scan_y2,mm5
movups .cur1b,xmm0
movups .cur2b,xmm2
end if
if Ext = NON
mov edx,.dc13b
add .cur1b,edx
mov esi,.dc13g
740,8 → 775,8
mov ecx,dword .z1
xchg ecx, .z2
mov dword .z1, ecx
 
else
end if
if (Ext=MMX)
movq mm0,.b1 ; b, g
movq mm1,.b2
movq .b1, mm1
756,6 → 791,16
movq .tex_x2,mm4
 
end if
if Ext>=SSE
movups xmm0,.b1
movups xmm1,.b2
movups .b1,xmm1
movups .b2,xmm0
movq mm4,.tex_x1 ; x, z
movq mm5,.tex_x2
movq .tex_x1,mm5
movq .tex_x2,mm4
end if
 
@@:
or bx,bx
/programs/demos/3DS/TWO_TEX.INC
32,72 → 32,68
;---------------------- pointer io Z buffer-----
;-- Z-buffer - filled with coordinates as dword --------
;-- (Z coor. as word) shl CATMULL_SHIFT ----------------
.b_x1 equ ebp+4 ; procedure don't save registers !!!
.b_y1 equ ebp+6 ; each coordinate as word
.b_x2 equ ebp+8
.b_y2 equ ebp+10 ; b - first texture
.b_x3 equ ebp+12
.b_y3 equ ebp+14 ; e - second texture
.e_x1 equ ebp+16
.e_y1 equ ebp+18
.e_x2 equ ebp+20
.e_y2 equ ebp+22
.e_x3 equ ebp+24
.e_y3 equ ebp+26
.z1 equ word[ebp+28]
.z2 equ word[ebp+30]
.z3 equ word[ebp+32]
.z_buff equ dword[ebp+34] ; pointer to Z-buffer
.b_x1 equ ebp+4 ; procedure don't save registers !!!
.b_y1 equ ebp+6 ; each coordinate as word
.b_x2 equ ebp+8
.b_y2 equ ebp+10 ; b - first texture
.b_x3 equ ebp+12
.b_y3 equ ebp+14 ; e - second texture
.e_x1 equ ebp+16
.e_y1 equ ebp+18
.e_x2 equ ebp+20
.e_y2 equ ebp+22
.e_x3 equ ebp+24
.e_y3 equ ebp+26
.z1 equ word[ebp+28]
.z2 equ word[ebp+30]
.z3 equ word[ebp+32]
.z_buff equ dword[ebp+34] ; pointer to Z-buffer
 
 
.t_bmap equ dword[ebp-4] ; pointer to b. texture
.t_emap equ dword[ebp-8] ; pointer to e. texture
.x1 equ word[ebp-10]
.y1 equ word[ebp-12]
.x2 equ word[ebp-14]
.y2 equ word[ebp-16]
.x3 equ word[ebp-18]
.y3 equ word[ebp-20]
.t_bmap equ dword[ebp-4] ; pointer to b. texture
.t_emap equ dword[ebp-8] ; pointer to e. texture
.x1 equ word[ebp-10]
.y1 equ word[ebp-12]
.x2 equ word[ebp-14]
.y2 equ word[ebp-16]
.x3 equ word[ebp-18]
.y3 equ word[ebp-20]
 
.dx12 equ dword[ebp-24]
.dbx12 equ dword[ebp-28]
.dby12 equ dword[ebp-32]
.dby12q equ [ebp-32]
.dby12q equ [ebp-32]
.dex12 equ dword[ebp-36]
.dey12 equ dword[ebp-40]
.dey12q equ [ebp-40]
.dey12q equ [ebp-40]
.dz12 equ dword[ebp-44]
 
.dx13 equ dword[ebp-48]
.dbx13 equ dword[ebp-52]
.dby13 equ dword[ebp-56]
.dby13q equ [ebp-56]
.dby13q equ [ebp-56]
.dex13 equ dword[ebp-60]
.dey13 equ dword[ebp-64]
.dey13q equ [ebp-64]
.dey13q equ [ebp-64]
.dz13 equ dword[ebp-68]
 
.dx23 equ dword[ebp-72]
.dbx23 equ dword[ebp-76]
.dby23 equ dword[ebp-80]
.dby23q equ [ebp-80]
.dby23q equ [ebp-80]
.dex23 equ dword[ebp-84]
.dey23 equ dword[ebp-88]
.dey23q equ [ebp-88]
.dey23q equ [ebp-88]
.dz23 equ dword[ebp-92]
 
.cx1 equ dword[ebp-96] ; current variables
.cx2 equ dword[ebp-100]
;.cbx1q equ [ebp-104]
.cbx1 equ dword[ebp-104]
.cby1 equ [ebp-108]
;.cbx2q [ebp-112]
.cbx2 equ dword[ebp-112]
.cby2 equ [ebp-116]
;.cex1q equ [ebp-120]
.cex1 equ dword[ebp-120]
.cey1 equ [ebp-124]
;.cex2q equ [ebp-128]
.cex1 equ dword[ebp-112]
.cey1 equ [ebp-116]
.cbx2 equ dword[ebp-120]
.cby2 equ [ebp-124]
.cex2 equ dword[ebp-128]
.cey2 equ [ebp-132]
 
110,10 → 106,10
cld
end if
mov ebp,esp
push edx esi ; store bump map
push edx esi ; store bump map
; push esi ; store e. map
; sub esp,120
.sort3: ; sort triangle coordinates...
.sort3: ; sort triangle coordinates...
cmp ax,bx
jle .sort1
xchg eax,ebx
127,30 → 123,30
xchg dx,.z2
mov .z1,dx
.sort1:
cmp bx,cx
jle .sort2
xchg ebx,ecx
mov edx,dword[.b_x2]
xchg edx,dword[.b_x3]
mov dword[.b_x2],edx
mov edx,dword[.e_x2]
xchg edx,dword[.e_x3]
mov dword[.e_x2],edx
cmp bx,cx
jle .sort2
xchg ebx,ecx
mov edx,dword[.b_x2]
xchg edx,dword[.b_x3]
mov dword[.b_x2],edx
mov edx,dword[.e_x2]
xchg edx,dword[.e_x3]
mov dword[.e_x2],edx
mov dx,.z2
xchg dx,.z3
mov .z2,dx
jmp .sort3
jmp .sort3
.sort2:
push eax ebx ecx ; store triangle coords in variables
push eax ebx ecx ; store triangle coords in variables
; push ebx
; push ecx
 
mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that
and edx,ebx ; if *all* of them are negative a sign flag is raised
and edx,ecx
and edx,eax
test edx,80008000h ; Check both X&Y at once
jne .loop23_done
mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that
and edx,ebx ; if *all* of them are negative a sign flag is raised
and edx,ecx
and edx,eax
test edx,80008000h ; Check both X&Y at once
jne .loop23_done
; mov edx,eax ; eax,ebx,ecx are ORd together into edx which means that
; or edx,ebx ; if any *one* of them is negative a sign flag is raised
; or edx,ecx
165,44 → 161,44
; jg .loop23_done ; {
 
 
mov bx,.y2 ; calc delta 12
sub bx,.y1
jnz .bt_dx12_make
mov ecx,6
xor edx,edx
mov bx,.y2 ; calc delta 12
sub bx,.y1
jnz .bt_dx12_make
mov ecx,6
xor edx,edx
@@:
push edx ;dword 0
loop @b
jmp .bt_dx12_done
push edx ;dword 0
loop @b
jmp .bt_dx12_done
.bt_dx12_make:
mov ax,.x2
sub ax,.x1
mov ax,.x2
sub ax,.x1
cwde
movsx ebx,bx
shl eax,ROUND
movsx ebx,bx
shl eax,ROUND
cdq
idiv ebx
idiv ebx
; mov .dx12,eax
push eax
push eax
 
if Ext=SSE
 
sub esp,16
cvtsi2ss xmm3,ebx ;rcps
sub esp,16
cvtsi2ss xmm3,ebx ;rcps
; mov eax,255
cvtsi2ss xmm4,[i255d] ;eax
divss xmm3,xmm4
rcpss xmm3,xmm3
divss xmm3,xmm4
rcpss xmm3,xmm3
; mulss xmm3,xmm4
shufps xmm3,xmm3,0
shufps xmm3,xmm3,0
 
movd mm0,[.b_x1]
movd mm1,[.b_x2]
movd mm2,[.e_x1]
movd mm3,[.e_x2]
movd mm0,[.b_x1]
movd mm1,[.b_x2]
movd mm2,[.e_x1]
movd mm3,[.e_x2]
; psubsw mm3,mm2
; psubsw mm1,mm0
pxor mm4,mm4
pxor mm4,mm4
punpcklwd mm0,mm4
punpcklwd mm1,mm4
punpcklwd mm2,mm4
212,12 → 208,12
; pslld mm2,ROUND
; pslld mm3,ROUND
cvtpi2ps xmm0,mm0
movlhps xmm0,xmm0
movlhps xmm0,xmm0
cvtpi2ps xmm0,mm2
cvtpi2ps xmm1,mm1
movlhps xmm1,xmm1
movlhps xmm1,xmm1
cvtpi2ps xmm1,mm3
subps xmm1,xmm0
subps xmm1,xmm0
 
; pxor mm4,mm4
; movq mm5,mm1
245,13 → 241,13
; movlhps xmm0,xmm0
; cvtpi2ps xmm0,mm3
; divps xmm1,xmm3
mulps xmm1,xmm3
shufps xmm1,xmm1,10110001b
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords
movhlps xmm1,xmm1
mulps xmm1,xmm3
shufps xmm1,xmm1,10110001b
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords
movhlps xmm1,xmm1
cvtps2pi mm1,xmm1
movq .dey12q,mm0
movq .dby12q,mm1
movq .dey12q,mm0
movq .dby12q,mm1
 
; movd .dex12,mm0
; psrlq mm0,32
263,75 → 259,75
; movd .dby12,mm0
 
else
mov ax,word[.b_x2]
sub ax,word[.b_x1]
mov ax,word[.b_x2]
sub ax,word[.b_x1]
cwde
shl eax,ROUND
shl eax,ROUND
cdq
idiv ebx
idiv ebx
; mov .dbx12,eax
push eax
push eax
 
mov ax,word[.b_y2]
sub ax,word[.b_y1]
mov ax,word[.b_y2]
sub ax,word[.b_y1]
cwde
shl eax,ROUND
shl eax,ROUND
cdq
idiv ebx
idiv ebx
; mov .dby12,eax
push eax
push eax
 
; mov eax,.dbx12
; mov ebx,.dby12
; int3
 
mov ax,word[.e_x2]
sub ax,word[.e_x1]
mov ax,word[.e_x2]
sub ax,word[.e_x1]
cwde
shl eax,ROUND
shl eax,ROUND
cdq
idiv ebx
idiv ebx
; mov .dex12,eax
push eax
push eax
 
mov ax,word[.e_y2]
sub ax,word[.e_y1]
mov ax,word[.e_y2]
sub ax,word[.e_y1]
cwde
shl eax,ROUND
shl eax,ROUND
cdq
idiv ebx
idiv ebx
; mov .dey12,eax
push eax
push eax
 
end if
mov ax,.z2
sub ax,.z1
cwde
shl eax,CATMULL_SHIFT
cdq
idiv ebx
push eax
mov ax,.z2
sub ax,.z1
cwde
shl eax,CATMULL_SHIFT
cdq
idiv ebx
push eax
.bt_dx12_done:
 
mov bx,.y3 ; calc delta13
sub bx,.y1
jnz .bt_dx13_make
mov ecx,6
xor edx,edx
mov bx,.y3 ; calc delta13
sub bx,.y1
jnz .bt_dx13_make
mov ecx,6
xor edx,edx
@@:
push edx ;dword 0
loop @b
jmp .bt_dx13_done
push edx ;dword 0
loop @b
jmp .bt_dx13_done
.bt_dx13_make:
mov ax,.x3
sub ax,.x1
mov ax,.x3
sub ax,.x1
cwde
movsx ebx,bx
shl eax,ROUND
movsx ebx,bx
shl eax,ROUND
cdq
idiv ebx
idiv ebx
; mov .dx13,eax
push eax
push eax
 
if Ext=SSE
 
338,18 → 334,18
cvtsi2ss xmm3,ebx
; mov eax,255
cvtsi2ss xmm4,[i255d]
divss xmm3,xmm4
rcpss xmm3,xmm3
divss xmm3,xmm4
rcpss xmm3,xmm3
; mulss xmm3,xmm4
shufps xmm3,xmm3,0
sub esp,16
shufps xmm3,xmm3,0
sub esp,16
 
movd mm0,[.b_x1]
movd mm1,[.b_x3]
movd mm2,[.e_x1]
movd mm3,[.e_x3]
movd mm0,[.b_x1]
movd mm1,[.b_x3]
movd mm2,[.e_x1]
movd mm3,[.e_x3]
 
pxor mm4,mm4
pxor mm4,mm4
punpcklwd mm0,mm4
punpcklwd mm1,mm4
punpcklwd mm2,mm4
356,59 → 352,59
punpcklwd mm3,mm4
 
cvtpi2ps xmm0,mm0
movlhps xmm0,xmm0
movlhps xmm0,xmm0
cvtpi2ps xmm0,mm2
cvtpi2ps xmm1,mm1
movlhps xmm1,xmm1
movlhps xmm1,xmm1
cvtpi2ps xmm1,mm3
subps xmm1,xmm0
subps xmm1,xmm0
 
; divps xmm1,xmm3
mulps xmm1,xmm3
shufps xmm1,xmm1,10110001b
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords
movhlps xmm1,xmm1
mulps xmm1,xmm3
shufps xmm1,xmm1,10110001b
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords
movhlps xmm1,xmm1
cvtps2pi mm1,xmm1
movq .dey13q,mm0
movq .dby13q,mm1
movq .dey13q,mm0
movq .dby13q,mm1
 
else
 
mov ax,word[.b_x3]
sub ax,word[.b_x1]
mov ax,word[.b_x3]
sub ax,word[.b_x1]
cwde
shl eax,ROUND
shl eax,ROUND
cdq
idiv ebx
idiv ebx
; mov .dbx13,eax
push eax
push eax
 
mov ax,word[.b_y3]
sub ax,word[.b_y1]
mov ax,word[.b_y3]
sub ax,word[.b_y1]
cwde
shl eax,ROUND
shl eax,ROUND
cdq
idiv ebx
idiv ebx
; mov .dby13,eax
push eax
push eax
 
mov ax,word[.e_x3]
sub ax,word[.e_x1]
mov ax,word[.e_x3]
sub ax,word[.e_x1]
cwde
shl eax,ROUND
shl eax,ROUND
cdq
idiv ebx
idiv ebx
; mov .dex13,eax
push eax
push eax
 
mov ax,word[.e_y3]
sub ax,word[.e_y1]
mov ax,word[.e_y3]
sub ax,word[.e_y1]
cwde
shl eax,ROUND
shl eax,ROUND
cdq
idiv ebx
idiv ebx
; mov .dey13,eax
push eax
push eax
 
end if
 
422,25 → 418,25
push eax
.bt_dx13_done:
 
mov bx,.y3 ; calc delta23
sub bx,.y2
jnz .bt_dx23_make
mov ecx,6
xor edx,edx
mov bx,.y3 ; calc delta23
sub bx,.y2
jnz .bt_dx23_make
mov ecx,6
xor edx,edx
@@:
push edx ;dword 0
loop @b
jmp .bt_dx23_done
push edx ;dword 0
loop @b
jmp .bt_dx23_done
.bt_dx23_make:
mov ax,.x3
sub ax,.x2
mov ax,.x3
sub ax,.x2
cwde
movsx ebx,bx
shl eax,ROUND
movsx ebx,bx
shl eax,ROUND
cdq
idiv ebx
idiv ebx
; mov .dx23,eax
push eax
push eax
 
if Ext=SSE
 
447,16 → 443,16
cvtsi2ss xmm3,ebx
; mov eax,255
cvtsi2ss xmm4,[i255d] ;eax
divss xmm3,xmm4
shufps xmm3,xmm3,0
sub esp,16
divss xmm3,xmm4
shufps xmm3,xmm3,0
sub esp,16
 
movd mm0,[.b_x2]
movd mm1,[.b_x3]
movd mm2,[.e_x2]
movd mm3,[.e_x3]
movd mm0,[.b_x2]
movd mm1,[.b_x3]
movd mm2,[.e_x2]
movd mm3,[.e_x3]
 
pxor mm4,mm4
pxor mm4,mm4
punpcklwd mm0,mm4
punpcklwd mm1,mm4
punpcklwd mm2,mm4
463,58 → 459,58
punpcklwd mm3,mm4
 
cvtpi2ps xmm0,mm0
movlhps xmm0,xmm0
movlhps xmm0,xmm0
cvtpi2ps xmm0,mm2
cvtpi2ps xmm1,mm1
movlhps xmm1,xmm1
movlhps xmm1,xmm1
cvtpi2ps xmm1,mm3
subps xmm1,xmm0
subps xmm1,xmm0
 
divps xmm1,xmm3
shufps xmm1,xmm1,10110001b
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords
movhlps xmm1,xmm1
divps xmm1,xmm3
shufps xmm1,xmm1,10110001b
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords
movhlps xmm1,xmm1
cvtps2pi mm1,xmm1
movq .dey23q,mm0
movq .dby23q,mm1
movq .dey23q,mm0
movq .dby23q,mm1
 
else
 
mov ax,word[.b_x3]
sub ax,word[.b_x2]
mov ax,word[.b_x3]
sub ax,word[.b_x2]
cwde
shl eax,ROUND
shl eax,ROUND
cdq
idiv ebx
idiv ebx
; mov .dbx23,eax
push eax
push eax
 
mov ax,word[.b_y3]
sub ax,word[.b_y2]
mov ax,word[.b_y3]
sub ax,word[.b_y2]
cwde
shl eax,ROUND
shl eax,ROUND
cdq
idiv ebx
idiv ebx
; mov .dby23,eax
push eax
push eax
 
mov ax,word[.e_x3]
sub ax,word[.e_x2]
mov ax,word[.e_x3]
sub ax,word[.e_x2]
cwde
shl eax,ROUND
shl eax,ROUND
cdq
idiv ebx
idiv ebx
; mov .dex23,eax
push eax
push eax
 
mov ax,word[.e_y3]
sub ax,word[.e_y2]
mov ax,word[.e_y3]
sub ax,word[.e_y2]
cwde
shl eax,ROUND
shl eax,ROUND
cdq
idiv ebx
idiv ebx
; mov .dey23,eax
push eax
push eax
end if
mov ax,.z3
sub ax,.z2
526,187 → 522,203
push eax
; sub esp,40
.bt_dx23_done:
movsx eax,.x1
shl eax,ROUND
movsx eax,.x1
shl eax,ROUND
; mov .cx1,eax
; mov .cx2,eax
push eax eax
push eax eax
; push eax
 
movsx eax,word[.b_x1]
shl eax,ROUND
mov .cbx1,eax
mov .cbx2,eax
movsx eax,word[.b_x1]
shl eax,ROUND
mov .cbx1,eax
mov .cbx2,eax
; push eax eax
; push eax
 
movsx eax,word[.b_y1]
shl eax,ROUND
mov .cby1,eax
mov .cby2,eax
movsx eax,word[.b_y1]
shl eax,ROUND
mov .cby1,eax
mov .cby2,eax
; push eax eax
; push eax
 
movsx eax,word[.e_x1]
shl eax,ROUND
mov .cex1,eax
mov .cex2,eax
movsx eax,word[.e_x1]
shl eax,ROUND
mov .cex1,eax
mov .cex2,eax
; push eax eax
;push eax
 
movsx eax,word[.e_y1]
shl eax,ROUND
mov .cey1,eax
mov .cey2,eax
sub esp,32
movsx eax,word[.e_y1]
shl eax,ROUND
mov .cey1,eax
mov .cey2,eax
sub esp,32
; push eax eax
;push eax
 
movsx eax,.z1
shl eax,CATMULL_SHIFT
movsx eax,.z1
shl eax,CATMULL_SHIFT
; mov .cz1,eax
; mov .cz2,eax
push eax eax
;push eax
 
movsx ecx,.y1
cmp cx,.y2
jge .loop12_done
movsx ecx,.y1
cmp cx,.y2
jge .loop12_done
.loop12:
call .call_line
call .call_line
 
mov eax,.dx13
add .cx1,eax
mov ebx,.dx12
add .cx2,ebx
 
if Ext >= MMX
movq mm0,.cby2 ; with this optimization object
movq mm1,.cby1 ; looks bit annoying
movq mm2,.cey2
movq mm3,.cey1
paddd mm0,.dby12q
paddd mm1,.dby13q
paddd mm2,.dey12q
paddd mm3,.dey13q
movq .cby2,mm0
movq .cby1,mm1
movq .cey1,mm3
movq .cey2,mm2
mov eax,.dx13
add .cx1,eax
mov ebx,.dx12
add .cx2,ebx
if Ext>= SSE2
movups xmm0,.cey1
movups xmm1,.cey2
movups xmm2,.dey12q
movups xmm3,.dey13q
paddd xmm0,xmm3
paddd xmm1,xmm2
movups .cey1,xmm0
movups .cey2,xmm1
else if (Ext = MMX) | (Ext=SSE)
movq mm0,.cby2 ; with this optimization object
movq mm1,.cby1 ; looks bit annoying
movq mm2,.cey2
movq mm3,.cey1
paddd mm0,.dby12q
paddd mm1,.dby13q
paddd mm2,.dey12q
paddd mm3,.dey13q
movq .cby2,mm0
movq .cby1,mm1
movq .cey1,mm3
movq .cey2,mm2
else
mov edx,.dbx13
add .cbx1,edx
mov eax,.dbx12
add .cbx2,eax
mov ebx,.dby13
add .cby1,ebx
mov edx,.dby12
add .cby2,edx
mov edx,.dbx13
add .cbx1,edx
mov eax,.dbx12
add .cbx2,eax
mov ebx,.dby13
add .cby1,ebx
mov edx,.dby12
add .cby2,edx
 
mov eax,.dex13
add .cex1,eax
mov ebx,.dex12
add .cex2,ebx
mov edx,.dey13
add .cey1,edx
mov eax,.dey12
add .cey2,eax
mov eax,.dex13
add .cex1,eax
mov ebx,.dex12
add .cex2,ebx
mov edx,.dey13
add .cey1,edx
mov eax,.dey12
add .cey2,eax
 
end if
mov ebx,.dz13
add .cz1,ebx
mov edx,.dz12
add .cz2,edx
mov ebx,.dz13
add .cz1,ebx
mov edx,.dz12
add .cz2,edx
 
inc ecx
cmp cx,.y2
jl .loop12
inc ecx
cmp cx,.y2
jl .loop12
.loop12_done:
 
movsx ecx,.y2
cmp cx,.y3
jge .loop23_done
movsx ecx,.y2
cmp cx,.y3
jge .loop23_done
 
movsx eax,.z2
shl eax,CATMULL_SHIFT
mov .cz2,eax
movsx eax,.z2
shl eax,CATMULL_SHIFT
mov .cz2,eax
 
movsx eax,.x2
shl eax,ROUND
mov .cx2,eax
movsx eax,.x2
shl eax,ROUND
mov .cx2,eax
 
movzx eax,word[.b_x2]
shl eax,ROUND
mov .cbx2,eax
movzx eax,word[.b_x2]
shl eax,ROUND
mov .cbx2,eax
 
movzx eax,word[.b_y2]
shl eax,ROUND
mov .cby2,eax
movzx eax,word[.b_y2]
shl eax,ROUND
mov .cby2,eax
 
movzx eax,word[.e_x2]
shl eax,ROUND
mov .cex2,eax
movzx eax,word[.e_x2]
shl eax,ROUND
mov .cex2,eax
 
movzx eax,word[.e_y2]
shl eax,ROUND
mov .cey2,eax
movzx eax,word[.e_y2]
shl eax,ROUND
mov .cey2,eax
 
.loop23:
call .call_line
call .call_line
;if Ext = NON
mov eax,.dx13
add .cx1,eax
mov ebx,.dx23
add .cx2,ebx
mov eax,.dx13
add .cx1,eax
mov ebx,.dx23
add .cx2,ebx
if Ext>= SSE2
movups xmm0,.cey1
movups xmm1,.cey2
movups xmm2,.dey23q
movups xmm3,.dey13q
paddd xmm0,xmm3
paddd xmm1,xmm2
movups .cey1,xmm0
movups .cey2,xmm1
else if (Ext = MMX) | ( Ext = SSE)
movq mm0,.cby2 ; with this mmx optimization object looks bit
movq mm1,.cby1 ; annoying
movq mm2,.cey2
movq mm3,.cey1
paddd mm0,.dby23q
paddd mm1,.dby13q
paddd mm2,.dey23q
paddd mm3,.dey13q
movq .cby2,mm0
movq .cby1,mm1
movq .cey2,mm2
movq .cey1,mm3
 
if Ext >= MMX
movq mm0,.cby2 ; with this mmx optimization object looks bit
movq mm1,.cby1 ; annoying
movq mm2,.cey2
movq mm3,.cey1
paddd mm0,.dby23q
paddd mm1,.dby13q
paddd mm2,.dey23q
paddd mm3,.dey13q
movq .cby2,mm0
movq .cby1,mm1
movq .cey2,mm2
movq .cey1,mm3
 
else
mov edx,.dbx13
add .cbx1,edx
mov eax,.dbx23
add .cbx2,eax
mov ebx,.dby13
add .cby1,ebx
mov edx,.dby23
add .cby2,edx
mov edx,.dbx13
add .cbx1,edx
mov eax,.dbx23
add .cbx2,eax
mov ebx,.dby13
add .cby1,ebx
mov edx,.dby23
add .cby2,edx
 
mov eax,.dex13
add .cex1,eax
mov ebx,.dex23
add .cex2,ebx
mov edx,.dey13
add .cey1,edx
mov eax,.dey23
add .cey2,eax
mov eax,.dex13
add .cex1,eax
mov ebx,.dex23
add .cex2,ebx
mov edx,.dey13
add .cey1,edx
mov eax,.dey23
add .cey2,eax
end if
 
mov ebx,.dz13
add .cz1,ebx
mov edx,.dz23
add .cz2,edx
mov ebx,.dz13
add .cz1,ebx
mov edx,.dz23
add .cz2,edx
;else
; movq mm0,.db13q
; movq mm1,.cbx1q
 
inc ecx
cmp cx,.y3
jl .loop23
inc ecx
cmp cx,.y3
jl .loop23
.loop23_done:
 
mov esp,ebp
mov esp,ebp
ret 34
 
.call_line:
713,27 → 725,27
 
pushad
 
push .cz1
push .cz2
push .z_buff
push .t_bmap
push .t_emap
push dword .cey2
push .cex2
push dword .cey1
push .cex1
push dword .cby2
push .cbx2
push dword .cby1
push .cbx1
push ecx
push .cz1
push .cz2
push .z_buff
push .t_bmap
push .t_emap
push dword .cey2
push .cex2
push dword .cey1
push .cex1
push dword .cby2
push .cbx2
push dword .cby1
push .cbx1
push ecx
 
mov eax,.cx1
sar eax,ROUND
mov ebx,.cx2
sar ebx,ROUND
mov eax,.cx1
sar eax,ROUND
mov ebx,.cx2
sar ebx,ROUND
 
call two_tex_line_z
call two_tex_line_z
 
popad
ret
742,92 → 754,92
;-------------- ebx - x2
;-------------- edi - pointer to screen buffer
;stack - another parameters :
.y equ dword [ebp+4]
.bx1 equ [ebp+8] ; ---
.by1 equ [ebp+12] ; |
.bx2 equ [ebp+16] ; |
.by2 equ [ebp+20] ; |> b. texture and e. texture coords
.ex1 equ [ebp+24] ; |> shifted shl ROUND
.ey1 equ [ebp+28] ; |
.ex2 equ [ebp+32] ; |
.ey2 equ [ebp+36] ; ---
.emap equ [ebp+40] ; b texture offset
.bmap equ [ebp+44] ; e texture offset
.y equ dword [ebp+4]
.bx1 equ [ebp+8] ; ---
.by1 equ [ebp+12] ; |
.bx2 equ [ebp+16] ; |
.by2 equ [ebp+20] ; |> b. texture and e. texture coords
.ex1 equ [ebp+24] ; |> shifted shl ROUND
.ey1 equ [ebp+28] ; |
.ex2 equ [ebp+32] ; |
.ey2 equ [ebp+36] ; ---
.emap equ [ebp+40] ; b texture offset
.bmap equ [ebp+44] ; e texture offset
.z_buff equ dword [ebp+48]
.z2 equ dword [ebp+52] ; -- |> z coords shifted
.z1 equ dword [ebp+56] ; -- shl CATMULL_SHIFT
.z2 equ dword [ebp+52] ; -- |> z coords shifted
.z1 equ dword [ebp+56] ; -- shl CATMULL_SHIFT
 
.x1 equ dword [ebp-4]
.x2 equ dword [ebp-8]
.dbx equ [ebp-12]
.dex equ [ebp-16]
.dby equ [ebp-20]
.dey equ [ebp-24]
.dz equ dword [ebp-28]
.cbx equ [ebp-32]
.cex equ [ebp-36]
.cby equ [ebp-40]
.cey equ [ebp-44]
.cz equ dword [ebp-48]
.x1 equ dword [ebp-4]
.x2 equ dword [ebp-8]
.dbx equ [ebp-12]
.dex equ [ebp-16]
.dby equ [ebp-20]
.dey equ [ebp-24]
.dz equ dword [ebp-28]
.cbx equ [ebp-32]
.cex equ [ebp-36]
.cby equ [ebp-40]
.cey equ [ebp-44]
.cz equ dword [ebp-48]
.czbuff equ dword [ebp-52]
 
mov ebp,esp
mov ebp,esp
 
mov ecx,.y
or ecx,ecx
jl .bl_end
cmp ecx,SIZE_Y
jge .bl_end
mov ecx,.y
or ecx,ecx
jl .bl_end
cmp ecx,SIZE_Y
jge .bl_end
 
cmp eax,ebx
jl @f
je .bl_end
cmp eax,ebx
jl @f
je .bl_end
 
xchg eax,ebx
xchg eax,ebx
if Ext=NON
mov edx,.bx1
xchg edx,.bx2
mov .bx1,edx
mov edx,.by1
xchg edx,.by2
mov .by1,edx
mov edx,.bx1
xchg edx,.bx2
mov .bx1,edx
mov edx,.by1
xchg edx,.by2
mov .by1,edx
 
mov edx,.ex1
xchg edx,.ex2
mov .ex1,edx
mov edx,.ey1
xchg edx,.ey2
mov .ey1,edx
mov edx,.ex1
xchg edx,.ex2
mov .ex1,edx
mov edx,.ey1
xchg edx,.ey2
mov .ey1,edx
else
movq mm0,.bx1
movq mm1,.ex1
movq mm2,.bx2
movq mm3,.ex2
movq .bx2,mm0
movq .ex2,mm1
movq .bx1,mm2
movq .ex1,mm3
movq mm0,.bx1
movq mm1,.ex1
movq mm2,.bx2
movq mm3,.ex2
movq .bx2,mm0
movq .ex2,mm1
movq .bx1,mm2
movq .ex1,mm3
end if
mov edx,.z1
xchg edx,.z2
mov .z1,edx
mov edx,.z1
xchg edx,.z2
mov .z1,edx
@@:
push eax ebx
push eax ebx
; push ebx ;store x1, x2
 
cmp .x1,SIZE_X
jge .bl_end
cmp .x2,0
jle .bl_end
cmp .x1,SIZE_X
jge .bl_end
cmp .x2,0
jle .bl_end
 
mov ebx,.x2
sub ebx,.x1
mov ebx,.x2
sub ebx,.x1
 
if Ext>=SSE
 
sub esp,16
cvtsi2ss xmm3,ebx ;rcps
shufps xmm3,xmm3,0
sub esp,16
cvtsi2ss xmm3,ebx ;rcps
shufps xmm3,xmm3,0
 
; movq mm0,.bx1q
; movq mm1,.bx2q
840,107 → 852,107
; cvtpi2ps xmm1,mm3
 
cvtpi2ps xmm0,.bx1 ;mm0 ; bx1; by1
movlhps xmm0,xmm0
movlhps xmm0,xmm0
cvtpi2ps xmm0,.ex1 ;mm2 ; ex1; ey1
cvtpi2ps xmm1,.bx2 ;mm1 ; bx2; by2
movlhps xmm1,xmm1
movlhps xmm1,xmm1
cvtpi2ps xmm1,.ex2 ;mm3 ; ex2; ey2
subps xmm1,xmm0
; hi lo
divps xmm1,xmm3 ; xmm1 -> dby; dbx; dey; dex
subps xmm1,xmm0
; hi lo
divps xmm1,xmm3 ; xmm1 -> dby; dbx; dey; dex
 
shufps xmm1,xmm1,11011000b
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords
movhlps xmm1,xmm1
shufps xmm1,xmm1,11011000b
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords
movhlps xmm1,xmm1
cvtps2pi mm1,xmm1
movq .dex,mm0 ; hi - lo -> dbx, dex
movq .dey,mm1 ; hi - lo -> dby, dey
movq .dex,mm0 ; hi - lo -> dbx, dex
movq .dey,mm1 ; hi - lo -> dby, dey
 
else
 
mov eax,.bx2 ; calc .dbx
sub eax,.bx1
cdq
idiv ebx
push eax
mov eax,.bx2 ; calc .dbx
sub eax,.bx1
cdq
idiv ebx
push eax
 
mov eax,.ex2 ; calc .dby
sub eax,.ex1
cdq
idiv ebx
push eax
mov eax,.ex2 ; calc .dby
sub eax,.ex1
cdq
idiv ebx
push eax
 
mov eax,.by2 ; calc .dex
sub eax,.by1
cdq
idiv ebx
push eax
mov eax,.by2 ; calc .dex
sub eax,.by1
cdq
idiv ebx
push eax
 
mov eax,.ey2 ; calc .dey
sub eax,.ey1
cdq
idiv ebx
push eax
mov eax,.ey2 ; calc .dey
sub eax,.ey1
cdq
idiv ebx
push eax
 
end if
 
mov eax,.z2 ; calc .dz
sub eax,.z1
cdq
idiv ebx
push eax
mov eax,.z2 ; calc .dz
sub eax,.z1
cdq
idiv ebx
push eax
 
cmp .x1,0 ; set correctly begin variable
jge @f ; CLIPPING ON FUNCTION
; cutting triangle exceedes screen
mov ebx,.x1
neg ebx
imul ebx ; eax = .dz * abs(.x1)
add .z1,eax
mov .x1,0
cmp .x1,0 ; set correctly begin variable
jge @f ; CLIPPING ON FUNCTION
; cutting triangle exceedes screen
mov ebx,.x1
neg ebx
imul ebx ; eax = .dz * abs(.x1)
add .z1,eax
mov .x1,0
 
mov eax,.dbx
imul ebx
add .bx1,eax
mov eax,.dbx
imul ebx
add .bx1,eax
 
mov eax,.dby
imul ebx
add .by1,eax
mov eax,.dby
imul ebx
add .by1,eax
 
mov eax,.dex
imul ebx
add .ex1,eax
mov eax,.dex
imul ebx
add .ex1,eax
 
mov eax,.dey
imul ebx
add .ey1,eax
mov eax,.dey
imul ebx
add .ey1,eax
@@:
cmp .x2,SIZE_X
jl @f
mov .x2,SIZE_X
cmp .x2,SIZE_X
jl @f
mov .x2,SIZE_X
@@:
mov eax,SIZE_X ;calc memory begin in buffers
mov ebx,.y
mul ebx
mov ebx,.x1
add eax,ebx
mov ebx,eax
lea eax,[eax*3]
add edi,eax ; edi - screen
mov esi,.z_buff ; z-buffer filled with dd variables
shl ebx,2
add esi,ebx ; esi - Z buffer
mov eax,SIZE_X ;calc memory begin in buffers
mov ebx,.y
mul ebx
mov ebx,.x1
add eax,ebx
mov ebx,eax
lea eax,[eax*3]
add edi,eax ; edi - screen
mov esi,.z_buff ; z-buffer filled with dd variables
shl ebx,2
add esi,ebx ; esi - Z buffer
 
mov ecx,.x2
sub ecx,.x1
; init current variables
push dword .bx1 ;.by1 .ex1 .ey1 .z1 esi
push dword .ex1
push dword .by1
push dword .ey1
mov ecx,.x2
sub ecx,.x1
; init current variables
push dword .bx1 ;.by1 .ex1 .ey1 .z1 esi
push dword .ex1
push dword .by1
push dword .ey1
 
push .z1 ; current z shl CATMULL_SHIFT
push esi
push .z1 ; current z shl CATMULL_SHIFT
push esi
 
if Ext >= MMX
pxor mm0,mm0
959,104 → 971,104
.draw:
; if TEX = SHIFTING ;bump drawing only in shifting mode
if Ext=NON
mov esi,.czbuff ; .czbuff current address in buffer
mov ebx,.cz ; .cz - cur z position
cmp ebx,dword[esi]
mov esi,.czbuff ; .czbuff current address in buffer
mov ebx,.cz ; .cz - cur z position
cmp ebx,dword[esi]
else
mov ebx,.cz
cmp ebx,dword[edx]
mov ebx,.cz
cmp ebx,dword[edx]
end if
jge .skip
jge .skip
 
if Ext=NON
mov eax,.cby
mov esi,.cbx
sar eax,ROUND
sar esi,ROUND
shl eax,TEX_SHIFT ;-
add esi,eax
lea esi,[esi*3] ;- ; esi - current b. texture addres
add esi,.bmap
mov eax,.cby
mov esi,.cbx
sar eax,ROUND
sar esi,ROUND
shl eax,TEX_SHIFT ;-
add esi,eax
lea esi,[esi*3] ;- ; esi - current b. texture addres
add esi,.bmap
 
mov ebx,.cex ;.cex - current env map X
mov eax,.cey ;.cey - current env map y
sar ebx,ROUND
sar eax,ROUND
mov ebx,.cex ;.cex - current env map X
mov eax,.cey ;.cey - current env map y
sar ebx,ROUND
sar eax,ROUND
 
shl eax,TEX_SHIFT
add ebx,eax
lea ebx,[ebx*3]
add ebx,.emap
shl eax,TEX_SHIFT
add ebx,eax
lea ebx,[ebx*3]
add ebx,.emap
 
 
else
movq mm5,mm4 ;.cey
psrad mm5,ROUND
pslld mm5,TEX_SHIFT
movq mm6,mm3 ;.cex
psrad mm6,ROUND
paddd mm5,mm6
movq mm6,mm5
paddd mm5,mm5
paddd mm5,mm6
paddd mm5,.emap
movd esi,mm5
psrlq mm5,32
movd ebx,mm5
movq mm5,mm4 ;.cey
psrad mm5,ROUND
pslld mm5,TEX_SHIFT
movq mm6,mm3 ;.cex
psrad mm6,ROUND
paddd mm5,mm6
movq mm6,mm5
paddd mm5,mm5
paddd mm5,mm6
paddd mm5,.emap
movd esi,mm5
psrlq mm5,32
movd ebx,mm5
end if
if Ext>=MMX
movd mm1,[esi]
movd mm2,[ebx]
punpcklbw mm1,mm0
punpcklbw mm2,mm0
pmullw mm1,mm2
psrlw mm1,8
packuswb mm1,mm0
movd [edi],mm1
mov ebx,.cz
mov dword[edx],ebx
movd mm1,[esi]
movd mm2,[ebx]
punpcklbw mm1,mm0
punpcklbw mm2,mm0
pmullw mm1,mm2
psrlw mm1,8
packuswb mm1,mm0
movd [edi],mm1
mov ebx,.cz
mov dword[edx],ebx
else
cld ; esi - tex e.
lodsb ; ebx - tex b.
mov dl,[ebx]
mul dl
shr ax,8
stosb
inc ebx
lodsb
mov dl,[ebx]
mul dl
shr ax,8
stosb
inc ebx
lodsb
mov dl,[ebx]
mul dl
shr ax,8
stosb
mov ebx,.cz
mov esi,.czbuff
mov dword[esi],ebx
jmp .no_skip
cld ; esi - tex e.
lodsb ; ebx - tex b.
mov dl,[ebx]
mul dl
shr ax,8
stosb
inc ebx
lodsb
mov dl,[ebx]
mul dl
shr ax,8
stosb
inc ebx
lodsb
mov dl,[ebx]
mul dl
shr ax,8
stosb
mov ebx,.cz
mov esi,.czbuff
mov dword[esi],ebx
jmp .no_skip
end if
.skip:
add edi,3
add edi,3
 
if Ext = NON
.no_skip:
add .czbuff,4
mov eax,.dbx
add .cbx,eax
mov eax,.dby
add .cby,eax
mov eax,.dex
add .cex,eax
mov eax,.dey
add .cey,eax
add .czbuff,4
mov eax,.dbx
add .cbx,eax
mov eax,.dby
add .cby,eax
mov eax,.dex
add .cex,eax
mov eax,.dey
add .cey,eax
else
add edx,4
paddd mm3,.dex
paddd mm4,.dey
add edx,4
paddd mm3,.dex
paddd mm4,.dey
; movq mm5,mm3
; movq mm6,mm4
; psrad mm5,ROUND
1064,16 → 1076,16
; movq .cex,mm3
; movq .cey,mm4
end if
mov eax,.dz
add .cz,eax
mov eax,.dz
add .cz,eax
if Ext = NON
dec ecx
jnz .draw
dec ecx
jnz .draw
else
loop .draw
loop .draw
end if
 
.bl_end:
mov esp,ebp
mov esp,ebp
ret 56
 
/programs/demos/3DS/VIEW3DS.ASM
1,5 → 1,5
 
; application : View3ds ver. 0.059 - tiny .3ds files viewer.
; application : View3ds ver. 0.060 - tiny .3ds files viewer.
; compiler : FASM
; system : KolibriOS
; author : Macgub aka Maciej Guba
35,7 → 35,7
MMX = 1
SSE = 2
SSE2 = 3
Ext = MMX ;Ext={ NON | MMX | SSE | SSE2 }
Ext = MMX ;Ext={ NON | MMX | SSE | SSE2 }
 
; 0 for short names (Menuet-compatible), 1 for long names (Kolibri features)
USE_LFN = 1
/programs/demos/3DS/readme.txt
1,11 → 1,8
View3ds 0.059 - tiny viewer for .3ds files.
View3ds 0.060 - tiny viewer for .3ds files.
 
What's new?
1. Optimizations of the bump and parallel two-texture mapping functions
(files bump_cat.inc & two_tex.inc).
On my P4 the changes are barely visible, but on a dual core in KlbrInWin
the optimizations run pretty nicely.
1. Header fix by Leency.
2. SSE2 optimizations by me. (Most visible in BUMP_TEX mode.)
 
Buttons description:
1. rotary: chooses the rotation axis: x, y, x+y.
33,4 → 30,4
18. re-map tex -> re-maps texture and bump map coordinates; to change the spherical mapping
around an axis, use the 'xchg' and 'mirror' buttons, then press the 're-map tex' button.
 
Macgub Jun 2011
Macgub Aug 2011