Subversion Repositories Kolibri OS

Compare Revisions

Regard whitespace Rev 9511 → Rev 9512

/programs/demos/view3ds/3dmath.inc
245,7 → 245,7
;---------------------- out : none ------------------------
 
make_vector_r:
if Ext < SSE2
if Ext < SSE
fninit
fld dword[edi] ;edi+x3d
fsub dword[esi] ;esi+x3d
297,6 → 297,7
fstp dword [ebx+vec_z]
ret
cross_aligned:
; params as above cross_p
movaps xmm0,[esi]
movaps xmm1,[esi]
movaps xmm2,[edi]
607,6 → 608,19
;----------------------------------------------
; esi - pointer to 3x3 matrix
add_scale_to_matrix:
if Ext>SSE
movss xmm0,[rsscale]
shufps xmm0,xmm0,0
movups xmm1,[esi]
movups xmm2,[esi+16]
movss xmm3,[esi+32]
mulps xmm1,xmm0
mulps xmm2,xmm0
mulss xmm3,xmm0
movups [esi],xmm1
movups [esi+16],xmm2
movss [esi+32],xmm3
else
fninit
fld [rsscale]
fld dword[esi] ;-----
639,7 → 653,7
fld dword[esi+32]
fmulp st1,st
fstp dword[esi+32] ;------
 
end if
ret
 
;in esi - offset to 3d points (point as 3 dwords float)
646,7 → 660,12
; edi - offset to 2d points ( as 3 words integer)
; ecx - number of points
translate_points: ; just convert into integer; z coord still needed
if Ext < SSE
fninit
else
; movaps xmm1,[vect_x]
end if
 
.again:
if 0
fld dword[esi+8]
676,7 → 695,18
fiadd [vect_y]
fistp word[edi+2]
end if
; movups xmm0,[esi]
if Ext>=SSE
movups xmm0,[esi]
cvtps2dq xmm0,xmm0
packssdw xmm0,xmm0
paddw xmm0,[vect_x]
movd [edi],xmm0
; psrldq xmm0,4
; movd eax,xmm0
pextrw eax,xmm0,6
mov [edi+4],ax
else
 
; cvtps2dq xmm0,xmm0
; packsdw xmm0,xmm0
; movq [edi]
688,9 → 718,12
fistp word[edi+2]
fld dword[esi+8]
fistp word[edi+4]
end if
 
add esi,12
add edi,6
dec ecx
jnz .again
; dec ecx
; jnz .again
loop .again
 
ret
/programs/demos/view3ds/3r_phg.inc
341,7 → 341,6
pop ebp
 
ret
align 16
real_phong_line_z:
; in:
; xmm0 - normal vector 1
456,7 → 455,7
sub ecx,.lx1
movaps xmm0,.n1
movss xmm2,.z1
align 16
 
.ddraw:
movss xmm7,xmm2
cmpnltss xmm7,dword[esi]
/programs/demos/view3ds/3ray_shd.inc
353,7 → 353,7
 
 
ret
align 16
 
ray_shd_l:
; in:
; xmm0 - normal vector 1
392,7 → 392,7
.dn equ [ebp-96]
.dz equ [ebp-100]
.y equ [ebp-104]
; .cur_tri equ [ebp-108]
.startx equ [ebp-108]
.cnv equ [ebp-128]
.Rlen equ [ebp-128-16]
.r1 equ [ebp-128-32]
431,6 → 431,7
movaps .n1,xmm0
movaps .n2,xmm1
mov .lx1,eax
; mov .startx,eax
mov .lx2,ebx
movlps .z1,xmm3
 
546,9 → 547,15
mov edi,[triangles_ptr]
xor ecx,ecx
.nx_tri: ; next triangle
; mov eax,.lx1
; cmp eax,.startx
; je @f ; prevent artifact borders on tri
; cmp eax,.lx2 ; NOT work as I want !!
; je @f
 
cmp ecx,.cur_tri ; prevent self shadowing
je .skipp
@@:
if 0
mov edi,ecx
imul edi,[i12]
/programs/demos/view3ds/3stencil.inc
16,10 → 16,10
.y3 equ [ebp-12]
 
.dx12 equ dword[ebp-20]
.dx13 equ dword[ebp-24]
.dx23 equ dword[ebp-28]
.dz12 equ dword[ebp-32]
.dz13 equ dword[ebp-36]
.dz12 equ dword[ebp-24]
.dx13 equ dword[ebp-28]
.dz13 equ dword[ebp-32]
.dx23 equ dword[ebp-36]
.dz23 equ dword[ebp-40]
.zz2 equ [ebp-44]
.zz1 equ [ebp-48]
26,12 → 26,12
.z3 equ [ebp-56]
.z2 equ [ebp-60]
.z1 equ [ebp-64]
.s_buff equ [ebp-68]
;.s_buff equ [ebp-68]
 
push ebp
mov ebp,esp
sub esp,128
and ebp,0xfffffff0
; sub esp,128
; and ebp,0xfffffff0
.sort2:
cmp ax,bx
jle .sort1
44,19 → 44,24
shufps xmm0,xmm0,11011000b
jmp .sort2
.sort3:
mov .y1,eax ; store triangle coordinates in user friendly variables
mov .y2,ebx
mov .y3,ecx
; mov .y1,eax ; store triangle coordinates in user friendly variables
; mov .y2,ebx
; mov .y3,ecx
push eax
push ebx
push ecx
sub esp,60
 
 
; mov edx,100.11
; movd xmm0,edx
; shufps xmm0,xmm0,11100000b
 
movaps .z1,xmm0
movups .z1,xmm0
; mov dword .z1,edx
; mov .z2,edx
; mov .z3,edx
mov .s_buff,esi
; mov .s_buff,esi
 
mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that
and edx,ebx ; if *all* of them are negative a sign flag is raised
157,7 → 162,7
; mov edx,0.11
; movd xmm0,edx
; shufps xmm0,xmm0,11100000b
mov esi,.s_buff
; mov esi,.s_buff
 
call stencil_line
 
192,7 → 197,7
sar ebx,ROUND2
sar eax,ROUND2
movlps xmm0,.zz1
mov esi,.s_buff
; mov esi,.s_buff
 
 
call stencil_line
215,7 → 220,7
jl .loop2
.loop2_end:
 
add esp,128
mov esp,ebp
pop ebp
 
ret
312,17 → 317,19
sub ecx,.x1
movss xmm2,.z1 ; cz
.ccalc:
movss xmm1,xmm2
cmpltss xmm1,dword[esi]
movd eax,xmm1
cmp eax,-1
jnz @f
; movss xmm1,xmm2
; cmpltss xmm1,dword[esi]
; movd eax,xmm1
; cmp eax,-1
comiss xmm2,[esi]
ja @f
movss dword[esi],xmm2
@@:
add esi,4
addss xmm2,.dz
sub ecx,1
jnz .ccalc
; sub ecx,1
; jnz .ccalc
loop .ccalc
.l_quit:
mov esp,ebp
pop ebp
/programs/demos/view3ds/a_procs.inc
1,5 → 1,64
;=============================================================
 
remove_dead_tri:
; Remove degenerate triangles - those with two or more equal vertex
; indices - from the triangle list, compacting the list in place.
; in:  [triangles_ptr]       - pointer to triangles, 3 dword indices each
;      [triangles_count_var] - number of triangles in the list
; out: [triangles_count_var] - number of triangles kept
; not preserved: eax, ecx, edx, esi, edi, xmm0, xmm7, flags
    mov     edi,-1
    movd    xmm7,edi
    pshufd  xmm7,xmm7,0             ; xmm7 = -1 in every dword: "dead" mark

    mov     esi,[triangles_ptr]
    mov     ecx,[triangles_count_var]
    test    ecx,ecx
    jz      .done                   ; empty list: `loop` entered with ecx = 0
                                    ; would wrap around and run 2^32 times

; pass 1: overwrite every degenerate triangle with three -1 dwords
  .chck:
    mov     eax,[esi]
    cmp     eax,[esi+4]             ; index 0 = index 1 ?
    je      .tri_fail
    cmp     eax,[esi+8]             ; index 0 = index 2 ?
    je      .tri_fail
    mov     eax,[esi+4]
    cmp     eax,[esi+8]             ; index 1 = index 2 ?
    jne     .tri_ok
  .tri_fail:
    movq    [esi],xmm7              ; mark indices 0 and 1
    movd    [esi+8],xmm7            ; mark index 2
  .tri_ok:
    add     esi,12
    loop    .chck

; pass 2: copy every triangle whose first index is not -1 down to the
; next free slot and count the triangles kept
    mov     esi,[triangles_ptr]     ; esi = read position
    mov     edi,esi                 ; edi = write position
    mov     ecx,[triangles_count_var]
    xor     edx,edx                 ; edx = number of triangles kept
  .cp:
    cmp     [esi],dword -1
    je      @f
    ; copy exactly 12 bytes (8 + 4); a single 16-byte load would read
    ; 4 bytes beyond the last triangle of the list
    movq    xmm0,[esi]
    movq    [edi],xmm0
    movd    xmm0,[esi+8]
    movd    [edi+8],xmm0
    add     edi,12
    inc     edx
  @@:
    add     esi,12
    loop    .cp
    mov     [triangles_count_var],edx
  .done:
    ret
;========================================================
 
if Ext > SSE2
;--------------------------------------------------------------------
init_point_lights:
/programs/demos/view3ds/bump_tex.inc
25,32 → 25,43
;---------------------- texture coordinates-----
;-- Z-buffer - filled with coordinates as dword --------
;-- (Z coor. as word) shl CATMULL_SHIFT ----------------
.b_x1 equ ebp+4 ; procedure don't save registers !!!
.b_y1 equ ebp+6 ; each coordinate as word
.b_x2 equ ebp+8
.b_y2 equ ebp+10 ; b - bump map coords
.b_x3 equ ebp+12 ; e - env map coords
.b_y3 equ ebp+14
.e_x1 equ ebp+16
.e_y1 equ ebp+18
.e_x2 equ ebp+20
.e_y2 equ ebp+22
.e_x3 equ ebp+24
.e_y3 equ ebp+26
.z1 equ word[ebp+28]
.z2 equ word[ebp+30]
.z3 equ word[ebp+32]
.z_buff equ dword[ebp+34] ; pointer to Z-buffer
.tex_ptr equ dword[ebp+38] ; ptr to texture
.t_x1 equ ebp+42 ; texture coords
.t_y1 equ ebp+44
.t_x2 equ ebp+46
.t_y2 equ ebp+48
.t_x3 equ ebp+50
.t_y3 equ ebp+52
 
 
 
.t_y1 equ ebp+4 ; procedure don't save registers !!!
.t_x1 equ ebp+6 ; each coordinate as word
.e_y1 equ ebp+8 ; texture coords
.e_x1 equ ebp+10
.b_y1 equ ebp+12
.b_x1 equ ebp+14
 
 
 
.t_y2 equ ebp+16
.t_x2 equ ebp+18 ; b - bump map coords
.e_y2 equ ebp+20 ; texture coords
.e_x2 equ ebp+22
.b_y2 equ ebp+24
.b_x2 equ ebp+26
 
 
 
 
.t_y3 equ ebp+28 ; e - env map coords
.t_x3 equ ebp+30
.e_y3 equ ebp+32 ; texture coords
.e_x3 equ ebp+34
.b_y3 equ ebp+36
.b_x3 equ ebp+38
 
.z1 equ word[ebp+40]
.z2 equ word[ebp+42]
.z3 equ word[ebp+44]
.z_buff equ dword[ebp+46] ; pointer to Z-buffer
.tex_ptr equ dword[ebp+50] ; ptr to texture
 
 
 
.t_bmap equ dword[ebp-4] ; pointer to bump map
.t_emap equ dword[ebp-8] ; pointer to env map
.x1 equ word[ebp-10]
60,38 → 71,9
.x3 equ word[ebp-18]
.y3 equ word[ebp-20]
 
if 0 ;Ext <= SSE2
 
.dx12 equ dword[edi-4]
.dz12 equ [edi-8]
.dbx12 equ dword[edi-12]
.dby12 equ [edi-16]
.dex12 equ dword[edi-20]
.dey12 equ [edi-24]
.dtx12 equ dword[edi-28]
.dty12 equ [edi-32]
 
.dx13 equ dword[ebp-52-4*1]
.dz13 equ [ebp-52-4*2]
.dbx13 equ dword[ebp-52-4*3]
.dby13 equ [ebp-52-4*4]
.dex13 equ dword[ebp-52-4*5]
.dey13 equ [ebp-52-4*6]
.dtx13 equ dword[ebp-52-4*7]
.dty13 equ [ebp-52-4*8]
 
 
.dx23 equ dword[ebp-(52+4*9)]
.dz23 equ [ebp-(52+4*10)]
.dbx23 equ dword[ebp-(52+4*11)]
.dby23 equ [ebp-(52+4*12)]
.dex23 equ dword[ebp-(52+4*13)]
.dey23 equ [ebp-(52+4*14)]
.dtx23 equ dword[ebp-(52+4*15)]
.dty23 equ [ebp-(52+4*16)]
 
else
 
.dx12 equ dword[ebp-24]
.dz12 equ [ebp-28]
.dbx12 equ dword[ebp-32]
120,32 → 102,10
.dtx23 equ dword[ebp-(52+4*15)]
.dty23 equ [ebp-(52+4*16)]
 
end if
 
if Ext < SSE
 
.cx1 equ dword[ebp-(52+4*17)] ; current variables
.cz1 equ [ebp-(52+4*18)]
.cx2 equ dword[ebp-(52+4*19)]
.cz2 equ [ebp-(52+4*20)]
.cbx1 equ dword[ebp-(52+4*21)]
.cby1 equ [ebp-(52+4*22)]
.cbx2 equ dword[ebp-(52+4*23)]
.cby2 equ [ebp-(52+4*24)]
.cex1 equ dword[ebp-(52+4*25)]
.cey1 equ [ebp-(52+4*26)]
.cex2 equ dword[ebp-(52+4*27)]
.cey2 equ [ebp-(52+4*28)]
 
.ctx1 equ dword[ebp-(52+4*29)]
.cty1 equ [ebp-(52+4*30)]
.ctx2 equ dword[ebp-(52+4*31)]
.cty2 equ [ebp-(52+4*32)]
 
else
 
.cx1 equ dword[ebp-(52+4*17)] ; current variables
.cz1 equ [ebp-(52+4*18)]
.cbx1 equ dword[ebp-(52+4*19)]
.cby1 equ [ebp-(52+4*20)]
.cex1 equ dword[ebp-(52+4*21)]
162,7 → 122,10
.ctx2 equ dword[ebp-(52+4*31)]
.cty2 equ [ebp-(52+4*32)]
 
if Ext >+ MMX
emms
end if
 
cld
mov ebp,esp
push edx ; store bump map
172,15 → 135,23
cmp ax,bx
jle .sort1
xchg eax,ebx
mov edx,dword[.b_x1]
xchg edx,dword[.b_x2]
mov dword[.b_x1],edx
mov edx,dword[.e_x1]
xchg edx,dword[.e_x2]
mov dword[.e_x1],edx
mov edx,dword[.t_x1]
xchg edx,dword[.t_x2]
mov dword[.t_x1],edx
if Ext >= MMX
movq mm0,[.t_y1]
movq mm1,[.t_y2]
movq [.t_y1],mm1
movq [.t_y2],mm0
end if
mov edx,dword[.b_y1]
xchg edx,dword[.b_y2]
mov dword[.b_y1],edx
if Ext = NON
mov edx,dword[.e_y1]
xchg edx,dword[.e_y2]
mov dword[.e_y1],edx
mov edx,dword[.t_y1]
xchg edx,dword[.t_y2]
mov dword[.t_y1],edx
end if
mov dx,.z1
xchg dx,.z2
mov .z1,dx
188,15 → 159,23
cmp bx,cx
jle .sort2
xchg ebx,ecx
mov edx,dword[.b_x2]
xchg edx,dword[.b_x3]
mov dword[.b_x2],edx
mov edx,dword[.e_x2]
xchg edx,dword[.e_x3]
mov dword[.e_x2],edx
mov edx,dword[.t_x2]
xchg edx,dword[.t_x3]
mov dword[.t_x2],edx
if Ext >= MMX
movq mm0,[.t_y3]
movq mm1,[.t_y2]
movq [.t_y3],mm1
movq [.t_y2],mm0
end if
mov edx,dword[.b_y2]
xchg edx,dword[.b_y3]
mov dword[.b_y2],edx
if Ext = NON
mov edx,dword[.e_y2]
xchg edx,dword[.e_y3]
mov dword[.e_y2],edx
mov edx,dword[.t_y2]
xchg edx,dword[.t_y3]
mov dword[.t_y2],edx
end if
mov dx,.z2
xchg dx,.z3
mov .z2,dx
228,166 → 207,57
mov bx,.y2 ; calc delta 12
sub bx,.y1
jnz .bt_dx12_make
if 0 ;Ext >= SSE2
pxor xmm0,xmm0
movups .dty12,xmm0
movups .dey12,xmm0
sub esp,16
else
 
mov ecx,8
xor edx,edx
@@:
push edx ;dword 0
push edx
loop @b
end if
 
jmp .bt_dx12_done
.bt_dx12_make:
movsx ebx,bx
if Ext >= SSE2
mov eax,1 shl 15
cdq
idiv ebx
mov ebx,eax
 
 
if Ext>=SSE
sub esp,32
; mov eax,256
cvtsi2ss xmm4,[i255d]
cvtsi2ss xmm3,ebx ;rcps
if 0 ;Ext >= SSE2
mov edi,ebp
sub edi,512
or edi,0x0000000f
end if
divss xmm3,xmm4
shufps xmm3,xmm3,0
mov ax,.x2
sub ax,.x1
cwde
imul ebx
sar eax,15 - ROUND
push eax
 
movd mm0,[.b_x1]
movd mm1,[.b_x2]
movd mm2,[.e_x1]
movd mm3,[.e_x2]
 
pxor mm4,mm4
punpcklwd mm0,mm4
punpcklwd mm1,mm4
punpcklwd mm2,mm4
punpcklwd mm3,mm4
 
psubd mm1,mm0
psubd mm3,mm2
 
cvtpi2ps xmm1,mm1
movlhps xmm1,xmm1
cvtpi2ps xmm1,mm3
 
divps xmm1,xmm3 ;xmm1--> | dby | dbx | dey | dex |
 
shufps xmm1,xmm1,10110001b
;xmm1--> | dbx | dby | dex | dey |
;1 movups .dey12,xmm1
cvtps2pi mm0,xmm1 ;mm0,xmm1 ; mm0 -> 2 delta dwords
movhlps xmm1,xmm1
cvtps2pi mm1,xmm1 ;mm1,xmm1
movq .dey12,mm0
movq .dby12,mm1
;-------------
; pxor mm0,mm0
; pxor mm1,mm1
;/ pinsrw mm0,.z1,1
;/ pinsrw mm0,.x1,0
;/ pinsrw mm1,.z2,1
;/ pinsrw mm1,.x2,0
mov ax,.z2
sub ax,.z1
cwde
imul ebx
sar eax,15 - ROUND
push eax
 
mov dx,.x2
sub dx,.x1
movsx edx,dx
sub esp,4*6
movd xmm0,ebx
pshuflw xmm0,xmm0,0
movlhps xmm0,xmm0
movdqu xmm1,[.t_y1]
movdqu xmm2,[.t_y2]
psubw xmm2,xmm1
movdqa xmm3,xmm2
pmullw xmm2,xmm0
pmulhw xmm3,xmm0
movhlps xmm4,xmm2
movhlps xmm5,xmm3
punpcklwd xmm2,xmm3
punpcklwd xmm4,xmm5
psrad xmm2,15 - ROUND
psrad xmm4,15 - ROUND
movdqu .dty12,xmm2
movq .dby12,xmm4
else
 
;/ movd mm1,eax
 
;/ punpcklwd mm0,mm4
;/ punpcklwd mm1,mm4
 
; cvtpi2ps xmm1,mm1
; cvtpi2ps xmm2,mm0
; subps xmm1,xmm2
 
;/ psubd mm1,mm0
 
movd mm2,[.t_x1]
movd mm3,[.t_x2]
 
punpcklwd mm2,mm4
punpcklwd mm3,mm4
psubd mm3,mm2
 
;/ cvtpi2ps xmm1,mm1
cvtsi2ss xmm1,eax
movlhps xmm1,xmm1
cvtsi2ss xmm1,edx
; movss xmm1,xmm4
shufps xmm1,xmm1,00101111b
cvtpi2ps xmm1,mm3
 
divps xmm1,xmm3 ; xmm1--> | dx | dz | dty | dtx |
 
shufps xmm1,xmm1,11100001b
; xmm1--> | dx | dz | dtx | dty |
;1 movlps .dty12,xmm1
;1 movhps .dz12,xmm1
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords | dtx | dty |
movhlps xmm1,xmm1
cvtps2pi mm1,xmm1
movq .dty12,mm0
movq .dz12,mm1
;----
; mov ax,.z2
; sub ax,.z1
; cwde
; mov bx,.x2
; sub bx,.x1
; movsx ebx,bx
; movd mm1,eax
; psllq mm1,32
; movd mm1,ebx
 
;; push ebx
;; push eax
;; movq mm1,[esp]
;; add esp,8
;;; mov ax,.z1
;;; mov bx,.z2
;;; shl eax,16
;;; shl ebx,16
;;; mov ax,.x1
;;; mov bx,.x2
; movd mm2,[.t_x1]
; movd mm3,[.t_x2]
;; movd mm0,eax
;; movd mm1,ebx
 
; pxor mm4,mm4
;; punpcklwd mm0,mm4
;; punpcklwd mm1,mm4
; punpcklwd mm2,mm4
; punpcklwd mm3,mm4
 
;; psubd mm1,mm0
; psubd mm3,mm2
 
 
; cvtpi2ps xmm1,mm1
; movlhps xmm1,xmm1
; cvtpi2ps xmm1,mm3
 
; divps xmm1,xmm3 ; xmm1--> | dz | dx | dty | dtx |
 
; shufps xmm1,xmm1,10110001b
; xmm1--> | dx | dz | dtx | dty |
; cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords | dtx | dty |
; movhlps xmm1,xmm1
; cvtps2pi mm1,xmm1 ; mm1 --> 2 delta dwords | dx | dz |
; movq .dty12,mm0
; movq .dz12,mm1
else
mov ax,.x2
sub ax,.x1
cwde
473,79 → 343,46
.bt_dx13_make:
movsx ebx,bx
 
if Ext>=SSE
if Ext >= SSE2
mov eax,1 shl 15
cdq
idiv ebx
mov ebx,eax
 
sub esp,32
; mov eax,256
cvtsi2ss xmm4,[i255d]
cvtsi2ss xmm3,ebx ;rcps
divss xmm3,xmm4
shufps xmm3,xmm3,0
 
movd mm0,[.b_x1]
movd mm1,[.b_x3]
movd mm2,[.e_x1]
movd mm3,[.e_x3]
mov ax,.x3
sub ax,.x1
cwde
imul ebx
sar eax,15 - ROUND
push eax
; mov .dx12,eax
 
pxor mm4,mm4
punpcklwd mm0,mm4
punpcklwd mm1,mm4
punpcklwd mm2,mm4
punpcklwd mm3,mm4
 
psubd mm1,mm0
psubd mm3,mm2
 
cvtpi2ps xmm1,mm1
movlhps xmm1,xmm1
cvtpi2ps xmm1,mm3
 
divps xmm1,xmm3 ;xmm1--> | dby | dbx | dey | dex |
 
shufps xmm1,xmm1,10110001b
;xmm1--> | dbx | dby | dex | dey |
;1 movups .dey13,xmm1
 
cvtps2pi mm0,xmm1 ;mm0,xmm1 ; mm0 -> 2 delta dwords
movhlps xmm1,xmm1
cvtps2pi mm1,xmm1 ;mm1,xmm1
movq .dey13,mm0
movq .dby13,mm1
 
mov ax,.z3
sub ax,.z1
cwde
imul ebx
sar eax,15 - ROUND
push eax
 
mov dx,.x3
sub dx,.x1
movsx edx,dx
 
movd mm2,[.t_x1]
movd mm3,[.t_x3]
 
punpcklwd mm2,mm4
punpcklwd mm3,mm4
psubd mm3,mm2
 
cvtsi2ss xmm1,eax
movlhps xmm1,xmm1
cvtsi2ss xmm1,edx
shufps xmm1,xmm1,00101111b
cvtpi2ps xmm1,mm3
 
divps xmm1,xmm3 ; xmm1--> | dx | dz | dty | dtx |
 
shufps xmm1,xmm1,11100001b
; xmm1--> | dx | dz | dtx | dty |
;1 movlps .dty13,xmm1
;1 movhps .dz13,xmm1
 
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords | dtx | dty |
movhlps xmm1,xmm1
cvtps2pi mm1,xmm1
movq .dty13,mm0
movq .dz13,mm1
 
sub esp,4*6
movd xmm0,ebx
pshuflw xmm0,xmm0,0
movlhps xmm0,xmm0
movdqu xmm1,[.t_y1]
movdqu xmm2,[.t_y3]
psubw xmm2,xmm1
movdqa xmm3,xmm2
pmullw xmm2,xmm0
pmulhw xmm3,xmm0
movhlps xmm4,xmm2
movhlps xmm5,xmm3
punpcklwd xmm2,xmm3
punpcklwd xmm4,xmm5
psrad xmm2,15 - ROUND
psrad xmm4,15 - ROUND
movdqu .dty13,xmm2
movq .dby13,xmm4
else
 
mov ax,.x3
634,80 → 471,48
jmp .bt_dx23_done
.bt_dx23_make:
movsx ebx,bx
if Ext >= SSE2
 
if Ext>=SSE
mov eax,1 shl 15
cdq
idiv ebx
; push eax
mov ebx,eax
 
sub esp,32
; mov eax,256
cvtsi2ss xmm4,[i255d]
cvtsi2ss xmm3,ebx ;rcps
divss xmm3,xmm4
shufps xmm3,xmm3,0
 
movd mm0,[.b_x2]
movd mm1,[.b_x3]
movd mm2,[.e_x2]
movd mm3,[.e_x3]
mov ax,.x3
sub ax,.x2
cwde
imul ebx
sar eax,15 - ROUND
push eax
; mov .dx12,eax
 
pxor mm4,mm4
punpcklwd mm0,mm4
punpcklwd mm1,mm4
punpcklwd mm2,mm4
punpcklwd mm3,mm4
 
psubd mm1,mm0
psubd mm3,mm2
 
cvtpi2ps xmm1,mm1
movlhps xmm1,xmm1
cvtpi2ps xmm1,mm3
 
divps xmm1,xmm3 ;xmm1--> | dby | dbx | dey | dex |
 
shufps xmm1,xmm1,10110001b
;xmm1--> | dbx | dby | dex | dey |
;1 movups .dey23,xmm1
 
cvtps2pi mm0,xmm1 ;mm0,xmm1 ; mm0 -> 2 delta dwords
movhlps xmm1,xmm1
cvtps2pi mm1,xmm1 ;mm1,xmm1
movq .dey23,mm0
movq .dby23,mm1
 
mov ax,.z3
sub ax,.z2
cwde
imul ebx
sar eax,15 - ROUND
push eax
 
mov dx,.x3
sub dx,.x2
movsx edx,dx
 
movd mm2,[.t_x2]
movd mm3,[.t_x3]
 
punpcklwd mm2,mm4
punpcklwd mm3,mm4
psubd mm3,mm2
 
cvtsi2ss xmm1,eax
movlhps xmm1,xmm1
cvtsi2ss xmm1,edx
shufps xmm1,xmm1,00101111b
cvtpi2ps xmm1,mm3
 
divps xmm1,xmm3 ; xmm1--> | dx | dz | dty | dtx |
 
shufps xmm1,xmm1,11100001b
; xmm1--> | dx | dz | dtx | dty |
; movlps .dty23,xmm1
; movhps .dz23,xmm1
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords | dtx | dty |
movhlps xmm1,xmm1
cvtps2pi mm1,xmm1 ; mm1 --> 2 delta dwords | dx | dz |
movq .dty23,mm0
movq .dz23,mm1
 
 
sub esp,4*6
movd xmm0,ebx
pshuflw xmm0,xmm0,0
movlhps xmm0,xmm0
movdqu xmm1,[.t_y2]
movdqu xmm2,[.t_y3]
psubw xmm2,xmm1
movdqa xmm3,xmm2
pmullw xmm2,xmm0
pmulhw xmm3,xmm0
movhlps xmm4,xmm2
movhlps xmm5,xmm3
punpcklwd xmm2,xmm3
punpcklwd xmm4,xmm5
psrad xmm2,15 - ROUND
psrad xmm4,15 - ROUND
movdqu .dty23,xmm2
movq .dby23,xmm4
else
mov ax,.x3
sub ax,.x2
782,8 → 587,10
; mov .dty23,eax
push eax
end if
; sub esp,40
 
 
.bt_dx23_done:
 
sub esp,64
 
movsx eax,.x1
843,7 → 650,7
; push edx
; push edx
 
if Ext >= SSE2
if 0 ;Ext >= SSE2
movups xmm0,.cby1
movups xmm1,.cty1
movups xmm2,.cby2
852,15 → 659,15
movups xmm5,.dty13
movups xmm6,.dby12
movups xmm7,.dty12
.scby1 equ [edi]
.scty1 equ [edi+16]
.scby2 equ [edi+32]
.scty2 equ [edi+48]
.sdby13 equ [edi+64]
.sdty13 equ [edi+80]
.sdby12 equ [edi+96]
.sdty12 equ [edi+128]
push edi
; .scby1 equ [edi]
; .scty1 equ [edi+16]
; .scby2 equ [edi+32]
; .scty2 equ [edi+48]
; .sdby13 equ [edi+64]
; .sdty13 equ [edi+80]
; .sdby12 equ [edi+96]
; .sdty12 equ [edi+128]
; push edi
mov edi,sse_repository
movaps .scby1,xmm0
movaps .scty1,xmm1
873,35 → 680,27
pop edi
 
end if
 
movsx ecx,.y1
cmp cx,.y2
jge .loop12_done
.loop12:
;if Ext >= SSE2
; fxsave [sse_repository]
;end if
call .call_line
 
if Ext >= SSE2
; fxrstor [sse_repository]
 
movups xmm0,.cby1
movups xmm1,.cty1
movups xmm2,.cby2
movups xmm3,.cty2
; movups xmm4,.dby13
; movups xmm5,.dty13
; movups xmm6,.dby12
; movups xmm7,.dty12
; paddd xmm0,xmm4
; paddd xmm1,xmm5
; paddd xmm2,xmm6
; paddd xmm3,xmm7
push edi
mov edi,sse_repository
paddd xmm0,.sdby13
paddd xmm1,.sdty13
paddd xmm2,.sdby12
paddd xmm3,.sdty12
pop edi
movups xmm4,.dby13
movups xmm5,.dty13
movups xmm6,.dby12
movups xmm7,.dty12
paddd xmm0,xmm4
paddd xmm1,xmm5
paddd xmm2,xmm6
paddd xmm3,xmm7
movups .cby1,xmm0
movups .cty1,xmm1
movups .cby2,xmm2
908,7 → 707,7
movups .cty2,xmm3
end if
 
if (Ext = MMX) | (Ext = SSE)
if (Ext = MMX)
movq mm0,.cby2
movq mm1,.cby1
movq mm2,.cey2
1012,84 → 811,39
movzx ebx,word[.t_y2]
shl ebx,ROUND
mov .cty2,ebx
if Ext >= SSE2
movups xmm2,.cby2
movups xmm3,.cty2
; movups xmm4,.dby13
; movups xmm5,.dty13
movups xmm6,.dby23
movups xmm7,.dty23
; .scby1 equ [edi]
; .scty1 equ [edi+16]
; .scby2 equ [edi+32]
; .scty2 equ [edi+48]
; .sdby13 equ [edi+64]
; .sdty13 equ [edi+80]
.sdby23 equ [edi+160]
.sdty23 equ [edi+192]
push edi
mov edi,sse_repository
; movaps .scby1,xmm0
; movaps .scty1,xmm1
movaps .scby2,xmm2
movaps .scty2,xmm3
; movaps .sdby13,xmm4
; movaps .sdty13,xmm5
movaps .sdby23,xmm6
movaps .sdty23,xmm7
pop edi
 
end if
 
.loop23:
;if Ext >= SSE2
; fxsave [sse_repository]
;end if
call .call_line
 
if Ext >= SSE2
 
; fxrstor [sse_repository]
movups xmm0,.cby1
movups xmm1,.cty1
movups xmm2,.cby2
movups xmm3,.cty2
 
 
push edi
mov edi,sse_repository
paddd xmm0,.sdby13
paddd xmm1,.sdty13
paddd xmm2,.sdby23
paddd xmm3,.sdty23
pop edi
movups xmm4,.dby13
movups xmm5,.dty13
movups xmm6,.dby23
movups xmm7,.dty23
paddd xmm0,xmm4
paddd xmm1,xmm5
paddd xmm2,xmm6
paddd xmm3,xmm7
; push edi
; mov edi,sse_repository
; paddd xmm0,.sdby13
; paddd xmm1,.sdty13
; paddd xmm2,.sdby12
; paddd xmm3,.sdty12
; pop edi
movups .cby1,xmm0
movups .cty1,xmm1
movups .cby2,xmm2
movups .cty2,xmm3
end if
 
 
 
 
; fxrstor [sse_repository]
; movups xmm0,.cby1
; movups xmm1,.cty1
; movups xmm2,.cby2
; movups xmm3,.cty2
; movups xmm4,.dby13
; movups xmm5,.dty13
; movups xmm6,.dby23
; movups xmm7,.dty23
; paddd xmm0,xmm4
; paddd xmm1,xmm5
; paddd xmm2,xmm6
; paddd xmm3,xmm7
; movups .cby1,xmm0
; movups .cty1,xmm1
; movups .cby2,xmm2
; movups .cty2,xmm3
;
end if
if (Ext = MMX) | (Ext = SSE)
if (Ext = MMX)
movq mm0,.cby2
movq mm1,.cby1
movq mm2,.cey2
1163,36 → 917,15
.call_line:
 
pushad
; xmm0= cby1,cbx1,cz1,cx1
; xmm1= cty1,ctx1,cey1,cex1
if Ext >= SSE2
sub esp,8
shufps xmm1,xmm1,10110001b
shufps xmm3,xmm3,10110001b
movlps [esp],xmm1
else
push dword .cty1
push .ctx1
end if
 
push dword .cz1
if Ext>=SSE2
sub esp,8
movlps [esp],xmm3
else
push dword .cty2
push .ctx2
end if
 
push dword .cz2
if Ext>=SSE2
sub esp,32
movhps [esp+24],xmm3
shufps xmm2,xmm2,10110001b
movlps [esp+16],xmm2
movhps [esp+8],xmm1
shufps xmm0,xmm0,10110001b
movlps [esp],xmm0 ;================================
 
else
push dword .cey2
push .cex2
push dword .cby2
1201,8 → 934,8
push .cex1
push dword .cby1
push .cbx1
end if
 
 
push .tex_ptr
push .z_buff
push .t_emap
1218,7 → 951,7
call bump_tex_line_z
 
popad
;end if
 
ret
bump_tex_line_z:
;--------------in: eax - x1
1334,51 → 1067,14
movq .tx1,mm1
movq .tx2,mm0
end if
;if Ext>=SSE2
; movaps xmm4,xmm0
; movaps xmm0,xmm2
; movaps xmm2,xmm4
; movaps xmm5,xmm1
; movaps xmm1,xmm3
; movaps xmm3,xmm5
;else
 
xchg eax,ebx
mov edx,.z1
xchg edx,.z2
mov .z1,edx
;end if
 
.bl_ok:
;if Ext >= SSE2
; shufps xmm0,xmm0,11100001b
; shufps xmm2,xmm2,11100001b
; movlps .bx1,xmm0
; movlps .bx2,xmm2
 
 
; shufps xmm0,xmm0,00011011b
; shufps xmm2,xmm2,00011011b
; movd eax,xmm0
; movd ebx,xmm2
; shufps xmm0,xmm0,11000110b
; shufps xmm2,xmm2,11000110b
; movd .z1,xmm0
; movd .z2,xmm2
; shufps xmm1,xmm1,10110001b
; shufps xmm3,xmm3,10110001b
; movlps .ex1,xmm1
; movlps .ex2,xmm2
; movhps .tx1,xmm1
; movhps .tx2,xmm2
 
; xchg eax,ebx
; mov edx,.z1
; xchg edx,.z2
; mov .z1,edx
 
 
;end if
 
push eax
push ebx ;store x1, x2
movzx ebx,word[size_x_var]
1391,103 → 1087,71
mov ebx,.x2
sub ebx,.x1
 
if Ext>=SSE
mov eax,1 shl 15
cdq
idiv ebx
mov ebx,eax
 
sub esp,28
cvtsi2ss xmm3,ebx ;rcps
shufps xmm3,xmm3,0
; float using SSE variant ::-->
; movups xmm0,.bx1 ; new
; movups xmm1,.bx2 ; new
 
cvtpi2ps xmm0,.bx1 ;mm0 ; variant fixed point
movlhps xmm0,xmm0
cvtpi2ps xmm0,.ex1 ;mm2
cvtpi2ps xmm1,.bx2 ;mm1
movlhps xmm1,xmm1
cvtpi2ps xmm1,.ex2 ;mm3
subps xmm1,xmm0
mov eax,.bx2
sub eax,.bx1
sar eax,ROUND
imul ebx
sar eax,15 - ROUND
push eax
 
divps xmm1,xmm3
 
shufps xmm1,xmm1,10110001b
; movups .dey,xmm1 ; new
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords
movhlps xmm1,xmm1
cvtps2pi mm1,xmm1
movq .dey,mm0
movq .dby,mm1
 
movd mm2,.z1
movd mm3,.z2
 
cvtpi2ps xmm0,.tx1 ;mm0
movlhps xmm0,xmm0
cvtpi2ps xmm0,mm2
cvtpi2ps xmm1,.tx2 ;mm1
movlhps xmm1,xmm1
cvtpi2ps xmm1,mm3
; movups xmm0,,z1 ; new
; movups xmm1,.z2 ; new
subps xmm1,xmm0
mov eax,.by2
sub eax,.by1
sar eax,ROUND
imul ebx
sar eax,15 - ROUND
push eax
 
divps xmm1,xmm3
 
; movups .dz,xmm1 ;new
 
shufps xmm1,xmm1,10110100b
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords
movhlps xmm1,xmm1
cvtps2pi mm1,xmm1
movd .dz,mm0
movq .dty,mm1
 
else
 
mov eax,.bx2 ; calc .dbx
sub eax,.bx1
cdq
idiv ebx
mov eax,.ex2
sub eax,.ex1
sar eax,ROUND
imul ebx
sar eax,15 - ROUND
push eax
 
mov eax,.by2 ; calc .dby
sub eax,.by1
cdq
idiv ebx
push eax
 
mov eax,.ex2 ; calc .dex
sub eax,.ex1
cdq
idiv ebx
push eax
 
mov eax,.ey2 ; calc .dey
mov eax,.ey2
sub eax,.ey1
cdq
idiv ebx
sar eax,ROUND
imul ebx
sar eax,15 - ROUND
push eax
 
 
mov eax,.z2 ; calc .dz
mov eax,.z2
sub eax,.z1
cdq
idiv ebx
sar eax,ROUND
imul ebx
sar eax,15 - ROUND
push eax
 
mov eax,.tx2 ; calc .dtx
mov eax,.tx2
sub eax,.tx1
cdq
idiv ebx
sar eax,ROUND
imul ebx
sar eax,15 - ROUND
push eax
 
mov eax,.ty2 ; calc .dty
 
mov eax,.ty2
sub eax,.ty1
cdq
idiv ebx
sar eax,ROUND
imul ebx
sar eax,15 - ROUND
push eax
 
end if
 
cmp dword .x1,0 ; set correctly begin variable
jge @f ; CLIPPING ON FUNCTION
; cutting a triangle that exceeds the screen
/programs/demos/view3ds/chunks.inc
0,0 → 1,417
;=========================================================================
detect_chunks:
; Build a pivot table of (vertex index, triangle index) pairs, sort it by
; vertex index, remove vertices that no triangle refers to, then group
; triangles that share vertices into chunks.
; in - global variables read here: [triangles_ptr], [triangles_count_var],
;      [points_ptr], [points_count_var], [i12]
; out:
; ecx - chunks number (.ch_cnt)
; esi - tri_ch ; sorted pivot table, 8 bytes per entry: vertex index, triangle index
; edi - t_ptr ; table of pointers into the tri_ch list
; ebx - documented as "chunks list ptr, every chunk as word".
;       NOTE(review): the `mov ebx,.chunks` before the epilogue is commented
;       out, so ebx is not loaded with the chunks pointer on return.
; Also modified in memory: the triangle indices at [triangles_ptr], the
; points at [points_ptr], [points_count_var], and (through
; remove_dead_tri) [triangles_count_var].
; NOTE(review): there is no guard for [triangles_count_var] = 0; the pivot
; and sort loops below only stop when their counter reaches an exact value.

push ebp
mov ebp,esp
sub esp,60 ; room for the 15 dword locals defined below


.tri_ch equ dword[ebp-4] ; tri chunks list ptr (pivot table)
.chunks equ dword[ebp-8] ; chunks ptr, each tri chunk No. as word
.endt equ dword[ebp-12] ; end of the pivot table (first byte past the last entry)
.t_ptr equ dword[ebp-16] ; pointers list
.tri_ch1 equ dword[ebp-20] ; work list of triangles of the chunk being collected
.up equ dword[ebp-24] ; upload ptr
.chmr equ dword[ebp-28] ; bit mark list if tri stored
.str equ dword[ebp-32] ; store ptr
.ltch1 equ dword[ebp-36] ; end of tri_ch1 ptr
.ch_cnt equ dword[ebp-40] ; number of chunks found
.cntt equ dword[ebp-44] ; count of triangles not yet marked
.cc equ dword[ebp-48] ; index of the next seed triangle to try
.lsparam equ dword[ebp-52] ; not referenced in this procedure
.fix_mark equ dword[ebp-56] ; not referenced in this procedure
.endVptr equ dword[ebp-60] ; [points_ptr] + [points_count_var]*12

; some triangles have repeated indices of vertices
; check and remove such triangles
call remove_dead_tri

; allocate .tri_ch: triangles*24 + 100 bytes (3 entries of 8 bytes per triangle)
; NOTE(review): none of the five allocations below checks the returned
; pointer before it is stored and used.
mov ecx,[triangles_count_var]
shl ecx,3
lea ecx,[ecx*3]
add ecx,100
mov eax,68 ; system function 68, subfunction 12: allocate ecx bytes
mov ebx,12
int 0x40
mov .tri_ch,eax


; allocate .tri_ch1: triangles*12 + 32 bytes
mov ecx,[triangles_count_var]
imul ecx,[i12]
add ecx,32
mov eax,68
mov ebx,12
int 0x40
mov .tri_ch1,eax

; allocate .t_ptr: points*4 + 1120 bytes (one dword per vertex)
mov ecx,[points_count_var]
shl ecx,2
add ecx,1120
mov eax,68
mov ebx,12
int 0x40
mov .t_ptr,eax

; allocate .chunks: triangles*2 + 20 bytes (one word per triangle)
mov ecx,[triangles_count_var]
shl ecx,1
add ecx,20
mov eax,68
mov ebx,12
int 0x40
mov .chunks,eax

; allocate .chmr: triangles/8 + 20 bytes (one bit per triangle)
mov ecx,[triangles_count_var]
shr ecx,3
add ecx,20
mov eax,68
mov ebx,12
int 0x40
mov .chmr,eax ; chunks mark if bit is set - tri was used

; clear the mark bits, 16 bytes per step, triangles/128 + 1 steps
; assumes the allocated block is 16-byte aligned (movdqa) - TODO confirm
mov edi,eax
pxor xmm0,xmm0
mov ecx,[triangles_count_var]
shr ecx,7
inc ecx
@@:
movdqa [edi],xmm0
add edi,16
loop @b


; .endVptr = [points_ptr] + points*12
mov eax,[points_count_var]
imul eax,[i12]
add eax,[points_ptr]
mov .endVptr,eax

; make pivot table
; for triangle number ecx with indices v0,v1,v2 store three 8-byte entries:
; (v0,ecx) (v1,ecx) (v2,ecx)

mov edi,.tri_ch
mov esi,[triangles_ptr]
xor ecx,ecx
@@:
movd xmm1,ecx
movq xmm0,[esi] ; xmm0 = v0, v1
pshufd xmm1,xmm1,0 ; triangle number in every dword
movd xmm2,[esi+8] ; xmm2 = v2
punpckldq xmm0,xmm1 ; xmm0 = v0, tri, v1, tri
punpckldq xmm2,xmm1 ; xmm2 = v2, tri, ...
movdqu [edi],xmm0
movq [edi+16],xmm2
add esi,12
add edi,24
inc ecx
cmp ecx,[triangles_count_var]
jnz @b

;sort
; order the pivot table by vertex index: an entry smaller than its
; predecessor is moved down, shifting the larger entries up one slot

mov ebx,.tri_ch
mov ecx,[triangles_count_var]
lea ecx,[ecx*3] ; ecx = number of entries

mov esi,ecx
shl esi,3
add esi,ebx
mov .endt,esi ; esi = .endt = end of the table

.ccc: ; [ebx] - vert index
mov eax,[ebx+8] ; [ebx+4] - tri index; eax = vert index of the next entry
cmp eax,[ebx]
jge .g ; pair already in order (signed compare)
movq xmm0,[ebx+8] ; take out the entry to be moved
push ebx
.c:
cmp ebx,esi
jae .done
cmp ebx,.tri_ch
jb .done ; moved below the start of the table
cmp eax,[ebx]
jae .done ; place found (unsigned compare)
movq xmm7,[ebx]
movq [ebx+8],xmm7 ; shift the larger entry one slot up
sub ebx,8
jnc .c
add ebx,8
.done:
movq [ebx+8],xmm0 ; put the entry taken out into the free slot
.p:
pop ebx
.g:
add ebx,8
dec ecx
cmp ecx,1
jnz .ccc




; look for gaps in the sorted vertex indices (vertices not used by any
; triangle) and remove them

mov ecx,[points_count_var]
mov esi,.tri_ch
dec ecx
.ptC:
mov eax,[esi] ; eax = current vertex index
add esi,8
.ptCff:
cmp esi,.endt
jae .dnC
cmp eax,[esi]
je @f ; same vertex again
lea ebx,[eax+1]
cmp ebx,[esi]
jne .movt ; next index is not eax+1 - gap found
dec ecx
jz .dnC ; check done
@@:
jmp .ptC

; jmp .dnC

.movt:
; ebx = first missing vertex index, edi = number of missing indices;
; esi, ebx, edi are kept in xmm5, xmm7, xmm6 for later

movd xmm5,esi
movd xmm7,ebx
mov edi,[esi]
sub edi,ebx
movd xmm6,edi

@@:
cmp esi,.endt
jnb @f
sub [esi],edi ; fix .tri_ch pivot table list
add esi,8
jmp @b
@@:


;shrink vert
; move the points that follow the gap down over the unused ones
lea ebx,[ebx*3]
shl ebx,2
add ebx,[points_ptr] ; ebx = destination: points_ptr + missing index*12
imul edi,[i12]
add edi,ebx ; edi = source: destination + gap size*12

cmp edi,.endVptr ; fix points list
ja .dnV
@@:
movq xmm0,[edi]
movd xmm1,[edi+8]
movq [ebx],xmm0
movd [ebx+8],xmm1
add edi,12
add ebx,12
cmp edi,.endVptr ; fix points list
jna @b

.dnV:

; recalc tri all indices above ebx - sub edi
push ecx

mov esi,[triangles_ptr]
mov ecx,[triangles_count_var]
lea ecx,[ecx*3] ; every index dword of every triangle
movd edi,xmm6
movd ebx,xmm7
.agT:
cmp [esi],ebx
jb @f
sub [esi],edi
@@:
add esi,4
loop .agT

pop ecx

movd esi,xmm5 ; back to the pivot entry where the gap was found

sub [points_count_var],edi

dec ecx

jmp .ptCff ; check again, something was found


.dnC: ; check done



.do_ch:


;make t_ptr - table with pointers/addresses
; the first dword is the start of tri_ch; another pointer is appended each
; time the vertex index changes from one pivot entry to the next


mov ecx,[points_count_var]
mov esi,.tri_ch
mov edi,.t_ptr
mov ebx,ecx

mov [edi],esi
add edi,4
dec ecx
jz .dn
.pt:
mov eax,[esi] ; [esi] - vert ind
add esi,8
cmp eax,[esi] ; [esi+4] - tri ind
je @f
mov [edi],esi
add edi,4
dec ecx
jz .dn
@@:
cmp esi,.endt
jb .pt


.dn:


; each dword position in the .t_ptr list is the address of the pivot entries
; of the corresponding vertex; every triangle listed from that address
; contains this vertex


mov eax,[triangles_count_var]
mov .cntt,eax ; temp help cnt
xor ecx,ecx ; ecx = chunk counter
mov .cc,ecx
mov esi,[triangles_ptr]
mov edi,.tri_ch1
imul eax,[i12]
add eax,edi
mov .ltch1,eax ; last

mov .up,esi
mov .str,edi
.lb1: ; nx chunk
; take the next triangle as the seed of a new chunk unless it is marked
cmp edi,.ltch1
jnb .endl
mov edi,.tri_ch1
mov .str,edi
mov eax,.cc
mov edx,.cc
inc .cc
cmp edx,[triangles_count_var]
jz .endl
shr eax,3
and edx,111b
add eax,.chmr ; eax = byte of the mark list, edx = bit number

xor ebx,ebx
bt [eax],edx ; mark
jc @f ; tri was stored
inc ecx ; new chunk
or ebx,1b
mov esi,.up
movdqu xmm0,[esi] ; copies 16 bytes although a triangle is 12
movdqu [edi],xmm0
add .str,12
@@:
add .up,12
or ebx,ebx
jz .lb1 ; seed was already marked - try the next one

.lb2:
; edi walks the vertex indices of the triangles stored in tri_ch1
mov eax,[edi]
mov edx,[edi] ; edx - vert ind
shl eax,2
add eax,.t_ptr
mov eax,[eax] ; [eax] - t ptr
or eax,eax
jz .endl
.nxt:
; for every pivot entry of vertex edx: mark its triangle and, if it was
; not marked before, record its chunk number and append it to tri_ch1

mov esi,[eax+4]
mov ebx,[eax+4]
shr esi,3
and ebx,111b
add esi,.chmr
bts [esi],ebx ; mark
jc @f ; tri was stored
dec .cntt
je .endl ; NOTE(review): leaves before the chunk number of this last triangle is stored
mov esi,[eax+4] ; [eax+4] - tri ind
add esi,esi
add esi,.chunks
mov [esi],cx ; chunk number of this triangle, as word
mov esi,[eax+4]

imul esi,[i12]
add esi,[triangles_ptr]
movups xmm0,[esi] ; copies 16 bytes although a triangle is 12
mov esi,.str
movups [esi],xmm0
add .str,12
@@:
add eax,8
cmp edx,[eax]
je .nxt ; next pivot entry belongs to the same vertex
add edi,4

cmp edi,.str
jne .lb2
jmp .lb1 ; work list exhausted - start the next chunk

.endl:

mov .ch_cnt,ecx


.end:



; mov eax,68
; mov ebx,13
; mov ecx,.t_ptr
; int 0x40

; mov eax,68
; mov ebx,13
; mov ecx,.tri_ch
; int 0x40

; free the two scratch buffers
mov eax,68 ; system function 68, subfunction 13: free the block at ecx
mov ebx,13
mov ecx,.tri_ch1
int 0x40

mov eax,68
mov ebx,13
mov ecx,.chmr
int 0x40
; for now free mem - chunks list - unused
; NOTE(review): with both the free below and `mov ebx,.chunks` commented
; out, the .chunks block is neither freed nor handed to the caller here.

; mov eax,68
; mov ebx,13
; mov ecx,.chunks
; int 0x40



; mov ebx,.chunks
mov ecx,.ch_cnt

mov esi,.tri_ch
mov edi,.t_ptr


mov esp,ebp
pop ebp
ret
 
 
 
 
 
 
/programs/demos/view3ds/data.inc
1,7 → 1,7
; DATA AREA ************************************
if Ext > SSE2
; if Ext > SSE2
isSSE3 db 1
end if
; end if
i3 dw 3
i6 dd 6
i12 dd 12
28,14 → 28,15
y_offset dw SIZE_Y / 2
z_offset dw 0
rsscale dd 175.0 ; next real scale
vect_x: dw SIZE_X / 2
vect_y dw SIZE_Y / 2
vect_z dw 0
size_y_var:
yres_var dw SIZE_Y
 
size_x_var:
xres_var dw SIZE_X
; vect_x: dw SIZE_X / 2
; vect_y dw SIZE_Y / 2
; vect_z dw 0
; size_y_var:
; yres_var dw SIZE_Y
;
; size_x_var:
; xres_var dw SIZE_X
 
angle_x dw 0
angle_y dw 0
64,7 → 65,7
screen_ptr dd 0
Zbuffer_ptr dd 0
vertices_index_ptr dd 0
vertex_edit_no dw 0
vertex_edit_no dd -1
edit_start_x:
dw 0
edit_start_y dw 0
86,8 → 87,9
 
db 3
db 'shd. model'
max_dr_flg:
if Ext >= SSE3
max_dr_flg db 15
db 15
else
db 12
end if
290,6 → 292,7
db 'x+y '
db ' x '
db 'keys'
 
onoff_f:
db 'off '
db 'on '
354,7 → 357,7
if Ext=SSE3
db ' (SSE3)'
end if
db ' 0.075',0
db ' 0.076',0
labellen:
STRdata db '-1 '
lab_vert:
367,7 → 370,10
db 'Edges count: '
lab_ed_end:
 
db 'Chunks detected:'
 
 
 
all_lights_size dw lightsend-lights
 
 
483,6 → 489,17
times 4 dd 1.0
 
eps: times 4 dd 0.00000
 
vect_x: dw SIZE_X / 2
vect_y dw SIZE_Y / 2
vect_z dw 0
size_y_var:
yres_var dw SIZE_Y
 
size_x_var:
xres_var dw SIZE_X
 
 
epsone dd 1.0001
aprox dd 0.0001
epsminus dd -0.0001
496,7 → 513,7
fptr dd 0 ;workarea
file_name:
db '/rd/1/3d/house.3ds',0
; db '/tmp0/1/ant.3ds',0
; db '/tmp0/1/sc.3ds',0
 
rb 256
 
521,7 → 538,9
points_count_var dd ? ;
triangles_count_var dd ? ; dont change order
edges_count dd ? ;
chunks_number dd ?
tex_points_ptr dd ?
chunks_ptr dd ?
 
temp_col dw ?
high dd ?
/programs/demos/view3ds/flat_cat.inc
26,22 → 26,14
.y3 equ word[ebp-16]
 
.dx12 equ dword[ebp-20]
;.dz12 equ dword[ebp-24]
.dx13 equ dword[ebp-24]
.dz13 equ dword[ebp-28]
.dz12 equ dword[ebp-32]
;.dz13 equ dword[ebp-32]
.dz12 equ dword[ebp-24]
.dx13 equ dword[ebp-28]
.dz13 equ dword[ebp-32]
.dx23 equ dword[ebp-36]
.dz13M equ [ebp-40]
.dz23 equ dword[ebp-44]
.zz1 equ dword[ebp-48]
.zz2 equ dword[ebp-52]
.zz2M equ qword[ebp-52]
.dz12M equ qword[ebp-32]
.dz23M equ qword[ebp-44]
;if Ext>=MMX
; emms
;end if
.dz23 equ dword[ebp-40]
.zz1 equ dword[ebp-44]
.zz2 equ dword[ebp-48]
 
mov ebp,esp
 
push edx ; store edx in variable .col
87,13 → 79,13
; jle @f
; jmp .ft_loop2_end
;@@:
sub esp,52-12
; sub esp,52-12
 
mov bx,.y2 ; calc delta 12
sub bx,.y1
jnz .ft_dx12_make
mov .dx12,0
mov .dz12,0
push dword 0
push dword 0
jmp .ft_dx12_done
.ft_dx12_make:
mov ax,.x2
103,7 → 95,7
shl eax,ROUND
cdq
idiv ebx
mov .dx12,eax
push eax
 
mov ax,.z2
sub ax,.z1
111,14 → 103,13
shl eax,CATMULL_SHIFT
cdq
idiv ebx
mov .dz12,eax
push eax
.ft_dx12_done:
mov bx,.y3 ; calc delta 13
sub bx,.y1
jnz .ft_dx13_make
mov .dx13,0
mov .dz13,0
mov dword .dz13M,0
push dword 0
push dword 0
jmp .ft_dx13_done
.ft_dx13_make:
mov ax,.x3
128,7 → 119,7
shl eax,ROUND
cdq
idiv ebx
mov .dx13,eax
push eax
 
mov ax,.z3
sub ax,.z1
136,14 → 127,17
shl eax,CATMULL_SHIFT
cdq
idiv ebx
mov .dz13,eax
mov dword .dz13M,eax
push eax
 
.ft_dx13_done:
; sub esp,48
mov bx,.y3 ; calc delta 23
sub bx,.y2
jnz .gt_dx23_make
mov .dx23,0
mov .dz23,0
push dword 0
push dword 0
; mov .dx23,0
; mov .dz23,0
jmp .gt_dx23_done
.gt_dx23_make:
mov ax,.x3
153,7 → 147,7
shl eax,ROUND
cdq
idiv ebx
mov .dx23,eax
push eax
 
mov ax,.z3
sub ax,.z2
161,19 → 155,18
shl eax,CATMULL_SHIFT
cdq
idiv ebx
mov .dz23,eax
push eax
; mov .dz23,eax
.gt_dx23_done:
 
movsx edx,.z1
shl edx,CATMULL_SHIFT
mov .zz1,edx
mov .zz2,edx
push edx
push edx
 
movsx eax,.x1
shl eax,ROUND ; eax - x1
mov ebx,eax ; ebx - x2
;if Ext>=MMX
; movq mm0,.zz2M
;end if
mov cx,.y1
cmp cx,.y2
jge .ft_loop1_end
187,13 → 180,9
push bx ; x2
sar eax,ROUND
push ax ; x1
;if Ext>=MMX
; sub esp,8
; movq [esp],mm0
;else
push .zz2 ; z2 shl CATMULL_SHIFT
push .zz1 ; z1 shl CATMULL_SHIFT
;end if
 
call flat_line_z
 
popad
200,9 → 189,6
 
add eax,.dx13
add ebx,.dx12
;if Ext>=MMX
; paddd mm0,.dz12M
;else
 
mov edx,.dz13
add .zz1,edx
219,13 → 205,7
mov .zz2,edx
movsx ebx,.x2
shl ebx,ROUND
;if Ext>=MMX
; movq mm0,.zz2M
;; push .dz13 ; exchange
;; pop .dz12
;; push .dz23 ; exchange
;; pop .dz13
;end if
 
mov cx,.y2
cmp cx,.y3
jge .ft_loop2_end
238,13 → 218,10
push bx
sar eax,ROUND
push ax ; x1
;if Ext>=MMX
; sub esp,8
; movq [esp],mm0
;else
 
push .zz2 ; z2 shl CATMULL_SHIFT
push .zz1 ; z1 shl CATMULL_SHIFT
;end if
 
call flat_line_z
 
popad
251,19 → 228,12
 
add eax,.dx13
add ebx,.dx23
;if Ext>=MMX
; paddd mm0,.dz23M
;else
 
mov edx,.dz13
add .zz1,edx
mov edx,.dz23
add .zz2,edx
 
; mov edx,.dz13
; add .zz1,edx
; mov edx,.dz12
; add .zz2,edx
;end if
inc cx
cmp cx,.y3
jl .ft_loop2
374,13 → 344,15
; cmovl [edi],eax
; cmovl [esi],ebx
jge @f
stosd
dec edi
mov dword[esi],ebx
jmp .no_skip
mov [edi],eax
mov [esi],ebx
; stosd ; less branches
; dec edi
; mov dword[esi],ebx
; jmp .no_skip
@@:
add edi,3
.no_skip:
; .no_skip:
add esi,4
add ebx,edx
loop .ddraw
/programs/demos/view3ds/grd_tex.inc
4,12 → 4,18
ROUND equ 8
;NON=0
;MMX=1
;Ext=MMX
;SSE=2
;SSE2=3
 
;Ext=SSE2
 
 
;TEX_SIZE=0x3fff
;SIZE_X equ 512
;SIZE_Y equ 512
;ROUND = 8
;TEX_SHIFT equ 6
; TEXTURE_SIZE = 0xFFFFF
 
; procedure drawing textured triangle with Gouraud shading
; Z-buffer algorithm included, Z coord interpolation ----
64,8 → 70,8
.y3 equ word[ebp-24] ;dw ? ;equ word[ebp-20]
 
.dx12 equ dword[ebp-28] ;dd ?
.tex_dx12 equ dword[ebp-32] ;dd ?
.tex_dy12 equ [ebp-36] ;dd ?
.tex_dy12 equ [ebp-32] ;dd ?
.tex_dx12 equ [ebp-36] ;dd ?
.dz12 equ dword[ebp-40] ;dd ?
.dc12r equ [ebp-44] ;dd ?
.dc12g equ dword[ebp-48] ;dd ?
72,8 → 78,8
.dc12b equ [ebp-52] ;dd ?
 
.dx23 equ dword[ebp-56] ;dd ?
.tex_dx23 equ dword[ebp-60] ;dd ?
.tex_dy23 equ [ebp-64] ;dd ?
.tex_dy23 equ [ebp-60] ;dd ?
.tex_dx23 equ [ebp-64] ;dd ?
.dz23 equ dword[ebp-68] ;dd ?
.dc23r equ [ebp-72] ;dd ?
.dc23g equ dword[ebp-76] ;dd ?
80,36 → 86,32
.dc23b equ [ebp-80] ;dword[ebp-8]dd ?
 
.dx13 equ dword[ebp-84] ;dd ?
.tex_dx13 equ dword[ebp-88] ;dd ?
.tex_dy13 equ [ebp-92] ;dd ?
.tex_dy13 equ [ebp-88] ;dd ?
.tex_dx13 equ [ebp-92] ;dd ?
.dz13 equ dword[ebp-96] ;dd ?
.dc13r equ [ebp-100] ;dd ?
.dc13g equ dword[ebp-104] ;dd ?
.dc13b equ [ebp-108] ;dd ?
 
.scan_x1 equ dword[ebp-112] ;dd ?
.scan_y1 equ [ebp-116] ;dd ?
.scan_y1 equ [ebp-112] ;dd ?
.scan_x1 equ [ebp-116] ;dd ?
.zz1 equ dword[ebp-120] ;dw ?
.cur1r equ [ebp-124] ;dw ?
.cur1g equ dword[ebp-128] ;dw ?
.cur1g equ [ebp-128] ;dw ?
.cur1b equ [ebp-132] ;dw ?
 
.scan_x2 equ dword[ebp-136] ;dd ?
.scan_y2 equ [ebp-140] ;dd ?
.zz2 equ dword[ebp-144] ;dw ?
.scan_y2 equ [ebp-136] ;dd ?
.scan_x2 equ [ebp-140] ;dd ?
.zz2 equ [ebp-144] ;dw ?
.cur2r equ [ebp-148] ;dw ?
.cur2g equ dword[ebp-152] ;dw ?
.cur2g equ [ebp-152] ;dw ?
.cur2b equ [ebp-156] ;dw ?
 
 
mov ebp,esp
 
; mov .tex_ptr,edx
; mov .z_ptr,esi
; mov .scr_buff,edi
push edx esi edi
; push esi
; push edi
 
mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that
and edx,ebx ; if *all* of them are negative a sign flag is raised
and edx,ecx
184,86 → 186,50
loop @b
jmp .dx12_done
.dx12_make:
; sub esp,7*4
 
 
mov ax,.x2
sub ax,.x1
cwde
movsx ebx,bx
shl eax,ROUND
mov eax,1 shl 15
cdq
idiv ebx
; mov .dx12,eax
push eax
; push eax
mov ebx,eax
 
if 0 ; Ext=SSE
movd mm0,.col1r ; 2 words r, g
pxor mm1,mm1
punpcklwd mm0,mm1
cvtpi2ps xmm0,mm0
movlhps xmm0,xmm0
movd mm0,.col1g ; 2 words b, z
punpcklwd mm0,mm1
cvtpi2ps xmm0,mm0
; xmm0=four float double words
divss xmm0,.pack3
;convert and insert mm0 to lower xmm1 ..
end if
 
mov ax,word .tex_x2
sub ax,word .tex_x1
mov ax,.x2
sub ax,.x1
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .tex_dx12r,eax
imul ebx
sar eax,15 - ROUND
push eax
; mov .dx12,eax
 
mov ax,word .tex_y2
sub ax,word .tex_y1
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .tex_dx12,eax
push eax
sub esp,6*4
movd xmm0,ebx
pshuflw xmm0,xmm0,0
; pshufd xmm0,xmm0,0
movlhps xmm0,xmm0
movq xmm1,.col1r
movq xmm2,.col2r
movhps xmm1,.tex_x1
movhps xmm2,.tex_x2
psubw xmm2,xmm1
movdqa xmm3,xmm2
pmullw xmm2,xmm0
pmulhw xmm3,xmm0
movhlps xmm4,xmm2
movhlps xmm5,xmm3
punpcklwd xmm2,xmm3
punpcklwd xmm4,xmm5
psrad xmm2,15 - ROUND
psrad xmm4,15 - ROUND
pshufd xmm2,xmm2,11000110b
movdqu .dc12b,xmm2
; punpcklwd xmm4,xmm5
; psrad xmm4,15 - ROUND
movq .tex_dx12,xmm4
 
mov ax,word .z2
sub ax,word .z1
cwde
shl eax,CATMULL_SHIFT
cdq
idiv ebx
; mov .dz12,eax
push eax ; .dza12
 
mov ax,word .col2r
sub ax,word .col1r
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dc12r,eax
push eax
 
mov ax,word .col2g
sub ax,word .col1g
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dc12g,eax
push eax
 
mov ax,word .col2b ;;---
sub ax,word .col1b
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dc12b,eax
push eax
 
;+++++++++++++++++ second zone +++++++++++++
.dx12_done:
 
277,70 → 243,43
jmp .dx23_done
 
.dx23_make:
mov ax,.x3
sub ax,.x2
cwde
movsx ebx,bx
shl eax,ROUND
mov eax,1 shl 15
cdq
idiv ebx
; mov .dx23,eax
push eax
mov ebx,eax
 
mov ax,word .tex_x3
sub ax,word .tex_x2
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .tex_dx23,eax
push eax
 
mov ax,word .tex_y3
sub ax,word .tex_y2
mov ax,.x3
sub ax,.x2
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .tex_dy23,eax
imul ebx
sar eax,15 - ROUND
push eax
 
mov ax,word .z3
sub ax,word .z2
cwde ;
shl eax,CATMULL_SHIFT ; 2222222
cdq ; 2 2
idiv ebx ; 2
; mov .dz23,eax ; 2
push eax ; .dza12 ; 2
; 2
mov ax,word .col3r ; 2
sub ax,word .col2r ; 2222222
cwde ; second delta
shl eax,ROUND ;
cdq ;
idiv ebx ;
; mov .dc23r,eax ;
push eax
sub esp,6*4
movd xmm0,ebx
pshuflw xmm0,xmm0,0
movlhps xmm0,xmm0
movq xmm1,.col2r
movq xmm2,.col3r
movhps xmm1,.tex_x2
movhps xmm2,.tex_x3
psubw xmm2,xmm1
movdqa xmm3,xmm2
pmullw xmm2,xmm0
pmulhw xmm3,xmm0
movhlps xmm4,xmm2
movhlps xmm5,xmm3
punpcklwd xmm2,xmm3
punpcklwd xmm4,xmm5
psrad xmm2,15 - ROUND
psrad xmm4,15 - ROUND
pshufd xmm2,xmm2,11000110b
movdqu .dc23b,xmm2
movq .tex_dx23,xmm4
 
mov ax,word .col3g
sub ax,word .col2g
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dc23g,eax
push eax
 
mov ax,word .col3b ;;---
sub ax,word .col2b
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dc23b,eax
push eax
 
.dx23_done:
;++++++++++++++++++third zone++++++++++++++++++++++++
mov bx,.y3 ; calc delta13
352,80 → 291,69
loop @b
jmp .dx13_done
.dx13_make:
mov ax,.x3
sub ax,.x1
cwde
movsx ebx,bx
shl eax,ROUND
mov eax,1 shl 15
cdq
idiv ebx
; mov .dx13,eax
push eax
mov ebx,eax
 
mov ax,word .tex_x3 ; triangle b
sub ax,word .tex_x1
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .tex_dx13r,eax
push eax
 
mov ax,word .tex_y3
sub ax,word .tex_y1
mov ax,.x3
sub ax,.x1
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .tex_dy13,eax
imul ebx
sar eax,15 - ROUND
push eax
 
mov ax,word .z3
sub ax,word .z1 ; 333333333
cwde ; 3 3
shl eax,CATMULL_SHIFT ; 3
cdq ; 3
idiv ebx ; 3
; mov .dz13,eax ; 3
push eax ; .dza12 ; 3
; 3
mov ax,word .col3r ; 3333333333
sub ax,word .col1r ; 3
cwde ; 3
shl eax,ROUND ; 3
cdq ; 3
idiv ebx ; 3
; mov .dc13r,eax ; 3 3
push eax ; 33333333
 
mov ax,word .col3g
sub ax,word .col1g
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dc13g,eax
push eax
 
mov ax,word .col3b ;;---
sub ax,word .col1b
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dc13b,eax
push eax
 
sub esp,6*4
movd xmm0,ebx
pshuflw xmm0,xmm0,0
movlhps xmm0,xmm0
movq xmm1,.col1r
movq xmm2,.col3r
movhps xmm1,.tex_x1
movhps xmm2,.tex_x3
psubw xmm2,xmm1
movdqa xmm3,xmm2
pmullw xmm2,xmm0
pmulhw xmm3,xmm0
movhlps xmm4,xmm2
movhlps xmm5,xmm3
punpcklwd xmm2,xmm3
punpcklwd xmm4,xmm5
psrad xmm2,15 - ROUND
psrad xmm4,15 - ROUND
pshufd xmm2,xmm2,11000110b
movdqu .dc13b,xmm2
movq .tex_dx13,xmm4
.dx13_done:
 
; <<<<<<< ::delta zone end+++++++++++++++++++++ >>>>>>>>
sub esp,55 ;(12*4)
 
sub esp,(12*4)
 
movsx eax,.x1 ; eax - cur x1
shl eax,ROUND ; ebx - cur x2
mov ebx,eax
 
 
movzx edi,word .tex_x1
shl edi,ROUND
mov .scan_x1,edi
mov .scan_x2,edi
; push edi
; push edi
movzx edx,word .tex_y1
shl edx,ROUND
; push edx
; push edx
mov .scan_y1,edx
mov .scan_y2,edx
 
movsx edx,word .z1
shl edx,CATMULL_SHIFT
; push edx
; push edx
mov .zz1,edx
mov .zz2,edx
 
442,20 → 370,12
mov .cur1b,edx
mov .cur2b,edx
 
movzx edi,word .tex_x1
shl edi,ROUND
mov .scan_x1,edi
mov .scan_x2,edi
movzx edx,word .tex_y1
shl edx,ROUND
mov .scan_y1,edx
mov .scan_y2,edx
 
mov cx,.y1
cmp cx,.y2
jge .loop1_end
.loop_1:
; push eax ebx ebp
 
pushad
 
push .tex_ptr
463,93 → 383,46
push .z_ptr
push cx
 
push .zz2
push dword .zz2
 
push .scan_x2
push dword .cur2b
push dword .cur2g
push dword .cur2r
push dword .scan_x2
push dword .scan_y2
push dword .cur2r
push .cur2g
push dword .cur2b
 
push .zz1
 
push .scan_x1
push dword .cur1b
push dword .cur1g
push dword .cur1r
push dword .scan_x1
push dword .scan_y1
push dword .cur1r
push .cur1g
push dword .cur1b
 
sar eax,ROUND
sar ebx,ROUND
call horizontal_tex_grd_line
 
; pop ebp ebx eax
 
popad
 
if (Ext = MMX)|(Ext=SSE)
movq mm0,.cur1b
movq mm1,.cur1r
movq mm2,.scan_y1
movq mm3,.cur2b
movq mm4,.cur2r
movq mm5,.scan_y2
paddd mm0,.dc13b
paddd mm1,.dc13r
paddd mm2,.tex_dy13
paddd mm3,.dc12b
paddd mm4,.dc12r
paddd mm5,.tex_dy12
movq .cur1b,mm0
movq .cur1r,mm1
movq .scan_y1,mm2
movq .cur2b,mm3
movq .cur2r,mm4
movq .scan_y2,mm5
end if
if Ext >= SSE2
 
movups xmm0,.cur1b
movups xmm1,.dc13b
movups xmm2,.cur2b
movups xmm3,.dc12b
movq mm2,.scan_y1
movq mm5,.scan_y2
movq mm2,.scan_x1
movq mm5,.scan_x2
paddd xmm0,xmm1
paddd xmm2,xmm3
paddd mm2,.tex_dy13
paddd mm5,.tex_dy12
movq .scan_y1,mm2
movq .scan_y2,mm5
paddd mm2,.tex_dx13
paddd mm5,.tex_dx12
movq .scan_x1,mm2
movq .scan_x2,mm5
movups .cur1b,xmm0
movups .cur2b,xmm2
end if
 
if Ext = NON
mov edx,.dc13b
add .cur1b,edx
mov esi,.dc13g
add .cur1g,esi
mov edi,.dc13r
add .cur1r,edi
mov edx,.dz13
add .zz1,edx
mov edx,.tex_dx13
add .scan_x1,edx
mov esi,.tex_dy13
add .scan_y1,esi
 
mov edi,.dc12b
add .cur2b,edi
mov esi,.dc12g
add .cur2g,esi
mov edx,.dc12r
add .cur2r,edx
mov edi,.tex_dx12
add .scan_x2,edi
mov esi,.tex_dy12
add .scan_y2,esi
mov edx,.dz12
add .zz2,edx
end if
add eax,.dx13
add ebx,.dx12
inc cx
598,21 → 471,21
push .z_ptr
push cx
 
push .zz2
push dword .zz2
 
push .scan_x2
push dword .cur2b
push dword .cur2g
push dword .cur2r
push dword .scan_x2
push dword .scan_y2
push dword .cur2r
push .cur2g
push dword .cur2b
 
push .zz1
 
push .scan_x1
push dword .cur1b
push dword .cur1g
push dword .cur1r
push dword .scan_x1
push dword .scan_y1
push dword .cur1r
push .cur1g
push dword .cur1b
 
sar eax,ROUND
sar ebx,ROUND
620,69 → 493,22
 
popad
 
if (Ext = MMX)|(Ext=SSE)
movq mm0,.cur1b
movq mm1,.cur1r
movq mm2,.scan_y1
movq mm3,.cur2b
movq mm4,.cur2r
movq mm5,.scan_y2
paddd mm0,.dc13b
paddd mm1,.dc13r
paddd mm2,.tex_dy13
paddd mm3,.dc23b
paddd mm4,.dc23r
paddd mm5,.tex_dy23
movq .cur1b,mm0
movq .cur1r,mm1
movq .scan_y1,mm2
movq .cur2b,mm3
movq .cur2r,mm4
movq .scan_y2,mm5
end if
if Ext >= SSE2
 
movups xmm0,.cur1b
movups xmm1,.dc13b
movups xmm2,.cur2b
movups xmm3,.dc23b
movq mm2,.scan_y1
movq mm5,.scan_y2
movq mm2,.scan_x1
movq mm5,.scan_x2
paddd xmm0,xmm1
paddd xmm2,xmm3
paddd mm2,.tex_dy13
paddd mm5,.tex_dy23
movq .scan_y1,mm2
movq .scan_y2,mm5
paddd mm2,.tex_dx13
paddd mm5,.tex_dx23
movq .scan_x1,mm2
movq .scan_x2,mm5
movups .cur1b,xmm0
movups .cur2b,xmm2
end if
if Ext = NON
mov edx,.dc13b
add .cur1b,edx
mov esi,.dc13g
add .cur1g,esi
mov edi,.dc13r
add .cur1r,edi
mov edx,.tex_dx13
add .scan_x1,edx
mov esi,.tex_dy13
add .scan_y1,esi
mov edx,.dz13
add .zz1,edx
 
mov edi,.dc23b
add .cur2b,edi
mov esi,.dc23g
add .cur2g,esi
mov edx,.dc23r
add .cur2r,edx
mov edi,.tex_dx23
add .scan_x2,edi
mov esi,.tex_dy23
add .scan_y2,esi
mov edx,.dz23
add .zz2,edx
end if
add eax,.dx13
add ebx,.dx23
inc cx
702,42 → 528,33
.y equ [ebp+52]
 
.z2 equ [ebp+48]
.tex_x2 equ [ebp+44]
.tex_y2 equ [ebp+40]
.b2 equ [ebp+44]
.g2 equ [ebp+40]
.r2 equ [ebp+36]
.g2 equ [ebp+32]
.b2 equ [ebp+28]
.tex_x2 equ [ebp+32]
.tex_y2 equ [ebp+28]
 
 
.z1 equ [ebp+24]
.tex_x1 equ [ebp+20]
.tex_y1 equ [ebp+16]
.b1 equ [ebp+20]
.g1 equ [ebp+16]
.r1 equ [ebp+12]
.g1 equ [ebp+8]
.b1 equ [ebp+4]
.tex_x1 equ [ebp+8]
.tex_y1 equ [ebp+4]
 
 
.x1 equ word[ebp-2]
.x2 equ word[ebp-4]
.dz equ dword[ebp-8]
.db equ dword[ebp-12]
.db equ [ebp-12]
.dg equ dword[ebp-16]
.dr equ dword[ebp-20]
.dr equ [ebp-20]
.dtex_x equ dword[ebp-24]
.dtex_y equ dword[ebp-28]
.dtex_y equ [ebp-28]
 
.c_ty equ [ebp-32]
.c_tx equ [ebp-36]
.cb equ [ebp-40]
.cg equ [ebp-44]
.cr equ [ebp-48]
.t_col equ [ebp-52]
mov ebp,esp
 
.dtex_yM equ qword[ebp-28]
.drM equ qword[ebp-20]
.dbM equ qword[ebp-12]
 
mov ebp,esp
; sub esp,30
 
mov cx,word .y
or cx,cx
jl .quit_l
751,57 → 568,17
 
xchg eax,ebx
 
if Ext=NON
mov ecx,dword .r1
xchg ecx, .r2
mov dword .r1, ecx
 
mov ecx,dword .g1
xchg ecx, .g2
mov dword .g1, ecx
movdqu xmm0,.tex_y1
movdqu xmm1,.tex_y2
movdqu .tex_y1,xmm1
movdqu .tex_y2,xmm0
movq xmm4,.b1 ; x, z
movq xmm5,.b2
movq .b1,xmm5
movq .b2,xmm4
 
mov ecx,dword .b1
xchg ecx, .b2
mov dword .b1, ecx
 
mov ecx,dword .tex_x1
xchg ecx, .tex_x2
mov dword .tex_x1, ecx
 
mov ecx,dword .tex_y1
xchg ecx, .tex_y2
mov dword .tex_y1, ecx
 
mov ecx,dword .z1
xchg ecx, .z2
mov dword .z1, ecx
end if
if (Ext=MMX)
movq mm0,.b1 ; b, g
movq mm1,.b2
movq .b1, mm1
movq .b2, mm0
movq mm2,.r1 ; r, y
movq mm3,.r2
movq .r1,mm3
movq .r2,mm2
movq mm4,.tex_x1 ; x, z
movq mm5,.tex_x2
movq .tex_x1,mm5
movq .tex_x2,mm4
 
end if
if Ext>=SSE
movups xmm0,.b1
movups xmm1,.b2
movups .b1,xmm1
movups .b2,xmm0
movq mm4,.tex_x1 ; x, z
movq mm5,.tex_x2
movq .tex_x1,mm5
movq .tex_x2,mm4
end if
 
@@:
or bx,bx
jle .quit_l
810,46 → 587,88
 
push ax
push bx
 
mov eax,.z2 ; delta zone************
sub eax,.z1
cdq
if 1
mov bx,.x2
sub bx,.x1
 
movsx ebx,bx
mov eax,1 shl 15
cdq
idiv ebx
mov ebx,eax
 
 
mov eax,.z2 ; delta zone************
sub eax,.z1
imul ebx
sar eax,15
push eax ; .dz
 
mov eax,.b2
sub eax,.b1
cdq
idiv ebx
push eax ; .db
imul ebx
sar eax,15
push eax
 
mov eax,.g2
sub eax,.g1
cdq
idiv ebx
push eax ; .dg
imul ebx
sar eax,15
push eax ; .dz
 
mov eax,.r2
sub eax,.r1
cdq
idiv ebx
push eax ; .dr
imul ebx
sar eax,15
push eax
 
mov eax,.tex_x2
sub eax,.tex_x1
cdq
idiv ebx
push eax ; .dtex_x
imul ebx
sar eax,15
push eax
 
mov eax,.tex_y2
sub eax,.tex_y1
cdq
idiv ebx
push eax ; .dtey_x
imul ebx
sar eax,15
push eax
 
 
end if
if 0
sub esp,6*4
movd xmm0,ebx
pshuflw xmm0,xmm0,0
movlhps xmm0,xmm0
movdqu xmm1,.tex_y1
movdqu xmm2,.tex_y2
movq xmm3,.b1
movq xmm4,.b2
psubd xmm4,xmm3
psubd xmm2,xmm1
packssdw xmm2,xmm4
; packlssdw xmm2,xmm2
; movlhps xmm2,xmm4
 
 
; psubw xmm2,xmm1
movdqa xmm3,xmm2
pmullw xmm2,xmm0
pmulhw xmm3,xmm0
movhlps xmm4,xmm2
movhlps xmm5,xmm3
punpcklwd xmm2,xmm3
punpcklwd xmm4,xmm5
psrad xmm2,15 - ROUND
psrad xmm4,15 - ROUND
; pshufd xmm2,xmm2,11000110b
movdqu .dtex_y,xmm2
movq .db,xmm4
 
 
end if
 
cmp .x1,0
jg @f
 
904,113 → 723,56
movzx ecx,cx
 
; init current variables
push dword .tex_y1
;if Ext=NON
push dword .tex_x1
movdqu xmm0,.r1
movdqu xmm1,.dr
pxor xmm2,xmm2
movq xmm4,.dtex_y
movq xmm5,.tex_y1
 
push dword .b1
push dword .g1
push dword .r1
 
if Ext>=MMX
movq mm4,.cr ; lo -> r,g
movq mm6,.cb ; hi -> b, tex_x
pxor mm0,mm0
end if
mov ebx,.z1
.ddraw:
cmp ebx,dword[esi]
jge @f
mov eax,.c_ty
; if ROUND<TEX_SHIFT
; shl eax,TEX_SHIFT-ROUND
; end if
; if ROUND>TEX_SHIFT
; shr eax,ROUND-TEX_SHIFT
; end if
shr eax,ROUND
shl Eax,TEX_SHIFT
mov edx,.c_tx ; calc texture pixel mem addres
shr edx,ROUND
movdqa xmm6,xmm5
psrld xmm6,ROUND
movd eax,xmm6
psrldq xmm6,4
movd edx,xmm6
shl eax,TEX_SHIFT
 
; calc texture pixel mem addres
 
add eax,edx
and eax,TEXTURE_SIZE ; cutting
lea eax,[3*eax]
add eax,.tex_ptr
mov dword[esi],ebx
if Ext = NON
mov eax,dword[eax]
; mov .tex_col,eax
push ax
shl eax,8
pop ax
mov edx,.cr
sar edx,ROUND
mul dl ; al*dl
shr ax,8
stosb
ror eax,16
push ax
mov edx,.cg
sar edx,ROUND
mul dl
shr ax,8
stosb
pop ax
shr ax,8
mov edx,.cb
sar edx,ROUND
mul dl
shr ax,8
stosb
jmp .no_skip
else
movd mm1,[eax]
punpcklbw mm1,mm0
movq mm3,mm4 ;.cr ; lo -> r,g
movq mm5,mm6 ;.cb ; lo -> b,tex_x
psrld mm3,ROUND ;
psrld mm5,ROUND ;
packssdw mm3,mm5
pmullw mm1,mm3
psrlw mm1,8
packuswb mm1,mm0
movd [edi],mm1
end if
 
movd xmm7,[eax]
punpcklbw xmm7,xmm2
movdqa xmm3,xmm0 ; calc col
psrld xmm3,ROUND ;
packssdw xmm3,xmm3
pmullw xmm7,xmm3
psrlw xmm7,8
packuswb xmm7,xmm7
movd [edi],xmm7
 
mov dword[esi],ebx
if Ext = NON
jmp .no_skip
end if
@@:
add edi,3
.no_skip:
add esi,4
add ebx,.dz
paddd xmm5,xmm4
paddd xmm0,xmm1
 
mov eax,.dtex_x
add .c_tx, eax
mov edx,.dtex_y
add .c_ty, edx
if Ext=NON
mov eax,.dr
add .cr,eax
mov edx,.dg
add .cg,edx
mov eax,.db
add .cb,eax
loop .ddraw
 
else
paddd mm4,.drM
paddd mm6,.dbM
;; paddd mm7,.dtex_y ; mm4 - b, g
;; movq .c_tx,mm7
; mm6 - r, x
end if ; mm7 - y, x
 
dec ecx
jnz .ddraw
 
.quit_l:
 
mov esp,ebp
ret 42+20 ; horizontal line
;the_zero:
;size_y_var:
;size_x_var:
 
/programs/demos/view3ds/history.txt
1,3 → 1,18
View3ds 0.075 - XII 2021
1. Custom rotate using keys and mouse scroll support by Leency.
----------------------------------------------------------------------------------
 
View3ds 0.074 - IX 2021
1. Fixed emboss bug in grd lines displaying model.
2. Fixed the problem of a grd line exceeding the screen.
3. New rendering model - ray casted shadows and an appropriate button to
set this option 'on'. Note that it is a non-real-time model, especially when
a complex object is computed. I made an effort to introduce an accelerating
structure - AABB (Axis Aligned Bounding Boxes) - but it is disabled
for now - it seems to work incorrectly (slowly).
----------------------------------------------------------------------------------
 
View3ds 0.073 - may 2021
1. I introduced a procedure for searching for non-redundant edges.
2. Writing some info about the object: vertices, triangles, unique edges
/programs/demos/view3ds/readme.txt
1,18 → 1,33
View3ds 0.074 - tiny viewer to .3ds and .asc files with several graphics
View3ds 0.076 - tiny viewer to .3ds and .asc files with several graphics
effects implementation.
 
What's new?
1. Fixed emboss bug in grd lines displaying model.
2. Fixed the problem of a grd line exceeding the screen.
3. New rendering model - ray casted shadows and an appropriate button to
set this option 'on'. Note that it is a non-real-time model, especially when
a complex object is computed. I made an effort to introduce an accelerating
structure - AABB (Axis Aligned Bounding Boxes) - but it is disabled
for now - it seems to work incorrectly (slowly).
1. A procedure for detecting manifold chunks, based on a kind of sorted pivot
table. Chunks are counted and their number is displayed.
2. A new procedure for calculating normal vectors that uses some data produced
by the new chunks routine. Loading big objects is now fast. I load an object that
contains ~500000 vertices, ~700000 faces and ~2000 0000 unique edges
in a few seconds on a 2nd gen i5. Earlier, calculating such objects took
rather more than acceptable time limits.
3. On http://board.flatassembler.net there are occasionally some discussions
about optimizing. Some clever people, whose skills and competence I trust,
claim that for CPUs manufactured in the last ~15 years the size of code is crucial
for speed (it makes better use of the CPU cache).
So I wrote some 'movsd' mnemonics instead of 'mov [edi],sth', and 'loop' instead of
'dec ecx,jnz sth'. Moreover, I went back to initializing some local variables
by 'push' (flat_cat.inc). I made an effort to change divisions into
multiplications in two_tex.inc (works OK in the FPU-only Ext = NON mode and
of course in Ext = SSE3 mode), grd_tex.inc (single line, not parallel
muls; the whole drawing routine uses 4 divs instead of 27 divisions) and
bump_tex.inc (3 divs in SSE2 mode). See sources for details.
4. The editor button now also allows editing by vertex for objects with more than 65535 vertices.
 
 
 
 
Buttons description:
1. rotary: choosing rotary axle: x, y, x+y.
1. rotary: choosing rotary axle: x, y, x+y, keys - for translating the object
using the keyboard.
2. shd. model: choosing shading model: flat, grd (smooth), env (spherical
environment mapping, bump (bump mapping), tex (texture mapping),
pos (position shading depend), dots (app draws only points - nodes of object),
46,4 → 61,4
decrease whole handlers count by enable culling (using appropriate button) - some
back handlers become hidden.
 
Maciej Guba IX 2021
Maciej Guba XII 2021
/programs/demos/view3ds/tex_cat.inc
452,7 → 452,11
 
xchg ax,.x2 ; sort params
mov .x1,ax
if Ext >= MMX
if Ext >= SSE2
movdqu xmm0,[.tex_x1]
pshufd xmm0,xmm0,01001110b
movdqu [.tex_x1],xmm0
else if Ext >= MMX
movq mm0,[.tex_x1]
movq mm1,[.tex_x2]
movq [.tex_x2],mm0
/programs/demos/view3ds/two_tex.inc
5,14 → 5,18
;TEX_X equ 512
;TEX_Y equ 512
;TEXTURE_SIZE EQU (512*512)-1
 
;TEX_SHIFT EQU 9
 
;CATMULL_SHIFT equ 8
;TEXTURE_SIZE EQU (TEX_X * TEX_Y)-1
;Ext = SSE
;SSE = 3
CATMULL_SHIFT equ 8
TEXTURE_SIZE EQU (TEX_X * TEX_Y)-1
 
;SSE3 = 4
;SSE2 = 3
;SSE = 2
;MMX = 1
;NON = 0
;Ext = NON
;use32
;------- Big thanks to Majuma (www.majuma.xt.pl) for absolutely great---
;------- DOS 13h mode demos --------------------------------------------
32,18 → 36,18
;---------------------- pointer io Z buffer-----
;-- Z-buffer - filled with coordinates as dword --------
;-- (Z coor. as word) shl CATMULL_SHIFT ----------------
.b_x1 equ ebp+4 ; procedure don't save registers !!!
.b_y1 equ ebp+6 ; each coordinate as word
.b_x2 equ ebp+8
.b_y2 equ ebp+10 ; b - first texture
.b_x3 equ ebp+12
.b_y3 equ ebp+14 ; e - second texture
.e_x1 equ ebp+16
.e_y1 equ ebp+18
.e_x2 equ ebp+20
.e_y2 equ ebp+22
.e_x3 equ ebp+24
.e_y3 equ ebp+26
.e_x1 equ ebp+4 ; procedure don't save registers !!!
.e_y1 equ ebp+6 ; each coordinate as word
.b_x1 equ ebp+8
.b_y1 equ ebp+10
.e_x2 equ ebp+12
.e_y2 equ ebp+14
.b_x2 equ ebp+16
.b_y2 equ ebp+18 ; b - first texture
.e_x3 equ ebp+20
.e_y3 equ ebp+22 ; e - second texture
.b_x3 equ ebp+24
.b_y3 equ ebp+26
.z1 equ word[ebp+28]
.z2 equ word[ebp+30]
.z3 equ word[ebp+32]
60,31 → 64,31
.y3 equ word[ebp-20]
 
.dx12 equ dword[ebp-24]
.dbx12 equ dword[ebp-28]
.dbx12 equ [ebp-28]
.dby12 equ dword[ebp-32]
.dby12q equ [ebp-32]
.dex12 equ dword[ebp-36]
.dey12 equ dword[ebp-40]
.dey12q equ [ebp-40]
.dz12 equ dword[ebp-44]
.dz12 equ [ebp-44]
 
.dx13 equ dword[ebp-48]
.dbx13 equ dword[ebp-52]
.dbx13 equ [ebp-52]
.dby13 equ dword[ebp-56]
.dby13q equ [ebp-56]
.dex13 equ dword[ebp-60]
.dey13 equ dword[ebp-64]
.dey13q equ [ebp-64]
.dz13 equ dword[ebp-68]
.dz13 equ [ebp-68]
 
.dx23 equ dword[ebp-72]
.dbx23 equ dword[ebp-76]
.dbx23 equ [ebp-76]
.dby23 equ dword[ebp-80]
.dby23q equ [ebp-80]
.dex23 equ dword[ebp-84]
.dey23 equ dword[ebp-88]
.dey23q equ [ebp-88]
.dz23 equ dword[ebp-92]
.dz23 equ [ebp-92]
 
.cx1 equ dword[ebp-96] ; current variables
.cx2 equ dword[ebp-100]
138,8 → 142,6
jmp .sort3
.sort2:
push eax ebx ecx ; store triangle coords in variables
; push ebx
; push ecx
 
mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that
and edx,ebx ; if *all* of them are negative a sign flag is raised
159,8 → 161,8
; jg .loop23_done
; cmp .x3,SIZE_X
; jg .loop23_done ; {
 
 
; sub esp,18*4
; pxor xmm7,xmm7
mov bx,.y2 ; calc delta 12
sub bx,.y1
jnz .bt_dx12_make
171,142 → 173,80
loop @b
jmp .bt_dx12_done
.bt_dx12_make:
 
movsx ebx,bx
mov eax,1 shl 15
cdq
idiv ebx
mov ebx,eax
 
mov ax,.x2
sub ax,.x1
cwde
movsx ebx,bx
shl eax,ROUND
cdq
idiv ebx
imul ebx
sar eax,15 - ROUND
push eax
; mov .dx12,eax
push eax
 
if Ext=SSE
if Ext >= SSE2
sub esp,4*4
movd xmm0,ebx
pshuflw xmm0,xmm0,0
movq xmm1,[.e_x1]
movq xmm2,[.e_x2]
psubw xmm2,xmm1
movdqa xmm3,xmm2
pmullw xmm2,xmm0
pmulhw xmm3,xmm0
punpcklwd xmm2,xmm3
psrad xmm2,15 - ROUND
pshufd xmm2,xmm2,10110001b
movdqu .dey12q,xmm2
else
 
sub esp,16
cvtsi2ss xmm3,ebx ;rcps
; mov eax,255
cvtsi2ss xmm4,[i255d] ;eax
divss xmm3,xmm4
rcpss xmm3,xmm3
; mulss xmm3,xmm4
shufps xmm3,xmm3,0
 
movd mm0,[.b_x1]
movd mm1,[.b_x2]
movd mm2,[.e_x1]
movd mm3,[.e_x2]
; psubsw mm3,mm2
; psubsw mm1,mm0
pxor mm4,mm4
punpcklwd mm0,mm4
punpcklwd mm1,mm4
punpcklwd mm2,mm4
punpcklwd mm3,mm4
; pslld mm0,ROUND
; pslld mm1,ROUND
; pslld mm2,ROUND
; pslld mm3,ROUND
cvtpi2ps xmm0,mm0
movlhps xmm0,xmm0
cvtpi2ps xmm0,mm2
cvtpi2ps xmm1,mm1
movlhps xmm1,xmm1
cvtpi2ps xmm1,mm3
subps xmm1,xmm0
 
; pxor mm4,mm4
; movq mm5,mm1
; movq mm6,mm1
; pcmpeqb mm5,mm4
; psubd mm1,mm0
; psubd mm3,mm2
 
; movq mm0,[.b_x1] ; bx1 by1 bx2 by2
; movq mm1,[.e_x1] ; ex1 ey1 ex2 ey2
; pxor
; punpcklhd mm0,mm1 ; lwd ;
; psubw mm1,mm0 ; mm1, mm0
; pxor mm2,mm2
; pmovmaskb eax,mm1
; and eax,10101010b
; pcmpgtw mm2,mm1
; punpcklwd mm1,mm2
; psllw mm0,ROUND
; psllw mm1,ROUND
; movq mm2,mm0
; psrlq mm0,32
 
; cvtpi2ps xmm0,mm1
; movlhps xmm0,xmm0
; cvtpi2ps xmm0,mm3
; divps xmm1,xmm3
mulps xmm1,xmm3
shufps xmm1,xmm1,10110001b
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords
movhlps xmm1,xmm1
cvtps2pi mm1,xmm1
movq .dey12q,mm0
movq .dby12q,mm1
 
; movd .dex12,mm0
; psrlq mm0,32
; movd .dey12,mm0
; movhlps xmm1,xmm1
; cvtps2pi mm0,xmm1
; movd .dbx12,mm0
; psrlq mm0,32
; movd .dby12,mm0
 
else
mov ax,word[.b_x2]
sub ax,word[.b_x1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dbx12,eax
imul ebx
sar eax,15 - ROUND
; mov .dbx23,eax
push eax
 
mov ax,word[.b_y2]
sub ax,word[.b_y1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dby12,eax
imul ebx
sar eax,15 - ROUND
; mov .dbx23,eax
push eax
 
; mov eax,.dbx12
; mov ebx,.dby12
; int3
 
mov ax,word[.e_x2]
sub ax,word[.e_x1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dex12,eax
imul ebx
sar eax,15 - ROUND
; mov .dbx23,eax
push eax
 
mov ax,word[.e_y2]
sub ax,word[.e_y1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dey12,eax
imul ebx
sar eax,15 - ROUND
; mov .dbx23,eax
push eax
end if
 
end if
 
mov ax,.z2
sub ax,.z1
cwde
shl eax,CATMULL_SHIFT
cdq
idiv ebx
imul ebx
sar eax,15 - ROUND
push eax
; mov .dz12,eax
 
 
.bt_dx12_done:
 
mov bx,.y3 ; calc delta13
317,107 → 257,90
@@:
push edx ;dword 0
loop @b
; movq .dbx13,xmm7
; movdqu .dz13,xmm7
jmp .bt_dx13_done
.bt_dx13_make:
mov ax,.x3
sub ax,.x1
cwde
; sub esp,6*4
 
movsx ebx,bx
shl eax,ROUND
mov eax,1 shl 15
cdq
idiv ebx
; mov .dx13,eax
push eax
; push eax
mov ebx,eax
 
if Ext=SSE
 
cvtsi2ss xmm3,ebx
; mov eax,255
cvtsi2ss xmm4,[i255d]
divss xmm3,xmm4
rcpss xmm3,xmm3
; mulss xmm3,xmm4
shufps xmm3,xmm3,0
sub esp,16
 
movd mm0,[.b_x1]
movd mm1,[.b_x3]
movd mm2,[.e_x1]
movd mm3,[.e_x3]
 
pxor mm4,mm4
punpcklwd mm0,mm4
punpcklwd mm1,mm4
punpcklwd mm2,mm4
punpcklwd mm3,mm4
 
cvtpi2ps xmm0,mm0
movlhps xmm0,xmm0
cvtpi2ps xmm0,mm2
cvtpi2ps xmm1,mm1
movlhps xmm1,xmm1
cvtpi2ps xmm1,mm3
subps xmm1,xmm0
 
; divps xmm1,xmm3
mulps xmm1,xmm3
shufps xmm1,xmm1,10110001b
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords
movhlps xmm1,xmm1
cvtps2pi mm1,xmm1
movq .dey13q,mm0
movq .dby13q,mm1
 
mov ax,.x3
sub ax,.x1
cwde
imul ebx
sar eax,15 - ROUND
push eax
; mov .dx13,eax
if 1
sub esp,4*4
movd xmm0,ebx
pshuflw xmm0,xmm0,0
movq xmm1,[.e_x1]
movq xmm2,[.e_x3]
psubw xmm2,xmm1
movdqa xmm3,xmm2
pmullw xmm2,xmm0
pmulhw xmm3,xmm0
punpcklwd xmm2,xmm3
psrad xmm2,15 - ROUND
pshufd xmm2,xmm2,10110001b
movdqu .dey13q,xmm2
; punpcklwd xmm4,xmm5
; psrad xmm4,15 - ROUND
; movq .tex_dx12,xmm4
else
 
mov ax,word[.b_x3]
sub ax,word[.b_x1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dbx13,eax
; shl eax,ROUND
; cdq
imul ebx
sar eax,15 - ROUND
; mov .dbx23,eax
push eax
 
mov ax,word[.b_y3]
sub ax,word[.b_y1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dby13,eax
imul ebx
sar eax,15 - ROUND
; mov .dbx23,eax
push eax
 
mov ax,word[.e_x3]
sub ax,word[.e_x1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dex13,eax
imul ebx
sar eax,15 - ROUND
; mov .dbx23,eax
push eax
 
mov ax,word[.e_y3]
sub ax,word[.e_y1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dey13,eax
imul ebx
sar eax,15 - ROUND
; mov .dbx23,eax
push eax
 
end if
 
mov ax,.z3
sub ax,.z1
cwde
shl eax,CATMULL_SHIFT
cdq
idiv ebx
imul ebx
sar eax,15 - ROUND
push eax
; mov .dz13,eax
push eax
 
.bt_dx13_done:
 
 
mov bx,.y3 ; calc delta23
sub bx,.y2
jnz .bt_dx23_make
426,62 → 349,52
@@:
push edx ;dword 0
loop @b
; movq .dbx23,xmm7
; movdqu .dz23,xmm7
 
jmp .bt_dx23_done
.bt_dx23_make:
mov ax,.x3
sub ax,.x2
cwde
 
 
movsx ebx,bx
shl eax,ROUND
mov eax,1 shl 15
cdq
idiv ebx
; mov .dx23,eax
push eax
; push eax
mov ebx,eax
 
if Ext=SSE
 
cvtsi2ss xmm3,ebx
; mov eax,255
cvtsi2ss xmm4,[i255d] ;eax
divss xmm3,xmm4
shufps xmm3,xmm3,0
sub esp,16
mov ax,.x3
sub ax,.x2
cwde
imul ebx
sar eax,15 - ROUND
push eax
; mov .dx23,eax
 
movd mm0,[.b_x2]
movd mm1,[.b_x3]
movd mm2,[.e_x2]
movd mm3,[.e_x3]
 
pxor mm4,mm4
punpcklwd mm0,mm4
punpcklwd mm1,mm4
punpcklwd mm2,mm4
punpcklwd mm3,mm4
 
cvtpi2ps xmm0,mm0
movlhps xmm0,xmm0
cvtpi2ps xmm0,mm2
cvtpi2ps xmm1,mm1
movlhps xmm1,xmm1
cvtpi2ps xmm1,mm3
subps xmm1,xmm0
 
divps xmm1,xmm3
shufps xmm1,xmm1,10110001b
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords
movhlps xmm1,xmm1
cvtps2pi mm1,xmm1
movq .dey23q,mm0
movq .dby23q,mm1
 
if Ext >= SSE2
sub esp,4*4
movd xmm0,ebx
pshuflw xmm0,xmm0,0
movq xmm1,[.e_x2]
movq xmm2,[.e_x3]
psubw xmm2,xmm1
movdqa xmm3,xmm2
pmullw xmm2,xmm0
pmulhw xmm3,xmm0
punpcklwd xmm2,xmm3
psrad xmm2,15 - ROUND
pshufd xmm2,xmm2,10110001b
movdqu .dey23q,xmm2
else
 
mov ax,word[.b_x3]
sub ax,word[.b_x2]
cwde
shl eax,ROUND
cdq
idiv ebx
; shl eax,ROUND
; cdq
imul ebx
sar eax,15 - ROUND
; mov .dbx23,eax
push eax
 
488,40 → 401,45
mov ax,word[.b_y3]
sub ax,word[.b_y2]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dby23,eax
imul ebx
sar eax,15 - ROUND
; mov .dbx23,eax
push eax
 
mov ax,word[.e_x3]
sub ax,word[.e_x2]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dex23,eax
imul ebx
sar eax,15 - ROUND
; mov .dbx23,eax
push eax
 
mov ax,word[.e_y3]
sub ax,word[.e_y2]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dey23,eax
imul ebx
sar eax,15 - ROUND
; mov .dbx23,eax
push eax
end if
 
 
mov ax,.z3
sub ax,.z2
cwde
shl eax,CATMULL_SHIFT
cdq
idiv ebx
imul ebx
sar eax,15 - ROUND
push eax
; mov .dz23,eax
push eax
; sub esp,40
 
 
 
 
 
 
 
.bt_dx23_done:
 
movsx eax,.x1
shl eax,ROUND
; mov .cx1,eax
528,7 → 446,17
; mov .cx2,eax
push eax eax
; push eax
sub esp,32
; push eax eax
;push eax
 
movsx eax,.z1
shl eax,CATMULL_SHIFT
; mov .cz1,eax
; mov .cz2,eax
push eax eax
;push eax
 
movsx eax,word[.b_x1]
shl eax,ROUND
mov .cbx1,eax
554,17 → 482,7
shl eax,ROUND
mov .cey1,eax
mov .cey2,eax
sub esp,32
; push eax eax
;push eax
 
movsx eax,.z1
shl eax,CATMULL_SHIFT
; mov .cz1,eax
; mov .cz2,eax
push eax eax
;push eax
 
movsx ecx,.y1
cmp cx,.y2
jge .loop12_done
874,6 → 792,11
 
else
 
 
 
 
 
 
mov eax,.bx2 ; calc .dbx
sub eax,.bx1
cdq
1102,4 → 1025,8
.bl_end:
mov esp,ebp
ret 56
;Ext = SSE2
;the_zero:
;size_y_var:
;size_x_var:
 
/programs/demos/view3ds/view3ds.asm
1,11 → 1,11
 
; application : View3ds ver. 0.075 - tiny .3ds and .asc files viewer
; application : View3ds ver. 0.076 - tiny .3ds and .asc files viewer
; with a few graphics effects demonstration.
; compiler : FASM
; system : KolibriOS
; author : Macgub aka Maciej Guba
; email : macgub3@wp.pl
; web : http://macgub.co.pl, http://macgub.j.pl
; web : http://macgub.co.pl
; Feel free to use this intro in your own distribution of KolibriOS.
; Special greetings to KolibriOS team .
; I hope because my demos Christian Belive will be near to each of You.
18,7 → 18,6
; 1) Read from a file (*.3DS standard)
; 2) Written in manually (at the end of the code) ; now not exist
 
format binary as ""
 
SIZE_X equ 512
SIZE_Y equ 512 ; ///// I want definitely
39,13 → 38,11
SSE2 = 3
SSE3 = 4
Ext = SSE3 ;Ext={ NON | MMX | SSE | SSE2 | SSE3 }
 
; For now only the SSE2 and SSE3 versions are correct. If you have an older CPU,
; use older versions of the app. Probably ver 005 will be OK, but it needs
; re-editing to support new Kolibri features.
 
; 0 for short names (Menuet-compatible), 1 for long names (Kolibri features)
USE_LFN = 1 ; App is Kolibri only now.
 
use32
org 0x0
db 'MENUET01' ; 8 byte id
98,7 → 95,19
call normalize_all_light_vectors
call copy_lights ; to aligned float
call init_triangles_normals2
 
if Ext >= SSE2
call detect_chunks
mov [chunks_number],ecx
mov [chunks_ptr],ebx
 
; esi - tri_ch
; edi - t_ptr - every vertex index - pointer to all triangles
; that have this index
end if
 
call init_point_normals
 
call init_envmap2
call init_envmap_cub
call generate_texture2
118,11 → 127,12
cpuid
bt ecx,0 ; is sse3 on board?
jc @f
mov [max_dr_flg],12
mov byte[max_dr_flg],12
mov [isSSE3],0
@@:
end if
 
 
still:
cmp [edit_flag],1
jne @f
828,6 → 838,7
 
jmp still
 
 
;--------------------------------------------------------------------------------
;-------------------------PROCEDURES---------------------------------------------
;--------------------------------------------------------------------------------
838,6 → 849,7
include "grd_line.inc"
include "b_procs.inc"
include "a_procs.inc"
include "chunks.inc"
include "grd_cat.inc"
include "bump_tex.inc"
include "grd_tex.inc"
879,7 → 891,9
imul edx,ecx
add ebx,edx
push ebx
lea ecx,[ebx*2]
mov ecx,ebx
shl ecx,2
; lea ecx,[ebx*2]
lea ebx,[ebx*3]
 
cmp [dr_flag],12
931,11 → 945,12
check_bar
jne .no_edit
add ecx,[vertices_index_ptr]
mov cx,word[ecx]
inc cx
mov ecx,[ecx]
; cmp ecx,-1
; je .no_edit
 
 
mov [vertex_edit_no],cx ;if vert_edit_no = 0, no vertex selected
mov [vertex_edit_no],ecx ;if vert_edit_no = -1, no vertex selected
 
mov eax,dword[.x_coord]
mov dword[edit_end_x],eax
949,7 → 964,7
; add ecx,[vertices_index_ptr]
; mov cx,[ecx]
; inc cx
cmp [vertex_edit_no],0 ; cx ; vertex number
cmp [vertex_edit_no],-1 ; cx ; vertex number
je .end
push dword[.x_coord]
pop dword[edit_end_x]
961,8 → 976,8
check_bar
jne .end
 
movzx esi,[vertex_edit_no]
dec esi
mov esi,[vertex_edit_no]
; dec esi
lea esi,[esi*3]
add esi,esi
add esi,[points_translated_ptr]
996,8 → 1011,8
call rotary
 
; inject into vertex list
movzx edi,[vertex_edit_no]
dec edi
mov edi,[vertex_edit_no]
; dec edi
lea edi,[edi*3]
shl edi,2
add edi,[points_ptr]
1012,7 → 1027,7
 
 
mov dword[edit_end_x],0
mov [vertex_edit_no],0
mov [vertex_edit_no],-1
 
.no_edit:
.end:
1055,7 → 1070,7
 
 
mov ecx,[.temp]
add ecx,ecx
shl ecx,2
add ecx,256
mov eax,68
mov ebx,20
1477,18 → 1492,84
 
if Ext >= SSE2
init_point_normals:
;in:
; esi - tri_ch
; edi - t_ptr
.z equ dword [ebp-8]
.y equ dword [ebp-12]
.x equ [ebp-16]
.point_number equ dword [ebp-28]
.hit_faces equ dword [ebp-32]
.t_ptr equ dword [ebp-36]
.tri_ch equ dword [ebp-40]
.max_val equ dword [ebp-44]
 
push ebp
mov ebp,esp
sub esp,64
and ebp,-16
mov .t_ptr,edi
mov .tri_ch,esi
 
 
 
 
 
 
mov ecx,[triangles_count_var]
shl ecx,3
lea ecx,[ecx*3]
add ecx,.tri_ch
mov .max_val,ecx
xor edx,edx
 
.lp1:
mov ebx,edx
shl ebx,2
add ebx,.t_ptr
mov esi,[ebx]
or esi,esi
jz .old
 
xorps xmm1,xmm1
xor ecx,ecx
@@:
mov eax,[esi+4] ; eax - tri index
mov ebx,[esi]
imul eax,[i12]
add eax,[triangles_normals_ptr]
movups xmm0,[eax]
inc ecx
addps xmm1,xmm0
add esi,8
cmp esi,.max_val ; some objects need this check
ja .old ;old method
cmp ebx,[esi]
je @b
 
cvtsi2ss xmm2,ecx
rcpss xmm2,xmm2
shufps xmm2,xmm2,0
mulps xmm1,xmm2
mov edi,edx
imul edi,[i12]
add edi,[points_normals_ptr]
movlps [edi],xmm1
movhlps xmm1,xmm1
movss [edi+8],xmm1
call normalize_vector
 
inc edx
cmp edx,[points_count_var]
jnz .lp1
 
jmp .end
 
 
.old:
 
mov edi,[points_normals_ptr]
mov .point_number,0
mov .point_number,edx
.ipn_loop:
movd xmm0,.point_number
pshufd xmm0,xmm0,0
1547,7 → 1628,21
mov edx,.point_number
cmp edx,[points_count_var]
jne .ipn_loop
.end:
 
mov eax,68
mov ebx,13
mov ecx,.t_ptr
int 0x40
 
mov eax,68
mov ebx,13
mov ecx,.tri_ch
int 0x40
 
 
 
 
add esp,64
pop ebp
ret
1776,10 → 1871,10
 
emms
; update translated list MMX required
cmp [vertex_edit_no],0
cmp [vertex_edit_no],-1
je @f
movzx eax,[vertex_edit_no]
dec eax
mov eax,[vertex_edit_no]
; dec eax
movd mm0,[edit_end_x]
psubw mm0,[edit_start_x]
lea eax,[eax*3]
2383,42 → 2478,36
push word .zz2
push word .zz1
 
mov esi, .point_index3 ; tex map coords
shl esi,2
add esi,[tex_points_ptr]
push dword[esi]
mov esi, .point_index2
shl esi,2
add esi,[tex_points_ptr]
push dword[esi]
mov esi, .point_index1
shl esi,2
add esi,[tex_points_ptr]
push dword[esi]
 
lea esi, .point_index1 ; env coords
sub esp,12
fninit
lea esi, .point_index3 ; env coords
mov edi,esp
sub esp,24
mov ecx,3
@@:
mov eax,dword[esi]
shl eax,2
mov ebx,eax
; mov ebx,eax
add ebx,[tex_points_ptr]
mov ebx,[ebx]
mov [edi-8],ebx
lea eax,[eax*3]
shl eax,2
add eax,[points_normals_rot_ptr]
; texture x=(rotated point normal -> x * 255)+255
fld dword[eax]
fimul [correct_tex]
fiadd [correct_tex]
fistp word[edi]
 
fistp word[edi-4]
and word[edi-4],0x7fff ; some objects need it
; texture y=(rotated point normal -> y * 255)+255
fld dword[eax+4]
fimul [correct_tex]
fiadd [correct_tex]
fistp word[edi+2]
and word[edi+2],0x7fff ; some objects need it
add edi,4
add esi,4
fistp word[edi-2]
and word[edi-2],0x7fff ; some objects need it
 
sub edi,8
sub esi,4
loop @b
 
mov eax, .xx1
2434,18 → 2523,7
 
.bump_tex:
push ebp
mov esi, .point_index3 ; tex map coords
shl esi,2
add esi,[tex_points_ptr]
push dword[esi]
mov esi, .point_index2
shl esi,2
add esi,[tex_points_ptr]
push dword[esi]
mov esi, .point_index1
shl esi,2
add esi,[tex_points_ptr]
push dword[esi]
fninit
 
push dword texmap
 
2455,41 → 2533,37
push word .zz2
push word .zz1
 
lea esi, .index1x12 ; env coords
sub esp,12
 
lea ebx, .point_index1
sub esp,36
mov edi,esp
mov ecx,3
@@:
mov eax,dword[esi]
add eax,[points_normals_rot_ptr]
mov eax,[ebx]
shl eax,2
mov esi,eax
lea esi,[esi*3]
add eax,[tex_points_ptr]
mov eax,[eax]
ror eax,16
mov [edi],eax
mov [edi+8],eax
 
add esi,[points_normals_rot_ptr]
; texture x=(rotated point normal -> x * 255)+255
fld dword[eax]
fld dword[esi]
fimul [correct_tex]
fiadd [correct_tex]
fistp word[edi]
fistp word[edi+6] ; env coords
; texture y=(rotated point normal -> y * 255)+255
fld dword[eax+4]
fld dword[esi+4]
fimul [correct_tex]
fiadd [correct_tex]
fistp word[edi+2]
 
add edi,4
add esi,4
fistp word[edi+4]
add ebx,4
add edi,12
loop @b
 
mov esi, .point_index3 ; bump map coords
shl esi,2
add esi,[tex_points_ptr]
push dword[esi]
mov esi, .point_index2
shl esi,2
add esi,[tex_points_ptr]
push dword[esi]
mov esi, .point_index1
shl esi,2
add esi,[tex_points_ptr]
push dword[esi]
 
mov eax,dword .xx1
mov ebx,dword .xx2
mov ecx,dword .xx3
2861,138 → 2935,110
ret
 
 
draw_handlers:
 
 
 
 
 
draw_handlers:
; in eax - render model
push ebp
mov ebp,esp
 
.counter equ ebp-16
.xres3m18 equ ebp-8
.xres2m12 equ ebp-12
; emms
.fac equ dword[ebp-16]
.xplus_scr equ ebp-8
.xplus_index equ ebp-12
.dr_model equ dword[ebp-4]
 
sub esp,16
mov .dr_model,eax
 
; init counter
sub esp,12
push dword 0
mov .dr_model,eax
movzx eax,word[size_x_var]
cmp .dr_model,12
jge @f
lea ebx,[eax*3]
sub ebx,18
add eax,eax
sub eax,12
mov [.xres3m18],ebx
mov [.xres2m12],eax
jmp .f
sub ebx,3*6
mov [.xplus_scr],ebx ; for scr 1st cause
mov .fac,3
jmp .in_r
@@:
lea ebx,[eax*4] ; for scr 2cond cause
sub ebx,4*6
mov [.xplus_scr],ebx
mov .fac,4
.in_r:
 
lea ebx,[eax*4]
sub ebx,4*6
add eax,eax
sub eax,3*4
mov [.xres3m18],ebx
mov [.xres2m12],eax
.f:
mov [.xplus_index],ebx ; index
 
xor ecx,ecx
mov eax,4 shl 16 + 4
movd xmm0,[size_y_var]
movd xmm1,eax
psubw xmm0,xmm1
pshuflw xmm0,xmm0,00000001b
 
 
mov esi,[points_translated_ptr]
.loop:
push esi
; DO culling AT FIRST
.l:
push ecx
cmp [culling_flag],1 ; (if culling_flag = 1)
jne .no_culling
mov edi,[.counter] ; *********************************
mov edi,ecx ; *********************************
lea edi,[edi*3]
shl edi,2
add edi,[points_normals_rot_ptr]
mov eax,[edi+8] ; check sign of z coof
shr eax,31
cmp eax,1
jnz .skip
bt dword[edi+8],31
jnc .skip
.no_culling:
mov eax,[esi]
movzx ebx,ax ; ebx - x
shr eax,16 ; eax - y
cmp eax,4 ; check if markers not exceedes screen
jle .skip
cmp ebx,4
jle .skip
movzx edx,word[size_x_var]
sub edx,4
movzx ecx,word[size_y_var]
sub ecx,4
cmp ebx,edx
jge .skip
cmp eax,ecx
jge .skip
mov esi,ecx
lea esi,[esi*3]
add esi,esi
add esi,[points_translated_ptr]
movd xmm2,[esi]
movd xmm3,[esi]
pcmpgtw xmm2,xmm0
pcmpgtw xmm3,xmm1
pxor xmm3,xmm2
movd eax,xmm3
cmp eax,-1
jne .skip
 
movzx eax,word[esi]
movzx ebx,word[esi+2]
sub eax,2
sub ebx,2
movzx edx,word[size_x_var]
; sub ebx,3
; sub eax,3
imul eax,edx
add eax,ebx
push eax
lea edi,[eax*3]
cmp .dr_model,12
jl @f
add edi,[esp]
@@:
add esp,4
lea eax,[eax*2]
; draw bar 6x6
add edi,[screen_ptr]
add eax,dword[vertices_index_ptr]
 
 
 
 
mov edx,[.counter]
imul ebx,edx
add ebx,eax
mov edi,ebx
imul ebx,.fac
shl edi,2
add ebx,[screen_ptr]
add edi,[vertices_index_ptr]
mov eax,ecx
cld
mov ecx,6
 
.oop:
.l2:
push ecx
mov ecx,6
 
.do:
mov word[edi],0x0000 ;ax
mov byte[edi+2],0xff ;al
mov word[eax],dx
add eax,2
cmp .dr_model,12
jl @f
add edi,4
loop .do
jmp .ad
@@:
add edi,3
loop .do
.ad:
add edi,[.xres3m18]
add eax,[.xres2m12]
mov ecx,6 ; draw bar
.l1:
mov word[ebx],0
mov byte[ebx+2],0xff
stosd
add ebx,.fac
loop .l1
add ebx,[.xplus_scr]
add edi,[.xplus_index]
pop ecx
loop .oop
 
loop .l2
.skip:
pop esi
add esi,6
inc dword[.counter]
mov ecx,[.counter]
pop ecx
inc ecx
cmp ecx,[points_count_var]
jng .loop
jna .l
 
mov esp,ebp
pop ebp
 
ret
 
 
 
fill_Z_buffer:
mov eax,0x70000000
cmp [dr_flag],11
3033,11 → 3079,7
xor ebp,ebp
mov [points_count_var],ebx
mov [triangles_count_var],ebx
if USE_LFN = 0
mov esi,SourceFile
else
mov esi,[fptr]
end if
 
cmp [esi],word 4D4Dh
je @f ;Must be legal .3DS file
3096,11 → 3138,13
mov edx,ecx
add esi,8
@@:
 
add ebx,6
add esi,12
lea ecx,[ecx*3]
add ecx,ecx
add ebx,ecx
add ecx,ecx
add esi,ecx
; dec ecx
loop @b
; loop @b
@@:
 
@@:
3114,9 → 3158,11
add esi,8
 
@@:
add esi,8
dec ecx
jnz @b
shl ecx,3
add esi,ecx
; dec ecx
; jnz @b
; loop @b
; xor ecx,ecx
add ebp,edx
jmp .find4k
3198,8 → 3244,9
 
add ebx,6
add esi,12
dec ecx
jnz @b
; dec ecx
; jnz @b
loop @b
@@:
; mov dword[points+ebx],-1
push edi
3227,8 → 3274,9
add dword[edi-8],ebp
add dword[edi-4],ebp
add esi,8
dec ecx
jnz @b
; dec ecx
; jnz @b
loop @b
add ebp,edx
jmp .find4k
mov eax,-1 ;<---mark if OK
3235,6 → 3283,8
.exit:
mov dword[edi],-1
ret
 
 
alloc_mem_for_tp:
mov eax, 68
cmp [re_alloc_flag],1
3265,7 → 3315,7
 
mov eax, 68
mov ecx, [triangles_count_var]
lea ecx, [3+ecx*3]
lea ecx, [6+ecx*3]
shl ecx, 2
mov edx,[triangles_normals_ptr]
int 0x40 ; -> allocate memory for triangles normals
3274,7 → 3324,7
 
mov eax, 68
mov ecx, [points_count_var]
lea ecx,[3+ecx*3]
lea ecx,[6+ecx*3]
shl ecx, 2
mov edx,[points_normals_ptr]
int 0x40
3284,7 → 3334,7
mov eax, 68
; mov ebx, 12
mov ecx, [points_count_var]
lea ecx,[3+ecx*3]
lea ecx,[10+ecx*3]
shl ecx, 2
mov edx,[points_normals_rot_ptr]
int 0x40
3291,6 → 3341,7
mov [points_normals_rot_ptr], eax
 
mov eax, 68
 
mov edx,[points_ptr]
int 0x40
mov [points_ptr], eax
3304,6 → 3355,7
mov ebx, 12
mov ecx, [points_count_var]
shl ecx,2
add ecx,32
mov edx,[tex_points_ptr]
int 0x40
mov [tex_points_ptr], eax
3310,7 → 3362,7
 
mov eax, 68
mov ecx, [points_count_var]
inc ecx
add ecx,10
shl ecx, 3
mov edx,[points_translated_ptr]
int 0x40
3417,7 → 3469,7
mov bx,[size_x_var]
shl ebx,16
add ebx,120*65536+70 ; [x start] *65536 + [y start]
mov ecx,30 shl 16 + 100
mov ecx,30 shl 16 + 150
xor edx,edx
int 0x40
 
3467,7 → 3519,7
int 40h
pop esi
add esi,4
cmp esi,12
cmp esi,16
jnz .nxxx
ret
; *********************************************