Subversion Repositories Kolibri OS

Compare Revisions

Ignore whitespace Rev 9236 → Rev 9237

/programs/demos/view3ds/3dmath.inc
4,13 → 4,108
vec_x equ 0
vec_y equ 4
vec_z equ 8
; 3d point - triple integer word coordinate
; vector - triple float dword coordinate
;----------------------in: --------------------------------
;------------------------ esi - pointer to 1st 3d point ---
;------------------------ edi - pointer to 2nd 3d point ---
;------------------------ ebx - pointer to result vector --
;---------------------- out : none ------------------------
 
if 0 ; Ext >= SSE3
; NOTE: the 'if 0' above excludes this whole routine from assembly.
calc_bounding_box:
; Tests a ray against 8 consecutive 16-byte box records starting at
; aabb1 and sets one bit of eax for every record that passes the test.
; in:
; xmm0 - normal vector of ray
; xmm1 - light origin
; out:
; eax - axis aligned bounding boxes bit mask
; Modifies ebx, ecx, esi, edi and xmm0-xmm7 in the visible code; the
; called routines reverse_mx_3x3 and rotary may modify more.
 
; Locals live below a 16-byte aligned ebp (see the prologue).
.rmx equ [ebp-36]
.nray equ [ebp-64]
.origin equ [ebp-80]
.dirfrac equ [ebp-96]
.nrayr equ [ebp-112]
.originr equ [ebp-128]
.tmin equ [ebp-132]
.tmax equ [ebp-136]
 
 
push ebp
mov ebp,esp
; align the frame base down so that movaps on the locals is legal
and ebp,-16
sub esp,160
 
movss xmm5,[rsscale]
; NOTE(review): with xmm1 as the source operand the two upper lanes of
; xmm5 receive lane 0 of xmm1, not rsscale. If a plain broadcast of
; rsscale was intended this should read 'shufps xmm5,xmm5,0' - confirm.
shufps xmm5,xmm1,0
; two words starting at vect_x are widened to dwords and converted to float
movd xmm2,[vect_x]
punpcklwd xmm2,[the_zero]
cvtdq2ps xmm2,xmm2
subps xmm1,xmm2
movaps .origin,xmm1
mulps xmm0,xmm5
movaps .nray,xmm0
 
mov esi,matrix
lea edi,.rmx
call reverse_mx_3x3
 
; in: esi - ptr to points(normals), each point(normal) coefficient as dword
; edi - ptr to rotated points(normals)
; ebx - ptr to 3x3 (9 dwords, 36 bytes) rotation matrix
; ecx - number of points(normals)
 
; reverse transform
lea esi,.nray
lea edi,.nrayr
lea ebx,.rmx
mov ecx,1
call rotary
 
lea esi,.origin
lea edi,.originr
lea ebx,.rmx
mov ecx,1
call rotary
 
; ecx - index of the current box, ebx - pointer to it, eax - result mask
xor ecx,ecx
mov ebx,aabb1
xor eax,eax
; approximate reciprocal of the transformed ray direction
rcpps xmm7,.nrayr
movaps .dirfrac,xmm7
 
.nx_aabb:
movaps xmm5,[ebx]
movaps xmm6,[ebx]
minps xmm5,[the_zero]
maxps xmm6,[the_zero]
; xmm5 - lb corner of AABB with minimal coordinates
; xmm6 - rt corner of AABB with maximum coords
subps xmm5,.originr
subps xmm6,.originr
mulps xmm5,.dirfrac ; xmm5 - tx1, ty1
mulps xmm6,.dirfrac ; xmm6 - tx2, ty2
movaps xmm1,xmm6
movaps xmm2,xmm6
 
 
; per lane: xmm1 = smaller of the two distances, xmm2 = larger
minps xmm1,xmm5
maxps xmm2,xmm5
 
; swap lanes 0 and 1 so the scalar ops below combine the x and y lanes;
; lane 2 does not take part in the comparison
movaps xmm5,xmm1
movaps xmm6,xmm2
shufps xmm5,xmm5,11100001b
shufps xmm6,xmm6,11100001b
maxss xmm1,xmm5 ;t min
minss xmm2,xmm6 ;t max
; no intersection when t max is below t min
comiss xmm2,xmm1
jb .no_inter
.yes:
bts eax,ecx
.no_inter:
add ebx,16
inc ecx
cmp ecx,8
jne .nx_aabb
 
; out: eax - bit mask
add esp,160
pop ebp
ret
end if
 
reverse_mx_3x3:
; esi - source matrix
; edi - desired reversed matrix
141,6 → 236,13
mov esp,ebp
pop ebp
ret
; 3d point - triple integer word coordinate
; vector - triple float dword coordinate
;----------------------in: --------------------------------
;------------------------ esi - pointer to 1st 3d point ---
;------------------------ edi - pointer to 2nd 3d point ---
;------------------------ ebx - pointer to result vector --
;---------------------- out : none ------------------------
 
make_vector_r:
if Ext < SSE2
194,17 → 296,37
fsubp ;st1 ,st
fstp dword [ebx+vec_z]
ret
cross_aligned:
; Cross product of two float vectors, all accessed with movaps
; (so every pointer has to be 16-byte aligned).
; in:  esi - pointer to the first vector  (a)
;      edi - pointer to the second vector (b)
;      ebx - pointer to the result vector
; out: [ebx] = ay*bz-az*by, az*bx-ax*bz, ax*by-ay*bx, ax*bx-ax*bx
; Uses xmm0-xmm3.
        movaps  xmm0,[esi]
        movaps  xmm2,[edi]
        movaps  xmm1,xmm0               ; second copy of a
        movaps  xmm3,xmm2               ; second copy of b
        shufps  xmm0,xmm0,00001001b     ; ay, az, ax, ax
        shufps  xmm2,xmm2,00010010b     ; bz, bx, by, bx
        shufps  xmm1,xmm1,00010010b     ; az, ax, ay, ax
        shufps  xmm3,xmm3,00001001b     ; by, bz, bx, bx
        mulps   xmm0,xmm2
        mulps   xmm1,xmm3
        subps   xmm0,xmm1
        movaps  [ebx],xmm0
        ret
;----------------------- in: ------------------------------
;---------------------------- edi - pointer to vector -----
;----------------------- out : none
normalize_vector:
if Ext >= SSE3
if Ext >= SSE2
movups xmm0,[edi]
andps xmm0,[zero_hgst_dd]
movups xmm1,xmm0
mulps xmm0,xmm0
haddps xmm0,xmm0
haddps xmm0,xmm0
movhlps xmm2,xmm0
addps xmm0,xmm2
movaps xmm2,xmm0
shufps xmm2,xmm2,11100101b
addps xmm0,xmm2
shufps xmm0,xmm0,0
; haddps xmm0,xmm0
; haddps xmm0,xmm0
rsqrtps xmm0,xmm0
mulps xmm0,xmm1
movlps [edi],xmm0
559,7 → 681,7
; packsdw xmm0,xmm0
; movq [edi]
fld dword[esi]
fiadd [vect_x]
fiadd word[vect_x]
fistp word[edi]
fld dword[esi+4]
fiadd [vect_y]
/programs/demos/view3ds/3glass.inc
1,5 → 1,5
; Glass like rendering triangle by Maciej Guba.
; http://macgub.hekko.pl, macgub3@wp.pl
; http://macgub.co.pl, macgub3@wp.pl
 
ROUND2 equ 10
glass_tri:
/programs/demos/view3ds/3glass_tex.inc
1,762 → 1,762
; Bilinear filtering, real Phongs shading and glass like parallel.
; Thanks to authors of 3dica tutorial.
; Implemented in FASM by Maciej Guba.
; http://macgub.j.pl
 
ROUND2 equ 10
 
glass_tex_tri:
;----Procedure renders a Phong shaded triangle with z coord
;----interpolation ( Catmull algorithm ), each pixel is -
;----covered by texture using bilinear filtering.--------
;----I normalize normal vector in every pixel -----------
;------------------in - eax - x1 shl 16 + y1 ------------
;---------------------- ebx - x2 shl 16 + y2 ------------
;---------------------- ecx - x3 shl 16 + y3 ------------
;---------------------- esi - pointer to stencil buffer--
;---------------------- filled with dd float variables-
;---------------------- edi - pointer to screen buffer---
;---------------------- edx - pointer to texture---------
;---------------------- xmm0 - 1st normal vector --------
;---------------------- xmm1 - 2nd normal vector --------
;---------------------- xmm2 - 3rd normal vector --------
;---------------------- xmm3 - normalized light vector --
;---------------------- xmm4 - lo -> hi z1, z2, z3 coords
;---------------------- as dwords floats ---------------
;---------------------- xmm5 - lo -> hi y_min, y_max, ---
;---------------------- x_min, x_max as dword integers -
;---------------------- xmm6 - lo -> hi tx1, ty1, tx2, --
;---------------------- ty2, tx3, ty3 as word, xres as--
;---------------------- dword integers------------------
;---------------------- stack - no parameters -----------
;--------------------------------------------------------
;----------------- procedure does not save registers !! -
; The triangle is drawn one scanline at a time through
; glass_tex_line. Per-line x, normal and texture/z values
; are stepped along edges 1-3 and 1-2 first, then along
; edges 1-3 and 2-3.



; Frame: esp is lowered by 512 bytes; ebp is moved down and
; rounded to a multiple of 16 so the movaps/movdqa accesses
; to the locals below are aligned.
push ebp
mov ebp,esp
sub esp,512
sub ebp,16
and ebp,0xfffffff0

; Several locals are filled in groups by one 16-byte store:
;   .z1    <- xmm4 : z1, z2, z3 and the dword after .z3
;   .y_min <- xmm5 : y_min, y_max, x_min, x_max
;   .tx1   <- xmm6 : tx1, ty1, tx2, ty2, tx3, ty3, x_res
; .y1/.y2/.y3 are written as dwords, so each store also
; fills the .x? word that follows it.
.1_nv equ [ebp-16]
.2_nv equ [ebp-32]
.3_nv equ [ebp-48]
.l_v equ [ebp-64]
.z3 equ [ebp-72]
.z2 equ [ebp-76]
.z1 equ [ebp-80]
.x1 equ [ebp-82]
.y1 equ [ebp-84]
.x2 equ [ebp-86]
.y2 equ [ebp-88]
.x3 equ [ebp-90]
.y3 equ [ebp-92]
.Zbuf equ [ebp-96]
.x_max equ [ebp-100]
.x_min equ [ebp-104]
.y_max equ [ebp-108]
.y_min equ [ebp-112]
.screen equ [ebp-116]
.dx12 equ [ebp-120]
.dx13 equ [ebp-124]
.dx23 equ [ebp-128]
.dn12 equ [ebp-144]
.dn13 equ [ebp-160]
.dn23 equ [ebp-176]

.cnv1 equ [ebp-192] ; cur normal vectors
.cnv2 equ [ebp-208]
.x_res equ [ebp-212]
.ty3 equ [ebp-214]
.tx3 equ [ebp-216]
.ty2 equ [ebp-218]
.tx2 equ [ebp-220]
.ty1 equ [ebp-222]
.tx1 equ [ebp-224]
.dz12 equ [ebp-232]
.dty12 equ [ebp-236]
.dtx12 equ [ebp-240]
.dz13 equ [ebp-248]
.dty13 equ [ebp-252]
.dtx13 equ [ebp-256]
.dz23 equ [ebp-264]
.dty23 equ [ebp-268]
.dtx23 equ [ebp-272]
.cz1 equ [ebp-280]
.cty1 equ [ebp-284]
.ctx1 equ [ebp-288]
.cz2 equ [ebp-296]
.cty2 equ [ebp-300]
.ctx2 equ [ebp-304]
.tx_ptr equ [ebp-308]


emms
; movd .x_res,xmm7
; Order the three vertices by ascending y (low word of eax/ebx/ecx,
; signed compare). Every swap is mirrored in the z lanes of xmm4,
; the texture coordinate dwords of xmm6 and the normal registers.
.sort3: ; sort triangle coordinates...
cmp ax,bx
jle .sort1
xchg eax,ebx
shufps xmm4,xmm4,11100001b
shufps xmm6,xmm6,11100001b
movaps xmm7,xmm0
movaps xmm0,xmm1
movaps xmm1,xmm7


.sort1:
cmp bx,cx
jle .sort2
xchg ebx,ecx
shufps xmm4,xmm4,11011000b
shufps xmm6,xmm6,11011000b
movaps xmm7,xmm1
movaps xmm1,xmm2
movaps xmm2,xmm7

; vertices 2 and 3 were exchanged, so pair 1/2 is checked again
jmp .sort3

.sort2:
; movq .tx1,xmm6
; pshufd xmm6,xmm6,01001110b
; movd .tx3,xmm6
movaps .tx1,xmm6
movaps .z1,xmm4
mov .y1,eax
mov .y2,ebx
mov .y3,ecx

movdqa .y_min,xmm5
if 1 ; check if at least a fragment
packssdw xmm5,xmm5 ; of the triangle is in the visible area
pshuflw xmm5,xmm5,11011000b
; 8 words are loaded starting at .y3: y3, x3, y2, x2, y1, x1 and
; the two halves of .z1.
; NOTE(review): the mask below keeps the result bits of all 8 word
; lanes, so the two .z1 halves take part in the test - confirm
; whether that is intended.
movdqu xmm7,.y3
movdqa xmm6,xmm5
pshufd xmm5,xmm5,0 ; xmm5 lo-hi -> broadcasted y_min, x_min
pshufd xmm6,xmm6,01010101b ;xmm6 -> brd y_max x_max
movdqa xmm4,xmm7
; a lane of xmm7 ends up set when: min < value and not (max < value)
pcmpgtw xmm7,xmm5
pcmpgtw xmm4,xmm6
pxor xmm7,xmm4
pmovmskb eax,xmm7
and eax,0x00aaaaaa
or eax,eax
; no lane inside its range -> nothing is drawn
jz .rpt_loop2_end
end if
movaps .1_nv,xmm0
movaps .2_nv,xmm1
movaps .3_nv,xmm2
movaps .l_v,xmm3
mov .Zbuf,esi
mov .screen,edi
mov .tx_ptr,edx



; ---- per-scanline steps along edge 1-2 ----
mov bx,.y2 ; calc deltas
sub bx,.y1
jnz .rpt_dx12_make

; y1 = y2: all steps of this edge are zero
; (the 16-byte store covers .dtx12, .dty12, .dz12 and 4 more bytes)
xorps xmm7,xmm7
mov dword .dx12,0
movaps .dtx12,xmm7
movaps .dn12,xmm7
jmp .rpt_dx12_done

.rpt_dx12_make:
; dx12 = ((x2 - x1) shl ROUND2) / (y2 - y1), fixed point
mov ax,.x2
sub ax,.x1
cwde
movsx ebx,bx
shl eax,ROUND2
cdq
idiv ebx
mov .dx12,eax

; xmm6 = (y2 - y1) as float in all four lanes
cvtsi2ss xmm6,ebx
shufps xmm6,xmm6,0
movss xmm5,.z2
subss xmm5,.z1
divss xmm5,xmm6
movss .dz12,xmm5

; texture coordinate words are zero-extended to dwords, subtracted
; as integers and only then converted to float
movd xmm0,.tx1
movd xmm2,.tx2
pxor xmm1,xmm1
punpcklwd xmm0,xmm1
punpcklwd xmm2,xmm1
psubd xmm2,xmm0
; cvtdq2ps xmm0,xmm0
cvtdq2ps xmm2,xmm2
; movlps .ctx1,xmm0
; movlps .ctx2,xmm2
; subps xmm2,xmm0
divps xmm2,xmm6
movlps .dtx12,xmm2

movaps xmm0,.2_nv
subps xmm0,.1_nv
divps xmm0,xmm6
movaps .dn12,xmm0


.rpt_dx12_done:

; ---- per-scanline steps along edge 1-3 (same scheme) ----
mov bx,.y3 ; calc deltas
sub bx,.y1
jnz .rpt_dx13_make

xorps xmm7,xmm7
mov dword .dx13,0
movaps .dtx13,xmm7
movaps .dn13,xmm7
jmp .rpt_dx13_done

.rpt_dx13_make:
mov ax,.x3
sub ax,.x1
cwde
movsx ebx,bx
shl eax,ROUND2
cdq
idiv ebx
mov .dx13,eax


cvtsi2ss xmm6,ebx
shufps xmm6,xmm6,0

movss xmm5,.z3
subss xmm5,.z1
divss xmm5,xmm6
movss .dz13,xmm5

movd xmm0,.tx1
movd xmm2,.tx3
pxor xmm1,xmm1
punpcklwd xmm0,xmm1
punpcklwd xmm2,xmm1
psubd xmm2,xmm0
; cvtdq2ps xmm0,xmm0
cvtdq2ps xmm2,xmm2
; subps xmm2,xmm0
divps xmm2,xmm6
movlps .dtx13,xmm2



movaps xmm0,.3_nv
subps xmm0,.1_nv
divps xmm0,xmm6
movaps .dn13,xmm0

.rpt_dx13_done:

; ---- per-scanline steps along edge 2-3 (same scheme) ----
mov bx,.y3 ; calc deltas
sub bx,.y2
jnz .rpt_dx23_make

xorps xmm7,xmm7
mov dword .dx23,0
movaps .dtx23,xmm7
movaps .dn23,xmm7
jmp .rpt_dx23_done

.rpt_dx23_make:
mov ax,.x3
sub ax,.x2
cwde
movsx ebx,bx
shl eax,ROUND2
cdq
idiv ebx
mov .dx23,eax

cvtsi2ss xmm6,ebx
shufps xmm6,xmm6,0
movss xmm5,.z3
subss xmm5,.z2
divss xmm5,xmm6
movss .dz23,xmm5

movd xmm0,.tx2
movd xmm2,.tx3
pxor xmm1,xmm1
punpcklwd xmm0,xmm1
punpcklwd xmm2,xmm1
psubd xmm2,xmm0
; cvtdq2ps xmm0,xmm0
cvtdq2ps xmm2,xmm2
; movlps .ctx1,xmm0
; movlps .ctx2,xmm2
; subps xmm2,xmm0
divps xmm2,xmm6
movlps .dtx23,xmm2




movaps xmm0,.3_nv
subps xmm0,.2_nv
divps xmm0,xmm6
movaps .dn23,xmm0

.rpt_dx23_done:

; Both running edge positions start at vertex 1:
; eax / ebx - current x of the two edges, fixed point (ROUND2 bits)
; .ctx?/.cty?/.cz? and .cnv? - current texture/z and normal values
movsx eax,word .x1
shl eax,ROUND2
mov ebx,eax
mov edx,.z1
movd xmm1,.tx1
pxor xmm2,xmm2
punpcklwd xmm1,xmm2
cvtdq2ps xmm1,xmm1

mov .cz1,edx
mov .cz2,edx
movaps xmm0,.1_nv
movlps .ctx1,xmm1
movlps .ctx2,xmm1
movaps .cnv1,xmm0
movaps .cnv2,xmm0

; mov edx,.dx13
; cmp edx,.dx12
; jg .second_cause

; ---- scanlines y1 .. y2-1 : edges 1-3 (eax) and 1-2 (ebx) ----
movsx ecx,word .y1
cmp cx,.y2

jge .rpt_loop1_end

.rpt_loop1:
; eax, ebx and ecx have to survive the call; they are shifted to
; integer x only inside the pushad/popad pair
pushad

movaps xmm2,.y_min
movaps xmm0,.cnv1
movaps xmm1,.cnv2
; movlps xmm3,.cz1 ; cz1, cz2 both
; 16-byte loads: tx, ty, z and the dword that follows .cz?
movaps xmm3,.ctx1
movaps xmm5,.ctx2
movaps xmm4,.l_v
movd xmm6,.x_res
sar ebx,ROUND2
sar eax,ROUND2
mov edx,.tx_ptr
mov edi,.screen

mov esi,.Zbuf

call glass_tex_line

popad
; advance both edges by one scanline
movaps xmm0,.cnv1
movaps xmm1,.cnv2
; movss xmm2,.cz1
; movss xmm3,.cz2
movaps xmm2,.ctx1
movaps xmm3,.ctx2
addps xmm0,.dn13
addps xmm1,.dn12
addps xmm2,.dtx13
addps xmm3,.dtx12
add eax,.dx13
add ebx,.dx12

movaps .cnv1,xmm0
movaps .cnv2,xmm1
; movss .cz1,xmm2
; movss .cz2,xmm3
movaps .ctx1,xmm2
movaps .ctx2,xmm3
add ecx,1
cmp cx,.y2
jl .rpt_loop1


; jmp .rpt_loop2_end


.rpt_loop1_end:
; ---- scanlines y2 .. y3-1 : edges 1-3 (eax) and 2-3 (ebx) ----
movsx ecx,word .y2
cmp cx,.y3
jge .rpt_loop2_end

; the second edge restarts exactly at vertex 2
movsx ebx,word .x2 ; eax - cur x1
shl ebx,ROUND2 ; ebx - cur x2
push dword .z2
pop dword .cz2
movd xmm1,.tx2
pxor xmm2,xmm2
punpcklwd xmm1,xmm2
cvtdq2ps xmm1,xmm1
movlps .ctx2,xmm1
movaps xmm0,.2_nv
movaps .cnv2,xmm0


.rpt_loop2:
pushad

movaps xmm2,.y_min
movaps xmm0,.cnv1
movaps xmm1,.cnv2
movaps xmm3,.ctx1
movaps xmm5,.ctx2
movaps xmm4,.l_v
sar ebx,ROUND2
sar eax,ROUND2
mov edx,.tx_ptr
mov edi,.screen
mov esi,.Zbuf
movd xmm6,.x_res
call glass_tex_line

popad
movaps xmm0,.cnv1
movaps xmm1,.cnv2
; movss xmm2,.cz1
; movss xmm3,.cz2
movaps xmm2,.ctx1
movaps xmm3,.ctx2
addps xmm0,.dn13
addps xmm1,.dn23
; addss xmm2,.dz13
; addss xmm3,.dz23
addps xmm2,.dtx13
addps xmm3,.dtx23

add eax,.dx13
add ebx,.dx23

movaps .cnv1,xmm0
movaps .cnv2,xmm1
movaps .ctx1,xmm2
movaps .ctx2,xmm3

; movss .cz1,xmm2
; movss .cz2,xmm3

add ecx,1
cmp cx,.y3
jl .rpt_loop2

; label below is referenced only from the commented-out code above
.second_cause: ;dx13 > dx12

.rpt_loop2_end:

; esp was changed only by the 'sub esp,512' of the prologue
add esp,512
pop ebp

ret
align 16
glass_tex_line:
; Draws one horizontal span of a glass-like, textured, Phong shaded
; triangle. For every pixel the interpolated normal is normalized,
; lit by each 64-byte record between lights_aligned and
; lights_aligned_end, multiplied by a bilinear-filtered texel and
; added (with unsigned saturation) to the pixel already in the
; screen buffer.
; in:
; xmm0 - normal vector 1
; xmm1 - normal vect 2
; xmm3 - lo -> hi tx1, ty1, z1 coords as dwords float
; xmm5 - lo -> hi tx2, ty2, z2 coords as dwords float
; xmm2 - lo -> hi y_min, y_max, x_min, x_max
; as dword integers
; xmm4 - normalized light vector (stored in .lv, not read afterwards)
; eax - x1
; ebx - x2
; ecx - y
; edi - screen buffer
; esi - stencil buffer filled with dd floats
; edx - texture pointer (handle)
; xmm6 - lowest dword x_res as integer
; out: none
; Nothing is drawn when y is outside y_min..y_max-1, when x1 = x2,
; or when the span lies completely outside x_min..x_max.
; Modifies eax, ebx, ecx, edx, esi, edi and xmm0-xmm7.

; Frame: ebp is moved down and rounded to a multiple of 16 so the
; movaps accesses to the locals are aligned. The frame size is 352
; (it used to be 350) so that esp stays a multiple of 4 inside the
; routine; the lowest local, .tx_ptr, is still well inside the frame.
push ebp
mov ebp,esp
sub esp,352
sub ebp,16
and ebp,0xfffffff0

.n1 equ [ebp-16]
.n2 equ [ebp-32]
.lv equ [ebp-48]
.lx1 equ [ebp-52]
.lx2 equ [ebp-56]
; .z2 equ [ebp-60]
; .z1 equ [ebp-64]
.screen equ [ebp-68]
.zbuff equ [ebp-72]
.x_max equ [ebp-74]
.x_min equ [ebp-76]
.y_max equ [ebp-78]
.y_min equ [ebp-80]
.dn equ [ebp-96]
.x_res equ [ebp-100]
.y equ [ebp-104]
.cnv equ [ebp-128]
.z1 equ [ebp-136]
.ty1 equ [ebp-140]
.tx1 equ [ebp-144]
.z2 equ [ebp-152]
.ty2 equ [ebp-156]
.tx2 equ [ebp-160]
.cz equ [ebp-168]
.cty equ [ebp-172]
.ctx equ [ebp-176]
.dz equ [ebp-184]
.dty equ [ebp-188]
.dtx equ [ebp-192]
.yd equ [ebp-196]
.xd equ [ebp-200]
.yf equ [ebp-204]
.xf equ [ebp-208]
.w4 equ [ebp-212]
.w3 equ [ebp-216]
.w2 equ [ebp-220]
.w1 equ [ebp-224]
.p4 equ [ebp-228]
.p3 equ [ebp-232]
.p2 equ [ebp-236]
.p1 equ [ebp-240]


.tx_ptr equ [ebp-244]

; movaps xmm7,xmm3
; movaps xmm3,xmm5
; movaps xmm5,xmm7


mov .y,ecx
; clip rectangle is packed to words: y_min, y_max, x_min, x_max
packssdw xmm2,xmm2
; movaps xmm7,xmm2
; movhps xmm2,[the_zero]
; pshuflw xmm2,xmm2,11111000b
; pshufd xmm2,xmm2,11111100b
; movlps xmm7,[the_zero]
; pshufhw xmm7,xmm7,11111111b
; movlps xmm7,[the_zero]
; psrldq xmm7,4
; por xmm2,xmm7
movq .y_min,xmm2
; reject the line when y < y_min or y >= y_max
cmp cx,.y_min
jl .end_line
cmp cx,.y_max
jge .end_line ;

; make eax the smaller x; the normals and the tx/ty/z
; vectors are exchanged together with the x values
cmp eax,ebx
je .end_line
jl @f
xchg eax,ebx
movaps xmm7,xmm0
movaps xmm0,xmm1
movaps xmm1,xmm7
movaps xmm7,xmm3
movaps xmm3,xmm5
movaps xmm5,xmm7
@@:

; reject spans that start at or after x_max or end at or before x_min
cmp ax,.x_max
jge .end_line
cmp bx,.x_min
jle .end_line
movaps .lv,xmm4
movaps .n1,xmm0
movaps .n2,xmm1
mov .lx1,eax
mov .lx2,ebx
movaps .tx1,xmm3
movaps .tx2,xmm5
movd .x_res,xmm6
mov .tx_ptr,edx
; per-pixel steps: (end - start) / (x2 - x1)
sub ebx,eax
cvtsi2ss xmm7,ebx
shufps xmm7,xmm7,0
subps xmm1,xmm0
divps xmm1,xmm7
movaps .dn,xmm1
subps xmm5,xmm3
divps xmm5,xmm7
movaps .dtx,xmm5



; left clip: when the span starts before x_min, move the start
; values forward by (x_min - x1) steps and begin at x_min
mov ebx,.lx1
cmp bx,.x_min ; clipping on function4
jge @f
movzx eax,word .x_min
sub eax,ebx
cvtsi2ss xmm7,eax
shufps xmm7,xmm7,0
mulps xmm5,xmm7
mulps xmm1,xmm7
addps xmm5,.tx1
addps xmm1,.n1
movsx eax,word .x_min
movaps .tx1,xmm5
movaps .n1,xmm1
mov dword .lx1,eax

@@:
; right clip: the span ends at x_max at the latest
movzx eax,word .x_max
cmp .lx2,eax
jl @f
mov .lx2,eax
@@:
; byte offset of the first pixel: (x_res * y + x1) * 4, applied to
; both the screen and the stencil pointer ('mul' overwrites edx)
mov eax,.x_res
mul dword .y
add eax,.lx1
shl eax,2
add edi,eax
add esi,eax

; ecx - number of pixels to draw, xmm2 - current tx, ty, z
mov ecx,.lx2
sub ecx,.lx1
; movaps xmm0,.n1
movaps xmm2,.tx1
; xorps xmm1,xmm1
align 16
.ddraw:
; movhlps xmm7,xmm2
; cmpnltss xmm7,dword[esi]
; movd eax,xmm7
; or eax,eax
; jnz .skip
; xmm5 collects the per-lane maximum over all lights
xorps xmm5,xmm5
; movhlps xmm7,xmm2
; movss [esi],xmm7
; normalize the current normal; the two haddps sum all four lanes
movaps xmm7,.n1 ;xmm0
mulps xmm7,xmm7 ; normalize
haddps xmm7,xmm7
haddps xmm7,xmm7
rsqrtps xmm7,xmm7
mulps xmm7,.n1 ;xmm0
; andps xmm7,[abs_z_coof]
movaps .cnv,xmm7

; clamp the texture coordinates, split them into an integer part
; (xmm7) and a fraction (.xf, .yf) for the bilinear weights
movaps xmm6,xmm2
minps xmm6,[tex_m2] ; float TEX_X-2,TEX_Y-2
cvttps2dq xmm7,xmm6
cvtdq2ps xmm4,xmm7
subps xmm6,xmm4
movlps .xf,xmm6
; movaps xmm5,.lv
mov eax,lights_aligned ; global
align 16
.again_col:
; xmm0 = dot product of the light record's first vector and the normal
movaps xmm0,[eax] ; calc multiple lights
mulps xmm0,.cnv ;.lv ; last dword should be zeroed
haddps xmm0,xmm0
haddps xmm0,xmm0
; andps xmm0,[abs_val] ;calc absolute value
if 1
; stencil
; the term built from [eax+48] is used only when exactly one of
; (z + aprox) and (z - aprox) is not below the stencil value;
; otherwise xmm4 stays zero
movhlps xmm6,xmm2
movhlps xmm4,xmm2
addss xmm6,[aprox]
subss xmm4,[aprox]
cmpnltss xmm6,dword[esi]
cmpnltss xmm4,dword[esi]
xorps xmm6,xmm4
xorps xmm4,xmm4
movd ebx,xmm6
cmp ebx,-1
jne .no_reflective
end if
; xmm4 = dot^16 * [eax+48]
movaps xmm4,xmm0
mulps xmm4,xmm4
mulps xmm4,xmm4
mulps xmm4,xmm4
mulps xmm4,xmm4
mulps xmm4,[eax+48]

.no_reflective:
; xmm4 += max(dot, 0) * [eax+16] + [eax+32]
maxps xmm0,[the_zero]
; movaps xmm1,xmm0
mulps xmm0,[eax+16]
addps xmm4,xmm0
addps xmm4,[eax+32]
maxps xmm5,xmm4
add eax,64
cmp eax,lights_aligned_end
jnz .again_col
minps xmm5,[mask_255f]

; texture coords work
; eax -> texel (tx, ty), 3 bytes per texel; ebx -> the texel one row below
movd eax,xmm7
psrldq xmm7,4
movd ebx,xmm7
shl ebx,TEX_SHIFT
add eax,ebx
lea eax,[eax*3]
add eax,.tx_ptr
mov ebx,eax
add ebx,TEX_X*3
movd xmm7,[eax]
movd xmm6,[eax+3]
movd xmm4,[ebx]
movd xmm3,[ebx+3]
punpcklbw xmm7,xmm6 ;xmm7 r1 r2 g1 g2 b1 b2
punpcklbw xmm4,xmm3 ;xmm4 r3 r4 g3 g4 b3 b4
punpcklwd xmm7,xmm4 ;xmm7 r1 r2 r3 r4 g1 g2 g3 g4 ...
; one register per colour channel, each holding the 4 neighbour texels
movdqa xmm6,xmm7
movdqa xmm4,xmm7
psrldq xmm6,4
psrldq xmm4,8

punpcklbw xmm7,[the_zero] ; broadcasted 0
punpcklbw xmm6,[the_zero]
punpcklbw xmm4,[the_zero]
punpcklwd xmm7,[the_zero]
punpcklwd xmm6,[the_zero]
punpcklwd xmm4,[the_zero]


; calc w .........
; xmm3 = (1-xf)(1-yf), xf(1-yf), (1-xf)yf, xf*yf
movlps xmm3,[the_one] ; broadcasted dword 1.0
cvtdq2ps xmm7,xmm7
subps xmm3,.xf
cvtdq2ps xmm6,xmm6
movhps xmm3,.xf
cvtdq2ps xmm4,xmm4
movaps xmm1,xmm3 ; 1-xf, 1-yf, xf, yf
shufps xmm3,xmm3,10001000b
shufps xmm1,xmm1,11110101b
mulps xmm3,xmm1

; weighted sum of the 4 texels for each channel
mulps xmm7,xmm3
mulps xmm6,xmm3
mulps xmm4,xmm3
haddps xmm7,xmm7 ; r
haddps xmm6,xmm6 ; g
haddps xmm4,xmm4 ; b
haddps xmm7,xmm7 ; r
haddps xmm6,xmm6 ; g
haddps xmm4,xmm4 ; b
; gather the three channel sums into one register
movlhps xmm7,xmm6
shufps xmm7,xmm7,11101000b
movlhps xmm7,xmm4

; light * texel, scaled down by 256, packed to bytes and added with
; unsigned saturation to the pixel that is already on the screen
mulps xmm5,xmm7
cvtps2dq xmm5,xmm5
psrld xmm5,8
movd xmm6,[edi]
packssdw xmm5,xmm5
packuswb xmm5,xmm5
paddusb xmm5,xmm6
movd [edi],xmm5
; .skip is referenced only from the commented-out code above
.skip:
; next pixel: advance both buffers, the normal and tx/ty/z
add edi,4
add esi,4
; addps xmm0,.dn
movaps xmm0,.n1 ; cur normal
addps xmm0,.dn
addps xmm2,.dtx
movaps .n1,xmm0
sub ecx,1
jnz .ddraw

.end_line:
; must match the 'sub esp,352' of the prologue
add esp,352
pop ebp

ret
; Bilinear filtering, real Phongs shading and glass like parallel.
; Thanks to authors of 3dica tutorial.
; Implemented in FASM by Maciej Guba.
; http://macgub.co.pl
 
ROUND2 equ 10
 
glass_tex_tri:
;----Procedure renders a Phong shaded triangle with z coord
;----interpolation ( Catmull algorithm ), each pixel is -
;----covered by texture using bilinear filtering.--------
;----I normalize normal vector in every pixel -----------
;------------------in - eax - x1 shl 16 + y1 ------------
;---------------------- ebx - x2 shl 16 + y2 ------------
;---------------------- ecx - x3 shl 16 + y3 ------------
;---------------------- esi - pointer to stencil buffer--
;---------------------- filled with dd float variables-
;---------------------- edi - pointer to screen buffer---
;---------------------- edx - pointer to texture---------
;---------------------- xmm0 - 1st normal vector --------
;---------------------- xmm1 - 2nd normal vector --------
;---------------------- xmm2 - 3rd normal vector --------
;---------------------- xmm3 - normalized light vector --
;---------------------- xmm4 - lo -> hi z1, z2, z3 coords
;---------------------- as dwords floats ---------------
;---------------------- xmm5 - lo -> hi y_min, y_max, ---
;---------------------- x_min, x_max as dword integers -
;---------------------- xmm6 - lo -> hi tx1, ty1, tx2, --
;---------------------- ty2, tx3, ty3 as word, xres as--
;---------------------- dword integers------------------
;---------------------- stack - no parameters -----------
;--------------------------------------------------------
;----------------- procedure does not save registers !! -
; The triangle is drawn one scanline at a time through
; glass_tex_line. Per-line x, normal and texture/z values
; are stepped along edges 1-3 and 1-2 first, then along
; edges 1-3 and 2-3.



; Frame: esp is lowered by 512 bytes; ebp is moved down and
; rounded to a multiple of 16 so the movaps/movdqa accesses
; to the locals below are aligned.
push ebp
mov ebp,esp
sub esp,512
sub ebp,16
and ebp,0xfffffff0

; Several locals are filled in groups by one 16-byte store:
;   .z1    <- xmm4 : z1, z2, z3 and the dword after .z3
;   .y_min <- xmm5 : y_min, y_max, x_min, x_max
;   .tx1   <- xmm6 : tx1, ty1, tx2, ty2, tx3, ty3, x_res
; .y1/.y2/.y3 are written as dwords, so each store also
; fills the .x? word that follows it.
.1_nv equ [ebp-16]
.2_nv equ [ebp-32]
.3_nv equ [ebp-48]
.l_v equ [ebp-64]
.z3 equ [ebp-72]
.z2 equ [ebp-76]
.z1 equ [ebp-80]
.x1 equ [ebp-82]
.y1 equ [ebp-84]
.x2 equ [ebp-86]
.y2 equ [ebp-88]
.x3 equ [ebp-90]
.y3 equ [ebp-92]
.Zbuf equ [ebp-96]
.x_max equ [ebp-100]
.x_min equ [ebp-104]
.y_max equ [ebp-108]
.y_min equ [ebp-112]
.screen equ [ebp-116]
.dx12 equ [ebp-120]
.dx13 equ [ebp-124]
.dx23 equ [ebp-128]
.dn12 equ [ebp-144]
.dn13 equ [ebp-160]
.dn23 equ [ebp-176]

.cnv1 equ [ebp-192] ; cur normal vectors
.cnv2 equ [ebp-208]
.x_res equ [ebp-212]
.ty3 equ [ebp-214]
.tx3 equ [ebp-216]
.ty2 equ [ebp-218]
.tx2 equ [ebp-220]
.ty1 equ [ebp-222]
.tx1 equ [ebp-224]
.dz12 equ [ebp-232]
.dty12 equ [ebp-236]
.dtx12 equ [ebp-240]
.dz13 equ [ebp-248]
.dty13 equ [ebp-252]
.dtx13 equ [ebp-256]
.dz23 equ [ebp-264]
.dty23 equ [ebp-268]
.dtx23 equ [ebp-272]
.cz1 equ [ebp-280]
.cty1 equ [ebp-284]
.ctx1 equ [ebp-288]
.cz2 equ [ebp-296]
.cty2 equ [ebp-300]
.ctx2 equ [ebp-304]
.tx_ptr equ [ebp-308]


emms
; movd .x_res,xmm7
; Order the three vertices by ascending y (low word of eax/ebx/ecx,
; signed compare). Every swap is mirrored in the z lanes of xmm4,
; the texture coordinate dwords of xmm6 and the normal registers.
.sort3: ; sort triangle coordinates...
cmp ax,bx
jle .sort1
xchg eax,ebx
shufps xmm4,xmm4,11100001b
shufps xmm6,xmm6,11100001b
movaps xmm7,xmm0
movaps xmm0,xmm1
movaps xmm1,xmm7


.sort1:
cmp bx,cx
jle .sort2
xchg ebx,ecx
shufps xmm4,xmm4,11011000b
shufps xmm6,xmm6,11011000b
movaps xmm7,xmm1
movaps xmm1,xmm2
movaps xmm2,xmm7

; vertices 2 and 3 were exchanged, so pair 1/2 is checked again
jmp .sort3

.sort2:
; movq .tx1,xmm6
; pshufd xmm6,xmm6,01001110b
; movd .tx3,xmm6
movaps .tx1,xmm6
movaps .z1,xmm4
mov .y1,eax
mov .y2,ebx
mov .y3,ecx

movdqa .y_min,xmm5
if 1 ; check if at least a fragment
packssdw xmm5,xmm5 ; of the triangle is in the visible area
pshuflw xmm5,xmm5,11011000b
; 8 words are loaded starting at .y3: y3, x3, y2, x2, y1, x1 and
; the two halves of .z1.
; NOTE(review): the mask below keeps the result bits of all 8 word
; lanes, so the two .z1 halves take part in the test - confirm
; whether that is intended.
movdqu xmm7,.y3
movdqa xmm6,xmm5
pshufd xmm5,xmm5,0 ; xmm5 lo-hi -> broadcasted y_min, x_min
pshufd xmm6,xmm6,01010101b ;xmm6 -> brd y_max x_max
movdqa xmm4,xmm7
; a lane of xmm7 ends up set when: min < value and not (max < value)
pcmpgtw xmm7,xmm5
pcmpgtw xmm4,xmm6
pxor xmm7,xmm4
pmovmskb eax,xmm7
and eax,0x00aaaaaa
or eax,eax
; no lane inside its range -> nothing is drawn
jz .rpt_loop2_end
end if
movaps .1_nv,xmm0
movaps .2_nv,xmm1
movaps .3_nv,xmm2
movaps .l_v,xmm3
mov .Zbuf,esi
mov .screen,edi
mov .tx_ptr,edx



; ---- per-scanline steps along edge 1-2 ----
mov bx,.y2 ; calc deltas
sub bx,.y1
jnz .rpt_dx12_make

; y1 = y2: all steps of this edge are zero
; (the 16-byte store covers .dtx12, .dty12, .dz12 and 4 more bytes)
xorps xmm7,xmm7
mov dword .dx12,0
movaps .dtx12,xmm7
movaps .dn12,xmm7
jmp .rpt_dx12_done

.rpt_dx12_make:
; dx12 = ((x2 - x1) shl ROUND2) / (y2 - y1), fixed point
mov ax,.x2
sub ax,.x1
cwde
movsx ebx,bx
shl eax,ROUND2
cdq
idiv ebx
mov .dx12,eax

; xmm6 = (y2 - y1) as float in all four lanes
cvtsi2ss xmm6,ebx
shufps xmm6,xmm6,0
movss xmm5,.z2
subss xmm5,.z1
divss xmm5,xmm6
movss .dz12,xmm5

; texture coordinate words are zero-extended to dwords, subtracted
; as integers and only then converted to float
movd xmm0,.tx1
movd xmm2,.tx2
pxor xmm1,xmm1
punpcklwd xmm0,xmm1
punpcklwd xmm2,xmm1
psubd xmm2,xmm0
; cvtdq2ps xmm0,xmm0
cvtdq2ps xmm2,xmm2
; movlps .ctx1,xmm0
; movlps .ctx2,xmm2
; subps xmm2,xmm0
divps xmm2,xmm6
movlps .dtx12,xmm2

movaps xmm0,.2_nv
subps xmm0,.1_nv
divps xmm0,xmm6
movaps .dn12,xmm0


.rpt_dx12_done:

; ---- per-scanline steps along edge 1-3 (same scheme) ----
mov bx,.y3 ; calc deltas
sub bx,.y1
jnz .rpt_dx13_make

xorps xmm7,xmm7
mov dword .dx13,0
movaps .dtx13,xmm7
movaps .dn13,xmm7
jmp .rpt_dx13_done

.rpt_dx13_make:
mov ax,.x3
sub ax,.x1
cwde
movsx ebx,bx
shl eax,ROUND2
cdq
idiv ebx
mov .dx13,eax


cvtsi2ss xmm6,ebx
shufps xmm6,xmm6,0

movss xmm5,.z3
subss xmm5,.z1
divss xmm5,xmm6
movss .dz13,xmm5

movd xmm0,.tx1
movd xmm2,.tx3
pxor xmm1,xmm1
punpcklwd xmm0,xmm1
punpcklwd xmm2,xmm1
psubd xmm2,xmm0
; cvtdq2ps xmm0,xmm0
cvtdq2ps xmm2,xmm2
; subps xmm2,xmm0
divps xmm2,xmm6
movlps .dtx13,xmm2



movaps xmm0,.3_nv
subps xmm0,.1_nv
divps xmm0,xmm6
movaps .dn13,xmm0

.rpt_dx13_done:

; ---- per-scanline steps along edge 2-3 (same scheme) ----
mov bx,.y3 ; calc deltas
sub bx,.y2
jnz .rpt_dx23_make

xorps xmm7,xmm7
mov dword .dx23,0
movaps .dtx23,xmm7
movaps .dn23,xmm7
jmp .rpt_dx23_done

.rpt_dx23_make:
mov ax,.x3
sub ax,.x2
cwde
movsx ebx,bx
shl eax,ROUND2
cdq
idiv ebx
mov .dx23,eax

cvtsi2ss xmm6,ebx
shufps xmm6,xmm6,0
movss xmm5,.z3
subss xmm5,.z2
divss xmm5,xmm6
movss .dz23,xmm5

movd xmm0,.tx2
movd xmm2,.tx3
pxor xmm1,xmm1
punpcklwd xmm0,xmm1
punpcklwd xmm2,xmm1
psubd xmm2,xmm0
; cvtdq2ps xmm0,xmm0
cvtdq2ps xmm2,xmm2
; movlps .ctx1,xmm0
; movlps .ctx2,xmm2
; subps xmm2,xmm0
divps xmm2,xmm6
movlps .dtx23,xmm2




movaps xmm0,.3_nv
subps xmm0,.2_nv
divps xmm0,xmm6
movaps .dn23,xmm0

.rpt_dx23_done:

; Both running edge positions start at vertex 1:
; eax / ebx - current x of the two edges, fixed point (ROUND2 bits)
; .ctx?/.cty?/.cz? and .cnv? - current texture/z and normal values
movsx eax,word .x1
shl eax,ROUND2
mov ebx,eax
mov edx,.z1
movd xmm1,.tx1
pxor xmm2,xmm2
punpcklwd xmm1,xmm2
cvtdq2ps xmm1,xmm1

mov .cz1,edx
mov .cz2,edx
movaps xmm0,.1_nv
movlps .ctx1,xmm1
movlps .ctx2,xmm1
movaps .cnv1,xmm0
movaps .cnv2,xmm0

; mov edx,.dx13
; cmp edx,.dx12
; jg .second_cause

; ---- scanlines y1 .. y2-1 : edges 1-3 (eax) and 1-2 (ebx) ----
movsx ecx,word .y1
cmp cx,.y2

jge .rpt_loop1_end

.rpt_loop1:
; eax, ebx and ecx have to survive the call; they are shifted to
; integer x only inside the pushad/popad pair
pushad

movaps xmm2,.y_min
movaps xmm0,.cnv1
movaps xmm1,.cnv2
; movlps xmm3,.cz1 ; cz1, cz2 both
; 16-byte loads: tx, ty, z and the dword that follows .cz?
movaps xmm3,.ctx1
movaps xmm5,.ctx2
movaps xmm4,.l_v
movd xmm6,.x_res
sar ebx,ROUND2
sar eax,ROUND2
mov edx,.tx_ptr
mov edi,.screen

mov esi,.Zbuf

call glass_tex_line

popad
; advance both edges by one scanline
movaps xmm0,.cnv1
movaps xmm1,.cnv2
; movss xmm2,.cz1
; movss xmm3,.cz2
movaps xmm2,.ctx1
movaps xmm3,.ctx2
addps xmm0,.dn13
addps xmm1,.dn12
addps xmm2,.dtx13
addps xmm3,.dtx12
add eax,.dx13
add ebx,.dx12

movaps .cnv1,xmm0
movaps .cnv2,xmm1
; movss .cz1,xmm2
; movss .cz2,xmm3
movaps .ctx1,xmm2
movaps .ctx2,xmm3
add ecx,1
cmp cx,.y2
jl .rpt_loop1


; jmp .rpt_loop2_end


.rpt_loop1_end:
; ---- scanlines y2 .. y3-1 : edges 1-3 (eax) and 2-3 (ebx) ----
movsx ecx,word .y2
cmp cx,.y3
jge .rpt_loop2_end

; the second edge restarts exactly at vertex 2
movsx ebx,word .x2 ; eax - cur x1
shl ebx,ROUND2 ; ebx - cur x2
push dword .z2
pop dword .cz2
movd xmm1,.tx2
pxor xmm2,xmm2
punpcklwd xmm1,xmm2
cvtdq2ps xmm1,xmm1
movlps .ctx2,xmm1
movaps xmm0,.2_nv
movaps .cnv2,xmm0


.rpt_loop2:
pushad

movaps xmm2,.y_min
movaps xmm0,.cnv1
movaps xmm1,.cnv2
movaps xmm3,.ctx1
movaps xmm5,.ctx2
movaps xmm4,.l_v
sar ebx,ROUND2
sar eax,ROUND2
mov edx,.tx_ptr
mov edi,.screen
mov esi,.Zbuf
movd xmm6,.x_res
call glass_tex_line

popad
movaps xmm0,.cnv1
movaps xmm1,.cnv2
; movss xmm2,.cz1
; movss xmm3,.cz2
movaps xmm2,.ctx1
movaps xmm3,.ctx2
addps xmm0,.dn13
addps xmm1,.dn23
; addss xmm2,.dz13
; addss xmm3,.dz23
addps xmm2,.dtx13
addps xmm3,.dtx23

add eax,.dx13
add ebx,.dx23

movaps .cnv1,xmm0
movaps .cnv2,xmm1
movaps .ctx1,xmm2
movaps .ctx2,xmm3

; movss .cz1,xmm2
; movss .cz2,xmm3

add ecx,1
cmp cx,.y3
jl .rpt_loop2

; label below is referenced only from the commented-out code above
.second_cause: ;dx13 > dx12

.rpt_loop2_end:

; esp was changed only by the 'sub esp,512' of the prologue
add esp,512
pop ebp

ret
align 16
glass_tex_line:
; in:
; xmm0 - normal vector 1
; xmm1 - normal vect 2
; xmm3 - lo -> hi tx1, ty1, z1 coords as dwords float
; xmm5 - lo -> hi tx2, ty2, z2 coords as dwords float
; xmm2 - lo -> hi y_min, y_max, x_min, x_max
; as dword integers
; xmm4 - normalized light vector
; eax - x1
; ebx - x2
; ecx - y
; edi - screen buffer
; esi - stencil buffer filled with dd floats
; edx - texture pointer (handle)
; xmm6 - lowest dword x_res as integer
 
push ebp
mov ebp,esp
sub esp,350
sub ebp,16
and ebp,0xfffffff0
 
.n1 equ [ebp-16]
.n2 equ [ebp-32]
.lv equ [ebp-48]
.lx1 equ [ebp-52]
.lx2 equ [ebp-56]
; .z2 equ [ebp-60]
; .z1 equ [ebp-64]
.screen equ [ebp-68]
.zbuff equ [ebp-72]
.x_max equ [ebp-74]
.x_min equ [ebp-76]
.y_max equ [ebp-78]
.y_min equ [ebp-80]
.dn equ [ebp-96]
.x_res equ [ebp-100]
.y equ [ebp-104]
.cnv equ [ebp-128]
.z1 equ [ebp-136]
.ty1 equ [ebp-140]
.tx1 equ [ebp-144]
.z2 equ [ebp-152]
.ty2 equ [ebp-156]
.tx2 equ [ebp-160]
.cz equ [ebp-168]
.cty equ [ebp-172]
.ctx equ [ebp-176]
.dz equ [ebp-184]
.dty equ [ebp-188]
.dtx equ [ebp-192]
.yd equ [ebp-196]
.xd equ [ebp-200]
.yf equ [ebp-204]
.xf equ [ebp-208]
.w4 equ [ebp-212]
.w3 equ [ebp-216]
.w2 equ [ebp-220]
.w1 equ [ebp-224]
.p4 equ [ebp-228]
.p3 equ [ebp-232]
.p2 equ [ebp-236]
.p1 equ [ebp-240]
 
 
.tx_ptr equ [ebp-244]
 
; movaps xmm7,xmm3
; movaps xmm3,xmm5
; movaps xmm5,xmm7
 
 
mov .y,ecx
packssdw xmm2,xmm2
; movaps xmm7,xmm2
; movhps xmm2,[the_zero]
; pshuflw xmm2,xmm2,11111000b
; pshufd xmm2,xmm2,11111100b
; movlps xmm7,[the_zero]
; pshufhw xmm7,xmm7,11111111b
; movlps xmm7,[the_zero]
; psrldq xmm7,4
; por xmm2,xmm7
movq .y_min,xmm2
cmp cx,.y_min
jl .end_line
cmp cx,.y_max
jge .end_line ;
 
cmp eax,ebx
je .end_line
jl @f
xchg eax,ebx
movaps xmm7,xmm0
movaps xmm0,xmm1
movaps xmm1,xmm7
movaps xmm7,xmm3
movaps xmm3,xmm5
movaps xmm5,xmm7
@@:
 
cmp ax,.x_max
jge .end_line
cmp bx,.x_min
jle .end_line
movaps .lv,xmm4
movaps .n1,xmm0
movaps .n2,xmm1
mov .lx1,eax
mov .lx2,ebx
movaps .tx1,xmm3
movaps .tx2,xmm5
movd .x_res,xmm6
mov .tx_ptr,edx
sub ebx,eax
cvtsi2ss xmm7,ebx
shufps xmm7,xmm7,0
subps xmm1,xmm0
divps xmm1,xmm7
movaps .dn,xmm1
subps xmm5,xmm3
divps xmm5,xmm7
movaps .dtx,xmm5
 
 
 
mov ebx,.lx1
cmp bx,.x_min ; clipping on function4
jge @f
movzx eax,word .x_min
sub eax,ebx
cvtsi2ss xmm7,eax
shufps xmm7,xmm7,0
mulps xmm5,xmm7
mulps xmm1,xmm7
addps xmm5,.tx1
addps xmm1,.n1
movsx eax,word .x_min
movaps .tx1,xmm5
movaps .n1,xmm1
mov dword .lx1,eax
 
@@:
movzx eax,word .x_max
cmp .lx2,eax
jl @f
mov .lx2,eax
@@:
mov eax,.x_res
mul dword .y
add eax,.lx1
shl eax,2
add edi,eax
add esi,eax
 
mov ecx,.lx2
sub ecx,.lx1
; movaps xmm0,.n1
movaps xmm2,.tx1
; xorps xmm1,xmm1
align 16
.ddraw:
; movhlps xmm7,xmm2
; cmpnltss xmm7,dword[esi]
; movd eax,xmm7
; or eax,eax
; jnz .skip
xorps xmm5,xmm5
; movhlps xmm7,xmm2
; movss [esi],xmm7
movaps xmm7,.n1 ;xmm0
mulps xmm7,xmm7 ; normalize
haddps xmm7,xmm7
haddps xmm7,xmm7
rsqrtps xmm7,xmm7
mulps xmm7,.n1 ;xmm0
; andps xmm7,[abs_z_coof]
movaps .cnv,xmm7
 
movaps xmm6,xmm2
minps xmm6,[tex_m2] ; float TEX_X-2,TEX_Y-2
cvttps2dq xmm7,xmm6
cvtdq2ps xmm4,xmm7
subps xmm6,xmm4
movlps .xf,xmm6
; movaps xmm5,.lv
mov eax,lights_aligned ; global
align 16
.again_col:
movaps xmm0,[eax] ; calc multple lights
mulps xmm0,.cnv ;.lv ; last dword should be zeroed
haddps xmm0,xmm0
haddps xmm0,xmm0
; andps xmm0,[abs_val] ;calc absolute value
if 1
; stencil
movhlps xmm6,xmm2
movhlps xmm4,xmm2
addss xmm6,[aprox]
subss xmm4,[aprox]
cmpnltss xmm6,dword[esi]
cmpnltss xmm4,dword[esi]
xorps xmm6,xmm4
xorps xmm4,xmm4
movd ebx,xmm6
cmp ebx,-1
jne .no_reflective
end if
movaps xmm4,xmm0
mulps xmm4,xmm4
mulps xmm4,xmm4
mulps xmm4,xmm4
mulps xmm4,xmm4
mulps xmm4,[eax+48]
 
.no_reflective:
maxps xmm0,[the_zero]
; movaps xmm1,xmm0
mulps xmm0,[eax+16]
addps xmm4,xmm0
addps xmm4,[eax+32]
maxps xmm5,xmm4
add eax,64
cmp eax,lights_aligned_end
jnz .again_col
minps xmm5,[mask_255f]
 
; texture coords work
movd eax,xmm7
psrldq xmm7,4
movd ebx,xmm7
shl ebx,TEX_SHIFT
add eax,ebx
lea eax,[eax*3]
add eax,.tx_ptr
mov ebx,eax
add ebx,TEX_X*3
movd xmm7,[eax]
movd xmm6,[eax+3]
movd xmm4,[ebx]
movd xmm3,[ebx+3]
punpcklbw xmm7,xmm6 ;xmm7 r1 r2 g1 g2 b1 b2
punpcklbw xmm4,xmm3 ;xmm4 r3 r4 g3 g4 b3 b4
punpcklwd xmm7,xmm4 ;xmm7 r1 r2 r3 r4 g1 g2 g3 g4 ...
movdqa xmm6,xmm7
movdqa xmm4,xmm7
psrldq xmm6,4
psrldq xmm4,8
 
punpcklbw xmm7,[the_zero] ; broadcasted 0
punpcklbw xmm6,[the_zero]
punpcklbw xmm4,[the_zero]
punpcklwd xmm7,[the_zero]
punpcklwd xmm6,[the_zero]
punpcklwd xmm4,[the_zero]
 
 
; calc w .........
movlps xmm3,[the_one] ; broadcasted dword 1.0
cvtdq2ps xmm7,xmm7
subps xmm3,.xf
cvtdq2ps xmm6,xmm6
movhps xmm3,.xf
cvtdq2ps xmm4,xmm4
movaps xmm1,xmm3 ; 1-xf, 1-yf, xf, yf
shufps xmm3,xmm3,10001000b
shufps xmm1,xmm1,11110101b
mulps xmm3,xmm1
 
mulps xmm7,xmm3
mulps xmm6,xmm3
mulps xmm4,xmm3
haddps xmm7,xmm7 ; r
haddps xmm6,xmm6 ; g
haddps xmm4,xmm4 ; b
haddps xmm7,xmm7 ; r
haddps xmm6,xmm6 ; g
haddps xmm4,xmm4 ; b
movlhps xmm7,xmm6
shufps xmm7,xmm7,11101000b
movlhps xmm7,xmm4
 
mulps xmm5,xmm7
cvtps2dq xmm5,xmm5
psrld xmm5,8
movd xmm6,[edi]
packssdw xmm5,xmm5
packuswb xmm5,xmm5
paddusb xmm5,xmm6
movd [edi],xmm5
.skip:
add edi,4
add esi,4
; addps xmm0,.dn
movaps xmm0,.n1 ; cur normal
addps xmm0,.dn
addps xmm2,.dtx
movaps .n1,xmm0
sub ecx,1
jnz .ddraw
 
.end_line:
add esp,350
pop ebp
 
ret
/programs/demos/view3ds/3r_phg.inc
1,528 → 1,528
; Real Phong shading implemented in flat assembler
; by Maciej Guba.
; http://macgub.vxm.pl
 
ROUND2 equ 10
real_phong_tri_z:
;----procedure renders a Phong shaded triangle with z coord
;----interpolation ( Catmull algorithm )------------------
;----the normal vector is normalized in every pixel ------
;------------------in - eax - x1 shl 16 + y1 ------------
;---------------------- ebx - x2 shl 16 + y2 ------------
;---------------------- ecx - x3 shl 16 + y3 ------------
;---------------------- esi - pointer to Z-buffer filled-
;---------------------- with dd float variables--------
;---------------------- edi - pointer to screen buffer---
;---------------------- xmm0 - 1st normal vector --------
;---------------------- xmm1 - 2nd normal vector --------
;---------------------- xmm2 - 3rd normal vector --------
;---------------------- xmm3 - normalized light vector --
;---------------------- xmm4 - lo -> hi z1, z2, z3 coords
;---------------------- as dwords floats ---------------
;---------------------- xmm5 - lo -> hi y_min, y_max, ---
;---------------------- x_min, x_max as dword integers -
;---------------------- stack - no parameters -----------
;--------------------------------------------------------
;----------------- procedure doesn't save registers !! --
; The triangle is split at its middle vertex and every scanline of
; both halves is drawn by a call to real_phong_line_z.



; Frame: 512 bytes are reserved; ebp is moved 16 bytes down and rounded
; down to a 16 byte boundary, so the 16 byte locals below can be
; accessed with movaps / movdqa.
push ebp
mov ebp,esp
sub esp,512
sub ebp,16
and ebp,0xfffffff0

; vertex normals and light vector (16 bytes each)
.1_nv equ [ebp-16]
.2_nv equ [ebp-32]
.3_nv equ [ebp-48]
.l_v equ [ebp-64]
; z1, z2, z3 are stored with a single movaps to .z1
.z3 equ [ebp-72]
.z2 equ [ebp-76]
.z1 equ [ebp-80]
; each vertex is stored as one dword: y in the low word, x in the high word
.x1 equ [ebp-82]
.y1 equ [ebp-84]
.x2 equ [ebp-86]
.y2 equ [ebp-88]
.x3 equ [ebp-90]
.y3 equ [ebp-92]
.Zbuf equ [ebp-96]
; clipping rectangle, stored with a single movdqa to .y_min
.x_max equ [ebp-100]
.x_min equ [ebp-104]
.y_max equ [ebp-108]
.y_min equ [ebp-112]
.screen equ [ebp-116]
; per scanline x steps along the edges, fixed point with ROUND2 fraction bits
.dx12 equ [ebp-120]
.dx13 equ [ebp-124]
.dx23 equ [ebp-128]
; per scanline normal vector steps along the edges
.dn12 equ [ebp-144]
.dn13 equ [ebp-160]
.dn23 equ [ebp-176]
; per scanline z steps along the edges
.dz12 equ [ebp-180]
.dz13 equ [ebp-184]
.dz23 equ [ebp-188]

.cnv1 equ [ebp-208] ; current normal vectors on both active edges
.cnv2 equ [ebp-224]
; current z on both active edges; loaded as a pair with movlps from .cz1
.cz2 equ [ebp-228]
.cz1 equ [ebp-232]





; Sort the vertices by ascending y (low word of eax, ebx, ecx).
; Every exchange also swaps the matching z lanes of xmm4 and the
; matching normal vectors.
.sort3: ; sort triangle coordinates...
cmp ax,bx
jle .sort1
xchg eax,ebx
shufps xmm4,xmm4,11100001b ; swap z lanes 0 and 1
movaps xmm6,xmm0
movaps xmm0,xmm1
movaps xmm1,xmm6


.sort1:
cmp bx,cx
jle .sort2
xchg ebx,ecx
shufps xmm4,xmm4,11011000b ; swap z lanes 1 and 2
movaps xmm6,xmm1
movaps xmm1,xmm2
movaps xmm2,xmm6

jmp .sort3

.sort2:

movaps .z1,xmm4 ; z1, z2, z3
mov .y1,eax ; writes both .y1 and .x1
mov .y2,ebx ; writes both .y2 and .x2
mov .y3,ecx ; writes both .y3 and .x3

movdqa .y_min,xmm5 ; y_min, y_max, x_min, x_max as dwords
if 1 ; check whether at least a fragment
packssdw xmm5,xmm5 ; of the triangle is in the visible area
pshuflw xmm5,xmm5,11011000b ; words -> y_min, x_min, y_max, x_max
movdqu xmm7,.y3 ; words lo -> hi: y3, x3, y2, x2, y1, x1, ...
movdqa xmm6,xmm5
pshufd xmm5,xmm5,0 ; xmm5 lo-hi -> broadcasted y_min, x_min
pshufd xmm6,xmm6,01010101b ;xmm6 -> brd y_max x_max
movdqa xmm4,xmm7
pcmpgtw xmm7,xmm5 ; coordinate > minimum
pcmpgtw xmm4,xmm6 ; coordinate > maximum
pxor xmm7,xmm4 ; set where exactly one of the two compares holds
pmovmskb eax,xmm7
and eax,0x00aaaaaa ; keep one mask bit for each of the six coordinate words
or eax,eax
jz .rpt_loop2_end ; no coordinate passed the test - draw nothing
end if
movaps .1_nv,xmm0
movaps .2_nv,xmm1
movaps .3_nv,xmm2
movaps .l_v,xmm3
mov .Zbuf,esi
mov .screen,edi



; ---- steps along edge 1-2; all zero when y1 = y2 ----
mov bx,.y2 ; calc deltas
sub bx,.y1
jnz .rpt_dx12_make

xorps xmm7,xmm7
mov dword .dx12,0
mov dword .dz12,0
movaps .dn12,xmm7
jmp .rpt_dx12_done

.rpt_dx12_make:
mov ax,.x2
sub ax,.x1
cwde
movsx ebx,bx
shl eax,ROUND2
cdq
idiv ebx ; dx12 = ((x2 - x1) shl ROUND2) / (y2 - y1)
mov .dx12,eax

cvtsi2ss xmm6,ebx
movss xmm5,.z2
subss xmm5,.z1
divss xmm5,xmm6 ; dz12 = (z2 - z1) / (y2 - y1)
movss .dz12,xmm5

movaps xmm0,.2_nv
subps xmm0,.1_nv
shufps xmm6,xmm6,0 ; broadcast (y2 - y1)
divps xmm0,xmm6 ; dn12 = (n2 - n1) / (y2 - y1)
movaps .dn12,xmm0


.rpt_dx12_done:

; ---- steps along edge 1-3; all zero when y1 = y3 ----
mov bx,.y3 ; calc deltas
sub bx,.y1
jnz .rpt_dx13_make

xorps xmm7,xmm7
mov dword .dx13,0
mov dword .dz13,0
movaps .dn13,xmm7
jmp .rpt_dx13_done

.rpt_dx13_make:
mov ax,.x3
sub ax,.x1
cwde
movsx ebx,bx
shl eax,ROUND2
cdq
idiv ebx ; dx13 = ((x3 - x1) shl ROUND2) / (y3 - y1)
mov .dx13,eax

cvtsi2ss xmm6,ebx
movss xmm5,.z3
subss xmm5,.z1
divss xmm5,xmm6 ; dz13 = (z3 - z1) / (y3 - y1)
movss .dz13,xmm5

movaps xmm0,.3_nv
subps xmm0,.1_nv
shufps xmm6,xmm6,0 ; broadcast (y3 - y1)
divps xmm0,xmm6 ; dn13 = (n3 - n1) / (y3 - y1)
movaps .dn13,xmm0

.rpt_dx13_done:

; ---- steps along edge 2-3; all zero when y2 = y3 ----
mov bx,.y3 ; calc deltas
sub bx,.y2
jnz .rpt_dx23_make

xorps xmm7,xmm7
mov dword .dx23,0
mov dword .dz23,0
movaps .dn23,xmm7
jmp .rpt_dx23_done

.rpt_dx23_make:
mov ax,.x3
sub ax,.x2
cwde
movsx ebx,bx
shl eax,ROUND2
cdq
idiv ebx ; dx23 = ((x3 - x2) shl ROUND2) / (y3 - y2)
mov .dx23,eax

cvtsi2ss xmm6,ebx
movss xmm5,.z3
subss xmm5,.z2
divss xmm5,xmm6 ; dz23 = (z3 - z2) / (y3 - y2)
movss .dz23,xmm5

movaps xmm0,.3_nv
subps xmm0,.2_nv
shufps xmm6,xmm6,0 ; broadcast (y3 - y2)
divps xmm0,xmm6 ; dn23 = (n3 - n2) / (y3 - y2)
movaps .dn23,xmm0

.rpt_dx23_done:


; Both edge walkers start at vertex 1:
; eax - current x on edge 1-3, ebx - current x on edge 1-2
; (fixed point, ROUND2 fraction bits)
movsx eax,word .x1
shl eax,ROUND2
mov ebx,eax
mov edx,.z1
mov .cz1,edx
mov .cz2,edx
movaps xmm0,.1_nv
movaps .cnv1,xmm0
movaps .cnv2,xmm0


movsx ecx,word .y1 ; ecx - current scanline
cmp cx,.y2

jge .rpt_loop1_end ; upper half is empty

; ---- upper half: scanlines y1 .. y2-1 ----
.rpt_loop1:
pushad ; the line routine gets converted copies; eax, ebx, ecx survive the call

movaps xmm2,.y_min ; clipping rectangle
movaps xmm0,.cnv1
movaps xmm1,.cnv2
movlps xmm3,.cz1 ; lo -> hi: cz1, cz2
movaps xmm4,.l_v
sar ebx,ROUND2 ; fixed point -> integer x
sar eax,ROUND2
mov edi,.screen
mov esi,.Zbuf

call real_phong_line_z

popad
; advance both edges by one scanline
movaps xmm0,.cnv1
movaps xmm1,.cnv2
movss xmm2,.cz1
movss xmm3,.cz2
addps xmm0,.dn13
addps xmm1,.dn12
addss xmm2,.dz13
addss xmm3,.dz12
add eax,.dx13
add ebx,.dx12

movaps .cnv1,xmm0
movaps .cnv2,xmm1
movss .cz1,xmm2
movss .cz2,xmm3

add ecx,1
cmp cx,.y2
jl .rpt_loop1





.rpt_loop1_end:
movsx ecx,word .y2
cmp cx,.y3
jge .rpt_loop2_end ; lower half is empty

; restart the second edge walker at vertex 2; it now follows edge 2-3
movsx ebx,word .x2 ; eax - cur x1
shl ebx,ROUND2 ; ebx - cur x2
push dword .z2
pop dword .cz2
movaps xmm0,.2_nv
movaps .cnv2,xmm0


; ---- lower half: scanlines y2 .. y3-1 ----
.rpt_loop2:
pushad

movaps xmm2,.y_min ; clipping rectangle
movaps xmm0,.cnv1
movaps xmm1,.cnv2
movlps xmm3,.cz1 ; lo -> hi: cz1, cz2
movaps xmm4,.l_v
sar ebx,ROUND2 ; fixed point -> integer x
sar eax,ROUND2
mov edi,.screen
mov esi,.Zbuf

call real_phong_line_z

popad
; advance both edges by one scanline
movaps xmm0,.cnv1
movaps xmm1,.cnv2
movss xmm2,.cz1
movss xmm3,.cz2
addps xmm0,.dn13
addps xmm1,.dn23
addss xmm2,.dz13
addss xmm3,.dz23
add eax,.dx13
add ebx,.dx23

movaps .cnv1,xmm0
movaps .cnv2,xmm1
movss .cz1,xmm2
movss .cz2,xmm3

add ecx,1
cmp cx,.y3
jl .rpt_loop2

.rpt_loop2_end:

; release the frame reserved in the prologue
add esp,512
pop ebp

ret
align 16
real_phong_line_z:
; Draws one horizontal, z-buffered span of a Phong shaded triangle.
; The normal vector and z are interpolated linearly along the span and
; the normal is renormalized for every pixel that passes the z test.
; in:
;       xmm0 - normal vector at x1
;       xmm1 - normal vector at x2
;       xmm2 - lo -> hi y_min, y_max, x_min, x_max
;              as dword integers
;       xmm3 - lo -> hi z1, z2 coords as dword floats
;       xmm4 - normalized light vector (only stored, lights are read
;              from the global lights_aligned table)
;       eax  - x1
;       ebx  - x2
;       ecx  - y
;       edi  - screen buffer
;       esi  - z buffer filled with dd floats
; Registers are not preserved.

        push    ebp
        mov     ebp,esp
        sub     esp,160
        sub     ebp,16
        and     ebp,0xfffffff0          ; 16 byte aligned frame for movaps

; 16 byte vectors
.n1     equ [ebp-16]
.n2     equ [ebp-32]
.lv     equ [ebp-48]
; span ends
.lx1    equ [ebp-52]
.lx2    equ [ebp-56]
.z2     equ [ebp-60]
.z1     equ [ebp-64]
.screen equ [ebp-68]
.zbuff  equ [ebp-72]
; clipping rectangle packed to words, written by one movq to .y_min
.x_max  equ [ebp-74]
.x_min  equ [ebp-76]
.y_max  equ [ebp-78]
.y_min  equ [ebp-80]
; per pixel steps
.dn     equ [ebp-96]
.dz     equ [ebp-100]
.y      equ [ebp-104]
; normalized normal of the current pixel
.cnv    equ [ebp-128]

        mov     .y,ecx
        packssdw xmm2,xmm2              ; clip rectangle dwords -> words
        movq    .y_min,xmm2

        ; reject scanlines outside y_min <= y < y_max
        cmp     cx,.y_min
        jl      .end_rp_line
        cmp     cx,.y_max
        jge     .end_rp_line

        ; order the span ends so that eax < ebx; empty spans are rejected
        cmp     eax,ebx
        je      .end_rp_line
        jl      .rpl_ordered
        xchg    eax,ebx
        movaps  xmm7,xmm0
        movaps  xmm0,xmm1
        movaps  xmm1,xmm7
        shufps  xmm3,xmm3,11100001b     ; swap z1 and z2
.rpl_ordered:

        ; reject spans lying completely beside the clip rectangle
        cmp     ax,.x_max
        jge     .end_rp_line
        cmp     bx,.x_min
        jle     .end_rp_line

        mov     .lx1,eax
        mov     .lx2,ebx
        movlps  .z1,xmm3                ; writes .z1 and .z2
        movaps  .n1,xmm0
        movaps  .n2,xmm1
        movaps  .lv,xmm4

        ; per pixel steps: dn = (n2 - n1) / (x2 - x1), dz = (z2 - z1) / (x2 - x1)
        sub     ebx,eax
        cvtsi2ss xmm7,ebx
        shufps  xmm7,xmm7,0
        subps   xmm1,xmm0
        divps   xmm1,xmm7
        movaps  .dn,xmm1
        psrldq  xmm3,4                  ; bring z2 to the low lane
        subss   xmm3,.z1
        divss   xmm3,xmm7
        movss   .dz,xmm3

        ; left clipping: move the start to x_min and advance n1, z1
        ; by (x_min - x1) steps
        mov     ebx,.lx1
        cmp     bx,.x_min
        jge     .rpl_left_ok
        movzx   eax,word .x_min
        sub     eax,ebx
        cvtsi2ss xmm7,eax
        shufps  xmm7,xmm7,0
        mulps   xmm1,xmm7
        mulss   xmm3,xmm7
        addps   xmm1,.n1
        addss   xmm3,.z1
        movsx   eax,word .x_min
        movaps  .n1,xmm1
        movss   .z1,xmm3
        mov     dword .lx1,eax
.rpl_left_ok:

        ; right clipping: the span ends at x_max at the latest
        movzx   eax,word .x_max
        cmp     .lx2,eax
        jl      .rpl_right_ok
        mov     .lx2,eax
.rpl_right_ok:

        ; byte offset of the first pixel: (size_x * y + lx1) * 4,
        ; applied to both buffers
        movzx   eax,word[size_x_var]
        mul     dword .y
        add     eax,.lx1
        shl     eax,2
        add     esi,eax
        add     edi,eax

        mov     ecx,.lx2                ; ecx - number of pixels to draw
        sub     ecx,.lx1
        movss   xmm2,.z1                ; xmm2 - current z
        movaps  xmm0,.n1                ; xmm0 - current (unnormalized) normal
align 16
.ddraw:
        ; z test: skip the pixel unless the current z is below the stored one
        movss   xmm7,xmm2
        cmpnltss xmm7,dword[esi]
        movd    eax,xmm7
        test    eax,eax
        jnz     .skip
        movss   [esi],xmm2

        ; .cnv = normal * rsqrt(normal . normal)
        movaps  xmm7,xmm0
        mulps   xmm7,xmm7
        haddps  xmm7,xmm7
        haddps  xmm7,xmm7
        rsqrtps xmm7,xmm7
        mulps   xmm7,xmm0
        movaps  .cnv,xmm7

        ; walk the global light table (64 byte records) and keep the
        ; per channel maximum of all lights in xmm1
        xorps   xmm1,xmm1
        mov     edx,lights_aligned
.rpl_light:
        movaps  xmm5,[edx]
        andps   xmm5,[zero_hgst_dd]
        movaps  xmm6,[edx+16]
        movaps  xmm3,[edx+48]
        mulps   xmm5,.cnv               ; last dword should be zeroed
        haddps  xmm5,xmm5
        haddps  xmm5,xmm5               ; xmm5 - broadcasted dot product
        movaps  xmm7,xmm5
        mulps   xmm5,xmm6               ; dot * [edx+16]
        mulps   xmm7,xmm7
        mulps   xmm7,xmm7
        mulps   xmm7,xmm7               ; dot ^ 8
        mulps   xmm7,xmm3               ; dot ^ 8 * [edx+48]
        addps   xmm5,xmm7
        minps   xmm5,[mask_255f]
        maxps   xmm1,xmm5
        add     edx,64
        cmp     edx,lights_aligned_end
        jnz     .rpl_light

        ; floats -> saturated bytes -> one screen dword
        cvtps2dq xmm1,xmm1
        packssdw xmm1,xmm1
        packuswb xmm1,xmm1
        movd    [edi],xmm1
.skip:
        add     edi,4
        add     esi,4
        addps   xmm0,.dn
        addss   xmm2,.dz
        sub     ecx,1
        jnz     .ddraw

.end_rp_line:
        add     esp,160
        pop     ebp

        ret
; Real Phong shading implemented in flat assembler
; by Maciej Guba.
; http://macgub.co.pl
 
ROUND2 equ 10
real_phong_tri_z:
;----procedure renders a Phong shaded triangle with z coord
;----interpolation ( Catmull algorithm )------------------
;----the normal vector is normalized in every pixel ------
;------------------in - eax - x1 shl 16 + y1 ------------
;---------------------- ebx - x2 shl 16 + y2 ------------
;---------------------- ecx - x3 shl 16 + y3 ------------
;---------------------- esi - pointer to Z-buffer filled-
;---------------------- with dd float variables--------
;---------------------- edi - pointer to screen buffer---
;---------------------- xmm0 - 1st normal vector --------
;---------------------- xmm1 - 2nd normal vector --------
;---------------------- xmm2 - 3rd normal vector --------
;---------------------- xmm3 - normalized light vector --
;---------------------- xmm4 - lo -> hi z1, z2, z3 coords
;---------------------- as dwords floats ---------------
;---------------------- xmm5 - lo -> hi y_min, y_max, ---
;---------------------- x_min, x_max as dword integers -
;---------------------- stack - no parameters -----------
;--------------------------------------------------------
;----------------- procedure doesn't save registers !! --
; The triangle is split at its middle vertex and every scanline of
; both halves is drawn by a call to real_phong_line_z.



; Frame: 512 bytes are reserved; ebp is moved 16 bytes down and rounded
; down to a 16 byte boundary, so the 16 byte locals below can be
; accessed with movaps / movdqa.
push ebp
mov ebp,esp
sub esp,512
sub ebp,16
and ebp,0xfffffff0

; vertex normals and light vector (16 bytes each)
.1_nv equ [ebp-16]
.2_nv equ [ebp-32]
.3_nv equ [ebp-48]
.l_v equ [ebp-64]
; z1, z2, z3 are stored with a single movaps to .z1
.z3 equ [ebp-72]
.z2 equ [ebp-76]
.z1 equ [ebp-80]
; each vertex is stored as one dword: y in the low word, x in the high word
.x1 equ [ebp-82]
.y1 equ [ebp-84]
.x2 equ [ebp-86]
.y2 equ [ebp-88]
.x3 equ [ebp-90]
.y3 equ [ebp-92]
.Zbuf equ [ebp-96]
; clipping rectangle, stored with a single movdqa to .y_min
.x_max equ [ebp-100]
.x_min equ [ebp-104]
.y_max equ [ebp-108]
.y_min equ [ebp-112]
.screen equ [ebp-116]
; per scanline x steps along the edges, fixed point with ROUND2 fraction bits
.dx12 equ [ebp-120]
.dx13 equ [ebp-124]
.dx23 equ [ebp-128]
; per scanline normal vector steps along the edges
.dn12 equ [ebp-144]
.dn13 equ [ebp-160]
.dn23 equ [ebp-176]
; per scanline z steps along the edges
.dz12 equ [ebp-180]
.dz13 equ [ebp-184]
.dz23 equ [ebp-188]

.cnv1 equ [ebp-208] ; current normal vectors on both active edges
.cnv2 equ [ebp-224]
; current z on both active edges; loaded as a pair with movlps from .cz1
.cz2 equ [ebp-228]
.cz1 equ [ebp-232]





; Sort the vertices by ascending y (low word of eax, ebx, ecx).
; Every exchange also swaps the matching z lanes of xmm4 and the
; matching normal vectors.
.sort3: ; sort triangle coordinates...
cmp ax,bx
jle .sort1
xchg eax,ebx
shufps xmm4,xmm4,11100001b ; swap z lanes 0 and 1
movaps xmm6,xmm0
movaps xmm0,xmm1
movaps xmm1,xmm6


.sort1:
cmp bx,cx
jle .sort2
xchg ebx,ecx
shufps xmm4,xmm4,11011000b ; swap z lanes 1 and 2
movaps xmm6,xmm1
movaps xmm1,xmm2
movaps xmm2,xmm6

jmp .sort3

.sort2:

movaps .z1,xmm4 ; z1, z2, z3
mov .y1,eax ; writes both .y1 and .x1
mov .y2,ebx ; writes both .y2 and .x2
mov .y3,ecx ; writes both .y3 and .x3

movdqa .y_min,xmm5 ; y_min, y_max, x_min, x_max as dwords
if 1 ; check whether at least a fragment
packssdw xmm5,xmm5 ; of the triangle is in the visible area
pshuflw xmm5,xmm5,11011000b ; words -> y_min, x_min, y_max, x_max
movdqu xmm7,.y3 ; words lo -> hi: y3, x3, y2, x2, y1, x1, ...
movdqa xmm6,xmm5
pshufd xmm5,xmm5,0 ; xmm5 lo-hi -> broadcasted y_min, x_min
pshufd xmm6,xmm6,01010101b ;xmm6 -> brd y_max x_max
movdqa xmm4,xmm7
pcmpgtw xmm7,xmm5 ; coordinate > minimum
pcmpgtw xmm4,xmm6 ; coordinate > maximum
pxor xmm7,xmm4 ; set where exactly one of the two compares holds
pmovmskb eax,xmm7
and eax,0x00aaaaaa ; keep one mask bit for each of the six coordinate words
or eax,eax
jz .rpt_loop2_end ; no coordinate passed the test - draw nothing
end if
movaps .1_nv,xmm0
movaps .2_nv,xmm1
movaps .3_nv,xmm2
movaps .l_v,xmm3
mov .Zbuf,esi
mov .screen,edi



; ---- steps along edge 1-2; all zero when y1 = y2 ----
mov bx,.y2 ; calc deltas
sub bx,.y1
jnz .rpt_dx12_make

xorps xmm7,xmm7
mov dword .dx12,0
mov dword .dz12,0
movaps .dn12,xmm7
jmp .rpt_dx12_done

.rpt_dx12_make:
mov ax,.x2
sub ax,.x1
cwde
movsx ebx,bx
shl eax,ROUND2
cdq
idiv ebx ; dx12 = ((x2 - x1) shl ROUND2) / (y2 - y1)
mov .dx12,eax

cvtsi2ss xmm6,ebx
movss xmm5,.z2
subss xmm5,.z1
divss xmm5,xmm6 ; dz12 = (z2 - z1) / (y2 - y1)
movss .dz12,xmm5

movaps xmm0,.2_nv
subps xmm0,.1_nv
shufps xmm6,xmm6,0 ; broadcast (y2 - y1)
divps xmm0,xmm6 ; dn12 = (n2 - n1) / (y2 - y1)
movaps .dn12,xmm0


.rpt_dx12_done:

; ---- steps along edge 1-3; all zero when y1 = y3 ----
mov bx,.y3 ; calc deltas
sub bx,.y1
jnz .rpt_dx13_make

xorps xmm7,xmm7
mov dword .dx13,0
mov dword .dz13,0
movaps .dn13,xmm7
jmp .rpt_dx13_done

.rpt_dx13_make:
mov ax,.x3
sub ax,.x1
cwde
movsx ebx,bx
shl eax,ROUND2
cdq
idiv ebx ; dx13 = ((x3 - x1) shl ROUND2) / (y3 - y1)
mov .dx13,eax

cvtsi2ss xmm6,ebx
movss xmm5,.z3
subss xmm5,.z1
divss xmm5,xmm6 ; dz13 = (z3 - z1) / (y3 - y1)
movss .dz13,xmm5

movaps xmm0,.3_nv
subps xmm0,.1_nv
shufps xmm6,xmm6,0 ; broadcast (y3 - y1)
divps xmm0,xmm6 ; dn13 = (n3 - n1) / (y3 - y1)
movaps .dn13,xmm0

.rpt_dx13_done:

; ---- steps along edge 2-3; all zero when y2 = y3 ----
mov bx,.y3 ; calc deltas
sub bx,.y2
jnz .rpt_dx23_make

xorps xmm7,xmm7
mov dword .dx23,0
mov dword .dz23,0
movaps .dn23,xmm7
jmp .rpt_dx23_done

.rpt_dx23_make:
mov ax,.x3
sub ax,.x2
cwde
movsx ebx,bx
shl eax,ROUND2
cdq
idiv ebx ; dx23 = ((x3 - x2) shl ROUND2) / (y3 - y2)
mov .dx23,eax

cvtsi2ss xmm6,ebx
movss xmm5,.z3
subss xmm5,.z2
divss xmm5,xmm6 ; dz23 = (z3 - z2) / (y3 - y2)
movss .dz23,xmm5

movaps xmm0,.3_nv
subps xmm0,.2_nv
shufps xmm6,xmm6,0 ; broadcast (y3 - y2)
divps xmm0,xmm6 ; dn23 = (n3 - n2) / (y3 - y2)
movaps .dn23,xmm0

.rpt_dx23_done:


; Both edge walkers start at vertex 1:
; eax - current x on edge 1-3, ebx - current x on edge 1-2
; (fixed point, ROUND2 fraction bits)
movsx eax,word .x1
shl eax,ROUND2
mov ebx,eax
mov edx,.z1
mov .cz1,edx
mov .cz2,edx
movaps xmm0,.1_nv
movaps .cnv1,xmm0
movaps .cnv2,xmm0


movsx ecx,word .y1 ; ecx - current scanline
cmp cx,.y2

jge .rpt_loop1_end ; upper half is empty

; ---- upper half: scanlines y1 .. y2-1 ----
.rpt_loop1:
pushad ; the line routine gets converted copies; eax, ebx, ecx survive the call

movaps xmm2,.y_min ; clipping rectangle
movaps xmm0,.cnv1
movaps xmm1,.cnv2
movlps xmm3,.cz1 ; lo -> hi: cz1, cz2
movaps xmm4,.l_v
sar ebx,ROUND2 ; fixed point -> integer x
sar eax,ROUND2
mov edi,.screen
mov esi,.Zbuf

call real_phong_line_z

popad
; advance both edges by one scanline
movaps xmm0,.cnv1
movaps xmm1,.cnv2
movss xmm2,.cz1
movss xmm3,.cz2
addps xmm0,.dn13
addps xmm1,.dn12
addss xmm2,.dz13
addss xmm3,.dz12
add eax,.dx13
add ebx,.dx12

movaps .cnv1,xmm0
movaps .cnv2,xmm1
movss .cz1,xmm2
movss .cz2,xmm3

add ecx,1
cmp cx,.y2
jl .rpt_loop1





.rpt_loop1_end:
movsx ecx,word .y2
cmp cx,.y3
jge .rpt_loop2_end ; lower half is empty

; restart the second edge walker at vertex 2; it now follows edge 2-3
movsx ebx,word .x2 ; eax - cur x1
shl ebx,ROUND2 ; ebx - cur x2
push dword .z2
pop dword .cz2
movaps xmm0,.2_nv
movaps .cnv2,xmm0


; ---- lower half: scanlines y2 .. y3-1 ----
.rpt_loop2:
pushad

movaps xmm2,.y_min ; clipping rectangle
movaps xmm0,.cnv1
movaps xmm1,.cnv2
movlps xmm3,.cz1 ; lo -> hi: cz1, cz2
movaps xmm4,.l_v
sar ebx,ROUND2 ; fixed point -> integer x
sar eax,ROUND2
mov edi,.screen
mov esi,.Zbuf

call real_phong_line_z

popad
; advance both edges by one scanline
movaps xmm0,.cnv1
movaps xmm1,.cnv2
movss xmm2,.cz1
movss xmm3,.cz2
addps xmm0,.dn13
addps xmm1,.dn23
addss xmm2,.dz13
addss xmm3,.dz23
add eax,.dx13
add ebx,.dx23

movaps .cnv1,xmm0
movaps .cnv2,xmm1
movss .cz1,xmm2
movss .cz2,xmm3

add ecx,1
cmp cx,.y3
jl .rpt_loop2

.rpt_loop2_end:

; release the frame reserved in the prologue
add esp,512
pop ebp

ret
align 16
real_phong_line_z:
; Draws one horizontal, z-buffered span of a Phong shaded triangle.
; The normal vector and z are interpolated linearly along the span and
; the normal is renormalized for every pixel that passes the z test.
; in:
;       xmm0 - normal vector at x1
;       xmm1 - normal vector at x2
;       xmm2 - lo -> hi y_min, y_max, x_min, x_max
;              as dword integers
;       xmm3 - lo -> hi z1, z2 coords as dword floats
;       xmm4 - normalized light vector (only stored, lights are read
;              from the global lights_aligned table)
;       eax  - x1
;       ebx  - x2
;       ecx  - y
;       edi  - screen buffer
;       esi  - z buffer filled with dd floats
; Registers are not preserved.

        push    ebp
        mov     ebp,esp
        sub     esp,160
        sub     ebp,16
        and     ebp,0xfffffff0          ; 16 byte aligned frame for movaps

; 16 byte vectors
.n1     equ [ebp-16]
.n2     equ [ebp-32]
.lv     equ [ebp-48]
; span ends
.lx1    equ [ebp-52]
.lx2    equ [ebp-56]
.z2     equ [ebp-60]
.z1     equ [ebp-64]
.screen equ [ebp-68]
.zbuff  equ [ebp-72]
; clipping rectangle packed to words, written by one movq to .y_min
.x_max  equ [ebp-74]
.x_min  equ [ebp-76]
.y_max  equ [ebp-78]
.y_min  equ [ebp-80]
; per pixel steps
.dn     equ [ebp-96]
.dz     equ [ebp-100]
.y      equ [ebp-104]
; normalized normal of the current pixel
.cnv    equ [ebp-128]

        mov     .y,ecx
        packssdw xmm2,xmm2              ; clip rectangle dwords -> words
        movq    .y_min,xmm2

        ; reject scanlines outside y_min <= y < y_max
        cmp     cx,.y_min
        jl      .end_rp_line
        cmp     cx,.y_max
        jge     .end_rp_line

        ; order the span ends so that eax < ebx; empty spans are rejected
        cmp     eax,ebx
        je      .end_rp_line
        jl      .rpl_ordered
        xchg    eax,ebx
        movaps  xmm7,xmm0
        movaps  xmm0,xmm1
        movaps  xmm1,xmm7
        shufps  xmm3,xmm3,11100001b     ; swap z1 and z2
.rpl_ordered:

        ; reject spans lying completely beside the clip rectangle
        cmp     ax,.x_max
        jge     .end_rp_line
        cmp     bx,.x_min
        jle     .end_rp_line

        mov     .lx1,eax
        mov     .lx2,ebx
        movlps  .z1,xmm3                ; writes .z1 and .z2
        movaps  .n1,xmm0
        movaps  .n2,xmm1
        movaps  .lv,xmm4

        ; per pixel steps: dn = (n2 - n1) / (x2 - x1), dz = (z2 - z1) / (x2 - x1)
        sub     ebx,eax
        cvtsi2ss xmm7,ebx
        shufps  xmm7,xmm7,0
        subps   xmm1,xmm0
        divps   xmm1,xmm7
        movaps  .dn,xmm1
        psrldq  xmm3,4                  ; bring z2 to the low lane
        subss   xmm3,.z1
        divss   xmm3,xmm7
        movss   .dz,xmm3

        ; left clipping: move the start to x_min and advance n1, z1
        ; by (x_min - x1) steps
        mov     ebx,.lx1
        cmp     bx,.x_min
        jge     .rpl_left_ok
        movzx   eax,word .x_min
        sub     eax,ebx
        cvtsi2ss xmm7,eax
        shufps  xmm7,xmm7,0
        mulps   xmm1,xmm7
        mulss   xmm3,xmm7
        addps   xmm1,.n1
        addss   xmm3,.z1
        movsx   eax,word .x_min
        movaps  .n1,xmm1
        movss   .z1,xmm3
        mov     dword .lx1,eax
.rpl_left_ok:

        ; right clipping: the span ends at x_max at the latest
        movzx   eax,word .x_max
        cmp     .lx2,eax
        jl      .rpl_right_ok
        mov     .lx2,eax
.rpl_right_ok:

        ; byte offset of the first pixel: (size_x * y + lx1) * 4,
        ; applied to both buffers
        movzx   eax,word[size_x_var]
        mul     dword .y
        add     eax,.lx1
        shl     eax,2
        add     esi,eax
        add     edi,eax

        mov     ecx,.lx2                ; ecx - number of pixels to draw
        sub     ecx,.lx1
        movss   xmm2,.z1                ; xmm2 - current z
        movaps  xmm0,.n1                ; xmm0 - current (unnormalized) normal
align 16
.ddraw:
        ; z test: skip the pixel unless the current z is below the stored one
        movss   xmm7,xmm2
        cmpnltss xmm7,dword[esi]
        movd    eax,xmm7
        test    eax,eax
        jnz     .skip
        movss   [esi],xmm2

        ; .cnv = normal * rsqrt(normal . normal)
        movaps  xmm7,xmm0
        mulps   xmm7,xmm7
        haddps  xmm7,xmm7
        haddps  xmm7,xmm7
        rsqrtps xmm7,xmm7
        mulps   xmm7,xmm0
        movaps  .cnv,xmm7

        ; walk the global light table (64 byte records) and keep the
        ; per channel maximum of all lights in xmm1
        xorps   xmm1,xmm1
        mov     edx,lights_aligned
.rpl_light:
        movaps  xmm5,[edx]
        andps   xmm5,[zero_hgst_dd]
        movaps  xmm6,[edx+16]
        movaps  xmm3,[edx+48]
        mulps   xmm5,.cnv               ; last dword should be zeroed
        haddps  xmm5,xmm5
        haddps  xmm5,xmm5               ; xmm5 - broadcasted dot product
        movaps  xmm7,xmm5
        mulps   xmm5,xmm6               ; dot * [edx+16]
        mulps   xmm7,xmm7
        mulps   xmm7,xmm7
        mulps   xmm7,xmm7               ; dot ^ 8
        mulps   xmm7,xmm3               ; dot ^ 8 * [edx+48]
        addps   xmm5,xmm7
        minps   xmm5,[mask_255f]
        maxps   xmm1,xmm5
        add     edx,64
        cmp     edx,lights_aligned_end
        jnz     .rpl_light

        ; floats -> saturated bytes -> one screen dword
        cvtps2dq xmm1,xmm1
        packssdw xmm1,xmm1
        packuswb xmm1,xmm1
        movd    [edi],xmm1
.skip:
        add     edi,4
        add     esi,4
        addps   xmm0,.dn
        addss   xmm2,.dz
        sub     ecx,1
        jnz     .ddraw

.end_rp_line:
        add     esp,160
        pop     ebp

        ret
/programs/demos/view3ds/3ray_shd.inc
0,0 → 1,688
; Ray casted shadows
; by Maciej Guba.
; http://macgub.co.pl
 
 
ROUND2 equ 10
ray_shad:
;--- Procedure renders a triangle with ray casted shadow ---
;--- effect. Intersections with all triangles are tested ---
;--- in every pixel. It is not a real time process, --------
;--- especially when many triangles are computed. ----------
;------in - eax - x1 shl 16 + y1 ------------------------
;---------- ebx - x2 shl 16 + y2 ------------------------
;---------- ecx - x3 shl 16 + y3 ------------------------
;---------- edx - not read by this procedure ------------
;---------- esi - pointer to stencil / Z-buffer, filled -
;-------------- with dword float variables, it masks --
;-------------- 'Z' position (coord) of every front ---
;-------------- pixel. --------------------------------
;---------- edi - pointer to screen buffer --------------
;---------- xmm0 - 1st normal vector --------------------
;---------- xmm1 - 2nd normal vector --------------------
;---------- xmm2 - 3rd normal vector --------------------
;---------- xmm3 - not read by this procedure -----------
;---------- xmm4 - lo -> hi z1, z2, z3 coords -----------
;--------------- as dwords floats ---------------------
;---------- xmm5 - lo -> hi y_min, y_max, x_min, x_max --
;--------------- as dword integers --------------------
;---------- mm7 - current triangle index ---------------
;---------- stack - no parameters -----------------------
;--------------------------------------------------------
;----------------- procedure doesn't save registers !! --
; The triangle is split at its middle vertex; every scanline of both
; halves is drawn by ray_shd_l.

        ; 1024 byte frame; ebp is rounded down to a 16 byte boundary so
        ; the 16 byte locals can be accessed with movaps / movdqa
        push    ebp
        mov     ebp,esp
        sub     esp,1024
        sub     ebp,16
        and     ebp,0xfffffff0

; vertex normals (16 bytes each)
.1_nv   equ [ebp-16]
.2_nv   equ [ebp-32]
.3_nv   equ [ebp-48]
.l_v    equ [ebp-64]
; z1, z2, z3 - stored with one movaps to .z1
.z3     equ [ebp-72]
.z2     equ [ebp-76]
.z1     equ [ebp-80]
; each vertex is one dword: y in the low word, x in the high word
.x1     equ [ebp-82]
.y1     equ [ebp-84]
.x2     equ [ebp-86]
.y2     equ [ebp-88]
.x3     equ [ebp-90]
.y3     equ [ebp-92]
.Zbuf   equ [ebp-96]
; clipping rectangle - stored with one movdqa to .y_min
.x_max  equ [ebp-100]
.x_min  equ [ebp-104]
.y_max  equ [ebp-108]
.y_min  equ [ebp-112]
.screen equ [ebp-116]
; per scanline steps along the three edges
; (x steps are fixed point with ROUND2 fraction bits)
.dx12   equ [ebp-120]
.dx13   equ [ebp-124]
.dx23   equ [ebp-128]
.dn12   equ [ebp-144]
.dn13   equ [ebp-160]
.dn23   equ [ebp-176]
.dz12   equ [ebp-180]
.dz13   equ [ebp-184]
.dz23   equ [ebp-188]
.cnv1   equ [ebp-208]           ; current normal vectors
.cnv2   equ [ebp-240]
.cz2    equ [ebp-244]           ; current z values, loaded as a pair from .cz1
.cz1    equ [ebp-248]
.tri_no equ [ebp-252]           ; index of the rendered triangle


; Sort the vertices by ascending y (low words). Every exchange also
; swaps the matching z lanes of xmm4 and the matching normals.
.sort3:
        cmp     ax,bx
        jle     .sort1
        xchg    eax,ebx
        shufps  xmm4,xmm4,11100001b     ; swap z lanes 0 and 1
        movaps  xmm6,xmm0
        movaps  xmm0,xmm1
        movaps  xmm1,xmm6

.sort1:
        cmp     bx,cx
        jle     .sort2
        xchg    ebx,ecx
        shufps  xmm4,xmm4,11011000b     ; swap z lanes 1 and 2
        movaps  xmm6,xmm1
        movaps  xmm1,xmm2
        movaps  xmm2,xmm6

        jmp     .sort3

.sort2:

        movaps  .z1,xmm4
        mov     .y1,eax                 ; writes .y1 and .x1
        mov     .y2,ebx                 ; writes .y2 and .x2
        mov     .y3,ecx                 ; writes .y3 and .x3

        movdqa  .y_min,xmm5
if 1    ; check whether at least a fragment of the triangle is visible
        packssdw xmm5,xmm5
        pshuflw xmm5,xmm5,11011000b     ; words -> y_min, x_min, y_max, x_max
        movdqu  xmm7,.y3                ; words: y3, x3, y2, x2, y1, x1, ...
        movdqa  xmm6,xmm5
        pshufd  xmm5,xmm5,0             ; broadcasted y_min, x_min
        pshufd  xmm6,xmm6,01010101b     ; broadcasted y_max, x_max
        movdqa  xmm4,xmm7
        pcmpgtw xmm7,xmm5               ; coordinate > minimum
        pcmpgtw xmm4,xmm6               ; coordinate > maximum
        pxor    xmm7,xmm4               ; exactly one of both compares holds
        pmovmskb eax,xmm7
        and     eax,0x00aaaaaa          ; one bit per coordinate word; sets ZF
        jz      .rpt_loop2_end          ; nothing passed - draw nothing
end if
        movd    .tri_no,mm7
        movaps  .1_nv,xmm0
        movaps  .2_nv,xmm1
        movaps  .3_nv,xmm2
        mov     .Zbuf,esi
        mov     .screen,edi


; ---- steps along edge 1-2; all zero when y1 = y2 ----
        mov     bx,.y2
        sub     bx,.y1
        jnz     .rpt_dx12_make

        xorps   xmm7,xmm7
        mov     dword .dx12,0
        mov     dword .dz12,0
        movaps  .dn12,xmm7
        jmp     .rpt_dx12_done

.rpt_dx12_make:
        mov     ax,.x2
        sub     ax,.x1
        cwde
        movsx   ebx,bx
        shl     eax,ROUND2
        cdq
        idiv    ebx                     ; ((x2 - x1) shl ROUND2) / (y2 - y1)
        mov     .dx12,eax

        cvtsi2ss xmm6,ebx
        movss   xmm5,.z2
        rcpss   xmm6,xmm6               ; approximate 1 / (y2 - y1)
        subss   xmm5,.z1
        mulss   xmm5,xmm6
        movss   .dz12,xmm5

        shufps  xmm6,xmm6,0
        movaps  xmm0,.2_nv
        subps   xmm0,.1_nv
        mulps   xmm0,xmm6
        movaps  .dn12,xmm0

.rpt_dx12_done:
; ---- steps along edge 1-3; all zero when y1 = y3 ----
        mov     bx,.y3
        sub     bx,.y1
        jnz     .rpt_dx13_make

        xorps   xmm7,xmm7
        mov     dword .dx13,0
        mov     dword .dz13,0
        movaps  .dn13,xmm7
        jmp     .rpt_dx13_done

.rpt_dx13_make:
        mov     ax,.x3
        sub     ax,.x1
        cwde
        movsx   ebx,bx
        shl     eax,ROUND2
        cdq
        idiv    ebx                     ; ((x3 - x1) shl ROUND2) / (y3 - y1)
        mov     .dx13,eax

        cvtsi2ss xmm6,ebx
        movss   xmm5,.z3
        rcpss   xmm6,xmm6               ; approximate 1 / (y3 - y1)
        subss   xmm5,.z1
        mulss   xmm5,xmm6
        movss   .dz13,xmm5

        movaps  xmm0,.3_nv
        subps   xmm0,.1_nv
        shufps  xmm6,xmm6,0
        mulps   xmm0,xmm6
        movaps  .dn13,xmm0

.rpt_dx13_done:

; ---- steps along edge 2-3; all zero when y2 = y3 ----
        mov     bx,.y3
        sub     bx,.y2
        jnz     .rpt_dx23_make

        xorps   xmm7,xmm7
        mov     dword .dx23,0
        mov     dword .dz23,0
        movaps  .dn23,xmm7

        jmp     .rpt_dx23_done

.rpt_dx23_make:
        mov     ax,.x3
        sub     ax,.x2
        cwde
        movsx   ebx,bx
        shl     eax,ROUND2
        cdq
        idiv    ebx                     ; ((x3 - x2) shl ROUND2) / (y3 - y2)
        mov     .dx23,eax

        cvtsi2ss xmm6,ebx
        movss   xmm5,.z3
        rcpss   xmm6,xmm6               ; approximate 1 / (y3 - y2)
        subss   xmm5,.z2
        mulss   xmm5,xmm6
        movss   .dz23,xmm5

        movaps  xmm0,.3_nv
        subps   xmm0,.2_nv
        shufps  xmm6,xmm6,0
        mulps   xmm0,xmm6
        movaps  .dn23,xmm0

.rpt_dx23_done:

; Both edge walkers start at vertex 1:
; eax - current x on edge 1-3, ebx - current x on edge 1-2
        movsx   eax,word .x1
        shl     eax,ROUND2
        mov     ebx,eax
        mov     ecx,.z1
        mov     .cz1,ecx
        mov     .cz2,ecx
        movaps  xmm0,.1_nv
        movaps  .cnv1,xmm0
        movaps  .cnv2,xmm0
        mov     edi,.screen
        mov     esi,.Zbuf
        movsx   ecx,word .y1            ; ecx - current scanline
        cmp     cx,.y2

        jge     .rpt_loop1_end          ; upper half is empty

; ---- upper half: scanlines y1 .. y2-1 ----
.rpt_loop1:
        pushad                          ; eax, ebx, ecx, esi, edi survive the call

        movaps  xmm2,.y_min             ; clipping rectangle
        movaps  xmm0,.cnv1
        movaps  xmm1,.cnv2
        movlps  xmm3,.cz1               ; lo -> hi: cz1, cz2
        sar     ebx,ROUND2              ; fixed point -> integer x
        sar     eax,ROUND2
        movd    mm7,.tri_no

        call    ray_shd_l

        popad
        ; advance both edges by one scanline
        movaps  xmm0,.cnv1
        movaps  xmm1,.cnv2
        movss   xmm2,.cz1
        movss   xmm3,.cz2
        addps   xmm0,.dn13
        addps   xmm1,.dn12
        addss   xmm2,.dz13
        addss   xmm3,.dz12

        add     eax,.dx13
        add     ebx,.dx12

        movaps  .cnv1,xmm0
        movaps  .cnv2,xmm1
        movss   .cz1,xmm2
        movss   .cz2,xmm3

        add     ecx,1
        cmp     cx,.y2
        jl      .rpt_loop1


.rpt_loop1_end:
        movsx   ecx,word .y2
        cmp     cx,.y3
        jge     .rpt_loop2_end          ; lower half is empty

        ; restart the second edge walker at vertex 2 (edge 2-3)
        movsx   ebx,word .x2            ; eax - cur x1
        shl     ebx,ROUND2              ; ebx - cur x2
        push    dword .z2
        pop     dword .cz2
        movaps  xmm0,.2_nv
        movaps  .cnv2,xmm0

        mov     edi,.screen
        mov     esi,.Zbuf


; ---- lower half: scanlines y2 .. y3-1 ----
.rpt_loop2:
        pushad
        movaps  xmm2,.y_min             ; clipping rectangle
        movaps  xmm0,.cnv1
        movaps  xmm1,.cnv2
        movlps  xmm3,.cz1               ; lo -> hi: cz1, cz2
        sar     ebx,ROUND2              ; fixed point -> integer x
        sar     eax,ROUND2
        movd    mm7,.tri_no

        call    ray_shd_l

        popad
        ; advance both edges by one scanline
        movaps  xmm0,.cnv1
        movaps  xmm1,.cnv2
        movss   xmm2,.cz1
        movss   xmm3,.cz2

        addps   xmm0,.dn13
        addps   xmm1,.dn23
        addss   xmm2,.dz13
        addss   xmm3,.dz23
        add     eax,.dx13
        add     ebx,.dx23

        movaps  .cnv1,xmm0
        movaps  .cnv2,xmm1
        movss   .cz1,xmm2
        movss   .cz2,xmm3

        add     ecx,1
        cmp     cx,.y3
        jl      .rpt_loop2

.rpt_loop2_end:

        ; release the frame reserved in the prologue
        add     esp,1024
        pop     ebp

        ret
align 16
ray_shd_l:
; Draws one horizontal span of a triangle with ray casted shadows.
; For every pixel that passes the stencil test a ray from the light to
; the pixel is tested with intersect_tri against every triangle except
; the rendered one; a pixel whose ray is blocked is written as black.
; in:
; xmm0 - normal vector 1
; xmm1 - normal vect 2
; xmm3 - lo -> hi z1, z2 coords as dwords floats
; xmm2 - lo -> hi y_min, y_max, x_min, x_max
; as dword integers
; xmm4 - ----
; mm7 - current triangle index
; eax - x1
; ebx - x2
; ecx - y
; edx - -----
; edi - screen buffer
; esi - z buffer / stencil buffer filled with dd floats

; 320 byte frame; ebp is rounded down to a 16 byte boundary so the
; 16 byte locals can be accessed with movaps
push ebp
mov ebp,esp
sub esp,320
sub ebp,16
and ebp,0xfffffff0

.n1 equ [ebp-16]
.n2 equ [ebp-32]
.lv equ [ebp-48]
.lx1 equ [ebp-52]
.lx2 equ [ebp-56]
.z2 equ [ebp-60]
.z1 equ [ebp-64]
.screen equ [ebp-68]
.zbuff equ [ebp-72]
; clipping rectangle packed to words, written by one movq to .y_min
.x_max equ [ebp-74]
.x_min equ [ebp-76]
.y_max equ [ebp-78]
.y_min equ [ebp-80]
.dn equ [ebp-96]
.dz equ [ebp-100]
.y equ [ebp-104]
; .cur_tri equ [ebp-108]
.cnv equ [ebp-128] ; normalized normal of the current pixel
.Rlen equ [ebp-128-16] ; length of the light -> pixel ray
.r1 equ [ebp-128-32] ; ray origin (light position)
.vect_t equ [ebp-128-48] ; [vect_x] words converted to floats
.cur_tri equ [ebp-128-64] ; index of the rendered triangle
; .p3t equ [ebp-128-80]
.nray equ [ebp-128-96] ; normalized ray direction
.final_col equ [ebp-128-112] ; accumulated pixel colour
.aabb_mask equ dword[ebp-128-112-4] ; only used by the disabled 'if 0' code

mov .y,ecx
movdqa xmm4,xmm2 ; NOTE(review): xmm4 is not read before it is reloaded below
packssdw xmm2,xmm2 ; clip rectangle dwords -> words
movq .y_min,xmm2
; reject scanlines outside y_min <= y < y_max
cmp cx,.y_min
jl .end_rp_line
cmp cx,.y_max
jge .end_rp_line ;
; order the span ends so that eax < ebx; empty spans are rejected
cmp eax,ebx
je .end_rp_line
jl @f
xchg eax,ebx
movaps xmm7,xmm0
movaps xmm0,xmm1
movaps xmm1,xmm7
shufps xmm3,xmm3,11100001b ; swap z1 and z2
@@:
movd .cur_tri,mm7
; reject spans lying completely beside the clip rectangle
cmp ax,.x_max
jge .end_rp_line
cmp bx,.x_min
jle .end_rp_line
; movaps .lv,xmm4
andps xmm0,[zero_hgst_dd] ; presumably clears the highest dword - confirm mask contents
andps xmm1,[zero_hgst_dd]
movaps .n1,xmm0
movaps .n2,xmm1
mov .lx1,eax
mov .lx2,ebx
movlps .z1,xmm3 ; writes .z1 and .z2

; per pixel steps, using the approximate reciprocal of (x2 - x1)
sub ebx,eax
cvtsi2ss xmm7,ebx
rcpss xmm7,xmm7
shufps xmm7,xmm7,0
subps xmm1,xmm0
mulps xmm1,xmm7
movaps .dn,xmm1
shufps xmm3,xmm3,11111001b ; bring z2 to the low lane
subss xmm3,.z1
mulss xmm3,xmm7
movss .dz,xmm3

; NOTE(review): xmm5 and xmm6 are not inputs of this procedure and the
; results computed in xmm6 here and below are never read - looks like a
; leftover of another line routine
subps xmm6,xmm5
mulps xmm6,xmm7

; left clipping: move the start to x_min and advance n1, z1
; by (x_min - x1) steps
mov ebx,.lx1
cmp bx,.x_min ; clipping on function4
jge @f
movzx eax,word .x_min
sub eax,ebx
cvtsi2ss xmm7,eax
shufps xmm7,xmm7,0
mulss xmm3,xmm7
mulps xmm1,xmm7
mulps xmm6,xmm7
addss xmm3,.z1
addps xmm1,.n1
addps xmm6,xmm5
movsx eax,word .x_min
movss .z1,xmm3
movaps .n1,xmm1
mov dword .lx1,eax
@@:

; right clipping: the span ends at x_max at the latest
movzx eax,word .x_max
cmp .lx2,eax
jl @f
mov .lx2,eax
@@:
; byte offset of the first pixel: (xres * y + lx1) * 4, applied to both buffers
movzx eax,word[xres_var]
mul dword .y
add eax,.lx1
mov .zbuff,esi
mov .screen,edi
shl eax,2
add edi,eax
add esi,eax
mov ecx,.lx2 ; ecx - number of pixels to draw
sub ecx,.lx1

; .vect_t = the first two words of [vect_x] as floats, upper lanes 0.0
; (assuming [the_zero] holds zeros - confirm)
movd xmm0,[vect_x]
punpcklwd xmm0,[the_zero]
cvtdq2ps xmm0,xmm0
movaps .vect_t,xmm0


; ---- per pixel loop; the current x, normal and z live in .lx1, .n1, .z1 ----
.ddraw:

xorps xmm0,xmm0
movss xmm2,.z1
movss xmm5,.z1
movaps .final_col,xmm0 ; start from black
; stencil test: continue only if the stored z lies within +-[f1] of .z1
addss xmm2,[f1]
subss xmm5,[f1]
cmpnltss xmm2,dword[esi]
cmpnltss xmm5,dword[esi]
pxor xmm2,xmm5
movd eax,xmm2
or eax,eax
jz .skips

; .cnv = n1 * rsqrt(n1 . n1)
movaps xmm7,.n1
andps xmm7,[zero_hgst_dd]
mulps xmm7,xmm7 ; normalize
haddps xmm7,xmm7
haddps xmm7,xmm7
rsqrtps xmm7,xmm7
mulps xmm7,.n1
movaps .cnv,xmm7
mov ebx,point_light_coords ; ebx - position of the current light
mov edx,lights_aligned ; edx - colour record of the current light
xor eax,eax
.nx_light:
pushad ; restored by the popad before .do_process or at .inter
; xmm0 = (lx1, y, z1) - position of the current pixel
cvtsi2ss xmm0,.lx1
cvtsi2ss xmm1,.y
movss xmm2,.z1
movlhps xmm0,xmm1
shufps xmm0,xmm2,11001000b
subps xmm0,[ebx] ; xmm0 - ray end, -> current vertex
movaps xmm3,[ebx]
andps xmm0,[zero_hgst_dd]
movaps xmm1,xmm0
; .Rlen = length of the ray, .nray = ray / length (rcpps approximation)
mulps xmm0,xmm0
haddps xmm0,xmm0
haddps xmm0,xmm0
sqrtps xmm0,xmm0
movss .Rlen,xmm0
rcpps xmm0,xmm0
mulps xmm0,xmm1 ; xmm0 - normalized ray vector
andps xmm0,[zero_hgst_dd]
movaps .nray,xmm0
movaps .r1,xmm3 ; ray origin
if 0
movaps xmm1,xmm3
call calc_bounding_box

mov .aabb_mask,eax
end if
mov edi,[triangles_ptr] ; NOTE(review): edi is reloaded before its next use
xor ecx,ecx ; ecx - index of the tested triangle
.nx_tri: ; next triangle

cmp ecx,.cur_tri ; prevent self shadowing
je .skipp
if 0
mov edi,ecx
imul edi,[i12]
add edi,[triangles_ptr]
mov eax,[edi]
mov ebx,[edi+4]
mov edx,[edi+8]
imul eax,[i12]
imul ebx,[i12]
imul edx,[i12]
add eax,[points_ptr]
add ebx,[points_ptr]
add edx,[points_ptr]
movups xmm2,[eax]
movups xmm3,[ebx]
movups xmm4,[edx]
andps xmm2,[sign_mask]
andps xmm3,[sign_mask]
andps xmm4,[sign_mask]
movmskps ebx,xmm4
cmpeqps xmm2,xmm3
cmpeqps xmm3,xmm4
andps xmm2,xmm3
movmskps eax,xmm2
and eax,111b
and ebx,111b
cmp eax,111b
jne @f
bt .aabb_mask,ebx
jnc .skipp
@@:
end if
; fetch the three vertex indices of triangle ecx and load the rotated
; vertices; records are [i12] bytes apart (presumably 12 - confirm)
mov edi,ecx
imul edi,[i12]
add edi,[triangles_ptr]
mov eax,[edi]
mov ebx,[edi+4]
mov edx,[edi+8]
imul eax,[i12]
imul ebx,[i12]
imul edx,[i12]
add eax,[points_rotated_ptr]
add ebx,[points_rotated_ptr]
add edx,[points_rotated_ptr]
movups xmm2,[eax]
movups xmm3,[ebx]
movups xmm4,[edx]
; translate the vertices by .vect_t
addps xmm2,.vect_t
addps xmm3,.vect_t
addps xmm4,.vect_t


;intersect_tri: procs header
; in:
; xmm0 - ray direction ; should be normalized
; xmm1 - ray origin
; xmm2 - tri vert1
; xmm3 - tri vert2
; xmm4 - tri vert3
; if eax = 1 - intersection with edge
; xmm6 - edge length
; if eax = 0 - intersect with ray (classic)
; out:
; eax = 1 - intersection occurred
; xmm0 - float lo -> hi = t, v, u, ...

movss xmm6,.Rlen
movaps xmm0,.nray
movaps xmm1,.r1
subss xmm6,[the_one] ; edge length = ray length - [the_one]
mov eax,1
push ecx ; keep the triangle index across the call
call intersect_tri
pop ecx
cmp eax,1
je .inter ; the ray is blocked - pixel stays black
.skipp:
.skp:
inc ecx
cmp ecx,[triangles_count_var]
jnz .nx_tri
; jz .do_process
; comiss xmm0,.Rlen
; jl .inter

popad
; no triangle blocks the ray: light the pixel;
; edx points to the light record again after popad
.do_process:
movaps xmm5,.nray ;[edx]
andps xmm5,[zero_hgst_dd] ; global
mulps xmm5,.cnv ;.lv ; last dword should be zeroed
; andps xmm5,[sign_z] ; global
haddps xmm5,xmm5
haddps xmm5,xmm5 ; broadcasted dot product of ray and normal
andps xmm5,[abs_mask] ; global
movaps xmm7,xmm5
mulps xmm7,xmm7
mulps xmm7,xmm7
mulps xmm5,[edx+16] ; dot * [edx+16]
mulps xmm7,xmm7
mulps xmm7,xmm7 ; dot ^ 16
mulps xmm7,[edx+48] ; dot ^ 16 * [edx+48]
addps xmm5,xmm7
minps xmm5,[mask_255f] ; global
maxps xmm5,.final_col ; addps maxps
movaps .final_col,xmm5
jmp .nx_loop
.inter:

popad
.nx_loop:
; add edx,64 ; uncomment to achieve 3 lights
; add ebx,16
; cmp edx,lights_aligned_end ; global
; jnz .nx_light

; floats -> saturated bytes -> one screen dword
movaps xmm1,.final_col
cvtps2dq xmm1,xmm1
packssdw xmm1,xmm1
packuswb xmm1,xmm1
movd [edi],xmm1
.skips:
; step to the next pixel
movaps xmm0,.n1
movss xmm2,.z1
add edi,4
add esi,4
add dword .lx1,1
addps xmm0,.dn
addss xmm2,.dz
movaps .n1,xmm0
movss .z1,xmm2
dec ecx
jnz .ddraw
.end_rp_line:
; release the frame reserved in the prologue
add esp,320
pop ebp

ret
/programs/demos/view3ds/a_procs.inc
1,3 → 1,200
 
 
if Ext > SSE2
;--------------------------------------------------------------------
init_point_lights:
; Fills point_light_coords with three randomly placed point lights.
; Each light is a 16 byte record of dword floats: x, y, z and a
; fourth dword set to 0.
; in:  nothing
; out: nothing
; Uses the external procedure 'random' (ecx, edx = range, result in eax;
; presumably ecx is the lower and edx the upper limit - confirm).
; Clobbers eax, ecx, edx, edi, xmm0, flags and whatever 'random' clobbers.
; NOTE(review): the y coordinate is drawn from 0..size_x_var like x, not
; from the screen height - confirm this is intended.
        mov     edi,point_light_coords
        mov     ecx,3                   ; number of lights
.next_light:
        push    ecx                     ; ecx is needed as a 'random' argument

        xor     ecx,ecx
        movzx   edx,word[size_x_var]
        call    random
        cvtsi2ss xmm0,eax
        movss   [edi],xmm0              ; x

        xor     ecx,ecx
        movzx   edx,word[size_x_var]
        call    random
        cvtsi2ss xmm0,eax
        movss   [edi+4],xmm0            ; y

        mov     ecx,-1900
        mov     edx,-600
        call    random
        cvtsi2ss xmm0,eax
        movss   [edi+8],xmm0            ; z

        mov     dword[edi+12],0         ; unused 4th component
        add     edi,16

        pop     ecx
        dec     ecx                     ; dec/jnz instead of the slow 'loop'
        jnz     .next_light

        ret
 
;------------------------------------------------------------------
intersect_tri: ; Moeller-Trumbore method
; Ray / segment versus triangle intersection test.
; in:
;     xmm0 - ray direction ; should be normalized
;     xmm1 - ray origin
;     xmm2 - tri vert1
;     xmm3 - tri vert2
;     xmm4 - tri vert3
;     eax = 0  - intersect with ray (classic): hit when t >= eps
;     eax <> 0 - intersection with edge (segment): hit when eps < t <= edge length
;                xmm6 - edge length (low dword)
; out:
;     eax = 1 - intersection occurred, eax = 0 - no intersection
;     xmm0 - float lo -> hi = t, v, u, edge length
;            (t, v, u are meaningful only when eax = 1)
; clobbers: ebx, esi, edi, xmm1, xmm3, xmm4 and whatever cross_aligned changes
; notes:
;   - triangles for which det = e1 . (dir x e2) is below eps are rejected
;   - COMISS reports its result in CF/ZF/PF only and clears SF and OF, so
;     the "below" tests must use jb; a signed jl is never taken after COMISS
;   - cross_aligned is assumed to store esi x edi at [ebx] - confirm
;   - [zero_hgst_dd] is assumed to clear the highest dword of a vector - confirm
        push    ebp
        mov     ebp,esp
        and     ebp,-16                 ; align frame base: locals are used as 16-byte SSE operands
        sub     esp,220

.dir equ [ebp-16]
.origin equ [ebp-32]
.ta equ [ebp-48]
.tb equ [ebp-64]
.tc equ [ebp-80]
.tvec equ [ebp-96]
.pvec equ [ebp-112]
.qvec equ [ebp-128]
.e1 equ [ebp-128-16]
.ift equ dword[ebp-152]
.invdet equ [ebp-156]
.det equ [ebp-160]
.ed_l equ [ebp-164]
.u equ [ebp-168]
.v equ [ebp-172]
.t equ [ebp-176]
.e2 equ [ebp-192]

        movaps  .dir,xmm0
        movaps  .origin,xmm1
        movaps  .ta,xmm2
        movaps  .tb,xmm3
        movaps  .tc,xmm4
        mov     .ift,eax
        movss   .ed_l,xmm6
        subps   xmm3,xmm2               ; e1 = vert2 - vert1
        subps   xmm4,xmm2               ; e2 = vert3 - vert1
        andps   xmm3,[zero_hgst_dd]
        andps   xmm4,[zero_hgst_dd]
        movaps  .e1,xmm3
        movaps  .e2,xmm4

        lea     esi,.dir                ; pvec = dir x e2
        lea     edi,.e2
        lea     ebx,.pvec
        call    cross_aligned

        movaps  xmm0,.e1                ; det = e1 . pvec
        mulps   xmm0,.pvec
        haddps  xmm0,xmm0
        haddps  xmm0,xmm0
        movss   .det,xmm0
        comiss  xmm0,[eps]
        jb      @f                      ; det < eps (or unordered) -> no hit

        rcpss   xmm0,.det               ; approximate 1/det
        movss   .invdet,xmm0

        movaps  xmm0,.origin            ; tvec = origin - vert1
        subps   xmm0,.ta
        andps   xmm0,[zero_hgst_dd]
        movaps  .tvec,xmm0

        mulps   xmm0,.pvec              ; u = (tvec . pvec) * invdet
        haddps  xmm0,xmm0
        haddps  xmm0,xmm0
        mulss   xmm0,.invdet
        movss   xmm1,xmm0
        movss   .u,xmm0
        cmpnless xmm1,[epsone]          ; mask: u > epsone
        cmpnless xmm0,[epsminus]        ; mask: u > epsminus
        pxor    xmm1,xmm0               ; set only when epsminus < u <= epsone
        movd    eax,xmm1
        or      eax,eax
        jz      @f

        lea     esi,.tvec               ; qvec = tvec x e1
        lea     edi,.e1
        lea     ebx,.qvec
        call    cross_aligned

        movaps  xmm0,.dir               ; v = (dir . qvec) * invdet
        mulps   xmm0,.qvec
        haddps  xmm0,xmm0
        haddps  xmm0,xmm0
        mulss   xmm0,.invdet
        movss   .v,xmm0
        movss   xmm1,xmm0
        addss   xmm1,.u
        cmpnless xmm1,[epsone]          ; mask: u + v > epsone
        cmpnless xmm0,[epsminus]        ; mask: v > epsminus
        pxor    xmm1,xmm0               ; zero when both or neither hold -> reject
        movd    eax,xmm1
        or      eax,eax
        jz      @f

        movaps  xmm1,.e2                ; t = (e2 . qvec) * invdet
        mulps   xmm1,.qvec
        haddps  xmm1,xmm1
        haddps  xmm1,xmm1
        mulss   xmm1,.invdet
        movss   .t,xmm1
        comiss  xmm1,[eps]
        jb      @f                      ; t < eps (or unordered) -> no hit

        mov     eax,1
        cmp     .ift,0
        je      .end                    ; classic ray mode: hit, no edge length check

        movss   xmm0,.t                 ; edge mode: additionally require t <= edge length
        cmpnless xmm0,[eps]             ; mask: t > eps
        cmpnless xmm1,.ed_l             ; mask: t > edge length (xmm1 still holds t)
        xorps   xmm0,xmm1               ; set only when eps < t <= edge length
        movd    ebx,xmm0
        or      ebx,ebx
        jnz     .end                    ; inside the segment -> eax stays 1
@@:
        xor     eax,eax
.end:
        movaps  xmm0,.t                 ; t, v, u, ed_l are adjacent dwords in the frame
        add     esp,220
        pop     ebp
        ret
end if
;===============================================================
do_edges_list:
push ebp
223,13 → 420,18
 
 
do_sinus:
;in - ax - render mode
.x equ [ebp-8]
.y equ [ebp-12]
.new_y equ [ebp-16]
.temp equ [ebp-20]
.dr_f equ word[ebp-22]
 
push ebp
mov ebp,esp
sub esp,64
sub esp,30
mov .dr_f,ax
 
mov dword .x,0
mov dword .y,0
mov esi,[screen_ptr]
243,53 → 445,20
cld
rep stosd
pop edi
; movzx eax,[sinus_flag]
; mov edx,10
; mul edx
; mov [sin_amplitude],eax
; mov [sin_frq],eax
fninit
;if Ext = SSE2
; movups xmm1,[const0123] ; xmm1 - init values
; mov eax,0x000000ff
; movd xmm2,eax
; shufps xmm2,xmm2,0 ; xmm2 - mask value
; mov eax,4
; movd xmm3,eax
; shufps xmm3,xmm3,0
.again:
if 0
fild dword .x
fidiv [sin_frq]
fsin
fimul [sin_amplitude]
fiadd dword .y
fistp dword .new_y
else
fild dword .x
fmul [sin_frq]
fistp dword .temp
mov eax, .temp
; mov bx, [angle_x]
; add bx, [angle_y]
; movzx ebx,bx
; shr ebx,1 ; change phase
; add eax,ebx
 
 
and eax, 0x000000ff
 
; cdq
; mul [sin_frq]
; and eax,0x000000ff
; and ax,0x00ff
; cwde
 
fld dword [sin_tab+eax*4]
fimul dword [sin_amplitude]
fiadd dword .y
fistp dword .new_y
end if
 
mov eax,.new_y
or eax,eax
jl .skip
298,12 → 467,11
jg .skip
movzx edx,word[size_x_var]
mul edx
; shl eax,9
add eax,dword .x
 
lea ebx,[eax*3]
cmp [dr_flag],12 ; 32 bit col cause
jl @f
cmp .dr_f,12 ; 32 bit col cause
jb @f
add ebx,eax
@@:
mov eax,[esi]
310,8 → 478,8
mov [edi+ebx],eax
.skip:
add esi,3
cmp [dr_flag],12
jl @f
cmp .dr_f,12
jb @f
inc esi
@@:
inc dword .x
330,8 → 498,8
movzx ecx,word[size_x_var]
movzx eax,word[size_y_var]
imul ecx,eax
cmp [dr_flag],12
jge @f
cmp .dr_f,12
jae @f
lea ecx,[ecx*3]
shr ecx,2
; mov ecx,SIZE_X*SIZE_Y*3/4
377,7 → 545,19
ret
 
do_emboss: ; sse2 version only
; in ax - render model
push ebp
mov ebp,esp
sub esp,4
 
.dr_mod equ word[ebp-2]
 
mov .dr_mod,ax
 
if Ext >= SSE2
 
 
 
movzx ecx,[bumps_deep_flag]
inc ecx
call blur_screen ;blur n times
392,13 → 572,13
sub ecx,ebx
mov esi,[screen_ptr]
mov edi,[Zbuffer_ptr]
cmp [dr_flag],12
cmp .dr_mod,11
jge @f
lea ebx,[ebx*3]
jmp .f
jmp .gf
@@:
shl ebx,2
.f:
.gf:
mov edx,esi
add esi,ebx
lea ebx,[ebx+esi]
405,7 → 585,7
pxor xmm0,xmm0
push eax
.emb:
cmp [dr_flag],12
cmp .dr_mod ,11
jge @f
movlps xmm1,[esi+3]
movhps xmm1,[esi+6]
442,14 → 622,7
pmaxsw xmm1,xmm7
pmaxsw xmm1,xmm6
 
if 0
movaps xmm7,xmm3
movaps xmm6,xmm3
psrlq xmm7,2*8
psrlq xmm6,4*8
pmaxsw xmm3,xmm7
pmaxsw xmm3,xmm6
end if
 
pmaxsw xmm1,xmm3
 
movd eax,xmm1
469,7 → 642,7
mov eax,[eax]
mov [edi+4],eax
 
cmp [dr_flag],12
cmp .dr_mod,11
jl @f
add esi,2
add ebx,2
487,7 → 660,7
pop ecx ;,eax
mov edi,[screen_ptr]
mov esi,[Zbuffer_ptr]
cmp [dr_flag],12
cmp .dr_mod,11
jge .e
@@:
movsd
498,6 → 671,11
 
end if
 
 
 
mov esp,ebp
pop ebp
 
ret
 
;align 16
/programs/demos/view3ds/asc.inc
1,815 → 1,815
; Files *.asc routines by Maciej Guba
; Thanks to Reverend for integer/float/ascii conversion examples
read_asc:
; Parse an .asc text scene that is already loaded in memory.
; in:  [fptr]          - address of the text
;      [points_ptr]    - destination for vertices: 3 dword floats per vertex
;      [triangles_ptr] - destination for faces: 3 dword vertex numbers per face
; out: [points_count_var], [triangles_count_var] - counts read from the text
;      a dword -1 is stored after the last vertex and after the last face
;      eax = 1
; clobbers ebx, ecx, edx, esi, edi and the FPU state (atof does fninit)
; NOTE(review): none of the scans below test for the end of the buffer and
; the vertex/face loops run at least once, so the text is presumably
; validated (and the counts non-zero) before this is called - confirm.
mov eax,[fptr]
.find_vert: ; look for the word "Vertices"
cmp dword[eax],'Vert'
je @f
inc eax
jmp .find_vert
@@:
add eax,4
cmp dword[eax],'ices'
jne .find_vert ; "Vert" not followed by "ices" - keep searching from here
add eax,3
@@:
inc eax
cmp byte[eax],'0' ; skip forward to the first digit of the vertex count
jb @b
cmp byte[eax],'9'
ja @b
; eax - first digit of the ascii number
@@:
inc eax
cmp byte[eax],'0'
jb .convert1
cmp byte[eax],'9'
ja .convert1
jmp @b
.convert1:
dec eax ; step back onto the last digit
mov ebx,eax
push eax ; ascii_to_integer overwrites eax
call ascii_to_integer
mov [points_count_var],edx
pop eax

@@: ; look for the face count: text starting with "Face"
inc eax
cmp dword[eax],'Face'
jne @b
add eax,3
@@:
inc eax
cmp byte[eax],'0'
jb @b
cmp byte[eax],'9'
ja @b
; eax - first digit of the ascii number
@@:
inc eax
cmp byte[eax],'0'
jb .convert2
cmp byte[eax],'9'
ja .convert2
jmp @b
; eax - one past the last digit of the ascii number
.convert2:
dec eax
mov ebx,eax
push eax
call ascii_to_integer
mov [triangles_count_var],edx
pop eax

@@: ; next "Vert" - presumably the heading of the vertex list
inc eax
cmp dword[eax],'Vert'
jnz @b
inc eax

mov edi,[points_ptr]
xor ebx,ebx ; ebx = vertices decoded so far
.decode_vertices:
push ebx
@@: ; next "Vert" - start of one vertex entry
inc eax
cmp dword[eax],'Vert'
jne @b
xor ecx,ecx ; ecx = coordinate index: 0, 1, 2

.decode_coord:
push ecx
@@: ; find the coordinate letter XYZpartices[ecx]
inc eax
mov dl,byte[eax]
cmp dl,byte[XYZpartices+ecx]
jne @b
@@: ; then the first character of the number: '.', '-' or a digit
inc eax
cmp byte[eax],'.'
je .readF
cmp byte[eax],'-'
je .readF
cmp byte[eax],'0'
jb @b
cmp byte[eax],'9'
ja @b
.readF: ; read float
mov esi,eax
push eax
push ecx

call atof ; st0 - desired dword float

pop ecx
pop eax

fstp dword[edi] ; store the coordinate as a single float
add edi,4

pop ecx
inc ecx
cmp ecx,3
jne .decode_coord
pop ebx
inc ebx
cmp ebx,[points_count_var]
jne .decode_vertices
mov dword[edi],-1 ; end mark of the vertex array



mov esi,eax ; the face part is scanned through esi
@@: ; next "Face" - presumably the heading of the face list
inc esi
cmp dword[esi],'Face'
jne @b
xor edx,edx ; edx = faces decoded so far
mov edi,[triangles_ptr]
cld ; stosd below must move forward
.decode_face:

push edx ; ascii_to_integer returns its result in edx
@@: ; next "Face" - start of one face entry
inc esi
cmp dword[esi],'Face'
jne @b
@@:
inc esi
cmp byte[esi],'0' ; face number start
jb @b
cmp byte[esi],'9'
ja @b
@@: ; skip over the face number itself
inc esi
cmp byte[esi],'0'
jb @f
cmp byte[esi],'9' ; face number end
ja @f
jmp @b
@@:
xor ecx,ecx ; ecx = vertex slot of this face: 0, 1, 2
.next_vertex_number:

push ecx ; ascii_to_integer overwrites ecx
@@:
inc esi
cmp byte[esi],'0'
jb @b
cmp byte[esi],'9'
ja @b
; esi - first digit of the ascii number
@@:
inc esi
cmp byte[esi],'0'
jb @f
cmp byte[esi],'9'
ja @f
jmp @b
; esi - one past the last digit of the ascii number
@@:
dec esi
mov ebx,esi
push esi
call ascii_to_integer
mov eax,edx

stosd ; append the vertex number to the face array
pop esi
add esi,4 ; continue 4 characters after the last digit

pop ecx
inc ecx
cmp ecx,3
jne .next_vertex_number
pop edx
inc edx
cmp edx,[triangles_count_var]
jne .decode_face
mov dword[edi],-1 ;dword[triangles+ebx+2],-1 ; end mark
mov eax,1 ;-> mark if ok
ret
 
ascii_to_integer:
; Convert an unsigned decimal number, reading its digits right to left.
; in --- ebx -> last (least significant) digit of the ascii number
; out -- edx =  value
;        ebx -> first non-digit character in front of the number
; clobbers eax, ecx
; Digit weights come from the dword table convert_muler (assumed to hold
; 1, 10, 100, ... - confirm it is long enough for the longest number).
        xor     edx,edx                         ; edx = running sum
        xor     ecx,ecx                         ; ecx = byte offset of the current weight
  .again:
        movzx   eax,byte [ebx]
        sub     al,'0'                          ; eax = digit value
        imul    eax,dword [convert_muler+ecx]   ; digit * weight, edx untouched
        add     edx,eax
        dec     ebx                             ; one character to the left
        cmp     byte [ebx],'0'
        jb      .end
        cmp     byte [ebx],'9'
        ja      .end
        add     ecx,4                           ; next weight
        jmp     .again

  .end:
        ret
 
;===============================================================================
; ASCII to float conversion procedure
;
; input:
; esi - pointer to string
;
; output:
; st0 - number changed into float
;
;===============================================================================
 
atof:
; Parses [-]digits[.digits] at esi and leaves the value in st0.
; At most 7 fraction digits are used; parsing stops at the first other
; character. eax, ecx and esi are preserved, the FPU is reinitialised and
; CF is set on return. Uses the word [i10] and the dword table
; convert_muler as constants.
.string equ ebp-4

        push    ebp
        mov     ebp,esp
        sub     esp,32                  ; [ebp-4] = .string, [ebp-8] = digit scratch
        push    eax ecx esi
        mov     [.string],esi           ; kept for the sign test at the end
        fninit
        fldz                            ; becomes st1: stays 0 until a '.' is seen
        fldz                            ; st0: integer part accumulator

        cld
        cmp     byte [esi],'-'
        jnz     @F
        inc     esi                     ; step over the sign
  @@:
        xor     eax,eax                 ; upper bits stay 0, so eax = al after lodsb
        align 4
  .loop.integer_part:
        lodsb
        cmp     al,'.'
        jz      .mantisa
        cmp     al,'0'
        jb      .exit
        cmp     al,'9'
        ja      .exit
        fimul   [i10]                   ; accumulator * 10
        sub     al,'0'
        mov     [ebp-8],eax
        fiadd   dword [ebp-8]           ; + digit
        jmp     .loop.integer_part

  .mantisa:
        xor     ecx,ecx                 ; ecx = 4 * fraction digits consumed
        xor     eax,eax
        fxch    st1                     ; st0 = fraction accumulator, st1 = integer part
  @@:

        lodsb
        cmp     al,'0'
        jb      .exit
        cmp     al,'9'
        ja      .exit
        cmp     ecx,7*4
        je      .exit                   ; max 7 digits in mantisa
        sub     al,'0'
        mov     [ebp-8],eax
        fild    dword [ebp-8]
        fidiv   dword [convert_muler+4+ecx] ; digit / weight of this decimal place
        faddp
        add     ecx,4
        jmp     @b
  .exit:
        faddp                           ; st0 = integer part + fraction part

        mov     eax,[.string]
        cmp     byte [eax],'-'
        jnz     @F
        fchs
  @@:
        stc                             ; always returns no error
        pop     esi ecx eax
        mov     esp,ebp
        pop     ebp
        ret
 
 
itoa: ; unsigned dword integer to ascii procedure
; in  eax - value to convert
;     esi - destination for the zero terminated decimal text
; out edi - points one byte past the terminating zero in the destination
;     eax = 0
; ecx and ebp are preserved; edx and esi are overwritten; DF is left clear.
; The digits are produced backwards inside a 36-byte scratch area on the
; stack, delimited by a zero byte on each side, then copied forward.
.temp_string equ dword[ebp-36]
.ptr equ dword[ebp-40]
.var equ dword[ebp-44]
        push    ecx
        push    ebp
        mov     ebp,esp
        sub     esp,64
        mov     .var,eax
        mov     .ptr,esi

        cld
        lea     edi,.temp_string
        mov     ecx,9
        mov     eax,-1
        rep stosd                       ; make floor: 36 non-zero filler bytes

        lea     edi,[ebp-2]             ; = scratch start + 34
        mov     byte [edi],0            ; mark begin: ends up right after the last digit
        dec     edi
        mov     eax,.var
        mov     esi,10
  @@:
        xor     edx,edx
        div     esi                     ; eax = quotient, edx = lowest digit
        add     dl,'0'
        mov     [edi],dl                ; digits are laid down right to left
        dec     edi
        or      eax,eax
        jnz     @b
        mov     byte [edi],0            ; mark end: zero just before the first digit

        lea     esi,.temp_string
  @@:
        lodsb                           ; skip the filler up to and including that zero
        or      al,al
        jnz     @b

        mov     edi,.ptr
  @@:
        lodsb                           ; copy digits and the closing zero
        stosb
        or      al,al
        jnz     @b

        mov     esp,ebp
        pop     ebp
        pop     ecx
        ret
if 1
ftoa_mac:
; Write a single precision float as decimal text: integer part, '.', then
; up to 6 fraction digits (fewer when the remaining fraction becomes zero).
; Digits are truncated, not rounded.
; in : esi - pointer to dword float
;      edi - pointer to ascii string
; out: edi - points just past the last digit written
;      (no terminating zero is stored after the fraction digits)
; notes:
;   - for a negative value the '-' is stored at [edi_in - 1], one byte in
;     front of the buffer passed in; the caller must own that byte
;   - the FPU is reinitialised (fninit), so the caller's FPU stack is lost;
;     the control word in effect after fninit is restored before returning
;   - ecx is preserved; eax, edx and esi are overwritten
;   - NOTE(review): the integer part goes through a 32-bit fist, so large
;     magnitudes presumably do not convert correctly - confirm the range
.ptr_f equ dword[ebp-4]
.sign equ dword[ebp-8] ; 0 -> less than zero, 1 - otherwise
.ptr_ascii equ dword[ebp-12]
.integer equ dword[ebp-20]
.fraction equ dword[ebp-28]
.status_orginal equ word[ebp-32]
.status_changed equ word[ebp-34]
        push    ecx
        push    ebp
        mov     ebp,esp
        sub     esp,64
        fninit
        fnstcw  .status_orginal
        mov     ax, .status_orginal
        or      ax, 0000110000000000b   ; rounding control = 11b: frndint/fist truncate
        mov     .status_changed, ax
        fldcw   .status_changed
; --------------------------------
; check if signed
        xor     eax, eax
        fld     dword[esi]
        fst     .sign                   ; raw float bits, only the sign bit is examined
        test    .sign, 80000000h
        setz    al
        mov     .sign, eax              ; 1 = non-negative, 0 = negative

        mov     .ptr_f,esi
        mov     .ptr_ascii,edi
        fabs
        fld     st0
        frndint
        fist    .integer
        fsubp   st1, st0                ; st0 = fraction part

        mov     eax,.integer
        mov     esi,.ptr_ascii
        call    itoa
; edi -> one past the zero that terminates the integer text
        dec     edi
        mov     al,'.'
        stosb                           ; the '.' replaces that zero

        mov     ecx, 6                  ; max 6 digits in fraction part
  .loop:
        fimul   [i10]
        fld     st0
        frndint
        fist    .fraction               ; next decimal digit
        fsubp   st1, st0                ; keep the remaining fraction in st0
        mov     esi,edi                 ; kept as in the original: esi follows the write position
        mov     eax,.fraction
        add     al,'0'
        stosb
        ftst
        fnstsw  ax
        test    ax, 0100000000000000b   ; C3 clear -> remainder is not zero
        jz      @F
        test    ax, 0000010100000000b   ; C3 set with C2 = C0 = 0 -> remainder is exactly zero
        jz      .finish
  @@:
        loop    .loop
  .finish:
        ffree   st
        fldcw   .status_orginal         ; undo the truncation mode set above
        cmp     .sign,0
        jnz     @f
        mov     esi,.ptr_ascii
        dec     esi
        mov     byte[esi],'-'           ; sign goes into the byte before the buffer
  @@:
        mov     esp,ebp
        pop     ebp
        pop     ecx

        ret
end if
if 0
;===============================================================================
; float to ASCII conversion procedure
;
; input:
; buffer - pointer to memory where output will be saved
; precision - number of digits after dot
;
; output:
; no immediate output
;
; notes:
; separate integer and mantisa part with dot '.'
; so GOOD 123.456
; WRONG 123,456
;
; coded by Reverend // HTB + RAG
;===============================================================================
proc ftoa buffer, precision
locals
status_original dw ?
status_changed dw ?
integer dd ?
mantisa dd ?
signed dd ?
endl
push eax ecx;edi ecx
; --------------------------------
; set correct precision
mov eax, [precision]
cmp eax, 51
jb @F
mov eax, 51
@@:
mov [precision], eax
; --------------------------------
; change control wortd of fpu to prevent rounding
fnstcw [status_original]
mov ax, [status_original]
or ax, 0000110000000000b
mov [status_changed], ax
fldcw [status_changed]
; --------------------------------
; check if signed
xor eax, eax
fst [signed]
test [signed], 80000000h
setnz al
mov [signed], eax
; --------------------------------
; cut integer and mantisa separately
fld st0
fld st0 ; st0 = x, st1 = x
frndint
fist [integer] ; st0 = x, st1 = x
fabs
fsubp st1, st0 ; st0 = mantisa(x)
; --------------------------------
; save integer part in buffer
; mov edi, [buffer]
mov esi,[buffer]
; push [signed]
; push edi
; push 10
; push [integer]
mov eax,[integer]
call itoa
; add edi, eax
mov al, '.'
stosb
mov esi,edi
; --------------------------------
; save mantisa part in buffer
mov ecx, [precision]
dec ecx
.loop:
fimul [i10]
fld st0
frndint
fist [mantisa]
fsubp st1, st0
; push 0
; push edi
; push 10
; push [mantisa]
mov esi,edi
mov eax,[mantisa]
call itoa
; add edi, eax
ftst
fnstsw ax
test ax, 0100000000000000b
jz @F
test ax, 0000010100000000b
jz .finish
@@:
loop .loop
fldcw [status_original]
fimul [i10]
fist [mantisa]
; push 0
; push edi
; push 10
; push [mantisa]
mov esi,edi
mov eax,[mantisa]
call itoa
; --------------------------------
; restore previous values
.finish:
fstp st0
cmp [signed],1
jnz @f
mov byte[buffer],'-'
@@:
stc
pop ecx eax ;edi eax
ret
endp
end if
if 0
write_asc:
.counter equ dword[ebp-4]
push ebp
mov ebp,esp
sub esp,64
fninit
mov edi,asc_file_buffer
mov esi,asc_main_header
cld
@@:
lodsb
cmp al,1
jz @f
stosb
jmp @b
@@:
 
mov esi,asc_info_header
@@:
lodsb
cmp al,1
jz @f
stosb
jmp @b
@@:
push esi ; -> position in header info
movzx eax,[points_count_var]
mov esi,edi
call itoa ; unsigned dword integer to ascii procedure
pop esi
inc esi
@@:
lodsb
cmp al,1
jz @f
stosb
jmp @b
@@:
push esi
movzx eax,[triangles_count_var]
mov esi,edi
call itoa
pop esi
inc esi
@@:
lodsb
cmp al,1
jz @f
stosb
jmp @b
@@:
;=============================================================
;================vertex list parser===========================
;=============================================================
 
xor ecx,ecx
.again_vertex:
push ecx
mov esi,asc_one_vertex_formula
@@:
lodsb
cmp al,1
jz @f
stosb
jmp @b
@@:
mov eax,ecx
; push ecx
push esi
mov esi,edi
call itoa
pop esi
; pop ecx
inc esi
xor ebx,ebx
.next_vertex_coef:
push ebx
@@:
lodsb
cmp al,1
jz @f
stosb
jmp @b
@@:
; int3
push esi
lea esi,[ecx*3]
shl esi,2
add esi,points_r
add esi,ebx
; int3
call ftoa_mac
; std
; fld dword[esi]
 
 
; pushad
; stdcall ftoa, edi, 30
; popad
; add edi,20
 
 
pop esi
pop ebx
add ebx,4
cmp ebx,12
jnz .next_vertex_coef
@@:
lodsb
cmp al,1
jz @f
stosb
jmp @b
@@:
pop ecx
inc ecx
cmp cx,[points_count_var]
jnz .again_vertex
 
 
; mov edi,[temp_edi]
 
 
mov esi,asc_face_list_header
@@:
lodsb
cmp al,1 ; all face header
jz @f
stosb
jmp @b
@@:
;=====================================
; ==============face list parser======
;=====================================
xor ecx,ecx
.again_face:
push ecx
mov .counter,ecx
mov esi,asc_one_face_formula
@@:
lodsb
cmp al,1
jz @f
stosb
jmp @b
@@:
mov eax,ecx
push esi
mov esi,edi
call itoa
pop esi
inc esi
@@:
lodsb
cmp al,1
jz @f
stosb
jmp @b
@@:
 
xor ebx,ebx
.next_face_index:
push ebx
mov ecx,.counter
lea ecx,[ecx*3]
add ecx,ecx
movzx eax,word[triangles+ecx+ebx]
push esi
mov esi,edi
call itoa
pop esi
@@:
lodsb
cmp al,1
jz @f
stosb
jmp @b
@@:
pop ebx
add ebx,2
cmp ebx,6
jnz .next_face_index
 
; push esi
mov esi,asc_material
@@:
lodsb
cmp al,1
jz @f
stosb
jmp @b
@@:
; pop esi
 
pop ecx
inc ecx
cmp cx,[triangles_count_var]
jnz .again_face
 
; write file
sub edi,asc_file_buffer
; mov [file_buffer+2],edi
mov [FileSize],edi
 
invoke CreateFile,asc_file_name, GENERIC_WRITE, 0, 0,CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, 0
mov [hfile],eax
invoke WriteFile,eax,asc_file_buffer,[FileSize], byteswritten, 0
invoke CloseHandle,[hfile]
 
mov esp,ebp
pop ebp
ret
end if
if 0
asc_file_buffer:
rd 65536
ascii_string rb 50
ftoa_muler dd 1000000000
file_size dd ?
file_handle dd ?
end if
 
if 0
convert_muler:
dd 1, 10, 100, 1000, 10000
XYZpartices:
db 'X','Y','Z'
i10 dw 10
points_count_var dd ?
triangles_count_var dd ?
points rb 100
triangles rb 100
asc_file:
 
file "2TORUS.ASC"
; Files *.asc routines by Maciej Guba
; Thanks to Reverend for integer/float/ascii conversion examples
read_asc:
; Parse an .asc text scene that is already loaded in memory.
; in:  [fptr]          - address of the text
;      [points_ptr]    - destination for vertices: 3 dword floats per vertex
;      [triangles_ptr] - destination for faces: 3 dword vertex numbers per face
; out: [points_count_var], [triangles_count_var] - counts read from the text
;      a dword -1 is stored after the last vertex and after the last face
;      eax = 1
; clobbers ebx, ecx, edx, esi, edi and the FPU state (atof does fninit)
; NOTE(review): none of the scans below test for the end of the buffer and
; the vertex/face loops run at least once, so the text is presumably
; validated (and the counts non-zero) before this is called - confirm.
mov eax,[fptr]
.find_vert: ; look for the word "Vertices"
cmp dword[eax],'Vert'
je @f
inc eax
jmp .find_vert
@@:
add eax,4
cmp dword[eax],'ices'
jne .find_vert ; "Vert" not followed by "ices" - keep searching from here
add eax,3
@@:
inc eax
cmp byte[eax],'0' ; skip forward to the first digit of the vertex count
jb @b
cmp byte[eax],'9'
ja @b
; eax - first digit of the ascii number
@@:
inc eax
cmp byte[eax],'0'
jb .convert1
cmp byte[eax],'9'
ja .convert1
jmp @b
.convert1:
dec eax ; step back onto the last digit
mov ebx,eax
push eax ; ascii_to_integer overwrites eax
call ascii_to_integer
mov [points_count_var],edx
pop eax

@@: ; look for the face count: text starting with "Face"
inc eax
cmp dword[eax],'Face'
jne @b
add eax,3
@@:
inc eax
cmp byte[eax],'0'
jb @b
cmp byte[eax],'9'
ja @b
; eax - first digit of the ascii number
@@:
inc eax
cmp byte[eax],'0'
jb .convert2
cmp byte[eax],'9'
ja .convert2
jmp @b
; eax - one past the last digit of the ascii number
.convert2:
dec eax
mov ebx,eax
push eax
call ascii_to_integer
mov [triangles_count_var],edx
pop eax

@@: ; next "Vert" - presumably the heading of the vertex list
inc eax
cmp dword[eax],'Vert'
jnz @b
inc eax

mov edi,[points_ptr]
xor ebx,ebx ; ebx = vertices decoded so far
.decode_vertices:
push ebx
@@: ; next "Vert" - start of one vertex entry
inc eax
cmp dword[eax],'Vert'
jne @b
xor ecx,ecx ; ecx = coordinate index: 0, 1, 2

.decode_coord:
push ecx
@@: ; find the coordinate letter XYZpartices[ecx]
inc eax
mov dl,byte[eax]
cmp dl,byte[XYZpartices+ecx]
jne @b
@@: ; then the first character of the number: '.', '-' or a digit
inc eax
cmp byte[eax],'.'
je .readF
cmp byte[eax],'-'
je .readF
cmp byte[eax],'0'
jb @b
cmp byte[eax],'9'
ja @b
.readF: ; read float
mov esi,eax
push eax
push ecx

call atof ; st0 - desired dword float

pop ecx
pop eax

fstp dword[edi] ; store the coordinate as a single float
add edi,4

pop ecx
inc ecx
cmp ecx,3
jne .decode_coord
pop ebx
inc ebx
cmp ebx,[points_count_var]
jne .decode_vertices
mov dword[edi],-1 ; end mark of the vertex array



mov esi,eax ; the face part is scanned through esi
@@: ; next "Face" - presumably the heading of the face list
inc esi
cmp dword[esi],'Face'
jne @b
xor edx,edx ; edx = faces decoded so far
mov edi,[triangles_ptr]
cld ; stosd below must move forward
.decode_face:

push edx ; ascii_to_integer returns its result in edx
@@: ; next "Face" - start of one face entry
inc esi
cmp dword[esi],'Face'
jne @b
@@:
inc esi
cmp byte[esi],'0' ; face number start
jb @b
cmp byte[esi],'9'
ja @b
@@: ; skip over the face number itself
inc esi
cmp byte[esi],'0'
jb @f
cmp byte[esi],'9' ; face number end
ja @f
jmp @b
@@:
xor ecx,ecx ; ecx = vertex slot of this face: 0, 1, 2
.next_vertex_number:

push ecx ; ascii_to_integer overwrites ecx
@@:
inc esi
cmp byte[esi],'0'
jb @b
cmp byte[esi],'9'
ja @b
; esi - first digit of the ascii number
@@:
inc esi
cmp byte[esi],'0'
jb @f
cmp byte[esi],'9'
ja @f
jmp @b
; esi - one past the last digit of the ascii number
@@:
dec esi
mov ebx,esi
push esi
call ascii_to_integer
mov eax,edx

stosd ; append the vertex number to the face array
pop esi
add esi,4 ; continue 4 characters after the last digit

pop ecx
inc ecx
cmp ecx,3
jne .next_vertex_number
pop edx
inc edx
cmp edx,[triangles_count_var]
jne .decode_face
mov dword[edi],-1 ;dword[triangles+ebx+2],-1 ; end mark
mov eax,1 ;-> mark if ok
ret
 
ascii_to_integer:
; Convert an unsigned decimal number, reading its digits right to left.
; in --- ebx -> last (least significant) digit of the ascii number
; out -- edx =  value
;        ebx -> first non-digit character in front of the number
; clobbers eax, ecx
; Digit weights come from the dword table convert_muler (assumed to hold
; 1, 10, 100, ... - confirm it is long enough for the longest number).
        xor     edx,edx                         ; edx = running sum
        xor     ecx,ecx                         ; ecx = byte offset of the current weight
  .again:
        movzx   eax,byte [ebx]
        sub     al,'0'                          ; eax = digit value
        imul    eax,dword [convert_muler+ecx]   ; digit * weight, edx untouched
        add     edx,eax
        dec     ebx                             ; one character to the left
        cmp     byte [ebx],'0'
        jb      .end
        cmp     byte [ebx],'9'
        ja      .end
        add     ecx,4                           ; next weight
        jmp     .again

  .end:
        ret
 
;===============================================================================
; ASCII to float conversion procedure
;
; input:
; esi - pointer to string
;
; output:
; st0 - number changed into float
;
;===============================================================================
 
atof:
; Parses [-]digits[.digits] at esi and leaves the value in st0.
; At most 7 fraction digits are used; parsing stops at the first other
; character. eax, ecx and esi are preserved, the FPU is reinitialised and
; CF is set on return. Uses the word [i10] and the dword table
; convert_muler as constants.
.string equ ebp-4

        push    ebp
        mov     ebp,esp
        sub     esp,32                  ; [ebp-4] = .string, [ebp-8] = digit scratch
        push    eax ecx esi
        mov     [.string],esi           ; kept for the sign test at the end
        fninit
        fldz                            ; becomes st1: stays 0 until a '.' is seen
        fldz                            ; st0: integer part accumulator

        cld
        cmp     byte [esi],'-'
        jnz     @F
        inc     esi                     ; step over the sign
  @@:
        xor     eax,eax                 ; upper bits stay 0, so eax = al after lodsb
        align 4
  .loop.integer_part:
        lodsb
        cmp     al,'.'
        jz      .mantisa
        cmp     al,'0'
        jb      .exit
        cmp     al,'9'
        ja      .exit
        fimul   [i10]                   ; accumulator * 10
        sub     al,'0'
        mov     [ebp-8],eax
        fiadd   dword [ebp-8]           ; + digit
        jmp     .loop.integer_part

  .mantisa:
        xor     ecx,ecx                 ; ecx = 4 * fraction digits consumed
        xor     eax,eax
        fxch    st1                     ; st0 = fraction accumulator, st1 = integer part
  @@:

        lodsb
        cmp     al,'0'
        jb      .exit
        cmp     al,'9'
        ja      .exit
        cmp     ecx,7*4
        je      .exit                   ; max 7 digits in mantisa
        sub     al,'0'
        mov     [ebp-8],eax
        fild    dword [ebp-8]
        fidiv   dword [convert_muler+4+ecx] ; digit / weight of this decimal place
        faddp
        add     ecx,4
        jmp     @b
  .exit:
        faddp                           ; st0 = integer part + fraction part

        mov     eax,[.string]
        cmp     byte [eax],'-'
        jnz     @F
        fchs
  @@:
        stc                             ; always returns no error
        pop     esi ecx eax
        mov     esp,ebp
        pop     ebp
        ret
 
 
itoa: ; unsigned dword integer to ascii procedure
; in  eax - value to convert
;     esi - destination for the zero terminated decimal text
; out edi - points one byte past the terminating zero in the destination
;     eax = 0
; ecx and ebp are preserved; edx and esi are overwritten; DF is left clear.
; The digits are produced backwards inside a 36-byte scratch area on the
; stack, delimited by a zero byte on each side, then copied forward.
.temp_string equ dword[ebp-36]
.ptr equ dword[ebp-40]
.var equ dword[ebp-44]
        push    ecx
        push    ebp
        mov     ebp,esp
        sub     esp,64
        mov     .var,eax
        mov     .ptr,esi

        cld
        lea     edi,.temp_string
        mov     ecx,9
        mov     eax,-1
        rep stosd                       ; make floor: 36 non-zero filler bytes

        lea     edi,[ebp-2]             ; = scratch start + 34
        mov     byte [edi],0            ; mark begin: ends up right after the last digit
        dec     edi
        mov     eax,.var
        mov     esi,10
  @@:
        xor     edx,edx
        div     esi                     ; eax = quotient, edx = lowest digit
        add     dl,'0'
        mov     [edi],dl                ; digits are laid down right to left
        dec     edi
        or      eax,eax
        jnz     @b
        mov     byte [edi],0            ; mark end: zero just before the first digit

        lea     esi,.temp_string
  @@:
        lodsb                           ; skip the filler up to and including that zero
        or      al,al
        jnz     @b

        mov     edi,.ptr
  @@:
        lodsb                           ; copy digits and the closing zero
        stosb
        or      al,al
        jnz     @b

        mov     esp,ebp
        pop     ebp
        pop     ecx
        ret
if 1
ftoa_mac:
; Write a single precision float as decimal text: integer part, '.', then
; up to 6 fraction digits (fewer when the remaining fraction becomes zero).
; Digits are truncated, not rounded.
; in : esi - pointer to dword float
;      edi - pointer to ascii string
; out: edi - points just past the last digit written
;      (no terminating zero is stored after the fraction digits)
; notes:
;   - for a negative value the '-' is stored at [edi_in - 1], one byte in
;     front of the buffer passed in; the caller must own that byte
;   - the FPU is reinitialised (fninit), so the caller's FPU stack is lost;
;     the control word in effect after fninit is restored before returning
;   - ecx is preserved; eax, edx and esi are overwritten
;   - NOTE(review): the integer part goes through a 32-bit fist, so large
;     magnitudes presumably do not convert correctly - confirm the range
.ptr_f equ dword[ebp-4]
.sign equ dword[ebp-8] ; 0 -> less than zero, 1 - otherwise
.ptr_ascii equ dword[ebp-12]
.integer equ dword[ebp-20]
.fraction equ dword[ebp-28]
.status_orginal equ word[ebp-32]
.status_changed equ word[ebp-34]
        push    ecx
        push    ebp
        mov     ebp,esp
        sub     esp,64
        fninit
        fnstcw  .status_orginal
        mov     ax, .status_orginal
        or      ax, 0000110000000000b   ; rounding control = 11b: frndint/fist truncate
        mov     .status_changed, ax
        fldcw   .status_changed
; --------------------------------
; check if signed
        xor     eax, eax
        fld     dword[esi]
        fst     .sign                   ; raw float bits, only the sign bit is examined
        test    .sign, 80000000h
        setz    al
        mov     .sign, eax              ; 1 = non-negative, 0 = negative

        mov     .ptr_f,esi
        mov     .ptr_ascii,edi
        fabs
        fld     st0
        frndint
        fist    .integer
        fsubp   st1, st0                ; st0 = fraction part

        mov     eax,.integer
        mov     esi,.ptr_ascii
        call    itoa
; edi -> one past the zero that terminates the integer text
        dec     edi
        mov     al,'.'
        stosb                           ; the '.' replaces that zero

        mov     ecx, 6                  ; max 6 digits in fraction part
  .loop:
        fimul   [i10]
        fld     st0
        frndint
        fist    .fraction               ; next decimal digit
        fsubp   st1, st0                ; keep the remaining fraction in st0
        mov     esi,edi                 ; kept as in the original: esi follows the write position
        mov     eax,.fraction
        add     al,'0'
        stosb
        ftst
        fnstsw  ax
        test    ax, 0100000000000000b   ; C3 clear -> remainder is not zero
        jz      @F
        test    ax, 0000010100000000b   ; C3 set with C2 = C0 = 0 -> remainder is exactly zero
        jz      .finish
  @@:
        loop    .loop
  .finish:
        ffree   st
        fldcw   .status_orginal         ; undo the truncation mode set above
        cmp     .sign,0
        jnz     @f
        mov     esi,.ptr_ascii
        dec     esi
        mov     byte[esi],'-'           ; sign goes into the byte before the buffer
  @@:
        mov     esp,ebp
        pop     ebp
        pop     ecx

        ret
end if
if 0
;===============================================================================
; float to ASCII conversion procedure
;
; input:
; buffer - pointer to memory where output will be saved
; precision - number of digits after dot
;
; output:
; no immediate output
;
; notes:
; separate integer and mantisa part with dot '.'
; so GOOD 123.456
; WRONG 123,456
;
; coded by Reverend // HTB + RAG
;===============================================================================
proc ftoa buffer, precision
locals
status_original dw ?
status_changed dw ?
integer dd ?
mantisa dd ?
signed dd ?
endl
push eax ecx;edi ecx
; --------------------------------
; set correct precision
mov eax, [precision]
cmp eax, 51
jb @F
mov eax, 51
@@:
mov [precision], eax
; --------------------------------
; change control wortd of fpu to prevent rounding
fnstcw [status_original]
mov ax, [status_original]
or ax, 0000110000000000b
mov [status_changed], ax
fldcw [status_changed]
; --------------------------------
; check if signed
xor eax, eax
fst [signed]
test [signed], 80000000h
setnz al
mov [signed], eax
; --------------------------------
; cut integer and mantisa separately
fld st0
fld st0 ; st0 = x, st1 = x
frndint
fist [integer] ; st0 = x, st1 = x
fabs
fsubp st1, st0 ; st0 = mantisa(x)
; --------------------------------
; save integer part in buffer
; mov edi, [buffer]
mov esi,[buffer]
; push [signed]
; push edi
; push 10
; push [integer]
mov eax,[integer]
call itoa
; add edi, eax
mov al, '.'
stosb
mov esi,edi
; --------------------------------
; save mantisa part in buffer
mov ecx, [precision]
dec ecx
.loop:
fimul [i10]
fld st0
frndint
fist [mantisa]
fsubp st1, st0
; push 0
; push edi
; push 10
; push [mantisa]
mov esi,edi
mov eax,[mantisa]
call itoa
; add edi, eax
ftst
fnstsw ax
test ax, 0100000000000000b
jz @F
test ax, 0000010100000000b
jz .finish
@@:
loop .loop
fldcw [status_original]
fimul [i10]
fist [mantisa]
; push 0
; push edi
; push 10
; push [mantisa]
mov esi,edi
mov eax,[mantisa]
call itoa
; --------------------------------
; restore previous values
.finish:
fstp st0
cmp [signed],1
jnz @f
mov byte[buffer],'-'
@@:
stc
pop ecx eax ;edi eax
ret
endp
end if
if 0
write_asc:
.counter equ dword[ebp-4]
push ebp
mov ebp,esp
sub esp,64
fninit
mov edi,asc_file_buffer
mov esi,asc_main_header
cld
@@:
lodsb
cmp al,1
jz @f
stosb
jmp @b
@@:
 
mov esi,asc_info_header
@@:
lodsb
cmp al,1
jz @f
stosb
jmp @b
@@:
push esi ; -> position in header info
movzx eax,[points_count_var]
mov esi,edi
call itoa ; unsigned dword integer to ascii procedure
pop esi
inc esi
@@:
lodsb
cmp al,1
jz @f
stosb
jmp @b
@@:
push esi
movzx eax,[triangles_count_var]
mov esi,edi
call itoa
pop esi
inc esi
@@:
lodsb
cmp al,1
jz @f
stosb
jmp @b
@@:
;=============================================================
;================vertex list parser===========================
;=============================================================
 
xor ecx,ecx
.again_vertex:
push ecx
mov esi,asc_one_vertex_formula
@@:
lodsb
cmp al,1
jz @f
stosb
jmp @b
@@:
mov eax,ecx
; push ecx
push esi
mov esi,edi
call itoa
pop esi
; pop ecx
inc esi
xor ebx,ebx
.next_vertex_coef:
push ebx
@@:
lodsb
cmp al,1
jz @f
stosb
jmp @b
@@:
; int3
push esi
lea esi,[ecx*3]
shl esi,2
add esi,points_r
add esi,ebx
; int3
call ftoa_mac
; std
; fld dword[esi]
 
 
; pushad
; stdcall ftoa, edi, 30
; popad
; add edi,20
 
 
pop esi
pop ebx
add ebx,4
cmp ebx,12
jnz .next_vertex_coef
@@:
lodsb
cmp al,1
jz @f
stosb
jmp @b
@@:
pop ecx
inc ecx
cmp cx,[points_count_var]
jnz .again_vertex
 
 
; mov edi,[temp_edi]
 
 
mov esi,asc_face_list_header
@@:
lodsb
cmp al,1 ; all face header
jz @f
stosb
jmp @b
@@:
;=====================================
; ==============face list parser======
;=====================================
xor ecx,ecx
.again_face:
push ecx
mov .counter,ecx
mov esi,asc_one_face_formula
@@:
lodsb
cmp al,1
jz @f
stosb
jmp @b
@@:
mov eax,ecx
push esi
mov esi,edi
call itoa
pop esi
inc esi
@@:
lodsb
cmp al,1
jz @f
stosb
jmp @b
@@:
 
xor ebx,ebx
.next_face_index:
push ebx
mov ecx,.counter
lea ecx,[ecx*3]
add ecx,ecx
movzx eax,word[triangles+ecx+ebx]
push esi
mov esi,edi
call itoa
pop esi
@@:
lodsb
cmp al,1
jz @f
stosb
jmp @b
@@:
pop ebx
add ebx,2
cmp ebx,6
jnz .next_face_index
 
; push esi
mov esi,asc_material
@@:
lodsb
cmp al,1
jz @f
stosb
jmp @b
@@:
; pop esi
 
pop ecx
inc ecx
cmp cx,[triangles_count_var]
jnz .again_face
 
; write file
sub edi,asc_file_buffer
; mov [file_buffer+2],edi
mov [FileSize],edi
 
invoke CreateFile,asc_file_name, GENERIC_WRITE, 0, 0,CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, 0
mov [hfile],eax
invoke WriteFile,eax,asc_file_buffer,[FileSize], byteswritten, 0
invoke CloseHandle,[hfile]
 
mov esp,ebp
pop ebp
ret
end if
if 0
asc_file_buffer:
rd 65536
ascii_string rb 50
ftoa_muler dd 1000000000
file_size dd ?
file_handle dd ?
end if
 
if 0
convert_muler:
dd 1, 10, 100, 1000, 10000
XYZpartices:
db 'X','Y','Z'
i10 dw 10
points_count_var dd ?
triangles_count_var dd ?
points rb 100
triangles rb 100
asc_file:
 
file "2TORUS.ASC"
end if
/programs/demos/view3ds/b_procs.inc
737,22 → 737,33
 
blur_screen: ;blur n times ; blur or fire
;in - ecx times count
;.counter equ dword[esp-4]
.counter1 equ dword[esp-8]
; ax - render mode
 
.val equ dword[ebp-4]
.dr_model equ word[ebp-6]
.fire equ dword[ebp-10]
 
if Ext>=SSE2
push ebp
mov ebp,esp
push dword 0x01010101
movss xmm5,[esp]
sub esp,10
; xorps xmm5,xmm5
; or edx,edx
; jz @f
mov .val,0x01010101
movss xmm5,.val
shufps xmm5,xmm5,0
@@:
mov .dr_model,ax
 
 
.again_blur:
push ecx
mov edi,[screen_ptr]
movzx ecx,word[size_x_var] ;SIZE_X*3/4
 
cmp [dr_flag],12
cmp .dr_model,11
jge @f
lea ecx,[ecx*3+1]
lea ecx,[ecx*3+3]
shr ecx,2
@@:
 
763,11 → 774,11
movzx ecx,word[size_y_var]
sub ecx,3
imul ecx,ebx
cmp [dr_flag],12 ; 32 bit per pix cause
cmp .dr_model,11 ; 32 bit per pix cause
jge @f
lea ecx,[ecx*3]
shr ecx,4
lea ebx,[ebx *3]
lea ebx,[ebx*3]
jmp .blr
@@:
 
781,7 → 792,7
mov ecx,edi
sub ecx,ebx
movups xmm1,[ecx]
cmp [dr_flag],12
cmp .dr_model,12
jge @f
movups xmm2,[edi-3]
movups xmm3,[edi+3]
802,9 → 813,9
end if
xor eax,eax
movzx ecx,word[size_x_var]
cmp [dr_flag],12
cmp .dr_model,11
jge @f
lea ecx,[ecx*3]
lea ecx,[ecx*3+3]
shr ecx,2
@@:
; mov ecx,SIZE_X*3/4
/programs/demos/view3ds/bump_cat.inc
1,1132 → 1,1132
;SIZE_X equ 350
;SIZE_Y equ 350
ROUND equ 8
;TEX_X equ 512
;TEX_Y equ 512
;TEXTURE_SIZE EQU (512*512)-1
;TEX_SHIFT EQU 9
CATMULL_SHIFT equ 8
;TEXTURE_SIZE EQU (TEX_X * TEX_Y)-1
;Ext = NON
;MMX = 1
;NON = 0
;------- Big thanks to Majuma (www.majuma.xt.pl) for absolutely great---
;------- DOS 13h mode demos --------------------------------------------
;------- Procedure draws bump triangle using Catmull Z-buffer algorithm-
;------- (Z coordinate interpolation)-----------------------------------
bump_triangle_z:
;------------------in - eax - x1 shl 16 + y1 -----------
;---------------------- ebx - x2 shl 16 + y2 -----------
;---------------------- ecx - x3 shl 16 + y3 -----------
;---------------------- edx - pointer to bump map ------
;---------------------- esi - pointer to environment map
;---------------------- edi - pointer to screen buffer--
;---------------------- stack : bump coordinates--------
;---------------------- environment coordinates-
;---------------------- Z position coordinates--
;---------------------- pointer to Z buffer-----
;-- Z-buffer - filled with coordinates as dword --------
;-- (Z coor. as word) shl CATMULL_SHIFT ----------------
.b_x1 equ ebp+4 ; procedure don't save registers !!!
.b_y1 equ ebp+6 ; each coordinate as word
.b_x2 equ ebp+8
.b_y2 equ ebp+10
.b_x3 equ ebp+12
.b_y3 equ ebp+14
.e_x1 equ ebp+16
.e_y1 equ ebp+18
.e_x2 equ ebp+20
.e_y2 equ ebp+22
.e_x3 equ ebp+24
.e_y3 equ ebp+26
.z1 equ word[ebp+28]
.z2 equ word[ebp+30]
.z3 equ word[ebp+32]
.z_buff equ dword[ebp+34] ; pointer to Z-buffer
 
 
.t_bmap equ dword[ebp-4] ; pointer to bump map
.t_emap equ dword[ebp-8] ; pointer to e. map
.x1 equ word[ebp-10]
.y1 equ word[ebp-12]
.x2 equ word[ebp-14]
.y2 equ word[ebp-16]
.x3 equ word[ebp-18]
.y3 equ word[ebp-20]
 
.dx12 equ dword[ebp-24]
.dz12 equ [ebp-28]
.dbx12 equ dword[ebp-32]
.dby12 equ [ebp-36]
.dex12 equ dword[ebp-40]
.dey12 equ [ebp-44]
 
.dx13 equ dword[ebp-48]
.dz13 equ [ebp-52]
.dbx13 equ dword[ebp-56]
.dby13 equ [ebp-60]
.dex13 equ dword[ebp-64]
.dey13 equ [ebp-68]
 
.dx23 equ dword[ebp-72]
.dz23 equ [ebp-76]
.dbx23 equ dword[ebp-80]
.dby23 equ [ebp-84]
.dex23 equ dword[ebp-88]
.dey23 equ [ebp-92]
 
.cx1 equ dword[ebp-96] ; current variables
.cz1 equ [ebp-100]
.cx2 equ dword[ebp-104]
.cz2 equ [ebp-108]
.cbx1 equ dword[ebp-112]
.cby1 equ [ebp-116]
.cex1 equ dword[ebp-120]
.cey1 equ [ebp-124]
.cbx2 equ dword[ebp-128]
.cby2 equ [ebp-132]
.cex2 equ dword[ebp-136]
.cey2 equ [ebp-140]
 
mov ebp,esp
push edx ; store bump map
push esi ; store e. map
; sub esp,120
.sort3: ; sort triangle coordinates...
cmp ax,bx
jle .sort1
xchg eax,ebx
mov edx,dword[.b_x1]
xchg edx,dword[.b_x2]
mov dword[.b_x1],edx
mov edx,dword[.e_x1]
xchg edx,dword[.e_x2]
mov dword[.e_x1],edx
mov dx,.z1
xchg dx,.z2
mov .z1,dx
.sort1:
cmp bx,cx
jle .sort2
xchg ebx,ecx
mov edx,dword[.b_x2]
xchg edx,dword[.b_x3]
mov dword[.b_x2],edx
mov edx,dword[.e_x2]
xchg edx,dword[.e_x3]
mov dword[.e_x2],edx
mov dx,.z2
xchg dx,.z3
mov .z2,dx
jmp .sort3
.sort2:
push eax ; store triangle coords in variables
push ebx
push ecx
 
mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that
and edx,ebx ; if *all* of them are negative a sign flag is raised
and edx,ecx
and edx,eax
test edx,80008000h ; Check both X&Y at once
jne .loop23_done
; mov edx,eax ; eax,ebx,ecx are ORd together into edx which means that
; or edx,ebx ; if any *one* of them is negative a sign flag is raised
; or edx,ecx
; test edx,80000000h ; Check only X
; jne .loop23_done
 
; cmp .x1,SIZE_X ; {
; jg .loop23_done
; cmp .x2,SIZE_X ; This can be optimized with effort
; jg .loop23_done
; cmp .x3,SIZE_X
; jg .loop23_done ; {
 
 
mov bx,.y2 ; calc delta 12
sub bx,.y1
jnz .bt_dx12_make
mov ecx,6
xor edx,edx
@@:
push edx ;dword 0
loop @b
jmp .bt_dx12_done
.bt_dx12_make:
mov ax,.x2
sub ax,.x1
cwde
movsx ebx,bx
shl eax,ROUND
cdq
idiv ebx
; mov .dx12,eax
push eax
 
mov ax,.z2
sub ax,.z1
cwde
shl eax,CATMULL_SHIFT
cdq
idiv ebx
push eax
if Ext>=SSE
 
sub esp,16
cvtsi2ss xmm3,ebx ;rcps
; mov eax,255
cvtsi2ss xmm4,[i255d]
divss xmm3,xmm4
rcpss xmm3,xmm3
; mulss xmm3,xmm4
shufps xmm3,xmm3,0
 
movd mm0,[.b_x1]
movd mm1,[.b_x2]
movd mm2,[.e_x1]
movd mm3,[.e_x2]
pxor mm4,mm4
punpcklwd mm0,mm4
punpcklwd mm1,mm4
punpcklwd mm2,mm4
punpcklwd mm3,mm4
 
psubd mm1,mm0
psubd mm3,mm2
 
; cvtpi2ps xmm0,mm0
; movlhps xmm0,xmm0
; cvtpi2ps xmm0,mm2
cvtpi2ps xmm1,mm1
movlhps xmm1,xmm1
cvtpi2ps xmm1,mm3
; subps xmm1,xmm0
 
 
; cvtpi2ps xmm0,mm3
; divps xmm1,xmm3
mulps xmm1,xmm3
shufps xmm1,xmm1,10110001b
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords
movhlps xmm1,xmm1
cvtps2pi mm1,xmm1
movq .dey12,mm0
movq .dby12,mm1
 
 
else
 
mov ax,word[.b_x2]
sub ax,word[.b_x1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dbx12,eax
push eax
 
mov ax,word[.b_y2]
sub ax,word[.b_y1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dby12,eax
push eax
 
mov ax,word[.e_x2]
sub ax,word[.e_x1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dex12,eax
push eax
 
mov ax,word[.e_y2]
sub ax,word[.e_y1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dey12,eax
push eax
 
end if
 
.bt_dx12_done:
 
mov bx,.y3 ; calc delta13
sub bx,.y1
jnz .bt_dx13_make
mov ecx,6
xor edx,edx
@@:
push edx ;dword 0
loop @b
jmp .bt_dx13_done
.bt_dx13_make:
mov ax,.x3
sub ax,.x1
cwde
movsx ebx,bx
shl eax,ROUND
cdq
idiv ebx
; mov .dx13,eax
push eax
 
mov ax,.z3
sub ax,.z1
cwde
shl eax,CATMULL_SHIFT
cdq
idiv ebx
; mov .dz13,eax
push eax
 
if Ext>=SSE
 
sub esp,16
cvtsi2ss xmm3,ebx ;rcps
; mov eax,255
cvtsi2ss xmm4,[i255d]
divss xmm3,xmm4
rcpss xmm3,xmm3
; mulss xmm3,xmm4
shufps xmm3,xmm3,0
 
movd mm0,[.b_x1]
movd mm1,[.b_x3]
movd mm2,[.e_x1]
movd mm3,[.e_x3]
pxor mm4,mm4
punpcklwd mm0,mm4
punpcklwd mm1,mm4
punpcklwd mm2,mm4
punpcklwd mm3,mm4
 
psubd mm1,mm0
psubd mm3,mm2
 
; cvtpi2ps xmm0,mm0
; movlhps xmm0,xmm0
; cvtpi2ps xmm0,mm2
cvtpi2ps xmm1,mm1
movlhps xmm1,xmm1
cvtpi2ps xmm1,mm3
; subps xmm1,xmm0
 
 
; cvtpi2ps xmm0,mm3
; divps xmm1,xmm3
mulps xmm1,xmm3
shufps xmm1,xmm1,10110001b
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords
movhlps xmm1,xmm1
cvtps2pi mm1,xmm1
movq .dey13,mm0
movq .dby13,mm1
 
 
else
 
mov ax,word[.b_x3]
sub ax,word[.b_x1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dbx13,eax
push eax
 
mov ax,word[.b_y3]
sub ax,word[.b_y1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dby13,eax
push eax
 
mov ax,word[.e_x3]
sub ax,word[.e_x1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dex13,eax
push eax
 
mov ax,word[.e_y3]
sub ax,word[.e_y1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dey13,eax
push eax
end if
 
.bt_dx13_done:
 
mov bx,.y3 ; calc delta23
sub bx,.y2
jnz .bt_dx23_make
mov ecx,6
xor edx,edx
@@:
push edx ;dword 0
loop @b
jmp .bt_dx23_done
.bt_dx23_make:
mov ax,.x3
sub ax,.x2
cwde
movsx ebx,bx
shl eax,ROUND
cdq
idiv ebx
; mov .dx23,eax
push eax
 
mov ax,.z3
sub ax,.z2
cwde
shl eax,CATMULL_SHIFT
cdq
idiv ebx
; mov .dz23,eax
push eax
; sub esp,40
if Ext>=SSE
 
sub esp,16
cvtsi2ss xmm3,ebx ;rcps
; mov eax,255
cvtsi2ss xmm4,[i255d]
divss xmm3,xmm4
rcpss xmm3,xmm3
; mulss xmm3,xmm4
shufps xmm3,xmm3,0
 
movd mm0,[.b_x2]
movd mm1,[.b_x3]
movd mm2,[.e_x2]
movd mm3,[.e_x3]
pxor mm4,mm4
punpcklwd mm0,mm4
punpcklwd mm1,mm4
punpcklwd mm2,mm4
punpcklwd mm3,mm4
 
psubd mm1,mm0
psubd mm3,mm2
 
; cvtpi2ps xmm0,mm0
; movlhps xmm0,xmm0
; cvtpi2ps xmm0,mm2
cvtpi2ps xmm1,mm1
movlhps xmm1,xmm1
cvtpi2ps xmm1,mm3
; subps xmm1,xmm0
 
 
; cvtpi2ps xmm0,mm3
; divps xmm1,xmm3
mulps xmm1,xmm3
shufps xmm1,xmm1,10110001b
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords
movhlps xmm1,xmm1
cvtps2pi mm1,xmm1
movq .dey23,mm0
movq .dby23,mm1
 
else
 
mov ax,word[.b_x3]
sub ax,word[.b_x2]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dbx23,eax
push eax
 
mov ax,word[.b_y3]
sub ax,word[.b_y2]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dby23,eax
push eax
 
mov ax,word[.e_x3]
sub ax,word[.e_x2]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dex23,eax
push eax
 
mov ax,word[.e_y3]
sub ax,word[.e_y2]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dey23,eax
push eax
 
end if
 
.bt_dx23_done:
sub esp,48
 
movsx eax,.x1
shl eax,ROUND
mov .cx1,eax
mov .cx2,eax
; push eax
; push eax
 
movsx eax,word[.b_x1]
shl eax,ROUND
mov .cbx1,eax
mov .cbx2,eax
; push eax
; push eax
 
movsx eax,word[.b_y1]
shl eax,ROUND
mov .cby1,eax
mov .cby2,eax
; push eax
; push eax
 
movsx eax,word[.e_x1]
shl eax,ROUND
mov .cex1,eax
mov .cex2,eax
; push eax
; push eax
 
movsx eax,word[.e_y1]
shl eax,ROUND
mov .cey1,eax
mov .cey2,eax
; push eax
; push eax
 
movsx eax,.z1
shl eax,CATMULL_SHIFT
mov .cz1,eax
mov .cz2,eax
; push eax
; push eax
 
movsx ecx,.y1
cmp cx,.y2
jge .loop12_done
.loop12:
call .call_bump_line
if Ext >= SSE2
movups xmm0,.cey2
movups xmm1,.cey1
movups xmm2,.dey12
movups xmm3,.dey13
paddd xmm0,xmm2
paddd xmm1,xmm3
movups .cey2,xmm0
movups .cey1,xmm1
movq mm4,.cz1
movq mm5,.cz2
paddd mm4,.dz13
paddd mm5,.dz12
movq .cz1,mm4
movq .cz2,mm5
end if
 
 
if (Ext = MMX) | (Ext = SSE)
movq mm0,.cby2
movq mm1,.cby1
movq mm2,.cey2
movq mm3,.cey1
movq mm4,.cz1
movq mm5,.cz2
paddd mm0,.dby12
paddd mm1,.dby13
paddd mm2,.dey12
paddd mm3,.dey13
paddd mm4,.dz13
paddd mm5,.dz12
movq .cby2,mm0
movq .cby1,mm1
movq .cey1,mm3
movq .cey2,mm2
movq .cz1,mm4
movq .cz2,mm5
else if Ext = NON
mov edx,.dbx13
add .cbx1,edx
mov eax,.dbx12
add .cbx2,eax
mov ebx,.dby13
add .cby1,ebx
mov edx,.dby12
add .cby2,edx
 
mov eax,.dex13
add .cex1,eax
mov ebx,.dex12
add .cex2,ebx
mov edx,.dey13
add .cey1,edx
mov eax,.dey12
add .cey2,eax
 
mov eax,.dx13
add .cx1,eax
mov ebx,.dx12
add .cx2,ebx
 
mov ebx,.dz13
add .cz1,ebx
mov edx,.dz12
add .cz2,edx
end if
inc ecx
cmp cx,.y2
jl .loop12
.loop12_done:
 
movsx ecx,.y2
cmp cx,.y3
jge .loop23_done
 
movsx eax,.z2
shl eax,CATMULL_SHIFT
mov .cz2,eax
 
movsx eax,.x2
shl eax,ROUND
mov .cx2,eax
 
movzx eax,word[.b_x2]
shl eax,ROUND
mov .cbx2,eax
 
movzx eax,word[.b_y2]
shl eax,ROUND
mov .cby2,eax
 
movzx eax,word[.e_x2]
shl eax,ROUND
mov .cex2,eax
 
movzx eax,word[.e_y2]
shl eax,ROUND
mov .cey2,eax
 
.loop23:
call .call_bump_line
if Ext >= SSE2
movups xmm0,.cey2
movups xmm1,.cey1
movups xmm2,.dey23
movups xmm3,.dey13
paddd xmm0,xmm2
paddd xmm1,xmm3
movups .cey2,xmm0
movups .cey1,xmm1
movq mm4,.cz1
movq mm5,.cz2
paddd mm4,.dz13
paddd mm5,.dz23
movq .cz1,mm4
movq .cz2,mm5
end if
if (Ext = MMX) | (Ext = SSE)
movq mm0,.cby2
movq mm1,.cby1
movq mm2,.cey2
movq mm3,.cey1
movq mm4,.cz1
movq mm5,.cz2
paddd mm0,.dby23
paddd mm1,.dby13
paddd mm2,.dey23
paddd mm3,.dey13
paddd mm4,.dz13
paddd mm5,.dz23
movq .cby2,mm0
movq .cby1,mm1
movq .cey1,mm3
movq .cey2,mm2
movq .cz1,mm4
movq .cz2,mm5
else if Ext = NON
mov eax,.dx13
add .cx1,eax
mov ebx,.dx23
add .cx2,ebx
 
mov edx,.dbx13
add .cbx1,edx
mov eax,.dbx23
add .cbx2,eax
mov ebx,.dby13
add .cby1,ebx
mov edx,.dby23
add .cby2,edx
 
mov eax,.dex13
add .cex1,eax
mov ebx,.dex23
add .cex2,ebx
mov edx,.dey13
add .cey1,edx
mov eax,.dey23
add .cey2,eax
 
mov ebx,.dz13
add .cz1,ebx
mov edx,.dz23
add .cz2,edx
end if
inc ecx
cmp cx,.y3
jl .loop23
.loop23_done:
 
mov esp,ebp
ret 34
 
.call_bump_line:
; Local helper of bump_triangle_z: draws the scanline whose number is in ecx
; by calling bump_line_z with the current edge values.
; All general registers are saved with pushad and restored with popad, so the
; caller's ecx (scanline counter) and edi (screen buffer) survive the call.
; The 14 dwords pushed below are the stack parameters of bump_line_z; their
; order defines the callee's [ebp+4]..[ebp+56] layout, so it must not change.
; bump_line_z removes them itself (it returns with "ret 56").

; push ebp
; push ecx
pushad

push dword .cz1 ; -> bump_line_z .z1
push dword .cz2 ; -> bump_line_z .z2
push .z_buff
push .t_emap
push .t_bmap
push dword .cey2
push .cex2
push dword .cby2
push .cbx2
push dword .cey1
push .cex1
push dword .cby1
push .cbx1
push ecx ; -> bump_line_z .y

mov eax,.cx1 ; eax = integer part of the first edge x
sar eax,ROUND
mov ebx,.cx2 ; ebx = integer part of the second edge x
sar ebx,ROUND

call bump_line_z

popad
ret
bump_line_z:
;-----------------------------------------------------------------------
; Draws one horizontal span of a bump + environment mapped triangle with
; a per-pixel Z-buffer test.
; For every pixel the bump map is sampled at the four neighbours of the
; current bump coordinate; the horizontal and vertical differences are
; added to the current environment map coordinate and the resulting env
; map texel is written to the screen (black if it falls outside the map).
;
; The screen buffer is addressed with 3 bytes per pixel, the Z-buffer
; with one dword per pixel. The routine relies on stosd/lodsd, so the
; direction flag is expected to be clear on entry.
; ebp is overwritten (not saved); eax, ebx, ecx, edx, esi, edi and the
; MMX/SSE registers used by the selected Ext path are clobbered.
; TEX_X, TEX_Y, TEX_SHIFT, TEXTURE_SIZE, Ext, size_x_var and size_y_var
; are defined outside this routine.
;-----------------------------------------------------------------------
;--------------in: eax - x1
;-------------- ebx - x2
;-------------- edi - pointer to screen buffer
;stack - other parameters (14 dwords, released by "ret 56"):
.y equ dword [ebp+4]
.bx1 equ [ebp+8] ; ---
.by1 equ dword [ebp+12] ; |
.ex1 equ [ebp+16] ; |
.ey1 equ dword [ebp+20] ; |> bump and env coords
.bx2 equ [ebp+24] ; |> shifted shl ROUND
.by2 equ dword [ebp+28] ; |
.ex2 equ [ebp+32] ; |
.ey2 equ dword [ebp+36] ; ---
.bmap equ dword [ebp+40]
.emap equ dword [ebp+44]
.z_buff equ dword [ebp+48]
.z2 equ dword [ebp+52] ; -- |> z coords shifted
.z1 equ dword [ebp+56] ; -- shl CATMULL_SHIFT

; locals - created by the pushes / "sub esp" below, in exactly this order
.x1 equ dword [ebp-4]
.x2 equ dword [ebp-8]
.dbx equ dword [ebp-12]
.dby equ [ebp-16]
.dex equ dword [ebp-20]
.dey equ [ebp-24]
.dz equ dword [ebp-28]
.cbx equ dword [ebp-32]
.cby equ [ebp-36]
.cex equ dword [ebp-40]
.cey equ [ebp-44]
.cz equ dword [ebp-48]
.czbuff equ dword [ebp-52]
.temp1 equ ebp-60
.temp2 equ ebp-68
.temp3 equ ebp-76
.temp4 equ ebp-84
.temp5 equ ebp-92

mov ebp,esp

; reject scanlines above or below the screen
mov ecx,.y
or ecx,ecx
jl .bl_end
; mov dx,[size_x_var]
; dec dx
cmp cx,[size_y_var] ;SIZE_Y
jge .bl_end

; make sure x1 < x2; an empty span (x1 = x2) draws nothing
cmp eax,ebx
jl .bl_ok
je .bl_end

; x1 > x2: swap both end points together with all their attributes
xchg eax,ebx
if Ext=NON
mov edx,.bx1
xchg edx,.bx2
mov .bx1,edx
mov edx,.by1
xchg edx,.by2
mov .by1,edx

mov edx,.ex1
xchg edx,.ex2
mov .ex1,edx
mov edx,.ey1
xchg edx,.ey2
mov .ey1,edx
end if
if Ext = MMX
movq mm0,.bx1
movq mm1,.ex1
movq mm2,.bx2
movq mm3,.ex2
movq .bx2,mm0
movq .ex2,mm1
movq .bx1,mm2
movq .ex1,mm3
end if
if Ext >= SSE
; the four coords of each end point are contiguous, swap 16 bytes at once
movups xmm0,.bx1
movups xmm1,.bx2
movups .bx2,xmm0
movups .bx1,xmm1
end if

mov edx,.z1
xchg edx,.z2
mov .z1,edx

.bl_ok:

push eax
push ebx ;store x1, x2
; reject spans lying completely to the right or to the left of the screen
movzx edx,word[size_x_var]
dec edx
cmp .x1,edx ;SIZE_X
jge .bl_end
cmp .x2,0
jle .bl_end

mov ebx,.x2
sub ebx,.x1 ; ebx = span length in pixels (> 0 here)

if Ext >= SSE

; per-pixel deltas of the four texture coords, computed in one divps
sub esp,16 ; room for .dbx, .dby, .dex, .dey
cvtsi2ss xmm3,ebx ;rcps
shufps xmm3,xmm3,0

cvtpi2ps xmm0,.bx1 ;mm0
movlhps xmm0,xmm0
cvtpi2ps xmm0,.ex1 ;mm2
cvtpi2ps xmm1,.bx2 ;mm1
movlhps xmm1,xmm1
cvtpi2ps xmm1,.ex2 ;mm3
subps xmm1,xmm0

divps xmm1,xmm3

shufps xmm1,xmm1,10110001b ; swap x/y inside each pair to match the frame layout
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords
movhlps xmm1,xmm1
cvtps2pi mm1,xmm1
movq .dey,mm0 ; stores .dey and .dex
movq .dby,mm1 ; stores .dby and .dbx

else

mov eax,.bx2 ; calc .dbx
sub eax,.bx1
cdq
idiv ebx
push eax

mov eax,.by2 ; calc .dby
sub eax,.by1
cdq
idiv ebx
push eax

mov eax,.ex2 ; calc .dex
sub eax,.ex1
cdq
idiv ebx
push eax

mov eax,.ey2 ; calc .dey
sub eax,.ey1
cdq
idiv ebx
push eax

end if

mov eax,.z2 ; calc .dz
sub eax,.z1
cdq
idiv ebx
push eax

cmp .x1,0 ; set correctly begin variable
jge @f ; CLIPPING ON FUNCTION
; span starts left of the screen: advance all
; start values by -x1 steps and begin at x = 0
mov ebx,.x1
neg ebx
imul ebx ; eax = .dz * abs(.x1)
add .z1,eax
mov .x1,0

mov eax,.dbx
imul ebx
add .bx1,eax

mov eax,.dby
imul ebx
add .by1,eax

mov eax,.dex
imul ebx
add .ex1,eax

mov eax,.dey
imul ebx
add .ey1,eax
@@:
; clip the right end to size_x - 1
movzx edx,word[size_x_var]
dec edx
cmp .x2,edx ;SIZE_X
jl @f
mov .x2,edx ;SIZE_X
@@:
movzx eax,word[size_x_var] ;SIZE_X ;calc memory begin in buffers
mov ebx,.y
mul ebx
mov ebx,.x1
add eax,ebx ; eax = y * size_x + x1 (pixel index)
mov ebx,eax
lea eax,[eax*3] ; screen buffer: 3 bytes per pixel
add edi,eax
mov esi,.z_buff ; z-buffer filled with dd variables
shl ebx,2
add esi,ebx

mov ecx,.x2
sub ecx,.x1 ; ecx = number of pixels to draw
; init current variables
push dword .bx1 ; -> .cbx
push .by1 ; -> .cby
push dword .ex1 ; -> .cex
push .ey1 ; -> .cey

push .z1 ; current z shl CATMULL_SHIFT
push esi ; -> .czbuff
;if Ext = SSE2
; movups xmm1,.dey
;end if
if Ext>=MMX
; keep current coords and their deltas in MMX registers for the whole loop
movq mm0,.cby ; mm0 = cby (low dword), cbx (high dword)
movq mm1,.cey ; mm1 = cey (low dword), cex (high dword)
movq mm2,.dby
movq mm3,.dey
end if
if Ext >= SSE2
; loop constants: xmm1 = index mask, xmm2 = the four neighbour offsets
; (-1, +1, -TEX_X, +TEX_X), xmm3 = bump map base address
mov eax,TEXTURE_SIZE
movd xmm1,eax
shufps xmm1,xmm1,0
push dword TEX_X
push dword -TEX_X
push dword 1
push dword -1
movups xmm2,[esp]
movd xmm3,.bmap
shufps xmm3,xmm3,0
end if

;align 16
.draw:
; if TEX = SHIFTING ;bump drawing only in shifting mode
mov esi,.czbuff ; .czbuff current address in buffer
mov ebx,.cz ; .cz - cur z position
cmp ebx,dword[esi]
jge .skip ; pixel is not nearer than the stored depth

if Ext>=MMX
movq mm6,mm0
psrld mm6,ROUND
movd eax,mm6 ; eax = integer bump y
psrlq mm6,32
movd esi,mm6 ; esi = integer bump x
else
mov eax,.cby
sar eax,ROUND
mov esi,.cbx
sar esi,ROUND
end if
shl eax,TEX_SHIFT ;-
add esi,eax ;- ; esi - current bump map index

if Ext = SSE2
; fetch the four neighbours through the precomputed offsets in xmm2
movd xmm0,esi
shufps xmm0,xmm0,0
paddd xmm0,xmm2
pand xmm0,xmm1
paddd xmm0,xmm3

movd ebx,xmm0
movzx eax,byte[ebx]
;
; shufps xmm0,xmm0,11100001b
psrldq xmm0,4
movd ebx,xmm0
movzx ebx,byte[ebx]
sub eax,ebx
;
; shufps xmm0,xmm0,11111110b
psrldq xmm0,4
movd ebx,xmm0
movzx edx, byte [ebx]
;
; shufps xmm0,xmm0,11111111b
psrldq xmm0,4
movd ebx,xmm0
movzx ebx, byte [ebx]
sub edx,ebx
;
else

; left neighbour
mov ebx,esi
dec ebx
and ebx,TEXTURE_SIZE
add ebx,.bmap
movzx eax,byte [ebx]

; right neighbour
mov ebx,esi
inc ebx
and ebx,TEXTURE_SIZE
add ebx,.bmap
movzx ebx,byte [ebx]

sub eax,ebx

; neighbour one row up
mov ebx,esi
sub ebx,TEX_X
and ebx,TEXTURE_SIZE
add ebx,.bmap
movzx edx,byte [ebx]

; neighbour one row down
mov ebx,esi
add ebx,TEX_X
and ebx,TEXTURE_SIZE
add ebx,.bmap
movzx ebx,byte [ebx]

sub edx,ebx
end if
; eax - horizontal sub
; edx - vertical sub
if Ext = NON
mov ebx,.cex ;.cex - current env map X
sar ebx,ROUND
add eax,ebx ; eax - modified x coord

mov ebx,.cey ;.cey - current env map y
sar ebx,ROUND
add edx,ebx ; edx - modified y coord
else
movq mm6,mm1 ; mm6 - copy of cur env coords
psrld mm6,ROUND
movd ebx,mm6 ; low dword = env y
psrlq mm6,32
add edx,ebx ; edx - modified y coord
movd ebx,mm6 ; high dword = env x
add eax,ebx ; eax - modified x coord
end if
; Env map lookup is valid only for 0 <= x < TEX_X and 0 <= y < TEX_Y.
; The upper bounds are exclusive: x = TEX_X would address the next row and
; y = TEX_Y a row behind the end of the map.
or eax,eax
jl .black
cmp eax,TEX_X
jge .black
or edx,edx
jl .black
cmp edx,TEX_Y
jge .black

shl edx,TEX_SHIFT
add edx,eax
lea esi,[edx*3] ; env map: 3 bytes per texel
add esi,.emap
lodsd
jmp .put_pixel
.black:
xor eax,eax
.put_pixel:
stosd ; writes 4 bytes ...
dec edi ; ... but advances by one 3-byte pixel only
mov ebx,.cz
mov esi,.czbuff
mov dword[esi],ebx ; store the new depth
jmp .no_skip
.skip:
add edi,3
.no_skip:
add .czbuff,4

;if Ext = SSE2
; movups xmm0,.cey
; paddd xmm0,xmm1
; movups .cey,xmm0
;
;end if
if Ext >= MMX
paddd mm0,mm2
paddd mm1,mm3
end if

if Ext=NON
mov eax,.dbx
add .cbx,eax
mov eax,.dby
add .cby,eax
mov eax,.dex
add .cex,eax
mov eax,.dey
add .cey,eax
end if
mov eax,.dz
add .cz,eax

dec ecx
jnz .draw
; end if
.bl_end:
mov esp,ebp ; drop all locals
ret 56 ; and the 14 dword parameters
; Constants shared by the bump mapping routines below.
; The commented-out lines are earlier local definitions that are kept for
; reference; the names are still used by the code, so they are presumably
; defined in another file now.
;SIZE_X equ 350
;SIZE_Y equ 350
; ROUND - number of fractional bits of the fixed-point screen x and texture
; coordinates (values are scaled with "shl eax,ROUND" and converted back
; with "sar eax,ROUND").
ROUND equ 8
;TEX_X equ 512
;TEX_Y equ 512
;TEXTURE_SIZE EQU (512*512)-1
;TEX_SHIFT EQU 9
; CATMULL_SHIFT - number of fractional bits of the interpolated Z values
; stored in the Z-buffer.
CATMULL_SHIFT equ 8
;TEXTURE_SIZE EQU (TEX_X * TEX_Y)-1
;Ext = NON
;MMX = 1
;NON = 0
;------- Big thanks to Majuma (www.majuma.xt.pl) for absolutely great---
;------- DOS 13h mode demos --------------------------------------------
;------- Procedure draws bump triangle using Catmull Z-buffer algorithm-
;------- (Z coordinate interpolation)-----------------------------------
bump_triangle_z:
;------------------in - eax - x1 shl 16 + y1 -----------
;---------------------- ebx - x2 shl 16 + y2 -----------
;---------------------- ecx - x3 shl 16 + y3 -----------
;---------------------- edx - pointer to bump map ------
;---------------------- esi - pointer to environment map
;---------------------- edi - pointer to screen buffer--
;---------------------- stack : bump coordinates--------
;---------------------- environment coordinates-
;---------------------- Z position coordinates--
;---------------------- pointer to Z buffer-----
;-- Z-buffer - filled with coordinates as dword --------
;-- (Z coor. as word) shl CATMULL_SHIFT ----------------
;
; Outline:
; 1. sort the three vertices by ascending y (with all their attributes),
; 2. compute the per-scanline deltas of x, z, bump and env coordinates
; for the edges 1-2, 1-3 and 2-3,
; 3. walk the scanlines y1..y2-1 (edges 1-3 / 1-2) and y2..y3-1
; (edges 1-3 / 2-3), drawing each one with bump_line_z.
; The 34 bytes of stack parameters are removed on return ("ret 34").
; All locals live on the stack and are created by pushes in exactly the
; order of the "equ" list below, so the order of the pushes matters.
.b_x1 equ ebp+4 ; procedure doesn't save registers !!!
.b_y1 equ ebp+6 ; each coordinate as word
.b_x2 equ ebp+8
.b_y2 equ ebp+10
.b_x3 equ ebp+12
.b_y3 equ ebp+14
.e_x1 equ ebp+16
.e_y1 equ ebp+18
.e_x2 equ ebp+20
.e_y2 equ ebp+22
.e_x3 equ ebp+24
.e_y3 equ ebp+26
.z1 equ word[ebp+28]
.z2 equ word[ebp+30]
.z3 equ word[ebp+32]
.z_buff equ dword[ebp+34] ; pointer to Z-buffer


.t_bmap equ dword[ebp-4] ; pointer to bump map
.t_emap equ dword[ebp-8] ; pointer to e. map
.x1 equ word[ebp-10]
.y1 equ word[ebp-12]
.x2 equ word[ebp-14]
.y2 equ word[ebp-16]
.x3 equ word[ebp-18]
.y3 equ word[ebp-20]

; per-scanline deltas along edge 1-2
.dx12 equ dword[ebp-24]
.dz12 equ [ebp-28]
.dbx12 equ dword[ebp-32]
.dby12 equ [ebp-36]
.dex12 equ dword[ebp-40]
.dey12 equ [ebp-44]

; per-scanline deltas along edge 1-3
.dx13 equ dword[ebp-48]
.dz13 equ [ebp-52]
.dbx13 equ dword[ebp-56]
.dby13 equ [ebp-60]
.dex13 equ dword[ebp-64]
.dey13 equ [ebp-68]

; per-scanline deltas along edge 2-3
.dx23 equ dword[ebp-72]
.dz23 equ [ebp-76]
.dbx23 equ dword[ebp-80]
.dby23 equ [ebp-84]
.dex23 equ dword[ebp-88]
.dey23 equ [ebp-92]

.cx1 equ dword[ebp-96] ; current variables
.cz1 equ [ebp-100]
.cx2 equ dword[ebp-104]
.cz2 equ [ebp-108]
.cbx1 equ dword[ebp-112]
.cby1 equ [ebp-116]
.cex1 equ dword[ebp-120]
.cey1 equ [ebp-124]
.cbx2 equ dword[ebp-128]
.cby2 equ [ebp-132]
.cex2 equ dword[ebp-136]
.cey2 equ [ebp-140]

cld ; bump_line_z uses stosd/lodsd; same as bump_tex_triangle_z
mov ebp,esp
push edx ; store bump map
push esi ; store e. map
; sub esp,120
.sort3: ; sort triangle coordinates...
; the low words of eax/ebx/ecx hold y, so "cmp ax,bx" compares y1 with y2;
; each swap moves the bump, env and z attributes together with the vertex
cmp ax,bx
jle .sort1
xchg eax,ebx
mov edx,dword[.b_x1]
xchg edx,dword[.b_x2]
mov dword[.b_x1],edx
mov edx,dword[.e_x1]
xchg edx,dword[.e_x2]
mov dword[.e_x1],edx
mov dx,.z1
xchg dx,.z2
mov .z1,dx
.sort1:
cmp bx,cx
jle .sort2
xchg ebx,ecx
mov edx,dword[.b_x2]
xchg edx,dword[.b_x3]
mov dword[.b_x2],edx
mov edx,dword[.e_x2]
xchg edx,dword[.e_x3]
mov dword[.e_x2],edx
mov dx,.z2
xchg dx,.z3
mov .z2,dx
jmp .sort3 ; vertex 2 changed - compare it with vertex 1 again
.sort2:
push eax ; store triangle coords in variables
push ebx
push ecx

mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that
and edx,ebx ; if *all* of them are negative a sign flag is raised
and edx,ecx
and edx,eax
test edx,80008000h ; Check both X&Y at once
jne .loop23_done
; mov edx,eax ; eax,ebx,ecx are ORd together into edx which means that
; or edx,ebx ; if any *one* of them is negative a sign flag is raised
; or edx,ecx
; test edx,80000000h ; Check only X
; jne .loop23_done

; cmp .x1,SIZE_X ; {
; jg .loop23_done
; cmp .x2,SIZE_X ; This can be optimized with effort
; jg .loop23_done
; cmp .x3,SIZE_X
; jg .loop23_done ; {


mov bx,.y2 ; calc delta 12
sub bx,.y1
jnz .bt_dx12_make
; y1 = y2: horizontal edge, all six deltas are zero
mov ecx,6
xor edx,edx
@@:
push edx ;dword 0
loop @b
jmp .bt_dx12_done
.bt_dx12_make:
mov ax,.x2
sub ax,.x1
cwde
movsx ebx,bx
shl eax,ROUND
cdq
idiv ebx
; mov .dx12,eax
push eax

mov ax,.z2
sub ax,.z1
cwde
shl eax,CATMULL_SHIFT
cdq
idiv ebx
push eax ; .dz12
if Ext>=SSE

; the four texture deltas are computed at once: xmm3 is an approximate
; reciprocal of (y2-y1)/[i255d] in all lanes ([i255d] is defined elsewhere)
sub esp,16 ; room for .dbx12, .dby12, .dex12, .dey12
cvtsi2ss xmm3,ebx ;rcps
; mov eax,255
cvtsi2ss xmm4,[i255d]
divss xmm3,xmm4
rcpss xmm3,xmm3
; mulss xmm3,xmm4
shufps xmm3,xmm3,0

movd mm0,[.b_x1]
movd mm1,[.b_x2]
movd mm2,[.e_x1]
movd mm3,[.e_x2]
pxor mm4,mm4
punpcklwd mm0,mm4 ; zero-extend the coordinate words to dwords
punpcklwd mm1,mm4
punpcklwd mm2,mm4
punpcklwd mm3,mm4

psubd mm1,mm0
psubd mm3,mm2

; cvtpi2ps xmm0,mm0
; movlhps xmm0,xmm0
; cvtpi2ps xmm0,mm2
cvtpi2ps xmm1,mm1
movlhps xmm1,xmm1
cvtpi2ps xmm1,mm3
; subps xmm1,xmm0


; cvtpi2ps xmm0,mm3
; divps xmm1,xmm3
mulps xmm1,xmm3
shufps xmm1,xmm1,10110001b ; swap x/y inside each pair to match the frame layout
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords
movhlps xmm1,xmm1
cvtps2pi mm1,xmm1
movq .dey12,mm0 ; stores .dey12 and .dex12
movq .dby12,mm1 ; stores .dby12 and .dbx12


else

mov ax,word[.b_x2]
sub ax,word[.b_x1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dbx12,eax
push eax

mov ax,word[.b_y2]
sub ax,word[.b_y1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dby12,eax
push eax

mov ax,word[.e_x2]
sub ax,word[.e_x1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dex12,eax
push eax

mov ax,word[.e_y2]
sub ax,word[.e_y1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dey12,eax
push eax

end if

.bt_dx12_done:

mov bx,.y3 ; calc delta13
sub bx,.y1
jnz .bt_dx13_make
; y1 = y3: all six deltas are zero
mov ecx,6
xor edx,edx
@@:
push edx ;dword 0
loop @b
jmp .bt_dx13_done
.bt_dx13_make:
mov ax,.x3
sub ax,.x1
cwde
movsx ebx,bx
shl eax,ROUND
cdq
idiv ebx
; mov .dx13,eax
push eax

mov ax,.z3
sub ax,.z1
cwde
shl eax,CATMULL_SHIFT
cdq
idiv ebx
; mov .dz13,eax
push eax

if Ext>=SSE

; same scheme as for edge 1-2
sub esp,16 ; room for .dbx13, .dby13, .dex13, .dey13
cvtsi2ss xmm3,ebx ;rcps
; mov eax,255
cvtsi2ss xmm4,[i255d]
divss xmm3,xmm4
rcpss xmm3,xmm3
; mulss xmm3,xmm4
shufps xmm3,xmm3,0

movd mm0,[.b_x1]
movd mm1,[.b_x3]
movd mm2,[.e_x1]
movd mm3,[.e_x3]
pxor mm4,mm4
punpcklwd mm0,mm4
punpcklwd mm1,mm4
punpcklwd mm2,mm4
punpcklwd mm3,mm4

psubd mm1,mm0
psubd mm3,mm2

; cvtpi2ps xmm0,mm0
; movlhps xmm0,xmm0
; cvtpi2ps xmm0,mm2
cvtpi2ps xmm1,mm1
movlhps xmm1,xmm1
cvtpi2ps xmm1,mm3
; subps xmm1,xmm0


; cvtpi2ps xmm0,mm3
; divps xmm1,xmm3
mulps xmm1,xmm3
shufps xmm1,xmm1,10110001b
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords
movhlps xmm1,xmm1
cvtps2pi mm1,xmm1
movq .dey13,mm0 ; stores .dey13 and .dex13
movq .dby13,mm1 ; stores .dby13 and .dbx13


else

mov ax,word[.b_x3]
sub ax,word[.b_x1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dbx13,eax
push eax

mov ax,word[.b_y3]
sub ax,word[.b_y1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dby13,eax
push eax

mov ax,word[.e_x3]
sub ax,word[.e_x1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dex13,eax
push eax

mov ax,word[.e_y3]
sub ax,word[.e_y1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dey13,eax
push eax
end if

.bt_dx13_done:

mov bx,.y3 ; calc delta23
sub bx,.y2
jnz .bt_dx23_make
; y2 = y3: all six deltas are zero
mov ecx,6
xor edx,edx
@@:
push edx ;dword 0
loop @b
jmp .bt_dx23_done
.bt_dx23_make:
mov ax,.x3
sub ax,.x2
cwde
movsx ebx,bx
shl eax,ROUND
cdq
idiv ebx
; mov .dx23,eax
push eax

mov ax,.z3
sub ax,.z2
cwde
shl eax,CATMULL_SHIFT
cdq
idiv ebx
; mov .dz23,eax
push eax
; sub esp,40
if Ext>=SSE

; same scheme as for edge 1-2
sub esp,16 ; room for .dbx23, .dby23, .dex23, .dey23
cvtsi2ss xmm3,ebx ;rcps
; mov eax,255
cvtsi2ss xmm4,[i255d]
divss xmm3,xmm4
rcpss xmm3,xmm3
; mulss xmm3,xmm4
shufps xmm3,xmm3,0

movd mm0,[.b_x2]
movd mm1,[.b_x3]
movd mm2,[.e_x2]
movd mm3,[.e_x3]
pxor mm4,mm4
punpcklwd mm0,mm4
punpcklwd mm1,mm4
punpcklwd mm2,mm4
punpcklwd mm3,mm4

psubd mm1,mm0
psubd mm3,mm2

; cvtpi2ps xmm0,mm0
; movlhps xmm0,xmm0
; cvtpi2ps xmm0,mm2
cvtpi2ps xmm1,mm1
movlhps xmm1,xmm1
cvtpi2ps xmm1,mm3
; subps xmm1,xmm0


; cvtpi2ps xmm0,mm3
; divps xmm1,xmm3
mulps xmm1,xmm3
shufps xmm1,xmm1,10110001b
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords
movhlps xmm1,xmm1
cvtps2pi mm1,xmm1
movq .dey23,mm0 ; stores .dey23 and .dex23
movq .dby23,mm1 ; stores .dby23 and .dbx23

else

mov ax,word[.b_x3]
sub ax,word[.b_x2]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dbx23,eax
push eax

mov ax,word[.b_y3]
sub ax,word[.b_y2]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dby23,eax
push eax

mov ax,word[.e_x3]
sub ax,word[.e_x2]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dex23,eax
push eax

mov ax,word[.e_y3]
sub ax,word[.e_y2]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dey23,eax
push eax

end if

.bt_dx23_done:
sub esp,48 ; room for the 12 "current" variables (.cx1 ... .cey2)

; both edges start at vertex 1
movsx eax,.x1
shl eax,ROUND
mov .cx1,eax
mov .cx2,eax
; push eax
; push eax

movsx eax,word[.b_x1]
shl eax,ROUND
mov .cbx1,eax
mov .cbx2,eax
; push eax
; push eax

movsx eax,word[.b_y1]
shl eax,ROUND
mov .cby1,eax
mov .cby2,eax
; push eax
; push eax

movsx eax,word[.e_x1]
shl eax,ROUND
mov .cex1,eax
mov .cex2,eax
; push eax
; push eax

movsx eax,word[.e_y1]
shl eax,ROUND
mov .cey1,eax
mov .cey2,eax
; push eax
; push eax

movsx eax,.z1
shl eax,CATMULL_SHIFT
mov .cz1,eax
mov .cz2,eax
; push eax
; push eax

; upper part: scanlines y1 .. y2-1, edge 1-3 against edge 1-2
movsx ecx,.y1
cmp cx,.y2
jge .loop12_done
.loop12:
call .call_bump_line
if Ext >= SSE2
; each 16-byte access covers the four texture coords of one edge
movups xmm0,.cey2
movups xmm1,.cey1
movups xmm2,.dey12
movups xmm3,.dey13
paddd xmm0,xmm2
paddd xmm1,xmm3
movups .cey2,xmm0
movups .cey1,xmm1
; each 8-byte access covers z and x of one edge
movq mm4,.cz1
movq mm5,.cz2
paddd mm4,.dz13
paddd mm5,.dz12
movq .cz1,mm4
movq .cz2,mm5
end if


if (Ext = MMX) | (Ext = SSE)
movq mm0,.cby2
movq mm1,.cby1
movq mm2,.cey2
movq mm3,.cey1
movq mm4,.cz1
movq mm5,.cz2
paddd mm0,.dby12
paddd mm1,.dby13
paddd mm2,.dey12
paddd mm3,.dey13
paddd mm4,.dz13
paddd mm5,.dz12
movq .cby2,mm0
movq .cby1,mm1
movq .cey1,mm3
movq .cey2,mm2
movq .cz1,mm4
movq .cz2,mm5
else if Ext = NON
mov edx,.dbx13
add .cbx1,edx
mov eax,.dbx12
add .cbx2,eax
mov ebx,.dby13
add .cby1,ebx
mov edx,.dby12
add .cby2,edx

mov eax,.dex13
add .cex1,eax
mov ebx,.dex12
add .cex2,ebx
mov edx,.dey13
add .cey1,edx
mov eax,.dey12
add .cey2,eax

mov eax,.dx13
add .cx1,eax
mov ebx,.dx12
add .cx2,ebx

mov ebx,.dz13
add .cz1,ebx
mov edx,.dz12
add .cz2,edx
end if
inc ecx
cmp cx,.y2
jl .loop12
.loop12_done:

; lower part: scanlines y2 .. y3-1, edge 1-3 against edge 2-3
movsx ecx,.y2
cmp cx,.y3
jge .loop23_done

; restart the second edge exactly at vertex 2
movsx eax,.z2
shl eax,CATMULL_SHIFT
mov .cz2,eax

movsx eax,.x2
shl eax,ROUND
mov .cx2,eax

movzx eax,word[.b_x2]
shl eax,ROUND
mov .cbx2,eax

movzx eax,word[.b_y2]
shl eax,ROUND
mov .cby2,eax

movzx eax,word[.e_x2]
shl eax,ROUND
mov .cex2,eax

movzx eax,word[.e_y2]
shl eax,ROUND
mov .cey2,eax

.loop23:
call .call_bump_line
if Ext >= SSE2
movups xmm0,.cey2
movups xmm1,.cey1
movups xmm2,.dey23
movups xmm3,.dey13
paddd xmm0,xmm2
paddd xmm1,xmm3
movups .cey2,xmm0
movups .cey1,xmm1
movq mm4,.cz1
movq mm5,.cz2
paddd mm4,.dz13
paddd mm5,.dz23
movq .cz1,mm4
movq .cz2,mm5
end if
if (Ext = MMX) | (Ext = SSE)
movq mm0,.cby2
movq mm1,.cby1
movq mm2,.cey2
movq mm3,.cey1
movq mm4,.cz1
movq mm5,.cz2
paddd mm0,.dby23
paddd mm1,.dby13
paddd mm2,.dey23
paddd mm3,.dey13
paddd mm4,.dz13
paddd mm5,.dz23
movq .cby2,mm0
movq .cby1,mm1
movq .cey1,mm3
movq .cey2,mm2
movq .cz1,mm4
movq .cz2,mm5
else if Ext = NON
mov eax,.dx13
add .cx1,eax
mov ebx,.dx23
add .cx2,ebx

mov edx,.dbx13
add .cbx1,edx
mov eax,.dbx23
add .cbx2,eax
mov ebx,.dby13
add .cby1,ebx
mov edx,.dby23
add .cby2,edx

mov eax,.dex13
add .cex1,eax
mov ebx,.dex23
add .cex2,ebx
mov edx,.dey13
add .cey1,edx
mov eax,.dey23
add .cey2,eax

mov ebx,.dz13
add .cz1,ebx
mov edx,.dz23
add .cz2,edx
end if
inc ecx
cmp cx,.y3
jl .loop23
.loop23_done:

mov esp,ebp ; drop all locals
ret 34 ; and the 34 bytes of stack parameters

.call_bump_line:
; Draws the scanline whose number is in ecx with the current edge values.
; All general registers are preserved (pushad/popad), so ecx and edi survive.
; The 14 dwords pushed below are the stack parameters of bump_line_z; their
; order defines the callee's frame layout. bump_line_z removes them itself.

; push ebp
; push ecx
pushad

push dword .cz1
push dword .cz2
push .z_buff
push .t_emap
push .t_bmap
push dword .cey2
push .cex2
push dword .cby2
push .cbx2
push dword .cey1
push .cex1
push dword .cby1
push .cbx1
push ecx

mov eax,.cx1 ; eax = integer part of the first edge x
sar eax,ROUND
mov ebx,.cx2 ; ebx = integer part of the second edge x
sar ebx,ROUND

call bump_line_z

popad
ret
bump_line_z:
;-----------------------------------------------------------------------
; Draws one horizontal span of a bump + environment mapped triangle with
; a per-pixel Z-buffer test.
; For every pixel the bump map is sampled at the four neighbours of the
; current bump coordinate; the horizontal and vertical differences are
; added to the current environment map coordinate and the resulting env
; map texel is written to the screen (black if it falls outside the map).
;
; The screen buffer is addressed with 3 bytes per pixel, the Z-buffer
; with one dword per pixel. The routine relies on stosd/lodsd, so the
; direction flag is expected to be clear on entry.
; ebp is overwritten (not saved); eax, ebx, ecx, edx, esi, edi and the
; MMX/SSE registers used by the selected Ext path are clobbered.
; TEX_X, TEX_Y, TEX_SHIFT, TEXTURE_SIZE, Ext, size_x_var and size_y_var
; are defined outside this routine.
;-----------------------------------------------------------------------
;--------------in: eax - x1
;-------------- ebx - x2
;-------------- edi - pointer to screen buffer
;stack - other parameters (14 dwords, released by "ret 56"):
.y equ dword [ebp+4]
.bx1 equ [ebp+8] ; ---
.by1 equ dword [ebp+12] ; |
.ex1 equ [ebp+16] ; |
.ey1 equ dword [ebp+20] ; |> bump and env coords
.bx2 equ [ebp+24] ; |> shifted shl ROUND
.by2 equ dword [ebp+28] ; |
.ex2 equ [ebp+32] ; |
.ey2 equ dword [ebp+36] ; ---
.bmap equ dword [ebp+40]
.emap equ dword [ebp+44]
.z_buff equ dword [ebp+48]
.z2 equ dword [ebp+52] ; -- |> z coords shifted
.z1 equ dword [ebp+56] ; -- shl CATMULL_SHIFT

; locals - created by the pushes / "sub esp" below, in exactly this order
.x1 equ dword [ebp-4]
.x2 equ dword [ebp-8]
.dbx equ dword [ebp-12]
.dby equ [ebp-16]
.dex equ dword [ebp-20]
.dey equ [ebp-24]
.dz equ dword [ebp-28]
.cbx equ dword [ebp-32]
.cby equ [ebp-36]
.cex equ dword [ebp-40]
.cey equ [ebp-44]
.cz equ dword [ebp-48]
.czbuff equ dword [ebp-52]
.temp1 equ ebp-60
.temp2 equ ebp-68
.temp3 equ ebp-76
.temp4 equ ebp-84
.temp5 equ ebp-92

mov ebp,esp

; reject scanlines above or below the screen
mov ecx,.y
or ecx,ecx
jl .bl_end
; mov dx,[size_x_var]
; dec dx
cmp cx,[size_y_var] ;SIZE_Y
jge .bl_end

; make sure x1 < x2; an empty span (x1 = x2) draws nothing
cmp eax,ebx
jl .bl_ok
je .bl_end

; x1 > x2: swap both end points together with all their attributes
xchg eax,ebx
if Ext=NON
mov edx,.bx1
xchg edx,.bx2
mov .bx1,edx
mov edx,.by1
xchg edx,.by2
mov .by1,edx

mov edx,.ex1
xchg edx,.ex2
mov .ex1,edx
mov edx,.ey1
xchg edx,.ey2
mov .ey1,edx
end if
if Ext = MMX
movq mm0,.bx1
movq mm1,.ex1
movq mm2,.bx2
movq mm3,.ex2
movq .bx2,mm0
movq .ex2,mm1
movq .bx1,mm2
movq .ex1,mm3
end if
if Ext >= SSE
; the four coords of each end point are contiguous, swap 16 bytes at once
movups xmm0,.bx1
movups xmm1,.bx2
movups .bx2,xmm0
movups .bx1,xmm1
end if

mov edx,.z1
xchg edx,.z2
mov .z1,edx

.bl_ok:

push eax
push ebx ;store x1, x2
; reject spans lying completely to the right or to the left of the screen
movzx edx,word[size_x_var]
dec edx
cmp .x1,edx ;SIZE_X
jge .bl_end
cmp .x2,0
jle .bl_end

mov ebx,.x2
sub ebx,.x1 ; ebx = span length in pixels (> 0 here)

if Ext >= SSE

; per-pixel deltas of the four texture coords, computed in one divps
sub esp,16 ; room for .dbx, .dby, .dex, .dey
cvtsi2ss xmm3,ebx ;rcps
shufps xmm3,xmm3,0

cvtpi2ps xmm0,.bx1 ;mm0
movlhps xmm0,xmm0
cvtpi2ps xmm0,.ex1 ;mm2
cvtpi2ps xmm1,.bx2 ;mm1
movlhps xmm1,xmm1
cvtpi2ps xmm1,.ex2 ;mm3
subps xmm1,xmm0

divps xmm1,xmm3

shufps xmm1,xmm1,10110001b ; swap x/y inside each pair to match the frame layout
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords
movhlps xmm1,xmm1
cvtps2pi mm1,xmm1
movq .dey,mm0 ; stores .dey and .dex
movq .dby,mm1 ; stores .dby and .dbx

else

mov eax,.bx2 ; calc .dbx
sub eax,.bx1
cdq
idiv ebx
push eax

mov eax,.by2 ; calc .dby
sub eax,.by1
cdq
idiv ebx
push eax

mov eax,.ex2 ; calc .dex
sub eax,.ex1
cdq
idiv ebx
push eax

mov eax,.ey2 ; calc .dey
sub eax,.ey1
cdq
idiv ebx
push eax

end if

mov eax,.z2 ; calc .dz
sub eax,.z1
cdq
idiv ebx
push eax

cmp .x1,0 ; set correctly begin variable
jge @f ; CLIPPING ON FUNCTION
; span starts left of the screen: advance all
; start values by -x1 steps and begin at x = 0
mov ebx,.x1
neg ebx
imul ebx ; eax = .dz * abs(.x1)
add .z1,eax
mov .x1,0

mov eax,.dbx
imul ebx
add .bx1,eax

mov eax,.dby
imul ebx
add .by1,eax

mov eax,.dex
imul ebx
add .ex1,eax

mov eax,.dey
imul ebx
add .ey1,eax
@@:
; clip the right end to size_x - 1
movzx edx,word[size_x_var]
dec edx
cmp .x2,edx ;SIZE_X
jl @f
mov .x2,edx ;SIZE_X
@@:
movzx eax,word[size_x_var] ;SIZE_X ;calc memory begin in buffers
mov ebx,.y
mul ebx
mov ebx,.x1
add eax,ebx ; eax = y * size_x + x1 (pixel index)
mov ebx,eax
lea eax,[eax*3] ; screen buffer: 3 bytes per pixel
add edi,eax
mov esi,.z_buff ; z-buffer filled with dd variables
shl ebx,2
add esi,ebx

mov ecx,.x2
sub ecx,.x1 ; ecx = number of pixels to draw
; init current variables
push dword .bx1 ; -> .cbx
push .by1 ; -> .cby
push dword .ex1 ; -> .cex
push .ey1 ; -> .cey

push .z1 ; current z shl CATMULL_SHIFT
push esi ; -> .czbuff
;if Ext = SSE2
; movups xmm1,.dey
;end if
if Ext>=MMX
; keep current coords and their deltas in MMX registers for the whole loop
movq mm0,.cby ; mm0 = cby (low dword), cbx (high dword)
movq mm1,.cey ; mm1 = cey (low dword), cex (high dword)
movq mm2,.dby
movq mm3,.dey
end if
if Ext >= SSE2
; loop constants: xmm1 = index mask, xmm2 = the four neighbour offsets
; (-1, +1, -TEX_X, +TEX_X), xmm3 = bump map base address
mov eax,TEXTURE_SIZE
movd xmm1,eax
shufps xmm1,xmm1,0
push dword TEX_X
push dword -TEX_X
push dword 1
push dword -1
movups xmm2,[esp]
movd xmm3,.bmap
shufps xmm3,xmm3,0
end if

;align 16
.draw:
; if TEX = SHIFTING ;bump drawing only in shifting mode
mov esi,.czbuff ; .czbuff current address in buffer
mov ebx,.cz ; .cz - cur z position
cmp ebx,dword[esi]
jge .skip ; pixel is not nearer than the stored depth

if Ext>=MMX
movq mm6,mm0
psrld mm6,ROUND
movd eax,mm6 ; eax = integer bump y
psrlq mm6,32
movd esi,mm6 ; esi = integer bump x
else
mov eax,.cby
sar eax,ROUND
mov esi,.cbx
sar esi,ROUND
end if
shl eax,TEX_SHIFT ;-
add esi,eax ;- ; esi - current bump map index

if Ext = SSE2
; fetch the four neighbours through the precomputed offsets in xmm2
movd xmm0,esi
shufps xmm0,xmm0,0
paddd xmm0,xmm2
pand xmm0,xmm1
paddd xmm0,xmm3

movd ebx,xmm0
movzx eax,byte[ebx]
;
; shufps xmm0,xmm0,11100001b
psrldq xmm0,4
movd ebx,xmm0
movzx ebx,byte[ebx]
sub eax,ebx
;
; shufps xmm0,xmm0,11111110b
psrldq xmm0,4
movd ebx,xmm0
movzx edx, byte [ebx]
;
; shufps xmm0,xmm0,11111111b
psrldq xmm0,4
movd ebx,xmm0
movzx ebx, byte [ebx]
sub edx,ebx
;
else

; left neighbour
mov ebx,esi
dec ebx
and ebx,TEXTURE_SIZE
add ebx,.bmap
movzx eax,byte [ebx]

; right neighbour
mov ebx,esi
inc ebx
and ebx,TEXTURE_SIZE
add ebx,.bmap
movzx ebx,byte [ebx]

sub eax,ebx

; neighbour one row up
mov ebx,esi
sub ebx,TEX_X
and ebx,TEXTURE_SIZE
add ebx,.bmap
movzx edx,byte [ebx]

; neighbour one row down
mov ebx,esi
add ebx,TEX_X
and ebx,TEXTURE_SIZE
add ebx,.bmap
movzx ebx,byte [ebx]

sub edx,ebx
end if
; eax - horizontal sub
; edx - vertical sub
if Ext = NON
mov ebx,.cex ;.cex - current env map X
sar ebx,ROUND
add eax,ebx ; eax - modified x coord

mov ebx,.cey ;.cey - current env map y
sar ebx,ROUND
add edx,ebx ; edx - modified y coord
else
movq mm6,mm1 ; mm6 - copy of cur env coords
psrld mm6,ROUND
movd ebx,mm6 ; low dword = env y
psrlq mm6,32
add edx,ebx ; edx - modified y coord
movd ebx,mm6 ; high dword = env x
add eax,ebx ; eax - modified x coord
end if
; Env map lookup is valid only for 0 <= x < TEX_X and 0 <= y < TEX_Y.
; The upper bounds are exclusive: x = TEX_X would address the next row and
; y = TEX_Y a row behind the end of the map.
or eax,eax
jl .black
cmp eax,TEX_X
jge .black
or edx,edx
jl .black
cmp edx,TEX_Y
jge .black

shl edx,TEX_SHIFT
add edx,eax
lea esi,[edx*3] ; env map: 3 bytes per texel
add esi,.emap
lodsd
jmp .put_pixel
.black:
xor eax,eax
.put_pixel:
stosd ; writes 4 bytes ...
dec edi ; ... but advances by one 3-byte pixel only
mov ebx,.cz
mov esi,.czbuff
mov dword[esi],ebx ; store the new depth
jmp .no_skip
.skip:
add edi,3
.no_skip:
add .czbuff,4

;if Ext = SSE2
; movups xmm0,.cey
; paddd xmm0,xmm1
; movups .cey,xmm0
;
;end if
if Ext >= MMX
paddd mm0,mm2
paddd mm1,mm3
end if

if Ext=NON
mov eax,.dbx
add .cbx,eax
mov eax,.dby
add .cby,eax
mov eax,.dex
add .cex,eax
mov eax,.dey
add .cey,eax
end if
mov eax,.dz
add .cz,eax

dec ecx
jnz .draw
; end if
.bl_end:
mov esp,ebp ; drop all locals
ret 56 ; and the 14 dword parameters
/programs/demos/view3ds/bump_tex.inc
1,1817 → 1,1817
 
;CATMULL_SHIFT equ 8
;TEXTURE_SIZE EQU (TEX_X * TEX_Y)-1
;ROUND equ 8
;Ext = NON
;MMX = 1
;NON = 0
;------- Big thanks to Majuma (www.majuma.xt.pl) for absolutely great---
;------- DOS 13h mode demos --------------------------------------------
;------- Procedure draws bump triangle with texture, I use -------------
;--------Catmull Z-buffer algorithm- (Z coordinate interpolation)-------
;--------I calc texture pixel by this way: col1*col2/256 ---------------
bump_tex_triangle_z:
;------------------in - eax - x1 shl 16 + y1 -----------
;---------------------- ebx - x2 shl 16 + y2 -----------
;---------------------- ecx - x3 shl 16 + y3 -----------
;---------------------- edx - pointer to bump map-------
;---------------------- esi - pointer to env map--------
;---------------------- edi - pointer to screen buffer--
;---------------------- stack : bump coordinates--------
;---------------------- environment coordinates-
;---------------------- Z position coordinates--
;---------------------- pointer to Z buffer-----
;---------------------- pointer to texture------
;---------------------- texture coordinates-----
;-- Z-buffer - filled with coordinates as dword --------
;-- (Z coor. as word) shl CATMULL_SHIFT ----------------
.b_x1 equ ebp+4 ; procedure don't save registers !!!
.b_y1 equ ebp+6 ; each coordinate as word
.b_x2 equ ebp+8
.b_y2 equ ebp+10 ; b - bump map coords
.b_x3 equ ebp+12 ; e - env map coords
.b_y3 equ ebp+14
.e_x1 equ ebp+16
.e_y1 equ ebp+18
.e_x2 equ ebp+20
.e_y2 equ ebp+22
.e_x3 equ ebp+24
.e_y3 equ ebp+26
.z1 equ word[ebp+28]
.z2 equ word[ebp+30]
.z3 equ word[ebp+32]
.z_buff equ dword[ebp+34] ; pointer to Z-buffer
.tex_ptr equ dword[ebp+38] ; ptr to texture
.t_x1 equ ebp+42 ; texture coords
.t_y1 equ ebp+44
.t_x2 equ ebp+46
.t_y2 equ ebp+48
.t_x3 equ ebp+50
.t_y3 equ ebp+52
 
 
 
.t_bmap equ dword[ebp-4] ; pointer to bump map
.t_emap equ dword[ebp-8] ; pointer to env map
.x1 equ word[ebp-10]
.y1 equ word[ebp-12]
.x2 equ word[ebp-14]
.y2 equ word[ebp-16]
.x3 equ word[ebp-18]
.y3 equ word[ebp-20]
 
if 0 ;Ext <= SSE2
 
.dx12 equ dword[edi-4]
.dz12 equ [edi-8]
.dbx12 equ dword[edi-12]
.dby12 equ [edi-16]
.dex12 equ dword[edi-20]
.dey12 equ [edi-24]
.dtx12 equ dword[edi-28]
.dty12 equ [edi-32]
 
.dx13 equ dword[ebp-52-4*1]
.dz13 equ [ebp-52-4*2]
.dbx13 equ dword[ebp-52-4*3]
.dby13 equ [ebp-52-4*4]
.dex13 equ dword[ebp-52-4*5]
.dey13 equ [ebp-52-4*6]
.dtx13 equ dword[ebp-52-4*7]
.dty13 equ [ebp-52-4*8]
 
 
.dx23 equ dword[ebp-(52+4*9)]
.dz23 equ [ebp-(52+4*10)]
.dbx23 equ dword[ebp-(52+4*11)]
.dby23 equ [ebp-(52+4*12)]
.dex23 equ dword[ebp-(52+4*13)]
.dey23 equ [ebp-(52+4*14)]
.dtx23 equ dword[ebp-(52+4*15)]
.dty23 equ [ebp-(52+4*16)]
 
else
 
.dx12 equ dword[ebp-24]
.dz12 equ [ebp-28]
.dbx12 equ dword[ebp-32]
.dby12 equ [ebp-36]
.dex12 equ dword[ebp-40]
.dey12 equ [ebp-44]
.dtx12 equ dword[ebp-48]
.dty12 equ [ebp-52]
 
.dx13 equ dword[ebp-52-4*1]
.dz13 equ [ebp-52-4*2]
.dbx13 equ dword[ebp-52-4*3]
.dby13 equ [ebp-52-4*4]
.dex13 equ dword[ebp-52-4*5]
.dey13 equ [ebp-52-4*6]
.dtx13 equ dword[ebp-52-4*7]
.dty13 equ [ebp-52-4*8]
 
 
.dx23 equ dword[ebp-(52+4*9)]
.dz23 equ [ebp-(52+4*10)]
.dbx23 equ dword[ebp-(52+4*11)]
.dby23 equ [ebp-(52+4*12)]
.dex23 equ dword[ebp-(52+4*13)]
.dey23 equ [ebp-(52+4*14)]
.dtx23 equ dword[ebp-(52+4*15)]
.dty23 equ [ebp-(52+4*16)]
 
end if
 
if Ext < SSE
 
.cx1 equ dword[ebp-(52+4*17)] ; current variables
.cz1 equ [ebp-(52+4*18)]
.cx2 equ dword[ebp-(52+4*19)]
.cz2 equ [ebp-(52+4*20)]
.cbx1 equ dword[ebp-(52+4*21)]
.cby1 equ [ebp-(52+4*22)]
.cbx2 equ dword[ebp-(52+4*23)]
.cby2 equ [ebp-(52+4*24)]
.cex1 equ dword[ebp-(52+4*25)]
.cey1 equ [ebp-(52+4*26)]
.cex2 equ dword[ebp-(52+4*27)]
.cey2 equ [ebp-(52+4*28)]
 
.ctx1 equ dword[ebp-(52+4*29)]
.cty1 equ [ebp-(52+4*30)]
.ctx2 equ dword[ebp-(52+4*31)]
.cty2 equ [ebp-(52+4*32)]
 
else
 
.cx1 equ dword[ebp-(52+4*17)] ; current variables
.cz1 equ [ebp-(52+4*18)]
.cbx1 equ dword[ebp-(52+4*19)]
.cby1 equ [ebp-(52+4*20)]
.cex1 equ dword[ebp-(52+4*21)]
.cey1 equ [ebp-(52+4*22)]
.ctx1 equ dword[ebp-(52+4*23)]
.cty1 equ [ebp-(52+4*24)]
 
.cx2 equ dword[ebp-(52+4*25)]
.cz2 equ [ebp-(52+4*26)]
.cbx2 equ dword[ebp-(52+4*27)]
.cby2 equ [ebp-(52+4*28)]
.cex2 equ dword[ebp-(52+4*29)]
.cey2 equ [ebp-(52+4*30)]
.ctx2 equ dword[ebp-(52+4*31)]
.cty2 equ [ebp-(52+4*32)]
 
end if
cld
mov ebp,esp
push edx ; store bump map
push esi ; store e. map
; sub esp,120
.sort3: ; sort triangle coordinates...
cmp ax,bx
jle .sort1
xchg eax,ebx
mov edx,dword[.b_x1]
xchg edx,dword[.b_x2]
mov dword[.b_x1],edx
mov edx,dword[.e_x1]
xchg edx,dword[.e_x2]
mov dword[.e_x1],edx
mov edx,dword[.t_x1]
xchg edx,dword[.t_x2]
mov dword[.t_x1],edx
mov dx,.z1
xchg dx,.z2
mov .z1,dx
.sort1:
cmp bx,cx
jle .sort2
xchg ebx,ecx
mov edx,dword[.b_x2]
xchg edx,dword[.b_x3]
mov dword[.b_x2],edx
mov edx,dword[.e_x2]
xchg edx,dword[.e_x3]
mov dword[.e_x2],edx
mov edx,dword[.t_x2]
xchg edx,dword[.t_x3]
mov dword[.t_x2],edx
mov dx,.z2
xchg dx,.z3
mov .z2,dx
jmp .sort3
.sort2:
push eax ; store triangle coords in variables
push ebx
push ecx
mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that
and edx,ebx ; if *all* of them are negative a sign flag is raised
and edx,ecx
and edx,eax
test edx,80008000h ; Check both X&Y at once
jne .loop23_done
; mov edx,eax ; eax,ebx,ecx are ORd together into edx which means that
; or edx,ebx ; if any *one* of them is negative a sign flag is raised
; or edx,ecx
; test edx,80000000h ; Check only X
; jne .loop23_done
 
; cmp .x1,SIZE_X ; {
; jg .loop23_done
; cmp .x2,SIZE_X ; This can be optimized with effort
; jg .loop23_done
; cmp .x3,SIZE_X
; jg .loop23_done ; {
 
 
mov bx,.y2 ; calc delta 12
sub bx,.y1
jnz .bt_dx12_make
if 0 ;Ext >= SSE2
pxor xmm0,xmm0
movups .dty12,xmm0
movups .dey12,xmm0
sub esp,16
else
mov ecx,8
xor edx,edx
@@:
push edx ;dword 0
loop @b
end if
jmp .bt_dx12_done
.bt_dx12_make:
movsx ebx,bx
 
 
if Ext>=SSE
sub esp,32
; mov eax,256
cvtsi2ss xmm4,[i255d]
cvtsi2ss xmm3,ebx ;rcps
if 0 ;Ext >= SSE2
mov edi,ebp
sub edi,512
or edi,0x0000000f
end if
divss xmm3,xmm4
shufps xmm3,xmm3,0
 
movd mm0,[.b_x1]
movd mm1,[.b_x2]
movd mm2,[.e_x1]
movd mm3,[.e_x2]
 
pxor mm4,mm4
punpcklwd mm0,mm4
punpcklwd mm1,mm4
punpcklwd mm2,mm4
punpcklwd mm3,mm4
 
psubd mm1,mm0
psubd mm3,mm2
 
cvtpi2ps xmm1,mm1
movlhps xmm1,xmm1
cvtpi2ps xmm1,mm3
 
divps xmm1,xmm3 ;xmm1--> | dby | dbx | dey | dex |
 
shufps xmm1,xmm1,10110001b
;xmm1--> | dbx | dby | dex | dey |
;1 movups .dey12,xmm1
cvtps2pi mm0,xmm1 ;mm0,xmm1 ; mm0 -> 2 delta dwords
movhlps xmm1,xmm1
cvtps2pi mm1,xmm1 ;mm1,xmm1
movq .dey12,mm0
movq .dby12,mm1
;-------------
; pxor mm0,mm0
; pxor mm1,mm1
;/ pinsrw mm0,.z1,1
;/ pinsrw mm0,.x1,0
;/ pinsrw mm1,.z2,1
;/ pinsrw mm1,.x2,0
mov ax,.z2
sub ax,.z1
cwde
 
mov dx,.x2
sub dx,.x1
movsx edx,dx
 
;/ movd mm1,eax
 
;/ punpcklwd mm0,mm4
;/ punpcklwd mm1,mm4
 
; cvtpi2ps xmm1,mm1
; cvtpi2ps xmm2,mm0
; subps xmm1,xmm2
 
;/ psubd mm1,mm0
 
movd mm2,[.t_x1]
movd mm3,[.t_x2]
 
punpcklwd mm2,mm4
punpcklwd mm3,mm4
psubd mm3,mm2
 
;/ cvtpi2ps xmm1,mm1
cvtsi2ss xmm1,eax
movlhps xmm1,xmm1
cvtsi2ss xmm1,edx
; movss xmm1,xmm4
shufps xmm1,xmm1,00101111b
cvtpi2ps xmm1,mm3
 
divps xmm1,xmm3 ; xmm1--> | dx | dz | dty | dtx |
 
shufps xmm1,xmm1,11100001b
; xmm1--> | dx | dz | dtx | dty |
;1 movlps .dty12,xmm1
;1 movhps .dz12,xmm1
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords | dtx | dty |
movhlps xmm1,xmm1
cvtps2pi mm1,xmm1
movq .dty12,mm0
movq .dz12,mm1
;----
; mov ax,.z2
; sub ax,.z1
; cwde
; mov bx,.x2
; sub bx,.x1
; movsx ebx,bx
; movd mm1,eax
; psllq mm1,32
; movd mm1,ebx
 
;; push ebx
;; push eax
;; movq mm1,[esp]
;; add esp,8
;;; mov ax,.z1
;;; mov bx,.z2
;;; shl eax,16
;;; shl ebx,16
;;; mov ax,.x1
;;; mov bx,.x2
; movd mm2,[.t_x1]
; movd mm3,[.t_x2]
;; movd mm0,eax
;; movd mm1,ebx
 
; pxor mm4,mm4
;; punpcklwd mm0,mm4
;; punpcklwd mm1,mm4
; punpcklwd mm2,mm4
; punpcklwd mm3,mm4
 
;; psubd mm1,mm0
; psubd mm3,mm2
 
 
; cvtpi2ps xmm1,mm1
; movlhps xmm1,xmm1
; cvtpi2ps xmm1,mm3
 
; divps xmm1,xmm3 ; xmm1--> | dz | dx | dty | dtx |
 
; shufps xmm1,xmm1,10110001b
; xmm1--> | dx | dz | dtx | dty |
; cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords | dtx | dty |
; movhlps xmm1,xmm1
; cvtps2pi mm1,xmm1 ; mm1 --> 2 delta dwords | dx | dz |
; movq .dty12,mm0
; movq .dz12,mm1
else
mov ax,.x2
sub ax,.x1
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dx12,eax
push eax
 
mov ax,.z2
sub ax,.z1
cwde
shl eax,CATMULL_SHIFT
cdq
idiv ebx
push eax
 
mov ax,word[.b_x2]
sub ax,word[.b_x1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dbx12,eax
push eax
 
mov ax,word[.b_y2]
sub ax,word[.b_y1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dby12,eax
push eax
 
mov ax,word[.e_x2]
sub ax,word[.e_x1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dex12,eax
push eax
 
mov ax,word[.e_y2]
sub ax,word[.e_y1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dey12,eax
push eax
 
mov ax,word[.t_x2]
sub ax,word[.t_x1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dtx12,eax
push eax
 
mov ax,word[.t_y2]
sub ax,word[.t_y1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dty12,eax
push eax
end if
.bt_dx12_done:
 
mov bx,.y3 ; calc delta13
sub bx,.y1
jnz .bt_dx13_make
mov ecx,8
xor edx,edx
@@:
push edx ;dword 0
loop @b
jmp .bt_dx13_done
.bt_dx13_make:
movsx ebx,bx
 
if Ext>=SSE
 
sub esp,32
; mov eax,256
cvtsi2ss xmm4,[i255d]
cvtsi2ss xmm3,ebx ;rcps
divss xmm3,xmm4
shufps xmm3,xmm3,0
 
movd mm0,[.b_x1]
movd mm1,[.b_x3]
movd mm2,[.e_x1]
movd mm3,[.e_x3]
 
pxor mm4,mm4
punpcklwd mm0,mm4
punpcklwd mm1,mm4
punpcklwd mm2,mm4
punpcklwd mm3,mm4
 
psubd mm1,mm0
psubd mm3,mm2
 
cvtpi2ps xmm1,mm1
movlhps xmm1,xmm1
cvtpi2ps xmm1,mm3
 
divps xmm1,xmm3 ;xmm1--> | dby | dbx | dey | dex |
 
shufps xmm1,xmm1,10110001b
;xmm1--> | dbx | dby | dex | dey |
;1 movups .dey13,xmm1
 
cvtps2pi mm0,xmm1 ;mm0,xmm1 ; mm0 -> 2 delta dwords
movhlps xmm1,xmm1
cvtps2pi mm1,xmm1 ;mm1,xmm1
movq .dey13,mm0
movq .dby13,mm1
 
mov ax,.z3
sub ax,.z1
cwde
 
mov dx,.x3
sub dx,.x1
movsx edx,dx
 
movd mm2,[.t_x1]
movd mm3,[.t_x3]
 
punpcklwd mm2,mm4
punpcklwd mm3,mm4
psubd mm3,mm2
 
cvtsi2ss xmm1,eax
movlhps xmm1,xmm1
cvtsi2ss xmm1,edx
shufps xmm1,xmm1,00101111b
cvtpi2ps xmm1,mm3
 
divps xmm1,xmm3 ; xmm1--> | dx | dz | dty | dtx |
 
shufps xmm1,xmm1,11100001b
; xmm1--> | dx | dz | dtx | dty |
;1 movlps .dty13,xmm1
;1 movhps .dz13,xmm1
 
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords | dtx | dty |
movhlps xmm1,xmm1
cvtps2pi mm1,xmm1
movq .dty13,mm0
movq .dz13,mm1
 
else
 
mov ax,.x3
sub ax,.x1
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dx13,eax
push eax
 
mov ax,.z3
sub ax,.z1
cwde
shl eax,CATMULL_SHIFT
cdq
idiv ebx
; mov .dz13,eax
push eax
 
 
mov ax,word[.b_x3]
sub ax,word[.b_x1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dbx13,eax
push eax
 
mov ax,word[.b_y3]
sub ax,word[.b_y1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dby13,eax
push eax
 
mov ax,word[.e_x3]
sub ax,word[.e_x1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dex13,eax
push eax
 
mov ax,word[.e_y3]
sub ax,word[.e_y1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dey13,eax
push eax
 
mov ax,word[.t_x3]
sub ax,word[.t_x1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dtx13,eax
push eax
 
mov ax,word[.t_y3]
sub ax,word[.t_y1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dty13,eax
push eax
end if
.bt_dx13_done:
 
mov bx,.y3 ; calc delta23
sub bx,.y2
jnz .bt_dx23_make
mov ecx,8
xor edx,edx
@@:
push edx ;dword 0
loop @b
jmp .bt_dx23_done
.bt_dx23_make:
movsx ebx,bx
 
if Ext>=SSE
 
sub esp,32
; mov eax,256
cvtsi2ss xmm4,[i255d]
cvtsi2ss xmm3,ebx ;rcps
divss xmm3,xmm4
shufps xmm3,xmm3,0
 
movd mm0,[.b_x2]
movd mm1,[.b_x3]
movd mm2,[.e_x2]
movd mm3,[.e_x3]
 
pxor mm4,mm4
punpcklwd mm0,mm4
punpcklwd mm1,mm4
punpcklwd mm2,mm4
punpcklwd mm3,mm4
 
psubd mm1,mm0
psubd mm3,mm2
 
cvtpi2ps xmm1,mm1
movlhps xmm1,xmm1
cvtpi2ps xmm1,mm3
 
divps xmm1,xmm3 ;xmm1--> | dby | dbx | dey | dex |
 
shufps xmm1,xmm1,10110001b
;xmm1--> | dbx | dby | dex | dey |
;1 movups .dey23,xmm1
 
cvtps2pi mm0,xmm1 ;mm0,xmm1 ; mm0 -> 2 delta dwords
movhlps xmm1,xmm1
cvtps2pi mm1,xmm1 ;mm1,xmm1
movq .dey23,mm0
movq .dby23,mm1
 
mov ax,.z3
sub ax,.z2
cwde
 
mov dx,.x3
sub dx,.x2
movsx edx,dx
 
movd mm2,[.t_x2]
movd mm3,[.t_x3]
 
punpcklwd mm2,mm4
punpcklwd mm3,mm4
psubd mm3,mm2
 
cvtsi2ss xmm1,eax
movlhps xmm1,xmm1
cvtsi2ss xmm1,edx
shufps xmm1,xmm1,00101111b
cvtpi2ps xmm1,mm3
 
divps xmm1,xmm3 ; xmm1--> | dx | dz | dty | dtx |
 
shufps xmm1,xmm1,11100001b
; xmm1--> | dx | dz | dtx | dty |
; movlps .dty23,xmm1
; movhps .dz23,xmm1
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords | dtx | dty |
movhlps xmm1,xmm1
cvtps2pi mm1,xmm1 ; mm1 --> 2 delta dwords | dx | dz |
movq .dty23,mm0
movq .dz23,mm1
 
 
else
mov ax,.x3
sub ax,.x2
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dx23,eax
push eax
 
mov ax,.z3
sub ax,.z2
cwde
shl eax,CATMULL_SHIFT
cdq
idiv ebx
; mov .dz23,eax
push eax
 
mov ax,word[.b_x3]
sub ax,word[.b_x2]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dbx23,eax
push eax
 
mov ax,word[.b_y3]
sub ax,word[.b_y2]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dby23,eax
push eax
 
mov ax,word[.e_x3]
sub ax,word[.e_x2]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dex23,eax
push eax
 
mov ax,word[.e_y3]
sub ax,word[.e_y2]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dey23,eax
push eax
 
 
mov ax,word[.t_x3]
sub ax,word[.t_x2]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dtx23,eax
push eax
 
mov ax,word[.t_y3]
sub ax,word[.t_y2]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dty23,eax
push eax
end if
; sub esp,40
.bt_dx23_done:
sub esp,64
 
movsx eax,.x1
shl eax,ROUND
mov .cx1,eax
mov .cx2,eax
; push eax
; push eax
 
movsx ebx,word[.b_x1]
shl ebx,ROUND
mov .cbx1,ebx
mov .cbx2,ebx
; push ebx
; push ebx
 
movsx ecx,word[.b_y1]
shl ecx,ROUND
mov .cby1,ecx
mov .cby2,ecx
; push ecx
; push ecx
 
movsx edx,word[.e_x1]
shl edx,ROUND
mov .cex1,edx
mov .cex2,edx
; push edx
; push edx
 
movsx eax,word[.e_y1]
shl eax,ROUND
mov .cey1,eax
mov .cey2,eax
; push eax
; push eax
 
movsx ebx,.z1
shl ebx,CATMULL_SHIFT
mov .cz1,ebx
mov .cz2,ebx
; push ebx
; push ebx
 
; sub esp,16
movsx ecx,word[.t_x1]
shl ecx,ROUND
mov .ctx1,ecx
mov .ctx2,ecx
;push ecx
;push ecx
 
movsx edx,word[.t_y1]
shl edx,ROUND
mov .cty1,edx
mov .cty2,edx
; push edx
; push edx
 
if Ext >= SSE2
movups xmm0,.cby1
movups xmm1,.cty1
movups xmm2,.cby2
movups xmm3,.cty2
movups xmm4,.dby13
movups xmm5,.dty13
movups xmm6,.dby12
movups xmm7,.dty12
.scby1 equ [edi]
.scty1 equ [edi+16]
.scby2 equ [edi+32]
.scty2 equ [edi+48]
.sdby13 equ [edi+64]
.sdty13 equ [edi+80]
.sdby12 equ [edi+96]
.sdty12 equ [edi+128]
push edi
mov edi,sse_repository
movaps .scby1,xmm0
movaps .scty1,xmm1
movaps .scby2,xmm2
movaps .scty2,xmm3
movaps .sdby13,xmm4
movaps .sdty13,xmm5
movaps .sdby12,xmm6
movaps .sdty12,xmm7
pop edi
 
end if
movsx ecx,.y1
cmp cx,.y2
jge .loop12_done
.loop12:
;if Ext >= SSE2
; fxsave [sse_repository]
;end if
call .call_line
if Ext >= SSE2
; fxrstor [sse_repository]
movups xmm0,.cby1
movups xmm1,.cty1
movups xmm2,.cby2
movups xmm3,.cty2
; movups xmm4,.dby13
; movups xmm5,.dty13
; movups xmm6,.dby12
; movups xmm7,.dty12
; paddd xmm0,xmm4
; paddd xmm1,xmm5
; paddd xmm2,xmm6
; paddd xmm3,xmm7
push edi
mov edi,sse_repository
paddd xmm0,.sdby13
paddd xmm1,.sdty13
paddd xmm2,.sdby12
paddd xmm3,.sdty12
pop edi
movups .cby1,xmm0
movups .cty1,xmm1
movups .cby2,xmm2
movups .cty2,xmm3
end if
 
if (Ext = MMX) | (Ext = SSE)
movq mm0,.cby2
movq mm1,.cby1
movq mm2,.cey2
movq mm3,.cey1
movq mm4,.cty1
movq mm5,.cty2
movq mm6,.cz1
movq mm7,.cz2
paddd mm0,.dby12
paddd mm1,.dby13
paddd mm2,.dey12
paddd mm3,.dey13
paddd mm4,.dty13
paddd mm5,.dty12
paddd mm6,.dz13
paddd mm7,.dz12
movq .cby2,mm0
movq .cby1,mm1
movq .cey1,mm3
movq .cey2,mm2
movq .cty1,mm4
movq .cty2,mm5
movq .cz1,mm6
movq .cz2,mm7
end if
if Ext = NON
mov edx,.dbx13
add .cbx1,edx
mov eax,.dbx12
add .cbx2,eax
mov ebx,.dby13
add .cby1,ebx
mov edx,.dby12
add .cby2,edx
 
mov eax,.dex13
add .cex1,eax
mov ebx,.dex12
add .cex2,ebx
mov edx,.dey13
add .cey1,edx
mov eax,.dey12
add .cey2,eax
 
mov eax,.dtx13
add .ctx1,eax
mov ebx,.dtx12
add .ctx2,ebx
mov edx,.dty13
add .cty1,edx
mov eax,.dty12
add .cty2,eax
 
mov eax,.dx13
add .cx1,eax
mov ebx,.dx12
add .cx2,ebx
mov ebx,.dz13
add .cz1,ebx
mov edx,.dz12
add .cz2,edx
end if
inc ecx
cmp cx,.y2
jl .loop12
.loop12_done:
 
movsx ecx,.y2
cmp cx,.y3
jge .loop23_done
 
 
movsx eax,.z2
shl eax,CATMULL_SHIFT
mov .cz2,eax
 
movsx ebx,.x2
shl ebx,ROUND
mov .cx2,ebx
 
movzx edx,word[.b_x2]
shl edx,ROUND
mov .cbx2,edx
 
movzx eax,word[.b_y2]
shl eax,ROUND
mov .cby2,eax
 
movzx ebx,word[.e_x2]
shl ebx,ROUND
mov .cex2,ebx
 
movzx edx,word[.e_y2]
shl edx,ROUND
mov .cey2,edx
 
movzx eax,word[.t_x2]
shl eax,ROUND
mov .ctx2,eax
 
movzx ebx,word[.t_y2]
shl ebx,ROUND
mov .cty2,ebx
if Ext >= SSE2
movups xmm2,.cby2
movups xmm3,.cty2
; movups xmm4,.dby13
; movups xmm5,.dty13
movups xmm6,.dby23
movups xmm7,.dty23
; .scby1 equ [edi]
; .scty1 equ [edi+16]
; .scby2 equ [edi+32]
; .scty2 equ [edi+48]
; .sdby13 equ [edi+64]
; .sdty13 equ [edi+80]
.sdby23 equ [edi+160]
.sdty23 equ [edi+192]
push edi
mov edi,sse_repository
; movaps .scby1,xmm0
; movaps .scty1,xmm1
movaps .scby2,xmm2
movaps .scty2,xmm3
; movaps .sdby13,xmm4
; movaps .sdty13,xmm5
movaps .sdby23,xmm6
movaps .sdty23,xmm7
pop edi
 
end if
 
.loop23:
;if Ext >= SSE2
; fxsave [sse_repository]
;end if
call .call_line
 
if Ext >= SSE2
 
movups xmm0,.cby1
movups xmm1,.cty1
movups xmm2,.cby2
movups xmm3,.cty2
 
 
push edi
mov edi,sse_repository
paddd xmm0,.sdby13
paddd xmm1,.sdty13
paddd xmm2,.sdby23
paddd xmm3,.sdty23
pop edi
movups .cby1,xmm0
movups .cty1,xmm1
movups .cby2,xmm2
movups .cty2,xmm3
 
 
 
 
; fxrstor [sse_repository]
; movups xmm0,.cby1
; movups xmm1,.cty1
; movups xmm2,.cby2
; movups xmm3,.cty2
; movups xmm4,.dby13
; movups xmm5,.dty13
; movups xmm6,.dby23
; movups xmm7,.dty23
; paddd xmm0,xmm4
; paddd xmm1,xmm5
; paddd xmm2,xmm6
; paddd xmm3,xmm7
; movups .cby1,xmm0
; movups .cty1,xmm1
; movups .cby2,xmm2
; movups .cty2,xmm3
;
end if
if (Ext = MMX) | (Ext = SSE)
movq mm0,.cby2
movq mm1,.cby1
movq mm2,.cey2
movq mm3,.cey1
movq mm4,.cty1
movq mm5,.cty2
movq mm6,.cz1
movq mm7,.cz2
paddd mm0,.dby23
paddd mm1,.dby13
paddd mm2,.dey23
paddd mm3,.dey13
paddd mm4,.dty13
paddd mm5,.dty23
paddd mm6,.dz13
paddd mm7,.dz23
movq .cby2,mm0
movq .cby1,mm1
movq .cey2,mm2
movq .cey1,mm3
movq .cty1,mm4
movq .cty2,mm5
movq .cz1,mm6
movq .cz2,mm7
end if
If Ext = NON
mov edx,.dbx13
add .cbx1,edx
mov eax,.dbx23
add .cbx2,eax
mov ebx,.dby13
add .cby1,ebx
mov edx,.dby23
add .cby2,edx
 
mov eax,.dex13
add .cex1,eax
mov ebx,.dex23
add .cex2,ebx
mov edx,.dey13
add .cey1,edx
mov eax,.dey23
add .cey2,eax
 
mov eax,.dx13
add .cx1,eax
mov ebx,.dx23
add .cx2,ebx
mov ebx,.dz13
add .cz1,ebx
mov edx,.dz23
add .cz2,edx
 
mov eax,.dtx13
add .ctx1,eax
mov ebx,.dtx23
add .ctx2,ebx
mov edx,.dty13
add .cty1,edx
mov eax,.dty23
add .cty2,eax
end if
inc ecx
cmp cx,.y3
jl .loop23
.loop23_done:
 
mov esp,ebp
ret 50
 
.call_line:
 
pushad
; xmm0= cby1,cbx1,cz1,cx1
; xmm1= cty1,ctx1,cey1,cex1
if Ext >= SSE2
sub esp,8
shufps xmm1,xmm1,10110001b
shufps xmm3,xmm3,10110001b
movlps [esp],xmm1
else
push dword .cty1
push .ctx1
end if
push dword .cz1
if Ext>=SSE2
sub esp,8
movlps [esp],xmm3
else
push dword .cty2
push .ctx2
end if
push dword .cz2
if Ext>=SSE2
sub esp,32
movhps [esp+24],xmm3
shufps xmm2,xmm2,10110001b
movlps [esp+16],xmm2
movhps [esp+8],xmm1
shufps xmm0,xmm0,10110001b
movlps [esp],xmm0 ;================================
 
else
push dword .cey2
push .cex2
push dword .cby2
push .cbx2
push dword .cey1
push .cex1
push dword .cby1
push .cbx1
end if
 
push .tex_ptr
push .z_buff
push .t_emap
push .t_bmap
 
push ecx
 
mov eax,.cx1
sar eax,ROUND
mov ebx,.cx2
sar ebx,ROUND
 
call bump_tex_line_z
 
popad
;end if
ret
bump_tex_line_z:
;-----------------------------------------------------------------------
; Draws one horizontal span of a bump-mapped, environment-mapped and
; textured triangle, testing every pixel against the Z-buffer.
;
; in:   eax   - x1, first column of the span
;       ebx   - x2, column just past the end of the span
;       edi   - pointer to screen buffer (3 bytes per pixel)
;       stack - the parameters listed below (76 bytes, freed by 'ret 76')
; out:  none
;
; notes:
;  - ebp and the general purpose registers are overwritten and are not
;    restored here; MMX/SSE registers are used when Ext > NON and no
;    'emms' is executed in this routine.
;  - lodsd is used, so the direction flag has to be clear on entry.
;  - When x1 > x2 both end points (with all their attributes) are
;    swapped; x1 = x2 draws nothing.
;  - Lines with y outside 0..size_y_var-1 are rejected, the span is
;    clipped to the columns 0..size_x_var-1.
;  - Every drawn pixel writes exactly three bytes to the screen buffer.
;-----------------------------------------------------------------------
;stack - another parameters :
.y        equ dword [ebp+4]
.bmap     equ dword [ebp+8]     ; bump map pointer
.emap     equ dword [ebp+12]    ; env map pointer
.z_buff   equ dword [ebp+16]    ; z buffer
.tex_map  equ dword [ebp+20]    ; texture pointer

.bx1      equ [ebp+24]          ;   ---
.by1      equ [ebp+28]          ;       |
.ex1      equ [ebp+32]          ;       |
.ey1      equ [ebp+36]          ;       |
.bx2      equ [ebp+40]          ;       |
.by2      equ [ebp+44]          ;       |>   b. map and e. map coords
.ex2      equ [ebp+48]          ;       |>   shifted shl ROUND
.ey2      equ [ebp+52]          ;   ---
.z2       equ [ebp+56]
.tx2      equ [ebp+60]
.ty2      equ [ebp+64]
.z1       equ [ebp+68]
.tx1      equ [ebp+72]
.ty1      equ [ebp+76]

; locals - created by the pushes / 'sub esp' below, in exactly this order
.x1       equ [ebp-4]
.x2       equ [ebp-8]
.dbx      equ [ebp-12]          ; per pixel deltas
.dby      equ [ebp-16]
.dex      equ [ebp-20]
.dey      equ [ebp-24]
.dz       equ [ebp-28]
.dtx      equ [ebp-32]
.dty      equ [ebp-36]

.cbx      equ [ebp-40]          ; current (running) values
.cby      equ [ebp-44]
.cex      equ [ebp-48]
.cey      equ [ebp-52]
.cz       equ [ebp-56]
.czbuff   equ [ebp-60]
.ctx      equ [ebp-64]
.cty      equ [ebp-68]
.c_scr    equ [ebp-72]

.temp1    equ ebp-80
.temp2    equ ebp-88
.temp3    equ ebp-76
.temp4    equ ebp-84
.temp5    equ ebp-92

        mov     ebp,esp                 ; [ebp] = return address

        mov     ecx,.y                  ; reject lines outside the screen
        or      ecx,ecx
        jl      .bl_end
        movzx   edx,word[size_y_var]
        cmp     ecx,edx                 ; SIZE_Y
        jge     .bl_end

        cmp     eax,ebx
        jl      .bl_ok                  ; already ordered left to right
        je      .bl_end                 ; empty span

; x1 > x2: exchange every attribute of the two end points
if Ext=NON
        mov     edx,.bx1
        xchg    edx,.bx2
        mov     .bx1,edx
        mov     edx,.by1
        xchg    edx,.by2
        mov     .by1,edx

        mov     edx,.ex1
        xchg    edx,.ex2
        mov     .ex1,edx
        mov     edx,.ey1
        xchg    edx,.ey2
        mov     .ey1,edx

        mov     edx,.tx1
        xchg    edx,.tx2
        mov     .tx1,edx
        mov     edx,.ty1
        xchg    edx,.ty2
        mov     .ty1,edx
end if
if Ext = MMX
        movq    mm0,.bx1                ; each movq moves an x,y pair
        movq    mm1,.bx2
        movq    mm2,.ex1
        movq    mm3,.ex2
        movq    mm4,.tx1
        movq    mm5,.tx2
        movq    .bx2,mm0
        movq    .bx1,mm1
        movq    .ex1,mm3
        movq    .ex2,mm2
        movq    .tx1,mm5
        movq    .tx2,mm4
end if
if Ext>=SSE
        movups  xmm0,.bx1               ; bx1,by1,ex1,ey1
        movups  xmm1,.bx2               ; bx2,by2,ex2,ey2
        movups  .bx1,xmm1
        movups  .bx2,xmm0
        movq    mm0,.tx1
        movq    mm1,.tx2
        movq    .tx1,mm1
        movq    .tx2,mm0
end if
        xchg    eax,ebx
        mov     edx,.z1
        xchg    edx,.z2
        mov     .z1,edx

.bl_ok:
        push    eax                     ; .x1
        push    ebx                     ; .x2
        movzx   ebx,word[size_x_var]
        cmp     dword .x1,ebx           ; span starts right of the screen
        jge     .bl_end
        cmp     dword .x2,0             ; span ends left of the screen
        jle     .bl_end

        mov     ebx,.x2
        sub     ebx,.x1                 ; ebx = span length, > 0 here

; ---- per pixel deltas: (end - begin) / length ------------------------
if Ext>=SSE

        sub     esp,28                  ; room for .dbx ... .dty
        cvtsi2ss xmm3,ebx
        shufps  xmm3,xmm3,0             ; xmm3 = length in all four lanes

        cvtpi2ps xmm0,.bx1              ; fixed point values -> float
        movlhps xmm0,xmm0
        cvtpi2ps xmm0,.ex1              ; xmm0 = | by1 | bx1 | ey1 | ex1 |
        cvtpi2ps xmm1,.bx2
        movlhps xmm1,xmm1
        cvtpi2ps xmm1,.ex2              ; xmm1 = | by2 | bx2 | ey2 | ex2 |
        subps   xmm1,xmm0

        divps   xmm1,xmm3

        shufps  xmm1,xmm1,10110001b     ; xmm1 = | dbx | dby | dex | dey |
        cvtps2pi mm0,xmm1               ; mm0 -> 2 delta dwords
        movhlps xmm1,xmm1
        cvtps2pi mm1,xmm1
        movq    .dey,mm0                ; fills .dey and .dex
        movq    .dby,mm1                ; fills .dby and .dbx

        movd    mm2,.z1
        movd    mm3,.z2

        cvtpi2ps xmm0,.tx1
        movlhps xmm0,xmm0
        cvtpi2ps xmm0,mm2               ; xmm0 = | ty1 | tx1 |  0  | z1 |
        cvtpi2ps xmm1,.tx2
        movlhps xmm1,xmm1
        cvtpi2ps xmm1,mm3               ; xmm1 = | ty2 | tx2 |  0  | z2 |
        subps   xmm1,xmm0

        divps   xmm1,xmm3

        shufps  xmm1,xmm1,10110100b     ; xmm1 = | dtx | dty |  0  | dz |
        cvtps2pi mm0,xmm1               ; mm0 -> 2 delta dwords
        movhlps xmm1,xmm1
        cvtps2pi mm1,xmm1
        movd    .dz,mm0
        movq    .dty,mm1                ; fills .dty and .dtx

else

        mov     eax,.bx2                ; calc .dbx
        sub     eax,.bx1
        cdq
        idiv    ebx
        push    eax

        mov     eax,.by2                ; calc .dby
        sub     eax,.by1
        cdq
        idiv    ebx
        push    eax

        mov     eax,.ex2                ; calc .dex
        sub     eax,.ex1
        cdq
        idiv    ebx
        push    eax

        mov     eax,.ey2                ; calc .dey
        sub     eax,.ey1
        cdq
        idiv    ebx
        push    eax

        mov     eax,.z2                 ; calc .dz
        sub     eax,.z1
        cdq
        idiv    ebx
        push    eax

        mov     eax,.tx2                ; calc .dtx
        sub     eax,.tx1
        cdq
        idiv    ebx
        push    eax

        mov     eax,.ty2                ; calc .dty
        sub     eax,.ty1
        cdq
        idiv    ebx
        push    eax

end if

; ---- clip the left edge: advance every start value by -x1 steps ------
        cmp     dword .x1,0
        jge     @f
        mov     ebx,.x1
        neg     ebx                     ; ebx = columns left of the screen

        mov     eax,.dz
        imul    ebx                     ; eax = .dz * abs(.x1)
        add     .z1,eax
        mov     dword .x1,0

        mov     eax,.dbx
        imul    ebx
        add     .bx1,eax

        mov     eax,.dby
        imul    ebx
        add     .by1,eax

        mov     eax,.dex
        imul    ebx
        add     .ex1,eax

        mov     eax,.dey
        imul    ebx
        add     .ey1,eax

        mov     eax,.dtx
        imul    ebx
        add     .tx1,eax

        mov     eax,.dty
        imul    ebx
        add     .ty1,eax

  @@:
; ---- clip the right edge ---------------------------------------------
        movzx   eax,word[size_x_var]
        cmp     dword .x2,eax
        jl      @f
        mov     dword .x2,eax           ; SIZE_X
  @@:
; ---- start addresses in the Z-buffer and in the screen buffer --------
        movzx   eax,word[size_x_var]    ; SIZE_X
        mul     .y
        add     eax,.x1                 ; eax = y*size_x + x1
        lea     esi,[4*eax]
        add     esi,.z_buff             ; z-buffer filled with dd variables
        lea     eax,[eax*3]
        add     edi,eax                 ; screen buffer: 3 bytes per pixel

        mov     ecx,.x2
        sub     ecx,.x1                 ; ecx = number of pixels to process

; init current variables
        push    dword .bx1              ; .cbx  b, e and t shifted shl ROUND
        push    dword .by1              ; .cby
        push    dword .ex1              ; .cex
        push    dword .ey1              ; .cey

        push    dword .z1               ; .cz   z shl CATMULL_SHIFT
        push    esi                     ; .czbuff

        push    dword .tx1              ; .ctx
        push    dword .ty1              ; .cty
        push    edi                     ; .c_scr
if Ext = SSE2
        mov     eax,TEXTURE_SIZE
        movd    xmm1,eax
        shufps  xmm1,xmm1,0             ; xmm1 = index mask in all lanes
        push    dword TEX_X
        push    dword -TEX_X
        push    dword 1
        push    dword -1
        movups  xmm2,[esp]              ; xmm2 = | TEX_X | -TEX_X | 1 | -1 |
        movd    xmm3,.bmap
        shufps  xmm3,xmm3,0             ; xmm3 = bump map base in all lanes
end if

if Ext>=MMX
        movq    mm7,.cty                ; mm7 = | ctx | cty |
        movq    mm6,.cby                ; mm6 = | cbx | cby |
        movq    mm5,.cey                ; mm5 = | cex | cey |
end if

.draw:
        mov     esi,.czbuff             ; current address in the Z-buffer
        mov     ebx,.cz                 ; current z
        cmp     ebx,dword[esi]
        jge     .skip                   ; not in front of the stored depth

; ---- index of the current bump map texel -----------------------------
if Ext=NON
        mov     eax,.cby
        shr     eax,ROUND
        mov     esi,.cbx
        shr     esi,ROUND
else
        movq    mm1,mm6
        psrld   mm1,ROUND
        movd    eax,mm1                 ; eax = cby shr ROUND
        psrlq   mm1,32
        movd    esi,mm1                 ; esi = cbx shr ROUND
end if

        shl     eax,TEX_SHIFT
        add     esi,eax                 ; esi - current bump map index

; ---- bump gradient: eax = left - right, edx = upper - lower ----------
if Ext = SSE2

        movd    xmm0,esi
        shufps  xmm0,xmm0,0
        paddd   xmm0,xmm2               ; the four neighbour indices
        pand    xmm0,xmm1               ; wrap inside the bump map
        paddd   xmm0,xmm3               ; -> addresses

        movd    ebx,xmm0
        movzx   eax,byte[ebx]           ; texel at index-1

        psrldq  xmm0,4
        movd    ebx,xmm0
        movzx   ebx,byte[ebx]           ; texel at index+1
        sub     eax,ebx

        psrldq  xmm0,4
        movd    ebx,xmm0
        movzx   edx,byte[ebx]           ; texel at index-TEX_X

        psrldq  xmm0,4
        movd    ebx,xmm0
        movzx   ebx,byte[ebx]           ; texel at index+TEX_X
        sub     edx,ebx

else
        lea     ebx,[esi-1]
        and     ebx,TEXTURE_SIZE
        add     ebx,.bmap
        movzx   eax,byte [ebx]

        lea     ebx,[esi+1]
        and     ebx,TEXTURE_SIZE
        add     ebx,.bmap
        movzx   ebx,byte [ebx]
        sub     eax,ebx

        lea     ebx,[esi-TEX_X]
        and     ebx,TEXTURE_SIZE
        add     ebx,.bmap
        movzx   edx,byte [ebx]

        lea     ebx,[esi+TEX_X]
        and     ebx,TEXTURE_SIZE
        add     ebx,.bmap
        movzx   ebx,byte [ebx]
        sub     edx,ebx
end if

; ---- displace the env map coordinates by the gradient ----------------
;     eax - horizontal sub modificated x coord
;     edx - vertical sub modificated y coord
if Ext=NON
        mov     ebx,.cex                ; .cex - current env map X
        shr     ebx,ROUND
        add     eax,ebx

        mov     ebx,.cey                ; .cey - current env map y
        shr     ebx,ROUND
        add     edx,ebx

else
        ; mm5 = | cex | cey |: the low dword is the Y coordinate, so it
        ; goes to edx and the high dword (X) to eax, exactly as in the
        ; Ext=NON variant above.
        movq    mm1,mm5                 ; mm5 - copy of cur env coords
        psrld   mm1,ROUND
        movd    ebx,mm1                 ; ebx = cey shr ROUND
        add     edx,ebx
        psrlq   mm1,32
        movd    ebx,mm1                 ; ebx = cex shr ROUND
        add     eax,ebx
end if

        ; Valid env map texels are 0..TEX_X-1 / 0..TEX_Y-1; anything
        ; else (including exactly TEX_X or TEX_Y) is drawn black.
        or      eax,eax
        jl      .black
        cmp     eax,TEX_X
        jge     .black
        or      edx,edx
        jl      .black
        cmp     edx,TEX_Y
        jge     .black

        ; A disturbance in the env map is a disturbance in the texture.
        ; Idea for later: draw the undisturbed pixel instead of a black one.
        shl     edx,TEX_SHIFT
        add     edx,eax
        lea     esi,[edx*3]
        add     esi,.emap
        lodsd                           ; eax = env map colour (low 3 bytes)

; ---- index of the current texture texel ------------------------------
if Ext=NON
        mov     edx,.cty
        shr     edx,ROUND               ; sar

        mov     edi,.ctx
        shr     edi,ROUND               ; sar
else
        movq    mm1,mm7
        psrld   mm1,ROUND
        movd    edx,mm1                 ; edx = cty shr ROUND
        psrlq   mm1,32
        movd    edi,mm1                 ; edi = ctx shr ROUND

end if

        shl     edx,TEX_SHIFT
        add     edi,edx
        and     edi,TEXTURE_SIZE
        lea     esi,[edi*3]
        add     esi,.tex_map

; ---- pixel = env colour * texture colour / 256, per channel ----------
if Ext=NON
        mov     edx,eax                 ; edx = env colour
        lodsd                           ; eax = texture colour
        push    ax
        mul     dl                      ; first channel
        mov     dl,ah
        pop     ax
        shr     ax,8
        mul     dh                      ; second channel, result in ah
        mov     al,dl
        mov     edi,.c_scr
        stosw
        shr     edx,16
        shr     eax,16
        mul     dl                      ; third channel
        shr     ax,8
        stosb
else
        movd    mm0,eax
        pxor    mm1,mm1
        punpcklbw mm0,mm1               ; env colour bytes -> words
        movd    mm2,[esi]
        punpcklbw mm2,mm1               ; texture colour bytes -> words
        pmullw  mm0,mm2
        psrlw   mm0,8
        packuswb mm0,mm1
        ; A pixel is 3 bytes wide: store only 3 bytes so that the first
        ; byte of the following pixel stays untouched.
        movd    eax,mm0
        mov     edi,.c_scr
        mov     [edi],ax
        shr     eax,16
        mov     [edi+2],al

end if

        jmp     .actual_zbuff           ; actualize z buffer

.black:
        xor     eax,eax
        mov     edi,.c_scr
        mov     [edi],ax                ; three zero bytes, not four
        mov     [edi+2],al
.actual_zbuff:
        mov     eax,.cz
        mov     edi,.czbuff
        mov     [edi],eax

.skip:
; ---- step to the next pixel ------------------------------------------
        add     dword .czbuff,4
        add     dword .c_scr,3

if Ext=NON
        mov     eax,.dbx
        add     .cbx,eax
        mov     ebx,.dby
        add     .cby,ebx

        mov     edx,.dex
        add     .cex,edx
        mov     eax,.dey
        add     .cey,eax

        mov     ebx,.dtx
        add     .ctx,ebx
        mov     edx,.dty
        add     .cty,edx

else
        paddd   mm7,.dty                ; x and y of each pair at once
        paddd   mm6,.dby
        paddd   mm5,.dey
end if
        mov     eax,.dz
        add     .cz,eax

        dec     ecx
        jnz     .draw

.bl_end:
        mov     esp,ebp                 ; drop all locals
        ret     76                      ; 19 dword parameters
;Ext = MMX
 
; else
; movq mm5, qword[.temp1] ;-
; paddd mm5, qword[.temp5] ; .temp5 == low dword = TEX_X, high dword = -TEX_X
; pand mm5, qword[.temp3] ; .temp3 == low = high dword = TEX_SIZE
; paddd mm5, qword[.temp4] ; .temp4 == low = high dword = offset .bmap
; movd ebx,mm5
; psrlq mm5,32
; end if
 
;CATMULL_SHIFT equ 8
;TEXTURE_SIZE EQU (TEX_X * TEX_Y)-1
;ROUND equ 8
;Ext = NON
;MMX = 1
;NON = 0
;------- Big thanks to Majuma (www.majuma.xt.pl) for absolutely great---
;------- DOS 13h mode demos --------------------------------------------
;------- Procedure draws a bump-mapped triangle with a texture, using --
;------- the Catmull Z-buffer algorithm (Z coordinate interpolation). --
;------- A texture pixel is calculated this way: col1*col2/256 ---------
bump_tex_triangle_z:
;------------------in - eax - x1 shl 16 + y1 -----------
;---------------------- ebx - x2 shl 16 + y2 -----------
;---------------------- ecx - x3 shl 16 + y3 -----------
;---------------------- edx - pointer to bump map-------
;---------------------- esi - pointer to env map--------
;---------------------- edi - pointer to screen buffer--
;---------------------- stack : bump coordinates--------
;---------------------- environment coordinates-
;---------------------- Z position coordinates--
;---------------------- pointer to Z buffer-----
;---------------------- pointer to texture------
;---------------------- texture coordinates-----
;-- Z-buffer - filled with coordinates as dword --------
;-- (Z coor. as word) shl CATMULL_SHIFT ----------------
.b_x1 equ ebp+4 ; procedure don't save registers !!!
.b_y1 equ ebp+6 ; each coordinate as word
.b_x2 equ ebp+8
.b_y2 equ ebp+10 ; b - bump map coords
.b_x3 equ ebp+12 ; e - env map coords
.b_y3 equ ebp+14
.e_x1 equ ebp+16
.e_y1 equ ebp+18
.e_x2 equ ebp+20
.e_y2 equ ebp+22
.e_x3 equ ebp+24
.e_y3 equ ebp+26
.z1 equ word[ebp+28]
.z2 equ word[ebp+30]
.z3 equ word[ebp+32]
.z_buff equ dword[ebp+34] ; pointer to Z-buffer
.tex_ptr equ dword[ebp+38] ; ptr to texture
.t_x1 equ ebp+42 ; texture coords
.t_y1 equ ebp+44
.t_x2 equ ebp+46
.t_y2 equ ebp+48
.t_x3 equ ebp+50
.t_y3 equ ebp+52
 
 
 
.t_bmap equ dword[ebp-4] ; pointer to bump map
.t_emap equ dword[ebp-8] ; pointer to env map
.x1 equ word[ebp-10]
.y1 equ word[ebp-12]
.x2 equ word[ebp-14]
.y2 equ word[ebp-16]
.x3 equ word[ebp-18]
.y3 equ word[ebp-20]
 
if 0 ;Ext <= SSE2
 
.dx12 equ dword[edi-4]
.dz12 equ [edi-8]
.dbx12 equ dword[edi-12]
.dby12 equ [edi-16]
.dex12 equ dword[edi-20]
.dey12 equ [edi-24]
.dtx12 equ dword[edi-28]
.dty12 equ [edi-32]
 
.dx13 equ dword[ebp-52-4*1]
.dz13 equ [ebp-52-4*2]
.dbx13 equ dword[ebp-52-4*3]
.dby13 equ [ebp-52-4*4]
.dex13 equ dword[ebp-52-4*5]
.dey13 equ [ebp-52-4*6]
.dtx13 equ dword[ebp-52-4*7]
.dty13 equ [ebp-52-4*8]
 
 
.dx23 equ dword[ebp-(52+4*9)]
.dz23 equ [ebp-(52+4*10)]
.dbx23 equ dword[ebp-(52+4*11)]
.dby23 equ [ebp-(52+4*12)]
.dex23 equ dword[ebp-(52+4*13)]
.dey23 equ [ebp-(52+4*14)]
.dtx23 equ dword[ebp-(52+4*15)]
.dty23 equ [ebp-(52+4*16)]
 
else
 
.dx12 equ dword[ebp-24]
.dz12 equ [ebp-28]
.dbx12 equ dword[ebp-32]
.dby12 equ [ebp-36]
.dex12 equ dword[ebp-40]
.dey12 equ [ebp-44]
.dtx12 equ dword[ebp-48]
.dty12 equ [ebp-52]
 
.dx13 equ dword[ebp-52-4*1]
.dz13 equ [ebp-52-4*2]
.dbx13 equ dword[ebp-52-4*3]
.dby13 equ [ebp-52-4*4]
.dex13 equ dword[ebp-52-4*5]
.dey13 equ [ebp-52-4*6]
.dtx13 equ dword[ebp-52-4*7]
.dty13 equ [ebp-52-4*8]
 
 
.dx23 equ dword[ebp-(52+4*9)]
.dz23 equ [ebp-(52+4*10)]
.dbx23 equ dword[ebp-(52+4*11)]
.dby23 equ [ebp-(52+4*12)]
.dex23 equ dword[ebp-(52+4*13)]
.dey23 equ [ebp-(52+4*14)]
.dtx23 equ dword[ebp-(52+4*15)]
.dty23 equ [ebp-(52+4*16)]
 
end if
 
if Ext < SSE
 
.cx1 equ dword[ebp-(52+4*17)] ; current variables
.cz1 equ [ebp-(52+4*18)]
.cx2 equ dword[ebp-(52+4*19)]
.cz2 equ [ebp-(52+4*20)]
.cbx1 equ dword[ebp-(52+4*21)]
.cby1 equ [ebp-(52+4*22)]
.cbx2 equ dword[ebp-(52+4*23)]
.cby2 equ [ebp-(52+4*24)]
.cex1 equ dword[ebp-(52+4*25)]
.cey1 equ [ebp-(52+4*26)]
.cex2 equ dword[ebp-(52+4*27)]
.cey2 equ [ebp-(52+4*28)]
 
.ctx1 equ dword[ebp-(52+4*29)]
.cty1 equ [ebp-(52+4*30)]
.ctx2 equ dword[ebp-(52+4*31)]
.cty2 equ [ebp-(52+4*32)]
 
else
 
.cx1 equ dword[ebp-(52+4*17)] ; current variables
.cz1 equ [ebp-(52+4*18)]
.cbx1 equ dword[ebp-(52+4*19)]
.cby1 equ [ebp-(52+4*20)]
.cex1 equ dword[ebp-(52+4*21)]
.cey1 equ [ebp-(52+4*22)]
.ctx1 equ dword[ebp-(52+4*23)]
.cty1 equ [ebp-(52+4*24)]
 
.cx2 equ dword[ebp-(52+4*25)]
.cz2 equ [ebp-(52+4*26)]
.cbx2 equ dword[ebp-(52+4*27)]
.cby2 equ [ebp-(52+4*28)]
.cex2 equ dword[ebp-(52+4*29)]
.cey2 equ [ebp-(52+4*30)]
.ctx2 equ dword[ebp-(52+4*31)]
.cty2 equ [ebp-(52+4*32)]
 
end if
cld
mov ebp,esp
push edx ; store bump map
push esi ; store e. map
; sub esp,120
.sort3: ; sort triangle coordinates...
cmp ax,bx
jle .sort1
xchg eax,ebx
mov edx,dword[.b_x1]
xchg edx,dword[.b_x2]
mov dword[.b_x1],edx
mov edx,dword[.e_x1]
xchg edx,dword[.e_x2]
mov dword[.e_x1],edx
mov edx,dword[.t_x1]
xchg edx,dword[.t_x2]
mov dword[.t_x1],edx
mov dx,.z1
xchg dx,.z2
mov .z1,dx
.sort1:
cmp bx,cx
jle .sort2
xchg ebx,ecx
mov edx,dword[.b_x2]
xchg edx,dword[.b_x3]
mov dword[.b_x2],edx
mov edx,dword[.e_x2]
xchg edx,dword[.e_x3]
mov dword[.e_x2],edx
mov edx,dword[.t_x2]
xchg edx,dword[.t_x3]
mov dword[.t_x2],edx
mov dx,.z2
xchg dx,.z3
mov .z2,dx
jmp .sort3
.sort2:
push eax ; store triangle coords in variables
push ebx
push ecx
mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that
and edx,ebx ; if *all* of them are negative a sign flag is raised
and edx,ecx
and edx,eax
test edx,80008000h ; Check both X&Y at once
jne .loop23_done
; mov edx,eax ; eax,ebx,ecx are ORd together into edx which means that
; or edx,ebx ; if any *one* of them is negative a sign flag is raised
; or edx,ecx
; test edx,80000000h ; Check only X
; jne .loop23_done
 
; cmp .x1,SIZE_X ; {
; jg .loop23_done
; cmp .x2,SIZE_X ; This can be optimized with effort
; jg .loop23_done
; cmp .x3,SIZE_X
; jg .loop23_done ; {
 
 
mov bx,.y2 ; calc delta 12
sub bx,.y1
jnz .bt_dx12_make
if 0 ;Ext >= SSE2
pxor xmm0,xmm0
movups .dty12,xmm0
movups .dey12,xmm0
sub esp,16
else
mov ecx,8
xor edx,edx
@@:
push edx ;dword 0
loop @b
end if
jmp .bt_dx12_done
.bt_dx12_make:
movsx ebx,bx
 
 
if Ext>=SSE
sub esp,32
; mov eax,256
cvtsi2ss xmm4,[i255d]
cvtsi2ss xmm3,ebx ;rcps
if 0 ;Ext >= SSE2
mov edi,ebp
sub edi,512
or edi,0x0000000f
end if
divss xmm3,xmm4
shufps xmm3,xmm3,0
 
movd mm0,[.b_x1]
movd mm1,[.b_x2]
movd mm2,[.e_x1]
movd mm3,[.e_x2]
 
pxor mm4,mm4
punpcklwd mm0,mm4
punpcklwd mm1,mm4
punpcklwd mm2,mm4
punpcklwd mm3,mm4
 
psubd mm1,mm0
psubd mm3,mm2
 
cvtpi2ps xmm1,mm1
movlhps xmm1,xmm1
cvtpi2ps xmm1,mm3
 
divps xmm1,xmm3 ;xmm1--> | dby | dbx | dey | dex |
 
shufps xmm1,xmm1,10110001b
;xmm1--> | dbx | dby | dex | dey |
;1 movups .dey12,xmm1
cvtps2pi mm0,xmm1 ;mm0,xmm1 ; mm0 -> 2 delta dwords
movhlps xmm1,xmm1
cvtps2pi mm1,xmm1 ;mm1,xmm1
movq .dey12,mm0
movq .dby12,mm1
;-------------
; pxor mm0,mm0
; pxor mm1,mm1
;/ pinsrw mm0,.z1,1
;/ pinsrw mm0,.x1,0
;/ pinsrw mm1,.z2,1
;/ pinsrw mm1,.x2,0
mov ax,.z2
sub ax,.z1
cwde
 
mov dx,.x2
sub dx,.x1
movsx edx,dx
 
;/ movd mm1,eax
 
;/ punpcklwd mm0,mm4
;/ punpcklwd mm1,mm4
 
; cvtpi2ps xmm1,mm1
; cvtpi2ps xmm2,mm0
; subps xmm1,xmm2
 
;/ psubd mm1,mm0
 
movd mm2,[.t_x1]
movd mm3,[.t_x2]
 
punpcklwd mm2,mm4
punpcklwd mm3,mm4
psubd mm3,mm2
 
;/ cvtpi2ps xmm1,mm1
cvtsi2ss xmm1,eax
movlhps xmm1,xmm1
cvtsi2ss xmm1,edx
; movss xmm1,xmm4
shufps xmm1,xmm1,00101111b
cvtpi2ps xmm1,mm3
 
divps xmm1,xmm3 ; xmm1--> | dx | dz | dty | dtx |
 
shufps xmm1,xmm1,11100001b
; xmm1--> | dx | dz | dtx | dty |
;1 movlps .dty12,xmm1
;1 movhps .dz12,xmm1
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords | dtx | dty |
movhlps xmm1,xmm1
cvtps2pi mm1,xmm1
movq .dty12,mm0
movq .dz12,mm1
;----
; mov ax,.z2
; sub ax,.z1
; cwde
; mov bx,.x2
; sub bx,.x1
; movsx ebx,bx
; movd mm1,eax
; psllq mm1,32
; movd mm1,ebx
 
;; push ebx
;; push eax
;; movq mm1,[esp]
;; add esp,8
;;; mov ax,.z1
;;; mov bx,.z2
;;; shl eax,16
;;; shl ebx,16
;;; mov ax,.x1
;;; mov bx,.x2
; movd mm2,[.t_x1]
; movd mm3,[.t_x2]
;; movd mm0,eax
;; movd mm1,ebx
 
; pxor mm4,mm4
;; punpcklwd mm0,mm4
;; punpcklwd mm1,mm4
; punpcklwd mm2,mm4
; punpcklwd mm3,mm4
 
;; psubd mm1,mm0
; psubd mm3,mm2
 
 
; cvtpi2ps xmm1,mm1
; movlhps xmm1,xmm1
; cvtpi2ps xmm1,mm3
 
; divps xmm1,xmm3 ; xmm1--> | dz | dx | dty | dtx |
 
; shufps xmm1,xmm1,10110001b
; xmm1--> | dx | dz | dtx | dty |
; cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords | dtx | dty |
; movhlps xmm1,xmm1
; cvtps2pi mm1,xmm1 ; mm1 --> 2 delta dwords | dx | dz |
; movq .dty12,mm0
; movq .dz12,mm1
else
mov ax,.x2
sub ax,.x1
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dx12,eax
push eax
 
mov ax,.z2
sub ax,.z1
cwde
shl eax,CATMULL_SHIFT
cdq
idiv ebx
push eax
 
mov ax,word[.b_x2]
sub ax,word[.b_x1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dbx12,eax
push eax
 
mov ax,word[.b_y2]
sub ax,word[.b_y1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dby12,eax
push eax
 
mov ax,word[.e_x2]
sub ax,word[.e_x1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dex12,eax
push eax
 
mov ax,word[.e_y2]
sub ax,word[.e_y1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dey12,eax
push eax
 
mov ax,word[.t_x2]
sub ax,word[.t_x1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dtx12,eax
push eax
 
mov ax,word[.t_y2]
sub ax,word[.t_y1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dty12,eax
push eax
end if
.bt_dx12_done:
 
mov bx,.y3 ; calc delta13
sub bx,.y1
jnz .bt_dx13_make
mov ecx,8
xor edx,edx
@@:
push edx ;dword 0
loop @b
jmp .bt_dx13_done
.bt_dx13_make:
movsx ebx,bx
 
if Ext>=SSE
 
sub esp,32
; mov eax,256
cvtsi2ss xmm4,[i255d]
cvtsi2ss xmm3,ebx ;rcps
divss xmm3,xmm4
shufps xmm3,xmm3,0
 
movd mm0,[.b_x1]
movd mm1,[.b_x3]
movd mm2,[.e_x1]
movd mm3,[.e_x3]
 
pxor mm4,mm4
punpcklwd mm0,mm4
punpcklwd mm1,mm4
punpcklwd mm2,mm4
punpcklwd mm3,mm4
 
psubd mm1,mm0
psubd mm3,mm2
 
cvtpi2ps xmm1,mm1
movlhps xmm1,xmm1
cvtpi2ps xmm1,mm3
 
divps xmm1,xmm3 ;xmm1--> | dby | dbx | dey | dex |
 
shufps xmm1,xmm1,10110001b
;xmm1--> | dbx | dby | dex | dey |
;1 movups .dey13,xmm1
 
cvtps2pi mm0,xmm1 ;mm0,xmm1 ; mm0 -> 2 delta dwords
movhlps xmm1,xmm1
cvtps2pi mm1,xmm1 ;mm1,xmm1
movq .dey13,mm0
movq .dby13,mm1
 
mov ax,.z3
sub ax,.z1
cwde
 
mov dx,.x3
sub dx,.x1
movsx edx,dx
 
movd mm2,[.t_x1]
movd mm3,[.t_x3]
 
punpcklwd mm2,mm4
punpcklwd mm3,mm4
psubd mm3,mm2
 
cvtsi2ss xmm1,eax
movlhps xmm1,xmm1
cvtsi2ss xmm1,edx
shufps xmm1,xmm1,00101111b
cvtpi2ps xmm1,mm3
 
divps xmm1,xmm3 ; xmm1--> | dx | dz | dty | dtx |
 
shufps xmm1,xmm1,11100001b
; xmm1--> | dx | dz | dtx | dty |
;1 movlps .dty13,xmm1
;1 movhps .dz13,xmm1
 
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords | dtx | dty |
movhlps xmm1,xmm1
cvtps2pi mm1,xmm1
movq .dty13,mm0
movq .dz13,mm1
 
else
 
mov ax,.x3
sub ax,.x1
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dx13,eax
push eax
 
mov ax,.z3
sub ax,.z1
cwde
shl eax,CATMULL_SHIFT
cdq
idiv ebx
; mov .dz13,eax
push eax
 
 
mov ax,word[.b_x3]
sub ax,word[.b_x1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dbx13,eax
push eax
 
mov ax,word[.b_y3]
sub ax,word[.b_y1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dby13,eax
push eax
 
mov ax,word[.e_x3]
sub ax,word[.e_x1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dex13,eax
push eax
 
mov ax,word[.e_y3]
sub ax,word[.e_y1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dey13,eax
push eax
 
mov ax,word[.t_x3]
sub ax,word[.t_x1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dtx13,eax
push eax
 
mov ax,word[.t_y3]
sub ax,word[.t_y1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dty13,eax
push eax
end if
.bt_dx13_done:
 
mov bx,.y3 ; calc delta23
sub bx,.y2
jnz .bt_dx23_make
mov ecx,8
xor edx,edx
@@:
push edx ;dword 0
loop @b
jmp .bt_dx23_done
.bt_dx23_make:
movsx ebx,bx
 
if Ext>=SSE
 
sub esp,32
; mov eax,256
cvtsi2ss xmm4,[i255d]
cvtsi2ss xmm3,ebx ;rcps
divss xmm3,xmm4
shufps xmm3,xmm3,0
 
movd mm0,[.b_x2]
movd mm1,[.b_x3]
movd mm2,[.e_x2]
movd mm3,[.e_x3]
 
pxor mm4,mm4
punpcklwd mm0,mm4
punpcklwd mm1,mm4
punpcklwd mm2,mm4
punpcklwd mm3,mm4
 
psubd mm1,mm0
psubd mm3,mm2
 
cvtpi2ps xmm1,mm1
movlhps xmm1,xmm1
cvtpi2ps xmm1,mm3
 
divps xmm1,xmm3 ;xmm1--> | dby | dbx | dey | dex |
 
shufps xmm1,xmm1,10110001b
;xmm1--> | dbx | dby | dex | dey |
;1 movups .dey23,xmm1
 
cvtps2pi mm0,xmm1 ;mm0,xmm1 ; mm0 -> 2 delta dwords
movhlps xmm1,xmm1
cvtps2pi mm1,xmm1 ;mm1,xmm1
movq .dey23,mm0
movq .dby23,mm1
 
mov ax,.z3
sub ax,.z2
cwde
 
mov dx,.x3
sub dx,.x2
movsx edx,dx
 
movd mm2,[.t_x2]
movd mm3,[.t_x3]
 
punpcklwd mm2,mm4
punpcklwd mm3,mm4
psubd mm3,mm2
 
cvtsi2ss xmm1,eax
movlhps xmm1,xmm1
cvtsi2ss xmm1,edx
shufps xmm1,xmm1,00101111b
cvtpi2ps xmm1,mm3
 
divps xmm1,xmm3 ; xmm1--> | dx | dz | dty | dtx |
 
shufps xmm1,xmm1,11100001b
; xmm1--> | dx | dz | dtx | dty |
; movlps .dty23,xmm1
; movhps .dz23,xmm1
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords | dtx | dty |
movhlps xmm1,xmm1
cvtps2pi mm1,xmm1 ; mm1 --> 2 delta dwords | dx | dz |
movq .dty23,mm0
movq .dz23,mm1
 
 
else
mov ax,.x3
sub ax,.x2
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dx23,eax
push eax
 
mov ax,.z3
sub ax,.z2
cwde
shl eax,CATMULL_SHIFT
cdq
idiv ebx
; mov .dz23,eax
push eax
 
mov ax,word[.b_x3]
sub ax,word[.b_x2]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dbx23,eax
push eax
 
mov ax,word[.b_y3]
sub ax,word[.b_y2]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dby23,eax
push eax
 
mov ax,word[.e_x3]
sub ax,word[.e_x2]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dex23,eax
push eax
 
mov ax,word[.e_y3]
sub ax,word[.e_y2]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dey23,eax
push eax
 
 
mov ax,word[.t_x3]
sub ax,word[.t_x2]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dtx23,eax
push eax
 
mov ax,word[.t_y3]
sub ax,word[.t_y2]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dty23,eax
push eax
end if
; sub esp,40
.bt_dx23_done:
sub esp,64
 
movsx eax,.x1
shl eax,ROUND
mov .cx1,eax
mov .cx2,eax
; push eax
; push eax
 
movsx ebx,word[.b_x1]
shl ebx,ROUND
mov .cbx1,ebx
mov .cbx2,ebx
; push ebx
; push ebx
 
movsx ecx,word[.b_y1]
shl ecx,ROUND
mov .cby1,ecx
mov .cby2,ecx
; push ecx
; push ecx
 
movsx edx,word[.e_x1]
shl edx,ROUND
mov .cex1,edx
mov .cex2,edx
; push edx
; push edx
 
movsx eax,word[.e_y1]
shl eax,ROUND
mov .cey1,eax
mov .cey2,eax
; push eax
; push eax
 
movsx ebx,.z1
shl ebx,CATMULL_SHIFT
mov .cz1,ebx
mov .cz2,ebx
; push ebx
; push ebx
 
; sub esp,16
movsx ecx,word[.t_x1]
shl ecx,ROUND
mov .ctx1,ecx
mov .ctx2,ecx
;push ecx
;push ecx
 
movsx edx,word[.t_y1]
shl edx,ROUND
mov .cty1,edx
mov .cty2,edx
; push edx
; push edx
 
if Ext >= SSE2
movups xmm0,.cby1
movups xmm1,.cty1
movups xmm2,.cby2
movups xmm3,.cty2
movups xmm4,.dby13
movups xmm5,.dty13
movups xmm6,.dby12
movups xmm7,.dty12
.scby1 equ [edi]
.scty1 equ [edi+16]
.scby2 equ [edi+32]
.scty2 equ [edi+48]
.sdby13 equ [edi+64]
.sdty13 equ [edi+80]
.sdby12 equ [edi+96]
.sdty12 equ [edi+128]
push edi
mov edi,sse_repository
movaps .scby1,xmm0
movaps .scty1,xmm1
movaps .scby2,xmm2
movaps .scty2,xmm3
movaps .sdby13,xmm4
movaps .sdty13,xmm5
movaps .sdby12,xmm6
movaps .sdty12,xmm7
pop edi
 
end if
movsx ecx,.y1
cmp cx,.y2
jge .loop12_done
.loop12:
;if Ext >= SSE2
; fxsave [sse_repository]
;end if
call .call_line
if Ext >= SSE2
; fxrstor [sse_repository]
movups xmm0,.cby1
movups xmm1,.cty1
movups xmm2,.cby2
movups xmm3,.cty2
; movups xmm4,.dby13
; movups xmm5,.dty13
; movups xmm6,.dby12
; movups xmm7,.dty12
; paddd xmm0,xmm4
; paddd xmm1,xmm5
; paddd xmm2,xmm6
; paddd xmm3,xmm7
push edi
mov edi,sse_repository
paddd xmm0,.sdby13
paddd xmm1,.sdty13
paddd xmm2,.sdby12
paddd xmm3,.sdty12
pop edi
movups .cby1,xmm0
movups .cty1,xmm1
movups .cby2,xmm2
movups .cty2,xmm3
end if
 
if (Ext = MMX) | (Ext = SSE)
movq mm0,.cby2
movq mm1,.cby1
movq mm2,.cey2
movq mm3,.cey1
movq mm4,.cty1
movq mm5,.cty2
movq mm6,.cz1
movq mm7,.cz2
paddd mm0,.dby12
paddd mm1,.dby13
paddd mm2,.dey12
paddd mm3,.dey13
paddd mm4,.dty13
paddd mm5,.dty12
paddd mm6,.dz13
paddd mm7,.dz12
movq .cby2,mm0
movq .cby1,mm1
movq .cey1,mm3
movq .cey2,mm2
movq .cty1,mm4
movq .cty2,mm5
movq .cz1,mm6
movq .cz2,mm7
end if
if Ext = NON
mov edx,.dbx13
add .cbx1,edx
mov eax,.dbx12
add .cbx2,eax
mov ebx,.dby13
add .cby1,ebx
mov edx,.dby12
add .cby2,edx
 
mov eax,.dex13
add .cex1,eax
mov ebx,.dex12
add .cex2,ebx
mov edx,.dey13
add .cey1,edx
mov eax,.dey12
add .cey2,eax
 
mov eax,.dtx13
add .ctx1,eax
mov ebx,.dtx12
add .ctx2,ebx
mov edx,.dty13
add .cty1,edx
mov eax,.dty12
add .cty2,eax
 
mov eax,.dx13
add .cx1,eax
mov ebx,.dx12
add .cx2,ebx
mov ebx,.dz13
add .cz1,ebx
mov edx,.dz12
add .cz2,edx
end if
inc ecx
cmp cx,.y2
jl .loop12
.loop12_done:
 
movsx ecx,.y2
cmp cx,.y3
jge .loop23_done
 
 
movsx eax,.z2
shl eax,CATMULL_SHIFT
mov .cz2,eax
 
movsx ebx,.x2
shl ebx,ROUND
mov .cx2,ebx
 
movzx edx,word[.b_x2]
shl edx,ROUND
mov .cbx2,edx
 
movzx eax,word[.b_y2]
shl eax,ROUND
mov .cby2,eax
 
movzx ebx,word[.e_x2]
shl ebx,ROUND
mov .cex2,ebx
 
movzx edx,word[.e_y2]
shl edx,ROUND
mov .cey2,edx
 
movzx eax,word[.t_x2]
shl eax,ROUND
mov .ctx2,eax
 
movzx ebx,word[.t_y2]
shl ebx,ROUND
mov .cty2,ebx
if Ext >= SSE2
movups xmm2,.cby2
movups xmm3,.cty2
; movups xmm4,.dby13
; movups xmm5,.dty13
movups xmm6,.dby23
movups xmm7,.dty23
; .scby1 equ [edi]
; .scty1 equ [edi+16]
; .scby2 equ [edi+32]
; .scty2 equ [edi+48]
; .sdby13 equ [edi+64]
; .sdty13 equ [edi+80]
.sdby23 equ [edi+160]
.sdty23 equ [edi+192]
push edi
mov edi,sse_repository
; movaps .scby1,xmm0
; movaps .scty1,xmm1
movaps .scby2,xmm2
movaps .scty2,xmm3
; movaps .sdby13,xmm4
; movaps .sdty13,xmm5
movaps .sdby23,xmm6
movaps .sdty23,xmm7
pop edi
 
end if
 
.loop23:
;if Ext >= SSE2
; fxsave [sse_repository]
;end if
call .call_line
 
if Ext >= SSE2
 
movups xmm0,.cby1
movups xmm1,.cty1
movups xmm2,.cby2
movups xmm3,.cty2
 
 
push edi
mov edi,sse_repository
paddd xmm0,.sdby13
paddd xmm1,.sdty13
paddd xmm2,.sdby23
paddd xmm3,.sdty23
pop edi
movups .cby1,xmm0
movups .cty1,xmm1
movups .cby2,xmm2
movups .cty2,xmm3
 
 
 
 
; fxrstor [sse_repository]
; movups xmm0,.cby1
; movups xmm1,.cty1
; movups xmm2,.cby2
; movups xmm3,.cty2
; movups xmm4,.dby13
; movups xmm5,.dty13
; movups xmm6,.dby23
; movups xmm7,.dty23
; paddd xmm0,xmm4
; paddd xmm1,xmm5
; paddd xmm2,xmm6
; paddd xmm3,xmm7
; movups .cby1,xmm0
; movups .cty1,xmm1
; movups .cby2,xmm2
; movups .cty2,xmm3
;
end if
if (Ext = MMX) | (Ext = SSE)
movq mm0,.cby2
movq mm1,.cby1
movq mm2,.cey2
movq mm3,.cey1
movq mm4,.cty1
movq mm5,.cty2
movq mm6,.cz1
movq mm7,.cz2
paddd mm0,.dby23
paddd mm1,.dby13
paddd mm2,.dey23
paddd mm3,.dey13
paddd mm4,.dty13
paddd mm5,.dty23
paddd mm6,.dz13
paddd mm7,.dz23
movq .cby2,mm0
movq .cby1,mm1
movq .cey2,mm2
movq .cey1,mm3
movq .cty1,mm4
movq .cty2,mm5
movq .cz1,mm6
movq .cz2,mm7
end if
If Ext = NON
mov edx,.dbx13
add .cbx1,edx
mov eax,.dbx23
add .cbx2,eax
mov ebx,.dby13
add .cby1,ebx
mov edx,.dby23
add .cby2,edx
 
mov eax,.dex13
add .cex1,eax
mov ebx,.dex23
add .cex2,ebx
mov edx,.dey13
add .cey1,edx
mov eax,.dey23
add .cey2,eax
 
mov eax,.dx13
add .cx1,eax
mov ebx,.dx23
add .cx2,ebx
mov ebx,.dz13
add .cz1,ebx
mov edx,.dz23
add .cz2,edx
 
mov eax,.dtx13
add .ctx1,eax
mov ebx,.dtx23
add .ctx2,ebx
mov edx,.dty13
add .cty1,edx
mov eax,.dty23
add .cty2,eax
end if
inc ecx
cmp cx,.y3
jl .loop23
.loop23_done:
 
mov esp,ebp
ret 50
 
.call_line:
 
pushad
; xmm0= cby1,cbx1,cz1,cx1
; xmm1= cty1,ctx1,cey1,cex1
if Ext >= SSE2
sub esp,8
shufps xmm1,xmm1,10110001b
shufps xmm3,xmm3,10110001b
movlps [esp],xmm1
else
push dword .cty1
push .ctx1
end if
push dword .cz1
if Ext>=SSE2
sub esp,8
movlps [esp],xmm3
else
push dword .cty2
push .ctx2
end if
push dword .cz2
if Ext>=SSE2
sub esp,32
movhps [esp+24],xmm3
shufps xmm2,xmm2,10110001b
movlps [esp+16],xmm2
movhps [esp+8],xmm1
shufps xmm0,xmm0,10110001b
movlps [esp],xmm0 ;================================
 
else
push dword .cey2
push .cex2
push dword .cby2
push .cbx2
push dword .cey1
push .cex1
push dword .cby1
push .cbx1
end if
 
push .tex_ptr
push .z_buff
push .t_emap
push .t_bmap
 
push ecx
 
mov eax,.cx1
sar eax,ROUND
mov ebx,.cx2
sar ebx,ROUND
 
call bump_tex_line_z
 
popad
;end if
ret
bump_tex_line_z:
;-----------------------------------------------------------------------
; Draws one horizontal span (scanline part) of a bump mapped, environment
; mapped and textured triangle with a Z-buffer test per pixel.
;--------------in: eax - x1
;-------------- ebx - x2
;-------------- edi - pointer to screen buffer (3 bytes per pixel)
; stack - remaining parameters (76 bytes, released by "ret 76"):
; Registers are not preserved; ebp is overwritten and used as frame pointer.
; Every pixel store writes exactly 3 bytes, so the neighbouring pixel
; (which may be rejected by its own Z test) is never touched.
;-----------------------------------------------------------------------
.y equ dword [ebp+4]
.bmap equ dword [ebp+8] ; bump map pointer
.emap equ dword [ebp+12] ; env map pointer
.z_buff equ dword [ebp+16] ; z buffer
.tex_map equ dword [ebp+20] ; texture pointer

.bx1 equ [ebp+24] ; ---
.by1 equ [ebp+28] ; |
.ex1 equ [ebp+32] ; |
.ey1 equ [ebp+36] ; |
.bx2 equ [ebp+40] ; |
.by2 equ [ebp+44] ; |> b. map and e. map coords
.ex2 equ [ebp+48] ; |> shifted shl ROUND
.ey2 equ [ebp+52] ; ---
.z2 equ [ebp+56]
.tx2 equ [ebp+60]
.ty2 equ [ebp+64]
.z1 equ [ebp+68]
.tx1 equ [ebp+72]
.ty1 equ [ebp+76]

; locals: .x1/.x2 are pushed at .bl_ok, the deltas are stored right below
; them (sub esp,28 or seven pushes), the current values are pushed just
; before the .draw loop - the order of these pushes defines the layout
.x1 equ [ebp-4]
.x2 equ [ebp-8]
.dbx equ [ebp-12]
.dby equ [ebp-16]
.dex equ [ebp-20]
.dey equ [ebp-24]
.dz equ [ebp-28]
.dtx equ [ebp-32]
.dty equ [ebp-36]

.cbx equ [ebp-40]
.cby equ [ebp-44]
.cex equ [ebp-48]
.cey equ [ebp-52]
.cz equ [ebp-56]
.czbuff equ [ebp-60]
.ctx equ [ebp-64]
.cty equ [ebp-68]
.c_scr equ [ebp-72]

.temp1 equ ebp-80
.temp2 equ ebp-88
.temp3 equ ebp-76
.temp4 equ ebp-84
.temp5 equ ebp-92

mov ebp,esp

; reject the line when y is outside 0..size_y_var-1
mov ecx,.y
or ecx,ecx
jl .bl_end
movzx edx,word[size_y_var]
cmp ecx,edx ;SIZE_Y
jge .bl_end

cmp eax,ebx
jl .bl_ok
je .bl_end ; empty span

; x1 > x2: exchange both end points so the span is drawn left to right
if Ext=NON
mov edx,.bx1
xchg edx,.bx2
mov .bx1,edx
mov edx,.by1
xchg edx,.by2
mov .by1,edx

mov edx,.ex1
xchg edx,.ex2
mov .ex1,edx
mov edx,.ey1
xchg edx,.ey2
mov .ey1,edx

mov edx,.tx1
xchg edx,.tx2
mov .tx1,edx
mov edx,.ty1
xchg edx,.ty2
mov .ty1,edx
end if
if Ext = MMX
; each movq moves an x/y pair at once
movq mm0,.bx1
movq mm1,.bx2
movq mm2,.ex1
movq mm3,.ex2
movq mm4,.tx1
movq mm5,.tx2
movq .bx2,mm0
movq .bx1,mm1
movq .ex1,mm3
movq .ex2,mm2
movq .tx1,mm5
movq .tx2,mm4
end if
if Ext>=SSE
; 16 bytes = bx,by,ex,ey of one end point
movups xmm0,.bx1
movups xmm1,.bx2
movups .bx1,xmm1
movups .bx2,xmm0
movq mm0,.tx1
movq mm1,.tx2
movq .tx1,mm1
movq .tx2,mm0
end if

xchg eax,ebx
mov edx,.z1
xchg edx,.z2
mov .z1,edx
.bl_ok:

push eax
push ebx ;store x1, x2
; reject spans lying completely outside the screen width
movzx ebx,word[size_x_var]
cmp dword .x1,ebx ;dword .x1,SIZE_X
jge .bl_end
cmp dword .x2,0
jle .bl_end

mov ebx,.x2
sub ebx,.x1 ; ebx = span length, divisor for all deltas

if Ext>=SSE

sub esp,28 ; room for .dbx .. .dty
cvtsi2ss xmm3,ebx ;rcps
shufps xmm3,xmm3,0

cvtpi2ps xmm0,.bx1 ;mm0 ; variant fixed point
movlhps xmm0,xmm0
cvtpi2ps xmm0,.ex1 ;mm2
cvtpi2ps xmm1,.bx2 ;mm1
movlhps xmm1,xmm1
cvtpi2ps xmm1,.ex2 ;mm3
subps xmm1,xmm0

divps xmm1,xmm3

; swap x/y inside each pair to match the memory order dey,dex / dby,dbx
shufps xmm1,xmm1,10110001b
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords
movhlps xmm1,xmm1
cvtps2pi mm1,xmm1
movq .dey,mm0
movq .dby,mm1

movd mm2,.z1
movd mm3,.z2

cvtpi2ps xmm0,.tx1 ;mm0
movlhps xmm0,xmm0
cvtpi2ps xmm0,mm2
cvtpi2ps xmm1,.tx2 ;mm1
movlhps xmm1,xmm1
cvtpi2ps xmm1,mm3
subps xmm1,xmm0

divps xmm1,xmm3

; low dword stays dz, high pair becomes dty,dtx (memory order)
shufps xmm1,xmm1,10110100b
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords
movhlps xmm1,xmm1
cvtps2pi mm1,xmm1
movd .dz,mm0
movq .dty,mm1

else

mov eax,.bx2 ; calc .dbx
sub eax,.bx1
cdq
idiv ebx
push eax

mov eax,.by2 ; calc .dby
sub eax,.by1
cdq
idiv ebx
push eax

mov eax,.ex2 ; calc .dex
sub eax,.ex1
cdq
idiv ebx
push eax

mov eax,.ey2 ; calc .dey
sub eax,.ey1
cdq
idiv ebx
push eax


mov eax,.z2 ; calc .dz
sub eax,.z1
cdq
idiv ebx
push eax

mov eax,.tx2 ; calc .dtx
sub eax,.tx1
cdq
idiv ebx
push eax

mov eax,.ty2 ; calc .dty
sub eax,.ty1
cdq
idiv ebx
push eax

end if
cmp dword .x1,0 ; set correctly begin variable
jge @f ; CLIPPING ON FUNCTION
; cutting triangle exceedes screen
; x1 < 0: advance every start value by delta * (-x1) and start at x = 0
mov ebx,.x1
neg ebx

mov eax,.dz
imul ebx ; eax = .dz * abs(.x1)
add .z1,eax
mov dword .x1,0

mov eax,.dbx
imul ebx
add .bx1,eax

mov eax,.dby
imul ebx
add .by1,eax

mov eax,.dex
imul ebx
add .ex1,eax

mov eax,.dey
imul ebx
add .ey1,eax

mov eax,.dtx
imul ebx
add .tx1,eax

mov eax,.dty
imul ebx
add .ty1,eax

@@:
; clip the right end to the screen width
movzx eax,word[size_x_var]
cmp dword .x2,eax ; eax,ebx
jl @f
mov dword .x2,eax ;SIZE_X
@@:
movzx eax,word[size_x_var] ;SIZE_X ;calc memory begin in buffers
mul .y
add eax,.x1 ; eax = pixel index of the first drawn pixel
lea esi,[4*eax]
add esi,.z_buff ; z-buffer filled with dd variables
lea eax,[eax*3]
add edi,eax ; screen buffer: 3 bytes per pixel


mov ecx,.x2
sub ecx,.x1 ; ecx = number of pixels to process
; init current variables
push dword .bx1 ; current b, e and t shifted shl ROUND .cbx
push dword .by1 ; .cby
push dword .ex1 ; .cex
push dword .ey1 ; .cey

push dword .z1 ; current z shl CATMULL_SHIFT ; .cz
push esi ; .czbuff

push dword .tx1 ; .ctx
push dword .ty1 ; .cty
push edi ; .c_scr
if Ext = SSE2
; xmm1 = index mask, xmm2 = offsets of the 4 bump map neighbours
; (-1, +1, -TEX_X, +TEX_X from low to high dword), xmm3 = bump map base
mov eax,TEXTURE_SIZE
movd xmm1,eax
shufps xmm1,xmm1,0
push dword TEX_X
push dword -TEX_X
push dword 1
push dword -1
movups xmm2,[esp]
movd xmm3,.bmap
shufps xmm3,xmm3,0
end if

if Ext>=MMX
; keep the current y/x pairs in registers during the loop
movq mm7,.cty
movq mm6,.cby
movq mm5,.cey
end if

.draw:
; if TEX = SHIFTING ;bump drawing only in shifting mode
mov esi,.czbuff ; .czbuff current address in buffer
mov ebx,.cz ; .cz - cur z position
cmp ebx,dword[esi]
jge .skip ; pixel is not nearer than the stored one
if Ext=NON
mov eax,.cby
shr eax,ROUND
mov esi,.cbx
shr esi,ROUND
else
movq mm1,mm6
psrld mm1,ROUND
movd eax,mm1
psrlq mm1,32
movd esi,mm1
end if

shl eax,TEX_SHIFT
add esi,eax ;- ; esi - current bump map index

; eax = bump[idx-1] - bump[idx+1], edx = bump[idx-TEX_X] - bump[idx+TEX_X]
; (every index is masked with TEXTURE_SIZE)
if Ext = SSE2

movd xmm0,esi
shufps xmm0,xmm0,0
paddd xmm0,xmm2
pand xmm0,xmm1
paddd xmm0,xmm3

movd ebx,xmm0
movzx eax,byte[ebx]
;
psrldq xmm0,4
movd ebx,xmm0
movzx ebx,byte[ebx]
sub eax,ebx
;
psrldq xmm0,4
movd ebx,xmm0
movzx edx, byte [ebx]
;
psrldq xmm0,4
movd ebx,xmm0
movzx ebx, byte [ebx]
sub edx,ebx
;
else
lea ebx,[esi-1]
and ebx,TEXTURE_SIZE
add ebx,.bmap
movzx eax,byte [ebx]

lea ebx,[esi+1]
and ebx,TEXTURE_SIZE
add ebx,.bmap
movzx ebx,byte [ebx]
sub eax,ebx

lea ebx,[esi-TEX_X]
and ebx,TEXTURE_SIZE
add ebx,.bmap
movzx edx,byte [ebx]

lea ebx,[esi+TEX_X]
and ebx,TEXTURE_SIZE
add ebx,.bmap
movzx ebx,byte [ebx]
sub edx,ebx
end if

; eax - horizontal sub modificated x coord
; edx - vertical sub modificated y coord
if Ext=NON
mov ebx,.cex ;.cex - current env map X
shr ebx,ROUND
add eax,ebx


mov ebx,.cey ;.cey - current env map y
shr ebx,ROUND
add edx,ebx

else
movq mm1,mm5 ; mm5 - copy of cur env coords
psrld mm1,ROUND
movd ebx,mm1
psrlq mm1,32
add eax,ebx
movd ebx,mm1
add edx,ebx
end if

; NOTE(review): "jg" accepts eax = TEX_X and edx = TEX_Y; whether the env
; map has room for that extra column/row is not visible here - confirm
; before changing to "jge".
or eax,eax
jl .black
cmp eax,TEX_X
jg .black
or edx,edx
jl .black
cmp edx,TEX_Y
jg .black

shl edx,TEX_SHIFT ; distortion in the env map = distortion in the texture
add edx,eax ; suggestion: do not put a black pixel but
lea esi,[edx*3] ; the undistorted one.
add esi,.emap ;
lodsd ; eax = env map colour

if Ext=NON
mov edx,.cty
shr edx,ROUND ; sar

mov edi,.ctx
shr edi,ROUND ; sar
else
movq mm1,mm7
psrld mm1,ROUND
movd edx,mm1
psrlq mm1,32
movd edi,mm1

end if

shl edx,TEX_SHIFT
add edi,edx
and edi,TEXTURE_SIZE
lea esi,[edi*3]
add esi,.tex_map ; esi -> texel (3 bytes per texel)

; pixel = (env colour * texel) shr 8, per colour channel
if Ext=NON
mov edx,eax
lodsd
push ax
mul dl
mov dl,ah
pop ax
shr ax,8
mul dh
mov al,dl
mov edi,.c_scr
stosw
shr edx,16
shr eax,16
mul dl
shr ax,8
stosb
else
movd mm0,eax
pxor mm1,mm1
punpcklbw mm0,mm1
movd mm2,[esi]
punpcklbw mm2,mm1
pmullw mm0,mm2
psrlw mm0,8
packuswb mm0,mm1
; store 3 bytes only: a dword store would overwrite the first byte of
; the next pixel, which stays corrupted when that pixel fails its Z test
movd eax,mm0
mov edi,.c_scr
mov word[edi],ax
shr eax,16
mov byte[edi+2],al

end if

jmp .actual_zbuff ; actualize z buffer
@@:
.black:
; env map coordinate out of range - draw a black pixel (3 bytes)
xor eax,eax
mov edi,.c_scr
stosw
stosb
.actual_zbuff:
mov eax,.cz
mov edi,.czbuff
stosd

.skip:
; advance to the next pixel
add dword .czbuff,4
add dword .c_scr,3

if Ext=NON
mov eax,.dbx
add .cbx,eax
mov ebx,.dby
add .cby,ebx

mov edx,.dex
add .cex,edx
mov eax,.dey
add .cey,eax

mov ebx,.dtx
add .ctx,ebx
mov edx,.dty
add .cty,edx

else
paddd mm7,.dty
paddd mm6,.dby
paddd mm5,.dey
end if
mov eax,.dz
add .cz,eax

dec ecx
jnz .draw

.bl_end:
mov esp,ebp ; drop all locals
ret 76
;Ext = MMX
 
; else
; movq mm5, qword[.temp1] ;-
; paddd mm5, qword[.temp5] ; .temp5 == low dword = TEX_X, high dword = -TEX_X
; pand mm5, qword[.temp3] ; .temp3 == low = high dword = TEX_SIZE
; paddd mm5, qword[.temp4] ; .temp4 == low = high dword = offset .bmap
; movd ebx,mm5
; psrlq mm5,32
; end if
/programs/demos/view3ds/data.inc
1,5 → 1,7
; DATA AREA ************************************
 
if Ext > SSE2
isSSE3 db 1
end if
i3 dw 3
i6 dd 6
i12 dd 12
6,6 → 8,7
i36 dd 36
i256 dw 256
i255d dd 255
f1:
dot_max dd 1.0 ; dot product max and min
dot_min dd 0.0
env_const dd 1.05
25,7 → 28,7
y_offset dw SIZE_Y / 2
z_offset dw 0
rsscale dd 175.0 ; next real scale
vect_x dw SIZE_X / 2
vect_x: dw SIZE_X / 2
vect_y dw SIZE_Y / 2
vect_z dw 0
size_y_var:
110,9 → 113,9
dd ?
 
db 7
db 'catmull '
db 'ray shadow'
db 2
catmull_flag db 1
ray_shd_flag db 0
dd onoff_f
 
db 8
165,7 → 168,7
 
db 16
db 'fire '
db 3
db 2
fire_flag db 0
dd blur_f
 
350,7 → 353,7
if Ext=SSE3
db ' (SSE3)'
end if
db ' 0.073',0
db ' 0.074',0
labellen:
STRdata db '-1 '
lab_vert:
425,8 → 428,43
 
 
 
;if Ext >= SSE3
align 16
point_light_coords:
dd 50.0
dd 50.0
dd -215.0
dd 0.0
 
align 16
 
dd 815.0
dd 815.0
dd -215.0
dd 0.0
 
dd 1500.0
dd 1500.0
dd -215.0
dd 0.0
if 0
aabb1:
.0 dd 1.0,1.0,1.0,0
.1 dd -1.0,1.0,1.0,0
.2 dd 1.0,-1.0,1.0,0
.3 dd -1.0,-1.0,1.0,0
.4 dd 1.0,1.0,-1.0,0
.5 dd -1.0,1.0,-1.0,0
.6 dd 1.0,-1.0,-1.0,0
.7 dd -1.0,-1.0,-1.0,0
 
end if
 
sign_mask:
times 4 dd 0x80000000
f05xz: dd 0, 0, - 1.0 ,0
 
sign_z:
dd -1,-1,0x7fffffff,0
abs_mask:
dd 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff
emboss_bias:
442,9 → 480,13
times 4 dd 510.0
the_one:
times 4 dd 1.0
aprox dd 0.0001
 
eps: times 4 dd 0.00000
epsone dd 1.0001
aprox dd 0.0001
epsminus dd -0.0001
 
 
file_info:
dd 0
dd 0
463,22 → 505,13
workarea rb 180
EndFile dd ?
align 8
sinbeta dd ?;+32
sinbeta dd ?;
cosbeta dd ?
 
xsub dw ?
zsub dw ?;+40
zsub dw ?
ysub dw ?
 
xx1 dw ?
yy1 dw ?
zz1 dw ?;+48 xx1 + 4
xx2 dw ?
yy2 dw ?
zz2 dw ? ; xx1 + 10
xx3 dw ?;+56
yy3 dw ?
zz3 dw ? ; xx1 + 16
col1 dd ?
col2 dd ?
col3 dd ?
487,13 → 520,9
points_count_var dd ? ;
triangles_count_var dd ? ; dont change order
edges_count dd ? ;
tex_points_ptr dd ?
 
point_index1 dd ? ;-\
point_index2 dd ? ; } don't change order
point_index3 dd ? ;-/
temp_col dw ?
temp1 dd ? ; > dont change
temp2 dd ? ; > order
high dd ?
rand_seed dw ?
align 8
510,18 → 539,14
matrix rb 36
cos_tab rd 360
sin_tab rd 360
 
align 16
lights_aligned:
lights_aligned_end = $ + 16 * 12
rb 16 * 12
 
 
points_count = 180000/6*3
triangles_count = 180000 / 6 ;($-triangles)/6
align 16
label trizdd dword
label trizdq qword
triangles_with_z rw triangles_count*4 + 2 ; triangles triple dw + z position
align 16
vectors rb 24
 
align 16
bumpmap rb TEXTURE_SIZE + 1
align 16
535,25 → 560,19
align 16
color_map rb (TEXTURE_SIZE +100) * 3
align 16
tex_points rb points_count * 4 ; bump_map and texture coords
; each point word x, word y
align 16
lights_aligned:
lights_aligned_end = $ + 16 * 12
rb 16 * 12
; tex_points rb points_count * 4 ; bump_map and texture coords
; ; each point word x, word y
;align 16
; lights_aligned:
; lights_aligned_end = $ + 16 * 12
; rb 16 * 12
 
 
if Ext >= SSE2
sse_repository rb 1024
end if
; SourceFile: ; source file temporally in screen area
; workarea dd ?
 
; screen rb SIZE_X * SIZE_Y * 3 ; screen buffer
;align 16
; Z_buffer rb SIZE_X * SIZE_Y * 4
procinfo:
rb 1024 ; process info
rb 2048 ; process info
I_Param rb 256
memStack:
rb 2000
/programs/demos/view3ds/flat_cat.inc
1,399 → 1,399
CATMULL_SHIFT equ 16
 
 
flat_triangle_z:
; Procedure drawing a flat (single colour) triangle with Z coordinate
; interpolation (Catmull algorithm). The vertices are sorted by y, the
; edge deltas are computed in fixed point and every scanline is drawn
; by flat_line_z.
; ----------------in - eax - x1 shl 16 + y1 ----------------------
; -------------------- ebx - x2 shl 16 + y2 ----------------------
; -------------------- ecx - x3 shl 16 + y3 ----------------------
; -------------------- edx - color 0x00RRGGBB --------------------
; -------------------- esi - pointer to Z-buffer -----------------
; -------------------- edi - pointer to screen buffer-------------
; -------------------- stack : z coordinates (3 words, released by "ret 6")
; -------------------- Z-buffer : each z variable as dword
; -------------------- (Z coor. as word) shl CATMULL_SHIFT
; ebp is overwritten and used as frame pointer; eax, ebx, ecx and edx
; are modified.
.z1 equ word[ebp+4]
.z2 equ word[ebp+6] ; each z coordinate as word integer
.z3 equ word[ebp+8]

; locals: .col and the packed vertices are created by the pushes after
; "mov ebp,esp", the remaining ones by "sub esp,52-12"
.col equ dword[ebp-4]
.x1 equ word[ebp-6]
.y1 equ word[ebp-8]
.x2 equ word[ebp-10]
.y2 equ word[ebp-12]
.x3 equ word[ebp-14]
.y3 equ word[ebp-16]

.dx12 equ dword[ebp-20]
;.dz12 equ dword[ebp-24]
.dx13 equ dword[ebp-24]
.dz13 equ dword[ebp-28]
.dz12 equ dword[ebp-32]
;.dz13 equ dword[ebp-32]
.dx23 equ dword[ebp-36]
.dz13M equ [ebp-40]
.dz23 equ dword[ebp-44]
.zz1 equ dword[ebp-48]
.zz2 equ dword[ebp-52]
.zz2M equ qword[ebp-52]
.dz12M equ qword[ebp-32]
.dz23M equ qword[ebp-44]
;if Ext>=MMX
; emms
;end if
mov ebp,esp

push edx ; store edx in variable .col
; sort the vertices by y (low word of eax/ebx/ecx) so that y1 <= y2 <= y3;
; the z words on the stack are exchanged together with the registers
.sort2:
cmp ax,bx
jle .sort1
xchg eax,ebx
mov dx,.z1
xchg dx,.z2
mov .z1,dx
.sort1:
cmp bx,cx
jle .sort3
xchg ebx,ecx
mov dx,.z2
xchg dx,.z3
mov .z2,dx
jmp .sort2
.sort3:
push eax ; store triangle coordinates in user friendly variables
push ebx
push ecx
; bit 31 of edx stays set only when all three x are negative, bit 15 only
; when all three y are negative - in either case nothing is drawn
mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that
and edx,ebx ; if *all* of them are negative a sign flag is raised
and edx,ecx
and edx,eax
test edx,80008000h ; Check both X&Y at once
jne .ft_loop2_end
; cmp ax,SIZE_Y
; jle @f
; cmp bx,SIZE_Y
; jle @f
; cmp cx,SIZE_Y
; jge @f
; ror eax,16
; ror ebx,16
; ror ecx,16
; cmp ax,SIZE_X
; jle @f
; cmp bx,SIZE_X
; jle @f
; cmp cx,SIZE_X
; jle @f
; jmp .ft_loop2_end
;@@:
sub esp,52-12 ; reserve the locals from .dx12 down to .zz2

; per-scanline deltas of every edge: x shifted shl ROUND, z shifted shl
; CATMULL_SHIFT; an edge with equal y at both ends gets zero deltas
mov bx,.y2 ; calc delta 12
sub bx,.y1
jnz .ft_dx12_make
mov .dx12,0
mov .dz12,0
jmp .ft_dx12_done
.ft_dx12_make:
mov ax,.x2
sub ax,.x1
cwde
movsx ebx,bx
shl eax,ROUND
cdq
idiv ebx
mov .dx12,eax

mov ax,.z2
sub ax,.z1
cwde
shl eax,CATMULL_SHIFT
cdq
idiv ebx
mov .dz12,eax
.ft_dx12_done:
mov bx,.y3 ; calc delta 13
sub bx,.y1
jnz .ft_dx13_make
mov .dx13,0
mov .dz13,0
mov dword .dz13M,0
jmp .ft_dx13_done
.ft_dx13_make:
mov ax,.x3
sub ax,.x1
cwde
movsx ebx,bx
shl eax,ROUND
cdq
idiv ebx
mov .dx13,eax

mov ax,.z3
sub ax,.z1
cwde
shl eax,CATMULL_SHIFT
cdq
idiv ebx
mov .dz13,eax
mov dword .dz13M,eax
.ft_dx13_done:
mov bx,.y3 ; calc delta 23
sub bx,.y2
jnz .gt_dx23_make
mov .dx23,0
mov .dz23,0
jmp .gt_dx23_done
.gt_dx23_make:
mov ax,.x3
sub ax,.x2
cwde
movsx ebx,bx
shl eax,ROUND
cdq
idiv ebx
mov .dx23,eax

mov ax,.z3
sub ax,.z2
cwde
shl eax,CATMULL_SHIFT
cdq
idiv ebx
mov .dz23,eax
.gt_dx23_done:

; both edges start at vertex 1
movsx edx,.z1
shl edx,CATMULL_SHIFT
mov .zz1,edx
mov .zz2,edx
movsx eax,.x1
shl eax,ROUND ; eax - x1
mov ebx,eax ; ebx - x2
;if Ext>=MMX
; movq mm0,.zz2M
;end if
; first part: scanlines y1 .. y2-1, eax walks edge 1-3, ebx walks edge 1-2
mov cx,.y1
cmp cx,.y2
jge .ft_loop1_end
.ft_loop1:

pushad ; flat_line_z does not preserve registers

; arguments of flat_line_z (18 bytes, removed by the callee)
push .col
push cx ; y
sar ebx,ROUND
push bx ; x2
sar eax,ROUND
push ax ; x1
;if Ext>=MMX
; sub esp,8
; movq [esp],mm0
;else
push .zz2 ; z2 shl CATMULL_SHIFT
push .zz1 ; z1 shl CATMULL_SHIFT
;end if
call flat_line_z

popad

add eax,.dx13
add ebx,.dx12
;if Ext>=MMX
; paddd mm0,.dz12M
;else

mov edx,.dz13
add .zz1,edx
mov edx,.dz12
add .zz2,edx
;end if
inc cx
cmp cx,.y2
jl .ft_loop1
.ft_loop1_end:

; second part: restart the second edge at vertex 2 and walk edge 2-3
movsx edx,.z2
shl edx,CATMULL_SHIFT
mov .zz2,edx
movsx ebx,.x2
shl ebx,ROUND
;if Ext>=MMX
; movq mm0,.zz2M
;; push .dz13 ; exchange
;; pop .dz12
;; push .dz23 ; exchange
;; pop .dz13
;end if
mov cx,.y2
cmp cx,.y3
jge .ft_loop2_end
.ft_loop2:
pushad

push .col
push cx
sar ebx,ROUND
push bx
sar eax,ROUND
push ax ; x1
;if Ext>=MMX
; sub esp,8
; movq [esp],mm0
;else
push .zz2 ; z2 shl CATMULL_SHIFT
push .zz1 ; z1 shl CATMULL_SHIFT
;end if
call flat_line_z

popad

add eax,.dx13
add ebx,.dx23
;if Ext>=MMX
; paddd mm0,.dz23M
;else
mov edx,.dz13
add .zz1,edx
mov edx,.dz23
add .zz2,edx

; mov edx,.dz13
; add .zz1,edx
; mov edx,.dz12
; add .zz2,edx
;end if
inc cx
cmp cx,.y3
jl .ft_loop2
.ft_loop2_end:

mov esp,ebp ; drop all locals
ret 6 ; remove the three z words
 
flat_line_z:
;----------------
; Draws one horizontal single-colour span with a Z-buffer test per pixel.
; Every pixel store writes exactly 3 bytes, so a following pixel that
; fails its own Z test is left untouched.
;-------------in edi - pointer to screen buffer (3 bytes per pixel)---------------
;--------------- esi - pointer to z-buffer (each Z variable dword)----------------
;----------stack - (each z coordinate shifted shl CATMULL_SHIFT)------------------
;----------------- 18 bytes of arguments, released by "ret 18" -------------------
; Registers are not preserved; ebp is overwritten and used as frame pointer.
.z1 equ dword [ebp+4]
.z2 equ dword [ebp+8]
.x1 equ word [ebp+12]
.x2 equ word [ebp+14]
.y equ word [ebp+16]
.col equ dword [ebp+18]

.dz equ dword [ebp-4] ; created by "push eax" after the division below

mov ebp,esp
; reject the line when y is outside 0..size_y_var-1
mov ax,.y
or ax,ax
jl .fl_quit
mov bx,[size_y_var]
dec bx
cmp ax,bx ;[size_y_var]
jg .fl_quit

mov ax,.x1
cmp ax,.x2
je .fl_quit ; empty span
jl .fl_ok

; x1 > x2: exchange the end points together with their z values
xchg ax,.x2
mov .x1,ax
mov edx,.z1
xchg edx,.z2
mov .z1,edx
.fl_ok:
; reject spans lying completely outside the screen width
mov bx,[size_x_var]
dec bx
cmp .x1,bx ;SIZE_X-1
jg .fl_quit
cmp .x2,0
jle .fl_quit

; dz = (z2 - z1) / (x2 - x1)
mov eax,.z2
sub eax,.z1
cdq
mov bx,.x2
sub bx,.x1
movsx ebx,bx
idiv ebx
push eax ; calculated delta - shifted .dz

cmp .x1,0
jge @f
; x1 < 0: advance z1 by dz * (-x1) and start at x = 0 (eax still holds dz)
movsx ebx,.x1
neg ebx
imul ebx
add .z1,eax
mov .x1,0
@@:
; clip the right end to the screen width
movzx edx,word[size_x_var]
cmp .x2,dx ;[size_x_var] ;SIZE_X
jl @f
mov .x2,dx ;[size_x_var] ;SIZE_X
@@:
movsx eax,.y
mul edx ; edi = edi + (SIZE_X * y + x1)*3
movsx edx,.x1
add eax,edx
push eax
lea eax,[eax*3]
add edi,eax ; esi = esi + (SIZE_X * y + x1)*4
pop eax
shl eax,2
add esi,eax

mov cx,.x2
sub cx,.x1
movzx ecx,cx ; ecx = number of pixels in the span

mov eax,.col
mov ebx,.z1 ; ebx : curr. z
mov edx,.dz
dec ecx ; the last pixel is handled at .draw_last
jecxz .draw_last
.ddraw:
cmp ebx,dword[esi]
jge @f ; not nearer than the stored pixel - skip it
; write B,G then R; a dword store would also overwrite the first byte of
; the next pixel, which stays corrupted when that pixel is skipped
mov word[edi],ax
ror eax,16
mov byte[edi+2],al
ror eax,16 ; restore the colour for the next pixel
mov dword[esi],ebx
@@:
add edi,3
add esi,4
add ebx,edx
loop .ddraw

.draw_last:
cmp ebx,dword[esi]
jge .fl_quit
stosw
shr eax,16
stosb
mov dword[esi],ebx

.fl_quit:

mov esp,ebp ; drop .dz
ret 18
CATMULL_SHIFT equ 16
 
 
flat_triangle_z:
; Procedure drawing a flat (single colour) triangle with Z coordinate
; interpolation (Catmull algorithm). The vertices are sorted by y, the
; edge deltas are computed in fixed point and every scanline is drawn
; by flat_line_z.
; ----------------in - eax - x1 shl 16 + y1 ----------------------
; -------------------- ebx - x2 shl 16 + y2 ----------------------
; -------------------- ecx - x3 shl 16 + y3 ----------------------
; -------------------- edx - color 0x00RRGGBB --------------------
; -------------------- esi - pointer to Z-buffer -----------------
; -------------------- edi - pointer to screen buffer-------------
; -------------------- stack : z coordinates (3 words, released by "ret 6")
; -------------------- Z-buffer : each z variable as dword
; -------------------- (Z coor. as word) shl CATMULL_SHIFT
; ebp is overwritten and used as frame pointer; eax, ebx, ecx and edx
; are modified.
.z1 equ word[ebp+4]
.z2 equ word[ebp+6] ; each z coordinate as word integer
.z3 equ word[ebp+8]

; locals: .col and the packed vertices are created by the pushes after
; "mov ebp,esp", the remaining ones by "sub esp,52-12"
.col equ dword[ebp-4]
.x1 equ word[ebp-6]
.y1 equ word[ebp-8]
.x2 equ word[ebp-10]
.y2 equ word[ebp-12]
.x3 equ word[ebp-14]
.y3 equ word[ebp-16]

.dx12 equ dword[ebp-20]
;.dz12 equ dword[ebp-24]
.dx13 equ dword[ebp-24]
.dz13 equ dword[ebp-28]
.dz12 equ dword[ebp-32]
;.dz13 equ dword[ebp-32]
.dx23 equ dword[ebp-36]
.dz13M equ [ebp-40]
.dz23 equ dword[ebp-44]
.zz1 equ dword[ebp-48]
.zz2 equ dword[ebp-52]
.zz2M equ qword[ebp-52]
.dz12M equ qword[ebp-32]
.dz23M equ qword[ebp-44]
;if Ext>=MMX
; emms
;end if
mov ebp,esp

push edx ; store edx in variable .col
; sort the vertices by y (low word of eax/ebx/ecx) so that y1 <= y2 <= y3;
; the z words on the stack are exchanged together with the registers
.sort2:
cmp ax,bx
jle .sort1
xchg eax,ebx
mov dx,.z1
xchg dx,.z2
mov .z1,dx
.sort1:
cmp bx,cx
jle .sort3
xchg ebx,ecx
mov dx,.z2
xchg dx,.z3
mov .z2,dx
jmp .sort2
.sort3:
push eax ; store triangle coordinates in user friendly variables
push ebx
push ecx
; bit 31 of edx stays set only when all three x are negative, bit 15 only
; when all three y are negative - in either case nothing is drawn
mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that
and edx,ebx ; if *all* of them are negative a sign flag is raised
and edx,ecx
and edx,eax
test edx,80008000h ; Check both X&Y at once
jne .ft_loop2_end
; cmp ax,SIZE_Y
; jle @f
; cmp bx,SIZE_Y
; jle @f
; cmp cx,SIZE_Y
; jge @f
; ror eax,16
; ror ebx,16
; ror ecx,16
; cmp ax,SIZE_X
; jle @f
; cmp bx,SIZE_X
; jle @f
; cmp cx,SIZE_X
; jle @f
; jmp .ft_loop2_end
;@@:
sub esp,52-12 ; reserve the locals from .dx12 down to .zz2

; per-scanline deltas of every edge: x shifted shl ROUND, z shifted shl
; CATMULL_SHIFT; an edge with equal y at both ends gets zero deltas
mov bx,.y2 ; calc delta 12
sub bx,.y1
jnz .ft_dx12_make
mov .dx12,0
mov .dz12,0
jmp .ft_dx12_done
.ft_dx12_make:
mov ax,.x2
sub ax,.x1
cwde
movsx ebx,bx
shl eax,ROUND
cdq
idiv ebx
mov .dx12,eax

mov ax,.z2
sub ax,.z1
cwde
shl eax,CATMULL_SHIFT
cdq
idiv ebx
mov .dz12,eax
.ft_dx12_done:
mov bx,.y3 ; calc delta 13
sub bx,.y1
jnz .ft_dx13_make
mov .dx13,0
mov .dz13,0
mov dword .dz13M,0
jmp .ft_dx13_done
.ft_dx13_make:
mov ax,.x3
sub ax,.x1
cwde
movsx ebx,bx
shl eax,ROUND
cdq
idiv ebx
mov .dx13,eax

mov ax,.z3
sub ax,.z1
cwde
shl eax,CATMULL_SHIFT
cdq
idiv ebx
mov .dz13,eax
mov dword .dz13M,eax
.ft_dx13_done:
mov bx,.y3 ; calc delta 23
sub bx,.y2
jnz .gt_dx23_make
mov .dx23,0
mov .dz23,0
jmp .gt_dx23_done
.gt_dx23_make:
mov ax,.x3
sub ax,.x2
cwde
movsx ebx,bx
shl eax,ROUND
cdq
idiv ebx
mov .dx23,eax

mov ax,.z3
sub ax,.z2
cwde
shl eax,CATMULL_SHIFT
cdq
idiv ebx
mov .dz23,eax
.gt_dx23_done:

; both edges start at vertex 1
movsx edx,.z1
shl edx,CATMULL_SHIFT
mov .zz1,edx
mov .zz2,edx
movsx eax,.x1
shl eax,ROUND ; eax - x1
mov ebx,eax ; ebx - x2
;if Ext>=MMX
; movq mm0,.zz2M
;end if
; first part: scanlines y1 .. y2-1, eax walks edge 1-3, ebx walks edge 1-2
mov cx,.y1
cmp cx,.y2
jge .ft_loop1_end
.ft_loop1:

pushad ; flat_line_z does not preserve registers

; arguments of flat_line_z (18 bytes, removed by the callee)
push .col
push cx ; y
sar ebx,ROUND
push bx ; x2
sar eax,ROUND
push ax ; x1
;if Ext>=MMX
; sub esp,8
; movq [esp],mm0
;else
push .zz2 ; z2 shl CATMULL_SHIFT
push .zz1 ; z1 shl CATMULL_SHIFT
;end if
call flat_line_z

popad

add eax,.dx13
add ebx,.dx12
;if Ext>=MMX
; paddd mm0,.dz12M
;else

mov edx,.dz13
add .zz1,edx
mov edx,.dz12
add .zz2,edx
;end if
inc cx
cmp cx,.y2
jl .ft_loop1
.ft_loop1_end:

; second part: restart the second edge at vertex 2 and walk edge 2-3
movsx edx,.z2
shl edx,CATMULL_SHIFT
mov .zz2,edx
movsx ebx,.x2
shl ebx,ROUND
;if Ext>=MMX
; movq mm0,.zz2M
;; push .dz13 ; exchange
;; pop .dz12
;; push .dz23 ; exchange
;; pop .dz13
;end if
mov cx,.y2
cmp cx,.y3
jge .ft_loop2_end
.ft_loop2:
pushad

push .col
push cx
sar ebx,ROUND
push bx
sar eax,ROUND
push ax ; x1
;if Ext>=MMX
; sub esp,8
; movq [esp],mm0
;else
push .zz2 ; z2 shl CATMULL_SHIFT
push .zz1 ; z1 shl CATMULL_SHIFT
;end if
call flat_line_z

popad

add eax,.dx13
add ebx,.dx23
;if Ext>=MMX
; paddd mm0,.dz23M
;else
mov edx,.dz13
add .zz1,edx
mov edx,.dz23
add .zz2,edx

; mov edx,.dz13
; add .zz1,edx
; mov edx,.dz12
; add .zz2,edx
;end if
inc cx
cmp cx,.y3
jl .ft_loop2
.ft_loop2_end:

mov esp,ebp ; drop all locals
ret 6 ; remove the three z words
 
flat_line_z:
;----------------
; Draws one horizontal single-colour span with a Z-buffer test per pixel.
; Every pixel store writes exactly 3 bytes, so a following pixel that
; fails its own Z test is left untouched.
;-------------in edi - pointer to screen buffer (3 bytes per pixel)---------------
;--------------- esi - pointer to z-buffer (each Z variable dword)----------------
;----------stack - (each z coordinate shifted shl CATMULL_SHIFT)------------------
;----------------- 18 bytes of arguments, released by "ret 18" -------------------
; Registers are not preserved; ebp is overwritten and used as frame pointer.
.z1 equ dword [ebp+4]
.z2 equ dword [ebp+8]
.x1 equ word [ebp+12]
.x2 equ word [ebp+14]
.y equ word [ebp+16]
.col equ dword [ebp+18]

.dz equ dword [ebp-4] ; created by "push eax" after the division below

mov ebp,esp
; reject the line when y is outside 0..size_y_var-1
mov ax,.y
or ax,ax
jl .fl_quit
mov bx,[size_y_var]
dec bx
cmp ax,bx ;[size_y_var]
jg .fl_quit

mov ax,.x1
cmp ax,.x2
je .fl_quit ; empty span
jl .fl_ok

; x1 > x2: exchange the end points together with their z values
xchg ax,.x2
mov .x1,ax
mov edx,.z1
xchg edx,.z2
mov .z1,edx
.fl_ok:
; reject spans lying completely outside the screen width
mov bx,[size_x_var]
dec bx
cmp .x1,bx ;SIZE_X-1
jg .fl_quit
cmp .x2,0
jle .fl_quit

; dz = (z2 - z1) / (x2 - x1)
mov eax,.z2
sub eax,.z1
cdq
mov bx,.x2
sub bx,.x1
movsx ebx,bx
idiv ebx
push eax ; calculated delta - shifted .dz

cmp .x1,0
jge @f
; x1 < 0: advance z1 by dz * (-x1) and start at x = 0 (eax still holds dz)
movsx ebx,.x1
neg ebx
imul ebx
add .z1,eax
mov .x1,0
@@:
; clip the right end to the screen width
movzx edx,word[size_x_var]
cmp .x2,dx ;[size_x_var] ;SIZE_X
jl @f
mov .x2,dx ;[size_x_var] ;SIZE_X
@@:
movsx eax,.y
mul edx ; edi = edi + (SIZE_X * y + x1)*3
movsx edx,.x1
add eax,edx
push eax
lea eax,[eax*3]
add edi,eax ; esi = esi + (SIZE_X * y + x1)*4
pop eax
shl eax,2
add esi,eax

mov cx,.x2
sub cx,.x1
movzx ecx,cx ; ecx = number of pixels in the span

mov eax,.col
mov ebx,.z1 ; ebx : curr. z
mov edx,.dz
dec ecx ; the last pixel is handled at .draw_last
jecxz .draw_last
.ddraw:
cmp ebx,dword[esi]
jge @f ; not nearer than the stored pixel - skip it
; write B,G then R; a dword store would also overwrite the first byte of
; the next pixel, which stays corrupted when that pixel is skipped
mov word[edi],ax
ror eax,16
mov byte[edi+2],al
ror eax,16 ; restore the colour for the next pixel
mov dword[esi],ebx
@@:
add edi,3
add esi,4
add ebx,edx
loop .ddraw

.draw_last:
cmp ebx,dword[esi]
jge .fl_quit
stosw
shr eax,16
stosb
mov dword[esi],ebx

.fl_quit:

mov esp,ebp ; drop .dz
ret 18
/programs/demos/view3ds/grd_cat.inc
1,704 → 1,704
; Number of fraction bits used for interpolated x and colour values (shl/sar ROUND)
ROUND equ 8
; Number of fraction bits used for interpolated Z values (shl CATMULL_SHIFT)
CATMULL_SHIFT equ 8
gouraud_triangle_z:

;----procedure drawing gouraud triangle with z coordinate
;----interpolation ( Catmull algorithm )-----------------
;------------------in - eax - x1 shl 16 + y1 ------------
;---------------------- ebx - x2 shl 16 + y2 ------------
;---------------------- ecx - x3 shl 16 + y3 ------------
;---------------------- esi - pointer to Z-buffer--------
;---------------------- Z-buffer filled with dd variables
;---------------------- shifted CATMULL_SHIFT------------
;---------------------- edi - pointer to screen buffer---
;---------------------- stack : colors-------------------
;----------------- procedure doesn't save registers !!---
; Stack arguments: for each vertex four words (r, g, b, z); the procedure
; removes all 24 bytes itself (ret 24).
; The vertices are sorted by ascending y, per-row deltas are computed for the
; edges 1-2, 1-3 and 2-3, then gouraud_line_z is called once for every row.
; The local frame is built with pushes, so the push order below must match
; the [ebp-n] offsets of the equates.
.col1r equ ebp+4 ; each color as word
.col1g equ ebp+6 ; each z coordinate as word
.col1b equ ebp+8
.z1 equ ebp+10
.col2r equ ebp+12
.col2g equ ebp+14
.col2b equ ebp+16
.z2 equ ebp+18
.col3r equ ebp+20
.col3g equ ebp+22
.col3b equ ebp+24
.z3 equ ebp+26

; sorted vertex coordinates (filled by push eax / ebx / ecx)
.x1 equ word[ebp-2]
.y1 equ word[ebp-4]
.x2 equ word[ebp-6]
.y2 equ word[ebp-8]
.x3 equ word[ebp-10]
.y3 equ word[ebp-12]

; per-row deltas along edge 1-2
.dx12 equ dword[ebp-16]
.dz12 equ dword[ebp-20]
.dc12r equ dword[ebp-24]
.dc12g equ dword[ebp-28]
.dc12b equ dword[ebp-32]

; per-row deltas along edge 1-3
.dx13 equ dword[ebp-36]
.dz13 equ dword[ebp-40]
.dc13r equ dword[ebp-44]
.dc13g equ dword[ebp-48]
.dc13b equ dword[ebp-52]

; per-row deltas along edge 2-3
.dx23 equ dword[ebp-56]
.dz23 equ dword[ebp-60]
.dc23r equ dword[ebp-64]
.dc23g equ dword[ebp-68]
.dc23b equ dword[ebp-72]

; current fixed-point values at both ends of the row being drawn
.zz1 equ dword[ebp-76]
.c1r equ dword[ebp-80]
.c1g equ dword[ebp-84]
.c1b equ dword[ebp-88]
.zz2 equ dword[ebp-92]
.c2r equ dword[ebp-96]
.c2g equ dword[ebp-100]
.c2b equ dword[ebp-104]
;.zz1 equ dword[ebp-100]
;.zz2 equ dword[ebp-104]

; the same slots without a size, used as 8-byte operands by the MMX code:
; each covers two adjacent dwords, e.g. .c1rM = (c1r, zz1), .dc13rM = (dc13r, dz13)
.c1bM equ [ebp-88]
.c2bM equ [ebp-104]
.c1rM equ [ebp-80]
.c2rM equ [ebp-96]
.dc23bM equ [ebp-72]
.dc13bM equ [ebp-52]
.dc12bM equ [ebp-32]
.dc12rM equ [ebp-24]
.dc13rM equ [ebp-44]
.dc23rM equ [ebp-64]
if Ext=MMX
emms
end if

mov ebp,esp
; sub esp,84
.sort3: ; sort triangle coordinates...
; the colour/z words on the stack are swapped together with the coordinates
cmp ax,bx
jle .sort1
xchg eax,ebx
mov edx,dword[.col1r]
xchg edx,dword[.col2r]
mov dword[.col1r],edx
mov edx,dword[.col1b]
xchg edx,dword[.col2b]
mov dword[.col1b],edx
.sort1:
cmp bx,cx
jle .sort2
xchg ebx,ecx
mov edx,dword[.col2r]
xchg edx,dword[.col3r]
mov dword[.col2r],edx
mov edx,dword[.col2b]
xchg edx,dword[.col3b]
mov dword[.col2b],edx
jmp .sort3
.sort2:
push eax ; store in variables
push ebx
push ecx
mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that
and edx,ebx ; if *all* of them are negative a sign flag is raised
and edx,ecx
and edx,eax
test edx,80008000h ; Check both X&Y at once
jne .gt_loop2_end ; all x or all y are negative - nothing to draw

; ---- deltas along edge 1-2 (five zero dwords when y2 = y1) ----
mov bx,.y2 ; calc deltas
sub bx,.y1
jnz .gt_dx12_make
; mov .dx12,0
; mov .dz12,0
; mov .dc12r,0
; mov .dc12g,0
; mov .dc12b,0
mov ecx,5
@@:
push dword 0
loop @b
jmp .gt_dx12_done
.gt_dx12_make:
mov ax,.x2
sub ax,.x1
cwde
movsx ebx,bx
shl eax,ROUND
cdq
idiv ebx
; mov .dx12,eax
push eax

mov ax,word[.z2]
sub ax,word[.z1]
cwde
shl eax,CATMULL_SHIFT
cdq
idiv ebx
push eax ; .dz12

mov ax,word[.col2r]
sub ax,word[.col1r]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dc12r,eax
push eax
mov ax,word[.col2g]
sub ax,word[.col1g]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dc12g,eax
push eax
mov ax,word[.col2b] ;;---
sub ax,word[.col1b]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dc12b,eax
push eax
.gt_dx12_done:

; ---- deltas along edge 1-3 ----
mov bx,.y3 ; calc deltas
sub bx,.y1
jnz .gt_dx13_make
; mov .dx13,0
; mov .dz13,0
; mov .dc13r,0
; mov .dc13g,0
; mov .dc13b,0
mov ecx,5
@@:
push dword 0
loop @b
jmp .gt_dx13_done
.gt_dx13_make:
mov ax,.x3
sub ax,.x1
cwde
movsx ebx,bx
shl eax,ROUND
cdq
idiv ebx
; mov .dx13,eax
push eax

mov ax,word[.z3]
sub ax,word[.z1]
cwde
shl eax,CATMULL_SHIFT
cdq
idiv ebx
push eax ; .dz13

mov ax,word[.col3r]
sub ax,word[.col1r]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dc13r,eax
push eax
mov ax,word[.col3g]
sub ax,word[.col1g]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dc13g,eax
push eax
mov ax,word[.col3b]
sub ax,word[.col1b]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dc13b,eax
push eax
.gt_dx13_done:

; ---- deltas along edge 2-3 ----
mov bx,.y3 ; calc deltas
sub bx,.y2
jnz .gt_dx23_make
; mov .dx23,0
; mov .dz23,0
; mov .dc23r,0
; mov .dc23g,0
; mov .dc23b,0
mov ecx,5
@@:
push dword 0
loop @b
jmp .gt_dx23_done
.gt_dx23_make:
mov ax,.x3
sub ax,.x2
cwde
movsx ebx,bx
shl eax,ROUND
cdq
idiv ebx
; mov .dx23,eax
push eax

mov ax,word[.z3]
sub ax,word[.z2]
cwde
shl eax,CATMULL_SHIFT
cdq
idiv ebx
push eax ; .dz23

mov ax,word[.col3r]
sub ax,word[.col2r]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dc23r,eax
push eax
mov ax,word[.col3g]
sub ax,word[.col2g]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dc23g,eax
push eax
mov ax,word[.col3b]
sub ax,word[.col2b]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dc23b,eax
push eax
.gt_dx23_done:
sub esp,32 ; room for .zz1 .. .c2b (8 dwords)

; both row ends start at vertex 1
movsx eax,.x1 ; eax - cur x1
shl eax,ROUND ; ebx - cur x2
mov ebx,eax
movsx edx,word[.z1]
shl edx,CATMULL_SHIFT
mov .zz1,edx
mov .zz2,edx
movzx edx,word[.col1r]
shl edx,ROUND
mov .c1r,edx
mov .c2r,edx
movzx edx,word[.col1g]
shl edx,ROUND
mov .c1g,edx
mov .c2g,edx
movzx edx,word[.col1b]
shl edx,ROUND
mov .c1b,edx
mov .c2b,edx
mov cx,.y1
cmp cx,.y2
jge .gt_loop1_end

; ---- upper part: rows y1 .. y2-1, between edges 1-3 and 1-2 ----
.gt_loop1:
pushad
; macro .debug

; arguments of gouraud_line_z, pushed last-to-first
mov edx,.c2r ; c2r,c2g,c2b,c1r,c1g,c1b - current colors
sar edx,ROUND
push dx
mov edx,.c2g
sar edx,ROUND
push dx
mov edx,.c2b
sar edx,ROUND
push dx
sar ebx,ROUND ; x2
push bx
mov edx,.c1r
sar edx,ROUND
push dx
mov edx,.c1g
sar edx,ROUND
push dx
mov edx,.c1b
sar edx,ROUND
push dx
sar eax,ROUND
push ax ; x1
push cx ; y
push .zz2
push .zz1
call gouraud_line_z

popad
if Ext >= MMX
; each movq/paddd handles two dwords: (b,g) and (r,z)
movq mm0,.c1bM
paddd mm0,qword .dc13bM
movq .c1bM,mm0
movq mm1,.c2bM
paddd mm1,qword .dc12bM
movq .c2bM,mm1

movq mm0,.c1rM
paddd mm0,qword .dc13rM
movq .c1rM,mm0
movq mm1,.c2rM
paddd mm1,qword .dc12rM
movq .c2rM,mm1
else
mov edx,.dc13r
add .c1r,edx
mov edx,.dc13g
add .c1g,edx
mov edx,.dc13b
add .c1b,edx
mov edx,.dc12r
add .c2r,edx
mov edx,.dc12g
add .c2g,edx
mov edx,.dc12b
add .c2b,edx

mov edx,.dz13
add .zz1,edx
mov edx,.dz12
add .zz2,edx
end if
add eax,.dx13
add ebx,.dx12
inc cx
cmp cx,.y2
jl .gt_loop1

.gt_loop1_end:
mov cx,.y2
cmp cx,.y3
jge .gt_loop2_end

; second row end restarts at vertex 2 and follows edge 2-3
movsx ebx,.x2 ; eax - cur x1
shl ebx,ROUND ; ebx - cur x2
movsx edx,word[.z2]
shl edx,CATMULL_SHIFT
mov .zz2,edx
movzx edx,word[.col2r]
shl edx,ROUND
mov .c2r,edx
movzx edx,word[.col2g]
shl edx,ROUND
mov .c2g,edx
movzx edx,word[.col2b]
shl edx,ROUND
mov .c2b,edx

; ---- lower part: rows y2 .. y3-1, between edges 1-3 and 2-3 ----
.gt_loop2:
pushad
; macro .debug

mov edx,.c2r ; c2r,c2g,c2b,c1r,c1g,c1b - current colors
sar edx,ROUND
push dx
mov edx,.c2g
sar edx,ROUND
push dx
mov edx,.c2b
sar edx,ROUND
push dx
sar ebx,ROUND ; x2
push bx
mov edx,.c1r
sar edx,ROUND
push dx
mov edx,.c1g
sar edx,ROUND
push dx
mov edx,.c1b
sar edx,ROUND
push dx
sar eax,ROUND
push ax ; x1
push cx ; y
push .zz2
push .zz1
call gouraud_line_z

popad

if Ext >= MMX
movq mm0,.c1bM
paddd mm0,qword .dc13bM
movq .c1bM,mm0
movq mm1,.c2bM
paddd mm1,qword .dc23bM
movq .c2bM,mm1

movq mm0,.c1rM
paddd mm0,qword .dc13rM
movq .c1rM,mm0
movq mm1,.c2rM
paddd mm1,qword .dc23rM
movq .c2rM,mm1
else
mov edx,.dc13r
add .c1r,edx
mov edx,.dc13g
add .c1g,edx
mov edx,.dc13b
add .c1b,edx
mov edx,.dc23r
add .c2r,edx
mov edx,.dc23g
add .c2g,edx
mov edx,.dc23b
add .c2b,edx
mov edx,.dz13
add .zz1,edx
mov edx,.dz23
add .zz2,edx
end if
add eax,.dx13
add ebx,.dx23
inc cx
cmp cx,.y3
jl .gt_loop2
.gt_loop2_end:

mov esp,ebp ; drop the whole local frame
ret 24
gouraud_line_z:
;----------------- procedure drawing gouraud line
;----------------- with z coordinate interpolation
;----------------- esi - pointer to Z_buffer
;----------------- edi - pointer to screen buffer
;----------------- stack (26 bytes, removed by the procedure - ret 26):
; Draws one row from x1 to x2, interpolating r, g, b and z linearly and
; writing a pixel only where the new z is smaller than the Z-buffer value.
; eax, ebx, ecx, edx, esi, edi and ebp are not preserved.
.z1 equ dword[ebp+4] ; z coordinate shifted left CATMULL_SHIFT
.z2 equ dword[ebp+8]
.y equ word[ebp+12]
.x1 equ ebp+14 ; x and colour arguments are words
.c1b equ ebp+16
.c1g equ ebp+18
.c1r equ ebp+20
.x2 equ ebp+22
.c2b equ ebp+24
.c2g equ ebp+26
.c2r equ ebp+28

; locals: the four deltas are created by pushes, the rest by sub esp,16
.dz equ dword[ebp-4]
.dc_b equ dword[ebp-8]
.dc_g equ dword[ebp-12]
.dc_r equ dword[ebp-16]
.c_z equ dword[ebp-20] ; only referenced from commented-out code
.cb equ dword[ebp-24]
.cg equ dword[ebp-28]
.cr equ dword[ebp-32]
;.cg2 equ dword[ebp-36]


; addresses of the same slots for the MMX loads
.crM equ ebp-32
.cgM equ ebp-28
.cbM equ ebp-24

.dc_rM equ ebp-16
.dc_gM equ ebp-12
.dc_bM equ ebp-8
mov ebp,esp

; rows with y < 0 or y >= size_y-1 are not drawn
mov ax,.y
or ax,ax
jl .gl_quit
mov bx,[size_y_var]
dec bx
cmp ax,bx ;SIZE_Y
jge .gl_quit

; make x1 < x2; the dword exchanges swap (x,b), (g,r) and z of both ends
mov eax,dword[.x1]
cmp ax,word[.x2]
je .gl_quit
jl @f

xchg eax,dword[.x2]
mov dword[.x1],eax
mov eax,dword[.c1g]
xchg eax,dword[.c2g]
mov dword[.c1g],eax
mov eax,.z1
xchg eax,.z2
mov .z1,eax
@@:
; reject rows starting at or beyond size_x-1 or ending at or before column 0
mov bx,[size_x_var]
dec bx
cmp word[.x1],bx ;SIZE_X
jge .gl_quit
cmp word[.x2],0
jle .gl_quit

mov eax,.z2
sub eax,.z1
cdq
mov bx,word[.x2] ; dz = z2-z1/x2-x1
sub bx,word[.x1]
movsx ebx,bx
idiv ebx
push eax ; .dz

mov ax,word[.c2b]
sub ax,word[.c1b]
cwde
shl eax,ROUND
cdq
idiv ebx
push eax ; .dc_b

mov ax,word[.c2g]
sub ax,word[.c1g]
cwde
shl eax,ROUND
cdq
idiv ebx
push eax ; .dc_g

mov ax,word[.c2r]
sub ax,word[.c1r]
cwde
shl eax,ROUND ; dc_r = c2r-c1r/x2-x1
cdq
idiv ebx
push eax ; .dc_r

; left clipping: advance z and the colours by -x1 steps, then x1 = 0
cmp word[.x1],0 ; clipping on function
jg @f
mov eax,.dz
movsx ebx,word[.x1]
neg ebx
imul ebx
add .z1,eax
mov word[.x1],0

mov eax,.dc_r
imul ebx
sar eax,ROUND
add word[.c1r],ax

mov eax,.dc_g
imul ebx
sar eax,ROUND
add word[.c1g],ax

mov eax,.dc_b
imul ebx
sar eax,ROUND
add word[.c1b],ax

@@:
; right clipping: x2 = min(x2, size_x-1)
mov bx,[size_x_var]
dec bx
cmp word[.x2],bx ;SIZE_X
jl @f
mov word[.x2],bx ;SIZE_X
@@:
sub esp,16 ; calculate memory begin
movzx edx,word[size_x_var] ;SIZE_X ; in buffers
movzx eax,.y
mul edx
movzx edx,word[.x1]
add eax,edx ; eax = size_x * y + x1
push eax
lea eax,[eax*3]
add edi,eax ; 3 bytes per screen pixel
pop eax
shl eax,2
add esi,eax ; 4 bytes per Z-buffer entry

mov cx,word[.x2]
sub cx,word[.x1]
movzx ecx,cx ; ecx = number of pixels to process
mov ebx,.z1 ; ebx - current z shl CATMULL_SHIFT
;if Ext >= SSE
; mov .cz,edx
;end if
mov edx,.dz ; edx - delta z
movzx eax,word[.c1r]
shl eax,ROUND
mov .cr,eax
movzx eax,word[.c1g]
shl eax,ROUND
mov .cg,eax
movzx eax,word[.c1b]
shl eax,ROUND
mov .cb,eax
if Ext = MMX
; mov .c_z,edx
movd mm2,[.dc_bM] ; delta color blue MMX
movd mm3,[.cbM] ; current blue MMX
movq mm5,[.dc_rM] ; deltas of red and green
movq mm4,[.crM] ; current red and green
pxor mm6,mm6
end if


.ddraw:
;if Ext = MMX
; movq mm0,mm3
; psrsq mm0,32
; movd ebx,mm0
;end if
cmp ebx,dword[esi] ; esi - z_buffer
jge @f ; edi - Screen buffer
if Ext = MMX
movq mm0,mm3 ; mm0, mm1 - temp registers
psrld mm0,ROUND
movq mm1,mm4
psrld mm1,ROUND
packssdw mm1,mm0 ; words: r, g, b, 0
packuswb mm1,mm6 ; bytes: r, g, b, 0
; movd [edi],mm1
movd eax,mm1
stosw
shr eax,16
stosb
else
mov eax,.cr
sar eax,ROUND
stosb
mov eax,.cg
sar eax,ROUND
stosb
mov eax,.cb
sar eax,ROUND
stosb
end if
mov dword[esi],ebx ; remember the new nearest depth
;if Ext = NON
jmp .no_skip
;end if
@@:
add edi,3 ; hidden pixel: only step over it
.no_skip:
add esi,4
;if Ext=NON
add ebx,edx
;end if
if Ext=MMX
paddd mm3,mm2
paddd mm4,mm5
else
mov eax,.dc_g
add .cg,eax
mov eax,.dc_b
add .cb,eax
mov eax,.dc_r
add .cr,eax
end if
loop .ddraw

.gl_quit:
mov esp,ebp
ret 26
; Number of fraction bits used for interpolated x and colour values (shl/sar ROUND)
ROUND equ 8
; Number of fraction bits used for interpolated Z values (shl CATMULL_SHIFT)
CATMULL_SHIFT equ 8
gouraud_triangle_z:

;----procedure drawing gouraud triangle with z coordinate
;----interpolation ( Catmull algorithm )-----------------
;------------------in - eax - x1 shl 16 + y1 ------------
;---------------------- ebx - x2 shl 16 + y2 ------------
;---------------------- ecx - x3 shl 16 + y3 ------------
;---------------------- esi - pointer to Z-buffer--------
;---------------------- Z-buffer filled with dd variables
;---------------------- shifted CATMULL_SHIFT------------
;---------------------- edi - pointer to screen buffer---
;---------------------- stack : colors-------------------
;----------------- procedure doesn't save registers !!---
; Stack arguments: for each vertex four words (r, g, b, z); the procedure
; removes all 24 bytes itself (ret 24).
; The vertices are sorted by ascending y, per-row deltas are computed for the
; edges 1-2, 1-3 and 2-3, then gouraud_line_z is called once for every row.
; The local frame is built with pushes, so the push order below must match
; the [ebp-n] offsets of the equates.
.col1r equ ebp+4 ; each color as word
.col1g equ ebp+6 ; each z coordinate as word
.col1b equ ebp+8
.z1 equ ebp+10
.col2r equ ebp+12
.col2g equ ebp+14
.col2b equ ebp+16
.z2 equ ebp+18
.col3r equ ebp+20
.col3g equ ebp+22
.col3b equ ebp+24
.z3 equ ebp+26

; sorted vertex coordinates (filled by push eax / ebx / ecx)
.x1 equ word[ebp-2]
.y1 equ word[ebp-4]
.x2 equ word[ebp-6]
.y2 equ word[ebp-8]
.x3 equ word[ebp-10]
.y3 equ word[ebp-12]

; per-row deltas along edge 1-2
.dx12 equ dword[ebp-16]
.dz12 equ dword[ebp-20]
.dc12r equ dword[ebp-24]
.dc12g equ dword[ebp-28]
.dc12b equ dword[ebp-32]

; per-row deltas along edge 1-3
.dx13 equ dword[ebp-36]
.dz13 equ dword[ebp-40]
.dc13r equ dword[ebp-44]
.dc13g equ dword[ebp-48]
.dc13b equ dword[ebp-52]

; per-row deltas along edge 2-3
.dx23 equ dword[ebp-56]
.dz23 equ dword[ebp-60]
.dc23r equ dword[ebp-64]
.dc23g equ dword[ebp-68]
.dc23b equ dword[ebp-72]

; current fixed-point values at both ends of the row being drawn
.zz1 equ dword[ebp-76]
.c1r equ dword[ebp-80]
.c1g equ dword[ebp-84]
.c1b equ dword[ebp-88]
.zz2 equ dword[ebp-92]
.c2r equ dword[ebp-96]
.c2g equ dword[ebp-100]
.c2b equ dword[ebp-104]
;.zz1 equ dword[ebp-100]
;.zz2 equ dword[ebp-104]

; the same slots without a size, used as 8-byte operands by the MMX code:
; each covers two adjacent dwords, e.g. .c1rM = (c1r, zz1), .dc13rM = (dc13r, dz13)
.c1bM equ [ebp-88]
.c2bM equ [ebp-104]
.c1rM equ [ebp-80]
.c2rM equ [ebp-96]
.dc23bM equ [ebp-72]
.dc13bM equ [ebp-52]
.dc12bM equ [ebp-32]
.dc12rM equ [ebp-24]
.dc13rM equ [ebp-44]
.dc23rM equ [ebp-64]
if Ext=MMX
emms
end if

mov ebp,esp
; sub esp,84
.sort3: ; sort triangle coordinates...
; the colour/z words on the stack are swapped together with the coordinates
cmp ax,bx
jle .sort1
xchg eax,ebx
mov edx,dword[.col1r]
xchg edx,dword[.col2r]
mov dword[.col1r],edx
mov edx,dword[.col1b]
xchg edx,dword[.col2b]
mov dword[.col1b],edx
.sort1:
cmp bx,cx
jle .sort2
xchg ebx,ecx
mov edx,dword[.col2r]
xchg edx,dword[.col3r]
mov dword[.col2r],edx
mov edx,dword[.col2b]
xchg edx,dword[.col3b]
mov dword[.col2b],edx
jmp .sort3
.sort2:
push eax ; store in variables
push ebx
push ecx
mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that
and edx,ebx ; if *all* of them are negative a sign flag is raised
and edx,ecx
and edx,eax
test edx,80008000h ; Check both X&Y at once
jne .gt_loop2_end ; all x or all y are negative - nothing to draw

; ---- deltas along edge 1-2 (five zero dwords when y2 = y1) ----
mov bx,.y2 ; calc deltas
sub bx,.y1
jnz .gt_dx12_make
; mov .dx12,0
; mov .dz12,0
; mov .dc12r,0
; mov .dc12g,0
; mov .dc12b,0
mov ecx,5
@@:
push dword 0
loop @b
jmp .gt_dx12_done
.gt_dx12_make:
mov ax,.x2
sub ax,.x1
cwde
movsx ebx,bx
shl eax,ROUND
cdq
idiv ebx
; mov .dx12,eax
push eax

mov ax,word[.z2]
sub ax,word[.z1]
cwde
shl eax,CATMULL_SHIFT
cdq
idiv ebx
push eax ; .dz12

mov ax,word[.col2r]
sub ax,word[.col1r]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dc12r,eax
push eax
mov ax,word[.col2g]
sub ax,word[.col1g]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dc12g,eax
push eax
mov ax,word[.col2b] ;;---
sub ax,word[.col1b]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dc12b,eax
push eax
.gt_dx12_done:

; ---- deltas along edge 1-3 ----
mov bx,.y3 ; calc deltas
sub bx,.y1
jnz .gt_dx13_make
; mov .dx13,0
; mov .dz13,0
; mov .dc13r,0
; mov .dc13g,0
; mov .dc13b,0
mov ecx,5
@@:
push dword 0
loop @b
jmp .gt_dx13_done
.gt_dx13_make:
mov ax,.x3
sub ax,.x1
cwde
movsx ebx,bx
shl eax,ROUND
cdq
idiv ebx
; mov .dx13,eax
push eax

mov ax,word[.z3]
sub ax,word[.z1]
cwde
shl eax,CATMULL_SHIFT
cdq
idiv ebx
push eax ; .dz13

mov ax,word[.col3r]
sub ax,word[.col1r]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dc13r,eax
push eax
mov ax,word[.col3g]
sub ax,word[.col1g]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dc13g,eax
push eax
mov ax,word[.col3b]
sub ax,word[.col1b]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dc13b,eax
push eax
.gt_dx13_done:

; ---- deltas along edge 2-3 ----
mov bx,.y3 ; calc deltas
sub bx,.y2
jnz .gt_dx23_make
; mov .dx23,0
; mov .dz23,0
; mov .dc23r,0
; mov .dc23g,0
; mov .dc23b,0
mov ecx,5
@@:
push dword 0
loop @b
jmp .gt_dx23_done
.gt_dx23_make:
mov ax,.x3
sub ax,.x2
cwde
movsx ebx,bx
shl eax,ROUND
cdq
idiv ebx
; mov .dx23,eax
push eax

mov ax,word[.z3]
sub ax,word[.z2]
cwde
shl eax,CATMULL_SHIFT
cdq
idiv ebx
push eax ; .dz23

mov ax,word[.col3r]
sub ax,word[.col2r]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dc23r,eax
push eax
mov ax,word[.col3g]
sub ax,word[.col2g]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dc23g,eax
push eax
mov ax,word[.col3b]
sub ax,word[.col2b]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dc23b,eax
push eax
.gt_dx23_done:
sub esp,32 ; room for .zz1 .. .c2b (8 dwords)

; both row ends start at vertex 1
movsx eax,.x1 ; eax - cur x1
shl eax,ROUND ; ebx - cur x2
mov ebx,eax
movsx edx,word[.z1]
shl edx,CATMULL_SHIFT
mov .zz1,edx
mov .zz2,edx
movzx edx,word[.col1r]
shl edx,ROUND
mov .c1r,edx
mov .c2r,edx
movzx edx,word[.col1g]
shl edx,ROUND
mov .c1g,edx
mov .c2g,edx
movzx edx,word[.col1b]
shl edx,ROUND
mov .c1b,edx
mov .c2b,edx
mov cx,.y1
cmp cx,.y2
jge .gt_loop1_end

; ---- upper part: rows y1 .. y2-1, between edges 1-3 and 1-2 ----
.gt_loop1:
pushad
; macro .debug

; arguments of gouraud_line_z, pushed last-to-first
mov edx,.c2r ; c2r,c2g,c2b,c1r,c1g,c1b - current colors
sar edx,ROUND
push dx
mov edx,.c2g
sar edx,ROUND
push dx
mov edx,.c2b
sar edx,ROUND
push dx
sar ebx,ROUND ; x2
push bx
mov edx,.c1r
sar edx,ROUND
push dx
mov edx,.c1g
sar edx,ROUND
push dx
mov edx,.c1b
sar edx,ROUND
push dx
sar eax,ROUND
push ax ; x1
push cx ; y
push .zz2
push .zz1
call gouraud_line_z

popad
if Ext >= MMX
; each movq/paddd handles two dwords: (b,g) and (r,z)
movq mm0,.c1bM
paddd mm0,qword .dc13bM
movq .c1bM,mm0
movq mm1,.c2bM
paddd mm1,qword .dc12bM
movq .c2bM,mm1

movq mm0,.c1rM
paddd mm0,qword .dc13rM
movq .c1rM,mm0
movq mm1,.c2rM
paddd mm1,qword .dc12rM
movq .c2rM,mm1
else
mov edx,.dc13r
add .c1r,edx
mov edx,.dc13g
add .c1g,edx
mov edx,.dc13b
add .c1b,edx
mov edx,.dc12r
add .c2r,edx
mov edx,.dc12g
add .c2g,edx
mov edx,.dc12b
add .c2b,edx

mov edx,.dz13
add .zz1,edx
mov edx,.dz12
add .zz2,edx
end if
add eax,.dx13
add ebx,.dx12
inc cx
cmp cx,.y2
jl .gt_loop1

.gt_loop1_end:
mov cx,.y2
cmp cx,.y3
jge .gt_loop2_end

; second row end restarts at vertex 2 and follows edge 2-3
movsx ebx,.x2 ; eax - cur x1
shl ebx,ROUND ; ebx - cur x2
movsx edx,word[.z2]
shl edx,CATMULL_SHIFT
mov .zz2,edx
movzx edx,word[.col2r]
shl edx,ROUND
mov .c2r,edx
movzx edx,word[.col2g]
shl edx,ROUND
mov .c2g,edx
movzx edx,word[.col2b]
shl edx,ROUND
mov .c2b,edx

; ---- lower part: rows y2 .. y3-1, between edges 1-3 and 2-3 ----
.gt_loop2:
pushad
; macro .debug

mov edx,.c2r ; c2r,c2g,c2b,c1r,c1g,c1b - current colors
sar edx,ROUND
push dx
mov edx,.c2g
sar edx,ROUND
push dx
mov edx,.c2b
sar edx,ROUND
push dx
sar ebx,ROUND ; x2
push bx
mov edx,.c1r
sar edx,ROUND
push dx
mov edx,.c1g
sar edx,ROUND
push dx
mov edx,.c1b
sar edx,ROUND
push dx
sar eax,ROUND
push ax ; x1
push cx ; y
push .zz2
push .zz1
call gouraud_line_z

popad

if Ext >= MMX
movq mm0,.c1bM
paddd mm0,qword .dc13bM
movq .c1bM,mm0
movq mm1,.c2bM
paddd mm1,qword .dc23bM
movq .c2bM,mm1

movq mm0,.c1rM
paddd mm0,qword .dc13rM
movq .c1rM,mm0
movq mm1,.c2rM
paddd mm1,qword .dc23rM
movq .c2rM,mm1
else
mov edx,.dc13r
add .c1r,edx
mov edx,.dc13g
add .c1g,edx
mov edx,.dc13b
add .c1b,edx
mov edx,.dc23r
add .c2r,edx
mov edx,.dc23g
add .c2g,edx
mov edx,.dc23b
add .c2b,edx
mov edx,.dz13
add .zz1,edx
mov edx,.dz23
add .zz2,edx
end if
add eax,.dx13
add ebx,.dx23
inc cx
cmp cx,.y3
jl .gt_loop2
.gt_loop2_end:

mov esp,ebp ; drop the whole local frame
ret 24
gouraud_line_z:
;----------------- procedure drawing gouraud line
;----------------- with z coordinate interpolation
;----------------- esi - pointer to Z_buffer
;----------------- edi - pointer to screen buffer
;----------------- stack (26 bytes, removed by the procedure - ret 26):
; Draws one row from x1 to x2, interpolating r, g, b and z linearly and
; writing a pixel only where the new z is smaller than the Z-buffer value.
; eax, ebx, ecx, edx, esi, edi and ebp are not preserved.
.z1 equ dword[ebp+4] ; z coordinate shifted left CATMULL_SHIFT
.z2 equ dword[ebp+8]
.y equ word[ebp+12]
.x1 equ ebp+14 ; x and colour arguments are words
.c1b equ ebp+16
.c1g equ ebp+18
.c1r equ ebp+20
.x2 equ ebp+22
.c2b equ ebp+24
.c2g equ ebp+26
.c2r equ ebp+28

; locals: the four deltas are created by pushes, the rest by sub esp,16
.dz equ dword[ebp-4]
.dc_b equ dword[ebp-8]
.dc_g equ dword[ebp-12]
.dc_r equ dword[ebp-16]
.c_z equ dword[ebp-20] ; only referenced from commented-out code
.cb equ dword[ebp-24]
.cg equ dword[ebp-28]
.cr equ dword[ebp-32]
;.cg2 equ dword[ebp-36]


; addresses of the same slots for the MMX loads
.crM equ ebp-32
.cgM equ ebp-28
.cbM equ ebp-24

.dc_rM equ ebp-16
.dc_gM equ ebp-12
.dc_bM equ ebp-8
mov ebp,esp

; rows with y < 0 or y >= size_y-1 are not drawn
mov ax,.y
or ax,ax
jl .gl_quit
mov bx,[size_y_var]
dec bx
cmp ax,bx ;SIZE_Y
jge .gl_quit

; make x1 < x2; the dword exchanges swap (x,b), (g,r) and z of both ends
mov eax,dword[.x1]
cmp ax,word[.x2]
je .gl_quit
jl @f

xchg eax,dword[.x2]
mov dword[.x1],eax
mov eax,dword[.c1g]
xchg eax,dword[.c2g]
mov dword[.c1g],eax
mov eax,.z1
xchg eax,.z2
mov .z1,eax
@@:
; reject rows starting at or beyond size_x-1 or ending at or before column 0
mov bx,[size_x_var]
dec bx
cmp word[.x1],bx ;SIZE_X
jge .gl_quit
cmp word[.x2],0
jle .gl_quit

mov eax,.z2
sub eax,.z1
cdq
mov bx,word[.x2] ; dz = z2-z1/x2-x1
sub bx,word[.x1]
movsx ebx,bx
idiv ebx
push eax ; .dz

mov ax,word[.c2b]
sub ax,word[.c1b]
cwde
shl eax,ROUND
cdq
idiv ebx
push eax ; .dc_b

mov ax,word[.c2g]
sub ax,word[.c1g]
cwde
shl eax,ROUND
cdq
idiv ebx
push eax ; .dc_g

mov ax,word[.c2r]
sub ax,word[.c1r]
cwde
shl eax,ROUND ; dc_r = c2r-c1r/x2-x1
cdq
idiv ebx
push eax ; .dc_r

; left clipping: advance z and the colours by -x1 steps, then x1 = 0
cmp word[.x1],0 ; clipping on function
jg @f
mov eax,.dz
movsx ebx,word[.x1]
neg ebx
imul ebx
add .z1,eax
mov word[.x1],0

mov eax,.dc_r
imul ebx
sar eax,ROUND
add word[.c1r],ax

mov eax,.dc_g
imul ebx
sar eax,ROUND
add word[.c1g],ax

mov eax,.dc_b
imul ebx
sar eax,ROUND
add word[.c1b],ax

@@:
; right clipping: x2 = min(x2, size_x-1)
mov bx,[size_x_var]
dec bx
cmp word[.x2],bx ;SIZE_X
jl @f
mov word[.x2],bx ;SIZE_X
@@:
sub esp,16 ; calculate memory begin
movzx edx,word[size_x_var] ;SIZE_X ; in buffers
movzx eax,.y
mul edx
movzx edx,word[.x1]
add eax,edx ; eax = size_x * y + x1
push eax
lea eax,[eax*3]
add edi,eax ; 3 bytes per screen pixel
pop eax
shl eax,2
add esi,eax ; 4 bytes per Z-buffer entry

mov cx,word[.x2]
sub cx,word[.x1]
movzx ecx,cx ; ecx = number of pixels to process
mov ebx,.z1 ; ebx - current z shl CATMULL_SHIFT
;if Ext >= SSE
; mov .cz,edx
;end if
mov edx,.dz ; edx - delta z
movzx eax,word[.c1r]
shl eax,ROUND
mov .cr,eax
movzx eax,word[.c1g]
shl eax,ROUND
mov .cg,eax
movzx eax,word[.c1b]
shl eax,ROUND
mov .cb,eax
if Ext = MMX
; mov .c_z,edx
movd mm2,[.dc_bM] ; delta color blue MMX
movd mm3,[.cbM] ; current blue MMX
movq mm5,[.dc_rM] ; deltas of red and green
movq mm4,[.crM] ; current red and green
pxor mm6,mm6
end if


.ddraw:
;if Ext = MMX
; movq mm0,mm3
; psrsq mm0,32
; movd ebx,mm0
;end if
cmp ebx,dword[esi] ; esi - z_buffer
jge @f ; edi - Screen buffer
if Ext = MMX
movq mm0,mm3 ; mm0, mm1 - temp registers
psrld mm0,ROUND
movq mm1,mm4
psrld mm1,ROUND
packssdw mm1,mm0 ; words: r, g, b, 0
packuswb mm1,mm6 ; bytes: r, g, b, 0
; movd [edi],mm1
movd eax,mm1
stosw
shr eax,16
stosb
else
mov eax,.cr
sar eax,ROUND
stosb
mov eax,.cg
sar eax,ROUND
stosb
mov eax,.cb
sar eax,ROUND
stosb
end if
mov dword[esi],ebx ; remember the new nearest depth
;if Ext = NON
jmp .no_skip
;end if
@@:
add edi,3 ; hidden pixel: only step over it
.no_skip:
add esi,4
;if Ext=NON
add ebx,edx
;end if
if Ext=MMX
paddd mm3,mm2
paddd mm4,mm5
else
mov eax,.dc_g
add .cg,eax
mov eax,.dc_b
add .cb,eax
mov eax,.dc_r
add .cr,eax
end if
loop .ddraw

.gl_quit:
mov esp,ebp
ret 26
/programs/demos/view3ds/grd_tex.inc
1,1016 → 1,1016
 
 
; Number of fraction bits used for interpolated Z values (shl CATMULL_SHIFT)
CATMULL_SHIFT equ 8
; Number of fraction bits used for interpolated x, colour and texture values
ROUND equ 8
;NON=0
;MMX=1
;Ext=MMX
;TEX_SIZE=0x3fff
;SIZE_X equ 512
;SIZE_Y equ 512
;ROUND = 8
;TEX_SHIFT equ 6
 
; procedure drawing textured triangle with Gouraud shading
; Z-buffer algorithm included, Z coord interpolation ----
; The pixel color is computed as (col1 * col2)/256 ------
;------------------in - eax - x1 shl 16 + y1 ------------
;---------------------- ebx - x2 shl 16 + y2 ------------
;---------------------- ecx - x3 shl 16 + y3 ------------
;---------------------- esi - pointer to Z-buffer--------
;---------------------- edx - pointer to texture---------
;---------------------- Z-buffer filled with dd variables
;---------------------- shifted CATMULL_SHIFT------------
;---------------------- edi - pointer to screen buffer---
;---------------------- stack : colors-------------------
 
 
 
tex_plus_grd_triangle:
;------------------------------------------------------------------------------
; Draw a textured, Gouraud-shaded, Z-buffered triangle.
; in:  eax - x1 shl 16 + y1
;      ebx - x2 shl 16 + y2
;      ecx - x3 shl 16 + y3
;      edx - pointer to texture
;      esi - pointer to Z-buffer
;      edi - pointer to screen buffer
;      stack (36 bytes, removed by this procedure with ret 36): see below
; out: none; registers are not preserved.
; The vertices are sorted by ascending y, per-row deltas are computed for the
; edges 1-2, 2-3 and 1-3, and horizontal_tex_grd_line is called for each row.
; The local frame is built with pushes: the push order must match the
; [ebp-n] offsets of the equates below.
;------------------------------------------------------------------------------
; parameters :
.tex_y3 equ [ebp+38] ; 36 bytes through stack
.tex_x3 equ [ebp+36]
.tex_y2 equ [ebp+34]
.tex_x2 equ [ebp+32]
.tex_y1 equ [ebp+30]
.tex_x1 equ [ebp+28]

.z3 equ [ebp+26]
.col3b equ [ebp+24]
.col3g equ [ebp+22]
.col3r equ [ebp+20]

.z2 equ [ebp+18]
.col2b equ [ebp+16]
.col2g equ [ebp+14]
.col2r equ [ebp+12]

.z1 equ [ebp+10]
.col1b equ [ebp+8]
.col1g equ [ebp+6]
.col1r equ [ebp+4]

; local variables:

.tex_ptr equ dword[ebp-4]
.z_ptr equ dword[ebp-8]
.scr_buff equ dword[ebp-12]

.x1 equ word[ebp-14] ;dw ? ;equ word[ebp-10]
.y1 equ word[ebp-16] ;dw ? ;equ word[ebp-12]
.x2 equ word[ebp-18] ;dw ? ;equ word[ebp-14]
.y2 equ word[ebp-20] ;dw ? ;equ word[ebp-16]
.x3 equ word[ebp-22] ;dw ? ;equ word[ebp-18]
.y3 equ word[ebp-24] ;dw ? ;equ word[ebp-20]

; per-row deltas along edge 1-2
.dx12 equ dword[ebp-28] ;dd ?
.tex_dx12 equ dword[ebp-32] ;dd ?
.tex_dy12 equ [ebp-36] ;dd ?
.dz12 equ dword[ebp-40] ;dd ?
.dc12r equ [ebp-44] ;dd ?
.dc12g equ dword[ebp-48] ;dd ?
.dc12b equ [ebp-52] ;dd ?

; per-row deltas along edge 2-3
.dx23 equ dword[ebp-56] ;dd ?
.tex_dx23 equ dword[ebp-60] ;dd ?
.tex_dy23 equ [ebp-64] ;dd ?
.dz23 equ dword[ebp-68] ;dd ?
.dc23r equ [ebp-72] ;dd ?
.dc23g equ dword[ebp-76] ;dd ?
.dc23b equ [ebp-80] ;dword[ebp-8]dd ?

; per-row deltas along edge 1-3
.dx13 equ dword[ebp-84] ;dd ?
.tex_dx13 equ dword[ebp-88] ;dd ?
.tex_dy13 equ [ebp-92] ;dd ?
.dz13 equ dword[ebp-96] ;dd ?
.dc13r equ [ebp-100] ;dd ?
.dc13g equ dword[ebp-104] ;dd ?
.dc13b equ [ebp-108] ;dd ?

; current fixed-point values at the first end of the row (12 dwords in total
; for both ends, reserved with sub esp,12*4)
.scan_x1 equ dword[ebp-112] ;dd ?
.scan_y1 equ [ebp-116] ;dd ?
.zz1 equ dword[ebp-120] ;dw ?
.cur1r equ [ebp-124] ;dw ?
.cur1g equ dword[ebp-128] ;dw ?
.cur1b equ [ebp-132] ;dw ?

; current fixed-point values at the second end of the row
.scan_x2 equ dword[ebp-136] ;dd ?
.scan_y2 equ [ebp-140] ;dd ?
.zz2 equ dword[ebp-144] ;dw ?
.cur2r equ [ebp-148] ;dw ?
.cur2g equ dword[ebp-152] ;dw ?
.cur2b equ [ebp-156] ;dw ?


mov ebp,esp

; mov .tex_ptr,edx
; mov .z_ptr,esi
; mov .scr_buff,edi
push edx esi edi ; these become .tex_ptr, .z_ptr, .scr_buff
; push esi
; push edi
mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that
and edx,ebx ; if *all* of them are negative a sign flag is raised
and edx,ecx
and edx,eax
test edx,80008000h ; Check both X&Y at once
jne .loop2_end ; all x or all y are negative - nothing to draw

; sort the vertices by ascending y; colour, z and texture words are swapped
; together with the coordinates
.sort3:
cmp ax,bx
jle .sort1
xchg eax,ebx
if Ext>=MMX
movq mm0, .col1r ; exchange r, g, b, z
movq mm1, .col2r
movq .col1r ,mm1
movq .col2r ,mm0
else
mov edx,dword .col1r ; exchange both r and g
xchg edx,dword .col2r
mov dword .col1r ,edx

mov edx,dword .col1b ; b and z
xchg edx,dword .col2b
mov dword .col1b ,edx
end if

mov edx,dword .tex_x1 ; tex_x and tex_y
xchg edx,dword .tex_x2
mov dword .tex_x1 ,edx

.sort1:
cmp bx,cx
jle .sort2
xchg ebx,ecx

if Ext>=MMX
movq mm0, .col2r ; exchange r, g, b, z
movq mm1, .col3r
movq .col3r ,mm0
movq .col2r ,mm1
else

mov edx,dword .col2r ; r, g
xchg edx,dword .col3r
mov dword .col2r,edx

mov edx,dword .col2b ; b, z
xchg edx,dword .col3b
mov dword .col2b,edx
end if

mov edx,dword .tex_x2
xchg edx,dword .tex_x3
mov dword .tex_x2,edx

jmp .sort3

.sort2:

push eax ebx ecx ; store in variables
; push ebx
; push ecx

;****************** delta computing zone **************
;+++++++++ first zone: edge 1-2 (seven zero dwords when y2 = y1)
mov bx,.y2 ; calc delta12
sub bx,.y1
jnz .dx12_make
mov ecx,7
@@:
push dword 0
loop @b
jmp .dx12_done
.dx12_make:


mov ax,.x2
sub ax,.x1
cwde
movsx ebx,bx
shl eax,ROUND
cdq
idiv ebx
; mov .dx12,eax
push eax

if 0 ; Ext=SSE
movd mm0,.col1r ; 2 words r, g
pxor mm1,mm1
punpcklwd mm0,mm1
cvtpi2ps xmm0,mm0
movlhps xmm0,xmm0
movd mm0,.col1g ; 2 words b, z
punpcklwd mm0,mm1
cvtpi2ps xmm0,mm0
; xmm0=four float double words
divss xmm0,.pack3
;convert and insert mm0 to lower xmm1 ..
end if

mov ax,word .tex_x2
sub ax,word .tex_x1
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .tex_dx12,eax
push eax

mov ax,word .tex_y2
sub ax,word .tex_y1
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .tex_dy12,eax
push eax

mov ax,word .z2
sub ax,word .z1
cwde
shl eax,CATMULL_SHIFT
cdq
idiv ebx
; mov .dz12,eax
push eax ; .dz12

mov ax,word .col2r
sub ax,word .col1r
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dc12r,eax
push eax

mov ax,word .col2g
sub ax,word .col1g
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dc12g,eax
push eax

mov ax,word .col2b ;;---
sub ax,word .col1b
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dc12b,eax
push eax

;+++++++++++++++++ second zone: edge 2-3 +++++++++++++
.dx12_done:

mov bx,.y3 ; calc delta23
sub bx,.y2
jnz .dx23_make
mov ecx,7
@@:
push dword 0
loop @b
jmp .dx23_done

.dx23_make:
mov ax,.x3
sub ax,.x2
cwde
movsx ebx,bx
shl eax,ROUND
cdq
idiv ebx
; mov .dx23,eax
push eax

mov ax,word .tex_x3
sub ax,word .tex_x2
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .tex_dx23,eax
push eax

mov ax,word .tex_y3
sub ax,word .tex_y2
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .tex_dy23,eax
push eax

mov ax,word .z3
sub ax,word .z2
cwde
shl eax,CATMULL_SHIFT
cdq
idiv ebx
; mov .dz23,eax
push eax ; .dz23

mov ax,word .col3r
sub ax,word .col2r
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dc23r,eax
push eax

mov ax,word .col3g
sub ax,word .col2g
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dc23g,eax
push eax

mov ax,word .col3b ;;---
sub ax,word .col2b
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dc23b,eax
push eax

.dx23_done:
;++++++++++++++++++ third zone: edge 1-3 ++++++++++++++++++++++++
mov bx,.y3 ; calc delta13
sub bx,.y1
jnz .dx13_make
mov ecx,7
@@:
push dword 0
loop @b
jmp .dx13_done
.dx13_make:
mov ax,.x3
sub ax,.x1
cwde
movsx ebx,bx
shl eax,ROUND
cdq
idiv ebx
; mov .dx13,eax
push eax

mov ax,word .tex_x3
sub ax,word .tex_x1
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .tex_dx13,eax
push eax

mov ax,word .tex_y3
sub ax,word .tex_y1
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .tex_dy13,eax
push eax

mov ax,word .z3
sub ax,word .z1
cwde
shl eax,CATMULL_SHIFT
cdq
idiv ebx
; mov .dz13,eax
push eax ; .dz13

mov ax,word .col3r
sub ax,word .col1r
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dc13r,eax
push eax

mov ax,word .col3g
sub ax,word .col1g
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dc13g,eax
push eax

mov ax,word .col3b ;;---
sub ax,word .col1b
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dc13b,eax
push eax

.dx13_done:

; <<<<<<< ::delta zone end+++++++++++++++++++++ >>>>>>>>
; Reserve exactly the 12 dwords .scan_x1 .. .cur2b (down to ebp-156).
; An odd size here would leave esp on an odd address for every following
; push, pushad and call.
sub esp,12*4

; both row ends start at vertex 1
movsx eax,.x1 ; eax - cur x1
shl eax,ROUND ; ebx - cur x2
mov ebx,eax
movsx edx,word .z1
shl edx,CATMULL_SHIFT
mov .zz1,edx
mov .zz2,edx

movzx edi,word .col1r
shl edi,ROUND
mov .cur1r,edi
mov .cur2r,edi
movzx esi,word .col1g
shl esi,ROUND
mov .cur1g,esi
mov .cur2g,esi
movzx edx,word .col1b
shl edx,ROUND
mov .cur1b,edx
mov .cur2b,edx

movzx edi,word .tex_x1
shl edi,ROUND
mov .scan_x1,edi
mov .scan_x2,edi
movzx edx,word .tex_y1
shl edx,ROUND
mov .scan_y1,edx
mov .scan_y2,edx

mov cx,.y1
cmp cx,.y2
jge .loop1_end
; ---- upper part: rows y1 .. y2-1, between edges 1-3 and 1-2 ----
.loop_1:
; push eax ebx ebp
pushad

; stack arguments of horizontal_tex_grd_line; x1/x2 go in eax/ebx
push .tex_ptr
push .scr_buff
push .z_ptr
push cx

push .zz2

push .scan_x2
push dword .scan_y2
push dword .cur2r
push .cur2g
push dword .cur2b

push .zz1

push .scan_x1
push dword .scan_y1
push dword .cur1r
push .cur1g
push dword .cur1b

sar eax,ROUND
sar ebx,ROUND
call horizontal_tex_grd_line

; pop ebp ebx eax
popad

if (Ext = MMX)|(Ext=SSE)
; each movq covers two adjacent dwords: (b,g), (r,z), (tex_y,tex_x)
movq mm0,.cur1b
movq mm1,.cur1r
movq mm2,.scan_y1
movq mm3,.cur2b
movq mm4,.cur2r
movq mm5,.scan_y2
paddd mm0,.dc13b
paddd mm1,.dc13r
paddd mm2,.tex_dy13
paddd mm3,.dc12b
paddd mm4,.dc12r
paddd mm5,.tex_dy12
movq .cur1b,mm0
movq .cur1r,mm1
movq .scan_y1,mm2
movq .cur2b,mm3
movq .cur2r,mm4
movq .scan_y2,mm5
end if
if Ext >= SSE2
; each movups covers four adjacent dwords: (b,g,r,z)
movups xmm0,.cur1b
movups xmm1,.dc13b
movups xmm2,.cur2b
movups xmm3,.dc12b
movq mm2,.scan_y1
movq mm5,.scan_y2
paddd xmm0,xmm1
paddd xmm2,xmm3
paddd mm2,.tex_dy13
paddd mm5,.tex_dy12
movq .scan_y1,mm2
movq .scan_y2,mm5
movups .cur1b,xmm0
movups .cur2b,xmm2
end if

if Ext = NON
mov edx,.dc13b
add .cur1b,edx
mov esi,.dc13g
add .cur1g,esi
mov edi,.dc13r
add .cur1r,edi
mov edx,.dz13
add .zz1,edx
mov edx,.tex_dx13
add .scan_x1,edx
mov esi,.tex_dy13
add .scan_y1,esi

mov edi,.dc12b
add .cur2b,edi
mov esi,.dc12g
add .cur2g,esi
mov edx,.dc12r
add .cur2r,edx
mov edi,.tex_dx12
add .scan_x2,edi
mov esi,.tex_dy12
add .scan_y2,esi
mov edx,.dz12
add .zz2,edx
end if
add eax,.dx13
add ebx,.dx12
inc cx
cmp cx,.y2
jl .loop_1
.loop1_end:
movzx ecx,.y2
cmp cx,.y3
jge .loop2_end

; second row end restarts at vertex 2 and follows edge 2-3
movsx ebx,.x2 ; eax - cur x1
shl ebx,ROUND ; ebx - cur x2

movsx edx,word .z2
shl edx,CATMULL_SHIFT
; mov .zz1,edx
mov .zz2,edx

movzx edi,word .col2r
shl edi,ROUND
; mov .cur1r,edi
mov .cur2r,edi
movzx esi,word .col2g
shl esi,ROUND
; mov .cur1g,esi
mov .cur2g,esi
movzx edx,word .col2b
shl edx,ROUND
; mov .cur1b,edx
mov .cur2b,edx

movzx edi,word .tex_x2
shl edi,ROUND
; mov .scan_x1,edi
mov .scan_x2,edi
movzx edx,word .tex_y2
shl edx,ROUND
; mov .scan_y1,edx
mov .scan_y2,edx

; ---- lower part: rows y2 .. y3-1, between edges 1-3 and 2-3 ----
.loop_2:
pushad

push .tex_ptr
push .scr_buff
push .z_ptr
push cx

push .zz2

push .scan_x2
push dword .scan_y2
push dword .cur2r
push .cur2g
push dword .cur2b

push .zz1

push .scan_x1
push dword .scan_y1
push dword .cur1r
push .cur1g
push dword .cur1b

sar eax,ROUND
sar ebx,ROUND
call horizontal_tex_grd_line

popad

if (Ext = MMX)|(Ext=SSE)
movq mm0,.cur1b
movq mm1,.cur1r
movq mm2,.scan_y1
movq mm3,.cur2b
movq mm4,.cur2r
movq mm5,.scan_y2
paddd mm0,.dc13b
paddd mm1,.dc13r
paddd mm2,.tex_dy13
paddd mm3,.dc23b
paddd mm4,.dc23r
paddd mm5,.tex_dy23
movq .cur1b,mm0
movq .cur1r,mm1
movq .scan_y1,mm2
movq .cur2b,mm3
movq .cur2r,mm4
movq .scan_y2,mm5
end if
if Ext >= SSE2
movups xmm0,.cur1b
movups xmm1,.dc13b
movups xmm2,.cur2b
movups xmm3,.dc23b
movq mm2,.scan_y1
movq mm5,.scan_y2
paddd xmm0,xmm1
paddd xmm2,xmm3
paddd mm2,.tex_dy13
paddd mm5,.tex_dy23
movq .scan_y1,mm2
movq .scan_y2,mm5
movups .cur1b,xmm0
movups .cur2b,xmm2
end if
if Ext = NON
mov edx,.dc13b
add .cur1b,edx
mov esi,.dc13g
add .cur1g,esi
mov edi,.dc13r
add .cur1r,edi
mov edx,.tex_dx13
add .scan_x1,edx
mov esi,.tex_dy13
add .scan_y1,esi
mov edx,.dz13
add .zz1,edx

mov edi,.dc23b
add .cur2b,edi
mov esi,.dc23g
add .cur2g,esi
mov edx,.dc23r
add .cur2r,edx
mov edi,.tex_dx23
add .scan_x2,edi
mov esi,.tex_dy23
add .scan_y2,esi
mov edx,.dz23
add .zz2,edx
end if
add eax,.dx13
add ebx,.dx23
inc cx
cmp cx,.y3
jl .loop_2

.loop2_end:
mov esp,ebp ; drop the whole local frame
ret 36
horizontal_tex_grd_line:
; Draws one horizontal span of a textured, colour-shaded triangle with a
; per-pixel Z-buffer test. Every texel channel is scaled by the matching
; interpolated colour: (texel * colour) shr 8.
;in:
; eax : x1, ebx : x2
; stack : 62 bytes of arguments (.b1 .. .tex_ptr below), removed by 'ret 42+20'
; ebp is overwritten ('mov ebp,esp') and is not restored on exit.

; --- stack arguments (above the return address) ---
.tex_ptr equ [ebp+62]
.screen equ [ebp+58]
.z_buffer equ [ebp+54]
.y equ [ebp+52]

; values at the second end of the span
.z2 equ [ebp+48]
.tex_x2 equ [ebp+44]
.tex_y2 equ [ebp+40]
.r2 equ [ebp+36]
.g2 equ [ebp+32]
.b2 equ [ebp+28]

; values at the first end of the span
.z1 equ [ebp+24]
.tex_x1 equ [ebp+20]
.tex_y1 equ [ebp+16]
.r1 equ [ebp+12]
.g1 equ [ebp+8]
.b1 equ [ebp+4]

; --- locals: created by the pushes in the body, in exactly this order ---
.x1 equ word[ebp-2]
.x2 equ word[ebp-4]
.dz equ dword[ebp-8]
.db equ dword[ebp-12]
.dg equ dword[ebp-16]
.dr equ dword[ebp-20]
.dtex_x equ dword[ebp-24]
.dtex_y equ dword[ebp-28]

; current (per pixel) values
.c_ty equ [ebp-32]
.c_tx equ [ebp-36]
.cb equ [ebp-40]
.cg equ [ebp-44]
.cr equ [ebp-48]
.t_col equ [ebp-52]

; qword views of neighbouring dword deltas, for the MMX path
.dtex_yM equ qword[ebp-28]
.drM equ qword[ebp-20]
.dbM equ qword[ebp-12]

mov ebp,esp
; sub esp,30

; reject the line when y is outside 0 .. size_y_var-1
mov cx,word .y
or cx,cx
jl .quit_l

cmp cx,word[size_y_var] ;SIZE_Y
jge .quit_l

; empty span -> quit; x1 > x2 -> swap both ends so that x1 < x2
cmp ax,bx
je .quit_l
jl @f

xchg eax,ebx

if Ext=NON
mov ecx,dword .r1
xchg ecx, .r2
mov dword .r1, ecx

mov ecx,dword .g1
xchg ecx, .g2
mov dword .g1, ecx

mov ecx,dword .b1
xchg ecx, .b2
mov dword .b1, ecx

mov ecx,dword .tex_x1
xchg ecx, .tex_x2
mov dword .tex_x1, ecx

mov ecx,dword .tex_y1
xchg ecx, .tex_y2
mov dword .tex_y1, ecx

mov ecx,dword .z1
xchg ecx, .z2
mov dword .z1, ecx
end if
if (Ext=MMX)
; each movq moves two neighbouring dword arguments at once
movq mm0,.b1 ; b, g
movq mm1,.b2
movq .b1, mm1
movq .b2, mm0
movq mm2,.r1 ; r, y
movq mm3,.r2
movq .r1,mm3
movq .r2,mm2
movq mm4,.tex_x1 ; x, z
movq mm5,.tex_x2
movq .tex_x1,mm5
movq .tex_x2,mm4

end if
if Ext>=SSE
; 16 bytes from .b1: b, g, r, tex_y
movups xmm0,.b1
movups xmm1,.b2
movups .b1,xmm1
movups .b2,xmm0
movq mm4,.tex_x1 ; x, z
movq mm5,.tex_x2
movq .tex_x1,mm5
movq .tex_x2,mm4
end if

@@:
; span completely left or right of the screen -> quit
or bx,bx
jle .quit_l
cmp ax,word[size_x_var] ;SIZE_X
jge .quit_l

push ax ; .x1
push bx ; .x2

; per pixel deltas: (value2 - value1) / (x2 - x1); x2 - x1 is non zero here
mov eax,.z2 ; delta zone************
sub eax,.z1
cdq
mov bx,.x2
sub bx,.x1
movsx ebx,bx
idiv ebx
push eax ; .dz

mov eax,.b2
sub eax,.b1
cdq
idiv ebx
push eax ; .db

mov eax,.g2
sub eax,.g1
cdq
idiv ebx
push eax ; .dg

mov eax,.r2
sub eax,.r1
cdq
idiv ebx
push eax ; .dr

mov eax,.tex_x2
sub eax,.tex_x1
cdq
idiv ebx
push eax ; .dtex_x

mov eax,.tex_y2
sub eax,.tex_y1
cdq
idiv ebx
push eax ; .dtex_y

cmp .x1,0
jg @f

; x1 <= 0: advance every start value by delta * (-x1) and start at x = 0
mov eax,.dz ; clipping
movsx ebx,.x1
neg ebx
imul ebx
add .z1,eax
mov .x1,0

mov eax,.dr
imul ebx
add .r1,eax
;if Ext=NON
mov eax,.dg
imul ebx
add .g1,eax

mov eax,.db
imul ebx
add .b1,eax

mov eax,.dtex_x
imul ebx
add .tex_x1,eax

mov eax,.dtex_y
imul ebx
add .tex_y1,eax
@@:
; limit x2 to size_x_var
movsx edx,word[size_x_var] ;SIZE_X
cmp .x2,dx
jl @f
mov .x2,dx
@@:
; calc line address begin in screen and Z buffer
; eax = y * size_x + x1 (pixel index)
movsx eax,word .y
mul edx
movsx edx,.x1
add eax,edx

mov esi,eax
shl esi,2 ; Z-buffer entries are dwords
add esi,.z_buffer

lea eax,[eax*3] ; screen pixels are 3 bytes
mov edi,.screen
add edi,eax

; ecx = number of pixels to draw
mov cx,.x2
sub cx,.x1
movzx ecx,cx

; init current variables
push dword .tex_y1 ; .c_ty
;if Ext=NON
push dword .tex_x1 ; .c_tx

push dword .b1 ; .cb
push dword .g1 ; .cg
push dword .r1 ; .cr

if Ext>=MMX
movq mm4,.cr ; lo -> r,g
movq mm6,.cb ; hi -> b, tex_x
pxor mm0,mm0
end if
mov ebx,.z1 ; ebx = current z
.ddraw:
; skip the pixel when current z >= stored z
cmp ebx,dword[esi]
jge @f
mov eax,.c_ty
; if ROUND<TEX_SHIFT
; shl eax,TEX_SHIFT-ROUND
; end if
; if ROUND>TEX_SHIFT
; shr eax,ROUND-TEX_SHIFT
; end if
shr eax,ROUND
shl Eax,TEX_SHIFT
mov edx,.c_tx ; calc texture pixel mem address
shr edx,ROUND
add eax,edx
and eax,TEXTURE_SIZE ; cutting
lea eax,[3*eax]
add eax,.tex_ptr
mov dword[esi],ebx ; renew z buffer
if Ext = NON
mov eax,dword[eax]
; mov .tex_col,eax
; keep texel bytes 1 and 2 in the upper half of eax while al is multiplied
push ax
shl eax,8
pop ax
mov edx,.cr
sar edx,ROUND
mul dl ; al*dl
shr ax,8
stosb
ror eax,16
push ax
mov edx,.cg
sar edx,ROUND
mul dl
shr ax,8
stosb
pop ax
shr ax,8
mov edx,.cb
sar edx,ROUND
mul dl
shr ax,8
stosb
jmp .no_skip
else
movd mm1,[eax]
punpcklbw mm1,mm0
movq mm3,mm4 ;.cr ; lo -> r,g
movq mm5,mm6 ;.cb ; lo -> b,tex_x
psrld mm3,ROUND ;
psrld mm5,ROUND ;
packssdw mm3,mm5
pmullw mm1,mm3
psrlw mm1,8
packuswb mm1,mm0
movd [edi],mm1 ; stores 4 bytes, edi is advanced by 3 below
end if
mov dword[esi],ebx ; same store as above, repeated
if Ext = NON
jmp .no_skip
end if
@@:
add edi,3
.no_skip:
add esi,4
add ebx,.dz

mov eax,.dtex_x
add .c_tx, eax
mov edx,.dtex_y
add .c_ty, edx
if Ext=NON
mov eax,.dr
add .cr,eax
mov edx,.dg
add .cg,edx
mov eax,.db
add .cb,eax

else
paddd mm4,.drM ; adds .dr and .dg
paddd mm6,.dbM ; adds .db and .dz (the qword at ebp-12 covers both)
;; paddd mm7,.dtex_y ; mm4 - b, g
;; movq .c_tx,mm7
; mm6 - r, x
end if ; mm7 - y, x

dec ecx
jnz .ddraw

.quit_l:

mov esp,ebp ; drop all locals
ret 42+20 ; horizontal line
 
 
 
CATMULL_SHIFT equ 8 ; shift count applied to z values ('shl ...,CATMULL_SHIFT')
ROUND equ 8 ; shift count applied to interpolated x, colour and texture values
;NON=0
;MMX=1
;Ext=MMX
;TEX_SIZE=0x3fff
;SIZE_X equ 512
;SIZE_Y equ 512
;ROUND = 8
;TEX_SHIFT equ 6
 
; procedure drawing textured triangle with Gouraud shading
; Z-buffer algorithm included, Z coord interpolation ----
; I set the color by this way -- (col1 * col2)/256 ------
;------------------in - eax - x1 shl 16 + y1 ------------
;---------------------- ebx - x2 shl 16 + y2 ------------
;---------------------- ecx - x3 shl 16 + y3 ------------
;---------------------- esi - pointer to Z-buffer--------
;---------------------- edx - pointer to texture---------
;---------------------- Z-buffer filled with dd variables
;---------------------- shifted CATMULL_SHIFT------------
;---------------------- edi - pointer to screen buffer---
;---------------------- stack : colors-------------------
 
 
 
tex_plus_grd_triangle:
; Draws a textured triangle whose texels are scaled by interpolated vertex
; colours, using a Z-buffer. The triangle is split at the middle vertex and
; every scanline is drawn by horizontal_tex_grd_line.
; in: eax, ebx, ecx - vertices, x shl 16 + y
;     edx - texture, esi - Z-buffer, edi - screen buffer
;     stack - 36 bytes of word arguments (below), removed by 'ret 36'
; ebp is overwritten ('mov ebp,esp') and is not restored on exit.
; parameters :
.tex_y3 equ [ebp+38] ; 36 bytes through stack
.tex_x3 equ [ebp+36]
.tex_y2 equ [ebp+34]
.tex_x2 equ [ebp+32]
.tex_y1 equ [ebp+30]
.tex_x1 equ [ebp+28]

.z3 equ [ebp+26]
.col3b equ [ebp+24]
.col3g equ [ebp+22]
.col3r equ [ebp+20]

.z2 equ [ebp+18]
.col2b equ [ebp+16]
.col2g equ [ebp+14]
.col2r equ [ebp+12]

.z1 equ [ebp+10]
.col1b equ [ebp+8]
.col1g equ [ebp+6]
.col1r equ [ebp+4]

; local variables:
; They are created by pushes, so the push order in the body must match
; the offsets listed here.

.tex_ptr equ dword[ebp-4]
.z_ptr equ dword[ebp-8]
.scr_buff equ dword[ebp-12]

.x1 equ word[ebp-14] ;dw ? ;equ word[ebp-10]
.y1 equ word[ebp-16] ;dw ? ;equ word[ebp-12]
.x2 equ word[ebp-18] ;dw ? ;equ word[ebp-14]
.y2 equ word[ebp-20] ;dw ? ;equ word[ebp-16]
.x3 equ word[ebp-22] ;dw ? ;equ word[ebp-18]
.y3 equ word[ebp-24] ;dw ? ;equ word[ebp-20]

; per scanline deltas along edge 1-2
.dx12 equ dword[ebp-28] ;dd ?
.tex_dx12 equ dword[ebp-32] ;dd ?
.tex_dy12 equ [ebp-36] ;dd ?
.dz12 equ dword[ebp-40] ;dd ?
.dc12r equ [ebp-44] ;dd ?
.dc12g equ dword[ebp-48] ;dd ?
.dc12b equ [ebp-52] ;dd ?

; per scanline deltas along edge 2-3
.dx23 equ dword[ebp-56] ;dd ?
.tex_dx23 equ dword[ebp-60] ;dd ?
.tex_dy23 equ [ebp-64] ;dd ?
.dz23 equ dword[ebp-68] ;dd ?
.dc23r equ [ebp-72] ;dd ?
.dc23g equ dword[ebp-76] ;dd ?
.dc23b equ [ebp-80] ;dword[ebp-8]dd ?

; per scanline deltas along edge 1-3
.dx13 equ dword[ebp-84] ;dd ?
.tex_dx13 equ dword[ebp-88] ;dd ?
.tex_dy13 equ [ebp-92] ;dd ?
.dz13 equ dword[ebp-96] ;dd ?
.dc13r equ [ebp-100] ;dd ?
.dc13g equ dword[ebp-104] ;dd ?
.dc13b equ [ebp-108] ;dd ?

; current values on edge 1-3
.scan_x1 equ dword[ebp-112] ;dd ?
.scan_y1 equ [ebp-116] ;dd ?
.zz1 equ dword[ebp-120] ;dd ?
.cur1r equ [ebp-124] ;dd ?
.cur1g equ dword[ebp-128] ;dd ?
.cur1b equ [ebp-132] ;dd ?

; current values on edge 1-2, later on edge 2-3
.scan_x2 equ dword[ebp-136] ;dd ?
.scan_y2 equ [ebp-140] ;dd ?
.zz2 equ dword[ebp-144] ;dd ?
.cur2r equ [ebp-148] ;dd ?
.cur2g equ dword[ebp-152] ;dd ?
.cur2b equ [ebp-156] ;dd ?


mov ebp,esp

; mov .tex_ptr,edx
; mov .z_ptr,esi
; mov .scr_buff,edi
push edx esi edi
; push esi
; push edi
mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that
and edx,ebx ; if *all* of them are negative a sign flag is raised
and edx,ecx
and edx,eax
test edx,80008000h ; Check both X&Y at once
jne .loop2_end

; sort the vertices by y (low word of eax/ebx/ecx), swapping the stack
; arguments together with the registers
.sort3:
cmp ax,bx
jle .sort1
xchg eax,ebx
if Ext>=MMX
movq mm0, .col1r ; exchange r, g, b, z
movq mm1, .col2r
movq .col1r ,mm1
movq .col2r ,mm0
else
mov edx,dword .col1r ; exchange both r and g
xchg edx,dword .col2r
mov dword .col1r ,edx

mov edx,dword .col1b ; b and z
xchg edx,dword .col2b
mov dword .col1b ,edx
end if

mov edx,dword .tex_x1 ; tex_x and tex_y in one dword
xchg edx,dword .tex_x2
mov dword .tex_x1 ,edx

.sort1:
cmp bx,cx
jle .sort2
xchg ebx,ecx

if Ext>=MMX
movq mm0, .col2r ; exchange r, g, b, z
movq mm1, .col3r
movq .col3r ,mm0
movq .col2r ,mm1
else

mov edx,dword .col2r ; r, g
xchg edx,dword .col3r
mov dword .col2r,edx

mov edx,dword .col2b ; b, z
xchg edx,dword .col3b
mov dword .col2b,edx
end if

mov edx,dword .tex_x2
xchg edx,dword .tex_x3
mov dword .tex_x2,edx

jmp .sort3

.sort2:

push eax ebx ecx ; store in variables
; push ebx
; push ecx

;****************** delta computing zone **************
; Each zone pushes 7 dwords: dx, tex_dx, tex_dy, dz, dr, dg, db.
; An edge without height gets 7 zeroes instead (avoids division by 0).
;+++++++++ first zone
mov bx,.y2 ; calc delta12
sub bx,.y1
jnz .dx12_make
mov ecx,7
@@:
push dword 0
loop @b
jmp .dx12_done
.dx12_make:


mov ax,.x2
sub ax,.x1
cwde
movsx ebx,bx
shl eax,ROUND
cdq
idiv ebx
; mov .dx12,eax
push eax

if 0 ; Ext=SSE
movd mm0,.col1r ; 2 words r, g
pxor mm1,mm1
punpcklwd mm0,mm1
cvtpi2ps xmm0,mm0
movlhps xmm0,xmm0
movd mm0,.col1g ; 2 words b, z
punpcklwd mm0,mm1
cvtpi2ps xmm0,mm0
; xmm0=four float double words
divss xmm0,.pack3
;convert and insert mm0 to lower xmm1 ..
end if

mov ax,word .tex_x2
sub ax,word .tex_x1
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .tex_dx12r,eax
push eax

mov ax,word .tex_y2
sub ax,word .tex_y1
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .tex_dx12,eax
push eax

mov ax,word .z2
sub ax,word .z1
cwde
shl eax,CATMULL_SHIFT
cdq
idiv ebx
; mov .dz12,eax
push eax ; .dza12

mov ax,word .col2r
sub ax,word .col1r
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dc12r,eax
push eax

mov ax,word .col2g
sub ax,word .col1g
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dc12g,eax
push eax

mov ax,word .col2b ;;---
sub ax,word .col1b
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dc12b,eax
push eax

;+++++++++++++++++ second zone +++++++++++++
.dx12_done:

mov bx,.y3 ; calc delta23
sub bx,.y2
jnz .dx23_make
mov ecx,7
@@:
push dword 0
loop @b
jmp .dx23_done

.dx23_make:
mov ax,.x3
sub ax,.x2
cwde
movsx ebx,bx
shl eax,ROUND
cdq
idiv ebx
; mov .dx23,eax
push eax

mov ax,word .tex_x3
sub ax,word .tex_x2
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .tex_dx23,eax
push eax

mov ax,word .tex_y3
sub ax,word .tex_y2
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .tex_dy23,eax
push eax

mov ax,word .z3
sub ax,word .z2
cwde ;
shl eax,CATMULL_SHIFT ; 2222222
cdq ; 2 2
idiv ebx ; 2
; mov .dz23,eax ; 2
push eax ; .dza12 ; 2
; 2
mov ax,word .col3r ; 2
sub ax,word .col2r ; 2222222
cwde ; second delta
shl eax,ROUND ;
cdq ;
idiv ebx ;
; mov .dc23r,eax ;
push eax

mov ax,word .col3g
sub ax,word .col2g
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dc23g,eax
push eax

mov ax,word .col3b ;;---
sub ax,word .col2b
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dc23b,eax
push eax

.dx23_done:
;++++++++++++++++++third zone++++++++++++++++++++++++
mov bx,.y3 ; calc delta13
sub bx,.y1
jnz .dx13_make
mov ecx,7
@@:
push dword 0
loop @b
jmp .dx13_done
.dx13_make:
mov ax,.x3
sub ax,.x1
cwde
movsx ebx,bx
shl eax,ROUND
cdq
idiv ebx
; mov .dx13,eax
push eax

mov ax,word .tex_x3 ; triangle b
sub ax,word .tex_x1
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .tex_dx13r,eax
push eax

mov ax,word .tex_y3
sub ax,word .tex_y1
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .tex_dy13,eax
push eax

mov ax,word .z3
sub ax,word .z1 ; 333333333
cwde ; 3 3
shl eax,CATMULL_SHIFT ; 3
cdq ; 3
idiv ebx ; 3
; mov .dz13,eax ; 3
push eax ; .dza12 ; 3
; 3
mov ax,word .col3r ; 3333333333
sub ax,word .col1r ; 3
cwde ; 3
shl eax,ROUND ; 3
cdq ; 3
idiv ebx ; 3
; mov .dc13r,eax ; 3 3
push eax ; 33333333

mov ax,word .col3g
sub ax,word .col1g
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dc13g,eax
push eax

mov ax,word .col3b ;;---
sub ax,word .col1b
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dc13b,eax
push eax

.dx13_done:

; <<<<<<< ::delta zone end+++++++++++++++++++++ >>>>>>>>
; Reserve the 12 dword locals .scan_x1 .. .cur2b ([ebp-112] .. [ebp-156]).
; The former odd amount (55) left esp unaligned for every push, pushad
; and call in the loops below.
sub esp,12*4

movsx eax,.x1 ; eax - cur x1
shl eax,ROUND ; ebx - cur x2
mov ebx,eax
movsx edx,word .z1
shl edx,CATMULL_SHIFT
mov .zz1,edx
mov .zz2,edx

movzx edi,word .col1r
shl edi,ROUND
mov .cur1r,edi
mov .cur2r,edi
movzx esi,word .col1g
shl esi,ROUND
mov .cur1g,esi
mov .cur2g,esi
movzx edx,word .col1b
shl edx,ROUND
mov .cur1b,edx
mov .cur2b,edx

movzx edi,word .tex_x1
shl edi,ROUND
mov .scan_x1,edi
mov .scan_x2,edi
movzx edx,word .tex_y1
shl edx,ROUND
mov .scan_y1,edx
mov .scan_y2,edx

; upper part: scanlines y1 .. y2-1, between edges 1-3 and 1-2
mov cx,.y1
cmp cx,.y2
jge .loop1_end
.loop_1:
; push eax ebx ebp
pushad ; the line procedure overwrites ebp and other registers

; 62 bytes of arguments, removed by the callee
push .tex_ptr
push .scr_buff
push .z_ptr
push cx

push .zz2

push .scan_x2
push dword .scan_y2
push dword .cur2r
push .cur2g
push dword .cur2b

push .zz1

push .scan_x1
push dword .scan_y1
push dword .cur1r
push .cur1g
push dword .cur1b

sar eax,ROUND
sar ebx,ROUND
call horizontal_tex_grd_line

; pop ebp ebx eax
popad

; step all current values to the next scanline; the MMX/SSE variants add
; two or four neighbouring dwords per instruction
if (Ext = MMX)|(Ext=SSE)
movq mm0,.cur1b
movq mm1,.cur1r
movq mm2,.scan_y1
movq mm3,.cur2b
movq mm4,.cur2r
movq mm5,.scan_y2
paddd mm0,.dc13b
paddd mm1,.dc13r
paddd mm2,.tex_dy13
paddd mm3,.dc12b
paddd mm4,.dc12r
paddd mm5,.tex_dy12
movq .cur1b,mm0
movq .cur1r,mm1
movq .scan_y1,mm2
movq .cur2b,mm3
movq .cur2r,mm4
movq .scan_y2,mm5
end if
if Ext >= SSE2
movups xmm0,.cur1b
movups xmm1,.dc13b
movups xmm2,.cur2b
movups xmm3,.dc12b
movq mm2,.scan_y1
movq mm5,.scan_y2
paddd xmm0,xmm1
paddd xmm2,xmm3
paddd mm2,.tex_dy13
paddd mm5,.tex_dy12
movq .scan_y1,mm2
movq .scan_y2,mm5
movups .cur1b,xmm0
movups .cur2b,xmm2
end if

if Ext = NON
mov edx,.dc13b
add .cur1b,edx
mov esi,.dc13g
add .cur1g,esi
mov edi,.dc13r
add .cur1r,edi
mov edx,.dz13
add .zz1,edx
mov edx,.tex_dx13
add .scan_x1,edx
mov esi,.tex_dy13
add .scan_y1,esi

mov edi,.dc12b
add .cur2b,edi
mov esi,.dc12g
add .cur2g,esi
mov edx,.dc12r
add .cur2r,edx
mov edi,.tex_dx12
add .scan_x2,edi
mov esi,.tex_dy12
add .scan_y2,esi
mov edx,.dz12
add .zz2,edx
end if
add eax,.dx13
add ebx,.dx12
inc cx
cmp cx,.y2
jl .loop_1
.loop1_end:
; lower part: scanlines y2 .. y3-1, between edges 1-3 and 2-3
movzx ecx,.y2
cmp cx,.y3
jge .loop2_end

; restart the second edge from vertex 2; the edge 1-3 values continue
movsx ebx,.x2 ; eax - cur x1
shl ebx,ROUND ; ebx - cur x2

movsx edx,word .z2
shl edx,CATMULL_SHIFT
; mov .zz1,edx
mov .zz2,edx

movzx edi,word .col2r
shl edi,ROUND
; mov .cur1r,edi
mov .cur2r,edi
movzx esi,word .col2g
shl esi,ROUND
; mov .cur1g,esi
mov .cur2g,esi
movzx edx,word .col2b
shl edx,ROUND
; mov .cur1b,edx
mov .cur2b,edx

movzx edi,word .tex_x2
shl edi,ROUND
; mov .scan_x1,edi
mov .scan_x2,edi
movzx edx,word .tex_y2
shl edx,ROUND
; mov .scan_y1,edx
mov .scan_y2,edx

.loop_2:
pushad

push .tex_ptr
push .scr_buff
push .z_ptr
push cx

push .zz2

push .scan_x2
push dword .scan_y2
push dword .cur2r
push .cur2g
push dword .cur2b

push .zz1

push .scan_x1
push dword .scan_y1
push dword .cur1r
push .cur1g
push dword .cur1b

sar eax,ROUND
sar ebx,ROUND
call horizontal_tex_grd_line

popad

if (Ext = MMX)|(Ext=SSE)
movq mm0,.cur1b
movq mm1,.cur1r
movq mm2,.scan_y1
movq mm3,.cur2b
movq mm4,.cur2r
movq mm5,.scan_y2
paddd mm0,.dc13b
paddd mm1,.dc13r
paddd mm2,.tex_dy13
paddd mm3,.dc23b
paddd mm4,.dc23r
paddd mm5,.tex_dy23
movq .cur1b,mm0
movq .cur1r,mm1
movq .scan_y1,mm2
movq .cur2b,mm3
movq .cur2r,mm4
movq .scan_y2,mm5
end if
if Ext >= SSE2
movups xmm0,.cur1b
movups xmm1,.dc13b
movups xmm2,.cur2b
movups xmm3,.dc23b
movq mm2,.scan_y1
movq mm5,.scan_y2
paddd xmm0,xmm1
paddd xmm2,xmm3
paddd mm2,.tex_dy13
paddd mm5,.tex_dy23
movq .scan_y1,mm2
movq .scan_y2,mm5
movups .cur1b,xmm0
movups .cur2b,xmm2
end if
if Ext = NON
mov edx,.dc13b
add .cur1b,edx
mov esi,.dc13g
add .cur1g,esi
mov edi,.dc13r
add .cur1r,edi
mov edx,.tex_dx13
add .scan_x1,edx
mov esi,.tex_dy13
add .scan_y1,esi
mov edx,.dz13
add .zz1,edx

mov edi,.dc23b
add .cur2b,edi
mov esi,.dc23g
add .cur2g,esi
mov edx,.dc23r
add .cur2r,edx
mov edi,.tex_dx23
add .scan_x2,edi
mov esi,.tex_dy23
add .scan_y2,esi
mov edx,.dz23
add .zz2,edx
end if
add eax,.dx13
add ebx,.dx23
inc cx
cmp cx,.y3
jl .loop_2

.loop2_end:
mov esp,ebp ; drop all locals
ret 36
horizontal_tex_grd_line:
; Draws one horizontal span of a textured, colour-shaded triangle with a
; per-pixel Z-buffer test. Every texel channel is scaled by the matching
; interpolated colour: (texel * colour) shr 8.
;in:
; eax : x1, ebx : x2
; stack : 62 bytes of arguments (.b1 .. .tex_ptr below), removed by 'ret 42+20'
; ebp is overwritten ('mov ebp,esp') and is not restored on exit.

; --- stack arguments (above the return address) ---
.tex_ptr equ [ebp+62]
.screen equ [ebp+58]
.z_buffer equ [ebp+54]
.y equ [ebp+52]

; values at the second end of the span
.z2 equ [ebp+48]
.tex_x2 equ [ebp+44]
.tex_y2 equ [ebp+40]
.r2 equ [ebp+36]
.g2 equ [ebp+32]
.b2 equ [ebp+28]

; values at the first end of the span
.z1 equ [ebp+24]
.tex_x1 equ [ebp+20]
.tex_y1 equ [ebp+16]
.r1 equ [ebp+12]
.g1 equ [ebp+8]
.b1 equ [ebp+4]

; --- locals: created by the pushes in the body, in exactly this order ---
.x1 equ word[ebp-2]
.x2 equ word[ebp-4]
.dz equ dword[ebp-8]
.db equ dword[ebp-12]
.dg equ dword[ebp-16]
.dr equ dword[ebp-20]
.dtex_x equ dword[ebp-24]
.dtex_y equ dword[ebp-28]

; current (per pixel) values
.c_ty equ [ebp-32]
.c_tx equ [ebp-36]
.cb equ [ebp-40]
.cg equ [ebp-44]
.cr equ [ebp-48]
.t_col equ [ebp-52]

; qword views of neighbouring dword deltas, for the MMX path
.dtex_yM equ qword[ebp-28]
.drM equ qword[ebp-20]
.dbM equ qword[ebp-12]

mov ebp,esp
; sub esp,30

; reject the line when y is outside 0 .. size_y_var-1
mov cx,word .y
or cx,cx
jl .quit_l

cmp cx,word[size_y_var] ;SIZE_Y
jge .quit_l

; empty span -> quit; x1 > x2 -> swap both ends so that x1 < x2
cmp ax,bx
je .quit_l
jl @f

xchg eax,ebx

if Ext=NON
mov ecx,dword .r1
xchg ecx, .r2
mov dword .r1, ecx

mov ecx,dword .g1
xchg ecx, .g2
mov dword .g1, ecx

mov ecx,dword .b1
xchg ecx, .b2
mov dword .b1, ecx

mov ecx,dword .tex_x1
xchg ecx, .tex_x2
mov dword .tex_x1, ecx

mov ecx,dword .tex_y1
xchg ecx, .tex_y2
mov dword .tex_y1, ecx

mov ecx,dword .z1
xchg ecx, .z2
mov dword .z1, ecx
end if
if (Ext=MMX)
; each movq moves two neighbouring dword arguments at once
movq mm0,.b1 ; b, g
movq mm1,.b2
movq .b1, mm1
movq .b2, mm0
movq mm2,.r1 ; r, y
movq mm3,.r2
movq .r1,mm3
movq .r2,mm2
movq mm4,.tex_x1 ; x, z
movq mm5,.tex_x2
movq .tex_x1,mm5
movq .tex_x2,mm4

end if
if Ext>=SSE
; 16 bytes from .b1: b, g, r, tex_y
movups xmm0,.b1
movups xmm1,.b2
movups .b1,xmm1
movups .b2,xmm0
movq mm4,.tex_x1 ; x, z
movq mm5,.tex_x2
movq .tex_x1,mm5
movq .tex_x2,mm4
end if

@@:
; span completely left or right of the screen -> quit
or bx,bx
jle .quit_l
cmp ax,word[size_x_var] ;SIZE_X
jge .quit_l

push ax ; .x1
push bx ; .x2

; per pixel deltas: (value2 - value1) / (x2 - x1); x2 - x1 is non zero here
mov eax,.z2 ; delta zone************
sub eax,.z1
cdq
mov bx,.x2
sub bx,.x1
movsx ebx,bx
idiv ebx
push eax ; .dz

mov eax,.b2
sub eax,.b1
cdq
idiv ebx
push eax ; .db

mov eax,.g2
sub eax,.g1
cdq
idiv ebx
push eax ; .dg

mov eax,.r2
sub eax,.r1
cdq
idiv ebx
push eax ; .dr

mov eax,.tex_x2
sub eax,.tex_x1
cdq
idiv ebx
push eax ; .dtex_x

mov eax,.tex_y2
sub eax,.tex_y1
cdq
idiv ebx
push eax ; .dtex_y

cmp .x1,0
jg @f

; x1 <= 0: advance every start value by delta * (-x1) and start at x = 0
mov eax,.dz ; clipping
movsx ebx,.x1
neg ebx
imul ebx
add .z1,eax
mov .x1,0

mov eax,.dr
imul ebx
add .r1,eax
;if Ext=NON
mov eax,.dg
imul ebx
add .g1,eax

mov eax,.db
imul ebx
add .b1,eax

mov eax,.dtex_x
imul ebx
add .tex_x1,eax

mov eax,.dtex_y
imul ebx
add .tex_y1,eax
@@:
; limit x2 to size_x_var
movsx edx,word[size_x_var] ;SIZE_X
cmp .x2,dx
jl @f
mov .x2,dx
@@:
; calc line address begin in screen and Z buffer
; eax = y * size_x + x1 (pixel index)
movsx eax,word .y
mul edx
movsx edx,.x1
add eax,edx

mov esi,eax
shl esi,2 ; Z-buffer entries are dwords
add esi,.z_buffer

lea eax,[eax*3] ; screen pixels are 3 bytes
mov edi,.screen
add edi,eax

; ecx = number of pixels to draw
mov cx,.x2
sub cx,.x1
movzx ecx,cx

; init current variables
push dword .tex_y1 ; .c_ty
;if Ext=NON
push dword .tex_x1 ; .c_tx

push dword .b1 ; .cb
push dword .g1 ; .cg
push dword .r1 ; .cr

if Ext>=MMX
movq mm4,.cr ; lo -> r,g
movq mm6,.cb ; hi -> b, tex_x
pxor mm0,mm0
end if
mov ebx,.z1 ; ebx = current z
.ddraw:
; skip the pixel when current z >= stored z
cmp ebx,dword[esi]
jge @f
mov eax,.c_ty
; if ROUND<TEX_SHIFT
; shl eax,TEX_SHIFT-ROUND
; end if
; if ROUND>TEX_SHIFT
; shr eax,ROUND-TEX_SHIFT
; end if
shr eax,ROUND
shl Eax,TEX_SHIFT
mov edx,.c_tx ; calc texture pixel mem address
shr edx,ROUND
add eax,edx
and eax,TEXTURE_SIZE ; cutting
lea eax,[3*eax]
add eax,.tex_ptr
mov dword[esi],ebx ; renew z buffer
if Ext = NON
mov eax,dword[eax]
; mov .tex_col,eax
; keep texel bytes 1 and 2 in the upper half of eax while al is multiplied
push ax
shl eax,8
pop ax
mov edx,.cr
sar edx,ROUND
mul dl ; al*dl
shr ax,8
stosb
ror eax,16
push ax
mov edx,.cg
sar edx,ROUND
mul dl
shr ax,8
stosb
pop ax
shr ax,8
mov edx,.cb
sar edx,ROUND
mul dl
shr ax,8
stosb
jmp .no_skip
else
movd mm1,[eax]
punpcklbw mm1,mm0
movq mm3,mm4 ;.cr ; lo -> r,g
movq mm5,mm6 ;.cb ; lo -> b,tex_x
psrld mm3,ROUND ;
psrld mm5,ROUND ;
packssdw mm3,mm5
pmullw mm1,mm3
psrlw mm1,8
packuswb mm1,mm0
movd [edi],mm1 ; stores 4 bytes, edi is advanced by 3 below
end if
mov dword[esi],ebx ; same store as above, repeated
if Ext = NON
jmp .no_skip
end if
@@:
add edi,3
.no_skip:
add esi,4
add ebx,.dz

mov eax,.dtex_x
add .c_tx, eax
mov edx,.dtex_y
add .c_ty, edx
if Ext=NON
mov eax,.dr
add .cr,eax
mov edx,.dg
add .cg,edx
mov eax,.db
add .cb,eax

else
paddd mm4,.drM ; adds .dr and .dg
paddd mm6,.dbM ; adds .db and .dz (the qword at ebp-12 covers both)
;; paddd mm7,.dtex_y ; mm4 - b, g
;; movq .c_tx,mm7
; mm6 - r, x
end if ; mm7 - y, x

dec ecx
jnz .ddraw

.quit_l:

mov esp,ebp ; drop all locals
ret 42+20 ; horizontal line
 
/programs/demos/view3ds/history.txt
1,11 → 1,16
View3ds 0.073 - may 2021
1. I introduced a procedure for finding non-redundant edges.
2. Writing some info about object: vertices, triangles unique edges
count.
-----------------------------------------------------------------------------------
 
View3ds 0.072 - march 2021
1. New displaying model - texturing with bilinear filtering and transparency
simultaneously. Note that filtering is done only inside the polygon. To improve the
simultaneously. Note that filtering is done only inside the polygon. To improve the
quality of image there is a need to use floats coordinates of texture to pass
as arguments to single triangle rendering proc.
2. Optimizations.
3. SSE3 version runs correct on SSE2 cpus, but real phong, glass and
3. SSE3 version runs correct on SSE2 cpus, but real phong, glass and
transparented texturing with filtering rendering models are disabled.
-----------------------------------------------------------------------------------
 
/programs/demos/view3ds/readme.txt
1,20 → 1,16
View3ds 0.073 - tiny viewer to .3ds and .asc files with several graphics
View3ds 0.074 - tiny viewer to .3ds and .asc files with several graphics
effects implementation.
 
What's new?
1. I introduced procedure for searching nonredundand edges.
2. Writing some info about object: vertices, triangles unique edges
count.
1. Fixed emboss bug in grd lines displaying model.
2. Fixed the problem of grd lines exceeding the screen.
3. New rendering model - ray-cast shadows, with an appropriate button to
switch this option on. Note that this is not a real-time model, especially when
a complex object is computed. I made an effort to introduce an accelerating
structure - AABB (Axis Aligned Bounding Boxes) - but it is disabled
for now, because it seems to work incorrectly (slowly).
 
 
1. New displaying model - texturing with bilinear filtering and transparency
simultanusly. Note that filtering is done only inside polygon. To better
quality of image there is a need to use floats coordinates of texture to pass
as arguments to single triangle rendering proc.
2. Optimizations.
3. SSE3 version runs correct on SSE2 cpus, but real phong, glass and
transparented texturing with filtering rendering models are disabled.
 
Buttons description:
1. rotary: choosing rotary axle: x, y, x+y.
2. shd. model: choosing shading model: flat, grd (smooth), env (spherical
26,20 → 22,20
ptex (real Phong + texturing + transparency).
3. speed: idle, full.
4,5. zoom in, out: no comment.
6. catmull: disabled
6. ray shadow: calc ray casted shadows.
7. culling: backface culling on/ off.
8. rand. light: Randomize 3 unlinear lights( so called Phong's illumination).
9. Blur: blur N times; N=0,1,2,3,4,5
10,11,12,13. lossless operations (rotary 90, 180 degrees).
12. emboss: Do emboss effect( flat bumps ), use 'bumps deep' button to do edges
12. emboss: Do emboss effect( flat bumps ), use 'bumps deep' button to do edges
more deep.
13. fire: do motion blur ( looks like fire ).
14. move: changes meaning x,y,z +/- buttons -> obj: moving object, camr: moving
14. move: changes meaning x,y,z +/- buttons -> obj: moving object, camr: moving
camera, wave: x,y +/- increase, decrease wave effect frequency and amplitude.
15. generate: Generates some objects: node, Thorn Crown, heart...
16. bumps: random, according to texture.
17. bumps deep -> create bumps deeper or lighter.
18. re-map tex -> re-map texture and bump map coordinates, to change spherical
18. re-map tex -> re-map texture and bump map coordinates, to change spherical
mapping around axle use 'xchg' and 'mirror' buttons, then press 're-map tex' button.
19. bright + -> increase picture brightness.
20. bright - -> decrease picture brightness.
46,8 → 42,8
21. wav effect -> do effect based sine function.
22. editor -> setting editing option. If is "on" then red bars are draw according to each
vertex, Pressing and moving left mouse button (cursor must be on handler)- change
vertex position. If left mouse button is released apply current position. You may also
vertex position. If left mouse button is released apply current position. You may also
decrease whole handlers count by enable culling (using appropriate button) - some
back handlers become hidden.
 
Maciej Guba V 2021
Maciej Guba IX 2021
/programs/demos/view3ds/tex_cat.inc
1,611 → 1,611
;TEX_X = 512
;TEX_Y = 512
;ROUND equ 8
;SIZE_X = 512
;SIZE_Y = 512
;TEX_SHIFT = 9
CATMULL_SHIFT equ 8 ; shift count applied to z values ('shl ...,CATMULL_SHIFT')
 
;------------------------------------------------------------------------
;- Procedure drawing textured triangle using Catmull Z-buffer algorithm -
;------------------------------------------------------------------------
tex_triangle_z:
; Draws a textured triangle scanline by scanline through textured_line_z.
; Stack arguments: 18 bytes, removed by 'ret 18'.
; ebp is overwritten ('mov ebp,esp') and is not restored on exit.
;----------in - eax - x1 shl 16 + y1
;-------------- ebx - x2 shl 16 + y2
;---------------ecx - x3 shl 16 + y3
;---------------edx - pointer to Z-buffer
;---------------esi - pointer to texture buffer
;---------------edi - pointer to screen buffer
;-------------stack - texture coordinates
;------------------ - z coordinates
.tex_x1 equ ebp+4
.tex_y1 equ ebp+6
.tex_x2 equ ebp+8
.tex_y2 equ ebp+10
.tex_x3 equ ebp+12
.tex_y3 equ ebp+14
.z1 equ word[ebp+16]
.z2 equ word[ebp+18]
.z3 equ word[ebp+20]

; locals: created by pushes, so the push order in the body must match
; the offsets listed here
.tex_ptr equ dword[ebp-4] ; pointer to texture
.z_ptr equ dword[ebp-8] ; pointer to z-buffer
.x1 equ word[ebp-10]
.y1 equ word[ebp-12]
.x2 equ word[ebp-14]
.y2 equ word[ebp-16]
.x3 equ word[ebp-18]
.y3 equ word[ebp-20]

.dx12 equ dword[ebp-24]
.tex_dx12 equ dword[ebp-28]
.tex_dy12 equ dword[ebp-32]
.dz12 equ dword[ebp-36]

.dx13 equ dword[ebp-40]
.tex_dx13 equ dword[ebp-44]
.tex_dy13 equ dword[ebp-48]
.dz13 equ dword[ebp-52]

.dx23 equ dword[ebp-56]
.tex_dx23 equ dword[ebp-60]
.tex_dy23 equ dword[ebp-64]
.dz23 equ dword[ebp-68]

.scan_x1 equ dword[ebp-72]
.scan_x2 equ dword[ebp-76]
.scan_y1 equ dword[ebp-80]
.scan_y2 equ dword[ebp-84]
.cz1 equ dword[ebp-88]
.cz2 equ dword[ebp-92]

mov ebp,esp
push esi ; store memory pointers
push edx
; sort the vertices by y (low word), swapping the stack arguments too
.tt_sort3:
cmp ax,bx ;sort all parameters
jle .tt_sort1
xchg eax,ebx
mov edx,dword [.tex_x1] ; tex_x and tex_y in one dword
xchg edx,dword [.tex_x2]
mov dword[.tex_x1],edx
mov dx,.z1
xchg dx,.z2
mov .z1,dx
.tt_sort1:
cmp bx,cx
jle .tt_sort2
xchg ebx,ecx
mov edx,dword [.tex_x2]
xchg edx,dword [.tex_x3]
mov dword [.tex_x2],edx
mov dx,.z2
xchg dx,.z3
mov .z2,dx
jmp .tt_sort3
.tt_sort2:

push eax ; and store to user friendly variables
push ebx
push ecx

mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that
and edx,ebx ; if *all* of them are negative a sign flag is raised
and edx,ecx
and edx,eax
test edx,80008000h ; Check both X&Y at once
jne .tt_loop2_end
; cmp ax,SIZE_Y
; jl @f
; cmp bx,SIZE_Y
; jl @f
; cmp cx,SIZE_Y
; jl @f
; left over from the disabled checks; eax, ebx, ecx are reloaded below
ror eax,16
ror ebx,16
ror ecx,16
; cmp ax,SIZE_X
; jl @f
; cmp bx,SIZE_X
; jl @f
; cmp cx,SIZE_X
; jl @f
; jmp .tt_loop2_end
@@:
mov eax,dword[.tex_x1] ; texture coords must be in [0..TEX_X(Y)]
mov ebx,dword[.tex_x2]
mov ecx,dword[.tex_x3]
mov edx,eax
or edx,ebx
or edx,ecx
test edx,80008000h ; any negative coordinate -> quit
jne .tt_loop2_end
cmp ax,TEX_X
jge .tt_loop2_end
cmp bx,TEX_X
jge .tt_loop2_end
cmp cx,TEX_X
jge .tt_loop2_end
ror eax,16
ror ebx,16
ror ecx,16
cmp ax,TEX_Y
jge .tt_loop2_end
cmp bx,TEX_Y
jge .tt_loop2_end
cmp cx,TEX_Y
jge .tt_loop2_end


; Each delta zone pushes 4 dwords: dx, tex_dx, tex_dy, dz.
; An edge without height gets 4 zeroes instead (avoids division by 0).
; The divisor is the 16-bit difference, zero extended: after sorting it is
; never negative. Extending y2 before the subtraction (as was done before)
; left the sign bits of y2 in the high word of the divisor.
mov bx,.y2 ; calc delta
sub bx,.y1
movzx ebx,bx ; movzx leaves the flags of 'sub' intact
jnz .tt_dx12_make
xor edx,edx
mov ecx,4
@@:
push edx
loop @b
jmp .tt_dx12_done
.tt_dx12_make:
mov ax,.x2
sub ax,.x1
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dx12,eax ; dx12 = (x2-x1)/(y2-y1)
push eax

mov ax,word[.tex_x2]
sub ax,word[.tex_x1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov [.tex_dx12],eax ; tex_dx12 = (tex_x2-tex_x1)/(y2-y1)
push eax

mov ax,word[.tex_y2]
sub ax,word[.tex_y1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov [.tex_dy12],eax ; tex_dy12 = (tex_y2-tex_y1)/(y2-y1)
push eax

mov ax,.z2
sub ax,.z1
cwde
shl eax,CATMULL_SHIFT
cdq
idiv ebx
push eax
.tt_dx12_done:

mov bx,.y3 ; calc delta
sub bx,.y1
movzx ebx,bx ; movzx leaves the flags of 'sub' intact
jnz .tt_dx13_make
xor edx,edx
mov ecx,4
@@:
push edx
loop @b
jmp .tt_dx13_done
.tt_dx13_make:
mov ax,.x3
sub ax,.x1
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dx12,eax ; dx13 = (x3-x1)/(y3-y1)
push eax

mov ax,word[.tex_x3]
sub ax,word[.tex_x1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov [.tex_dx12],eax ; tex_dx13 = (tex_x3-tex_x1)/(y3-y1)
push eax

mov ax,word[.tex_y3]
sub ax,word[.tex_y1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov [.tex_dy12],eax ; tex_dy13 = (tex_y3-tex_y1)/(y3-y1)
push eax

mov ax,.z3
sub ax,.z1
cwde
shl eax,CATMULL_SHIFT
cdq
idiv ebx
push eax
.tt_dx13_done:

mov bx,.y3 ; calc delta
sub bx,.y2
jnz .tt_dx23_make
xor edx,edx
mov ecx,4
@@:
push edx
loop @b
jmp .tt_dx23_done
.tt_dx23_make:
mov ax,.x3
sub ax,.x2
cwde
shl eax,ROUND
cdq
movzx ebx,bx
idiv ebx
; mov .dx23,eax ; dx23 = (x3-x2)/(y3-y2)
push eax

mov ax,word[.tex_x3]
sub ax,word[.tex_x2]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov [.tex_dx23],eax ; tex_dx23 = (tex_x3-tex_x2)/(y3-y2)
push eax

mov ax,word[.tex_y3]
sub ax,word[.tex_y2]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov [.tex_dy23],eax ; tex_dy23 = (tex_y3-tex_y2)/(y3-y2)
push eax

mov ax,.z3
sub ax,.z2
cwde
shl eax,CATMULL_SHIFT
cdq
idiv ebx
push eax
.tt_dx23_done:

movsx eax,.x1 ;eax - cur x1
shl eax,ROUND ;ebx - cur x2
mov ebx,eax

; the six pushes below create .scan_x1 .. .cz2
movsx edx, word[.tex_x1]
shl edx,ROUND
; mov [.scan_x1],edx
; mov [.scan_x2],edx
push edx
push edx
movsx edx, word[.tex_y1]
shl edx,ROUND
; mov [.scan_y1],edx
; mov [.scan_y2],edx
push edx
push edx
movsx edx,.z1
shl edx,CATMULL_SHIFT
push edx
push edx
; upper part: scanlines y1 .. y2-1, between edges 1-3 and 1-2
mov cx,.y1
cmp cx,.y2
jge .tt_loop1_end

.tt_loop1:
pushad ; the line procedure overwrites ebp and other registers

; 38 bytes of arguments, removed by the callee
push .z_ptr
push .cz1 ; z coords shifted shl catmull_shift
push .cz2
push .scan_y2
push .scan_x2
push .scan_y1
push .scan_x1
push esi ;[.tex_ptr]

push cx
sar ebx,ROUND
push bx
sar eax,ROUND
push ax
call textured_line_z

popad
mov edx,.dz13
add .cz1,edx
mov edx,.dz12
add .cz2,edx

mov edx, .tex_dx13
add .scan_x1, edx
mov edx, .tex_dx12
add .scan_x2, edx
mov edx, .tex_dy13
add .scan_y1, edx
mov edx, .tex_dy12
add .scan_y2, edx

add eax, .dx13
add ebx, .dx12
inc cx
cmp cx,.y2
jl .tt_loop1

.tt_loop1_end:


; lower part: scanlines y2 .. y3-1, between edges 1-3 and 2-3
mov cx,.y2
cmp cx,.y3
jge .tt_loop2_end

; restart the second edge from vertex 2
movsx ebx,.x2
shl ebx,ROUND
movsx edx,.z2
shl edx,CATMULL_SHIFT
mov .cz2,edx
movzx edx, word [.tex_x2]
shl edx,ROUND
mov .scan_x2,edx
movzx edx, word[.tex_y2]
shl edx,ROUND
mov .scan_y2,edx

.tt_loop2:

pushad

push .z_ptr
push .cz1 ; z coords shifted shl catmull_shift
push .cz2

push .scan_y2
push .scan_x2
push .scan_y1
push .scan_x1
push esi ;[.tex_ptr]

push cx
sar ebx,ROUND
push bx
sar eax,ROUND
push ax
call textured_line_z

popad


mov edx,.dz13
add .cz1,edx
mov edx,.dz23
add .cz2,edx

mov edx, .tex_dx13
add .scan_x1, edx
mov edx, .tex_dx23
add .scan_x2, edx
mov edx, .tex_dy13
add .scan_y1, edx
mov edx, .tex_dy23
add .scan_y2, edx

add eax, .dx13
add ebx, .dx23
inc cx
cmp cx,.y3
jl .tt_loop2

.tt_loop2_end:

.tt_end:
mov esp,ebp ; drop all locals
ret 18
 
textured_line_z:
; Draws one horizontal textured span with a per-pixel Z-buffer test.
; Stack arguments: 38 bytes, removed by 'ret 30+8'.
; ebp is overwritten ('mov ebp,esp') and is not restored on exit.
;-----in -edi screen buffer pointer
;------------ stack:
.x1 equ word [ebp+4]
.x2 equ word [ebp+6]
.y equ word [ebp+8]

.tex_ptr equ dword [ebp+10]
.tex_x1 equ ebp+14
.tex_y1 equ ebp+18
.tex_x2 equ ebp+22
.tex_y2 equ ebp+26
.z2 equ dword [ebp+30] ;z1, z2 coords shifted shl CATMULL_SHIFT
.z1 equ dword [ebp+34]
.z_ptr equ dword [ebp+38]

; locals: created by the pushes in the body, in exactly this order
.tex_dy equ dword [ebp-4]
.tex_dx equ dword [ebp-8]
.dz equ dword [ebp-12]
.cz equ dword [ebp-16]
.c_tex_x equ dword [ebp-20] ; current tex x
.m_sft1 equ ebp-28
.m_sft2 equ ebp-32
; .c_tex_xM equ ebp+14
.tex_dxM equ ebp-8

mov ebp,esp

; reject the line when y < 0 or y >= size_y_var-1
mov ax,.y
or ax,ax
jl .tl_quit
mov bx,[size_y_var]
dec bx
cmp ax,bx ;SIZE_Y
jge .tl_quit

; empty span -> quit; x1 > x2 -> swap both ends
mov ax,.x1
cmp ax,.x2
je .tl_quit
jl .tl_ok

xchg ax,.x2 ; sort params
mov .x1,ax
if Ext >= MMX
; each movq moves tex_x and tex_y of one end together
movq mm0,[.tex_x1]
movq mm1,[.tex_x2]
movq [.tex_x2],mm0
movq [.tex_x1],mm1

else
mov eax,dword[.tex_x1]
xchg eax,dword[.tex_x2]
mov dword[.tex_x1],eax

mov eax,dword[.tex_y1]
xchg eax,dword[.tex_y2]
mov dword[.tex_y1],eax

end if

mov eax,.z1
xchg eax,.z2
mov .z1,eax

.tl_ok:
; cx = size_x_var-1, kept until the x2 clipping below
mov cx,[size_x_var]
dec cx
cmp .x1,cx ;SIZE_X
jge .tl_quit
cmp .x2,0
jle .tl_quit

; ebx = x2 - x1, divisor for the per pixel deltas
mov bx,.x2
sub bx,.x1
movsx ebx,bx

mov eax,dword[.tex_y2] ; calc .dty
sub eax,dword[.tex_y1]
cdq
idiv ebx
push eax

mov eax,dword[.tex_x2] ; calc .dtx
sub eax,dword[.tex_x1]
cdq
idiv ebx
push eax

mov eax,.z2 ; calc .dz
sub eax,.z1
cdq
idiv ebx
push eax

cmp .x1,0 ; clipping
jg @f

; x1 <= 0: advance the start values by delta * (-x1); eax still holds .dz
movsx ebx,.x1
neg ebx
imul ebx ; eax = .dz * abs(.x1)
add .z1,eax
mov .x1,0

mov eax,.tex_dy
imul ebx
add dword[.tex_y1],eax

mov eax,.tex_dx
imul ebx
add dword[.tex_x1],eax

@@:
cmp .x2,cx ;SIZE_X
jl @f
mov .x2,cx ;SIZE_X
@@:

; eax = ebx = y * size_x + x1 (pixel index)
movsx ebx,.y ; calc mem begin in buffers
movzx eax,word[size_x_var] ;SIZE_X
mul ebx
movsx ebx,.x1
add eax,ebx
mov ebx,eax

lea eax,[eax*3] ; screen pixels are 3 bytes
add edi,eax ; edi - scr buff
shl ebx,2 ; Z-buffer entries are dwords
add .z_ptr,ebx ; z buffer pointer

; ecx = number of pixels to draw
mov cx,.x2
sub cx,.x1
movzx ecx,cx

;if Ext >= MMX
; movq mm0,[.tex_x1]
; movq mm4,mm0
; movq mm1,qword[.tex_dxM]
; mov ebx,.z1
; mov eax,.dz
;else
mov eax,dword[.tex_x1]
mov ebx,dword[.tex_y1]
push .z1 ; .cz
push eax ;.c_tex_x
;end if
mov edx,.z_ptr

.tl_loop:

;if Ext >= MMX
; cmp ebx,[edx] ; ebx - current z
; jge @f
; movq mm2,mm0
; psrad mm2,ROUND
; movq mm3,mm2
; psrlq mm2,32-TEX_SHIFT
; paddd mm3,mm2
; movd esi,mm3
; mov dword[edx],ebx ; renew z buffer
;else
; eax - temp
mov eax,.cz ; ebx - cur tex y shl ROUND
cmp eax,[edx] ; ecx - l.length
jge @f ; ebx - cur tex_y ; edx - cur pointer to z buff
mov esi,ebx ; edi - scr buff
sar esi,ROUND ; esi - tex_ptr temp
shl esi,TEX_SHIFT
mov eax,.c_tex_x ; .cz - cur z coord shl CATMULL_SHIFT
sar eax,ROUND
add esi,eax
mov eax,.cz
mov dword[edx],eax ; renew z buffer
;end if
and esi,TEXTURE_SIZE
lea esi,[esi*3]
add esi,.tex_ptr
movsd ; copies 4 bytes, edi is moved back by one below
dec edi
jmp .no_skip
@@:
add edi,3
.no_skip:
add edx,4
;if Ext >= MMX
; add ebx,eax
; paddd mm0,mm1
;else
mov eax,.dz
add .cz,eax
mov eax,.tex_dx
add .c_tex_x,eax
add ebx,.tex_dy
;end if
loop .tl_loop
.tl_quit:

mov esp,ebp ; drop all locals

ret 30+8
 
;TEX_X = 512
;TEX_Y = 512
;ROUND equ 8
;SIZE_X = 512
;SIZE_Y = 512
;TEX_SHIFT = 9
CATMULL_SHIFT equ 8
 
;------------------------------------------------------------------------
;- Procedure drawing textured triangle using Catmull Z-buffer algorithm -
;------------------------------------------------------------------------
tex_triangle_z:
;------------------------------------------------------------------------
; Rasterizes one textured triangle, scanline by scanline, through
; textured_line_z (which performs the per-pixel Z test).
; in:    eax - x1 shl 16 + y1
;        ebx - x2 shl 16 + y2
;        ecx - x3 shl 16 + y3
;        edx - pointer to Z-buffer
;        esi - pointer to texture buffer
;        edi - pointer to screen buffer
;        stack - texture coordinates (tex_x, tex_y words for 3 vertices)
;              - z coordinates (3 words)
; out:   nothing; the 18 bytes of stack parameters are removed (ret 18)
; note:  ebp is overwritten without being saved; eax, ebx, ecx, edx and
;        flags are clobbered.  All locals live below ebp and are created
;        by the pushes in the order given by the equates below.
;------------------------------------------------------------------------
.tex_x1 equ ebp+4
.tex_y1 equ ebp+6
.tex_x2 equ ebp+8
.tex_y2 equ ebp+10
.tex_x3 equ ebp+12
.tex_y3 equ ebp+14
.z1 equ word[ebp+16]
.z2 equ word[ebp+18]
.z3 equ word[ebp+20]

.tex_ptr equ dword[ebp-4] ; pointer to texture
.z_ptr equ dword[ebp-8] ; pointer to z-buffer
.x1 equ word[ebp-10]
.y1 equ word[ebp-12]
.x2 equ word[ebp-14]
.y2 equ word[ebp-16]
.x3 equ word[ebp-18]
.y3 equ word[ebp-20]

.dx12 equ dword[ebp-24]
.tex_dx12 equ dword[ebp-28]
.tex_dy12 equ dword[ebp-32]
.dz12 equ dword[ebp-36]

.dx13 equ dword[ebp-40]
.tex_dx13 equ dword[ebp-44]
.tex_dy13 equ dword[ebp-48]
.dz13 equ dword[ebp-52]

.dx23 equ dword[ebp-56]
.tex_dx23 equ dword[ebp-60]
.tex_dy23 equ dword[ebp-64]
.dz23 equ dword[ebp-68]

.scan_x1 equ dword[ebp-72]
.scan_x2 equ dword[ebp-76]
.scan_y1 equ dword[ebp-80]
.scan_y2 equ dword[ebp-84]
.cz1 equ dword[ebp-88]
.cz2 equ dword[ebp-92]

    mov ebp,esp
    push esi                        ; -> .tex_ptr
    push edx                        ; -> .z_ptr

; ---- sort the three vertices by ascending y (low word of eax/ebx/ecx),
; ---- swapping the matching texture coordinates and z values as well
.tt_sort3:
    cmp ax,bx
    jle .tt_sort1
    xchg eax,ebx
    mov edx,dword [.tex_x1]         ; one dword = (tex_x, tex_y) pair
    xchg edx,dword [.tex_x2]
    mov dword[.tex_x1],edx
    mov dx,.z1
    xchg dx,.z2
    mov .z1,dx
.tt_sort1:
    cmp bx,cx
    jle .tt_sort2
    xchg ebx,ecx
    mov edx,dword [.tex_x2]
    xchg edx,dword [.tex_x3]
    mov dword [.tex_x2],edx
    mov dx,.z2
    xchg dx,.z3
    mov .z2,dx
    jmp .tt_sort3
.tt_sort2:

    push eax                        ; -> .x1 / .y1
    push ebx                        ; -> .x2 / .y2
    push ecx                        ; -> .x3 / .y3

; ---- trivial reject: all three x negative or all three y negative
    mov edx,80008000h               ; sign bits of both packed words
    and edx,ebx                     ; a bit survives only if it is set
    and edx,ecx                     ; in *all* three vertices
    and edx,eax
    test edx,80008000h              ; check both X and Y at once
    jne .tt_loop2_end

; ---- texture coords must be in [0..TEX_X) x [0..TEX_Y)
    mov eax,dword[.tex_x1]
    mov ebx,dword[.tex_x2]
    mov ecx,dword[.tex_x3]
    mov edx,eax
    or edx,ebx
    or edx,ecx
    test edx,80008000h              ; any negative coordinate -> reject
    jne .tt_loop2_end
    cmp ax,TEX_X
    jge .tt_loop2_end
    cmp bx,TEX_X
    jge .tt_loop2_end
    cmp cx,TEX_X
    jge .tt_loop2_end
    ror eax,16                      ; bring tex_y words into ax/bx/cx
    ror ebx,16
    ror ecx,16
    cmp ax,TEX_Y
    jge .tt_loop2_end
    cmp bx,TEX_Y
    jge .tt_loop2_end
    cmp cx,TEX_Y
    jge .tt_loop2_end

; ---- per-scanline deltas along edge 1-2 (4 dwords pushed) ----
; The divisor is the 16-bit difference y2-y1.  It is zero-extended AFTER
; the subtraction: the vertices are sorted, so the difference is never
; negative, and extending y2 before subtracting would leave the sign of
; y2 (not of the difference) in the upper half of ebx.
    mov bx,.y2
    sub bx,.y1
    jnz .tt_dx12_make
    xor edx,edx                     ; flat edge: all four deltas are 0
    mov ecx,4
@@:
    push edx
    loop @b
    jmp .tt_dx12_done
.tt_dx12_make:
    movzx ebx,bx
    mov ax,.x2
    sub ax,.x1
    cwde
    shl eax,ROUND
    cdq
    idiv ebx
    push eax                        ; .dx12 = (x2-x1)/(y2-y1)

    mov ax,word[.tex_x2]
    sub ax,word[.tex_x1]
    cwde
    shl eax,ROUND
    cdq
    idiv ebx
    push eax                        ; .tex_dx12 = (tex_x2-tex_x1)/(y2-y1)

    mov ax,word[.tex_y2]
    sub ax,word[.tex_y1]
    cwde
    shl eax,ROUND
    cdq
    idiv ebx
    push eax                        ; .tex_dy12 = (tex_y2-tex_y1)/(y2-y1)

    mov ax,.z2
    sub ax,.z1
    cwde
    shl eax,CATMULL_SHIFT
    cdq
    idiv ebx
    push eax                        ; .dz12
.tt_dx12_done:

; ---- deltas along edge 1-3 ----
    mov bx,.y3
    sub bx,.y1
    jnz .tt_dx13_make
    xor edx,edx
    mov ecx,4
@@:
    push edx
    loop @b
    jmp .tt_dx13_done
.tt_dx13_make:
    movzx ebx,bx
    mov ax,.x3
    sub ax,.x1
    cwde
    shl eax,ROUND
    cdq
    idiv ebx
    push eax                        ; .dx13 = (x3-x1)/(y3-y1)

    mov ax,word[.tex_x3]
    sub ax,word[.tex_x1]
    cwde
    shl eax,ROUND
    cdq
    idiv ebx
    push eax                        ; .tex_dx13 = (tex_x3-tex_x1)/(y3-y1)

    mov ax,word[.tex_y3]
    sub ax,word[.tex_y1]
    cwde
    shl eax,ROUND
    cdq
    idiv ebx
    push eax                        ; .tex_dy13 = (tex_y3-tex_y1)/(y3-y1)

    mov ax,.z3
    sub ax,.z1
    cwde
    shl eax,CATMULL_SHIFT
    cdq
    idiv ebx
    push eax                        ; .dz13
.tt_dx13_done:

; ---- deltas along edge 2-3 ----
    mov bx,.y3
    sub bx,.y2
    jnz .tt_dx23_make
    xor edx,edx
    mov ecx,4
@@:
    push edx
    loop @b
    jmp .tt_dx23_done
.tt_dx23_make:
    movzx ebx,bx
    mov ax,.x3
    sub ax,.x2
    cwde
    shl eax,ROUND
    cdq
    idiv ebx
    push eax                        ; .dx23 = (x3-x2)/(y3-y2)

    mov ax,word[.tex_x3]
    sub ax,word[.tex_x2]
    cwde
    shl eax,ROUND
    cdq
    idiv ebx
    push eax                        ; .tex_dx23 = (tex_x3-tex_x2)/(y3-y2)

    mov ax,word[.tex_y3]
    sub ax,word[.tex_y2]
    cwde
    shl eax,ROUND
    cdq
    idiv ebx
    push eax                        ; .tex_dy23 = (tex_y3-tex_y2)/(y3-y2)

    mov ax,.z3
    sub ax,.z2
    cwde
    shl eax,CATMULL_SHIFT
    cdq
    idiv ebx
    push eax                        ; .dz23
.tt_dx23_done:

; ---- initial running values, all starting at vertex 1 ----
    movsx eax,.x1                   ; eax - cur x1 (edge 1-3), shl ROUND
    shl eax,ROUND
    mov ebx,eax                     ; ebx - cur x2 (edge 1-2, later 2-3)

    movsx edx, word[.tex_x1]
    shl edx,ROUND
    push edx                        ; .scan_x1
    push edx                        ; .scan_x2
    movsx edx, word[.tex_y1]
    shl edx,ROUND
    push edx                        ; .scan_y1
    push edx                        ; .scan_y2
    movsx edx,.z1
    shl edx,CATMULL_SHIFT
    push edx                        ; .cz1
    push edx                        ; .cz2

; ---- upper part: scanlines y1 .. y2-1, between edges 1-3 and 1-2 ----
    mov cx,.y1
    cmp cx,.y2
    jge .tt_loop1_end

.tt_loop1:
    pushad                          ; textured_line_z preserves nothing

    push .z_ptr
    push .cz1                       ; z coords shifted shl CATMULL_SHIFT
    push .cz2
    push .scan_y2
    push .scan_x2
    push .scan_y1
    push .scan_x1
    push esi                        ; texture pointer

    push cx                         ; y
    sar ebx,ROUND
    push bx                         ; x2
    sar eax,ROUND
    push ax                         ; x1
    call textured_line_z

    popad
    mov edx,.dz13
    add .cz1,edx
    mov edx,.dz12
    add .cz2,edx

    mov edx, .tex_dx13
    add .scan_x1, edx
    mov edx, .tex_dx12
    add .scan_x2, edx
    mov edx, .tex_dy13
    add .scan_y1, edx
    mov edx, .tex_dy12
    add .scan_y2, edx

    add eax, .dx13
    add ebx, .dx12
    inc cx
    cmp cx,.y2
    jl .tt_loop1

.tt_loop1_end:

; ---- lower part: scanlines y2 .. y3-1, between edges 1-3 and 2-3 ----
    mov cx,.y2
    cmp cx,.y3
    jge .tt_loop2_end

    movsx ebx,.x2                   ; restart the second edge at vertex 2
    shl ebx,ROUND
    movsx edx,.z2
    shl edx,CATMULL_SHIFT
    mov .cz2,edx
    movzx edx, word [.tex_x2]
    shl edx,ROUND
    mov .scan_x2,edx
    movzx edx, word[.tex_y2]
    shl edx,ROUND
    mov .scan_y2,edx

.tt_loop2:

    pushad

    push .z_ptr
    push .cz1                       ; z coords shifted shl CATMULL_SHIFT
    push .cz2

    push .scan_y2
    push .scan_x2
    push .scan_y1
    push .scan_x1
    push esi                        ; texture pointer

    push cx                         ; y
    sar ebx,ROUND
    push bx                         ; x2
    sar eax,ROUND
    push ax                         ; x1
    call textured_line_z

    popad

    mov edx,.dz13
    add .cz1,edx
    mov edx,.dz23
    add .cz2,edx

    mov edx, .tex_dx13
    add .scan_x1, edx
    mov edx, .tex_dx23
    add .scan_x2, edx
    mov edx, .tex_dy13
    add .scan_y1, edx
    mov edx, .tex_dy23
    add .scan_y2, edx

    add eax, .dx13
    add ebx, .dx23
    inc cx
    cmp cx,.y3
    jl .tt_loop2

.tt_loop2_end:

.tt_end:
    mov esp,ebp                     ; drop every local in one step
    ret 18
 
textured_line_z:
;------------------------------------------------------------------------
; Draws one horizontal textured span with a per-pixel Z-buffer test.
; in:    edi - screen buffer pointer (start of the 24-bit frame buffer)
;        stack (see the equates below): x1, x2, y as words, texture
;        pointer, texture coords of both ends (shl ROUND), z of both
;        ends (shl CATMULL_SHIFT), pointer to the Z-buffer (dwords).
; out:   nothing; 38 bytes of stack parameters are removed (ret 30+8)
; note:  ebp is overwritten without being saved; eax, ebx, ecx, edx,
;        esi, edi and flags are clobbered.  The string moves below rely
;        on the direction flag being clear - it is not set up here.
; Loop register roles:
;        ebx - current tex y shl ROUND      ecx - pixels left
;        edx - current Z-buffer address     edi - current screen address
;        esi - scratch: address of the texel being copied
;------------------------------------------------------------------------
.x1 equ word [ebp+4]
.x2 equ word [ebp+6]
.y equ word [ebp+8]

.tex_ptr equ dword [ebp+10]
.tex_x1 equ ebp+14
.tex_y1 equ ebp+18
.tex_x2 equ ebp+22
.tex_y2 equ ebp+26
.z2 equ dword [ebp+30] ;z1, z2 coords shifted shl CATMULL_SHIFT
.z1 equ dword [ebp+34]
.z_ptr equ dword [ebp+38]

.tex_dy equ dword [ebp-4]
.tex_dx equ dword [ebp-8]
.dz equ dword [ebp-12]
.cz equ dword [ebp-16]
.c_tex_x equ dword [ebp-20] ; current tex x
.m_sft1 equ ebp-28
.m_sft2 equ ebp-32
; .c_tex_xM equ ebp+14
.tex_dxM equ ebp-8

    mov ebp,esp

; ---- reject lines outside 0 <= y < size_y-1 ----
    mov ax,.y
    or ax,ax
    jl .tl_quit
    mov bx,[size_y_var]
    dec bx
    cmp ax,bx
    jge .tl_quit

; ---- make x1 < x2; an empty span is rejected ----
    mov ax,.x1
    cmp ax,.x2
    je .tl_quit
    jl .tl_ok

    xchg ax,.x2                     ; swap both ends with all attributes
    mov .x1,ax
if Ext >= MMX
    movq mm0,[.tex_x1]              ; one qword = (tex_x, tex_y) of an end
    movq mm1,[.tex_x2]
    movq [.tex_x2],mm0
    movq [.tex_x1],mm1

else
    mov eax,dword[.tex_x1]
    xchg eax,dword[.tex_x2]
    mov dword[.tex_x1],eax

    mov eax,dword[.tex_y1]
    xchg eax,dword[.tex_y2]
    mov dword[.tex_y1],eax

end if

    mov eax,.z1
    xchg eax,.z2
    mov .z1,eax

.tl_ok:
; ---- reject spans entirely right of size_x-1 or left of column 1 ----
    mov cx,[size_x_var]
    dec cx                          ; cx = size_x-1, kept for the clamp below
    cmp .x1,cx
    jge .tl_quit
    cmp .x2,0
    jle .tl_quit

; ---- per-pixel deltas: (end - start) / (x2 - x1) ----
    mov bx,.x2
    sub bx,.x1
    movsx ebx,bx

    mov eax,dword[.tex_y2]
    sub eax,dword[.tex_y1]
    cdq
    idiv ebx
    push eax                        ; .tex_dy

    mov eax,dword[.tex_x2]
    sub eax,dword[.tex_x1]
    cdq
    idiv ebx
    push eax                        ; .tex_dx

    mov eax,.z2
    sub eax,.z1
    cdq
    idiv ebx
    push eax                        ; .dz (also left in eax for the clip)

; ---- clip the left end to column 0, advancing z and texture coords ----
    cmp .x1,0
    jg @f

    movsx ebx,.x1
    neg ebx                         ; ebx = number of skipped pixels
    imul ebx                        ; eax = .dz * abs(.x1)
    add .z1,eax
    mov .x1,0

    mov eax,.tex_dy
    imul ebx
    add dword[.tex_y1],eax

    mov eax,.tex_dx
    imul ebx
    add dword[.tex_x1],eax

@@:
; ---- clamp the right end to size_x-1 ----
    cmp .x2,cx
    jl @f
    mov .x2,cx
@@:

; ---- start addresses: pixel index = y*size_x + x1 ----
    movsx ebx,.y
    movzx eax,word[size_x_var]
    mul ebx
    movsx ebx,.x1
    add eax,ebx
    mov ebx,eax

    lea eax,[eax*3]                 ; 3 bytes per screen pixel
    add edi,eax                     ; edi - screen address of first pixel
    shl ebx,2                       ; 4 bytes per Z-buffer entry
    add .z_ptr,ebx

    mov cx,.x2
    sub cx,.x1
    movzx ecx,cx                    ; ecx - span length in pixels

    mov eax,dword[.tex_x1]
    mov ebx,dword[.tex_y1]
    push .z1                        ; .cz
    push eax                        ; .c_tex_x
    mov edx,.z_ptr

.tl_loop:
    mov eax,.cz
    cmp eax,[edx]                   ; draw only if nearer than stored z
    jge @f
    mov esi,ebx                     ; texel index = tex_y*2^TEX_SHIFT + tex_x
    sar esi,ROUND
    shl esi,TEX_SHIFT
    mov eax,.c_tex_x
    sar eax,ROUND
    add esi,eax
    mov eax,.cz
    mov dword[edx],eax              ; renew z buffer
    and esi,TEXTURE_SIZE            ; keep the index inside the texture
    lea esi,[esi*3]                 ; 3 bytes per texel
    add esi,.tex_ptr
    movsw                           ; copy exactly 3 bytes: a 4-byte movsd
    movsb                           ; would overwrite the next pixel's
    jmp .no_skip                    ; first byte; edi ends up advanced by 3
@@:
    add edi,3                       ; hidden pixel: just step over it
.no_skip:
    add edx,4
    mov eax,.dz
    add .cz,eax
    mov eax,.tex_dx
    add .c_tex_x,eax
    add ebx,.tex_dy
    loop .tl_loop
.tl_quit:

    mov esp,ebp                     ; drop the locals

    ret 30+8
 
/programs/demos/view3ds/two_tex.inc
1,1105 → 1,1105
 
;SIZE_X equ 350
;SIZE_Y equ 350
;ROUND equ 8
;TEX_X equ 512
;TEX_Y equ 512
;TEXTURE_SIZE EQU (512*512)-1
;TEX_SHIFT EQU 9
 
;CATMULL_SHIFT equ 8
;TEXTURE_SIZE EQU (TEX_X * TEX_Y)-1
;Ext = SSE
;SSE = 3
;MMX = 1
;NON = 0
;use32
;------- Big thanks to Majuma (www.majuma.xt.pl) for absolutely great---
;------- DOS 13h mode demos --------------------------------------------
;------- Procedure draws triangle with two overlapped textures, I use --
;--------Catmull Z-buffer algorithm- (Z coordinate interpolation)-------
;--------I calc texture pixel by this way: col1*col2/256 ---------------
two_tex_triangle_z:
;------------------in - eax - x1 shl 16 + y1 -----------
;---------------------- ebx - x2 shl 16 + y2 -----------
;---------------------- ecx - x3 shl 16 + y3 -----------
;---------------------- edx - pointer to b. texture-----
;---------------------- esi - pointer to e. texture-----
;---------------------- edi - pointer to screen buffer--
;---------------------- stack : b. tex coordinates------
;----------------------         e. tex coordinates------
;----------------------         Z position coordinates--
;----------------------         pointer to Z buffer-----
;-- Z-buffer - filled with coordinates as dword --------
;-- (Z coor. as word) shl CATMULL_SHIFT ----------------
;-- out: nothing; 34 bytes of stack parameters are removed (ret 34).
;-- ebp is overwritten without being saved.  Every local below ebp is
;-- created by the pushes / sub esp in the order of the equates, so the
;-- order of those stack operations must not be changed.
.b_x1 equ ebp+4 ; procedure don't save registers !!!
.b_y1 equ ebp+6 ; each coordinate as word
.b_x2 equ ebp+8
.b_y2 equ ebp+10 ; b - first texture
.b_x3 equ ebp+12
.b_y3 equ ebp+14 ; e - second texture
.e_x1 equ ebp+16
.e_y1 equ ebp+18
.e_x2 equ ebp+20
.e_y2 equ ebp+22
.e_x3 equ ebp+24
.e_y3 equ ebp+26
.z1 equ word[ebp+28]
.z2 equ word[ebp+30]
.z3 equ word[ebp+32]
.z_buff equ dword[ebp+34] ; pointer to Z-buffer


.t_bmap equ dword[ebp-4] ; pointer to b. texture
.t_emap equ dword[ebp-8] ; pointer to e. texture
.x1 equ word[ebp-10]
.y1 equ word[ebp-12]
.x2 equ word[ebp-14]
.y2 equ word[ebp-16]
.x3 equ word[ebp-18]
.y3 equ word[ebp-20]

.dx12 equ dword[ebp-24]
.dbx12 equ dword[ebp-28]
.dby12 equ dword[ebp-32]
.dby12q equ [ebp-32]
.dex12 equ dword[ebp-36]
.dey12 equ dword[ebp-40]
.dey12q equ [ebp-40]
.dz12 equ dword[ebp-44]

.dx13 equ dword[ebp-48]
.dbx13 equ dword[ebp-52]
.dby13 equ dword[ebp-56]
.dby13q equ [ebp-56]
.dex13 equ dword[ebp-60]
.dey13 equ dword[ebp-64]
.dey13q equ [ebp-64]
.dz13 equ dword[ebp-68]

.dx23 equ dword[ebp-72]
.dbx23 equ dword[ebp-76]
.dby23 equ dword[ebp-80]
.dby23q equ [ebp-80]
.dex23 equ dword[ebp-84]
.dey23 equ dword[ebp-88]
.dey23q equ [ebp-88]
.dz23 equ dword[ebp-92]

.cx1 equ dword[ebp-96] ; current variables
.cx2 equ dword[ebp-100]
.cbx1 equ dword[ebp-104]
.cby1 equ [ebp-108]
.cex1 equ dword[ebp-112]
.cey1 equ [ebp-116]
.cbx2 equ dword[ebp-120]
.cby2 equ [ebp-124]
.cex2 equ dword[ebp-128]
.cey2 equ [ebp-132]

.cz1 equ dword[ebp-136]
.cz2 equ dword[ebp-140]

if Ext >= MMX
    emms
else
    cld
end if
    mov ebp,esp
    push edx esi                    ; -> .t_bmap, .t_emap

; ---- sort the vertices by ascending y together with their attributes
.sort3:
    cmp ax,bx
    jle .sort1
    xchg eax,ebx
    mov edx,dword[.b_x1]            ; one dword = (x, y) texture pair
    xchg edx,dword[.b_x2]
    mov dword[.b_x1],edx
    mov edx,dword[.e_x1]
    xchg edx,dword[.e_x2]
    mov dword[.e_x1],edx
    mov dx,.z1
    xchg dx,.z2
    mov .z1,dx
.sort1:
    cmp bx,cx
    jle .sort2
    xchg ebx,ecx
    mov edx,dword[.b_x2]
    xchg edx,dword[.b_x3]
    mov dword[.b_x2],edx
    mov edx,dword[.e_x2]
    xchg edx,dword[.e_x3]
    mov dword[.e_x2],edx
    mov dx,.z2
    xchg dx,.z3
    mov .z2,dx
    jmp .sort3
.sort2:
    push eax ebx ecx                ; -> .x1/.y1, .x2/.y2, .x3/.y3

; ---- trivial reject: all three x negative or all three y negative
    mov edx,80008000h               ; sign bits of both packed words
    and edx,ebx                     ; a bit survives only if it is set
    and edx,ecx                     ; in *all* three vertices
    and edx,eax
    test edx,80008000h              ; check both X and Y at once
    jne .loop23_done

; ---- per-scanline deltas along edge 1-2 (6 dwords on the stack) ----
    mov bx,.y2
    sub bx,.y1
    jnz .bt_dx12_make
    mov ecx,6                       ; flat edge: all six deltas are 0
    xor edx,edx
@@:
    push edx
    loop @b
    jmp .bt_dx12_done
.bt_dx12_make:
    mov ax,.x2
    sub ax,.x1
    cwde
    movsx ebx,bx                    ; ebx = y2-y1, divisor for this edge
    shl eax,ROUND
    cdq
    idiv ebx
    push eax                        ; .dx12

if Ext=SSE
; Four texture deltas at once: xmm1 = (end - start) * xmm3, where xmm3
; is the approximate reciprocal of (y2-y1)/[i255d] in every lane.
    sub esp,16                      ; room for .dbx12/.dby12/.dex12/.dey12
    cvtsi2ss xmm3,ebx
    cvtsi2ss xmm4,[i255d]
    divss xmm3,xmm4
    rcpss xmm3,xmm3
    shufps xmm3,xmm3,0

    movd mm0,[.b_x1]                ; packed word pairs (x, y)
    movd mm1,[.b_x2]
    movd mm2,[.e_x1]
    movd mm3,[.e_x2]
    pxor mm4,mm4
    punpcklwd mm0,mm4               ; zero-extend the words to dwords
    punpcklwd mm1,mm4
    punpcklwd mm2,mm4
    punpcklwd mm3,mm4
    cvtpi2ps xmm0,mm0
    movlhps xmm0,xmm0
    cvtpi2ps xmm0,mm2               ; xmm0 = ex1, ey1, bx1, by1 (low..high)
    cvtpi2ps xmm1,mm1
    movlhps xmm1,xmm1
    cvtpi2ps xmm1,mm3               ; xmm1 = ex2, ey2, bx2, by2
    subps xmm1,xmm0

    mulps xmm1,xmm3
    shufps xmm1,xmm1,10110001b      ; -> dey, dex, dby, dbx (memory order)
    cvtps2pi mm0,xmm1               ; mm0 -> 2 delta dwords
    movhlps xmm1,xmm1
    cvtps2pi mm1,xmm1
    movq .dey12q,mm0
    movq .dby12q,mm1

else
    mov ax,word[.b_x2]
    sub ax,word[.b_x1]
    cwde
    shl eax,ROUND
    cdq
    idiv ebx
    push eax                        ; .dbx12

    mov ax,word[.b_y2]
    sub ax,word[.b_y1]
    cwde
    shl eax,ROUND
    cdq
    idiv ebx
    push eax                        ; .dby12

    mov ax,word[.e_x2]
    sub ax,word[.e_x1]
    cwde
    shl eax,ROUND
    cdq
    idiv ebx
    push eax                        ; .dex12

    mov ax,word[.e_y2]
    sub ax,word[.e_y1]
    cwde
    shl eax,ROUND
    cdq
    idiv ebx
    push eax                        ; .dey12

end if
    mov ax,.z2
    sub ax,.z1
    cwde
    shl eax,CATMULL_SHIFT
    cdq
    idiv ebx
    push eax                        ; .dz12
.bt_dx12_done:

; ---- deltas along edge 1-3 ----
    mov bx,.y3
    sub bx,.y1
    jnz .bt_dx13_make
    mov ecx,6
    xor edx,edx
@@:
    push edx
    loop @b
    jmp .bt_dx13_done
.bt_dx13_make:
    mov ax,.x3
    sub ax,.x1
    cwde
    movsx ebx,bx
    shl eax,ROUND
    cdq
    idiv ebx
    push eax                        ; .dx13

if Ext=SSE

    cvtsi2ss xmm3,ebx
    cvtsi2ss xmm4,[i255d]
    divss xmm3,xmm4
    rcpss xmm3,xmm3
    shufps xmm3,xmm3,0
    sub esp,16                      ; room for the four texture deltas

    movd mm0,[.b_x1]
    movd mm1,[.b_x3]
    movd mm2,[.e_x1]
    movd mm3,[.e_x3]

    pxor mm4,mm4
    punpcklwd mm0,mm4
    punpcklwd mm1,mm4
    punpcklwd mm2,mm4
    punpcklwd mm3,mm4

    cvtpi2ps xmm0,mm0
    movlhps xmm0,xmm0
    cvtpi2ps xmm0,mm2
    cvtpi2ps xmm1,mm1
    movlhps xmm1,xmm1
    cvtpi2ps xmm1,mm3
    subps xmm1,xmm0

    mulps xmm1,xmm3
    shufps xmm1,xmm1,10110001b
    cvtps2pi mm0,xmm1               ; mm0 -> 2 delta dwords
    movhlps xmm1,xmm1
    cvtps2pi mm1,xmm1
    movq .dey13q,mm0
    movq .dby13q,mm1

else

    mov ax,word[.b_x3]
    sub ax,word[.b_x1]
    cwde
    shl eax,ROUND
    cdq
    idiv ebx
    push eax                        ; .dbx13

    mov ax,word[.b_y3]
    sub ax,word[.b_y1]
    cwde
    shl eax,ROUND
    cdq
    idiv ebx
    push eax                        ; .dby13

    mov ax,word[.e_x3]
    sub ax,word[.e_x1]
    cwde
    shl eax,ROUND
    cdq
    idiv ebx
    push eax                        ; .dex13

    mov ax,word[.e_y3]
    sub ax,word[.e_y1]
    cwde
    shl eax,ROUND
    cdq
    idiv ebx
    push eax                        ; .dey13

end if

    mov ax,.z3
    sub ax,.z1
    cwde
    shl eax,CATMULL_SHIFT
    cdq
    idiv ebx
    push eax                        ; .dz13
.bt_dx13_done:

; ---- deltas along edge 2-3 ----
    mov bx,.y3
    sub bx,.y2
    jnz .bt_dx23_make
    mov ecx,6
    xor edx,edx
@@:
    push edx
    loop @b
    jmp .bt_dx23_done
.bt_dx23_make:
    mov ax,.x3
    sub ax,.x2
    cwde
    movsx ebx,bx
    shl eax,ROUND
    cdq
    idiv ebx
    push eax                        ; .dx23

if Ext=SSE
; Unlike edges 1-2 and 1-3 this edge divides exactly (divps) instead of
; multiplying by an approximate reciprocal.
    cvtsi2ss xmm3,ebx
    cvtsi2ss xmm4,[i255d]
    divss xmm3,xmm4
    shufps xmm3,xmm3,0
    sub esp,16                      ; room for the four texture deltas

    movd mm0,[.b_x2]
    movd mm1,[.b_x3]
    movd mm2,[.e_x2]
    movd mm3,[.e_x3]

    pxor mm4,mm4
    punpcklwd mm0,mm4
    punpcklwd mm1,mm4
    punpcklwd mm2,mm4
    punpcklwd mm3,mm4

    cvtpi2ps xmm0,mm0
    movlhps xmm0,xmm0
    cvtpi2ps xmm0,mm2
    cvtpi2ps xmm1,mm1
    movlhps xmm1,xmm1
    cvtpi2ps xmm1,mm3
    subps xmm1,xmm0

    divps xmm1,xmm3
    shufps xmm1,xmm1,10110001b
    cvtps2pi mm0,xmm1               ; mm0 -> 2 delta dwords
    movhlps xmm1,xmm1
    cvtps2pi mm1,xmm1
    movq .dey23q,mm0
    movq .dby23q,mm1

else

    mov ax,word[.b_x3]
    sub ax,word[.b_x2]
    cwde
    shl eax,ROUND
    cdq
    idiv ebx
    push eax                        ; .dbx23

    mov ax,word[.b_y3]
    sub ax,word[.b_y2]
    cwde
    shl eax,ROUND
    cdq
    idiv ebx
    push eax                        ; .dby23

    mov ax,word[.e_x3]
    sub ax,word[.e_x2]
    cwde
    shl eax,ROUND
    cdq
    idiv ebx
    push eax                        ; .dex23

    mov ax,word[.e_y3]
    sub ax,word[.e_y2]
    cwde
    shl eax,ROUND
    cdq
    idiv ebx
    push eax                        ; .dey23
end if
    mov ax,.z3
    sub ax,.z2
    cwde
    shl eax,CATMULL_SHIFT
    cdq
    idiv ebx
    push eax                        ; .dz23
.bt_dx23_done:

; ---- initial running values, all starting at vertex 1 ----
    movsx eax,.x1
    shl eax,ROUND
    push eax eax                    ; .cx1, .cx2

    sub esp,32                      ; reserve .cbx1 .. .cey2 BEFORE they
                                    ; are written, so nothing is stored
                                    ; below the stack pointer
    movsx eax,word[.b_x1]
    shl eax,ROUND
    mov .cbx1,eax
    mov .cbx2,eax

    movsx eax,word[.b_y1]
    shl eax,ROUND
    mov .cby1,eax
    mov .cby2,eax

    movsx eax,word[.e_x1]
    shl eax,ROUND
    mov .cex1,eax
    mov .cex2,eax

    movsx eax,word[.e_y1]
    shl eax,ROUND
    mov .cey1,eax
    mov .cey2,eax

    movsx eax,.z1
    shl eax,CATMULL_SHIFT
    push eax eax                    ; .cz1, .cz2

; ---- upper part: scanlines y1 .. y2-1, between edges 1-3 and 1-2 ----
    movsx ecx,.y1
    cmp cx,.y2
    jge .loop12_done
.loop12:
    call .call_line

    mov eax,.dx13
    add .cx1,eax
    mov ebx,.dx12
    add .cx2,ebx
if Ext>= SSE2
; one 16-byte add covers cey, cex, cby, cbx of an edge
    movups xmm0,.cey1
    movups xmm1,.cey2
    movups xmm2,.dey12q
    movups xmm3,.dey13q
    paddd xmm0,xmm3
    paddd xmm1,xmm2
    movups .cey1,xmm0
    movups .cey2,xmm1
else if (Ext = MMX) | (Ext=SSE)
; each qword holds a (y, x) pair of running coordinates
    movq mm0,.cby2
    movq mm1,.cby1
    movq mm2,.cey2
    movq mm3,.cey1
    paddd mm0,.dby12q
    paddd mm1,.dby13q
    paddd mm2,.dey12q
    paddd mm3,.dey13q
    movq .cby2,mm0
    movq .cby1,mm1
    movq .cey1,mm3
    movq .cey2,mm2
else
    mov edx,.dbx13
    add .cbx1,edx
    mov eax,.dbx12
    add .cbx2,eax
    mov ebx,.dby13
    add .cby1,ebx
    mov edx,.dby12
    add .cby2,edx

    mov eax,.dex13
    add .cex1,eax
    mov ebx,.dex12
    add .cex2,ebx
    mov edx,.dey13
    add .cey1,edx
    mov eax,.dey12
    add .cey2,eax

end if
    mov ebx,.dz13
    add .cz1,ebx
    mov edx,.dz12
    add .cz2,edx

    inc ecx
    cmp cx,.y2
    jl .loop12
.loop12_done:

; ---- lower part: scanlines y2 .. y3-1, between edges 1-3 and 2-3 ----
    movsx ecx,.y2
    cmp cx,.y3
    jge .loop23_done

    movsx eax,.z2                   ; restart the second edge at vertex 2
    shl eax,CATMULL_SHIFT
    mov .cz2,eax

    movsx eax,.x2
    shl eax,ROUND
    mov .cx2,eax

    movzx eax,word[.b_x2]
    shl eax,ROUND
    mov .cbx2,eax

    movzx eax,word[.b_y2]
    shl eax,ROUND
    mov .cby2,eax

    movzx eax,word[.e_x2]
    shl eax,ROUND
    mov .cex2,eax

    movzx eax,word[.e_y2]
    shl eax,ROUND
    mov .cey2,eax

.loop23:
    call .call_line

    mov eax,.dx13
    add .cx1,eax
    mov ebx,.dx23
    add .cx2,ebx
if Ext>= SSE2
    movups xmm0,.cey1
    movups xmm1,.cey2
    movups xmm2,.dey23q
    movups xmm3,.dey13q
    paddd xmm0,xmm3
    paddd xmm1,xmm2
    movups .cey1,xmm0
    movups .cey2,xmm1
else if (Ext = MMX) | ( Ext = SSE)
    movq mm0,.cby2
    movq mm1,.cby1
    movq mm2,.cey2
    movq mm3,.cey1
    paddd mm0,.dby23q
    paddd mm1,.dby13q
    paddd mm2,.dey23q
    paddd mm3,.dey13q
    movq .cby2,mm0
    movq .cby1,mm1
    movq .cey2,mm2
    movq .cey1,mm3

else
    mov edx,.dbx13
    add .cbx1,edx
    mov eax,.dbx23
    add .cbx2,eax
    mov ebx,.dby13
    add .cby1,ebx
    mov edx,.dby23
    add .cby2,edx

    mov eax,.dex13
    add .cex1,eax
    mov ebx,.dex23
    add .cex2,ebx
    mov edx,.dey13
    add .cey1,edx
    mov eax,.dey23
    add .cey2,eax
end if

    mov ebx,.dz13
    add .cz1,ebx
    mov edx,.dz23
    add .cz2,edx

    inc ecx
    cmp cx,.y3
    jl .loop23
.loop23_done:

    mov esp,ebp                     ; drop every local in one step
    ret 34

; ---- helper: draw the current scanline (ecx = y) ----
; Pushes the 14 dword parameters of two_tex_line_z, passes x1/x2 in
; eax/ebx and preserves all general registers for the caller's loop.
.call_line:

    pushad

    push .cz1
    push .cz2
    push .z_buff
    push .t_bmap
    push .t_emap
    push dword .cey2
    push .cex2
    push dword .cey1
    push .cex1
    push dword .cby2
    push .cbx2
    push dword .cby1
    push .cbx1
    push ecx

    mov eax,.cx1
    sar eax,ROUND
    mov ebx,.cx2
    sar ebx,ROUND

    call two_tex_line_z

    popad
    ret
two_tex_line_z:
;------------------------------------------------------------------------
; Draws one horizontal span whose colour is the per-channel product of
; two textures (b. and e.), (col1*col2) shr 8, with a Z-buffer test.
;--------------in: eax - x1
;--------------    ebx - x2
;--------------    edi - pointer to screen buffer
; stack - the other parameters (see the equates below)
; out: nothing; 56 bytes of stack parameters are removed (ret 56)
; note: ebp is overwritten without being saved; the general registers
;       (and the MMX/XMM registers on the MMX/SSE paths) are clobbered.
;------------------------------------------------------------------------
.y equ dword [ebp+4]
.bx1 equ [ebp+8] ; ---
.by1 equ [ebp+12] ; |
.bx2 equ [ebp+16] ; |
.by2 equ [ebp+20] ; |> b. texture and e. texture coords
.ex1 equ [ebp+24] ; |> shifted shl ROUND
.ey1 equ [ebp+28] ; |
.ex2 equ [ebp+32] ; |
.ey2 equ [ebp+36] ; ---
.emap equ [ebp+40] ; e. texture pointer
.bmap equ [ebp+44] ; b. texture pointer
.z_buff equ dword [ebp+48]
.z2 equ dword [ebp+52] ; -- |> z coords shifted
.z1 equ dword [ebp+56] ; -- shl CATMULL_SHIFT

.x1 equ dword [ebp-4]
.x2 equ dword [ebp-8]
.dbx equ [ebp-12]
.dex equ [ebp-16]
.dby equ [ebp-20]
.dey equ [ebp-24]
.dz equ dword [ebp-28]
.cbx equ [ebp-32]
.cex equ [ebp-36]
.cby equ [ebp-40]
.cey equ [ebp-44]
.cz equ dword [ebp-48]
.czbuff equ dword [ebp-52]

    mov ebp,esp

; ---- reject lines outside 0 <= y < size_y-1 ----
    mov ecx,.y
    or ecx,ecx
    jl .bl_end
    mov dx,word[size_y_var]
    dec dx
    cmp cx,dx
    jge .bl_end

; ---- make x1 < x2; an empty span is rejected ----
    cmp eax,ebx
    jl @f
    je .bl_end

    xchg eax,ebx                    ; swap both ends with all attributes
if Ext=NON
    mov edx,.bx1
    xchg edx,.bx2
    mov .bx1,edx
    mov edx,.by1
    xchg edx,.by2
    mov .by1,edx

    mov edx,.ex1
    xchg edx,.ex2
    mov .ex1,edx
    mov edx,.ey1
    xchg edx,.ey2
    mov .ey1,edx
else
    movq mm0,.bx1                   ; each qword is an (x, y) pair
    movq mm1,.ex1
    movq mm2,.bx2
    movq mm3,.ex2
    movq .bx2,mm0
    movq .ex2,mm1
    movq .bx1,mm2
    movq .ex1,mm3
end if
    mov edx,.z1
    xchg edx,.z2
    mov .z1,edx
@@:
    push eax ebx                    ; -> .x1, .x2
    mov ebx,.x1
    movzx edx,word[size_x_var]
    dec edx
    cmp ebx,edx
    jg .bl_end                      ; span starts right of the last column
    cmp .x2,0
    jle .bl_end                     ; span ends left of the screen

    mov ebx,.x2
    sub ebx,.x1                     ; ebx = span length, divisor below

; ---- per-pixel deltas: (end - start) / (x2 - x1) ----
if Ext >= SSE

    sub esp,16                      ; room for .dbx/.dex/.dby/.dey
    cvtsi2ss xmm3,ebx
    shufps xmm3,xmm3,0

    cvtpi2ps xmm0,.bx1              ; bx1; by1
    movlhps xmm0,xmm0
    cvtpi2ps xmm0,.ex1              ; xmm0 = ex1, ey1, bx1, by1 (low..high)
    cvtpi2ps xmm1,.bx2              ; bx2; by2
    movlhps xmm1,xmm1
    cvtpi2ps xmm1,.ex2              ; xmm1 = ex2, ey2, bx2, by2
    subps xmm1,xmm0
    divps xmm1,xmm3                 ; xmm1 = dex, dey, dbx, dby (low..high)

    shufps xmm1,xmm1,11011000b      ; -> dex, dbx, dey, dby
    cvtps2pi mm0,xmm1               ; mm0 -> 2 delta dwords
    movhlps xmm1,xmm1
    cvtps2pi mm1,xmm1
    movq .dex,mm0                   ; lo = dex, hi = dbx
    movq .dey,mm1                   ; lo = dey, hi = dby

else

    mov eax,.bx2                    ; calc .dbx
    sub eax,.bx1
    cdq
    idiv ebx
    push eax

    mov eax,.ex2                    ; calc .dex
    sub eax,.ex1
    cdq
    idiv ebx
    push eax

    mov eax,.by2                    ; calc .dby
    sub eax,.by1
    cdq
    idiv ebx
    push eax

    mov eax,.ey2                    ; calc .dey
    sub eax,.ey1
    cdq
    idiv ebx
    push eax

end if

    mov eax,.z2                     ; calc .dz
    sub eax,.z1
    cdq
    idiv ebx
    push eax                        ; .dz (also left in eax for the clip)

; ---- clip the left end to column 0, advancing z and texture coords ----
    cmp .x1,0
    jge @f
    mov ebx,.x1
    neg ebx                         ; ebx = number of skipped pixels
    imul ebx                        ; eax = .dz * abs(.x1)
    add .z1,eax
    mov .x1,0

    mov eax,.dbx
    imul ebx
    add .bx1,eax

    mov eax,.dby
    imul ebx
    add .by1,eax

    mov eax,.dex
    imul ebx
    add .ex1,eax

    mov eax,.dey
    imul ebx
    add .ey1,eax
@@:
; ---- clamp the right end to size_x ----
    movzx eax,word[size_x_var]
    mov ebx,.x2
    cmp eax,ebx
    jg @f
    mov .x2,eax
@@:
@@:
; ---- start addresses: pixel index = y*size_x + x1 ----
; eax still holds size_x from the clamp above
    mov ebx,.y
    mul ebx
    mov ebx,.x1
    add eax,ebx
    mov ebx,eax
    lea eax,[eax*3]                 ; 3 bytes per screen pixel
    add edi,eax                     ; edi - screen
    mov esi,.z_buff                 ; z-buffer filled with dd variables
    shl ebx,2
    add esi,ebx                     ; esi - Z buffer

    mov ecx,.x2
    sub ecx,.x1                     ; ecx - span length in pixels

; ---- init current variables ----
    push dword .bx1                 ; .cbx
    push dword .ex1                 ; .cex
    push dword .by1                 ; .cby
    push dword .ey1                 ; .cey

    push .z1                        ; .cz - current z shl CATMULL_SHIFT
    push esi                        ; .czbuff

if Ext >= MMX
    pxor mm0,mm0                    ; zero, used for unpack / pack
    movq mm3,.cex                   ; lo = cex, hi = cbx
    movq mm4,.cey                   ; lo = cey, hi = cby
    mov edx,.czbuff                 ; edx - current Z-buffer address
else
    cld
end if
.draw:
; ---- Z test: draw only if nearer than the stored value ----
if Ext=NON
    mov esi,.czbuff                 ; .czbuff current address in buffer
    mov ebx,.cz                     ; .cz - cur z position
    cmp ebx,dword[esi]
else
    mov ebx,.cz
    cmp ebx,dword[edx]
end if
    jge .skip

; ---- texel addresses: (y shl TEX_SHIFT + x)*3 + texture pointer ----
if Ext=NON
    mov eax,.cby
    mov esi,.cbx
    sar eax,ROUND
    sar esi,ROUND
    shl eax,TEX_SHIFT
    add esi,eax
    lea esi,[esi*3]
    add esi,.bmap                   ; esi - current b. texture address

    mov ebx,.cex                    ; .cex - current env map x
    mov eax,.cey                    ; .cey - current env map y
    sar ebx,ROUND
    sar eax,ROUND

    shl eax,TEX_SHIFT
    add ebx,eax
    lea ebx,[ebx*3]
    add ebx,.emap                   ; ebx - current e. texture address


else
    movq mm5,mm4                    ; (cey, cby)
    psrad mm5,ROUND
    pslld mm5,TEX_SHIFT
    movq mm6,mm3                    ; (cex, cbx)
    psrad mm6,ROUND
    paddd mm5,mm6
    movq mm6,mm5
    paddd mm5,mm5
    paddd mm5,mm6                   ; index * 3
    paddd mm5,.emap                 ; adds .emap (lo) and .bmap (hi)
    movd esi,mm5                    ; esi - e. texel address
    psrlq mm5,32
    movd ebx,mm5                    ; ebx - b. texel address
end if
if Ext>=MMX
    movd mm1,[esi]
    movd mm2,[ebx]
    punpcklbw mm1,mm0
    punpcklbw mm2,mm0
    pmullw mm1,mm2
    psrlw mm1,8
    packuswb mm1,mm0
    movd eax,mm1                    ; store exactly 3 bytes: a 4-byte
    mov [edi],ax                    ; movd to the screen would overwrite
    shr eax,16                      ; the first byte of the next pixel
    mov [edi+2],al
    mov ebx,.cz
    mov dword[edx],ebx              ; renew z buffer
else
    cld                             ; esi - one texel, ebx - the other
    lodsb
    mov dl,[ebx]
    mul dl
    shr ax,8
    stosb
    inc ebx
    lodsb
    mov dl,[ebx]
    mul dl
    shr ax,8
    stosb
    inc ebx
    lodsb
    mov dl,[ebx]
    mul dl
    shr ax,8
    stosb
    mov ebx,.cz
    mov esi,.czbuff
    mov dword[esi],ebx              ; renew z buffer
    jmp .no_skip                    ; edi already advanced by the stosb's
end if
.skip:
    add edi,3                       ; MMX path falls through to here too

; ---- advance the running values by one pixel ----
if Ext = NON
.no_skip:
    add .czbuff,4
    mov eax,.dbx
    add .cbx,eax
    mov eax,.dby
    add .cby,eax
    mov eax,.dex
    add .cex,eax
    mov eax,.dey
    add .cey,eax
else
    add edx,4
    paddd mm3,.dex                  ; (cex, cbx) += (dex, dbx)
    paddd mm4,.dey                  ; (cey, cby) += (dey, dby)
end if
    mov eax,.dz
    add .cz,eax
if Ext = NON
    dec ecx
    jnz .draw
else
    loop .draw
end if

.bl_end:
    mov esp,ebp                     ; drop the locals
    ret 56
 
 
;SIZE_X equ 350
;SIZE_Y equ 350
;ROUND equ 8
;TEX_X equ 512
;TEX_Y equ 512
;TEXTURE_SIZE EQU (512*512)-1
;TEX_SHIFT EQU 9
 
;CATMULL_SHIFT equ 8
;TEXTURE_SIZE EQU (TEX_X * TEX_Y)-1
;Ext = SSE
;SSE = 3
;MMX = 1
;NON = 0
;use32
;------- Big thanks to Majuma (www.majuma.xt.pl) for absolutely great---
;------- DOS 13h mode demos --------------------------------------------
;------- Procedure draws triangle with two overlapped textures, I use --
;--------Catmull Z-buffer algorithm- (Z coordinate interpolation)-------
;--------I calc texture pixel by this way: col1*col2/256 ---------------
two_tex_triangle_z:
;------------------in - eax - x1 shl 16 + y1 -----------
;---------------------- ebx - x2 shl 16 + y2 -----------
;---------------------- ecx - x3 shl 16 + y3 -----------
;---------------------- edx - pointer to b. texture-----
;---------------------- esi - pointer to e. texture-----
;---------------------- edi - pointer to screen buffer--
;---------------------- stack : b. tex coordinates------
;---------------------- e. tex coordinates------
;---------------------- Z position coordinates--
;---------------------- pointer io Z buffer-----
;-- Z-buffer - filled with coordinates as dword --------
;-- (Z coor. as word) shl CATMULL_SHIFT ----------------
.b_x1 equ ebp+4 ; procedure don't save registers !!!
.b_y1 equ ebp+6 ; each coordinate as word
.b_x2 equ ebp+8
.b_y2 equ ebp+10 ; b - first texture
.b_x3 equ ebp+12
.b_y3 equ ebp+14 ; e - second texture
.e_x1 equ ebp+16
.e_y1 equ ebp+18
.e_x2 equ ebp+20
.e_y2 equ ebp+22
.e_x3 equ ebp+24
.e_y3 equ ebp+26
.z1 equ word[ebp+28]
.z2 equ word[ebp+30]
.z3 equ word[ebp+32]
.z_buff equ dword[ebp+34] ; pointer to Z-buffer
 
 
.t_bmap equ dword[ebp-4] ; pointer to b. texture
.t_emap equ dword[ebp-8] ; pointer to e. texture
.x1 equ word[ebp-10]
.y1 equ word[ebp-12]
.x2 equ word[ebp-14]
.y2 equ word[ebp-16]
.x3 equ word[ebp-18]
.y3 equ word[ebp-20]
 
.dx12 equ dword[ebp-24]
.dbx12 equ dword[ebp-28]
.dby12 equ dword[ebp-32]
.dby12q equ [ebp-32]
.dex12 equ dword[ebp-36]
.dey12 equ dword[ebp-40]
.dey12q equ [ebp-40]
.dz12 equ dword[ebp-44]
 
.dx13 equ dword[ebp-48]
.dbx13 equ dword[ebp-52]
.dby13 equ dword[ebp-56]
.dby13q equ [ebp-56]
.dex13 equ dword[ebp-60]
.dey13 equ dword[ebp-64]
.dey13q equ [ebp-64]
.dz13 equ dword[ebp-68]
 
.dx23 equ dword[ebp-72]
.dbx23 equ dword[ebp-76]
.dby23 equ dword[ebp-80]
.dby23q equ [ebp-80]
.dex23 equ dword[ebp-84]
.dey23 equ dword[ebp-88]
.dey23q equ [ebp-88]
.dz23 equ dword[ebp-92]
 
.cx1 equ dword[ebp-96] ; current variables
.cx2 equ dword[ebp-100]
.cbx1 equ dword[ebp-104]
.cby1 equ [ebp-108]
.cex1 equ dword[ebp-112]
.cey1 equ [ebp-116]
.cbx2 equ dword[ebp-120]
.cby2 equ [ebp-124]
.cex2 equ dword[ebp-128]
.cey2 equ [ebp-132]
 
.cz1 equ dword[ebp-136]
.cz2 equ dword[ebp-140]
 
if Ext >= MMX
emms
else
cld
end if
mov ebp,esp
push edx esi ; store bump map
; push esi ; store e. map
; sub esp,120
.sort3: ; sort triangle coordinates...
cmp ax,bx
jle .sort1
xchg eax,ebx
mov edx,dword[.b_x1]
xchg edx,dword[.b_x2]
mov dword[.b_x1],edx
mov edx,dword[.e_x1]
xchg edx,dword[.e_x2]
mov dword[.e_x1],edx
mov dx,.z1
xchg dx,.z2
mov .z1,dx
.sort1:
cmp bx,cx
jle .sort2
xchg ebx,ecx
mov edx,dword[.b_x2]
xchg edx,dword[.b_x3]
mov dword[.b_x2],edx
mov edx,dword[.e_x2]
xchg edx,dword[.e_x3]
mov dword[.e_x2],edx
mov dx,.z2
xchg dx,.z3
mov .z2,dx
jmp .sort3
.sort2:
push eax ebx ecx ; store triangle coords in variables
; push ebx
; push ecx
 
mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that
and edx,ebx ; if *all* of them are negative a sign flag is raised
and edx,ecx
and edx,eax
test edx,80008000h ; Check both X&Y at once
jne .loop23_done
; mov edx,eax ; eax,ebx,ecx are ORd together into edx which means that
; or edx,ebx ; if any *one* of them is negative a sign flag is raised
; or edx,ecx
; test edx,80000000h ; Check only X
; jne .loop23_done
 
; cmp .x1,SIZE_X ; {
; jg .loop23_done
; cmp .x2,SIZE_X ; This can be optimized with effort
; jg .loop23_done
; cmp .x3,SIZE_X
; jg .loop23_done ; {
 
 
mov bx,.y2 ; calc delta 12
sub bx,.y1
jnz .bt_dx12_make
mov ecx,6
xor edx,edx
@@:
push edx ;dword 0
loop @b
jmp .bt_dx12_done
.bt_dx12_make:
mov ax,.x2
sub ax,.x1
cwde
movsx ebx,bx
shl eax,ROUND
cdq
idiv ebx
; mov .dx12,eax
push eax
 
if Ext=SSE
 
sub esp,16
cvtsi2ss xmm3,ebx ;rcps
; mov eax,255
cvtsi2ss xmm4,[i255d] ;eax
divss xmm3,xmm4
rcpss xmm3,xmm3
; mulss xmm3,xmm4
shufps xmm3,xmm3,0
 
movd mm0,[.b_x1]
movd mm1,[.b_x2]
movd mm2,[.e_x1]
movd mm3,[.e_x2]
; psubsw mm3,mm2
; psubsw mm1,mm0
pxor mm4,mm4
punpcklwd mm0,mm4
punpcklwd mm1,mm4
punpcklwd mm2,mm4
punpcklwd mm3,mm4
; pslld mm0,ROUND
; pslld mm1,ROUND
; pslld mm2,ROUND
; pslld mm3,ROUND
cvtpi2ps xmm0,mm0
movlhps xmm0,xmm0
cvtpi2ps xmm0,mm2
cvtpi2ps xmm1,mm1
movlhps xmm1,xmm1
cvtpi2ps xmm1,mm3
subps xmm1,xmm0
 
; pxor mm4,mm4
; movq mm5,mm1
; movq mm6,mm1
; pcmpeqb mm5,mm4
; psubd mm1,mm0
; psubd mm3,mm2
 
; movq mm0,[.b_x1] ; bx1 by1 bx2 by2
; movq mm1,[.e_x1] ; ex1 ey1 ex2 ey2
; pxor
; punpcklhd mm0,mm1 ; lwd ;
; psubw mm1,mm0 ; mm1, mm0
; pxor mm2,mm2
; pmovmaskb eax,mm1
; and eax,10101010b
; pcmpgtw mm2,mm1
; punpcklwd mm1,mm2
; psllw mm0,ROUND
; psllw mm1,ROUND
; movq mm2,mm0
; psrlq mm0,32
 
; cvtpi2ps xmm0,mm1
; movlhps xmm0,xmm0
; cvtpi2ps xmm0,mm3
; divps xmm1,xmm3
mulps xmm1,xmm3
shufps xmm1,xmm1,10110001b
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords
movhlps xmm1,xmm1
cvtps2pi mm1,xmm1
movq .dey12q,mm0
movq .dby12q,mm1
 
; movd .dex12,mm0
; psrlq mm0,32
; movd .dey12,mm0
; movhlps xmm1,xmm1
; cvtps2pi mm0,xmm1
; movd .dbx12,mm0
; psrlq mm0,32
; movd .dby12,mm0
 
else
mov ax,word[.b_x2]
sub ax,word[.b_x1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dbx12,eax
push eax
 
mov ax,word[.b_y2]
sub ax,word[.b_y1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dby12,eax
push eax
 
; mov eax,.dbx12
; mov ebx,.dby12
; int3
 
mov ax,word[.e_x2]
sub ax,word[.e_x1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dex12,eax
push eax
 
mov ax,word[.e_y2]
sub ax,word[.e_y1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dey12,eax
push eax
 
end if
mov ax,.z2
sub ax,.z1
cwde
shl eax,CATMULL_SHIFT
cdq
idiv ebx
push eax
.bt_dx12_done:
 
mov bx,.y3 ; calc delta13
sub bx,.y1
jnz .bt_dx13_make
mov ecx,6
xor edx,edx
@@:
push edx ;dword 0
loop @b
jmp .bt_dx13_done
.bt_dx13_make:
mov ax,.x3
sub ax,.x1
cwde
movsx ebx,bx
shl eax,ROUND
cdq
idiv ebx
; mov .dx13,eax
push eax
 
if Ext=SSE
 
cvtsi2ss xmm3,ebx
; mov eax,255
cvtsi2ss xmm4,[i255d]
divss xmm3,xmm4
rcpss xmm3,xmm3
; mulss xmm3,xmm4
shufps xmm3,xmm3,0
sub esp,16
 
movd mm0,[.b_x1]
movd mm1,[.b_x3]
movd mm2,[.e_x1]
movd mm3,[.e_x3]
 
pxor mm4,mm4
punpcklwd mm0,mm4
punpcklwd mm1,mm4
punpcklwd mm2,mm4
punpcklwd mm3,mm4
 
cvtpi2ps xmm0,mm0
movlhps xmm0,xmm0
cvtpi2ps xmm0,mm2
cvtpi2ps xmm1,mm1
movlhps xmm1,xmm1
cvtpi2ps xmm1,mm3
subps xmm1,xmm0
 
; divps xmm1,xmm3
mulps xmm1,xmm3
shufps xmm1,xmm1,10110001b
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords
movhlps xmm1,xmm1
cvtps2pi mm1,xmm1
movq .dey13q,mm0
movq .dby13q,mm1
 
else
 
mov ax,word[.b_x3]
sub ax,word[.b_x1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dbx13,eax
push eax
 
mov ax,word[.b_y3]
sub ax,word[.b_y1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dby13,eax
push eax
 
mov ax,word[.e_x3]
sub ax,word[.e_x1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dex13,eax
push eax
 
mov ax,word[.e_y3]
sub ax,word[.e_y1]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dey13,eax
push eax
 
end if
 
mov ax,.z3
sub ax,.z1
cwde
shl eax,CATMULL_SHIFT
cdq
idiv ebx
; mov .dz13,eax
push eax
.bt_dx13_done:
 
mov bx,.y3 ; calc delta23
sub bx,.y2
jnz .bt_dx23_make
mov ecx,6
xor edx,edx
@@:
push edx ;dword 0
loop @b
jmp .bt_dx23_done
.bt_dx23_make:
mov ax,.x3
sub ax,.x2
cwde
movsx ebx,bx
shl eax,ROUND
cdq
idiv ebx
; mov .dx23,eax
push eax
 
if Ext=SSE
 
cvtsi2ss xmm3,ebx
; mov eax,255
cvtsi2ss xmm4,[i255d] ;eax
divss xmm3,xmm4
shufps xmm3,xmm3,0
sub esp,16
 
movd mm0,[.b_x2]
movd mm1,[.b_x3]
movd mm2,[.e_x2]
movd mm3,[.e_x3]
 
pxor mm4,mm4
punpcklwd mm0,mm4
punpcklwd mm1,mm4
punpcklwd mm2,mm4
punpcklwd mm3,mm4
 
cvtpi2ps xmm0,mm0
movlhps xmm0,xmm0
cvtpi2ps xmm0,mm2
cvtpi2ps xmm1,mm1
movlhps xmm1,xmm1
cvtpi2ps xmm1,mm3
subps xmm1,xmm0
 
divps xmm1,xmm3
shufps xmm1,xmm1,10110001b
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords
movhlps xmm1,xmm1
cvtps2pi mm1,xmm1
movq .dey23q,mm0
movq .dby23q,mm1
 
else
 
mov ax,word[.b_x3]
sub ax,word[.b_x2]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dbx23,eax
push eax
 
mov ax,word[.b_y3]
sub ax,word[.b_y2]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dby23,eax
push eax
 
mov ax,word[.e_x3]
sub ax,word[.e_x2]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dex23,eax
push eax
 
mov ax,word[.e_y3]
sub ax,word[.e_y2]
cwde
shl eax,ROUND
cdq
idiv ebx
; mov .dey23,eax
push eax
end if
mov ax,.z3
sub ax,.z2
cwde
shl eax,CATMULL_SHIFT
cdq
idiv ebx
; mov .dz23,eax
push eax
; sub esp,40
.bt_dx23_done:
movsx eax,.x1
shl eax,ROUND
; mov .cx1,eax
; mov .cx2,eax
push eax eax
; push eax
 
movsx eax,word[.b_x1]
shl eax,ROUND
mov .cbx1,eax
mov .cbx2,eax
; push eax eax
; push eax
 
movsx eax,word[.b_y1]
shl eax,ROUND
mov .cby1,eax
mov .cby2,eax
; push eax eax
; push eax
 
movsx eax,word[.e_x1]
shl eax,ROUND
mov .cex1,eax
mov .cex2,eax
; push eax eax
;push eax
 
movsx eax,word[.e_y1]
shl eax,ROUND
mov .cey1,eax
mov .cey2,eax
sub esp,32
; push eax eax
;push eax
 
movsx eax,.z1
shl eax,CATMULL_SHIFT
; mov .cz1,eax
; mov .cz2,eax
push eax eax
;push eax
 
movsx ecx,.y1
cmp cx,.y2
jge .loop12_done
.loop12:
call .call_line
 
mov eax,.dx13
add .cx1,eax
mov ebx,.dx12
add .cx2,ebx
if Ext>= SSE2
movups xmm0,.cey1
movups xmm1,.cey2
movups xmm2,.dey12q
movups xmm3,.dey13q
paddd xmm0,xmm3
paddd xmm1,xmm2
movups .cey1,xmm0
movups .cey2,xmm1
else if (Ext = MMX) | (Ext=SSE)
movq mm0,.cby2 ; with this optimization object
movq mm1,.cby1 ; looks bit annoying
movq mm2,.cey2
movq mm3,.cey1
paddd mm0,.dby12q
paddd mm1,.dby13q
paddd mm2,.dey12q
paddd mm3,.dey13q
movq .cby2,mm0
movq .cby1,mm1
movq .cey1,mm3
movq .cey2,mm2
else
mov edx,.dbx13
add .cbx1,edx
mov eax,.dbx12
add .cbx2,eax
mov ebx,.dby13
add .cby1,ebx
mov edx,.dby12
add .cby2,edx
 
mov eax,.dex13
add .cex1,eax
mov ebx,.dex12
add .cex2,ebx
mov edx,.dey13
add .cey1,edx
mov eax,.dey12
add .cey2,eax
 
end if
mov ebx,.dz13
add .cz1,ebx
mov edx,.dz12
add .cz2,edx
 
inc ecx
cmp cx,.y2
jl .loop12
.loop12_done:
 
movsx ecx,.y2
cmp cx,.y3
jge .loop23_done
 
movsx eax,.z2
shl eax,CATMULL_SHIFT
mov .cz2,eax
 
movsx eax,.x2
shl eax,ROUND
mov .cx2,eax
 
movzx eax,word[.b_x2]
shl eax,ROUND
mov .cbx2,eax
 
movzx eax,word[.b_y2]
shl eax,ROUND
mov .cby2,eax
 
movzx eax,word[.e_x2]
shl eax,ROUND
mov .cex2,eax
 
movzx eax,word[.e_y2]
shl eax,ROUND
mov .cey2,eax
 
.loop23:
call .call_line
;if Ext = NON
mov eax,.dx13
add .cx1,eax
mov ebx,.dx23
add .cx2,ebx
if Ext>= SSE2
movups xmm0,.cey1
movups xmm1,.cey2
movups xmm2,.dey23q
movups xmm3,.dey13q
paddd xmm0,xmm3
paddd xmm1,xmm2
movups .cey1,xmm0
movups .cey2,xmm1
else if (Ext = MMX) | ( Ext = SSE)
movq mm0,.cby2 ; with this mmx optimization object looks bit
movq mm1,.cby1 ; annoying
movq mm2,.cey2
movq mm3,.cey1
paddd mm0,.dby23q
paddd mm1,.dby13q
paddd mm2,.dey23q
paddd mm3,.dey13q
movq .cby2,mm0
movq .cby1,mm1
movq .cey2,mm2
movq .cey1,mm3
 
else
mov edx,.dbx13
add .cbx1,edx
mov eax,.dbx23
add .cbx2,eax
mov ebx,.dby13
add .cby1,ebx
mov edx,.dby23
add .cby2,edx
 
mov eax,.dex13
add .cex1,eax
mov ebx,.dex23
add .cex2,ebx
mov edx,.dey13
add .cey1,edx
mov eax,.dey23
add .cey2,eax
end if
 
mov ebx,.dz13
add .cz1,ebx
mov edx,.dz23
add .cz2,edx
;else
; movq mm0,.db13q
; movq mm1,.cbx1q
 
inc ecx
cmp cx,.y3
jl .loop23
.loop23_done:
 
mov esp,ebp
ret 34
 
.call_line:
 
pushad
 
push .cz1
push .cz2
push .z_buff
push .t_bmap
push .t_emap
push dword .cey2
push .cex2
push dword .cey1
push .cex1
push dword .cby2
push .cbx2
push dword .cby1
push .cbx1
push ecx
 
mov eax,.cx1
sar eax,ROUND
mov ebx,.cx2
sar ebx,ROUND
 
call two_tex_line_z
 
popad
ret
two_tex_line_z:
; Draws one horizontal, Z-buffered span. Every pixel that passes the depth
; test is written as the per-channel product of a b. texture texel and an
; e. texture texel, shifted right by 8.
;--------------in: eax - x1
;-------------- ebx - x2
;-------------- edi - pointer to screen buffer
;stack - another parameters :
; ebp is loaded with esp on entry (nothing is pushed first), so [ebp] is
; the return address. The 14 stack dwords listed below are released by
; 'ret 56'. ebp itself is not preserved by this routine.
.y equ dword [ebp+4] ; scanline number
.bx1 equ [ebp+8] ; ---
.by1 equ [ebp+12] ; |
.bx2 equ [ebp+16] ; |
.by2 equ [ebp+20] ; |> b. texture and e. texture coords
.ex1 equ [ebp+24] ; |> shifted shl ROUND
.ey1 equ [ebp+28] ; |
.ex2 equ [ebp+32] ; |
.ey2 equ [ebp+36] ; ---
.emap equ [ebp+40] ; e. texture offset
.bmap equ [ebp+44] ; b. texture offset
.z_buff equ dword [ebp+48] ; z-buffer base, one dword per pixel
.z2 equ dword [ebp+52] ; -- |> z coords shifted
.z1 equ dword [ebp+56] ; -- shl CATMULL_SHIFT

; Locals. They are created further down by push / sub esp, so the order of
; those instructions has to match the offsets given here.
.x1 equ dword [ebp-4]
.x2 equ dword [ebp-8]
.dbx equ [ebp-12] ; per-pixel deltas of the texture coords
.dex equ [ebp-16]
.dby equ [ebp-20]
.dey equ [ebp-24]
.dz equ dword [ebp-28] ; per-pixel delta of z
.cbx equ [ebp-32] ; current (running) texture coords
.cex equ [ebp-36]
.cby equ [ebp-40]
.cey equ [ebp-44]
.cz equ dword [ebp-48] ; current z
.czbuff equ dword [ebp-52] ; current address in the z-buffer

mov ebp,esp

; vertical rejection: y < 0 or y >= size_y - 1
mov ecx,.y
or ecx,ecx
jl .bl_end
mov dx,word[size_y_var]
dec dx
cmp cx,dx ;word[size_y_var] ;SIZE_Y
jge .bl_end

; order the span ends so that x1 < x2; an empty span (x1 = x2) is dropped
cmp eax,ebx
jl @f
je .bl_end

; x1 > x2: swap both ends together with all their attributes
xchg eax,ebx
if Ext=NON
mov edx,.bx1
xchg edx,.bx2
mov .bx1,edx
mov edx,.by1
xchg edx,.by2
mov .by1,edx

mov edx,.ex1
xchg edx,.ex2
mov .ex1,edx
mov edx,.ey1
xchg edx,.ey2
mov .ey1,edx
else
; each movq moves an x,y pair (two adjacent dwords) at once
movq mm0,.bx1
movq mm1,.ex1
movq mm2,.bx2
movq mm3,.ex2
movq .bx2,mm0
movq .ex2,mm1
movq .bx1,mm2
movq .ex1,mm3
end if
mov edx,.z1
xchg edx,.z2
mov .z1,edx
@@:
push eax ebx ;store x1, x2
; horizontal rejection: x1 > size_x - 1 or x2 <= 0
mov ebx,.x1
movzx edx,word[size_x_var]
dec edx
cmp ebx,edx
; cmp bx,word[size_x_var] ;SIZE_X
jg .bl_end
cmp .x2,0
jle .bl_end

; ebx = x2 - x1, the divisor for all per-pixel deltas (positive here)
mov ebx,.x2
sub ebx,.x1

if Ext >= SSE

sub esp,16 ; reserve .dbx .dex .dby .dey, filled by the two movq below
cvtsi2ss xmm3,ebx ;rcps
shufps xmm3,xmm3,0 ; broadcast the span length to all four lanes

; movq mm0,.bx1q
; movq mm1,.bx2q
; movq mm2,.ex1q
; movq mm3,.ex2q
; psubd mm1,mm0
; psubd mm3,mm2
; cvtpi2ps xmm1,mm1
; movlhps xmm1,xmm1
; cvtpi2ps xmm1,mm3

; build xmm0 = start coords, xmm1 = end coords as floats:
; low half holds ex,ey and high half holds bx,by
cvtpi2ps xmm0,.bx1 ;mm0 ; bx1; by1
movlhps xmm0,xmm0
cvtpi2ps xmm0,.ex1 ;mm2 ; ex1; ey1
cvtpi2ps xmm1,.bx2 ;mm1 ; bx2; by2
movlhps xmm1,xmm1
cvtpi2ps xmm1,.ex2 ;mm3 ; ex2; ey2
subps xmm1,xmm0
; hi lo
divps xmm1,xmm3 ; xmm1 -> dby; dbx; dey; dex

; regroup to (hi -> lo) dby; dey; dbx; dex so each half matches one movq
shufps xmm1,xmm1,11011000b
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords
movhlps xmm1,xmm1
cvtps2pi mm1,xmm1
movq .dex,mm0 ; hi - lo -> dbx, dex
movq .dey,mm1 ; hi - lo -> dby, dey

else

; integer version: each push lands on the local named in the comment
mov eax,.bx2 ; calc .dbx
sub eax,.bx1
cdq
idiv ebx
push eax

mov eax,.ex2 ; calc .dex
sub eax,.ex1
cdq
idiv ebx
push eax

mov eax,.by2 ; calc .dby
sub eax,.by1
cdq
idiv ebx
push eax

mov eax,.ey2 ; calc .dey
sub eax,.ey1
cdq
idiv ebx
push eax

end if

mov eax,.z2 ; calc .dz
sub eax,.z1
cdq
idiv ebx
push eax

cmp .x1,0 ; set correctly begin variable
jge @f ; CLIPPING ON FUNCTION
; cutting triangle exceedes screen
; x1 < 0: advance every start attribute by delta * (-x1) and start at 0.
; eax still holds .dz from the idiv above.
mov ebx,.x1
neg ebx
imul ebx ; eax = .dz * abs(.x1)
add .z1,eax
mov .x1,0

mov eax,.dbx
imul ebx
add .bx1,eax

mov eax,.dby
imul ebx
add .by1,eax

mov eax,.dex
imul ebx
add .ex1,eax

mov eax,.dey
imul ebx
add .ey1,eax
@@:
; clamp x2 to size_x
movzx eax,word[size_x_var] ;SIZE_X ;word[size_x_var]
mov ebx,.x2
cmp eax,ebx
jg @f
mov .x2,eax
@@:
; movd mm0,eax
; movd mm1,.x2
; pminsw mm0,mm1
; movd .x2,mm0
; cmp .x2,SIZE_X ;eax |
; jl @f |> this dont work idk cause
; mov .x2,SIZE_X ;eax |
@@:
; eax still holds size_x from the movzx above
; movzx eax,word[size_x_var] ;calc memory begin in buffers
mov ebx,.y
mul ebx ; eax = size_x * y
mov ebx,.x1
add eax,ebx ; eax = pixel index of the first pixel of the span
mov ebx,eax
lea eax,[eax*3] ; 3 bytes per screen pixel
add edi,eax ; edi - screen
mov esi,.z_buff ; z-buffer filled with dd variables
shl ebx,2 ; 4 bytes per z-buffer entry
add esi,ebx ; esi - Z buffer

mov ecx,.x2 ; ecx = number of pixels to process
sub ecx,.x1
; init current variables
; the six pushes below create .cbx .cex .cby .cey .cz .czbuff in that order
push dword .bx1 ;.by1 .ex1 .ey1 .z1 esi
push dword .ex1
push dword .by1
push dword .ey1

push .z1 ; current z shl CATMULL_SHIFT
push esi

if Ext >= MMX
; MMX version keeps the running coords in mm3/mm4 and the z-buffer
; address in edx; mm0 stays zero for the byte -> word unpacking
pxor mm0,mm0
movq mm3,.cex ; hi - lo -> cbx; cex
movq mm4,.cey ; hi - lo -> cby; cey
; movq mm5,mm3
; movq mm6,mm4
; psrad mm5,ROUND
; psrad mm6,ROUND
; movq .ceyq,mm5
; movq .cbyq,mm6
mov edx,.czbuff
else
cld
end if
.draw:
; if TEX = SHIFTING ;bump drawing only in shifting mode
; depth test (signed): the pixel is drawn only when .cz < stored z
if Ext=NON
mov esi,.czbuff ; .czbuff current address in buffer
mov ebx,.cz ; .cz - cur z position
cmp ebx,dword[esi]
else
mov ebx,.cz
cmp ebx,dword[edx]
end if
jge .skip

; texel address = ((y sar ROUND) shl TEX_SHIFT + (x sar ROUND)) * 3 + base
if Ext=NON
mov eax,.cby
mov esi,.cbx
sar eax,ROUND
sar esi,ROUND
shl eax,TEX_SHIFT ;-
add esi,eax
lea esi,[esi*3] ;- ; esi - current b. texture addres
add esi,.bmap

mov ebx,.cex ;.cex - current env map X
mov eax,.cey ;.cey - current env map y
sar ebx,ROUND
sar eax,ROUND

shl eax,TEX_SHIFT
add ebx,eax
lea ebx,[ebx*3]
add ebx,.emap ; ebx - current e. texture address


else
; both addresses are computed in parallel: low dword from cex/cey,
; high dword from cbx/cby
movq mm5,mm4 ;.cey
psrad mm5,ROUND
pslld mm5,TEX_SHIFT
movq mm6,mm3 ;.cex
psrad mm6,ROUND
paddd mm5,mm6
movq mm6,mm5
paddd mm5,mm5
paddd mm5,mm6 ; * 3
paddd mm5,.emap ; adds .emap to the low dword and .bmap to the high one
movd esi,mm5 ; esi - e. texture address
psrlq mm5,32
movd ebx,mm5 ; ebx - b. texture address
end if
if Ext>=MMX
; multiply the two texels channel by channel and keep the high byte
movd mm1,[esi]
movd mm2,[ebx]
punpcklbw mm1,mm0
punpcklbw mm2,mm0
pmullw mm1,mm2
psrlw mm1,8
packuswb mm1,mm0
; NOTE(review): movd stores 4 bytes while edi advances by 3 per pixel
movd [edi],mm1
mov ebx,.cz
mov dword[edx],ebx ; update z-buffer
else
; same product done one byte at a time; stosb advances edi itself
cld ; esi - tex b.
lodsb ; ebx - tex e.
mov dl,[ebx]
mul dl
shr ax,8
stosb
inc ebx
lodsb
mov dl,[ebx]
mul dl
shr ax,8
stosb
inc ebx
lodsb
mov dl,[ebx]
mul dl
shr ax,8
stosb
mov ebx,.cz
mov esi,.czbuff
mov dword[esi],ebx ; update z-buffer
jmp .no_skip ; edi was already advanced by the three stosb
end if
.skip:
; step over one screen pixel (the MMX draw path also falls through here)
add edi,3

; advance z-buffer address and texture coords to the next pixel
if Ext = NON
.no_skip:
add .czbuff,4
mov eax,.dbx
add .cbx,eax
mov eax,.dby
add .cby,eax
mov eax,.dex
add .cex,eax
mov eax,.dey
add .cey,eax
else
add edx,4
paddd mm3,.dex
paddd mm4,.dey
; movq mm5,mm3
; movq mm6,mm4
; psrad mm5,ROUND
; psrad mm6,ROUND
; movq .cex,mm3
; movq .cey,mm4
end if
mov eax,.dz
add .cz,eax
if Ext = NON
dec ecx
jnz .draw
else
loop .draw
end if

.bl_end:
; drop all locals and remove the 14 stack arguments
mov esp,ebp
ret 56
 
/programs/demos/view3ds/view3ds.asm
1,5 → 1,5
 
; application : View3ds ver. 0.071 - tiny .3ds and .asc files viewer
; application : View3ds ver. 0.074 - tiny .3ds and .asc files viewer
; with a few graphics effects demonstration.
; compiler : FASM
; system : KolibriOS
38,6 → 38,9
SSE2 = 3
SSE3 = 4
Ext = SSE3 ;Ext={ NON | MMX | SSE | SSE2 | SSE3 }
; For now only the SSE2 and SSE3 versions are correct. If you have an older
; CPU, use an older version of the app. Probably ver 005 will be OK, but it
; needs re-editing to support new Kolibri features.
 
; 0 for short names (Menuet-compatible), 1 for long names (Kolibri features)
USE_LFN = 1 ; App is Kolibri only now.
54,6 → 57,8
dd 0x0 ; I_Icon
 
START: ; start of execution
rdtsc
mov [rand_seed],ax
cld
push dword (SIZE_Y shr 3) * 3
fninit
90,9 → 95,7
; set point(0,0,0) in center and calc all coords
; to be in <-1.0,1.0>
call normalize_all_light_vectors
if Ext >= SSE3
call copy_lights ; to aligned float
end if
call init_triangles_normals2
call init_point_normals
call init_envmap2
100,17 → 103,25
call generate_texture2
call init_sincos_tab
call do_color_buffer ; intit color_map
if Ext >= SSE3
call init_point_lights
mov [fire_flag],0 ; proteza
end if
mov edi,bumpmap
call calc_bumpmap
call calc_bumpmap_coords ; bump and texture mapping
call do_edges_list
call draw_window
;mov [draw_win_at_first],0
;mov eax,40 ; set events mask
;mov ebx,1100000000000000000000000100111b
;int 0x40
if Ext > SSE2
mov eax,1
cpuid
bt ecx,0 ; is sse3 on board?
jc @f
mov [max_dr_flg],12
mov [isSSE3],0
@@:
end if
 
 
still:
cmp [edit_flag],1
jne @f
122,7 → 133,16
mov ebx,111b
.int:
int 0x40
if Ext > SSE2
cmp [ray_shd_flag],1
jne @f
cmp [isSSE3],1
jne @f
mov eax,10
jmp .intt
end if
 
@@:
mov eax,23
mov ebx,TIMEOUT
cmp [speed_flag],0
134,6 → 154,7
mov eax,10
 
@@:
.intt:
int 0x40
 
cmp eax,1 ; redraw event ?
263,8 → 284,9
jne .next_m5 ; 'grd ' 1
call make_random_lights ; 'env ' 2
call normalize_all_light_vectors ; 'bump' 3
call copy_lights
if Ext >= SSE3
call copy_lights
call init_point_lights ; for ex. ray casting
end if
call do_color_buffer ; intit color_map ; 'tex ' 4
 
396,7 → 418,7
cmp [move_flag],0
jne @f
.x_minus:
sub [vect_x],10
sub word[vect_x],10
jmp .next2
@@:
cmp [move_flag],1
414,7 → 436,7
cmp [move_flag],0
jne @f
.x_plus:
add [vect_x],10
add word[vect_x],10
jmp .next3
@@:
cmp [move_flag],1
522,8 → 544,6
.no_sort:
cmp [dr_flag],7 ; fill if 2tex and texgrd
jge @f
cmp [catmull_flag],0 ;non fill if Catmull = off
je .non_f
cmp [dr_flag],6 ; non fill if dots
je .non_f
@@:
534,46 → 554,74
call draw_dots
jmp .blurrr
@@:
if Ext > SSE2
cmp [ray_shd_flag],1 ;non fill if Catmull = off
jne @f
cmp [isSSE3],1
jne @f
mov ax,100
jmp .dr
@@:
end if
 
movzx ax,[dr_flag]
.dr:
call draw_triangles ; draw all triangles from the list
cmp [edit_flag],0
jz .no_edit
call clear_vertices_index
call draw_handlers
movzx eax,[dr_flag]
movzx ebx,[ray_shd_flag]
shl ebx,10
or eax,ebx
call draw_handlers
; call edit
 
 
 
 
 
 
.no_edit:
 
.blurrr:
cmp [sinus_flag],0
je @f
call do_sinus
movzx eax,[dr_flag]
movzx ebx,[ray_shd_flag]
shl ebx,10
or eax,ebx
cmp [sinus_flag],0
je .no_sin
movzx eax,[dr_flag]
movzx ebx,[ray_shd_flag]
shl ebx,10
or eax,ebx
call do_sinus
; jmp .finito
.no_sin:
@@:
cmp [fire_flag],0
jne @f
movzx ecx,[fire_flag]
cmp [fire_flag],1
je @f
cmp [blur_flag],0
je .no_blur ; no blur, no fire
movzx ecx,[blur_flag]
@@:
movzx eax,[dr_flag]
movzx ebx,[ray_shd_flag]
shl ebx,10
or eax,ebx
call blur_screen ; blur and fire
jmp .no_blur
@@:
cmp [emboss_flag],0
jne .emb ; if emboss=true -> no fire
movzx ecx,[fire_flag]
call blur_screen ; blur and fire
; jmp .finito
 
.no_blur: ; no blur, no fire
cmp [emboss_flag],0
je @f
.emb:
movzx eax,[dr_flag]
movzx ebx,[ray_shd_flag]
shl ebx,10
or eax,ebx
call do_emboss
.finito:
@@:
 
@@:
 
 
cmp [inc_bright_flag],0 ; increase brightness
je .no_inc_bright
movzx ebx,[inc_bright_flag]
706,7 → 754,9
mov eax,7 ; put image
mov ebx,[screen_ptr]
mov ecx,[size_y_var]
mov edx,[offset_y]
mov edx,[offset_y]
cmp [ray_shd_flag],1
jge .ff
cmp [dr_flag],11
jge .ff
int 0x40
764,6 → 814,7
include '3stencil.inc'
include '3glass.inc'
include '3glass_tex.inc'
include '3ray_shd.inc'
end if
clear_vertices_index:
mov edi,[vertices_index_ptr]
1075,7 → 1126,7
fldpi
fadd st,st
mov esi,[points_ptr]
mov edi,tex_points
mov edi,[tex_points_ptr]
mov ecx,[points_count_var]
inc ecx
; cmp [map_tex_flag],1
1389,7 → 1440,8
mov esp,ebp
pop ebp
ret
if Ext >= SSE3
 
if Ext >= SSE2
init_point_normals:
.z equ dword [ebp-8]
.y equ dword [ebp-12]
1397,7 → 1449,6
.point_number equ dword [ebp-28]
.hit_faces equ dword [ebp-32]
 
fninit
push ebp
mov ebp,esp
sub esp,64
1438,19 → 1489,25
jne .ipn_check_face
cvtsi2ss xmm6,.hit_faces
movaps xmm7,.x
 
rcpss xmm6,xmm6
shufps xmm6,xmm6,11000000b
mulps xmm7,xmm6
movaps xmm6,xmm7
mulps xmm6,xmm6
andps xmm6,[zero_hgst_dd]
haddps xmm6,xmm6
haddps xmm6,xmm6
rsqrtps xmm6,xmm6
mulps xmm7,xmm6
movlps [edi],xmm7
movhlps xmm7,xmm7
movss [edi+8],xmm7
call normalize_vector
; movaps xmm6,xmm7
; mulps xmm6,xmm6
; andps xmm6,[zero_hgst_dd]
; haddps xmm6,xmm6
; haddps xmm6,xmm6
; rsqrtps xmm6,xmm6
; mulps xmm7,xmm6
; movlps [edi],xmm7
; movhlps xmm7,xmm7
; movss [edi+8],xmm7
 
add edi,12
inc .point_number
mov edx,.point_number
1576,11 → 1633,9
pop ecx
sub ecx,1
jnz @b
; cmp dword[ebp],-1
; jne @b
ret
 
if Ext >= SSE3
 
copy_lights: ; after normalising !
mov esi,lights
mov edi,lights_aligned
1610,8 → 1665,8
pop ecx
loop .again
ret
end if
 
 
clrscr:
mov edi,[screen_ptr]
movzx ecx,word[size_x_var]
1654,7 → 1709,37
 
 
draw_triangles:
; in: eax - render draw model
.tri_no equ dword[ebp-60]
.point_index3 equ [ebp-8]
.point_index2 equ [ebp-12]
.point_index1 equ [ebp-16]
.yy3 equ [ebp-18]
.xx3 equ [ebp-20]
.yy2 equ [ebp-22]
.xx2 equ [ebp-24]
.yy1 equ [ebp-26]
.xx1 equ [ebp-28]
 
.zz3 equ [ebp-30]
.zz2 equ [ebp-32]
.zz1 equ [ebp-34]
.index3x12 equ [ebp-38]
.index2x12 equ [ebp-42]
.index1x12 equ [ebp-46]
.temp1 equ dword[ebp-50]
.temp2 equ dword[ebp-54]
.dr_flag equ word[ebp-56]
 
 
push ebp
mov ebp,esp
sub esp,60
 
; movzx ax,[dr_flag]
mov .dr_flag,ax
 
 
emms
; update translated list MMX required
cmp [vertex_edit_no],0
1671,7 → 1756,8
movd dword[eax],mm1
@@:
if Ext >= SSE3
cmp [dr_flag],13
 
cmp .dr_flag,13
jnge .no_stencil
mov esi,[triangles_ptr]
mov ecx,[triangles_count_var]
1734,124 → 1820,57
je .draw_smooth_line
 
mov esi,[triangles_ptr]
mov ecx,[triangles_count_var]
xor ecx,ecx ;mov ecx,[triangles_count_var]
.again_dts:
; push ebp
push esi
push ecx
mov ebp,[points_translated_ptr]
if Ext >= SSE2
mov eax,dword[esi]
mov [point_index1],eax
lea eax,[eax*3]
add eax,eax
push ebp
add ebp,eax
mov eax,[ebp]
; cmp [vertex_edit_no],0
; jne @f
;
; @@:
mov dword[xx1],eax
mov eax,[ebp+4]
mov [zz1],ax
mov .tri_no,ecx
 
pop ebp
mov eax,[esi]
mov ebx,[esi+4]
mov ecx,[esi+8]
 
mov .point_index1,eax
mov .point_index2,ebx
mov .point_index3,ecx
imul eax,[i12]
imul ebx,[i12]
imul ecx,[i12]
mov .index1x12,eax
mov .index2x12,ebx
mov .index3x12,ecx
 
mov eax,dword[esi+4]
mov [point_index2],eax
lea eax,[eax*3]
add eax,eax
push ebp
add ebp,eax
mov eax,[ebp]
mov dword[xx2],eax
mov eax,[ebp+4]
mov [zz2],ax
pop ebp
shr eax,1
shr ebx,1
shr ecx,1
add eax,[points_translated_ptr]
add ebx,[points_translated_ptr]
add ecx,[points_translated_ptr]
push word[eax+4]
push word[ebx+4]
push word[ecx+4]
pop word .zz3
pop word .zz2
pop word .zz1
 
mov eax,[eax]
mov ebx,[ebx]
mov ecx,[ecx]
ror eax,16
ror ebx,16
ror ecx,16
mov .xx1,eax
mov .xx2,ebx
mov .xx3,ecx
 
mov eax,dword[esi+8] ; xyz3 = [ebp+[esi+4]*6]
mov [point_index3],eax
lea eax,[eax*3]
add eax,eax
; push ebp
add ebp,eax
mov eax,[ebp]
mov dword[xx3],eax
mov eax,[ebp+4]
mov [zz3],ax
else
movq mm0,[esi] ; don't know MMX
mov qword[point_index1],mm0
; shr eax,16
; mov [point_index2],ax
mov eax,dword[esi+8]
mov [point_index3],eax
movdqu xmm0,[esi]
paddd xmm0,xmm0
movdqa xmm1,xmm0
paddd xmm0,xmm0
paddd xmm0,xmm1
movd eax,xmm0
psrldq xmm0,4
movd ebx,xmm0
psrldq xmm0,4
movd ecx,xmm0
and eax,0FFFFh
and ebx,0FFFFh
and ecx,0FFFFh
movq mm0,[ebp+eax]
movq mm1,[ebp+ebx]
movq mm2,[ebp+ecx]
movq qword[xx1],mm0
movq qword[xx2],mm1
movq qword[xx3],mm2
; emms
end if ; *********************************
if 0
cmp [vertex_edit_no],0
jne .no_edit
mov ax,[vertex_edit_no]
dec ax
cmp ax,[point_index1]
jne @f
movd mm0,[edit_start_x]
psubw mm0,[edit_end_x]
movd mm1,dword[xx1]
paddw mm1,mm0
movd dword[xx1],mm1
jmp .no_edit
@@:
 
cmp ax,[point_index2]
jne @f
movd mm0,[edit_start_x]
psubw mm0,[edit_end_x]
movd mm1,dword[xx2]
paddw mm1,mm0
movd dword[xx2],mm1
jmp .no_edit
@@:
 
cmp ax,[point_index3]
jne @f
movd mm0,[edit_start_x]
psubw mm0,[edit_end_x]
movd mm1,dword[xx3]
paddw mm1,mm0
movd dword[xx3],mm1
jmp .no_edit
@@:
 
 
.no_edit:
end if
 
push esi ;
; push esi
fninit ; DO culling AT FIRST
cmp [culling_flag],1 ; (if culling_flag = 1)
jne .no_culling
mov esi,point_index1 ; *********************************
lea esi,.point_index1 ; *********************************
mov ecx,3 ;
@@:
mov eax,dword[esi]
1858,76 → 1877,77
lea eax,[eax*3]
shl eax,2
add eax,[points_normals_rot_ptr]
; lea eax,[eax+point_normals_rotated]
fld dword[eax+8] ; *****************************
ftst ; CHECKING OF Z COOFICIENT OF
fstsw ax ; NORMAL VECTOR
sahf
jb @f
ffree st
mov eax,[eax+8]
bt eax,31
jc @f
; *****************************
; CHECKING OF Z COOFICIENT OF
; NORMAL VECTOR
add esi,4
loop @b
jmp .end_draw ; non visable
@@:
ffree st ;is visable
 
.no_culling:
cmp [dr_flag],0 ; draw type flag
cmp .dr_flag,0 ; draw type flag
je .flat_draw
cmp [dr_flag],2
cmp .dr_flag,2
je .env_mapping
cmp [dr_flag],3
cmp .dr_flag,3
je .bump_mapping
cmp [dr_flag],4
cmp .dr_flag,4
je .tex_mapping
cmp [dr_flag],5
cmp .dr_flag,5
je .rainbow
cmp [dr_flag],7
cmp .dr_flag,7
je .grd_tex
cmp [dr_flag],8
cmp .dr_flag,8
je .two_tex
cmp [dr_flag],9
cmp .dr_flag,9
je .bump_tex
cmp [dr_flag],10
cmp .dr_flag,10
je .cubic_env_mapping
cmp [dr_flag],11
cmp .dr_flag,11
je .draw_smooth_line
if Ext >= SSE3
cmp [dr_flag],12
cmp .dr_flag,12
je .r_phg
cmp [dr_flag],13
cmp .dr_flag,13
je .glass
cmp [dr_flag],14
cmp .dr_flag,14
je .glass_tex
end if ; ****************
mov esi,point_index3 ; do Gouraud shading
cmp .dr_flag,100
je .ray_shd
 
end if
 
push ebp ; ****************
lea esi,.index3x12 ; do Gouraud shading
lea edi,.zz3
mov ecx,3
.again_grd_draw:
mov eax,dword[esi]
shl eax,2
lea eax,[eax*3]
add eax,[points_normals_rot_ptr]
; texture x=(rotated point normal -> x * 255)+255
fld dword[eax] ; x cooficient of normal vector
fimul [correct_tex]
fiadd [correct_tex]
fistp [temp1]
fistp .temp1
; texture y=(rotated point normal -> y * 255)+255
fld dword[eax+4] ; y cooficient
fimul [correct_tex]
fiadd [correct_tex]
fistp [temp2]
fistp .temp2
 
mov eax,[temp2]
mov ebx,[temp1]
mov eax,.temp2
mov ebx,.temp1
and ebx,0xfffffff
shl eax,TEX_SHIFT
add eax,ebx
lea eax,[eax*3+color_map]
mov eax,dword[eax]
; cmp [catmull_flag],1 ; put on stack z coordinate if necessary
; jne @f
lea edx,[ecx*3]
push word[edx*2+xx1-2] ; zz1 ,2 ,3
; @@:
push word[edi] ; zz1 ,2 ,3
 
ror eax,16 ; eax -0xxxrrggbb -> 0xggbbxxrr
xor ah,ah
push ax ;r
1938,98 → 1958,55
push ax ;b
 
sub esi,4
sub edi,2
dec cx
jnz .again_grd_draw
jmp .both_draw
 
; movzx edi,[point_index3] ;gouraud shading according to light vector
; lea edi,[edi*3]
; lea edi,[4*edi+point_normals_rotated] ; edi - normal
; mov esi,light_vector
; call dot_product
; fabs
; fimul [orginal_color_r]
; fistp [temp_col]
; and [temp_col],0x00ff
; push [temp_col]
; push [temp_col]
; push [temp_col]
.rainbow:
push ebp
push word .zz3
 
; movzx edi,[point_index2]
; lea edi,[edi*3]
; lea edi,[4*edi+point_normals_rotated] ; edi - normal
; mov esi,light_vector
; call dot_product
; fabs
; fimul [orginal_color_r]
; fistp [temp_col]
; and [temp_col],0x00ff
; push [temp_col]
; push [temp_col]
; push [temp_col]
 
; movzx edi,[point_index1]
; lea edi,[edi*3]
; lea edi,[4*edi+point_normals_rotated] ; edi - normal
; mov esi,light_vector
; call dot_product
; fabs
; fimul [orginal_color_r]
; fistp [temp_col]
; and [temp_col],0x00ff
; push [temp_col]
; push [temp_col]
; push [temp_col]
.rainbow:
; cmp [catmull_flag],1 ; put on stack z coordinate if necessary
; jne @f
push [zz3]
@@:
mov eax,dword[yy3]
mov eax, .xx3
ror eax,16
mov ebx,0x00ff00ff
and eax,ebx
push eax
neg al
push ax
push [zz2]
push word .zz2
 
mov eax,dword[yy2]
mov eax, .xx2
ror eax,16
and eax,ebx
push eax
neg al
push ax
push [zz1]
push word .zz1
 
mov eax,dword[yy1]
mov eax, .xx1
ror eax,16
and eax,ebx
push eax
neg al
push ax
.both_draw:
mov eax,dword[xx1]
ror eax,16
mov ebx,dword[xx2]
ror ebx,16
mov ecx,dword[xx3]
ror ecx,16
mov eax, .xx1
mov ebx, .xx2
mov ecx, .xx3
mov edi,[screen_ptr]
mov esi,[Zbuffer_ptr]
call gouraud_triangle_z
pop ebp
jmp .end_draw
 
.flat_draw: ;**************************
fninit ; FLAT DRAWING
mov eax,[point_index1]
mov ebx,[point_index2]
mov ecx,[point_index3]
shl eax,2
shl ebx,2
shl ecx,2
lea eax,[eax*3] ;+point_normals_rotated]
mov eax,.index1x12
mov ebx,.index2x12
mov ecx,.index3x12
add eax,[points_normals_rot_ptr]
lea ebx,[ebx*3] ;+point_normals_rotated]
add ebx,[points_normals_rot_ptr]
lea ecx,[ecx*3] ;+point_normals_rotated]
add ecx,[points_normals_rot_ptr]
fld dword[eax] ; x cooficient of normal vector
fadd dword[ebx]
2037,7 → 2014,7
fidiv [i3]
fimul [correct_tex]
fiadd [correct_tex]
fistp [temp1] ;dword[esp-4] ; x temp variables
fistp .temp1 ;dword[esp-4] ; x temp variables
fld dword[eax+4] ; y cooficient of normal vector
fadd dword[ebx+4]
fadd dword[ecx+4]
2044,12 → 2021,12
fidiv [i3]
fimul [correct_tex]
fiadd [correct_tex]
fistp [temp2] ;dword[esp-8] ; y
mov edx,[temp2] ;dword[esp-8]
fistp .temp2 ;dword[esp-8] ; y
mov edx,.temp2 ;dword[esp-8]
and edx,0xfffffff
and [temp1],0xfffffff
and .temp1,0xfffffff
shl edx,TEX_SHIFT
add edx,[temp1] ;dword[esp-4]
add edx,.temp1 ;dword[esp-4]
 
lea eax,[3*edx]
add eax,color_map
2071,34 → 2048,32
; shl eax,8
; mov edx,eax
 
mov eax,dword[xx1]
ror eax,16
mov ebx,dword[xx2]
ror ebx,16
mov ecx,dword[xx3]
ror ecx,16
mov eax,dword .xx1
mov ebx,dword .xx2
mov ecx,dword .xx3
mov edi,[screen_ptr]
 
mov esi,[Zbuffer_ptr]
push word[zz3]
push word[zz2]
push word[zz1]
push ebp
push word .zz3
push word .zz2
push word .zz1
call flat_triangle_z
pop ebp
jmp .end_draw
 
.env_mapping:
push [zz3]
push [zz2]
push [zz1]
push ebp
push word .zz3
push word .zz2
push word .zz1
 
mov esi,point_index1
lea esi, .index1x12
sub esp,12
mov edi,esp
mov ecx,3
@@:
mov eax,dword[esi]
lea eax,[eax*3]
shl eax,2
add eax,[points_normals_rot_ptr] ;point_normals_rotated
; texture x=(rotated point normal -> x * 255)+255
fld dword[eax]
2115,33 → 2090,29
add esi,4
loop @b
 
mov eax,dword[xx1]
ror eax,16
mov ebx,dword[xx2]
ror ebx,16
mov ecx,dword[xx3]
ror ecx,16
mov eax, .xx1
mov ebx,dword .xx2
mov ecx,dword .xx3
mov edi,[screen_ptr]
mov esi,envmap
 
mov edx,[Zbuffer_ptr]
call tex_triangle_z
 
pop ebp
jmp .end_draw
;+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
.cubic_env_mapping:
push [zz3]
push [zz2]
push [zz1]
push ebp
push word .zz3
push word .zz2
push word .zz1
 
mov esi,point_index1
lea esi,.index1x12
sub esp,12
mov edi,esp
mov ecx,3
@@:
mov eax,dword[esi]
lea eax,[eax*3]
shl eax,2
add eax,[points_normals_rot_ptr]
 
fld dword[eax]
2171,37 → 2142,32
add esi,4
loop @b
 
mov eax,dword[xx1]
ror eax,16
mov ebx,dword[xx2]
ror ebx,16
mov ecx,dword[xx3]
ror ecx,16
mov eax, .xx1
mov ebx, .xx2
mov ecx, .xx3
mov edi,[screen_ptr]
mov esi,envmap_cub
mov edx,[Zbuffer_ptr]
 
call tex_triangle_z
 
pop ebp
jmp .end_draw
 
;+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 
.bump_mapping:
 
push ebp
push [Zbuffer_ptr]
push [zz3]
push [zz2]
push [zz1]
push word .zz3
push word .zz2
push word .zz1
 
mov esi,point_index1
lea esi,.index1x12
sub esp,12
mov edi,esp
mov ecx,3
@@:
mov eax,dword[esi]
lea eax,[eax*3]
shl eax,2
add eax,[points_normals_rot_ptr] ;point_normals_rotated
; texture x=(rotated point normal -> x * 255)+255
fld dword[eax]
2218,70 → 2184,58
add esi,4
loop @b
 
mov esi,[point_index3] ; bump map coords
mov esi, .point_index3 ; bump map coords
shl esi,2
add esi,tex_points
add esi,[tex_points_ptr]
push dword[esi]
mov esi,[point_index2]
mov esi, .point_index2
shl esi,2
add esi,tex_points
; lea esi,[esi*3]
; lea esi,[points+2+esi*2]
add esi,[tex_points_ptr]
push dword[esi]
; push dword[xx2]
mov esi,[point_index1]
mov esi, .point_index1
shl esi,2
add esi,tex_points
; lea esi,[esi*3]
; lea esi,[points+2+esi*2]
add esi,[tex_points_ptr]
push dword[esi]
; push dword[xx1]
 
mov eax,dword[xx1]
ror eax,16
mov ebx,dword[xx2]
ror ebx,16
mov ecx,dword[xx3]
ror ecx,16
mov eax,dword .xx1
mov ebx,dword .xx2
mov ecx,dword .xx3
mov edi,[screen_ptr]
mov esi,envmap
mov edx,bumpmap ;BUMP_MAPPING
 
call bump_triangle_z
 
pop ebp
jmp .end_draw
 
.tex_mapping:
 
push [zz3]
push [zz2]
push [zz1]
push ebp
push word .zz3
push word .zz2
push word .zz1
; @@:
mov esi,[point_index3] ; tex map coords
mov esi, .point_index3 ; tex map coords
shl esi,2
add esi,tex_points
add esi,[tex_points_ptr]
push dword[esi]
mov esi,[point_index2]
mov esi, .point_index2
shl esi,2
add esi,tex_points
add esi,[tex_points_ptr]
push dword[esi]
mov esi,[point_index1]
mov esi, .point_index1
shl esi,2
add esi,tex_points
add esi,[tex_points_ptr]
push dword[esi]
 
mov eax,dword[xx1]
ror eax,16
mov ebx,dword[xx2]
ror ebx,16
mov ecx,dword[xx3]
ror ecx,16
mov eax,dword .xx1
mov ebx,dword .xx2
mov ecx,dword .xx3
mov edi,[screen_ptr]
mov esi,texmap
mov edx,[Zbuffer_ptr]
mov edx,[Zbuffer_ptr]
 
call tex_triangle_z
 
pop ebp
jmp .end_draw
; .ray:
; grd_triangle according to points index
2319,49 → 2273,43
 
.grd_tex: ; smooth shading + texture
push ebp
mov ebp,esp
sub esp,4
push ebp
 
mov esi,[point_index3] ; tex map coords
mov esi, .point_index3 ; tex map coords
shl esi,2
add esi,tex_points
add esi,[tex_points_ptr]
push dword[esi] ; texture coords as first
mov esi,[point_index2] ; group of parameters
mov esi, .point_index2 ; group of parameters
shl esi,2
add esi,tex_points
add esi,[tex_points_ptr]
push dword[esi]
mov esi,[point_index1]
mov esi, .point_index1
shl esi,2
add esi,tex_points
add esi,[tex_points_ptr]
push dword[esi]
 
mov esi,point_index3
lea esi, .index3x12
lea edi, .zz3
mov ecx,3
.aagain_grd_draw:
 
.aagain_grd_draw:
 
lea edx,[ecx*3]
push word[edx*2+xx1-2] ; zz1 ,2 ,3
push word[edi] ; zz1 ,2 ,3
fninit
mov eax,dword[esi]
shl eax,2
lea eax,[eax*3] ;+point_normals_rotated]
add eax,[points_normals_rot_ptr]
; texture x=(rotated point normal -> x * 255)+255
fld dword[eax] ; x cooficient of normal vector
fimul [correct_tex]
fiadd [correct_tex]
fistp [temp1] ;word[ebp-2]
fistp .temp1 ;word[ebp-2]
; texture y=(rotated point normal -> y * 255)+255
fld dword[eax+4] ; y cooficient
fimul [correct_tex]
fiadd [correct_tex]
fistp [temp2] ;word[ebp-4]
fistp .temp2 ;word[ebp-4]
 
mov eax,[temp2] ;word[ebp-4]
mov ebx,[temp1] ;word[ebp-2]
and ebx,0xfffffff ; some onjects need thid 'and'
mov eax,.temp2
mov ebx,.temp1
and ebx,0xfffffff ; some onjects need this 'and'
shl eax,TEX_SHIFT
add eax,ebx
lea eax,[eax*3]
2376,17 → 2324,14
push ax ;g
shr eax,24
push ax ;b
 
sub edi,2
sub esi,4
dec cx
jnz .aagain_grd_draw
 
mov eax,dword[xx1]
ror eax,16
mov ebx,dword[xx2]
ror ebx,16
mov ecx,dword[xx3]
ror ecx,16
mov eax, .xx1
mov ebx, .xx2
mov ecx, .xx3
mov edi,[screen_ptr]
mov edx,texmap
mov esi,[Zbuffer_ptr]
2394,31 → 2339,30
call tex_plus_grd_triangle
 
pop ebp
mov esp,ebp
pop ebp
jmp .end_draw
 
.two_tex:
push ebp
push [Zbuffer_ptr]
 
push word[zz3]
push word[zz2]
push word[zz1]
push word .zz3
push word .zz2
push word .zz1
 
mov esi,[point_index3] ; tex map coords
mov esi, .point_index3 ; tex map coords
shl esi,2
add esi,tex_points
add esi,[tex_points_ptr]
push dword[esi]
mov esi,[point_index2]
mov esi, .point_index2
shl esi,2
add esi,tex_points
add esi,[tex_points_ptr]
push dword[esi]
mov esi,[point_index1]
mov esi, .point_index1
shl esi,2
add esi,tex_points
add esi,[tex_points_ptr]
push dword[esi]
 
mov esi,point_index1 ; env coords
lea esi, .point_index1 ; env coords
sub esp,12
mov edi,esp
mov ecx,3
2443,50 → 2387,46
add esi,4
loop @b
 
mov eax,dword[xx1]
ror eax,16
mov ebx,dword[xx2]
ror ebx,16
mov ecx,dword[xx3]
ror ecx,16
mov eax, .xx1
mov ebx, .xx2
mov ecx, .xx3
mov edi,[screen_ptr]
mov esi,texmap
mov edx,envmap
 
call two_tex_triangle_z
pop ebp
jmp .end_draw
 
.bump_tex:
mov esi,[point_index3] ; tex map coords
push ebp
mov esi, .point_index3 ; tex map coords
shl esi,2
add esi,tex_points
add esi,[tex_points_ptr]
push dword[esi]
mov esi,[point_index2]
mov esi, .point_index2
shl esi,2
add esi,tex_points
add esi,[tex_points_ptr]
push dword[esi]
mov esi,[point_index1]
mov esi, .point_index1
shl esi,2
add esi,tex_points
add esi,[tex_points_ptr]
push dword[esi]
 
push dword texmap
 
push [Zbuffer_ptr]
xor edi,edi
 
push word[zz3]
push word[zz2]
push word[zz1]
push word .zz3
push word .zz2
push word .zz1
 
mov esi,point_index1 ; env coords
lea esi, .index1x12 ; env coords
sub esp,12
mov edi,esp
mov ecx,3
@@:
mov eax,dword[esi]
lea eax,[eax*3]
shl eax,2
add eax,[points_normals_rot_ptr]
; texture x=(rotated point normal -> x * 255)+255
fld dword[eax]
2503,40 → 2443,28
add esi,4
loop @b
 
; push dword 1 shl 16 + 1 ; emap coords
; push dword 127 shl 16 + 1
; push dword 127 shl 16 + 127
 
mov esi,[point_index3] ; bump map coords
mov esi, .point_index3 ; bump map coords
shl esi,2
add esi,tex_points
add esi,[tex_points_ptr]
push dword[esi]
mov esi,[point_index2]
mov esi, .point_index2
shl esi,2
add esi,tex_points
add esi,[tex_points_ptr]
push dword[esi]
 
mov esi,[point_index1]
mov esi, .point_index1
shl esi,2
add esi,tex_points
add esi,[tex_points_ptr]
push dword[esi]
 
; push dword 1 shl 16 + 127
; push dword 127 shl 16 + 127
; push dword 1 shl 16 + 1 ; bump coords
 
mov eax,dword[xx1]
ror eax,16
mov ebx,dword[xx2]
ror ebx,16
mov ecx,dword[xx3]
ror ecx,16
mov eax,dword .xx1
mov ebx,dword .xx2
mov ecx,dword .xx3
mov edi,[screen_ptr]
mov esi,envmap
mov edx,bumpmap
 
call bump_tex_triangle_z
 
pop ebp
jmp .end_draw
 
 
2549,12 → 2477,9
pshufd xmm5,xmm5,01110011b
 
 
mov eax,[point_index1]
mov ebx,[point_index2]
mov ecx,[point_index3]
imul eax,[i12]
imul ebx,[i12]
imul ecx,[i12]
mov eax, .index1x12
mov ebx, .index2x12
mov ecx, .index3x12
add eax,[points_normals_rot_ptr]
add ebx,[points_normals_rot_ptr]
add ecx,[points_normals_rot_ptr]
2566,12 → 2491,9
andps xmm2,[zero_hgst_dd]
xorps xmm3,xmm3
 
mov eax,[point_index1]
mov ebx,[point_index2]
mov ecx,[point_index3]
imul eax,[i12]
imul ebx,[i12]
imul ecx,[i12]
mov eax, .index1x12
mov ebx, .index2x12
mov ecx, .index3x12
add eax,[points_rotated_ptr]
add ebx,[points_rotated_ptr]
add ecx,[points_rotated_ptr]
2584,12 → 2506,9
 
 
 
mov eax,dword[xx1]
ror eax,16
mov ebx,dword[xx2]
ror ebx,16
mov ecx,dword[xx3]
ror ecx,16
mov eax,dword .xx1
mov ebx,dword .xx2
mov ecx,dword .xx3
mov edi,[screen_ptr]
mov esi,[Zbuffer_ptr]
 
2603,12 → 2522,9
pshufd xmm5,xmm5,01110011b
 
 
mov eax,[point_index1]
mov ebx ,[point_index2]
mov ecx,[point_index3]
imul eax,[i12]
imul ebx,[i12]
imul ecx,[i12]
mov eax, .index1x12
mov ebx, .index2x12
mov ecx, .index3x12
add eax,[points_normals_rot_ptr]
add ebx,[points_normals_rot_ptr]
add ecx,[points_normals_rot_ptr]
2620,12 → 2536,9
andps xmm2,[zero_hgst_dd]
xorps xmm3,xmm3
 
mov eax,[point_index1]
mov ebx,[point_index2]
mov ecx,[point_index3]
imul eax,[i12]
imul ebx,[i12]
imul ecx,[i12]
mov eax, .index1x12
mov ebx, .index2x12
mov ecx, .index3x12
add eax,[points_rotated_ptr]
add ebx,[points_rotated_ptr]
add ecx,[points_rotated_ptr]
2638,12 → 2551,9
 
 
 
mov eax,dword[xx1]
ror eax,16
mov ebx,dword[xx2]
ror ebx,16
mov ecx,dword[xx3]
ror ecx,16
mov eax, .xx1
mov ebx, .xx2
mov ecx, .xx3
mov edi,[screen_ptr]
mov edx,[Zbuffer_ptr]
mov esi,[Zbuffer_ptr]
2657,12 → 2567,9
punpcklwd xmm5,[the_zero]
pshufd xmm5,xmm5,01110011b
 
mov eax,[point_index1]
mov ebx,[point_index2]
mov ecx,[point_index3]
imul eax,[i12]
imul ebx,[i12]
imul ecx,[i12]
mov eax, .index1x12
mov ebx, .index2x12
mov ecx, .index3x12
add eax,[points_normals_rot_ptr]
add ebx,[points_normals_rot_ptr]
add ecx,[points_normals_rot_ptr]
2674,12 → 2581,9
andps xmm2,[zero_hgst_dd]
xorps xmm3,xmm3
 
mov eax,[point_index1]
mov ebx,[point_index2]
mov ecx,[point_index3]
imul eax,[i12]
imul ebx,[i12]
imul ecx,[i12]
mov eax, .index1x12
mov ebx, .index2x12
mov ecx, .index3x12
add eax,[points_rotated_ptr]
add ebx,[points_rotated_ptr]
add ecx,[points_rotated_ptr]
2690,17 → 2594,17
add esp,12
andps xmm4,[zero_hgst_dd]
 
mov esi,[point_index3] ; tex map coords
mov esi,.point_index3 ; tex map coords
shl esi,2
add esi,tex_points
add esi,[tex_points_ptr]
push dword[esi]
mov esi,[point_index2]
mov esi,.point_index2
shl esi,2
add esi,tex_points
add esi,[tex_points_ptr]
push dword[esi]
mov esi,[point_index1]
mov esi,.point_index1
shl esi,2
add esi,tex_points
add esi,[tex_points_ptr]
push dword[esi]
movups xmm6,[esp]
add esp,12
2714,31 → 2618,84
por xmm6,xmm7
 
 
mov eax,dword[xx1]
ror eax,16
mov ebx,dword[xx2]
ror ebx,16
mov ecx,dword[xx3]
ror ecx,16
mov eax,dword .xx1
mov ebx,dword .xx2
mov ecx,dword .xx3
mov edx,texmap
mov edi,[screen_ptr]
mov esi,[Zbuffer_ptr]
 
call glass_tex_tri
jmp .end_draw
 
; ---------------------------------------------------------------------
; .ray_shd - per-triangle set-up for the ray_shad renderer, then the call.
; Registers as loaded below, immediately before 'call ray_shad':
;   eax, ebx, ecx - values of .xx1, .xx2, .xx3
;   edx           - address of texmap
;   edi           - [screen_ptr]
;   esi           - [Zbuffer_ptr]
;   xmm0-xmm2     - 16 bytes read at points_normals_rot_ptr + .indexNx12
;   xmm3          - zero
;   xmm4          - offset-8 dwords of the three rotated points
;   xmm5          - words of [size_y_var] widened to dwords
;   mm7           - .tri_no
; NOTE(review): .index?x12, .xx? and .tri_no are symbols defined outside
; this excerpt (presumably 'equ' stack locals of the enclosing procedure);
; the actual input contract of ray_shad is not visible here - confirm.
; ---------------------------------------------------------------------
.ray_shd:
; leave MMX state before the SSE set-up below
emms
; xmm5 = dwords {0, low word, 0, high word} of the dword at size_y_var
; (assumes the_zero holds all-zero data - confirm)
movd xmm5,[size_y_var]
punpcklwd xmm5,[the_zero]
pshufd xmm5,xmm5,01110011b

; eax/ebx/ecx = addresses of the three vertices' entries in the array
; at [points_normals_rot_ptr] (offsets presumably vertex index * 12)
mov eax, .index1x12
mov ebx, .index2x12
mov ecx, .index3x12
add eax,[points_normals_rot_ptr]
add ebx,[points_normals_rot_ptr]
add ecx,[points_normals_rot_ptr]
; unaligned 16-byte loads, then masked with zero_hgst_dd
; (presumably clears the highest dword, which lies past the entry - confirm)
movups xmm0,[eax]
movups xmm1,[ebx]
movups xmm2,[ecx]
andps xmm0,[zero_hgst_dd]
andps xmm1,[zero_hgst_dd]
andps xmm2,[zero_hgst_dd]
xorps xmm3,xmm3

; disabled debugging aid: breakpoint when .tri_no is non-zero
; mov ebx,.tri_no
; cmp ebx,0
; je @f
; int3
; @@:
; same offsets, now into the array at [points_rotated_ptr]
mov eax, .index1x12
mov ebx, .index2x12
mov ecx, .index3x12
add eax,[points_rotated_ptr]
add ebx,[points_rotated_ptr]
add ecx,[points_rotated_ptr]
; gather the dword at offset 8 (vec_z offset) of each point via the stack:
; after the pushes [esp] = point 1, [esp+4] = point 2, [esp+8] = point 3
push dword[ecx+8]
push dword[ebx+8]
push dword[eax+8]
; the 16-byte load also picks up the dword above the three pushed values;
; it is masked with zero_hgst_dd right after the stack is released
movups xmm4,[esp]
add esp,12
andps xmm4,[zero_hgst_dd]

; NOTE(review): writing mm7 makes MMX state live again after the emms above
movd mm7,.tri_no

; mm7 - initialised


; integer register arguments for ray_shad (see header)
mov eax,dword .xx1
mov ebx,dword .xx2
mov ecx,dword .xx3
mov edx,texmap
mov edi,[screen_ptr]
mov esi,[Zbuffer_ptr]

; no jump follows: execution continues with the code after this call
call ray_shad
 
 
 
end if
 
.end_draw:
; pop ebp
pop ecx
pop esi
 
add esi,12
inc ecx
cmp ecx,[triangles_count_var]
jnz .again_dts
 
pop ecx
dec ecx
jmp .eend
 
jnz .again_dts
ret
 
 
.draw_smooth_line:
2789,7 → 2746,8
sub esp,16
movups [esp],xmm1
add esi,4
loop .aga_n
dec ecx
jnz .aga_n
 
movups xmm0,[esp]
movups xmm1,[esp+16]
2807,11 → 2765,17
movhps xmm7,[edx]
pshufd xmm7,xmm7,11101000b
movdqa xmm6,xmm7
movdqa xmm3,xmm7
movdqa xmm4,xmm7
movd xmm5,[size_y_var]
pshuflw xmm5,xmm5,00010001b
pcmpeqw xmm3,xmm5
pcmpeqw xmm4,[the_zero]
pcmpgtw xmm7,xmm5
pcmpgtw xmm6,[the_zero]
pxor xmm7,xmm6
pxor xmm3,xmm4
pxor xmm7,xmm3
pmovmskb eax,xmm7
cmp al,-1
jnz .skp
2851,15 → 2815,25
cmp ecx,[edges_count]
jnz .again_s_line
 
ret
 
 
 
 
 
; .eend - exit stub: releases 60 bytes of stack, restores ebp and returns.
; NOTE(review): the constant 60 has to match the stack space reserved by
; the prologue of the enclosing procedure, which is not visible here.
.eend:
add esp,60
pop ebp

ret
 
 
 
 
 
 
 
draw_handlers:
 
; in eax - render model
push ebp
mov ebp,esp
 
2866,14 → 2840,15
.counter equ ebp-16
.xres3m18 equ ebp-8
.xres2m12 equ ebp-12
.dr_model equ dword[ebp-4]
 
 
; init counter
sub esp,12
push dword 0
 
mov .dr_model,eax
movzx eax,word[size_x_var]
cmp [dr_flag],12
cmp .dr_model,12
jge @f
lea ebx,[eax*3]
sub ebx,18
2931,7 → 2906,7
add eax,ebx
push eax
lea edi,[eax*3]
cmp [dr_flag],12
cmp .dr_model,12
jl @f
add edi,[esp]
@@:
2956,7 → 2931,7
mov byte[edi+2],0xff ;al
mov word[eax],dx
add eax,2
cmp [dr_flag],12
cmp .dr_model,12
jl @f
add edi,4
loop .do
3226,7 → 3201,6
.exit:
mov dword[edi],-1
ret
 
alloc_mem_for_tp:
mov eax, 68
cmp [re_alloc_flag],1
3293,7 → 3267,15
mov [points_rotated_ptr], eax
 
mov eax, 68
mov ebx, 12
mov ecx, [points_count_var]
shl ecx,2
mov edx,[tex_points_ptr]
int 0x40
mov [tex_points_ptr], eax
 
mov eax, 68
mov ecx, [points_count_var]
inc ecx
shl ecx, 3
mov edx,[points_translated_ptr]
3302,7 → 3284,6
ret
 
 
 
read_from_disk:
mov eax, 68
mov ebx, 11
3348,11 → 3329,11
mov edi,menu
.again:
mov eax,8 ; function 8 : define and draw button
mov bx,[size_x_var]
movzx ebx,word[size_x_var]
shl ebx,16
add ebx,(10)*65536+62 ; [x start] *65536 + [x size]
movzx ecx,byte[edi] ; button id = position+2
sub cl,2
sub ecx,2
lea ecx,[ecx*5]
lea ecx,[ecx*3]
add ecx,25
3364,10 → 3345,10
; BUTTON LABEL
mov eax,4 ; function 4 : write text to window
movzx ebx,byte[edi]
sub bl,2 ; button id, according to position
sub ebx,2 ; button id, according to position
lea ebx,[ebx*3]
lea ebx,[ebx*5]
mov cx,[size_x_var]
movzx ecx,word[size_x_var]
shl ecx,16
add ebx,ecx
add ebx,(12)*65536+28 ; [x start] *65536 + [y start]
3459,6 → 3440,9
; ******* WINDOW DEFINITIONS AND DRAW ********
; *********************************************
draw_window:
movzx eax,[fire_flag]
push eax
; int3
mov eax,12 ; function 12:tell os about windowdraw
mov ebx,1 ; 1, start of draw
int 0x40
3492,6 → 3476,7
; add edx,130*65536+60 ; [x start] *65536 + [y start]
; mov esi,0x00ddeeff ; font 1 & color ( 0xF0RRGGBB )
; int 0x40
 
call write_info
 
; ADD VECTOR LABEL ; add vector buttons - 30 ++
3644,6 → 3629,8
mov eax,12 ; function 12:tell os about windowdraw
mov ebx,2 ; 2, end of draw
int 0x40
pop eax
mov [fire_flag],al
ret