/programs/demos/view3ds/3dmath.inc |
---|
4,13 → 4,108 |
vec_x equ 0 |
vec_y equ 4 |
vec_z equ 8 |
; 3d point - triple integer word coordinate |
; vector - triple float dword coordinate |
;----------------------in: -------------------------------- |
;------------------------ esi - pointer to 1st 3d point --- |
;------------------------ edi - pointer to 2nd 3d point --- |
;------------------------ ebx - pointer to result vector -- |
;---------------------- out : none ------------------------ |
if 0 ; Ext >= SSE3 |
; calc_bounding_box - tests one ray against the 8 consecutive 16-byte box |
; entries starting at aabb1 and returns one result bit per box. |
; The whole routine is currently compiled out by the 'if 0' above. |
calc_bounding_box: |
; in: |
; xmm0 - normal vector of ray |
; xmm1 - light origin |
; out: |
; eax - axis aligned bounding boxes bit mask |
;       (bit n is set when the slab test below passes for box n) |
; uses: ebx, ecx, esi, edi, xmm0-xmm2, xmm5-xmm7; calls reverse_mx_3x3, rotary |
; locals (ebp is rounded down to a 16-byte boundary, so the movaps below are aligned): |
.rmx equ [ebp-36] |
.nray equ [ebp-64] |
.origin equ [ebp-80] |
.dirfrac equ [ebp-96] |
.nrayr equ [ebp-112] |
.originr equ [ebp-128] |
.tmin equ [ebp-132] |
.tmax equ [ebp-136] |
; NOTE: .tmin and .tmax are declared but never referenced in this routine. |
push ebp |
mov ebp,esp |
and ebp,-16 |
sub esp,160 |
movss xmm5,[rsscale] |
; NOTE(review): with xmm1 as the source this yields (rs, rs, xmm1[0], xmm1[0]); |
; a plain broadcast of rsscale would be 'shufps xmm5,xmm5,0' - confirm intent. |
shufps xmm5,xmm1,0 |
; origin = light origin - (vect_x.. words zero-extended to dwords and converted to float) |
movd xmm2,[vect_x] |
punpcklwd xmm2,[the_zero] |
cvtdq2ps xmm2,xmm2 |
subps xmm1,xmm2 |
movaps .origin,xmm1 |
; scale the ray by xmm5 and keep it |
mulps xmm0,xmm5 |
movaps .nray,xmm0 |
; build the reversed matrix of 'matrix' into the local .rmx |
mov esi,matrix |
lea edi,.rmx |
call reverse_mx_3x3 |
; rotary calling convention (as used below): |
; in: esi - ptr to points (normals), each point (normal) coefficient as dword |
; edi - ptr to rotated points(normals) |
; ebx - ptr to 3x3 (9 dwords, 36 bytes) rotation matrix |
; ecx - number of points(normals) |
; reverse transform |
; rotate the ray direction with the reversed matrix |
lea esi,.nray |
lea edi,.nrayr |
lea ebx,.rmx |
mov ecx,1 |
call rotary |
; rotate the ray origin with the reversed matrix |
lea esi,.origin |
lea edi,.originr |
lea ebx,.rmx |
mov ecx,1 |
call rotary |
; ecx = box index, ebx = current box entry, eax = result mask |
xor ecx,ecx |
mov ebx,aabb1 |
xor eax,eax |
; dirfrac = approximate reciprocal of the rotated direction (rcpps) |
rcpps xmm7,.nrayr |
movaps .dirfrac,xmm7 |
.nx_aabb: |
; the two corners are min/max of the box entry against [the_zero] |
movaps xmm5,[ebx] |
movaps xmm6,[ebx] |
minps xmm5,[the_zero] |
maxps xmm6,[the_zero] |
; xmm5 - lb corner of AABB with minimal coordinates |
; xmm6 - rt corner of AABB with maximum coords |
subps xmm5,.originr |
subps xmm6,.originr |
mulps xmm5,.dirfrac ; xmm5 - tx1, ty1 |
mulps xmm6,.dirfrac ; xmm6 - tx2, ty2 |
; per-lane near (xmm1) and far (xmm2) slab distances |
movaps xmm1,xmm6 |
movaps xmm2,xmm6 |
minps xmm1,xmm5 |
maxps xmm2,xmm5 |
; swap lanes 0 and 1 so the scalar max/min combine the x and y slabs; |
; lane 2 (z) does not take part in the scalar compare below |
movaps xmm5,xmm1 |
movaps xmm6,xmm2 |
shufps xmm5,xmm5,11100001b |
shufps xmm6,xmm6,11100001b |
maxss xmm1,xmm5 ;t min |
minss xmm2,xmm6 ;t max |
; t max < t min -> leave the bit for this box cleared |
comiss xmm2,xmm1 |
jb .no_inter |
.yes: |
bts eax,ecx |
.no_inter: |
; advance to the next 16-byte box entry, 8 entries in total |
add ebx,16 |
inc ecx |
cmp ecx,8 |
jne .nx_aabb |
; out: eax - bit mask |
add esp,160 |
pop ebp |
ret |
end if |
reverse_mx_3x3: |
; esi - source matrix |
; edi - desired reversed matrix |
141,6 → 236,13 |
mov esp,ebp |
pop ebp |
ret |
; 3d point - triple integer word coordinate |
; vector - triple float dword coordinate |
;----------------------in: -------------------------------- |
;------------------------ esi - pointer to 1st 3d point --- |
;------------------------ edi - pointer to 2nd 3d point --- |
;------------------------ ebx - pointer to result vector -- |
;---------------------- out : none ------------------------ |
make_vector_r: |
if Ext < SSE2 |
194,17 → 296,37 |
fsubp ;st1 ,st |
fstp dword [ebx+vec_z] |
ret |
cross_aligned: |
;----------------------- in: ------------------------------ |
;---- esi - pointer to 1st vector (a), read with movaps --- |
;---- edi - pointer to 2nd vector (b), read with movaps --- |
;---- ebx - pointer to result vector, written with movaps - |
;----------------------- out: [ebx] = a x b, 4th dword = 0 |
;----------------------- uses xmm0 - xmm3 ----------------- |
        movaps  xmm0,[esi]              ; a, fetched once |
        movaps  xmm2,[edi]              ; b, fetched once |
        movaps  xmm1,xmm0               ; second copy of a |
        movaps  xmm3,xmm2               ; second copy of b |
        shufps  xmm0,xmm0,00001001b     ; a.y a.z a.x a.x |
        shufps  xmm2,xmm2,00010010b     ; b.z b.x b.y b.x |
        mulps   xmm0,xmm2               ; first products |
        shufps  xmm1,xmm1,00010010b     ; a.z a.x a.y a.x |
        shufps  xmm3,xmm3,00001001b     ; b.y b.z b.x b.x |
        mulps   xmm1,xmm3               ; second products |
        subps   xmm0,xmm1               ; lane-wise difference = cross product |
        movaps  [ebx],xmm0 |
        ret |
;----------------------- in: ------------------------------ |
;---------------------------- edi - pointer to vector ----- |
;----------------------- out : none |
normalize_vector: |
if Ext >= SSE3 |
if Ext >= SSE2 |
movups xmm0,[edi] |
andps xmm0,[zero_hgst_dd] |
movups xmm1,xmm0 |
mulps xmm0,xmm0 |
haddps xmm0,xmm0 |
haddps xmm0,xmm0 |
movhlps xmm2,xmm0 |
addps xmm0,xmm2 |
movaps xmm2,xmm0 |
shufps xmm2,xmm2,11100101b |
addps xmm0,xmm2 |
shufps xmm0,xmm0,0 |
; haddps xmm0,xmm0 |
; haddps xmm0,xmm0 |
rsqrtps xmm0,xmm0 |
mulps xmm0,xmm1 |
movlps [edi],xmm0 |
559,7 → 681,7 |
; packsdw xmm0,xmm0 |
; movq [edi] |
fld dword[esi] |
fiadd [vect_x] |
fiadd word[vect_x] |
fistp word[edi] |
fld dword[esi+4] |
fiadd [vect_y] |
/programs/demos/view3ds/3glass.inc |
---|
1,5 → 1,5 |
; Glass like rendering triangle by Maciej Guba. |
; http://macgub.hekko.pl, macgub3@wp.pl |
; http://macgub.co.pl, macgub3@wp.pl |
ROUND2 equ 10 |
glass_tri: |
/programs/demos/view3ds/3glass_tex.inc |
---|
1,762 → 1,762 |
; Bilinear filtering, real Phongs shading and glass like parallel. |
; Thanks to authors of 3dica tutorial. |
; Implemented in FASM by Maciej Guba. |
; http://macgub.j.pl |
ROUND2 equ 10 |
glass_tex_tri: |
;----Procedure render Phongs shaded triangle with z coord |
;----interpolation ( Catmull alghoritm ), each pixel is - |
;----covered by texture using bilinear filtering.-------- |
;----I normalize normal vector in every pixel ----------- |
;------------------in - eax - x1 shl 16 + y1 ------------ |
;---------------------- ebx - x2 shl 16 + y2 ------------ |
;---------------------- ecx - x3 shl 16 + y3 ------------ |
;---------------------- esi - pointer to stencil buffer-- |
;---------------------- filled with dd float variables- |
;---------------------- edi - pointer to screen buffer--- |
;---------------------- edx - pointer to texture--------- |
;---------------------- xmm0 - 1st normal vector -------- |
;---------------------- xmm1 - 2cond normal vector ------ |
;---------------------- xmm2 - 3rd normal vector -------- |
;---------------------- xmm3 - normalized light vector -- |
;---------------------- xmm4 - lo -> hi z1, z2, z3 coords |
;---------------------- as dwords floats --------------- |
;---------------------- xmm5 - lo -> hi y_min, y_max, --- |
;---------------------- x_min, x_max as dword integers - |
;---------------------- xmm6 - lo -> hi tx1, ty1, tx2, -- |
;---------------------- ty2, tx3, ty3 as word, xres as-- |
;---------------------- dword integers------------------ |
;---------------------- stack - no parameters ----------- |
;-------------------------------------------------------- |
;----------------- procedure don't save registers !! ---- |
; Overview: the vertices are sorted by y, per-scanline deltas are computed |
; for the three edges (x in ROUND2-bit fixed point; z, texture coords and |
; normals as floats), then glass_tex_line is called once per scanline - |
; first between edges 1-3 and 1-2, then between edges 1-3 and 2-3. |
push ebp |
mov ebp,esp |
sub esp,512 |
; ebp is moved below the saved ebp and rounded down to 16 bytes, so every |
; [ebp-16*n] local can be accessed with movaps/movdqa |
sub ebp,16 |
and ebp,0xfffffff0 |
.1_nv equ [ebp-16] |
.2_nv equ [ebp-32] |
.3_nv equ [ebp-48] |
.l_v equ [ebp-64] |
.z3 equ [ebp-72] |
.z2 equ [ebp-76] |
.z1 equ [ebp-80] |
.x1 equ [ebp-82] |
.y1 equ [ebp-84] |
.x2 equ [ebp-86] |
.y2 equ [ebp-88] |
.x3 equ [ebp-90] |
.y3 equ [ebp-92] |
.Zbuf equ [ebp-96] |
.x_max equ [ebp-100] |
.x_min equ [ebp-104] |
.y_max equ [ebp-108] |
.y_min equ [ebp-112] |
.screen equ [ebp-116] |
.dx12 equ [ebp-120] |
.dx13 equ [ebp-124] |
.dx23 equ [ebp-128] |
.dn12 equ [ebp-144] |
.dn13 equ [ebp-160] |
.dn23 equ [ebp-176] |
.cnv1 equ [ebp-192] ; cur normal vectors |
.cnv2 equ [ebp-208] |
.x_res equ [ebp-212] |
.ty3 equ [ebp-214] |
.tx3 equ [ebp-216] |
.ty2 equ [ebp-218] |
.tx2 equ [ebp-220] |
.ty1 equ [ebp-222] |
.tx1 equ [ebp-224] |
; each (dtx, dty, dz) and (ctx, cty, cz) triple below sits in one aligned |
; 16-byte slot and is loaded/added as a whole vector in the scanline loops |
.dz12 equ [ebp-232] |
.dty12 equ [ebp-236] |
.dtx12 equ [ebp-240] |
.dz13 equ [ebp-248] |
.dty13 equ [ebp-252] |
.dtx13 equ [ebp-256] |
.dz23 equ [ebp-264] |
.dty23 equ [ebp-268] |
.dtx23 equ [ebp-272] |
.cz1 equ [ebp-280] |
.cty1 equ [ebp-284] |
.ctx1 equ [ebp-288] |
.cz2 equ [ebp-296] |
.cty2 equ [ebp-300] |
.ctx2 equ [ebp-304] |
.tx_ptr equ [ebp-308] |
emms |
; movd .x_res,xmm7 |
; Sort the three vertices by ascending y (low word of eax/ebx/ecx). |
; Every swap is mirrored on the z lanes (xmm4), the packed texture |
; coordinate dwords (xmm6) and the normals (xmm0..xmm2). |
.sort3: ; sort triangle coordinates... |
cmp ax,bx |
jle .sort1 |
xchg eax,ebx |
shufps xmm4,xmm4,11100001b |
shufps xmm6,xmm6,11100001b |
movaps xmm7,xmm0 |
movaps xmm0,xmm1 |
movaps xmm1,xmm7 |
.sort1: |
cmp bx,cx |
jle .sort2 |
xchg ebx,ecx |
shufps xmm4,xmm4,11011000b |
shufps xmm6,xmm6,11011000b |
movaps xmm7,xmm1 |
movaps xmm1,xmm2 |
movaps xmm2,xmm7 |
jmp .sort3 |
.sort2: |
; movq .tx1,xmm6 |
; pshufd xmm6,xmm6,01001110b |
; movd .tx3,xmm6 |
; one 16-byte store fills .tx1 .. .ty3 and, with the highest dword, .x_res |
movaps .tx1,xmm6 |
; stores z1, z2, z3; the 4th dword lands in the unused slot at [ebp-68] |
movaps .z1,xmm4 |
; each dword store writes the y word and the x word that follows it |
mov .y1,eax |
mov .y2,ebx |
mov .y3,ecx |
; y_min, y_max, x_min, x_max as dwords |
movdqa .y_min,xmm5 |
if 1 ; check if at last only fragment |
packssdw xmm5,xmm5 ; of triangle is in visable area |
pshuflw xmm5,xmm5,11011000b |
; xmm7 = words y3, x3, y2, x2, y1, x1 followed by the two halves of z1 |
movdqu xmm7,.y3 |
movdqa xmm6,xmm5 |
pshufd xmm5,xmm5,0 ; xmm5 lo-hi -> broadcasted y_min, x_min |
pshufd xmm6,xmm6,01010101b ;xmm6 -> brd y_max x_max |
movdqa xmm4,xmm7 |
; (coord > min) xor (coord > max) is set for words lying in (min, max] |
pcmpgtw xmm7,xmm5 |
pcmpgtw xmm4,xmm6 |
pxor xmm7,xmm4 |
pmovmskb eax,xmm7 |
; NOTE(review): pmovmskb produces 16 bits, so this mask also keeps the two |
; words that come from z1, not only the six coordinate words - confirm. |
and eax,0x00aaaaaa |
or eax,eax |
; no word inside its range -> nothing is drawn |
jz .rpt_loop2_end |
end if |
movaps .1_nv,xmm0 |
movaps .2_nv,xmm1 |
movaps .3_nv,xmm2 |
movaps .l_v,xmm3 |
mov .Zbuf,esi |
mov .screen,edi |
mov .tx_ptr,edx |
; ---- edge 1-2 deltas per scanline (all zero when y2 = y1) ---- |
mov bx,.y2 ; calc deltas |
sub bx,.y1 |
jnz .rpt_dx12_make |
xorps xmm7,xmm7 |
mov dword .dx12,0 |
movaps .dtx12,xmm7 |
movaps .dn12,xmm7 |
jmp .rpt_dx12_done |
.rpt_dx12_make: |
; dx12 = ((x2 - x1) shl ROUND2) / (y2 - y1), signed fixed point |
mov ax,.x2 |
sub ax,.x1 |
cwde |
movsx ebx,bx |
shl eax,ROUND2 |
cdq |
idiv ebx |
mov .dx12,eax |
; xmm6 = (y2 - y1) as float in all four lanes |
cvtsi2ss xmm6,ebx |
shufps xmm6,xmm6,0 |
; dz12 = (z2 - z1) / dy |
movss xmm5,.z2 |
subss xmm5,.z1 |
divss xmm5,xmm6 |
movss .dz12,xmm5 |
; dtx12, dty12 = (tx2 - tx1, ty2 - ty1) / dy, words zero-extended first |
movd xmm0,.tx1 |
movd xmm2,.tx2 |
pxor xmm1,xmm1 |
punpcklwd xmm0,xmm1 |
punpcklwd xmm2,xmm1 |
psubd xmm2,xmm0 |
; cvtdq2ps xmm0,xmm0 |
cvtdq2ps xmm2,xmm2 |
; movlps .ctx1,xmm0 |
; movlps .ctx2,xmm2 |
; subps xmm2,xmm0 |
divps xmm2,xmm6 |
movlps .dtx12,xmm2 |
; dn12 = (normal2 - normal1) / dy |
movaps xmm0,.2_nv |
subps xmm0,.1_nv |
divps xmm0,xmm6 |
movaps .dn12,xmm0 |
.rpt_dx12_done: |
; ---- edge 1-3 deltas per scanline (all zero when y3 = y1) ---- |
mov bx,.y3 ; calc deltas |
sub bx,.y1 |
jnz .rpt_dx13_make |
xorps xmm7,xmm7 |
mov dword .dx13,0 |
movaps .dtx13,xmm7 |
movaps .dn13,xmm7 |
jmp .rpt_dx13_done |
.rpt_dx13_make: |
mov ax,.x3 |
sub ax,.x1 |
cwde |
movsx ebx,bx |
shl eax,ROUND2 |
cdq |
idiv ebx |
mov .dx13,eax |
cvtsi2ss xmm6,ebx |
shufps xmm6,xmm6,0 |
movss xmm5,.z3 |
subss xmm5,.z1 |
divss xmm5,xmm6 |
movss .dz13,xmm5 |
movd xmm0,.tx1 |
movd xmm2,.tx3 |
pxor xmm1,xmm1 |
punpcklwd xmm0,xmm1 |
punpcklwd xmm2,xmm1 |
psubd xmm2,xmm0 |
; cvtdq2ps xmm0,xmm0 |
cvtdq2ps xmm2,xmm2 |
; subps xmm2,xmm0 |
divps xmm2,xmm6 |
movlps .dtx13,xmm2 |
movaps xmm0,.3_nv |
subps xmm0,.1_nv |
divps xmm0,xmm6 |
movaps .dn13,xmm0 |
.rpt_dx13_done: |
; ---- edge 2-3 deltas per scanline (all zero when y3 = y2) ---- |
mov bx,.y3 ; calc deltas |
sub bx,.y2 |
jnz .rpt_dx23_make |
xorps xmm7,xmm7 |
mov dword .dx23,0 |
movaps .dtx23,xmm7 |
movaps .dn23,xmm7 |
jmp .rpt_dx23_done |
.rpt_dx23_make: |
mov ax,.x3 |
sub ax,.x2 |
cwde |
movsx ebx,bx |
shl eax,ROUND2 |
cdq |
idiv ebx |
mov .dx23,eax |
cvtsi2ss xmm6,ebx |
shufps xmm6,xmm6,0 |
movss xmm5,.z3 |
subss xmm5,.z2 |
divss xmm5,xmm6 |
movss .dz23,xmm5 |
movd xmm0,.tx2 |
movd xmm2,.tx3 |
pxor xmm1,xmm1 |
punpcklwd xmm0,xmm1 |
punpcklwd xmm2,xmm1 |
psubd xmm2,xmm0 |
; cvtdq2ps xmm0,xmm0 |
cvtdq2ps xmm2,xmm2 |
; movlps .ctx1,xmm0 |
; movlps .ctx2,xmm2 |
; subps xmm2,xmm0 |
divps xmm2,xmm6 |
movlps .dtx23,xmm2 |
movaps xmm0,.3_nv |
subps xmm0,.2_nv |
divps xmm0,xmm6 |
movaps .dn23,xmm0 |
.rpt_dx23_done: |
; ---- both running edges start at vertex 1 ---- |
; eax = ebx = x1 shl ROUND2; cz1 = cz2 = z1; ctx1 = ctx2 = (tx1, ty1); |
; cnv1 = cnv2 = normal 1 |
movsx eax,word .x1 |
shl eax,ROUND2 |
mov ebx,eax |
mov edx,.z1 |
movd xmm1,.tx1 |
pxor xmm2,xmm2 |
punpcklwd xmm1,xmm2 |
cvtdq2ps xmm1,xmm1 |
mov .cz1,edx |
mov .cz2,edx |
movaps xmm0,.1_nv |
movlps .ctx1,xmm1 |
movlps .ctx2,xmm1 |
movaps .cnv1,xmm0 |
movaps .cnv2,xmm0 |
; mov edx,.dx13 |
; cmp edx,.dx12 |
; jg .second_cause |
; ---- first part: scanlines y1 .. y2-1, between edge 1-3 and edge 1-2 ---- |
movsx ecx,word .y1 |
cmp cx,.y2 |
jge .rpt_loop1_end |
.rpt_loop1: |
; pushad/popad keep the fixed-point eax/ebx and the line counter ecx |
; intact across the call |
pushad |
movaps xmm2,.y_min |
movaps xmm0,.cnv1 |
movaps xmm1,.cnv2 |
; movlps xmm3,.cz1 ; cz1, cz2 both |
movaps xmm3,.ctx1 |
movaps xmm5,.ctx2 |
movaps xmm4,.l_v |
movd xmm6,.x_res |
; convert both x values from fixed point to integer pixels |
sar ebx,ROUND2 |
sar eax,ROUND2 |
mov edx,.tx_ptr |
mov edi,.screen |
mov esi,.Zbuf |
call glass_tex_line |
popad |
; step both edges down by one scanline |
movaps xmm0,.cnv1 |
movaps xmm1,.cnv2 |
; movss xmm2,.cz1 |
; movss xmm3,.cz2 |
movaps xmm2,.ctx1 |
movaps xmm3,.ctx2 |
addps xmm0,.dn13 |
addps xmm1,.dn12 |
addps xmm2,.dtx13 |
addps xmm3,.dtx12 |
add eax,.dx13 |
add ebx,.dx12 |
movaps .cnv1,xmm0 |
movaps .cnv2,xmm1 |
; movss .cz1,xmm2 |
; movss .cz2,xmm3 |
movaps .ctx1,xmm2 |
movaps .ctx2,xmm3 |
add ecx,1 |
cmp cx,.y2 |
jl .rpt_loop1 |
; jmp .rpt_loop2_end |
.rpt_loop1_end: |
; ---- second part: scanlines y2 .. y3-1, between edge 1-3 and edge 2-3 ---- |
movsx ecx,word .y2 |
cmp cx,.y3 |
jge .rpt_loop2_end |
; restart the second edge exactly at vertex 2 |
movsx ebx,word .x2 ; eax - cur x1 |
shl ebx,ROUND2 ; ebx - cur x2 |
push dword .z2 |
pop dword .cz2 |
movd xmm1,.tx2 |
pxor xmm2,xmm2 |
punpcklwd xmm1,xmm2 |
cvtdq2ps xmm1,xmm1 |
movlps .ctx2,xmm1 |
movaps xmm0,.2_nv |
movaps .cnv2,xmm0 |
.rpt_loop2: |
pushad |
movaps xmm2,.y_min |
movaps xmm0,.cnv1 |
movaps xmm1,.cnv2 |
movaps xmm3,.ctx1 |
movaps xmm5,.ctx2 |
movaps xmm4,.l_v |
sar ebx,ROUND2 |
sar eax,ROUND2 |
mov edx,.tx_ptr |
mov edi,.screen |
mov esi,.Zbuf |
movd xmm6,.x_res |
call glass_tex_line |
popad |
; step both edges down by one scanline |
movaps xmm0,.cnv1 |
movaps xmm1,.cnv2 |
; movss xmm2,.cz1 |
; movss xmm3,.cz2 |
movaps xmm2,.ctx1 |
movaps xmm3,.ctx2 |
addps xmm0,.dn13 |
addps xmm1,.dn23 |
; addss xmm2,.dz13 |
; addss xmm3,.dz23 |
addps xmm2,.dtx13 |
addps xmm3,.dtx23 |
add eax,.dx13 |
add ebx,.dx23 |
movaps .cnv1,xmm0 |
movaps .cnv2,xmm1 |
movaps .ctx1,xmm2 |
movaps .ctx2,xmm3 |
; movss .cz1,xmm2 |
; movss .cz2,xmm3 |
add ecx,1 |
cmp cx,.y3 |
jl .rpt_loop2 |
.second_cause: ;dx13 > dx12 |
.rpt_loop2_end: |
; release the 512 bytes reserved in the prologue and restore caller's ebp |
add esp,512 |
pop ebp |
ret |
align 16 |
glass_tex_line: |
; Draws one horizontal span of the glass/textured triangle: per pixel the |
; interpolated normal is normalized, lit by every record in lights_aligned, |
; multiplied by a bilinearly filtered texel and added (with byte saturation) |
; to the pixel already present in the screen buffer. |
; in: |
; xmm0 - normal vector 1 |
; xmm1 - normal vect 2 |
; xmm3 - lo -> hi tx1, ty1, z1 coords as dwords float |
; xmm5 - lo -> hi tx2, ty2, z2 coords as dwords float |
; xmm2 - lo -> hi y_min, y_max, x_min, x_max |
; as dword integers |
; xmm4 - normalized light vector |
; eax - x1 |
; ebx - x2 |
; ecx - y |
; edi - screen buffer |
; esi - stencil buffer filled with dd floats |
; edx - texture pointer (handle) |
; xmm6 - lowest dword x_res as integer |
; out: none; eax, ebx, ecx, edx, esi, edi and xmm0-xmm7 are not preserved |
push ebp |
mov ebp,esp |
; 352 instead of the former 350: the reservation is a multiple of 4, so esp |
; stays dword-aligned for the whole routine (the locals reach at most |
; 275 bytes below the saved ebp, so the size is still sufficient) |
sub esp,352 |
; ebp is moved below the saved ebp and rounded down to 16 bytes, so every |
; [ebp-16*n] local can be accessed with movaps |
sub ebp,16 |
and ebp,0xfffffff0 |
.n1 equ [ebp-16] |
.n2 equ [ebp-32] |
.lv equ [ebp-48] |
.lx1 equ [ebp-52] |
.lx2 equ [ebp-56] |
; .z2 equ [ebp-60] |
; .z1 equ [ebp-64] |
.screen equ [ebp-68] |
.zbuff equ [ebp-72] |
.x_max equ [ebp-74] |
.x_min equ [ebp-76] |
.y_max equ [ebp-78] |
.y_min equ [ebp-80] |
.dn equ [ebp-96] |
.x_res equ [ebp-100] |
.y equ [ebp-104] |
.cnv equ [ebp-128] |
.z1 equ [ebp-136] |
.ty1 equ [ebp-140] |
.tx1 equ [ebp-144] |
.z2 equ [ebp-152] |
.ty2 equ [ebp-156] |
.tx2 equ [ebp-160] |
.cz equ [ebp-168] |
.cty equ [ebp-172] |
.ctx equ [ebp-176] |
.dz equ [ebp-184] |
.dty equ [ebp-188] |
.dtx equ [ebp-192] |
.yd equ [ebp-196] |
.xd equ [ebp-200] |
.yf equ [ebp-204] |
.xf equ [ebp-208] |
.w4 equ [ebp-212] |
.w3 equ [ebp-216] |
.w2 equ [ebp-220] |
.w1 equ [ebp-224] |
.p4 equ [ebp-228] |
.p3 equ [ebp-232] |
.p2 equ [ebp-236] |
.p1 equ [ebp-240] |
.tx_ptr equ [ebp-244] |
; movaps xmm7,xmm3 |
; movaps xmm3,xmm5 |
; movaps xmm5,xmm7 |
mov .y,ecx |
; clip rectangle: dwords -> signed words, stored as y_min, y_max, x_min, x_max |
packssdw xmm2,xmm2 |
; movaps xmm7,xmm2 |
; movhps xmm2,[the_zero] |
; pshuflw xmm2,xmm2,11111000b |
; pshufd xmm2,xmm2,11111100b |
; movlps xmm7,[the_zero] |
; pshufhw xmm7,xmm7,11111111b |
; movlps xmm7,[the_zero] |
; psrldq xmm7,4 |
; por xmm2,xmm7 |
movq .y_min,xmm2 |
; reject the line when y is outside [y_min, y_max) |
cmp cx,.y_min |
jl .end_line |
cmp cx,.y_max |
jge .end_line ; |
; empty span when x1 = x2; otherwise order the ends so that eax < ebx, |
; swapping normals and texture/z vectors together with the x values |
cmp eax,ebx |
je .end_line |
jl @f |
xchg eax,ebx |
movaps xmm7,xmm0 |
movaps xmm0,xmm1 |
movaps xmm1,xmm7 |
movaps xmm7,xmm3 |
movaps xmm3,xmm5 |
movaps xmm5,xmm7 |
@@: |
; reject spans lying completely right or left of the clip rectangle |
cmp ax,.x_max |
jge .end_line |
cmp bx,.x_min |
jle .end_line |
; NOTE: .lv and .n2 are stored here but not read again in this routine |
movaps .lv,xmm4 |
movaps .n1,xmm0 |
movaps .n2,xmm1 |
mov .lx1,eax |
mov .lx2,ebx |
movaps .tx1,xmm3 |
movaps .tx2,xmm5 |
movd .x_res,xmm6 |
mov .tx_ptr,edx |
; per-pixel deltas: xmm7 = (x2 - x1) as float in all lanes |
sub ebx,eax |
cvtsi2ss xmm7,ebx |
shufps xmm7,xmm7,0 |
; dn = (n2 - n1) / (x2 - x1) |
subps xmm1,xmm0 |
divps xmm1,xmm7 |
movaps .dn,xmm1 |
; dtx = (tx2, ty2, z2 - tx1, ty1, z1) / (x2 - x1) |
subps xmm5,xmm3 |
divps xmm5,xmm7 |
movaps .dtx,xmm5 |
; left clipping: when x1 < x_min, advance the start normal and the start |
; texture/z vector by (x_min - x1) steps and begin at x_min |
mov ebx,.lx1 |
cmp bx,.x_min ; clipping on function4 |
jge @f |
movzx eax,word .x_min |
sub eax,ebx |
cvtsi2ss xmm7,eax |
shufps xmm7,xmm7,0 |
mulps xmm5,xmm7 |
mulps xmm1,xmm7 |
addps xmm5,.tx1 |
addps xmm1,.n1 |
movsx eax,word .x_min |
movaps .tx1,xmm5 |
movaps .n1,xmm1 |
mov dword .lx1,eax |
@@: |
; right clipping: lx2 = min(lx2, x_max) |
movzx eax,word .x_max |
cmp .lx2,eax |
jl @f |
mov .lx2,eax |
@@: |
; byte offset of the first pixel = (x_res * y + lx1) * 4, applied to both |
; the screen (edi) and the stencil (esi) pointer; 'mul' clobbers edx, the |
; texture pointer was saved to .tx_ptr above |
mov eax,.x_res |
mul dword .y |
add eax,.lx1 |
shl eax,2 |
add edi,eax |
add esi,eax |
; ecx = number of pixels to draw |
mov ecx,.lx2 |
sub ecx,.lx1 |
; movaps xmm0,.n1 |
; xmm2 = current tx, ty, z for the whole pixel loop |
movaps xmm2,.tx1 |
; xorps xmm1,xmm1 |
align 16 |
.ddraw: |
; movhlps xmm7,xmm2 |
; cmpnltss xmm7,dword[esi] |
; movd eax,xmm7 |
; or eax,eax |
; jnz .skip |
; xmm5 accumulates the per-lane maximum over all lights |
xorps xmm5,xmm5 |
; movhlps xmm7,xmm2 |
; movss [esi],xmm7 |
; .cnv = n1 * rsqrt(n1 . n1) - approximately normalized current normal |
movaps xmm7,.n1 ;xmm0 |
mulps xmm7,xmm7 ; normalize |
haddps xmm7,xmm7 |
haddps xmm7,xmm7 |
rsqrtps xmm7,xmm7 |
mulps xmm7,.n1 ;xmm0 |
; andps xmm7,[abs_z_coof] |
movaps .cnv,xmm7 |
; split the clamped texture coordinate into integer texel (xmm7) and |
; fractional part (.xf, .yf) |
movaps xmm6,xmm2 |
minps xmm6,[tex_m2] ; float TEX_X-2,TEX_Y-2 |
cvttps2dq xmm7,xmm6 |
cvtdq2ps xmm4,xmm7 |
subps xmm6,xmm4 |
movlps .xf,xmm6 |
; movaps xmm5,.lv |
; walk the 64-byte light records from lights_aligned to lights_aligned_end |
mov eax,lights_aligned ; global |
align 16 |
.again_col: |
; xmm0 = dot product of the light vector at [eax] and the normal, all lanes |
movaps xmm0,[eax] ; calc multple lights |
mulps xmm0,.cnv ;.lv ; last dword should be zeroed |
haddps xmm0,xmm0 |
haddps xmm0,xmm0 |
; andps xmm0,[abs_val] ;calc absolute value |
if 1 |
; stencil |
; the dot^16 term below is added only when the stencil value at [esi] |
; satisfies z - aprox < [esi] <= z + aprox; otherwise xmm4 stays zero |
movhlps xmm6,xmm2 |
movhlps xmm4,xmm2 |
addss xmm6,[aprox] |
subss xmm4,[aprox] |
cmpnltss xmm6,dword[esi] |
cmpnltss xmm4,dword[esi] |
xorps xmm6,xmm4 |
xorps xmm4,xmm4 |
movd ebx,xmm6 |
cmp ebx,-1 |
jne .no_reflective |
end if |
; xmm4 = dot^16 * [eax+48]  (presumably the specular colour - confirm) |
movaps xmm4,xmm0 |
mulps xmm4,xmm4 |
mulps xmm4,xmm4 |
mulps xmm4,xmm4 |
mulps xmm4,xmm4 |
mulps xmm4,[eax+48] |
.no_reflective: |
; xmm4 += max(dot, 0) * [eax+16] + [eax+32] |
; (presumably diffuse and ambient colours - confirm) |
maxps xmm0,[the_zero] |
; movaps xmm1,xmm0 |
mulps xmm0,[eax+16] |
addps xmm4,xmm0 |
addps xmm4,[eax+32] |
maxps xmm5,xmm4 |
add eax,64 |
cmp eax,lights_aligned_end |
jnz .again_col |
minps xmm5,[mask_255f] |
; texture coords work |
; eax = tx_ptr + (x + (y shl TEX_SHIFT)) * 3, ebx = same texel one row below |
movd eax,xmm7 |
psrldq xmm7,4 |
movd ebx,xmm7 |
shl ebx,TEX_SHIFT |
add eax,ebx |
lea eax,[eax*3] |
add eax,.tx_ptr |
mov ebx,eax |
add ebx,TEX_X*3 |
; fetch the 2x2 texel neighbourhood (3 bytes per texel) and interleave it |
; so that each colour channel ends up as four consecutive bytes |
movd xmm7,[eax] |
movd xmm6,[eax+3] |
movd xmm4,[ebx] |
movd xmm3,[ebx+3] |
punpcklbw xmm7,xmm6 ;xmm7 r1 r2 g1 g2 b1 b2 |
punpcklbw xmm4,xmm3 ;xmm4 r3 r4 g3 g4 b3 b4 |
punpcklwd xmm7,xmm4 ;xmm7 r1 r2 r3 r4 g1 g2 g3 g4 ... |
movdqa xmm6,xmm7 |
movdqa xmm4,xmm7 |
psrldq xmm6,4 |
psrldq xmm4,8 |
; widen bytes -> words -> dwords, one channel per register |
punpcklbw xmm7,[the_zero] ; broadcasted 0 |
punpcklbw xmm6,[the_zero] |
punpcklbw xmm4,[the_zero] |
punpcklwd xmm7,[the_zero] |
punpcklwd xmm6,[the_zero] |
punpcklwd xmm4,[the_zero] |
; calc w ......... |
; xmm3 = (1-xf)(1-yf), xf(1-yf), (1-xf)yf, xf*yf - bilinear weights |
movlps xmm3,[the_one] ; broadcasted dword 1.0 |
cvtdq2ps xmm7,xmm7 |
subps xmm3,.xf |
cvtdq2ps xmm6,xmm6 |
movhps xmm3,.xf |
cvtdq2ps xmm4,xmm4 |
movaps xmm1,xmm3 ; 1-xf, 1-yf, xf, yf |
shufps xmm3,xmm3,10001000b |
shufps xmm1,xmm1,11110101b |
mulps xmm3,xmm1 |
; weighted sum of the four texels, per channel |
mulps xmm7,xmm3 |
mulps xmm6,xmm3 |
mulps xmm4,xmm3 |
haddps xmm7,xmm7 ; r |
haddps xmm6,xmm6 ; g |
haddps xmm4,xmm4 ; b |
haddps xmm7,xmm7 ; r |
haddps xmm6,xmm6 ; g |
haddps xmm4,xmm4 ; b |
; gather the three filtered channels into the low three lanes of xmm7 |
movlhps xmm7,xmm6 |
shufps xmm7,xmm7,11101000b |
movlhps xmm7,xmm4 |
; light * texel, scaled down by 256, packed to bytes and added with |
; unsigned saturation to the pixel already in the screen buffer |
mulps xmm5,xmm7 |
cvtps2dq xmm5,xmm5 |
psrld xmm5,8 |
movd xmm6,[edi] |
packssdw xmm5,xmm5 |
packuswb xmm5,xmm5 |
paddusb xmm5,xmm6 |
movd [edi],xmm5 |
.skip: |
; next pixel: advance both buffers, the normal and the tx/ty/z vector |
add edi,4 |
add esi,4 |
; addps xmm0,.dn |
movaps xmm0,.n1 ; cur normal |
addps xmm0,.dn |
addps xmm2,.dtx |
movaps .n1,xmm0 |
sub ecx,1 |
jnz .ddraw |
.end_line: |
; release exactly what the prologue reserved and restore caller's ebp |
add esp,352 |
pop ebp |
ret |
; Bilinear filtering, real Phongs shading and glass like parallel. |
; Thanks to authors of 3dica tutorial. |
; Implemented in FASM by Maciej Guba. |
; http://macgub.co.pl |
ROUND2 equ 10 |
glass_tex_tri: |
;----Procedure render Phongs shaded triangle with z coord |
;----interpolation ( Catmull alghoritm ), each pixel is - |
;----covered by texture using bilinear filtering.-------- |
;----I normalize normal vector in every pixel ----------- |
;------------------in - eax - x1 shl 16 + y1 ------------ |
;---------------------- ebx - x2 shl 16 + y2 ------------ |
;---------------------- ecx - x3 shl 16 + y3 ------------ |
;---------------------- esi - pointer to stencil buffer-- |
;---------------------- filled with dd float variables- |
;---------------------- edi - pointer to screen buffer--- |
;---------------------- edx - pointer to texture--------- |
;---------------------- xmm0 - 1st normal vector -------- |
;---------------------- xmm1 - 2cond normal vector ------ |
;---------------------- xmm2 - 3rd normal vector -------- |
;---------------------- xmm3 - normalized light vector -- |
;---------------------- xmm4 - lo -> hi z1, z2, z3 coords |
;---------------------- as dwords floats --------------- |
;---------------------- xmm5 - lo -> hi y_min, y_max, --- |
;---------------------- x_min, x_max as dword integers - |
;---------------------- xmm6 - lo -> hi tx1, ty1, tx2, -- |
;---------------------- ty2, tx3, ty3 as word, xres as-- |
;---------------------- dword integers------------------ |
;---------------------- stack - no parameters ----------- |
;-------------------------------------------------------- |
;----------------- procedure don't save registers !! ---- |
; Overview: the vertices are sorted by y, per-scanline deltas are computed |
; for the three edges (x in ROUND2-bit fixed point; z, texture coords and |
; normals as floats), then glass_tex_line is called once per scanline - |
; first between edges 1-3 and 1-2, then between edges 1-3 and 2-3. |
push ebp |
mov ebp,esp |
sub esp,512 |
; ebp is moved below the saved ebp and rounded down to 16 bytes, so every |
; [ebp-16*n] local can be accessed with movaps/movdqa |
sub ebp,16 |
and ebp,0xfffffff0 |
.1_nv equ [ebp-16] |
.2_nv equ [ebp-32] |
.3_nv equ [ebp-48] |
.l_v equ [ebp-64] |
.z3 equ [ebp-72] |
.z2 equ [ebp-76] |
.z1 equ [ebp-80] |
.x1 equ [ebp-82] |
.y1 equ [ebp-84] |
.x2 equ [ebp-86] |
.y2 equ [ebp-88] |
.x3 equ [ebp-90] |
.y3 equ [ebp-92] |
.Zbuf equ [ebp-96] |
.x_max equ [ebp-100] |
.x_min equ [ebp-104] |
.y_max equ [ebp-108] |
.y_min equ [ebp-112] |
.screen equ [ebp-116] |
.dx12 equ [ebp-120] |
.dx13 equ [ebp-124] |
.dx23 equ [ebp-128] |
.dn12 equ [ebp-144] |
.dn13 equ [ebp-160] |
.dn23 equ [ebp-176] |
.cnv1 equ [ebp-192] ; cur normal vectors |
.cnv2 equ [ebp-208] |
.x_res equ [ebp-212] |
.ty3 equ [ebp-214] |
.tx3 equ [ebp-216] |
.ty2 equ [ebp-218] |
.tx2 equ [ebp-220] |
.ty1 equ [ebp-222] |
.tx1 equ [ebp-224] |
; each (dtx, dty, dz) and (ctx, cty, cz) triple below sits in one aligned |
; 16-byte slot and is loaded/added as a whole vector in the scanline loops |
.dz12 equ [ebp-232] |
.dty12 equ [ebp-236] |
.dtx12 equ [ebp-240] |
.dz13 equ [ebp-248] |
.dty13 equ [ebp-252] |
.dtx13 equ [ebp-256] |
.dz23 equ [ebp-264] |
.dty23 equ [ebp-268] |
.dtx23 equ [ebp-272] |
.cz1 equ [ebp-280] |
.cty1 equ [ebp-284] |
.ctx1 equ [ebp-288] |
.cz2 equ [ebp-296] |
.cty2 equ [ebp-300] |
.ctx2 equ [ebp-304] |
.tx_ptr equ [ebp-308] |
emms |
; movd .x_res,xmm7 |
; Sort the three vertices by ascending y (low word of eax/ebx/ecx). |
; Every swap is mirrored on the z lanes (xmm4), the packed texture |
; coordinate dwords (xmm6) and the normals (xmm0..xmm2). |
.sort3: ; sort triangle coordinates... |
cmp ax,bx |
jle .sort1 |
xchg eax,ebx |
shufps xmm4,xmm4,11100001b |
shufps xmm6,xmm6,11100001b |
movaps xmm7,xmm0 |
movaps xmm0,xmm1 |
movaps xmm1,xmm7 |
.sort1: |
cmp bx,cx |
jle .sort2 |
xchg ebx,ecx |
shufps xmm4,xmm4,11011000b |
shufps xmm6,xmm6,11011000b |
movaps xmm7,xmm1 |
movaps xmm1,xmm2 |
movaps xmm2,xmm7 |
jmp .sort3 |
.sort2: |
; movq .tx1,xmm6 |
; pshufd xmm6,xmm6,01001110b |
; movd .tx3,xmm6 |
; one 16-byte store fills .tx1 .. .ty3 and, with the highest dword, .x_res |
movaps .tx1,xmm6 |
; stores z1, z2, z3; the 4th dword lands in the unused slot at [ebp-68] |
movaps .z1,xmm4 |
; each dword store writes the y word and the x word that follows it |
mov .y1,eax |
mov .y2,ebx |
mov .y3,ecx |
; y_min, y_max, x_min, x_max as dwords |
movdqa .y_min,xmm5 |
if 1 ; check if at last only fragment |
packssdw xmm5,xmm5 ; of triangle is in visable area |
pshuflw xmm5,xmm5,11011000b |
; xmm7 = words y3, x3, y2, x2, y1, x1 followed by the two halves of z1 |
movdqu xmm7,.y3 |
movdqa xmm6,xmm5 |
pshufd xmm5,xmm5,0 ; xmm5 lo-hi -> broadcasted y_min, x_min |
pshufd xmm6,xmm6,01010101b ;xmm6 -> brd y_max x_max |
movdqa xmm4,xmm7 |
; (coord > min) xor (coord > max) is set for words lying in (min, max] |
pcmpgtw xmm7,xmm5 |
pcmpgtw xmm4,xmm6 |
pxor xmm7,xmm4 |
pmovmskb eax,xmm7 |
; NOTE(review): pmovmskb produces 16 bits, so this mask also keeps the two |
; words that come from z1, not only the six coordinate words - confirm. |
and eax,0x00aaaaaa |
or eax,eax |
; no word inside its range -> nothing is drawn |
jz .rpt_loop2_end |
end if |
movaps .1_nv,xmm0 |
movaps .2_nv,xmm1 |
movaps .3_nv,xmm2 |
movaps .l_v,xmm3 |
mov .Zbuf,esi |
mov .screen,edi |
mov .tx_ptr,edx |
; ---- edge 1-2 deltas per scanline (all zero when y2 = y1) ---- |
mov bx,.y2 ; calc deltas |
sub bx,.y1 |
jnz .rpt_dx12_make |
xorps xmm7,xmm7 |
mov dword .dx12,0 |
movaps .dtx12,xmm7 |
movaps .dn12,xmm7 |
jmp .rpt_dx12_done |
.rpt_dx12_make: |
; dx12 = ((x2 - x1) shl ROUND2) / (y2 - y1), signed fixed point |
mov ax,.x2 |
sub ax,.x1 |
cwde |
movsx ebx,bx |
shl eax,ROUND2 |
cdq |
idiv ebx |
mov .dx12,eax |
; xmm6 = (y2 - y1) as float in all four lanes |
cvtsi2ss xmm6,ebx |
shufps xmm6,xmm6,0 |
; dz12 = (z2 - z1) / dy |
movss xmm5,.z2 |
subss xmm5,.z1 |
divss xmm5,xmm6 |
movss .dz12,xmm5 |
; dtx12, dty12 = (tx2 - tx1, ty2 - ty1) / dy, words zero-extended first |
movd xmm0,.tx1 |
movd xmm2,.tx2 |
pxor xmm1,xmm1 |
punpcklwd xmm0,xmm1 |
punpcklwd xmm2,xmm1 |
psubd xmm2,xmm0 |
; cvtdq2ps xmm0,xmm0 |
cvtdq2ps xmm2,xmm2 |
; movlps .ctx1,xmm0 |
; movlps .ctx2,xmm2 |
; subps xmm2,xmm0 |
divps xmm2,xmm6 |
movlps .dtx12,xmm2 |
; dn12 = (normal2 - normal1) / dy |
movaps xmm0,.2_nv |
subps xmm0,.1_nv |
divps xmm0,xmm6 |
movaps .dn12,xmm0 |
.rpt_dx12_done: |
; ---- edge 1-3 deltas per scanline (all zero when y3 = y1) ---- |
mov bx,.y3 ; calc deltas |
sub bx,.y1 |
jnz .rpt_dx13_make |
xorps xmm7,xmm7 |
mov dword .dx13,0 |
movaps .dtx13,xmm7 |
movaps .dn13,xmm7 |
jmp .rpt_dx13_done |
.rpt_dx13_make: |
mov ax,.x3 |
sub ax,.x1 |
cwde |
movsx ebx,bx |
shl eax,ROUND2 |
cdq |
idiv ebx |
mov .dx13,eax |
cvtsi2ss xmm6,ebx |
shufps xmm6,xmm6,0 |
movss xmm5,.z3 |
subss xmm5,.z1 |
divss xmm5,xmm6 |
movss .dz13,xmm5 |
movd xmm0,.tx1 |
movd xmm2,.tx3 |
pxor xmm1,xmm1 |
punpcklwd xmm0,xmm1 |
punpcklwd xmm2,xmm1 |
psubd xmm2,xmm0 |
; cvtdq2ps xmm0,xmm0 |
cvtdq2ps xmm2,xmm2 |
; subps xmm2,xmm0 |
divps xmm2,xmm6 |
movlps .dtx13,xmm2 |
movaps xmm0,.3_nv |
subps xmm0,.1_nv |
divps xmm0,xmm6 |
movaps .dn13,xmm0 |
.rpt_dx13_done: |
; ---- edge 2-3 deltas per scanline (all zero when y3 = y2) ---- |
mov bx,.y3 ; calc deltas |
sub bx,.y2 |
jnz .rpt_dx23_make |
xorps xmm7,xmm7 |
mov dword .dx23,0 |
movaps .dtx23,xmm7 |
movaps .dn23,xmm7 |
jmp .rpt_dx23_done |
.rpt_dx23_make: |
mov ax,.x3 |
sub ax,.x2 |
cwde |
movsx ebx,bx |
shl eax,ROUND2 |
cdq |
idiv ebx |
mov .dx23,eax |
cvtsi2ss xmm6,ebx |
shufps xmm6,xmm6,0 |
movss xmm5,.z3 |
subss xmm5,.z2 |
divss xmm5,xmm6 |
movss .dz23,xmm5 |
movd xmm0,.tx2 |
movd xmm2,.tx3 |
pxor xmm1,xmm1 |
punpcklwd xmm0,xmm1 |
punpcklwd xmm2,xmm1 |
psubd xmm2,xmm0 |
; cvtdq2ps xmm0,xmm0 |
cvtdq2ps xmm2,xmm2 |
; movlps .ctx1,xmm0 |
; movlps .ctx2,xmm2 |
; subps xmm2,xmm0 |
divps xmm2,xmm6 |
movlps .dtx23,xmm2 |
movaps xmm0,.3_nv |
subps xmm0,.2_nv |
divps xmm0,xmm6 |
movaps .dn23,xmm0 |
.rpt_dx23_done: |
; ---- both running edges start at vertex 1 ---- |
; eax = ebx = x1 shl ROUND2; cz1 = cz2 = z1; ctx1 = ctx2 = (tx1, ty1); |
; cnv1 = cnv2 = normal 1 |
movsx eax,word .x1 |
shl eax,ROUND2 |
mov ebx,eax |
mov edx,.z1 |
movd xmm1,.tx1 |
pxor xmm2,xmm2 |
punpcklwd xmm1,xmm2 |
cvtdq2ps xmm1,xmm1 |
mov .cz1,edx |
mov .cz2,edx |
movaps xmm0,.1_nv |
movlps .ctx1,xmm1 |
movlps .ctx2,xmm1 |
movaps .cnv1,xmm0 |
movaps .cnv2,xmm0 |
; mov edx,.dx13 |
; cmp edx,.dx12 |
; jg .second_cause |
; ---- first part: scanlines y1 .. y2-1, between edge 1-3 and edge 1-2 ---- |
movsx ecx,word .y1 |
cmp cx,.y2 |
jge .rpt_loop1_end |
.rpt_loop1: |
; pushad/popad keep the fixed-point eax/ebx and the line counter ecx |
; intact across the call |
pushad |
movaps xmm2,.y_min |
movaps xmm0,.cnv1 |
movaps xmm1,.cnv2 |
; movlps xmm3,.cz1 ; cz1, cz2 both |
movaps xmm3,.ctx1 |
movaps xmm5,.ctx2 |
movaps xmm4,.l_v |
movd xmm6,.x_res |
; convert both x values from fixed point to integer pixels |
sar ebx,ROUND2 |
sar eax,ROUND2 |
mov edx,.tx_ptr |
mov edi,.screen |
mov esi,.Zbuf |
call glass_tex_line |
popad |
; step both edges down by one scanline |
movaps xmm0,.cnv1 |
movaps xmm1,.cnv2 |
; movss xmm2,.cz1 |
; movss xmm3,.cz2 |
movaps xmm2,.ctx1 |
movaps xmm3,.ctx2 |
addps xmm0,.dn13 |
addps xmm1,.dn12 |
addps xmm2,.dtx13 |
addps xmm3,.dtx12 |
add eax,.dx13 |
add ebx,.dx12 |
movaps .cnv1,xmm0 |
movaps .cnv2,xmm1 |
; movss .cz1,xmm2 |
; movss .cz2,xmm3 |
movaps .ctx1,xmm2 |
movaps .ctx2,xmm3 |
add ecx,1 |
cmp cx,.y2 |
jl .rpt_loop1 |
; jmp .rpt_loop2_end |
.rpt_loop1_end: |
; ---- second part: scanlines y2 .. y3-1, between edge 1-3 and edge 2-3 ---- |
movsx ecx,word .y2 |
cmp cx,.y3 |
jge .rpt_loop2_end |
; restart the second edge exactly at vertex 2 |
movsx ebx,word .x2 ; eax - cur x1 |
shl ebx,ROUND2 ; ebx - cur x2 |
push dword .z2 |
pop dword .cz2 |
movd xmm1,.tx2 |
pxor xmm2,xmm2 |
punpcklwd xmm1,xmm2 |
cvtdq2ps xmm1,xmm1 |
movlps .ctx2,xmm1 |
movaps xmm0,.2_nv |
movaps .cnv2,xmm0 |
.rpt_loop2: |
pushad |
movaps xmm2,.y_min |
movaps xmm0,.cnv1 |
movaps xmm1,.cnv2 |
movaps xmm3,.ctx1 |
movaps xmm5,.ctx2 |
movaps xmm4,.l_v |
sar ebx,ROUND2 |
sar eax,ROUND2 |
mov edx,.tx_ptr |
mov edi,.screen |
mov esi,.Zbuf |
movd xmm6,.x_res |
call glass_tex_line |
popad |
; step both edges down by one scanline |
movaps xmm0,.cnv1 |
movaps xmm1,.cnv2 |
; movss xmm2,.cz1 |
; movss xmm3,.cz2 |
movaps xmm2,.ctx1 |
movaps xmm3,.ctx2 |
addps xmm0,.dn13 |
addps xmm1,.dn23 |
; addss xmm2,.dz13 |
; addss xmm3,.dz23 |
addps xmm2,.dtx13 |
addps xmm3,.dtx23 |
add eax,.dx13 |
add ebx,.dx23 |
movaps .cnv1,xmm0 |
movaps .cnv2,xmm1 |
movaps .ctx1,xmm2 |
movaps .ctx2,xmm3 |
; movss .cz1,xmm2 |
; movss .cz2,xmm3 |
add ecx,1 |
cmp cx,.y3 |
jl .rpt_loop2 |
.second_cause: ;dx13 > dx12 |
.rpt_loop2_end: |
; release the 512 bytes reserved in the prologue and restore caller's ebp |
add esp,512 |
pop ebp |
ret |
align 16 |
glass_tex_line: |
; in: |
; xmm0 - normal vector 1 |
; xmm1 - normal vect 2 |
; xmm3 - lo -> hi tx1, ty1, z1 coords as dwords float |
; xmm5 - lo -> hi tx2, ty2, z2 coords as dwords float |
; xmm2 - lo -> hi y_min, y_max, x_min, x_max |
; as dword integers |
; xmm4 - normalized light vector |
; eax - x1 |
; ebx - x2 |
; ecx - y |
; edi - screen buffer |
; esi - stencil buffer filled with dd floats |
; edx - texture pointer (handle) |
; xmm6 - lowest dword x_res as integer |
push ebp |
mov ebp,esp |
sub esp,350 |
sub ebp,16 |
and ebp,0xfffffff0 |
.n1 equ [ebp-16] |
.n2 equ [ebp-32] |
.lv equ [ebp-48] |
.lx1 equ [ebp-52] |
.lx2 equ [ebp-56] |
; .z2 equ [ebp-60] |
; .z1 equ [ebp-64] |
.screen equ [ebp-68] |
.zbuff equ [ebp-72] |
.x_max equ [ebp-74] |
.x_min equ [ebp-76] |
.y_max equ [ebp-78] |
.y_min equ [ebp-80] |
.dn equ [ebp-96] |
.x_res equ [ebp-100] |
.y equ [ebp-104] |
.cnv equ [ebp-128] |
.z1 equ [ebp-136] |
.ty1 equ [ebp-140] |
.tx1 equ [ebp-144] |
.z2 equ [ebp-152] |
.ty2 equ [ebp-156] |
.tx2 equ [ebp-160] |
.cz equ [ebp-168] |
.cty equ [ebp-172] |
.ctx equ [ebp-176] |
.dz equ [ebp-184] |
.dty equ [ebp-188] |
.dtx equ [ebp-192] |
.yd equ [ebp-196] |
.xd equ [ebp-200] |
.yf equ [ebp-204] |
.xf equ [ebp-208] |
.w4 equ [ebp-212] |
.w3 equ [ebp-216] |
.w2 equ [ebp-220] |
.w1 equ [ebp-224] |
.p4 equ [ebp-228] |
.p3 equ [ebp-232] |
.p2 equ [ebp-236] |
.p1 equ [ebp-240] |
.tx_ptr equ [ebp-244] |
; movaps xmm7,xmm3 |
; movaps xmm3,xmm5 |
; movaps xmm5,xmm7 |
mov .y,ecx |
packssdw xmm2,xmm2 |
; movaps xmm7,xmm2 |
; movhps xmm2,[the_zero] |
; pshuflw xmm2,xmm2,11111000b |
; pshufd xmm2,xmm2,11111100b |
; movlps xmm7,[the_zero] |
; pshufhw xmm7,xmm7,11111111b |
; movlps xmm7,[the_zero] |
; psrldq xmm7,4 |
; por xmm2,xmm7 |
movq .y_min,xmm2 |
cmp cx,.y_min |
jl .end_line |
cmp cx,.y_max |
jge .end_line ; |
cmp eax,ebx |
je .end_line |
jl @f |
xchg eax,ebx |
movaps xmm7,xmm0 |
movaps xmm0,xmm1 |
movaps xmm1,xmm7 |
movaps xmm7,xmm3 |
movaps xmm3,xmm5 |
movaps xmm5,xmm7 |
@@: |
cmp ax,.x_max |
jge .end_line |
cmp bx,.x_min |
jle .end_line |
movaps .lv,xmm4 |
movaps .n1,xmm0 |
movaps .n2,xmm1 |
mov .lx1,eax |
mov .lx2,ebx |
movaps .tx1,xmm3 |
movaps .tx2,xmm5 |
movd .x_res,xmm6 |
mov .tx_ptr,edx |
sub ebx,eax |
cvtsi2ss xmm7,ebx |
shufps xmm7,xmm7,0 |
subps xmm1,xmm0 |
divps xmm1,xmm7 |
movaps .dn,xmm1 |
subps xmm5,xmm3 |
divps xmm5,xmm7 |
movaps .dtx,xmm5 |
mov ebx,.lx1 |
cmp bx,.x_min ; clipping on function4 |
jge @f |
movzx eax,word .x_min |
sub eax,ebx |
cvtsi2ss xmm7,eax |
shufps xmm7,xmm7,0 |
mulps xmm5,xmm7 |
mulps xmm1,xmm7 |
addps xmm5,.tx1 |
addps xmm1,.n1 |
movsx eax,word .x_min |
movaps .tx1,xmm5 |
movaps .n1,xmm1 |
mov dword .lx1,eax |
@@: |
movzx eax,word .x_max |
cmp .lx2,eax |
jl @f |
mov .lx2,eax |
@@: |
mov eax,.x_res |
mul dword .y |
add eax,.lx1 |
shl eax,2 |
add edi,eax |
add esi,eax |
mov ecx,.lx2 |
sub ecx,.lx1 |
; movaps xmm0,.n1 |
movaps xmm2,.tx1 |
; xorps xmm1,xmm1 |
align 16 |
.ddraw: |
; movhlps xmm7,xmm2 |
; cmpnltss xmm7,dword[esi] |
; movd eax,xmm7 |
; or eax,eax |
; jnz .skip |
xorps xmm5,xmm5 |
; movhlps xmm7,xmm2 |
; movss [esi],xmm7 |
movaps xmm7,.n1 ;xmm0 |
mulps xmm7,xmm7 ; normalize |
haddps xmm7,xmm7 |
haddps xmm7,xmm7 |
rsqrtps xmm7,xmm7 |
mulps xmm7,.n1 ;xmm0 |
; andps xmm7,[abs_z_coof] |
movaps .cnv,xmm7 |
movaps xmm6,xmm2 |
minps xmm6,[tex_m2] ; float TEX_X-2,TEX_Y-2 |
cvttps2dq xmm7,xmm6 |
cvtdq2ps xmm4,xmm7 |
subps xmm6,xmm4 |
movlps .xf,xmm6 |
; movaps xmm5,.lv |
mov eax,lights_aligned ; global |
align 16 |
.again_col: |
movaps xmm0,[eax] ; calc multple lights |
mulps xmm0,.cnv ;.lv ; last dword should be zeroed |
haddps xmm0,xmm0 |
haddps xmm0,xmm0 |
; andps xmm0,[abs_val] ;calc absolute value |
if 1 |
; stencil |
movhlps xmm6,xmm2 |
movhlps xmm4,xmm2 |
addss xmm6,[aprox] |
subss xmm4,[aprox] |
cmpnltss xmm6,dword[esi] |
cmpnltss xmm4,dword[esi] |
xorps xmm6,xmm4 |
xorps xmm4,xmm4 |
movd ebx,xmm6 |
cmp ebx,-1 |
jne .no_reflective |
end if |
movaps xmm4,xmm0 |
mulps xmm4,xmm4 |
mulps xmm4,xmm4 |
mulps xmm4,xmm4 |
mulps xmm4,xmm4 |
mulps xmm4,[eax+48] |
.no_reflective: |
maxps xmm0,[the_zero] |
; movaps xmm1,xmm0 |
mulps xmm0,[eax+16] |
addps xmm4,xmm0 |
addps xmm4,[eax+32] |
maxps xmm5,xmm4 |
add eax,64 |
cmp eax,lights_aligned_end |
jnz .again_col |
minps xmm5,[mask_255f] |
; texture coords work |
movd eax,xmm7 |
psrldq xmm7,4 |
movd ebx,xmm7 |
shl ebx,TEX_SHIFT |
add eax,ebx |
lea eax,[eax*3] |
add eax,.tx_ptr |
mov ebx,eax |
add ebx,TEX_X*3 |
movd xmm7,[eax] |
movd xmm6,[eax+3] |
movd xmm4,[ebx] |
movd xmm3,[ebx+3] |
punpcklbw xmm7,xmm6 ;xmm7 r1 r2 g1 g2 b1 b2 |
punpcklbw xmm4,xmm3 ;xmm4 r3 r4 g3 g4 b3 b4 |
punpcklwd xmm7,xmm4 ;xmm7 r1 r2 r3 r4 g1 g2 g3 g4 ... |
movdqa xmm6,xmm7 |
movdqa xmm4,xmm7 |
psrldq xmm6,4 |
psrldq xmm4,8 |
punpcklbw xmm7,[the_zero] ; broadcasted 0 |
punpcklbw xmm6,[the_zero] |
punpcklbw xmm4,[the_zero] |
punpcklwd xmm7,[the_zero] |
punpcklwd xmm6,[the_zero] |
punpcklwd xmm4,[the_zero] |
; calc w ......... |
movlps xmm3,[the_one] ; broadcasted dword 1.0 |
cvtdq2ps xmm7,xmm7 |
subps xmm3,.xf |
cvtdq2ps xmm6,xmm6 |
movhps xmm3,.xf |
cvtdq2ps xmm4,xmm4 |
movaps xmm1,xmm3 ; 1-xf, 1-yf, xf, yf |
shufps xmm3,xmm3,10001000b |
shufps xmm1,xmm1,11110101b |
mulps xmm3,xmm1 |
mulps xmm7,xmm3 |
mulps xmm6,xmm3 |
mulps xmm4,xmm3 |
haddps xmm7,xmm7 ; r |
haddps xmm6,xmm6 ; g |
haddps xmm4,xmm4 ; b |
haddps xmm7,xmm7 ; r |
haddps xmm6,xmm6 ; g |
haddps xmm4,xmm4 ; b |
movlhps xmm7,xmm6 |
shufps xmm7,xmm7,11101000b |
movlhps xmm7,xmm4 |
mulps xmm5,xmm7 |
cvtps2dq xmm5,xmm5 |
psrld xmm5,8 |
movd xmm6,[edi] |
packssdw xmm5,xmm5 |
packuswb xmm5,xmm5 |
paddusb xmm5,xmm6 |
movd [edi],xmm5 |
.skip: |
add edi,4 |
add esi,4 |
; addps xmm0,.dn |
movaps xmm0,.n1 ; cur normal |
addps xmm0,.dn |
addps xmm2,.dtx |
movaps .n1,xmm0 |
sub ecx,1 |
jnz .ddraw |
.end_line: |
add esp,350 |
pop ebp |
ret |
/programs/demos/view3ds/3r_phg.inc |
---|
1,528 → 1,528 |
; Real Phong shading implemented in flat assembler |
; by Maciej Guba. |
; http://macgub.vxm.pl |
; Fixed-point precision: edge x coordinates carry ROUND2 fractional bits |
; (they are shifted left by ROUND2 below and shifted back before each scanline call). |
ROUND2 equ 10 |
real_phong_tri_z: |
;----procedure renders a Phong shaded triangle with z coord |
;----interpolation ( Catmull algorithm )----------------- |
;----the normal vector is normalized in every pixel ----- |
;------------------in - eax - x1 shl 16 + y1 ------------ |
;---------------------- ebx - x2 shl 16 + y2 ------------ |
;---------------------- ecx - x3 shl 16 + y3 ------------ |
;---------------------- esi - pointer to Z-buffer filled- |
;---------------------- with dd float variables-------- |
;---------------------- edi - pointer to screen buffer--- |
;---------------------- xmm0 - 1st normal vector -------- |
;---------------------- xmm1 - 2nd normal vector -------- |
;---------------------- xmm2 - 3rd normal vector -------- |
;---------------------- xmm3 - normalized light vector -- |
;---------------------- xmm4 - lo -> hi z1, z2, z3 coords |
;---------------------- as dwords floats --------------- |
;---------------------- xmm5 - lo -> hi y_min, y_max, --- |
;---------------------- x_min, x_max as dword integers - |
;---------------------- stack - no parameters ----------- |
;-------------------------------------------------------- |
;----------------- procedure doesn't save registers !! -- |
; Flow: sort the vertices by ascending y, leave early when no vertex |
; coordinate passes the clip-rectangle test, compute per-scanline deltas |
; for the three edges, then call real_phong_line_z once per scanline |
; (first for y1..y2-1, then for y2..y3-1). |
push ebp |
mov ebp,esp |
sub esp,512 |
; ebp is moved down and rounded to a 16-byte boundary so the locals that |
; are accessed with movaps/movdqa below sit on aligned addresses |
sub ebp,16 |
and ebp,0xfffffff0 |
; --- locals, addressed relative to the aligned ebp --- |
; vertex normals and light vector, 16 bytes each |
.1_nv equ [ebp-16] |
.2_nv equ [ebp-32] |
.3_nv equ [ebp-48] |
.l_v equ [ebp-64] |
; z1, z2, z3 floats, written together by one movaps from xmm4 |
.z3 equ [ebp-72] |
.z2 equ [ebp-76] |
.z1 equ [ebp-80] |
; vertex coordinates: each input dword is stored whole, so y is the |
; low word and x the word right above it |
.x1 equ [ebp-82] |
.y1 equ [ebp-84] |
.x2 equ [ebp-86] |
.y2 equ [ebp-88] |
.x3 equ [ebp-90] |
.y3 equ [ebp-92] |
.Zbuf equ [ebp-96] |
; clip rectangle, written together by one movdqa from xmm5 |
.x_max equ [ebp-100] |
.x_min equ [ebp-104] |
.y_max equ [ebp-108] |
.y_min equ [ebp-112] |
.screen equ [ebp-116] |
; per-scanline x steps (fixed point, ROUND2 fractional bits) for edges 1-2, 1-3, 2-3 |
.dx12 equ [ebp-120] |
.dx13 equ [ebp-124] |
.dx23 equ [ebp-128] |
; per-scanline normal vector steps for the same edges |
.dn12 equ [ebp-144] |
.dn13 equ [ebp-160] |
.dn23 equ [ebp-176] |
; per-scanline z steps for the same edges |
.dz12 equ [ebp-180] |
.dz13 equ [ebp-184] |
.dz23 equ [ebp-188] |
.cnv1 equ [ebp-208] ; cur normal vectors |
.cnv2 equ [ebp-224] |
; current z on both edges; cz1 and cz2 are adjacent so one movlps loads both |
.cz2 equ [ebp-228] |
.cz1 equ [ebp-232] |
.sort3: ; sort triangle coordinates... |
; compare the y words (low 16 bits); each swap also swaps the matching |
; z lanes in xmm4 and the matching normal registers |
cmp ax,bx |
jle .sort1 |
xchg eax,ebx |
shufps xmm4,xmm4,11100001b |
movaps xmm6,xmm0 |
movaps xmm0,xmm1 |
movaps xmm1,xmm6 |
.sort1: |
cmp bx,cx |
jle .sort2 |
xchg ebx,ecx |
shufps xmm4,xmm4,11011000b |
movaps xmm6,xmm1 |
movaps xmm1,xmm2 |
movaps xmm2,xmm6 |
jmp .sort3 |
.sort2: |
; here y1 <= y2 <= y3; store the sorted z values, packed coordinates and clip rectangle |
movaps .z1,xmm4 |
mov .y1,eax |
mov .y2,ebx |
mov .y3,ecx |
movdqa .y_min,xmm5 |
if 1 ; check if at least a fragment |
packssdw xmm5,xmm5 ; of the triangle is in the visible area |
pshuflw xmm5,xmm5,11011000b |
; 16 bytes starting at .y3: words y3, x3, y2, x2, y1, x1 followed by the two halves of .z1 |
movdqu xmm7,.y3 |
movdqa xmm6,xmm5 |
pshufd xmm5,xmm5,0 ; xmm5 lo-hi -> broadcasted y_min, x_min |
pshufd xmm6,xmm6,01010101b ; xmm6 -> broadcasted y_max, x_max |
movdqa xmm4,xmm7 |
; xmm7 = coord > min, xmm4 = coord > max; the xor marks words that are above min but not above max |
pcmpgtw xmm7,xmm5 |
pcmpgtw xmm4,xmm6 |
pxor xmm7,xmm4 |
pmovmskb eax,xmm7 |
; keep the sign bit of the high byte of every word |
; NOTE(review): pmovmskb yields 16 bits and bits 13 and 15 belong to the two |
; words of .z1, so the bit pattern of z1 can take part in this test - |
; confirm whether a mask of 0x0aaa (six coordinate words) was intended |
and eax,0x00aaaaaa |
or eax,eax |
jz .rpt_loop2_end |
end if |
movaps .1_nv,xmm0 |
movaps .2_nv,xmm1 |
movaps .3_nv,xmm2 |
movaps .l_v,xmm3 |
mov .Zbuf,esi |
mov .screen,edi |
; --- edge 1-2: bx = y2 - y1; a zero height zeroes the steps instead of dividing --- |
mov bx,.y2 ; calc deltas |
sub bx,.y1 |
jnz .rpt_dx12_make |
xorps xmm7,xmm7 |
mov dword .dx12,0 |
mov dword .dz12,0 |
movaps .dn12,xmm7 |
jmp .rpt_dx12_done |
.rpt_dx12_make: |
; dx12 = ((x2 - x1) shl ROUND2) / (y2 - y1), signed |
mov ax,.x2 |
sub ax,.x1 |
cwde |
movsx ebx,bx |
shl eax,ROUND2 |
cdq |
idiv ebx |
mov .dx12,eax |
; dz12 = (z2 - z1) / (y2 - y1) |
cvtsi2ss xmm6,ebx |
movss xmm5,.z2 |
subss xmm5,.z1 |
divss xmm5,xmm6 |
movss .dz12,xmm5 |
; dn12 = (normal2 - normal1) / (y2 - y1), all four lanes |
movaps xmm0,.2_nv |
subps xmm0,.1_nv |
shufps xmm6,xmm6,0 |
divps xmm0,xmm6 |
movaps .dn12,xmm0 |
.rpt_dx12_done: |
; --- edge 1-3: same computation with y3 - y1 --- |
mov bx,.y3 ; calc deltas |
sub bx,.y1 |
jnz .rpt_dx13_make |
xorps xmm7,xmm7 |
mov dword .dx13,0 |
mov dword .dz13,0 |
movaps .dn13,xmm7 |
jmp .rpt_dx13_done |
.rpt_dx13_make: |
mov ax,.x3 |
sub ax,.x1 |
cwde |
movsx ebx,bx |
shl eax,ROUND2 |
cdq |
idiv ebx |
mov .dx13,eax |
cvtsi2ss xmm6,ebx |
movss xmm5,.z3 |
subss xmm5,.z1 |
divss xmm5,xmm6 |
movss .dz13,xmm5 |
movaps xmm0,.3_nv |
subps xmm0,.1_nv |
shufps xmm6,xmm6,0 |
divps xmm0,xmm6 |
movaps .dn13,xmm0 |
.rpt_dx13_done: |
; --- edge 2-3: same computation with y3 - y2 --- |
mov bx,.y3 ; calc deltas |
sub bx,.y2 |
jnz .rpt_dx23_make |
xorps xmm7,xmm7 |
mov dword .dx23,0 |
mov dword .dz23,0 |
movaps .dn23,xmm7 |
jmp .rpt_dx23_done |
.rpt_dx23_make: |
mov ax,.x3 |
sub ax,.x2 |
cwde |
movsx ebx,bx |
shl eax,ROUND2 |
cdq |
idiv ebx |
mov .dx23,eax |
cvtsi2ss xmm6,ebx |
movss xmm5,.z3 |
subss xmm5,.z2 |
divss xmm5,xmm6 |
movss .dz23,xmm5 |
movaps xmm0,.3_nv |
subps xmm0,.2_nv |
shufps xmm6,xmm6,0 |
divps xmm0,xmm6 |
movaps .dn23,xmm0 |
.rpt_dx23_done: |
; both edge walkers start at vertex 1: eax = ebx = x1 in fixed point, |
; cz1 = cz2 = z1, cnv1 = cnv2 = normal 1 |
movsx eax,word .x1 |
shl eax,ROUND2 |
mov ebx,eax |
mov edx,.z1 |
mov .cz1,edx |
mov .cz2,edx |
movaps xmm0,.1_nv |
movaps .cnv1,xmm0 |
movaps .cnv2,xmm0 |
movsx ecx,word .y1 |
cmp cx,.y2 |
jge .rpt_loop1_end |
.rpt_loop1: |
; upper part, scanlines y1 .. y2-1: eax follows edge 1-3, ebx follows edge 1-2. |
; pushad/popad keep the integer registers across the call; the xmm arguments |
; are reloaded from the locals before and after it |
pushad |
movaps xmm2,.y_min |
movaps xmm0,.cnv1 |
movaps xmm1,.cnv2 |
; loads cz1 into lane 0 and cz2 into lane 1 |
movlps xmm3,.cz1 |
movaps xmm4,.l_v |
; drop the fractional bits: the scanline routine takes integer x1, x2 |
sar ebx,ROUND2 |
sar eax,ROUND2 |
mov edi,.screen |
mov esi,.Zbuf |
call real_phong_line_z |
popad |
; advance both edge walkers by one scanline |
movaps xmm0,.cnv1 |
movaps xmm1,.cnv2 |
movss xmm2,.cz1 |
movss xmm3,.cz2 |
addps xmm0,.dn13 |
addps xmm1,.dn12 |
addss xmm2,.dz13 |
addss xmm3,.dz12 |
add eax,.dx13 |
add ebx,.dx12 |
movaps .cnv1,xmm0 |
movaps .cnv2,xmm1 |
movss .cz1,xmm2 |
movss .cz2,xmm3 |
add ecx,1 |
cmp cx,.y2 |
jl .rpt_loop1 |
.rpt_loop1_end: |
; lower part, scanlines y2 .. y3-1: the second walker restarts at vertex 2 |
; (x2, z2, normal 2) and follows edge 2-3; the first keeps following edge 1-3 |
movsx ecx,word .y2 |
cmp cx,.y3 |
jge .rpt_loop2_end |
movsx ebx,word .x2 ; eax - cur x1 |
shl ebx,ROUND2 ; ebx - cur x2 |
push dword .z2 |
pop dword .cz2 |
movaps xmm0,.2_nv |
movaps .cnv2,xmm0 |
.rpt_loop2: |
pushad |
movaps xmm2,.y_min |
movaps xmm0,.cnv1 |
movaps xmm1,.cnv2 |
movlps xmm3,.cz1 |
movaps xmm4,.l_v |
sar ebx,ROUND2 |
sar eax,ROUND2 |
mov edi,.screen |
mov esi,.Zbuf |
call real_phong_line_z |
popad |
movaps xmm0,.cnv1 |
movaps xmm1,.cnv2 |
movss xmm2,.cz1 |
movss xmm3,.cz2 |
addps xmm0,.dn13 |
addps xmm1,.dn23 |
addss xmm2,.dz13 |
addss xmm3,.dz23 |
add eax,.dx13 |
add ebx,.dx23 |
movaps .cnv1,xmm0 |
movaps .cnv2,xmm1 |
movss .cz1,xmm2 |
movss .cz2,xmm3 |
add ecx,1 |
cmp cx,.y3 |
jl .rpt_loop2 |
.rpt_loop2_end: |
; release the 512 bytes reserved in the prologue and restore the caller's ebp |
add esp,512 |
pop ebp |
ret |
align 16 |
real_phong_line_z: |
; Draws one horizontal span of a Phong shaded triangle: clips the span to |
; the given rectangle, interpolates normal and z per pixel, and for every |
; pixel that passes the z-buffer test writes the z value and a colour |
; computed from the records between lights_aligned and lights_aligned_end. |
; in: |
; xmm0 - normal vector 1 |
; xmm1 - normal vector 2 |
; xmm3 - lo -> hi z1, z2 coords as dwords floats |
; xmm2 - lo -> hi y_min, y_max, x_min, x_max |
; as dword integers |
; xmm4 - normalized light vector (stored in .lv, not read again below) |
; eax - x1 |
; ebx - x2 |
; ecx - y |
; edi - screen buffer |
; esi - z buffer filled with dd floats |
; out: none; eax, ebx, ecx, edx, esi, edi and xmm0-xmm7 are modified |
push ebp |
mov ebp,esp |
sub esp,160 |
; align the frame base to 16 bytes for the movaps accesses to the locals |
sub ebp,16 |
and ebp,0xfffffff0 |
.n1 equ [ebp-16] |
.n2 equ [ebp-32] |
.lv equ [ebp-48] |
.lx1 equ [ebp-52] |
.lx2 equ [ebp-56] |
.z2 equ [ebp-60] |
.z1 equ [ebp-64] |
.screen equ [ebp-68] |
.zbuff equ [ebp-72] |
; clip rectangle as four consecutive words, written by one movq |
.x_max equ [ebp-74] |
.x_min equ [ebp-76] |
.y_max equ [ebp-78] |
.y_min equ [ebp-80] |
.dn equ [ebp-96] |
.dz equ [ebp-100] |
.y equ [ebp-104] |
.cnv equ [ebp-128] |
mov .y,ecx |
; narrow the four dwords to words with signed saturation and store them |
; as y_min, y_max, x_min, x_max |
packssdw xmm2,xmm2 |
movq .y_min,xmm2 |
; nothing to draw unless y_min <= y < y_max |
cmp cx,.y_min |
jl .end_rp_line |
cmp cx,.y_max |
jge .end_rp_line ; |
; nothing to draw for an empty span; otherwise make eax the left end, |
; swapping the normals and the two z values along with the coordinates |
cmp eax,ebx |
je .end_rp_line |
jl @f |
xchg eax,ebx |
movaps xmm7,xmm0 |
movaps xmm0,xmm1 |
movaps xmm1,xmm7 |
shufps xmm3,xmm3,11100001b |
@@: |
; nothing to draw when the span lies entirely right or left of the clip range |
cmp ax,.x_max |
jge .end_rp_line |
cmp bx,.x_min |
jle .end_rp_line |
movaps .lv,xmm4 |
movaps .n1,xmm0 |
movaps .n2,xmm1 |
mov .lx1,eax |
mov .lx2,ebx |
; stores z1 and z2 (8 bytes) |
movlps .z1,xmm3 |
; per-pixel steps: dn = (n2 - n1) / (x2 - x1), dz = (z2 - z1) / (x2 - x1) |
sub ebx,eax |
cvtsi2ss xmm7,ebx |
shufps xmm7,xmm7,0 |
subps xmm1,xmm0 |
divps xmm1,xmm7 |
movaps .dn,xmm1 |
psrldq xmm3,4 |
subss xmm3,.z1 |
divss xmm3,xmm7 |
movss .dz,xmm3 |
mov ebx,.lx1 |
cmp bx,.x_min ; clipping on function4 |
jge @f |
; left clip: advance z1 and n1 by (x_min - x1) steps and start at x_min |
movzx eax,word .x_min |
sub eax,ebx |
cvtsi2ss xmm7,eax |
shufps xmm7,xmm7,0 |
mulss xmm3,xmm7 |
mulps xmm1,xmm7 |
addss xmm3,.z1 |
addps xmm1,.n1 |
movsx eax,word .x_min |
movss .z1,xmm3 |
movaps .n1,xmm1 |
mov dword .lx1,eax |
@@: |
; right clip: lx2 = x_max when lx2 >= x_max |
movzx eax,word .x_max |
cmp .lx2,eax |
jl @f |
mov .lx2,eax |
@@: |
; byte offset of the first pixel = (size_x_var * y + lx1) * 4, applied to |
; both the screen and the z buffer pointers (mul also overwrites edx) |
movzx eax,word[size_x_var] |
mul dword .y |
; mov edx,.x1 |
add eax,.lx1 |
shl eax,2 |
add edi,eax |
add esi,eax |
; ecx = number of pixels in the clipped span |
mov ecx,.lx2 |
sub ecx,.lx1 |
; xmm0 = current normal, xmm2 = current z for the pixel loop |
movaps xmm0,.n1 |
movss xmm2,.z1 |
align 16 |
.ddraw: |
; z test: cmpnltss leaves an all-ones mask when z is not less than the |
; stored value, and such pixels are skipped |
movss xmm7,xmm2 |
cmpnltss xmm7,dword[esi] |
movd eax,xmm7 |
or eax,eax |
jnz .skip |
movss [esi],xmm2 |
; scale the interpolated normal by the approximate reciprocal square root |
; of the horizontal sum of its squared lanes |
movaps xmm7,xmm0 |
mulps xmm7,xmm7 ; normalize |
haddps xmm7,xmm7 |
haddps xmm7,xmm7 |
rsqrtps xmm7,xmm7 |
mulps xmm7,xmm0 |
movaps .cnv,xmm7 |
mov edx,lights_aligned ; lights - global variable |
xorps xmm1,xmm1 ; instead global can be used .lv - light vect. |
@@: |
; one 64-byte record per iteration: [edx] is dotted with the normal, |
; [edx+16] scales the dot product and [edx+48] scales its 8th power |
; (presumably light vector, diffuse and specular colours - confirm |
; against the definition of lights_aligned) |
movaps xmm6,[edx+16] |
movaps xmm5,[edx] |
movaps xmm3,[edx+48] |
andps xmm5,[zero_hgst_dd] ; global |
mulps xmm5,.cnv ;.lv ; last dword should be zeroed |
haddps xmm5,xmm5 |
haddps xmm5,xmm5 |
; mulps xmm5,[env_const2] |
; maxps xmm5,[dot_min] |
; minps xmm5,[dot_max] |
movaps xmm7,xmm5 |
; mulps xmm7,[env_const2] |
; mulps xmm7,[env_const2] |
; maxps xmm7,[dot_min] |
; minps xmm7,[dot_max] |
; three squarings give dot^8 in xmm7 |
mulps xmm7,xmm7 |
mulps xmm7,xmm7 |
mulps xmm5,xmm6 |
mulps xmm7,xmm7 |
mulps xmm7,xmm3 |
addps xmm5,xmm7 |
; clamp to the upper bound, then keep the per-lane maximum over all records |
; (xmm1 starts at zero, so negative results do not contribute) |
minps xmm5,[mask_255f] ; global |
maxps xmm1,xmm5 |
; movq xmm3,[edx+20] ; minimal color |
; punpcklwd xmm3,[minimum0] |
; cvtdq2ps xmm3,xmm3 |
; maxps xmm1,xmm3 |
add edx,64 |
cmp edx,lights_aligned_end ; global |
jnz @b |
; convert to integers, pack down to four saturated bytes and store one dword pixel |
cvtps2dq xmm1,xmm1 |
packssdw xmm1,xmm1 |
packuswb xmm1,xmm1 |
movd [edi],xmm1 |
.skip: |
; next pixel: advance both buffers, the normal and z |
add edi,4 |
add esi,4 |
addps xmm0,.dn |
addss xmm2,.dz |
sub ecx,1 |
jnz .ddraw |
.end_rp_line: |
; release the 160 bytes reserved in the prologue and restore the caller's ebp |
add esp,160 |
pop ebp |
ret |
; Real Phong shading implemented in flat assembler |
; by Maciej Guba. |
; http://macgub.co.pl |
; Fixed-point precision: edge x coordinates carry ROUND2 fractional bits |
; (they are shifted left by ROUND2 below and shifted back before each scanline call). |
ROUND2 equ 10 |
real_phong_tri_z: |
;----procedure renders a Phong shaded triangle with z coord |
;----interpolation ( Catmull algorithm )----------------- |
;----the normal vector is normalized in every pixel ----- |
;------------------in - eax - x1 shl 16 + y1 ------------ |
;---------------------- ebx - x2 shl 16 + y2 ------------ |
;---------------------- ecx - x3 shl 16 + y3 ------------ |
;---------------------- esi - pointer to Z-buffer filled- |
;---------------------- with dd float variables-------- |
;---------------------- edi - pointer to screen buffer--- |
;---------------------- xmm0 - 1st normal vector -------- |
;---------------------- xmm1 - 2nd normal vector -------- |
;---------------------- xmm2 - 3rd normal vector -------- |
;---------------------- xmm3 - normalized light vector -- |
;---------------------- xmm4 - lo -> hi z1, z2, z3 coords |
;---------------------- as dwords floats --------------- |
;---------------------- xmm5 - lo -> hi y_min, y_max, --- |
;---------------------- x_min, x_max as dword integers - |
;---------------------- stack - no parameters ----------- |
;-------------------------------------------------------- |
;----------------- procedure doesn't save registers !! -- |
; Flow: sort the vertices by ascending y, leave early when no vertex |
; coordinate passes the clip-rectangle test, compute per-scanline deltas |
; for the three edges, then call real_phong_line_z once per scanline |
; (first for y1..y2-1, then for y2..y3-1). |
push ebp |
mov ebp,esp |
sub esp,512 |
; ebp is moved down and rounded to a 16-byte boundary so the locals that |
; are accessed with movaps/movdqa below sit on aligned addresses |
sub ebp,16 |
and ebp,0xfffffff0 |
; --- locals, addressed relative to the aligned ebp --- |
; vertex normals and light vector, 16 bytes each |
.1_nv equ [ebp-16] |
.2_nv equ [ebp-32] |
.3_nv equ [ebp-48] |
.l_v equ [ebp-64] |
; z1, z2, z3 floats, written together by one movaps from xmm4 |
.z3 equ [ebp-72] |
.z2 equ [ebp-76] |
.z1 equ [ebp-80] |
; vertex coordinates: each input dword is stored whole, so y is the |
; low word and x the word right above it |
.x1 equ [ebp-82] |
.y1 equ [ebp-84] |
.x2 equ [ebp-86] |
.y2 equ [ebp-88] |
.x3 equ [ebp-90] |
.y3 equ [ebp-92] |
.Zbuf equ [ebp-96] |
; clip rectangle, written together by one movdqa from xmm5 |
.x_max equ [ebp-100] |
.x_min equ [ebp-104] |
.y_max equ [ebp-108] |
.y_min equ [ebp-112] |
.screen equ [ebp-116] |
; per-scanline x steps (fixed point, ROUND2 fractional bits) for edges 1-2, 1-3, 2-3 |
.dx12 equ [ebp-120] |
.dx13 equ [ebp-124] |
.dx23 equ [ebp-128] |
; per-scanline normal vector steps for the same edges |
.dn12 equ [ebp-144] |
.dn13 equ [ebp-160] |
.dn23 equ [ebp-176] |
; per-scanline z steps for the same edges |
.dz12 equ [ebp-180] |
.dz13 equ [ebp-184] |
.dz23 equ [ebp-188] |
.cnv1 equ [ebp-208] ; cur normal vectors |
.cnv2 equ [ebp-224] |
; current z on both edges; cz1 and cz2 are adjacent so one movlps loads both |
.cz2 equ [ebp-228] |
.cz1 equ [ebp-232] |
.sort3: ; sort triangle coordinates... |
; compare the y words (low 16 bits); each swap also swaps the matching |
; z lanes in xmm4 and the matching normal registers |
cmp ax,bx |
jle .sort1 |
xchg eax,ebx |
shufps xmm4,xmm4,11100001b |
movaps xmm6,xmm0 |
movaps xmm0,xmm1 |
movaps xmm1,xmm6 |
.sort1: |
cmp bx,cx |
jle .sort2 |
xchg ebx,ecx |
shufps xmm4,xmm4,11011000b |
movaps xmm6,xmm1 |
movaps xmm1,xmm2 |
movaps xmm2,xmm6 |
jmp .sort3 |
.sort2: |
; here y1 <= y2 <= y3; store the sorted z values, packed coordinates and clip rectangle |
movaps .z1,xmm4 |
mov .y1,eax |
mov .y2,ebx |
mov .y3,ecx |
movdqa .y_min,xmm5 |
if 1 ; check if at least a fragment |
packssdw xmm5,xmm5 ; of the triangle is in the visible area |
pshuflw xmm5,xmm5,11011000b |
; 16 bytes starting at .y3: words y3, x3, y2, x2, y1, x1 followed by the two halves of .z1 |
movdqu xmm7,.y3 |
movdqa xmm6,xmm5 |
pshufd xmm5,xmm5,0 ; xmm5 lo-hi -> broadcasted y_min, x_min |
pshufd xmm6,xmm6,01010101b ; xmm6 -> broadcasted y_max, x_max |
movdqa xmm4,xmm7 |
; xmm7 = coord > min, xmm4 = coord > max; the xor marks words that are above min but not above max |
pcmpgtw xmm7,xmm5 |
pcmpgtw xmm4,xmm6 |
pxor xmm7,xmm4 |
pmovmskb eax,xmm7 |
; keep the sign bit of the high byte of every word |
; NOTE(review): pmovmskb yields 16 bits and bits 13 and 15 belong to the two |
; words of .z1, so the bit pattern of z1 can take part in this test - |
; confirm whether a mask of 0x0aaa (six coordinate words) was intended |
and eax,0x00aaaaaa |
or eax,eax |
jz .rpt_loop2_end |
end if |
movaps .1_nv,xmm0 |
movaps .2_nv,xmm1 |
movaps .3_nv,xmm2 |
movaps .l_v,xmm3 |
mov .Zbuf,esi |
mov .screen,edi |
; --- edge 1-2: bx = y2 - y1; a zero height zeroes the steps instead of dividing --- |
mov bx,.y2 ; calc deltas |
sub bx,.y1 |
jnz .rpt_dx12_make |
xorps xmm7,xmm7 |
mov dword .dx12,0 |
mov dword .dz12,0 |
movaps .dn12,xmm7 |
jmp .rpt_dx12_done |
.rpt_dx12_make: |
; dx12 = ((x2 - x1) shl ROUND2) / (y2 - y1), signed |
mov ax,.x2 |
sub ax,.x1 |
cwde |
movsx ebx,bx |
shl eax,ROUND2 |
cdq |
idiv ebx |
mov .dx12,eax |
; dz12 = (z2 - z1) / (y2 - y1) |
cvtsi2ss xmm6,ebx |
movss xmm5,.z2 |
subss xmm5,.z1 |
divss xmm5,xmm6 |
movss .dz12,xmm5 |
; dn12 = (normal2 - normal1) / (y2 - y1), all four lanes |
movaps xmm0,.2_nv |
subps xmm0,.1_nv |
shufps xmm6,xmm6,0 |
divps xmm0,xmm6 |
movaps .dn12,xmm0 |
.rpt_dx12_done: |
; --- edge 1-3: same computation with y3 - y1 --- |
mov bx,.y3 ; calc deltas |
sub bx,.y1 |
jnz .rpt_dx13_make |
xorps xmm7,xmm7 |
mov dword .dx13,0 |
mov dword .dz13,0 |
movaps .dn13,xmm7 |
jmp .rpt_dx13_done |
.rpt_dx13_make: |
mov ax,.x3 |
sub ax,.x1 |
cwde |
movsx ebx,bx |
shl eax,ROUND2 |
cdq |
idiv ebx |
mov .dx13,eax |
cvtsi2ss xmm6,ebx |
movss xmm5,.z3 |
subss xmm5,.z1 |
divss xmm5,xmm6 |
movss .dz13,xmm5 |
movaps xmm0,.3_nv |
subps xmm0,.1_nv |
shufps xmm6,xmm6,0 |
divps xmm0,xmm6 |
movaps .dn13,xmm0 |
.rpt_dx13_done: |
; --- edge 2-3: same computation with y3 - y2 --- |
mov bx,.y3 ; calc deltas |
sub bx,.y2 |
jnz .rpt_dx23_make |
xorps xmm7,xmm7 |
mov dword .dx23,0 |
mov dword .dz23,0 |
movaps .dn23,xmm7 |
jmp .rpt_dx23_done |
.rpt_dx23_make: |
mov ax,.x3 |
sub ax,.x2 |
cwde |
movsx ebx,bx |
shl eax,ROUND2 |
cdq |
idiv ebx |
mov .dx23,eax |
cvtsi2ss xmm6,ebx |
movss xmm5,.z3 |
subss xmm5,.z2 |
divss xmm5,xmm6 |
movss .dz23,xmm5 |
movaps xmm0,.3_nv |
subps xmm0,.2_nv |
shufps xmm6,xmm6,0 |
divps xmm0,xmm6 |
movaps .dn23,xmm0 |
.rpt_dx23_done: |
; both edge walkers start at vertex 1: eax = ebx = x1 in fixed point, |
; cz1 = cz2 = z1, cnv1 = cnv2 = normal 1 |
movsx eax,word .x1 |
shl eax,ROUND2 |
mov ebx,eax |
mov edx,.z1 |
mov .cz1,edx |
mov .cz2,edx |
movaps xmm0,.1_nv |
movaps .cnv1,xmm0 |
movaps .cnv2,xmm0 |
movsx ecx,word .y1 |
cmp cx,.y2 |
jge .rpt_loop1_end |
.rpt_loop1: |
; upper part, scanlines y1 .. y2-1: eax follows edge 1-3, ebx follows edge 1-2. |
; pushad/popad keep the integer registers across the call; the xmm arguments |
; are reloaded from the locals before and after it |
pushad |
movaps xmm2,.y_min |
movaps xmm0,.cnv1 |
movaps xmm1,.cnv2 |
; loads cz1 into lane 0 and cz2 into lane 1 |
movlps xmm3,.cz1 |
movaps xmm4,.l_v |
; drop the fractional bits: the scanline routine takes integer x1, x2 |
sar ebx,ROUND2 |
sar eax,ROUND2 |
mov edi,.screen |
mov esi,.Zbuf |
call real_phong_line_z |
popad |
; advance both edge walkers by one scanline |
movaps xmm0,.cnv1 |
movaps xmm1,.cnv2 |
movss xmm2,.cz1 |
movss xmm3,.cz2 |
addps xmm0,.dn13 |
addps xmm1,.dn12 |
addss xmm2,.dz13 |
addss xmm3,.dz12 |
add eax,.dx13 |
add ebx,.dx12 |
movaps .cnv1,xmm0 |
movaps .cnv2,xmm1 |
movss .cz1,xmm2 |
movss .cz2,xmm3 |
add ecx,1 |
cmp cx,.y2 |
jl .rpt_loop1 |
.rpt_loop1_end: |
; lower part, scanlines y2 .. y3-1: the second walker restarts at vertex 2 |
; (x2, z2, normal 2) and follows edge 2-3; the first keeps following edge 1-3 |
movsx ecx,word .y2 |
cmp cx,.y3 |
jge .rpt_loop2_end |
movsx ebx,word .x2 ; eax - cur x1 |
shl ebx,ROUND2 ; ebx - cur x2 |
push dword .z2 |
pop dword .cz2 |
movaps xmm0,.2_nv |
movaps .cnv2,xmm0 |
.rpt_loop2: |
pushad |
movaps xmm2,.y_min |
movaps xmm0,.cnv1 |
movaps xmm1,.cnv2 |
movlps xmm3,.cz1 |
movaps xmm4,.l_v |
sar ebx,ROUND2 |
sar eax,ROUND2 |
mov edi,.screen |
mov esi,.Zbuf |
call real_phong_line_z |
popad |
movaps xmm0,.cnv1 |
movaps xmm1,.cnv2 |
movss xmm2,.cz1 |
movss xmm3,.cz2 |
addps xmm0,.dn13 |
addps xmm1,.dn23 |
addss xmm2,.dz13 |
addss xmm3,.dz23 |
add eax,.dx13 |
add ebx,.dx23 |
movaps .cnv1,xmm0 |
movaps .cnv2,xmm1 |
movss .cz1,xmm2 |
movss .cz2,xmm3 |
add ecx,1 |
cmp cx,.y3 |
jl .rpt_loop2 |
.rpt_loop2_end: |
; release the 512 bytes reserved in the prologue and restore the caller's ebp |
add esp,512 |
pop ebp |
ret |
align 16 |
real_phong_line_z: |
; Draws one horizontal span of a Phong shaded triangle: clips the span to |
; the given rectangle, interpolates normal and z per pixel, and for every |
; pixel that passes the z-buffer test writes the z value and a colour |
; computed from the records between lights_aligned and lights_aligned_end. |
; in: |
; xmm0 - normal vector 1 |
; xmm1 - normal vector 2 |
; xmm3 - lo -> hi z1, z2 coords as dwords floats |
; xmm2 - lo -> hi y_min, y_max, x_min, x_max |
; as dword integers |
; xmm4 - normalized light vector (stored in .lv, not read again below) |
; eax - x1 |
; ebx - x2 |
; ecx - y |
; edi - screen buffer |
; esi - z buffer filled with dd floats |
; out: none; eax, ebx, ecx, edx, esi, edi and xmm0-xmm7 are modified |
push ebp |
mov ebp,esp |
sub esp,160 |
; align the frame base to 16 bytes for the movaps accesses to the locals |
sub ebp,16 |
and ebp,0xfffffff0 |
.n1 equ [ebp-16] |
.n2 equ [ebp-32] |
.lv equ [ebp-48] |
.lx1 equ [ebp-52] |
.lx2 equ [ebp-56] |
.z2 equ [ebp-60] |
.z1 equ [ebp-64] |
.screen equ [ebp-68] |
.zbuff equ [ebp-72] |
; clip rectangle as four consecutive words, written by one movq |
.x_max equ [ebp-74] |
.x_min equ [ebp-76] |
.y_max equ [ebp-78] |
.y_min equ [ebp-80] |
.dn equ [ebp-96] |
.dz equ [ebp-100] |
.y equ [ebp-104] |
.cnv equ [ebp-128] |
mov .y,ecx |
; narrow the four dwords to words with signed saturation and store them |
; as y_min, y_max, x_min, x_max |
packssdw xmm2,xmm2 |
movq .y_min,xmm2 |
; nothing to draw unless y_min <= y < y_max |
cmp cx,.y_min |
jl .end_rp_line |
cmp cx,.y_max |
jge .end_rp_line ; |
; nothing to draw for an empty span; otherwise make eax the left end, |
; swapping the normals and the two z values along with the coordinates |
cmp eax,ebx |
je .end_rp_line |
jl @f |
xchg eax,ebx |
movaps xmm7,xmm0 |
movaps xmm0,xmm1 |
movaps xmm1,xmm7 |
shufps xmm3,xmm3,11100001b |
@@: |
; nothing to draw when the span lies entirely right or left of the clip range |
cmp ax,.x_max |
jge .end_rp_line |
cmp bx,.x_min |
jle .end_rp_line |
movaps .lv,xmm4 |
movaps .n1,xmm0 |
movaps .n2,xmm1 |
mov .lx1,eax |
mov .lx2,ebx |
; stores z1 and z2 (8 bytes) |
movlps .z1,xmm3 |
; per-pixel steps: dn = (n2 - n1) / (x2 - x1), dz = (z2 - z1) / (x2 - x1) |
sub ebx,eax |
cvtsi2ss xmm7,ebx |
shufps xmm7,xmm7,0 |
subps xmm1,xmm0 |
divps xmm1,xmm7 |
movaps .dn,xmm1 |
psrldq xmm3,4 |
subss xmm3,.z1 |
divss xmm3,xmm7 |
movss .dz,xmm3 |
mov ebx,.lx1 |
cmp bx,.x_min ; clipping on function4 |
jge @f |
; left clip: advance z1 and n1 by (x_min - x1) steps and start at x_min |
movzx eax,word .x_min |
sub eax,ebx |
cvtsi2ss xmm7,eax |
shufps xmm7,xmm7,0 |
mulss xmm3,xmm7 |
mulps xmm1,xmm7 |
addss xmm3,.z1 |
addps xmm1,.n1 |
movsx eax,word .x_min |
movss .z1,xmm3 |
movaps .n1,xmm1 |
mov dword .lx1,eax |
@@: |
; right clip: lx2 = x_max when lx2 >= x_max |
movzx eax,word .x_max |
cmp .lx2,eax |
jl @f |
mov .lx2,eax |
@@: |
; byte offset of the first pixel = (size_x_var * y + lx1) * 4, applied to |
; both the screen and the z buffer pointers (mul also overwrites edx) |
movzx eax,word[size_x_var] |
mul dword .y |
; mov edx,.x1 |
add eax,.lx1 |
shl eax,2 |
add edi,eax |
add esi,eax |
; ecx = number of pixels in the clipped span |
mov ecx,.lx2 |
sub ecx,.lx1 |
; xmm0 = current normal, xmm2 = current z for the pixel loop |
movaps xmm0,.n1 |
movss xmm2,.z1 |
align 16 |
.ddraw: |
; z test: cmpnltss leaves an all-ones mask when z is not less than the |
; stored value, and such pixels are skipped |
movss xmm7,xmm2 |
cmpnltss xmm7,dword[esi] |
movd eax,xmm7 |
or eax,eax |
jnz .skip |
movss [esi],xmm2 |
; scale the interpolated normal by the approximate reciprocal square root |
; of the horizontal sum of its squared lanes |
movaps xmm7,xmm0 |
mulps xmm7,xmm7 ; normalize |
haddps xmm7,xmm7 |
haddps xmm7,xmm7 |
rsqrtps xmm7,xmm7 |
mulps xmm7,xmm0 |
movaps .cnv,xmm7 |
mov edx,lights_aligned ; lights - global variable |
xorps xmm1,xmm1 ; instead global can be used .lv - light vect. |
@@: |
; one 64-byte record per iteration: [edx] is dotted with the normal, |
; [edx+16] scales the dot product and [edx+48] scales its 8th power |
; (presumably light vector, diffuse and specular colours - confirm |
; against the definition of lights_aligned) |
movaps xmm6,[edx+16] |
movaps xmm5,[edx] |
movaps xmm3,[edx+48] |
andps xmm5,[zero_hgst_dd] ; global |
mulps xmm5,.cnv ;.lv ; last dword should be zeroed |
haddps xmm5,xmm5 |
haddps xmm5,xmm5 |
; mulps xmm5,[env_const2] |
; maxps xmm5,[dot_min] |
; minps xmm5,[dot_max] |
movaps xmm7,xmm5 |
; mulps xmm7,[env_const2] |
; mulps xmm7,[env_const2] |
; maxps xmm7,[dot_min] |
; minps xmm7,[dot_max] |
; three squarings give dot^8 in xmm7 |
mulps xmm7,xmm7 |
mulps xmm7,xmm7 |
mulps xmm5,xmm6 |
mulps xmm7,xmm7 |
mulps xmm7,xmm3 |
addps xmm5,xmm7 |
; clamp to the upper bound, then keep the per-lane maximum over all records |
; (xmm1 starts at zero, so negative results do not contribute) |
minps xmm5,[mask_255f] ; global |
maxps xmm1,xmm5 |
; movq xmm3,[edx+20] ; minimal color |
; punpcklwd xmm3,[minimum0] |
; cvtdq2ps xmm3,xmm3 |
; maxps xmm1,xmm3 |
add edx,64 |
cmp edx,lights_aligned_end ; global |
jnz @b |
; convert to integers, pack down to four saturated bytes and store one dword pixel |
cvtps2dq xmm1,xmm1 |
packssdw xmm1,xmm1 |
packuswb xmm1,xmm1 |
movd [edi],xmm1 |
.skip: |
; next pixel: advance both buffers, the normal and z |
add edi,4 |
add esi,4 |
addps xmm0,.dn |
addss xmm2,.dz |
sub ecx,1 |
jnz .ddraw |
.end_rp_line: |
; release the 160 bytes reserved in the prologue and restore the caller's ebp |
add esp,160 |
pop ebp |
ret |
/programs/demos/view3ds/3ray_shd.inc |
---|
0,0 → 1,688 |
; Ray casted shadows |
; by Maciej Guba. |
; http://macgub.co.pl |
; Fixed-point precision: edge x coordinates carry ROUND2 fractional bits |
; (they are shifted left by ROUND2 below and shifted back before each scanline call). |
ROUND2 equ 10 |
ray_shad: |
;--- Procedure renders a triangle with ray casted shadow - |
;--- effect. Calc intersection with all triangles in ---- |
;--- every pixel. It's not a real time process, --------- |
;--- especially when many triangles are computed. ------- |
;------in - eax - x1 shl 16 + y1 ------------------------ |
;---------- ebx - x2 shl 16 + y2 ------------------------ |
;---------- ecx - x3 shl 16 + y3 ------------------------ |
;---------- edx - ptr to fur coords struct (not read ---- |
;-------------- by the code below) -------------------- |
;---------- esi - pointer to stencil / Z-buffer, filled - |
;-------------- with dword float variables, it masks -- |
;-------------- 'Z' position (coord) of every front --- |
;-------------- pixel. -------------------------------- |
;---------- edi - pointer to screen buffer -------------- |
;---------- xmm0 - 1st normal vector -------------------- |
;---------- xmm1 - 2nd normal vector -------------------- |
;---------- xmm2 - 3rd normal vector -------------------- |
;---------- xmm3 - -------------------------------------- |
;---------- xmm4 - lo -> hi z1, z2, z3 coords ----------- |
;--------------- as dwords floats --------------------- |
;---------- xmm5 - lo -> hi y_min, y_max, x_min, x_max -- |
;--------------- as dword integers -------------------- |
;-----------mm7 - current triangle index --------------- |
;---------------------- stack - no parameters ----------- |
;-------------------------------------------------------- |
;----------------- procedure doesn't save registers !! -- |
; Flow: sort the vertices by ascending y, leave early when no vertex |
; coordinate passes the clip-rectangle test, compute per-scanline deltas |
; for the three edges, then call ray_shd_l once per scanline |
; (first for y1..y2-1, then for y2..y3-1). |
push ebp |
mov ebp,esp |
sub esp,1024 |
; ebp is moved down and rounded to a 16-byte boundary so the locals that |
; are accessed with movaps/movdqa below sit on aligned addresses |
sub ebp,16 |
and ebp,0xfffffff0 |
; --- locals, addressed relative to the aligned ebp --- |
; vertex normals, 16 bytes each (.l_v is only referenced by commented-out code) |
.1_nv equ [ebp-16] |
.2_nv equ [ebp-32] |
.3_nv equ [ebp-48] |
.l_v equ [ebp-64] |
; z1, z2, z3 floats, written together by one movaps from xmm4 |
.z3 equ [ebp-72] |
.z2 equ [ebp-76] |
.z1 equ [ebp-80] |
; vertex coordinates: each input dword is stored whole, so y is the |
; low word and x the word right above it |
.x1 equ [ebp-82] |
.y1 equ [ebp-84] |
.x2 equ [ebp-86] |
.y2 equ [ebp-88] |
.x3 equ [ebp-90] |
.y3 equ [ebp-92] |
.Zbuf equ [ebp-96] |
; clip rectangle, written together by one movdqa from xmm5 |
.x_max equ [ebp-100] |
.x_min equ [ebp-104] |
.y_max equ [ebp-108] |
.y_min equ [ebp-112] |
.screen equ [ebp-116] |
; per-scanline x steps (fixed point, ROUND2 fractional bits) for edges 1-2, 1-3, 2-3 |
.dx12 equ [ebp-120] |
.dx13 equ [ebp-124] |
.dx23 equ [ebp-128] |
; per-scanline normal vector steps for the same edges |
.dn12 equ [ebp-144] |
.dn13 equ [ebp-160] |
.dn23 equ [ebp-176] |
; per-scanline z steps for the same edges |
.dz12 equ [ebp-180] |
.dz13 equ [ebp-184] |
.dz23 equ [ebp-188] |
.cnv1 equ [ebp-208] ; current normal vectors |
.cnv2 equ [ebp-240] |
; current z on both edges; cz1 and cz2 are adjacent so one movlps loads both |
.cz2 equ [ebp-244] |
.cz1 equ [ebp-248] |
; triangle index received in mm7, handed back in mm7 before every scanline call |
.tri_no equ [ebp-252] |
.sort3: ; sort triangle coordinates... |
; compare the y words (low 16 bits); each swap also swaps the matching |
; z lanes in xmm4 and the matching normal registers |
cmp ax,bx |
jle .sort1 |
xchg eax,ebx |
shufps xmm4,xmm4,11100001b |
movaps xmm6,xmm0 |
movaps xmm0,xmm1 |
movaps xmm1,xmm6 |
.sort1: |
cmp bx,cx |
jle .sort2 |
xchg ebx,ecx |
shufps xmm4,xmm4,11011000b |
movaps xmm6,xmm1 |
movaps xmm1,xmm2 |
movaps xmm2,xmm6 |
jmp .sort3 |
.sort2: |
; here y1 <= y2 <= y3; store the sorted z values, packed coordinates and clip rectangle |
movaps .z1,xmm4 |
mov .y1,eax |
mov .y2,ebx |
mov .y3,ecx |
movdqa .y_min,xmm5 |
if 1 ; check if at least a fragment |
packssdw xmm5,xmm5 ; of the triangle is in the visible area |
pshuflw xmm5,xmm5,11011000b |
; 16 bytes starting at .y3: words y3, x3, y2, x2, y1, x1 followed by the two halves of .z1 |
movdqu xmm7,.y3 |
movdqa xmm6,xmm5 |
pshufd xmm5,xmm5,0 ; xmm5 lo-hi -> broadcasted y_min, x_min |
pshufd xmm6,xmm6,01010101b ; xmm6 -> broadcasted y_max, x_max |
movdqa xmm4,xmm7 |
; xmm7 = coord > min, xmm4 = coord > max; the xor marks words that are above min but not above max |
pcmpgtw xmm7,xmm5 |
pcmpgtw xmm4,xmm6 |
pxor xmm7,xmm4 |
pmovmskb eax,xmm7 |
; keep the sign bit of the high byte of every word |
; NOTE(review): pmovmskb yields 16 bits and bits 13 and 15 belong to the two |
; words of .z1, so the bit pattern of z1 can take part in this test - |
; confirm whether a mask of 0x0aaa (six coordinate words) was intended |
and eax,0x00aaaaaa |
or eax,eax |
jz .rpt_loop2_end |
end if |
movd .tri_no,mm7 |
movaps .1_nv,xmm0 |
movaps .2_nv,xmm1 |
movaps .3_nv,xmm2 |
; movaps .l_v,xmm3 |
mov .Zbuf,esi |
mov .screen,edi |
; --- edge 1-2: bx = y2 - y1; a zero height zeroes the steps instead of dividing --- |
mov bx,.y2 ; calc deltas |
sub bx,.y1 |
jnz .rpt_dx12_make |
xorps xmm7,xmm7 |
mov dword .dx12,0 |
mov dword .dz12,0 |
movaps .dn12,xmm7 |
jmp .rpt_dx12_done |
.rpt_dx12_make: |
; dx12 = ((x2 - x1) shl ROUND2) / (y2 - y1), signed |
mov ax,.x2 |
sub ax,.x1 |
cwde |
movsx ebx,bx |
shl eax,ROUND2 |
cdq |
idiv ebx |
mov .dx12,eax |
; dz12 and dn12 use the approximate reciprocal of the height (rcpss) and a |
; multiplication instead of a division |
cvtsi2ss xmm6,ebx |
movss xmm5,.z2 |
rcpss xmm6,xmm6 |
subss xmm5,.z1 |
mulss xmm5,xmm6 |
movss .dz12,xmm5 |
shufps xmm6,xmm6,0 |
movaps xmm0,.2_nv |
subps xmm0,.1_nv |
mulps xmm0,xmm6 |
movaps .dn12,xmm0 |
; subps xmm3,xmm0 |
; mulps xmm3,xmm6 |
.rpt_dx12_done: |
; --- edge 1-3: same computation with y3 - y1 --- |
mov bx,.y3 ; calc deltas |
sub bx,.y1 |
jnz .rpt_dx13_make |
xorps xmm7,xmm7 |
mov dword .dx13,0 |
mov dword .dz13,0 |
movaps .dn13,xmm7 |
jmp .rpt_dx13_done |
.rpt_dx13_make: |
mov ax,.x3 |
sub ax,.x1 |
cwde |
movsx ebx,bx |
shl eax,ROUND2 |
cdq |
idiv ebx |
mov .dx13,eax |
cvtsi2ss xmm6,ebx |
movss xmm5,.z3 |
rcpss xmm6,xmm6 |
subss xmm5,.z1 |
mulss xmm5,xmm6 |
movss .dz13,xmm5 |
movaps xmm0,.3_nv |
subps xmm0,.1_nv |
shufps xmm6,xmm6,0 |
mulps xmm0,xmm6 |
movaps .dn13,xmm0 |
; mulps xmm0,xmm6 |
.rpt_dx13_done: |
; --- edge 2-3: same computation with y3 - y2 --- |
mov bx,.y3 ; calc deltas |
sub bx,.y2 |
jnz .rpt_dx23_make |
xorps xmm7,xmm7 |
mov dword .dx23,0 |
mov dword .dz23,0 |
movaps .dn23,xmm7 |
jmp .rpt_dx23_done |
.rpt_dx23_make: |
mov ax,.x3 |
sub ax,.x2 |
cwde |
movsx ebx,bx |
shl eax,ROUND2 |
cdq |
idiv ebx |
mov .dx23,eax |
cvtsi2ss xmm6,ebx |
movss xmm5,.z3 |
rcpss xmm6,xmm6 |
subss xmm5,.z2 |
mulss xmm5,xmm6 |
movss .dz23,xmm5 |
movaps xmm0,.3_nv |
subps xmm0,.2_nv |
shufps xmm6,xmm6,0 |
mulps xmm0,xmm6 |
movaps .dn23,xmm0 |
; mulps xmm0,xmm6 |
.rpt_dx23_done: |
; both edge walkers start at vertex 1: eax = ebx = x1 in fixed point, |
; cz1 = cz2 = z1, cnv1 = cnv2 = normal 1 |
movsx eax,word .x1 |
shl eax,ROUND2 |
mov ebx,eax |
mov ecx,.z1 |
mov .cz1,ecx |
mov .cz2,ecx |
movaps xmm0,.1_nv |
movaps .cnv1,xmm0 |
movaps .cnv2,xmm0 |
; buffer pointers for the scanline routine; pushad/popad keep them across the calls |
mov edi,.screen |
mov esi,.Zbuf |
movsx ecx,word .y1 |
cmp cx,.y2 |
jge .rpt_loop1_end |
.rpt_loop1: |
; upper part, scanlines y1 .. y2-1: eax follows edge 1-3, ebx follows edge 1-2. |
; pushad/popad keep the integer registers across the call; the xmm arguments |
; are reloaded from the locals before and after it |
pushad |
movaps xmm2,.y_min |
movaps xmm0,.cnv1 |
movaps xmm1,.cnv2 |
; loads cz1 into lane 0 and cz2 into lane 1 |
movlps xmm3,.cz1 |
; movaps xmm4,.l_v |
; drop the fractional bits: the scanline routine takes integer x1, x2 |
sar ebx,ROUND2 |
sar eax,ROUND2 |
movd mm7,.tri_no |
call ray_shd_l |
popad |
; advance both edge walkers by one scanline |
movaps xmm0,.cnv1 |
movaps xmm1,.cnv2 |
movss xmm2,.cz1 |
movss xmm3,.cz2 |
addps xmm0,.dn13 |
addps xmm1,.dn12 |
addss xmm2,.dz13 |
addss xmm3,.dz12 |
add eax,.dx13 |
add ebx,.dx12 |
movaps .cnv1,xmm0 |
movaps .cnv2,xmm1 |
movss .cz1,xmm2 |
movss .cz2,xmm3 |
add ecx,1 |
cmp cx,.y2 |
jl .rpt_loop1 |
.rpt_loop1_end: |
; lower part, scanlines y2 .. y3-1: the second walker restarts at vertex 2 |
; (x2, z2, normal 2) and follows edge 2-3; the first keeps following edge 1-3 |
movsx ecx,word .y2 |
cmp cx,.y3 |
jge .rpt_loop2_end |
movsx ebx,word .x2 ; eax - cur x1 |
shl ebx,ROUND2 ; ebx - cur x2 |
push dword .z2 |
pop dword .cz2 |
movaps xmm0,.2_nv |
movaps .cnv2,xmm0 |
mov edi,.screen |
mov esi,.Zbuf |
.rpt_loop2: |
pushad |
movaps xmm2,.y_min |
movaps xmm0,.cnv1 |
movaps xmm1,.cnv2 |
movlps xmm3,.cz1 |
; movaps xmm4,.l_v |
sar ebx,ROUND2 |
sar eax,ROUND2 |
movd mm7,.tri_no |
call ray_shd_l |
popad |
movaps xmm0,.cnv1 |
movaps xmm1,.cnv2 |
movss xmm2,.cz1 |
movss xmm3,.cz2 |
addps xmm0,.dn13 |
addps xmm1,.dn23 |
addss xmm2,.dz13 |
addss xmm3,.dz23 |
add eax,.dx13 |
add ebx,.dx23 |
movaps .cnv1,xmm0 |
movaps .cnv2,xmm1 |
movss .cz1,xmm2 |
movss .cz2,xmm3 |
add ecx,1 |
cmp cx,.y3 |
jl .rpt_loop2 |
.rpt_loop2_end: |
; release the 1024 bytes reserved in the prologue and restore the caller's ebp |
add esp,1024 |
pop ebp |
ret |
align 16 |
ray_shd_l: |
; in: |
; xmm0 - normal vector 1 |
; xmm1 - normal vect 2 |
; xmm3 - lo -> hi z1, z2 coords as dwords floats |
; xmm2 - lo -> hi y_min, y_max, x_min, x_max |
; as dword integers |
; xmm4 - ---- |
; mm7 - current triangle index |
; eax - x1 |
; ebx - x2 |
; ecx - y |
; edx - ----- |
; edi - screen buffer |
; esi - z buffer / stencil buffer filled with dd floats |
push ebp |
mov ebp,esp |
sub esp,320 |
sub ebp,16 |
and ebp,0xfffffff0 |
.n1 equ [ebp-16] |
.n2 equ [ebp-32] |
.lv equ [ebp-48] |
.lx1 equ [ebp-52] |
.lx2 equ [ebp-56] |
.z2 equ [ebp-60] |
.z1 equ [ebp-64] |
.screen equ [ebp-68] |
.zbuff equ [ebp-72] |
.x_max equ [ebp-74] |
.x_min equ [ebp-76] |
.y_max equ [ebp-78] |
.y_min equ [ebp-80] |
.dn equ [ebp-96] |
.dz equ [ebp-100] |
.y equ [ebp-104] |
; .cur_tri equ [ebp-108] |
.cnv equ [ebp-128] |
.Rlen equ [ebp-128-16] |
.r1 equ [ebp-128-32] |
.vect_t equ [ebp-128-48] |
.cur_tri equ [ebp-128-64] |
; .p3t equ [ebp-128-80] |
.nray equ [ebp-128-96] |
.final_col equ [ebp-128-112] |
.aabb_mask equ dword[ebp-128-112-4] |
mov .y,ecx |
movdqa xmm4,xmm2 |
packssdw xmm2,xmm2 |
movq .y_min,xmm2 |
cmp cx,.y_min |
jl .end_rp_line |
cmp cx,.y_max |
jge .end_rp_line ; |
cmp eax,ebx |
je .end_rp_line |
jl @f |
xchg eax,ebx |
movaps xmm7,xmm0 |
movaps xmm0,xmm1 |
movaps xmm1,xmm7 |
shufps xmm3,xmm3,11100001b |
@@: |
movd .cur_tri,mm7 |
cmp ax,.x_max |
jge .end_rp_line |
cmp bx,.x_min |
jle .end_rp_line |
; movaps .lv,xmm4 |
andps xmm0,[zero_hgst_dd] |
andps xmm1,[zero_hgst_dd] |
movaps .n1,xmm0 |
movaps .n2,xmm1 |
mov .lx1,eax |
mov .lx2,ebx |
movlps .z1,xmm3 |
sub ebx,eax |
cvtsi2ss xmm7,ebx |
rcpss xmm7,xmm7 |
shufps xmm7,xmm7,0 |
subps xmm1,xmm0 |
mulps xmm1,xmm7 |
movaps .dn,xmm1 |
shufps xmm3,xmm3,11111001b |
subss xmm3,.z1 |
mulss xmm3,xmm7 |
movss .dz,xmm3 |
subps xmm6,xmm5 |
mulps xmm6,xmm7 |
mov ebx,.lx1 |
cmp bx,.x_min ; clipping on function4 |
jge @f |
movzx eax,word .x_min |
sub eax,ebx |
cvtsi2ss xmm7,eax |
shufps xmm7,xmm7,0 |
mulss xmm3,xmm7 |
mulps xmm1,xmm7 |
mulps xmm6,xmm7 |
addss xmm3,.z1 |
addps xmm1,.n1 |
addps xmm6,xmm5 |
movsx eax,word .x_min |
movss .z1,xmm3 |
movaps .n1,xmm1 |
mov dword .lx1,eax |
@@: |
movzx eax,word .x_max |
cmp .lx2,eax |
jl @f |
mov .lx2,eax |
@@: |
movzx eax,word[xres_var] |
mul dword .y |
add eax,.lx1 |
mov .zbuff,esi |
mov .screen,edi |
shl eax,2 |
add edi,eax |
add esi,eax |
mov ecx,.lx2 |
sub ecx,.lx1 |
movd xmm0,[vect_x] |
punpcklwd xmm0,[the_zero] |
cvtdq2ps xmm0,xmm0 |
movaps .vect_t,xmm0 |
.ddraw: |
xorps xmm0,xmm0 |
movss xmm2,.z1 |
movss xmm5,.z1 |
movaps .final_col,xmm0 |
addss xmm2,[f1] |
subss xmm5,[f1] |
cmpnltss xmm2,dword[esi] |
cmpnltss xmm5,dword[esi] |
pxor xmm2,xmm5 |
movd eax,xmm2 |
or eax,eax |
jz .skips |
movaps xmm7,.n1 |
andps xmm7,[zero_hgst_dd] |
mulps xmm7,xmm7 ; normalize |
haddps xmm7,xmm7 |
haddps xmm7,xmm7 |
rsqrtps xmm7,xmm7 |
mulps xmm7,.n1 |
movaps .cnv,xmm7 |
mov ebx,point_light_coords |
mov edx,lights_aligned |
xor eax,eax |
.nx_light: |
pushad |
cvtsi2ss xmm0,.lx1 |
cvtsi2ss xmm1,.y |
movss xmm2,.z1 |
movlhps xmm0,xmm1 |
shufps xmm0,xmm2,11001000b |
subps xmm0,[ebx] ; xmm0 - ray end, -> current vertex |
movaps xmm3,[ebx] |
andps xmm0,[zero_hgst_dd] |
movaps xmm1,xmm0 |
mulps xmm0,xmm0 |
haddps xmm0,xmm0 |
haddps xmm0,xmm0 |
sqrtps xmm0,xmm0 |
movss .Rlen,xmm0 |
rcpps xmm0,xmm0 |
mulps xmm0,xmm1 ; xmm0 - normalized ray vector |
andps xmm0,[zero_hgst_dd] |
movaps .nray,xmm0 |
movaps .r1,xmm3 ; ray orgin |
if 0 |
movaps xmm1,xmm3 |
call calc_bounding_box |
mov .aabb_mask,eax |
end if |
mov edi,[triangles_ptr] |
xor ecx,ecx |
.nx_tri: ; next triangle |
cmp ecx,.cur_tri ; prevent self shadowing |
je .skipp |
if 0 |
mov edi,ecx |
imul edi,[i12] |
add edi,[triangles_ptr] |
mov eax,[edi] |
mov ebx,[edi+4] |
mov edx,[edi+8] |
imul eax,[i12] |
imul ebx,[i12] |
imul edx,[i12] |
add eax,[points_ptr] |
add ebx,[points_ptr] |
add edx,[points_ptr] |
movups xmm2,[eax] |
movups xmm3,[ebx] |
movups xmm4,[edx] |
andps xmm2,[sign_mask] |
andps xmm3,[sign_mask] |
andps xmm4,[sign_mask] |
movmskps ebx,xmm4 |
cmpeqps xmm2,xmm3 |
cmpeqps xmm3,xmm4 |
andps xmm2,xmm3 |
movmskps eax,xmm2 |
and eax,111b |
and ebx,111b |
cmp eax,111b |
jne @f |
bt .aabb_mask,ebx |
jnc .skipp |
@@: |
end if |
mov edi,ecx |
imul edi,[i12] |
add edi,[triangles_ptr] |
mov eax,[edi] |
mov ebx,[edi+4] |
mov edx,[edi+8] |
imul eax,[i12] |
imul ebx,[i12] |
imul edx,[i12] |
add eax,[points_rotated_ptr] |
add ebx,[points_rotated_ptr] |
add edx,[points_rotated_ptr] |
movups xmm2,[eax] |
movups xmm3,[ebx] |
movups xmm4,[edx] |
addps xmm2,.vect_t |
addps xmm3,.vect_t |
addps xmm4,.vect_t |
;intersect_tri: procs header |
; in: |
; xmm0 - ray direction ; should be normalized |
; xmm1 - ray orgin |
; xmm2 - tri vert1 |
; xmm3 - tri vert2 |
; xmm4 - tri vert3 |
; if eax = 1 - intersction with edge |
; xmm6 - edge lenght |
; if eax = 0 - intersect with ray (classic) |
; out: |
; eax = 1 - intersection occured |
; xmm0 - float lo -> hi = t, v, u, ... |
movss xmm6,.Rlen |
movaps xmm0,.nray |
movaps xmm1,.r1 |
subss xmm6,[the_one] |
mov eax,1 |
push ecx |
call intersect_tri |
pop ecx |
cmp eax,1 |
je .inter |
.skipp: |
.skp: |
inc ecx |
cmp ecx,[triangles_count_var] |
jnz .nx_tri |
; jz .do_process |
; comiss xmm0,.Rlen |
; jl .inter |
popad |
.do_process: |
movaps xmm5,.nray ;[edx] |
andps xmm5,[zero_hgst_dd] ; global |
mulps xmm5,.cnv ;.lv ; last dword should be zeroed |
; andps xmm5,[sign_z] ; global |
haddps xmm5,xmm5 |
haddps xmm5,xmm5 |
andps xmm5,[abs_mask] ; global |
movaps xmm7,xmm5 |
mulps xmm7,xmm7 |
mulps xmm7,xmm7 |
mulps xmm5,[edx+16] |
mulps xmm7,xmm7 |
mulps xmm7,xmm7 |
mulps xmm7,[edx+48] |
addps xmm5,xmm7 |
minps xmm5,[mask_255f] ; global |
maxps xmm5,.final_col ; addps maxps |
movaps .final_col,xmm5 |
jmp .nx_loop |
.inter: |
popad |
.nx_loop: |
; add edx,64 ; unncomment to achive 3 lights |
; add ebx,16 |
; cmp edx,lights_aligned_end ; global |
; jnz .nx_light |
movaps xmm1,.final_col |
cvtps2dq xmm1,xmm1 |
packssdw xmm1,xmm1 |
packuswb xmm1,xmm1 |
movd [edi],xmm1 |
.skips: |
movaps xmm0,.n1 |
movss xmm2,.z1 |
add edi,4 |
add esi,4 |
add dword .lx1,1 |
addps xmm0,.dn |
addss xmm2,.dz |
movaps .n1,xmm0 |
movss .z1,xmm2 |
dec ecx |
jnz .ddraw |
.end_rp_line: |
add esp,320 |
pop ebp |
ret |
/programs/demos/view3ds/a_procs.inc |
---|
1,3 → 1,200 |
if Ext > SSE2 |
;-------------------------------------------------------------------- |
init_point_lights: |
; Fills the three 16-byte entries of point_light_coords with random |
; positions stored as single precision floats: |
; [entry+0] = random with ecx = 0, edx = size_x_var |
; [entry+4] = random with ecx = 0, edx = size_x_var |
; [entry+8] = random with ecx = -1900, edx = -600 |
; [entry+12] = 0 |
; NOTE(review): presumably `random` takes its range in ecx/edx, returns the |
; value in eax and leaves edi alone - confirm against its definition. |
; NOTE(review): the second coordinate is drawn from size_x_var too, not from |
; size_y_var - confirm that this is intended. |
mov edi,point_light_coords |
mov ecx,3 |
.next_light: |
; the light counter lives on the stack because ecx is an argument of random |
push ecx |
; first coordinate |
xor ecx,ecx |
movzx edx,word[size_x_var] |
call random |
cvtsi2ss xmm0,eax |
movss [edi],xmm0 |
; second coordinate |
xor ecx,ecx |
movzx edx,word[size_x_var] |
call random |
cvtsi2ss xmm0,eax |
movss [edi+4],xmm0 |
; third coordinate, taken from a fixed negative range |
mov ecx,-1900 |
mov edx,-600 |
call random |
cvtsi2ss xmm0,eax |
movss [edi+8],xmm0 |
; the highest dword of the entry stays zero |
mov dword[edi+12],0 |
add edi,16 |
pop ecx |
loop .next_light |
ret |
;------------------------------------------------------------------ |
intersect_tri: ; Moeller-Trumbore method |
; Tests a ray, or a segment of given length, against one triangle. |
; in: |
; xmm0 - ray direction ; should be normalized |
; xmm1 - ray origin |
; xmm2 - tri vert1 |
; xmm3 - tri vert2 |
; xmm4 - tri vert3 |
; eax = 1 - segment test: a hit counts only when eps < t <= xmm6 |
; xmm6 - segment (edge) length in the low dword |
; eax = 0 - classic ray test: a hit counts when t >= eps |
; out: |
; eax = 1 - intersection occurred, eax = 0 - no intersection |
; xmm0 - float lo -> hi = t, v, u, edge length (t, v, u are meaningful |
; only when eax = 1) |
; clobbers: ebx, esi, edi, xmm1, xmm3, xmm4, flags and whatever |
; cross_aligned changes |
push ebp |
mov ebp,esp |
; locals are addressed from a 16-byte aligned ebp so movaps can be used |
and ebp,-16 |
sub esp,220 |
.dir equ [ebp-16] |
.origin equ [ebp-32] |
.ta equ [ebp-48] |
.tb equ [ebp-64] |
.tc equ [ebp-80] |
.tvec equ [ebp-96] |
.pvec equ [ebp-112] |
.qvec equ [ebp-128] |
.e1 equ [ebp-128-16] |
.ift equ dword[ebp-152] |
.invdet equ [ebp-156] |
.det equ [ebp-160] |
.ed_l equ [ebp-164] |
.u equ [ebp-168] |
.v equ [ebp-172] |
.t equ [ebp-176] |
.e2 equ [ebp-192] |
movaps .dir,xmm0 |
movaps .origin,xmm1 |
movaps .ta,xmm2 |
movaps .tb,xmm3 |
movaps .tc,xmm4 |
mov .ift,eax |
movss .ed_l,xmm6 |
; e1 = vert2 - vert1, e2 = vert3 - vert1, highest dword masked off |
subps xmm3,xmm2 |
subps xmm4,xmm2 |
andps xmm3,[zero_hgst_dd] |
andps xmm4,[zero_hgst_dd] |
movaps .e1,xmm3 |
movaps .e2,xmm4 |
; pvec = cross_aligned(dir, e2) - presumably the cross product [esi] x [edi] |
; stored at [ebx]; confirm against cross_aligned |
lea esi,.dir |
lea edi,.e2 |
lea ebx,.pvec |
call cross_aligned |
; det = e1 . pvec (two horizontal adds sum all four lanes) |
movaps xmm0,.e1 |
mulps xmm0,.pvec |
haddps xmm0,xmm0 |
haddps xmm0,xmm0 |
movss .det,xmm0 |
; reject when det < eps. comiss reports "below" through CF and clears |
; SF/OF, so the unsigned jb is required - a signed jl would never be taken. |
; An unordered compare (NaN) also sets CF and is rejected. |
comiss xmm0,[eps] |
jb @f |
rcpss xmm0,.det |
movss .invdet,xmm0 |
; tvec = origin - vert1 ; u = (tvec . pvec) * invdet |
movaps xmm0,.origin |
subps xmm0,.ta |
andps xmm0,[zero_hgst_dd] |
movaps .tvec,xmm0 |
mulps xmm0,.pvec |
haddps xmm0,xmm0 |
haddps xmm0,xmm0 |
mulss xmm0,.invdet |
movss xmm1,xmm0 |
movss .u,xmm0 |
; keep going only when exactly one of (u > epsone), (u > epsminus) holds, |
; i.e. epsminus < u <= epsone |
cmpnless xmm1,[epsone] |
cmpnless xmm0,[epsminus] |
pxor xmm1,xmm0 |
movd eax,xmm1 |
or eax,eax |
jz @f |
; qvec = cross_aligned(tvec, e1) ; v = (dir . qvec) * invdet |
lea esi,.tvec |
lea edi,.e1 |
lea ebx,.qvec |
call cross_aligned |
movaps xmm0,.dir |
mulps xmm0,.qvec |
haddps xmm0,xmm0 |
haddps xmm0,xmm0 |
mulss xmm0,.invdet |
movss .v,xmm0 |
movss xmm1,xmm0 |
addss xmm1,.u |
; same mask trick: exactly one of (u+v > epsone), (v > epsminus) must hold |
cmpnless xmm1,[epsone] |
cmpnless xmm0,[epsminus] |
pxor xmm1,xmm0 |
movd eax,xmm1 |
or eax,eax |
jz @f |
; t = (e2 . qvec) * invdet |
movaps xmm1,.e2 |
mulps xmm1,.qvec |
haddps xmm1,xmm1 |
haddps xmm1,xmm1 |
mulss xmm1,.invdet |
movss .t,xmm1 |
; reject hits with t < eps (jb for the same reason as above) |
comiss xmm1,[eps] |
jb @f |
mov eax,1 |
cmp .ift,0 |
je .end ; ok intersection occurred, no edge case |
movss xmm0,.t ; else check against the edge length |
; xmm1 still holds t: accept only when (t > eps) and not (t > edge length) |
cmpnless xmm0,[eps] |
cmpnless xmm1,.ed_l |
xorps xmm0,xmm1 |
movd ebx,xmm0 |
or ebx,ebx |
jz @f |
jmp .end |
@@: |
; common "no intersection" exit |
xor eax,eax |
.end: |
; .t, .v, .u, .ed_l are adjacent, so one aligned load returns all of them |
movaps xmm0,.t |
add esp,220 |
pop ebp |
ret |
end if |
;=============================================================== |
do_edges_list: |
push ebp |
223,13 → 420,18 |
do_sinus: |
;in - ax - render mode |
.x equ [ebp-8] |
.y equ [ebp-12] |
.new_y equ [ebp-16] |
.temp equ [ebp-20] |
.dr_f equ word[ebp-22] |
push ebp |
mov ebp,esp |
sub esp,64 |
sub esp,30 |
mov .dr_f,ax |
mov dword .x,0 |
mov dword .y,0 |
mov esi,[screen_ptr] |
243,53 → 445,20 |
cld |
rep stosd |
pop edi |
; movzx eax,[sinus_flag] |
; mov edx,10 |
; mul edx |
; mov [sin_amplitude],eax |
; mov [sin_frq],eax |
fninit |
;if Ext = SSE2 |
; movups xmm1,[const0123] ; xmm1 - init values |
; mov eax,0x000000ff |
; movd xmm2,eax |
; shufps xmm2,xmm2,0 ; xmm2 - mask value |
; mov eax,4 |
; movd xmm3,eax |
; shufps xmm3,xmm3,0 |
.again: |
if 0 |
fild dword .x |
fidiv [sin_frq] |
fsin |
fimul [sin_amplitude] |
fiadd dword .y |
fistp dword .new_y |
else |
fild dword .x |
fmul [sin_frq] |
fistp dword .temp |
mov eax, .temp |
; mov bx, [angle_x] |
; add bx, [angle_y] |
; movzx ebx,bx |
; shr ebx,1 ; change phase |
; add eax,ebx |
and eax, 0x000000ff |
; cdq |
; mul [sin_frq] |
; and eax,0x000000ff |
; and ax,0x00ff |
; cwde |
fld dword [sin_tab+eax*4] |
fimul dword [sin_amplitude] |
fiadd dword .y |
fistp dword .new_y |
end if |
mov eax,.new_y |
or eax,eax |
jl .skip |
298,12 → 467,11 |
jg .skip |
movzx edx,word[size_x_var] |
mul edx |
; shl eax,9 |
add eax,dword .x |
lea ebx,[eax*3] |
cmp [dr_flag],12 ; 32 bit col cause |
jl @f |
cmp .dr_f,12 ; 32 bit col cause |
jb @f |
add ebx,eax |
@@: |
mov eax,[esi] |
310,8 → 478,8 |
mov [edi+ebx],eax |
.skip: |
add esi,3 |
cmp [dr_flag],12 |
jl @f |
cmp .dr_f,12 |
jb @f |
inc esi |
@@: |
inc dword .x |
330,8 → 498,8 |
movzx ecx,word[size_x_var] |
movzx eax,word[size_y_var] |
imul ecx,eax |
cmp [dr_flag],12 |
jge @f |
cmp .dr_f,12 |
jae @f |
lea ecx,[ecx*3] |
shr ecx,2 |
; mov ecx,SIZE_X*SIZE_Y*3/4 |
377,7 → 545,19 |
ret |
do_emboss: ; sse2 version only |
; in ax - render model |
push ebp |
mov ebp,esp |
sub esp,4 |
.dr_mod equ word[ebp-2] |
mov .dr_mod,ax |
if Ext >= SSE2 |
movzx ecx,[bumps_deep_flag] |
inc ecx |
call blur_screen ;blur n times |
392,13 → 572,13 |
sub ecx,ebx |
mov esi,[screen_ptr] |
mov edi,[Zbuffer_ptr] |
cmp [dr_flag],12 |
cmp .dr_mod,11 |
jge @f |
lea ebx,[ebx*3] |
jmp .f |
jmp .gf |
@@: |
shl ebx,2 |
.f: |
.gf: |
mov edx,esi |
add esi,ebx |
lea ebx,[ebx+esi] |
405,7 → 585,7 |
pxor xmm0,xmm0 |
push eax |
.emb: |
cmp [dr_flag],12 |
cmp .dr_mod ,11 |
jge @f |
movlps xmm1,[esi+3] |
movhps xmm1,[esi+6] |
442,14 → 622,7 |
pmaxsw xmm1,xmm7 |
pmaxsw xmm1,xmm6 |
if 0 |
movaps xmm7,xmm3 |
movaps xmm6,xmm3 |
psrlq xmm7,2*8 |
psrlq xmm6,4*8 |
pmaxsw xmm3,xmm7 |
pmaxsw xmm3,xmm6 |
end if |
pmaxsw xmm1,xmm3 |
movd eax,xmm1 |
469,7 → 642,7 |
mov eax,[eax] |
mov [edi+4],eax |
cmp [dr_flag],12 |
cmp .dr_mod,11 |
jl @f |
add esi,2 |
add ebx,2 |
487,7 → 660,7 |
pop ecx ;,eax |
mov edi,[screen_ptr] |
mov esi,[Zbuffer_ptr] |
cmp [dr_flag],12 |
cmp .dr_mod,11 |
jge .e |
@@: |
movsd |
498,6 → 671,11 |
end if |
mov esp,ebp |
pop ebp |
ret |
;align 16 |
/programs/demos/view3ds/asc.inc |
---|
1,815 → 1,815 |
; Files *.asc routines by Maciej Guba |
; Thanks to Reverend for integer/float/ascii conversion examples |
read_asc: |
; Parses the ASCII (*.asc) scene text that [fptr] points to. |
; Reads the vertex count and the face count from the header, then stores |
; three dword floats per vertex at [points_ptr] and three dword vertex |
; indices per face at [triangles_ptr]. Both arrays get a -1 dword end mark. |
; in:  [fptr] - pointer to the file text |
; out: eax = 1, [points_count_var] and [triangles_count_var] set |
; clobbers ebx, ecx, edx, esi, edi, FPU state (atof) |
; NOTE(review): every scan below runs until its pattern is found and has no |
; end-of-buffer check - assumes a well formed file; confirm with callers. |
mov eax,[fptr] |
; --- header: find the text "Vertices" and the number that follows it |
.find_vert: |
cmp dword[eax],'Vert' |
je @f |
inc eax |
jmp .find_vert |
@@: |
add eax,4 |
cmp dword[eax],'ices' |
jne .find_vert |
add eax,3 |
; skip forward to the first digit |
@@: |
inc eax |
cmp byte[eax],'0' ; search end of ascii number of vertices string |
jb @b |
cmp byte[eax],'9' |
ja @b |
; eax - start ascii number |
; walk to the first byte that is not a digit |
@@: |
inc eax |
cmp byte[eax],'0' |
jb .convert1 |
cmp byte[eax],'9' |
ja .convert1 |
jmp @b |
.convert1: |
; step back to the last digit; ascii_to_integer reads the number backwards |
dec eax |
mov ebx,eax |
push eax |
call ascii_to_integer |
mov [points_count_var],edx |
pop eax |
; --- header: find the text "Face" and the number that follows it |
@@: |
inc eax |
cmp dword[eax],'Face' |
jne @b |
add eax,3 |
@@: |
inc eax |
cmp byte[eax],'0' |
jb @b |
cmp byte[eax],'9' |
ja @b |
; eax - start ascii number |
@@: |
inc eax |
cmp byte[eax],'0' |
jb .convert2 |
cmp byte[eax],'9' |
ja .convert2 |
jmp @b |
; eax - end ascii number |
.convert2: |
dec eax |
mov ebx,eax |
push eax |
call ascii_to_integer |
mov [triangles_count_var],edx |
pop eax |
; --- vertex section: skip one more "Vert" occurrence before the per vertex |
; entries (presumably the list header line - confirm against the file format) |
@@: |
inc eax |
cmp dword[eax],'Vert' |
jnz @b |
inc eax |
mov edi,[points_ptr] |
; ebx counts decoded vertices |
xor ebx,ebx |
.decode_vertices: |
push ebx |
@@: |
inc eax |
cmp dword[eax],'Vert' |
jne @b |
; ecx selects the coordinate letter in XYZpartices (0, 1, 2) |
xor ecx,ecx |
.decode_coord: |
push ecx |
; find the coordinate letter |
@@: |
inc eax |
mov dl,byte[eax] |
cmp dl,byte[XYZpartices+ecx] |
jne @b |
; find the first character of the value: '.', '-' or a digit |
@@: |
inc eax |
cmp byte[eax],'.' |
je .readF |
cmp byte[eax],'-' |
je .readF |
cmp byte[eax],'0' |
jb @b |
cmp byte[eax],'9' |
ja @b |
.readF: ; read float |
mov esi,eax |
push eax |
push ecx |
call atof ; st0 - desired dword float |
pop ecx |
pop eax |
fstp dword[edi] |
add edi,4 |
pop ecx |
inc ecx |
cmp ecx,3 |
jne .decode_coord |
pop ebx |
inc ebx |
cmp ebx,[points_count_var] |
jne .decode_vertices |
; end mark after the last vertex |
mov dword[edi],-1 |
; --- face section: continue scanning with esi |
mov esi,eax |
@@: |
inc esi |
cmp dword[esi],'Face' |
jne @b |
; edx counts decoded faces |
xor edx,edx |
mov edi,[triangles_ptr] |
cld |
.decode_face: |
push edx |
@@: |
inc esi |
cmp dword[esi],'Face' |
jne @b |
; skip over the face's own number |
@@: |
inc esi |
cmp byte[esi],'0' ; face number start |
jb @b |
cmp byte[esi],'9' |
ja @b |
@@: |
inc esi |
cmp byte[esi],'0' |
jb @f |
cmp byte[esi],'9' ; face number end |
ja @f |
jmp @b |
@@: |
; ecx counts the three vertex indices of this face |
xor ecx,ecx |
.next_vertex_number: |
push ecx |
@@: |
inc esi |
cmp byte[esi],'0' |
jb @b |
cmp byte[esi],'9' |
ja @b |
; esi - start ascii number |
@@: |
inc esi |
cmp byte[esi],'0' |
jb @f |
cmp byte[esi],'9' |
ja @f |
jmp @b |
; esi - end ascii number |
@@: |
dec esi |
mov ebx,esi |
push esi |
call ascii_to_integer |
; store the index as a dword and advance edi |
mov eax,edx |
stosd |
pop esi |
; NOTE(review): moves 4 bytes past the last digit before the next scan; |
; the intent depends on the field spacing of the file format - confirm |
add esi,4 |
pop ecx |
inc ecx |
cmp ecx,3 |
jne .next_vertex_number |
pop edx |
inc edx |
cmp edx,[triangles_count_var] |
jne .decode_face |
mov dword[edi],-1 ;dword[triangles+ebx+2],-1 ; end mark |
mov eax,1 ;-> mark if ok |
ret |
ascii_to_integer: |
; Converts an unsigned decimal ascii number, read backwards from its last digit. |
; in --- [ebx] -> end of ascii string (last, least significant digit) |
; out -- edx -> desired number (low 32 bits) |
;        ebx -> the non-digit byte in front of the number |
; clobbers eax, ecx, flags |
; The place value is kept in ecx instead of being looked up in convert_muler, |
; so the digit count is no longer limited by the length of that table. |
xor edx,edx |
mov ecx,1 |
.again: |
movzx eax,byte[ebx] |
sub eax,'0' |
; edx += digit * place value |
imul eax,ecx |
add edx,eax |
; next digit to the left is worth ten times more |
imul ecx,ecx,10 |
dec ebx |
cmp byte[ebx],'0' |
jb .end |
cmp byte[ebx],'9' |
ja .end |
jmp .again |
@@: |
.end: |
ret |
;=============================================================================== |
; ASCII to float conversion procedure |
; |
; input: |
; esi - pointer to string |
; |
; output: |
; st0 - number changed into float |
; |
;=============================================================================== |
atof: |
; Converts decimal text "[-]digits[.digits]" at esi to a float left in st0. |
; Parsing stops at the first character that is neither a digit nor the first '.'. |
; eax, ecx, esi are preserved; the FPU is reinitialised (fninit) on entry; |
; CF is set on return. |
.string equ ebp-4 |
push ebp |
mov ebp,esp |
sub esp,32 |
push eax ecx esi |
; remember the start of the text so the sign can be checked again at .exit |
mov [.string],esi |
fninit |
; two accumulators: st0 collects the integer part, st1 stays 0 for the fraction |
fldz |
fldz |
cld |
cmp byte [esi], '-' |
jnz @F |
inc esi |
@@: |
; eax is cleared once so that "push eax" below pushes just the digit value |
xor eax, eax |
align 4 |
.loop.integer_part: |
lodsb |
cmp al, '.' |
jz .mantisa |
cmp al,'0' |
jb .exit |
cmp al,'9' |
ja .exit |
; st0 = st0 * 10 + digit (the digit is passed to the FPU through the stack) |
fimul [i10] |
sub al, '0' |
push eax |
fiadd dword [esp] |
add esp, 4 |
jmp .loop.integer_part |
.mantisa: |
; ecx = byte offset into convert_muler for the current fraction digit |
xor ecx, ecx |
xor eax, eax |
cld |
; bring the fraction accumulator to st0, integer part goes to st1 |
fxch st1 |
@@: |
lodsb |
cmp al,'0' |
jb .exit |
cmp al,'9' |
ja .exit |
cmp ecx,7*4 |
je .exit ; max 7 digits in mantisa |
sub al,'0' |
push eax |
; fraction += digit / convert_muler[k+1] |
; (presumably 10^(k+1); the table is defined elsewhere - confirm) |
fild dword[esp] |
fidiv dword[convert_muler+4+ecx] |
faddp |
add esp,4 |
add ecx,4 |
jmp @b |
.exit: |
; st0 = integer part + fraction |
faddp |
; negate when the original text started with '-' |
mov eax, [.string] |
cmp byte [eax], '-' |
jnz @F |
fchs |
@@: |
cld |
stc ; always returns no error |
pop esi ecx eax |
mov esp,ebp |
pop ebp |
ret |
itoa: ; unsigned dword integer to ascii procedure |
; in eax - value to convert |
; esi - pointer to the destination buffer (up to 11 bytes are written) |
; out esi - pointer to the zero terminated decimal string (same as on entry) |
; edi - pointer just past the terminating zero |
; clobbers eax, edx, flags; ecx and ebp are preserved; DF is left clear |
.temp_string equ dword[ebp-36] |
.ptr equ dword[ebp-40] |
.var equ dword[ebp-44] |
push ecx |
push ebp |
mov ebp,esp |
sub esp,64 |
mov .var,eax |
; fill the 36 byte scratch area [ebp-36 .. ebp-1] with 0FFh so the forward |
; scan below can tell unused bytes from the zero markers |
mov eax,-1 |
lea edi,.temp_string |
cld |
mov ecx,9 |
rep stosd ; make floor |
mov .ptr,esi |
; digits come out least significant first, so they are written backwards |
; (DF set) starting near the top of the scratch area |
lea edi,.temp_string |
add edi,34 |
std |
xor eax,eax |
stosb ; zero that will terminate the string |
mov eax,.var |
mov esi,10 |
@@: |
xor edx,edx |
div esi |
; edx = digit, eax = remaining quotient; swap so the digit can go out via al |
xchg eax,edx |
add al,'0' |
stosb |
xchg eax,edx |
or eax,eax |
jnz @b |
stosb ; zero marker in front of the most significant digit (al = 0 here) |
; scan forward over the 0FFh filler up to and including that marker |
lea esi,.temp_string |
cld |
@@: |
lodsb |
or al,al |
jnz @b |
; copy the digits and the terminating zero to the destination |
mov edi,.ptr |
@@: |
lodsb |
stosb |
or al,al |
jnz @b |
; esi pointed into the scratch area that is about to be released; |
; return the destination string as the header promises |
mov esi,.ptr |
mov esp,ebp |
pop ebp |
pop ecx |
ret |
if 1 |
ftoa_mac: |
; Writes the decimal text of a dword float: integer part, '.', then up to |
; 6 fraction digits (fewer when the remaining fraction becomes exactly zero). |
; in : esi - pointer to dword float |
; edi - pointer to ascii string |
; out: edi - just past the last fraction digit (no terminator is stored there) |
; clobbers eax, edx, esi, flags; ecx is preserved; the FPU is reinitialised |
; (fninit) and its control word is restored to the post-fninit value on return |
; NOTE(review): for a negative input the '-' is stored at [ptr_ascii-1], one |
; byte in front of the buffer passed in edi - presumably over a separator |
; written by the caller; confirm. |
.ptr_f equ dword[ebp-4] |
.sign equ dword[ebp-8] ; 0 -> less than zero, 1 - otherwise |
.ptr_ascii equ dword[ebp-12] |
.integer equ dword[ebp-20] |
.fraction equ dword[ebp-28] |
.status_orginal equ word[ebp-32] |
.status_changed equ word[ebp-34] |
push ecx |
push ebp |
mov ebp,esp |
sub esp,64 |
fninit |
; switch rounding control to truncation so frndint cuts digits off |
fnstcw .status_orginal |
mov ax, .status_orginal |
or ax, 0000110000000000b |
mov .status_changed, ax |
fldcw .status_changed |
; -------------------------------- |
; check if signed: look at the sign bit of the stored float |
xor eax, eax |
fld dword[esi] |
fst .sign |
test .sign, 80000000h |
setz al |
mov .sign, eax |
mov .ptr_f,esi |
mov .ptr_ascii,edi |
; split |x| into integer part (.integer) and fraction (left in st0) |
fabs |
fld st0 |
frndint |
fist .integer |
fsubp st1, st0 |
mov eax,.integer |
mov esi,.ptr_ascii |
call itoa |
; edi -> just past the zero terminator; step back and overwrite it with '.' |
dec edi |
mov al,'.' |
stosb |
mov ecx, 6 ; max 6 digits in fraction part |
.loop: |
; shift the next decimal digit in front of the point and peel it off |
fimul [i10] |
fld st0 |
frndint |
fist .fraction |
fsubp st1, st0 |
mov esi,edi |
mov eax,.fraction |
add al,'0' |
stosb |
; stop early once the remaining fraction is exactly zero |
; (ftst reports zero as C3 = 1, C2 = 0, C0 = 0) |
ftst |
fnstsw ax |
test ax, 0100000000000000b |
jz @F |
test ax, 0000010100000000b |
jz .finish |
@@: |
loop .loop |
.finish: |
ffree st |
; give the caller back the control word saved on entry instead of leaving |
; the FPU in truncation mode |
fldcw .status_orginal |
cmp .sign,0 |
jnz @f |
mov esi,.ptr_ascii |
dec esi |
mov byte[esi],'-' |
@@: |
mov esp,ebp |
pop ebp |
pop ecx |
ret |
end if |
if 0 |
;=============================================================================== |
; float to ASCII conversion procedure |
; |
; input: |
; buffer - pointer to memory where output will be saved |
; precision - number of digits after dot |
; |
; output: |
; no immediate output |
; |
; notes: |
; separate integer and mantisa part with dot '.' |
; so GOOD 123.456 |
; WRONG 123,456 |
; |
; coded by Reverend // HTB + RAG |
;=============================================================================== |
proc ftoa buffer, precision |
locals |
status_original dw ? |
status_changed dw ? |
integer dd ? |
mantisa dd ? |
signed dd ? |
endl |
push eax ecx;edi ecx |
; -------------------------------- |
; set correct precision |
mov eax, [precision] |
cmp eax, 51 |
jb @F |
mov eax, 51 |
@@: |
mov [precision], eax |
; -------------------------------- |
; change control wortd of fpu to prevent rounding |
fnstcw [status_original] |
mov ax, [status_original] |
or ax, 0000110000000000b |
mov [status_changed], ax |
fldcw [status_changed] |
; -------------------------------- |
; check if signed |
xor eax, eax |
fst [signed] |
test [signed], 80000000h |
setnz al |
mov [signed], eax |
; -------------------------------- |
; cut integer and mantisa separately |
fld st0 |
fld st0 ; st0 = x, st1 = x |
frndint |
fist [integer] ; st0 = x, st1 = x |
fabs |
fsubp st1, st0 ; st0 = mantisa(x) |
; -------------------------------- |
; save integer part in buffer |
; mov edi, [buffer] |
mov esi,[buffer] |
; push [signed] |
; push edi |
; push 10 |
; push [integer] |
mov eax,[integer] |
call itoa |
; add edi, eax |
mov al, '.' |
stosb |
mov esi,edi |
; -------------------------------- |
; save mantisa part in buffer |
mov ecx, [precision] |
dec ecx |
.loop: |
fimul [i10] |
fld st0 |
frndint |
fist [mantisa] |
fsubp st1, st0 |
; push 0 |
; push edi |
; push 10 |
; push [mantisa] |
mov esi,edi |
mov eax,[mantisa] |
call itoa |
; add edi, eax |
ftst |
fnstsw ax |
test ax, 0100000000000000b |
jz @F |
test ax, 0000010100000000b |
jz .finish |
@@: |
loop .loop |
fldcw [status_original] |
fimul [i10] |
fist [mantisa] |
; push 0 |
; push edi |
; push 10 |
; push [mantisa] |
mov esi,edi |
mov eax,[mantisa] |
call itoa |
; -------------------------------- |
; restore previous values |
.finish: |
fstp st0 |
cmp [signed],1 |
jnz @f |
mov byte[buffer],'-' |
@@: |
stc |
pop ecx eax ;edi eax |
ret |
endp |
end if |
if 0 |
write_asc: |
.counter equ dword[ebp-4] |
push ebp |
mov ebp,esp |
sub esp,64 |
fninit |
mov edi,asc_file_buffer |
mov esi,asc_main_header |
cld |
@@: |
lodsb |
cmp al,1 |
jz @f |
stosb |
jmp @b |
@@: |
mov esi,asc_info_header |
@@: |
lodsb |
cmp al,1 |
jz @f |
stosb |
jmp @b |
@@: |
push esi ; -> position in header info |
movzx eax,[points_count_var] |
mov esi,edi |
call itoa ; unsigned dword integer to ascii procedure |
pop esi |
inc esi |
@@: |
lodsb |
cmp al,1 |
jz @f |
stosb |
jmp @b |
@@: |
push esi |
movzx eax,[triangles_count_var] |
mov esi,edi |
call itoa |
pop esi |
inc esi |
@@: |
lodsb |
cmp al,1 |
jz @f |
stosb |
jmp @b |
@@: |
;============================================================= |
;================vertex list parser=========================== |
;============================================================= |
xor ecx,ecx |
.again_vertex: |
push ecx |
mov esi,asc_one_vertex_formula |
@@: |
lodsb |
cmp al,1 |
jz @f |
stosb |
jmp @b |
@@: |
mov eax,ecx |
; push ecx |
push esi |
mov esi,edi |
call itoa |
pop esi |
; pop ecx |
inc esi |
xor ebx,ebx |
.next_vertex_coef: |
push ebx |
@@: |
lodsb |
cmp al,1 |
jz @f |
stosb |
jmp @b |
@@: |
; int3 |
push esi |
lea esi,[ecx*3] |
shl esi,2 |
add esi,points_r |
add esi,ebx |
; int3 |
call ftoa_mac |
; std |
; fld dword[esi] |
; pushad |
; stdcall ftoa, edi, 30 |
; popad |
; add edi,20 |
pop esi |
pop ebx |
add ebx,4 |
cmp ebx,12 |
jnz .next_vertex_coef |
@@: |
lodsb |
cmp al,1 |
jz @f |
stosb |
jmp @b |
@@: |
pop ecx |
inc ecx |
cmp cx,[points_count_var] |
jnz .again_vertex |
; mov edi,[temp_edi] |
mov esi,asc_face_list_header |
@@: |
lodsb |
cmp al,1 ; all face header |
jz @f |
stosb |
jmp @b |
@@: |
;===================================== |
; ==============face list parser====== |
;===================================== |
xor ecx,ecx |
.again_face: |
push ecx |
mov .counter,ecx |
mov esi,asc_one_face_formula |
@@: |
lodsb |
cmp al,1 |
jz @f |
stosb |
jmp @b |
@@: |
mov eax,ecx |
push esi |
mov esi,edi |
call itoa |
pop esi |
inc esi |
@@: |
lodsb |
cmp al,1 |
jz @f |
stosb |
jmp @b |
@@: |
xor ebx,ebx |
.next_face_index: |
push ebx |
mov ecx,.counter |
lea ecx,[ecx*3] |
add ecx,ecx |
movzx eax,word[triangles+ecx+ebx] |
push esi |
mov esi,edi |
call itoa |
pop esi |
@@: |
lodsb |
cmp al,1 |
jz @f |
stosb |
jmp @b |
@@: |
pop ebx |
add ebx,2 |
cmp ebx,6 |
jnz .next_face_index |
; push esi |
mov esi,asc_material |
@@: |
lodsb |
cmp al,1 |
jz @f |
stosb |
jmp @b |
@@: |
; pop esi |
pop ecx |
inc ecx |
cmp cx,[triangles_count_var] |
jnz .again_face |
; write file |
sub edi,asc_file_buffer |
; mov [file_buffer+2],edi |
mov [FileSize],edi |
invoke CreateFile,asc_file_name, GENERIC_WRITE, 0, 0,CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, 0 |
mov [hfile],eax |
invoke WriteFile,eax,asc_file_buffer,[FileSize], byteswritten, 0 |
invoke CloseHandle,[hfile] |
mov esp,ebp |
pop ebp |
ret |
end if |
if 0 |
asc_file_buffer: |
rd 65536 |
ascii_string rb 50 |
ftoa_muler dd 1000000000 |
file_size dd ? |
file_handle dd ? |
end if |
if 0 |
convert_muler: |
dd 1, 10, 100, 1000, 10000 |
XYZpartices: |
db 'X','Y','Z' |
i10 dw 10 |
points_count_var dd ? |
triangles_count_var dd ? |
points rb 100 |
triangles rb 100 |
asc_file: |
file "2TORUS.ASC" |
; Files *.asc routines by Maciej Guba |
; Thanks to Reverend for integer/float/ascii conversion examples |
read_asc: |
; Parses the ASCII (*.asc) scene text that [fptr] points to. |
; Reads the vertex count and the face count from the header, then stores |
; three dword floats per vertex at [points_ptr] and three dword vertex |
; indices per face at [triangles_ptr]. Both arrays get a -1 dword end mark. |
; in:  [fptr] - pointer to the file text |
; out: eax = 1, [points_count_var] and [triangles_count_var] set |
; clobbers ebx, ecx, edx, esi, edi, FPU state (atof) |
; NOTE(review): every scan below runs until its pattern is found and has no |
; end-of-buffer check - assumes a well formed file; confirm with callers. |
mov eax,[fptr] |
; --- header: find the text "Vertices" and the number that follows it |
.find_vert: |
cmp dword[eax],'Vert' |
je @f |
inc eax |
jmp .find_vert |
@@: |
add eax,4 |
cmp dword[eax],'ices' |
jne .find_vert |
add eax,3 |
; skip forward to the first digit |
@@: |
inc eax |
cmp byte[eax],'0' ; search end of ascii number of vertices string |
jb @b |
cmp byte[eax],'9' |
ja @b |
; eax - start ascii number |
; walk to the first byte that is not a digit |
@@: |
inc eax |
cmp byte[eax],'0' |
jb .convert1 |
cmp byte[eax],'9' |
ja .convert1 |
jmp @b |
.convert1: |
; step back to the last digit; ascii_to_integer reads the number backwards |
dec eax |
mov ebx,eax |
push eax |
call ascii_to_integer |
mov [points_count_var],edx |
pop eax |
; --- header: find the text "Face" and the number that follows it |
@@: |
inc eax |
cmp dword[eax],'Face' |
jne @b |
add eax,3 |
@@: |
inc eax |
cmp byte[eax],'0' |
jb @b |
cmp byte[eax],'9' |
ja @b |
; eax - start ascii number |
@@: |
inc eax |
cmp byte[eax],'0' |
jb .convert2 |
cmp byte[eax],'9' |
ja .convert2 |
jmp @b |
; eax - end ascii number |
.convert2: |
dec eax |
mov ebx,eax |
push eax |
call ascii_to_integer |
mov [triangles_count_var],edx |
pop eax |
; --- vertex section: skip one more "Vert" occurrence before the per vertex |
; entries (presumably the list header line - confirm against the file format) |
@@: |
inc eax |
cmp dword[eax],'Vert' |
jnz @b |
inc eax |
mov edi,[points_ptr] |
; ebx counts decoded vertices |
xor ebx,ebx |
.decode_vertices: |
push ebx |
@@: |
inc eax |
cmp dword[eax],'Vert' |
jne @b |
; ecx selects the coordinate letter in XYZpartices (0, 1, 2) |
xor ecx,ecx |
.decode_coord: |
push ecx |
; find the coordinate letter |
@@: |
inc eax |
mov dl,byte[eax] |
cmp dl,byte[XYZpartices+ecx] |
jne @b |
; find the first character of the value: '.', '-' or a digit |
@@: |
inc eax |
cmp byte[eax],'.' |
je .readF |
cmp byte[eax],'-' |
je .readF |
cmp byte[eax],'0' |
jb @b |
cmp byte[eax],'9' |
ja @b |
.readF: ; read float |
mov esi,eax |
push eax |
push ecx |
call atof ; st0 - desired dword float |
pop ecx |
pop eax |
fstp dword[edi] |
add edi,4 |
pop ecx |
inc ecx |
cmp ecx,3 |
jne .decode_coord |
pop ebx |
inc ebx |
cmp ebx,[points_count_var] |
jne .decode_vertices |
; end mark after the last vertex |
mov dword[edi],-1 |
; --- face section: continue scanning with esi |
mov esi,eax |
@@: |
inc esi |
cmp dword[esi],'Face' |
jne @b |
; edx counts decoded faces |
xor edx,edx |
mov edi,[triangles_ptr] |
cld |
.decode_face: |
push edx |
@@: |
inc esi |
cmp dword[esi],'Face' |
jne @b |
; skip over the face's own number |
@@: |
inc esi |
cmp byte[esi],'0' ; face number start |
jb @b |
cmp byte[esi],'9' |
ja @b |
@@: |
inc esi |
cmp byte[esi],'0' |
jb @f |
cmp byte[esi],'9' ; face number end |
ja @f |
jmp @b |
@@: |
; ecx counts the three vertex indices of this face |
xor ecx,ecx |
.next_vertex_number: |
push ecx |
@@: |
inc esi |
cmp byte[esi],'0' |
jb @b |
cmp byte[esi],'9' |
ja @b |
; esi - start ascii number |
@@: |
inc esi |
cmp byte[esi],'0' |
jb @f |
cmp byte[esi],'9' |
ja @f |
jmp @b |
; esi - end ascii number |
@@: |
dec esi |
mov ebx,esi |
push esi |
call ascii_to_integer |
; store the index as a dword and advance edi |
mov eax,edx |
stosd |
pop esi |
; NOTE(review): moves 4 bytes past the last digit before the next scan; |
; the intent depends on the field spacing of the file format - confirm |
add esi,4 |
pop ecx |
inc ecx |
cmp ecx,3 |
jne .next_vertex_number |
pop edx |
inc edx |
cmp edx,[triangles_count_var] |
jne .decode_face |
mov dword[edi],-1 ;dword[triangles+ebx+2],-1 ; end mark |
mov eax,1 ;-> mark if ok |
ret |
ascii_to_integer: |
; Converts an unsigned decimal ascii number, read backwards from its last digit. |
; in --- [ebx] -> end of ascii string (last, least significant digit) |
; out -- edx -> desired number (low 32 bits) |
;        ebx -> the non-digit byte in front of the number |
; clobbers eax, ecx, flags |
; The place value is kept in ecx instead of being looked up in convert_muler, |
; so the digit count is no longer limited by the length of that table. |
xor edx,edx |
mov ecx,1 |
.again: |
movzx eax,byte[ebx] |
sub eax,'0' |
; edx += digit * place value |
imul eax,ecx |
add edx,eax |
; next digit to the left is worth ten times more |
imul ecx,ecx,10 |
dec ebx |
cmp byte[ebx],'0' |
jb .end |
cmp byte[ebx],'9' |
ja .end |
jmp .again |
@@: |
.end: |
ret |
;=============================================================================== |
; ASCII to float conversion procedure |
; |
; input: |
; esi - pointer to string |
; |
; output: |
; st0 - number changed into float |
; |
;=============================================================================== |
atof: |
; Converts decimal text "[-]digits[.digits]" at esi to a float left in st0. |
; Parsing stops at the first character that is neither a digit nor the first '.'. |
; eax, ecx, esi are preserved; the FPU is reinitialised (fninit) on entry; |
; CF is set on return. |
.string equ ebp-4 |
push ebp |
mov ebp,esp |
sub esp,32 |
push eax ecx esi |
; remember the start of the text so the sign can be checked again at .exit |
mov [.string],esi |
fninit |
; two accumulators: st0 collects the integer part, st1 stays 0 for the fraction |
fldz |
fldz |
cld |
cmp byte [esi], '-' |
jnz @F |
inc esi |
@@: |
; eax is cleared once so that "push eax" below pushes just the digit value |
xor eax, eax |
align 4 |
.loop.integer_part: |
lodsb |
cmp al, '.' |
jz .mantisa |
cmp al,'0' |
jb .exit |
cmp al,'9' |
ja .exit |
; st0 = st0 * 10 + digit (the digit is passed to the FPU through the stack) |
fimul [i10] |
sub al, '0' |
push eax |
fiadd dword [esp] |
add esp, 4 |
jmp .loop.integer_part |
.mantisa: |
; ecx = byte offset into convert_muler for the current fraction digit |
xor ecx, ecx |
xor eax, eax |
cld |
; bring the fraction accumulator to st0, integer part goes to st1 |
fxch st1 |
@@: |
lodsb |
cmp al,'0' |
jb .exit |
cmp al,'9' |
ja .exit |
cmp ecx,7*4 |
je .exit ; max 7 digits in mantisa |
sub al,'0' |
push eax |
; fraction += digit / convert_muler[k+1] |
; (presumably 10^(k+1); the table is defined elsewhere - confirm) |
fild dword[esp] |
fidiv dword[convert_muler+4+ecx] |
faddp |
add esp,4 |
add ecx,4 |
jmp @b |
.exit: |
; st0 = integer part + fraction |
faddp |
; negate when the original text started with '-' |
mov eax, [.string] |
cmp byte [eax], '-' |
jnz @F |
fchs |
@@: |
cld |
stc ; always returns no error |
pop esi ecx eax |
mov esp,ebp |
pop ebp |
ret |
itoa: ; unsigned dword integer to ascii procedure |
; in eax - value to convert |
; esi - pointer to the destination buffer (up to 11 bytes are written) |
; out esi - pointer to the zero terminated decimal string (same as on entry) |
; edi - pointer just past the terminating zero |
; clobbers eax, edx, flags; ecx and ebp are preserved; DF is left clear |
.temp_string equ dword[ebp-36] |
.ptr equ dword[ebp-40] |
.var equ dword[ebp-44] |
push ecx |
push ebp |
mov ebp,esp |
sub esp,64 |
mov .var,eax |
; fill the 36 byte scratch area [ebp-36 .. ebp-1] with 0FFh so the forward |
; scan below can tell unused bytes from the zero markers |
mov eax,-1 |
lea edi,.temp_string |
cld |
mov ecx,9 |
rep stosd ; make floor |
mov .ptr,esi |
; digits come out least significant first, so they are written backwards |
; (DF set) starting near the top of the scratch area |
lea edi,.temp_string |
add edi,34 |
std |
xor eax,eax |
stosb ; zero that will terminate the string |
mov eax,.var |
mov esi,10 |
@@: |
xor edx,edx |
div esi |
; edx = digit, eax = remaining quotient; swap so the digit can go out via al |
xchg eax,edx |
add al,'0' |
stosb |
xchg eax,edx |
or eax,eax |
jnz @b |
stosb ; zero marker in front of the most significant digit (al = 0 here) |
; scan forward over the 0FFh filler up to and including that marker |
lea esi,.temp_string |
cld |
@@: |
lodsb |
or al,al |
jnz @b |
; copy the digits and the terminating zero to the destination |
mov edi,.ptr |
@@: |
lodsb |
stosb |
or al,al |
jnz @b |
; esi pointed into the scratch area that is about to be released; |
; return the destination string as the header promises |
mov esi,.ptr |
mov esp,ebp |
pop ebp |
pop ecx |
ret |
if 1 |
ftoa_mac: |
; Writes the decimal text of a dword float: integer part, '.', then up to |
; 6 fraction digits (fewer when the remaining fraction becomes exactly zero). |
; in : esi - pointer to dword float |
; edi - pointer to ascii string |
; out: edi - just past the last fraction digit (no terminator is stored there) |
; clobbers eax, edx, esi, flags; ecx is preserved; the FPU is reinitialised |
; (fninit) and its control word is restored to the post-fninit value on return |
; NOTE(review): for a negative input the '-' is stored at [ptr_ascii-1], one |
; byte in front of the buffer passed in edi - presumably over a separator |
; written by the caller; confirm. |
.ptr_f equ dword[ebp-4] |
.sign equ dword[ebp-8] ; 0 -> less than zero, 1 - otherwise |
.ptr_ascii equ dword[ebp-12] |
.integer equ dword[ebp-20] |
.fraction equ dword[ebp-28] |
.status_orginal equ word[ebp-32] |
.status_changed equ word[ebp-34] |
push ecx |
push ebp |
mov ebp,esp |
sub esp,64 |
fninit |
; switch rounding control to truncation so frndint cuts digits off |
fnstcw .status_orginal |
mov ax, .status_orginal |
or ax, 0000110000000000b |
mov .status_changed, ax |
fldcw .status_changed |
; -------------------------------- |
; check if signed: look at the sign bit of the stored float |
xor eax, eax |
fld dword[esi] |
fst .sign |
test .sign, 80000000h |
setz al |
mov .sign, eax |
mov .ptr_f,esi |
mov .ptr_ascii,edi |
; split |x| into integer part (.integer) and fraction (left in st0) |
fabs |
fld st0 |
frndint |
fist .integer |
fsubp st1, st0 |
mov eax,.integer |
mov esi,.ptr_ascii |
call itoa |
; edi -> just past the zero terminator; step back and overwrite it with '.' |
dec edi |
mov al,'.' |
stosb |
mov ecx, 6 ; max 6 digits in fraction part |
.loop: |
; shift the next decimal digit in front of the point and peel it off |
fimul [i10] |
fld st0 |
frndint |
fist .fraction |
fsubp st1, st0 |
mov esi,edi |
mov eax,.fraction |
add al,'0' |
stosb |
; stop early once the remaining fraction is exactly zero |
; (ftst reports zero as C3 = 1, C2 = 0, C0 = 0) |
ftst |
fnstsw ax |
test ax, 0100000000000000b |
jz @F |
test ax, 0000010100000000b |
jz .finish |
@@: |
loop .loop |
.finish: |
ffree st |
; give the caller back the control word saved on entry instead of leaving |
; the FPU in truncation mode |
fldcw .status_orginal |
cmp .sign,0 |
jnz @f |
mov esi,.ptr_ascii |
dec esi |
mov byte[esi],'-' |
@@: |
mov esp,ebp |
pop ebp |
pop ecx |
ret |
end if |
if 0 |
;=============================================================================== |
; float to ASCII conversion procedure |
; |
; input: |
; buffer - pointer to memory where output will be saved |
; precision - number of digits after dot |
; |
; output: |
; no immediate output |
; |
; notes: |
; separate integer and mantisa part with dot '.' |
; so GOOD 123.456 |
; WRONG 123,456 |
; |
; coded by Reverend // HTB + RAG |
;=============================================================================== |
proc ftoa buffer, precision |
locals |
status_original dw ? |
status_changed dw ? |
integer dd ? |
mantisa dd ? |
signed dd ? |
endl |
push eax ecx;edi ecx |
; -------------------------------- |
; set correct precision |
mov eax, [precision] |
cmp eax, 51 |
jb @F |
mov eax, 51 |
@@: |
mov [precision], eax |
; -------------------------------- |
; change control wortd of fpu to prevent rounding |
fnstcw [status_original] |
mov ax, [status_original] |
or ax, 0000110000000000b |
mov [status_changed], ax |
fldcw [status_changed] |
; -------------------------------- |
; check if signed |
xor eax, eax |
fst [signed] |
test [signed], 80000000h |
setnz al |
mov [signed], eax |
; -------------------------------- |
; cut integer and mantisa separately |
fld st0 |
fld st0 ; st0 = x, st1 = x |
frndint |
fist [integer] ; st0 = x, st1 = x |
fabs |
fsubp st1, st0 ; st0 = mantisa(x) |
; -------------------------------- |
; save integer part in buffer |
; mov edi, [buffer] |
mov esi,[buffer] |
; push [signed] |
; push edi |
; push 10 |
; push [integer] |
mov eax,[integer] |
call itoa |
; add edi, eax |
mov al, '.' |
stosb |
mov esi,edi |
; -------------------------------- |
; save mantisa part in buffer |
mov ecx, [precision] |
dec ecx |
.loop: |
fimul [i10] |
fld st0 |
frndint |
fist [mantisa] |
fsubp st1, st0 |
; push 0 |
; push edi |
; push 10 |
; push [mantisa] |
mov esi,edi |
mov eax,[mantisa] |
call itoa |
; add edi, eax |
ftst |
fnstsw ax |
test ax, 0100000000000000b |
jz @F |
test ax, 0000010100000000b |
jz .finish |
@@: |
loop .loop |
fldcw [status_original] |
fimul [i10] |
fist [mantisa] |
; push 0 |
; push edi |
; push 10 |
; push [mantisa] |
mov esi,edi |
mov eax,[mantisa] |
call itoa |
; -------------------------------- |
; restore previous values |
.finish: |
fstp st0 |
cmp [signed],1 |
jnz @f |
mov byte[buffer],'-' |
@@: |
stc |
pop ecx eax ;edi eax |
ret |
endp |
end if |
if 0 |
; write_asc - assembles a text dump of the mesh in asc_file_buffer and writes |
; it to asc_file_name with CreateFile / WriteFile / CloseHandle. |
; Text templates are copied byte by byte until a marker byte equal to 1 is |
; read; itoa is called with eax = value and esi = current output position. |
; in: none (reads points_count_var, triangles_count_var, points_r, triangles) |
; out: none |
write_asc: |
.counter equ dword[ebp-4] ; index of the face currently being written |
push ebp |
mov ebp,esp |
sub esp,64 |
fninit |
cld |
mov edi,asc_file_buffer |
mov esi,asc_main_header |
call .emit_text |
mov esi,asc_info_header |
call .emit_text |
; ---- number of vertices ---- |
push esi ; -> position in header info |
movzx eax,[points_count_var] |
mov esi,edi |
call itoa ; unsigned dword integer to ascii procedure |
pop esi |
inc esi |
call .emit_text |
; ---- number of faces ---- |
push esi |
movzx eax,[triangles_count_var] |
mov esi,edi |
call itoa |
pop esi |
inc esi |
call .emit_text |
;============================================================= |
;================vertex list parser=========================== |
;============================================================= |
xor ecx,ecx ; ecx = vertex index |
.again_vertex: |
push ecx |
mov esi,asc_one_vertex_formula |
call .emit_text |
mov eax,ecx |
push esi |
mov esi,edi |
call itoa |
pop esi |
inc esi |
xor ebx,ebx ; ebx = byte offset of the coefficient (0, 4, 8) |
.next_vertex_coef: |
push ebx |
call .emit_text |
push esi |
lea esi,[ecx*3] |
lea esi,[points_r+esi*4+ebx] ; esi -> points_r + index*12 + offset |
call ftoa_mac |
pop esi |
pop ebx |
add ebx,4 |
cmp ebx,12 |
jne .next_vertex_coef |
call .emit_text |
pop ecx |
inc ecx |
cmp cx,[points_count_var] |
jne .again_vertex |
mov esi,asc_face_list_header |
call .emit_text ; all face header |
;===================================== |
; ==============face list parser====== |
;===================================== |
xor ecx,ecx ; ecx = face index |
.again_face: |
push ecx |
mov .counter,ecx |
mov esi,asc_one_face_formula |
call .emit_text |
mov eax,ecx |
push esi |
mov esi,edi |
call itoa |
pop esi |
inc esi |
call .emit_text |
xor ebx,ebx ; ebx = byte offset of the vertex index (0, 2, 4) |
.next_face_index: |
push ebx |
imul ecx,.counter,6 ; three word indices per face |
movzx eax,word[triangles+ecx+ebx] |
push esi |
mov esi,edi |
call itoa |
pop esi |
call .emit_text |
pop ebx |
add ebx,2 |
cmp ebx,6 |
jne .next_face_index |
mov esi,asc_material |
call .emit_text |
pop ecx |
inc ecx |
cmp cx,[triangles_count_var] |
jne .again_face |
; write file |
sub edi,asc_file_buffer ; edi = number of bytes produced |
mov [FileSize],edi |
invoke CreateFile,asc_file_name, GENERIC_WRITE, 0, 0,CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, 0 |
mov [hfile],eax |
invoke WriteFile,eax,asc_file_buffer,[FileSize], byteswritten, 0 |
invoke CloseHandle,[hfile] |
mov esp,ebp |
pop ebp |
ret |
; Copies bytes from [esi] to [edi] until a byte equal to 1 is read. |
; The marker byte is consumed (esi points past it) but is not stored. |
.emit_text: |
lodsb |
cmp al,1 |
je .emit_text_done |
stosb |
jmp .emit_text |
.emit_text_done: |
ret |
end if |
if 0 |
; Data for the disabled ASC export code (excluded from assembly by "if 0"). |
; Output buffer filled by write_asc: 65536 uninitialised dwords. |
asc_file_buffer: |
rd 65536 |
; 50-byte scratch string; not referenced by the write_asc code shown above. |
ascii_string rb 50 |
; Multiplier 10^9; presumably used by the float-to-ascii code - TODO confirm. |
ftoa_muler dd 1000000000 |
; NOTE(review): write_asc stores into FileSize / hfile, not into these two |
; names - confirm which pair is intended before enabling this code. |
file_size dd ? |
file_handle dd ? |
end if |
if 0 |
; Test data for the disabled ASC import/export code (excluded by "if 0"). |
; Powers of ten 10^0 .. 10^4 as dwords. |
convert_muler: |
dd 1, 10, 100, 1000, 10000 |
; Axis letters as three bytes. |
XYZpartices: |
db 'X','Y','Z' |
i10 dw 10 |
; NOTE(review): both counters are declared as dwords here, while write_asc |
; reads them with movzx and compares them against cx (word access) - confirm |
; the intended size before enabling this code. |
points_count_var dd ? |
triangles_count_var dd ? |
points rb 100 |
triangles rb 100 |
; Embeds the contents of 2TORUS.ASC at assembly time. |
asc_file: |
file "2TORUS.ASC" |
end if |
/programs/demos/view3ds/b_procs.inc |
---|
737,22 → 737,33 |
blur_screen: ;blur n times ; blur or fire |
;in - ecx times count |
;.counter equ dword[esp-4] |
.counter1 equ dword[esp-8] |
; ax - render mode |
.val equ dword[ebp-4] |
.dr_model equ word[ebp-6] |
.fire equ dword[ebp-10] |
if Ext>=SSE2 |
push ebp |
mov ebp,esp |
push dword 0x01010101 |
movss xmm5,[esp] |
sub esp,10 |
; xorps xmm5,xmm5 |
; or edx,edx |
; jz @f |
mov .val,0x01010101 |
movss xmm5,.val |
shufps xmm5,xmm5,0 |
@@: |
mov .dr_model,ax |
.again_blur: |
push ecx |
mov edi,[screen_ptr] |
movzx ecx,word[size_x_var] ;SIZE_X*3/4 |
cmp [dr_flag],12 |
cmp .dr_model,11 |
jge @f |
lea ecx,[ecx*3+1] |
lea ecx,[ecx*3+3] |
shr ecx,2 |
@@: |
763,11 → 774,11 |
movzx ecx,word[size_y_var] |
sub ecx,3 |
imul ecx,ebx |
cmp [dr_flag],12 ; 32 bit per pix cause |
cmp .dr_model,11 ; 32 bit per pix cause |
jge @f |
lea ecx,[ecx*3] |
shr ecx,4 |
lea ebx,[ebx *3] |
lea ebx,[ebx*3] |
jmp .blr |
@@: |
781,7 → 792,7 |
mov ecx,edi |
sub ecx,ebx |
movups xmm1,[ecx] |
cmp [dr_flag],12 |
cmp .dr_model,12 |
jge @f |
movups xmm2,[edi-3] |
movups xmm3,[edi+3] |
802,9 → 813,9 |
end if |
xor eax,eax |
movzx ecx,word[size_x_var] |
cmp [dr_flag],12 |
cmp .dr_model,11 |
jge @f |
lea ecx,[ecx*3] |
lea ecx,[ecx*3+3] |
shr ecx,2 |
@@: |
; mov ecx,SIZE_X*3/4 |
/programs/demos/view3ds/bump_cat.inc |
---|
1,1132 → 1,1132 |
;SIZE_X equ 350 |
;SIZE_Y equ 350 |
; Number of fraction bits used for the fixed-point x, bump and environment |
; coordinates in this file (values are shifted left by ROUND). |
ROUND equ 8 |
;TEX_X equ 512 |
;TEX_Y equ 512 |
;TEXTURE_SIZE EQU (512*512)-1 |
;TEX_SHIFT EQU 9 |
; Number of fraction bits used for the interpolated Z values stored in the |
; Z-buffer (values are shifted left by CATMULL_SHIFT). |
CATMULL_SHIFT equ 8 |
;TEXTURE_SIZE EQU (TEX_X * TEX_Y)-1 |
;Ext = NON |
;MMX = 1 |
;NON = 0 |
;------- Big thanks to Majuma (www.majuma.xt.pl) for absolutely great--- |
;------- DOS 13h mode demos -------------------------------------------- |
;------- Procedure draws bump triangle using Catmull Z-buffer algorithm- |
;------- (Z coordinate interpolation)----------------------------------- |
bump_triangle_z: |
;------------------in - eax - x1 shl 16 + y1 ----------- |
;---------------------- ebx - x2 shl 16 + y2 ----------- |
;---------------------- ecx - x3 shl 16 + y3 ----------- |
;---------------------- edx - pointer to bump map ------ |
;---------------------- esi - pointer to environment map |
;---------------------- edi - pointer to screen buffer-- |
;---------------------- stack : bump coordinates-------- |
;---------------------- environment coordinates- |
;---------------------- Z position coordinates-- |
;---------------------- pointer to Z buffer----- |
;-- Z-buffer - filled with coordinates as dword -------- |
;-- (Z coor. as word) shl CATMULL_SHIFT ---------------- |
;-- The 34 bytes of stack arguments are removed on return (ret 34). --- |
;-- The vertices are sorted by y, the per-scanline deltas of the three - |
;-- edges are computed, then bump_line_z is called for every scanline -- |
;-- of the upper part (y1..y2) and of the lower part (y2..y3). --------- |
.b_x1 equ ebp+4 ; procedure doesn't save registers !!! |
.b_y1 equ ebp+6 ; each coordinate as word |
.b_x2 equ ebp+8 |
.b_y2 equ ebp+10 |
.b_x3 equ ebp+12 |
.b_y3 equ ebp+14 |
.e_x1 equ ebp+16 |
.e_y1 equ ebp+18 |
.e_x2 equ ebp+20 |
.e_y2 equ ebp+22 |
.e_x3 equ ebp+24 |
.e_y3 equ ebp+26 |
.z1 equ word[ebp+28] |
.z2 equ word[ebp+30] |
.z3 equ word[ebp+32] |
.z_buff equ dword[ebp+34] ; pointer to Z-buffer |
; Locals below ebp are created by the pushes / sub esp in the body, in |
; exactly this order - do not reorder the pushes. |
.t_bmap equ dword[ebp-4] ; pointer to bump map |
.t_emap equ dword[ebp-8] ; pointer to e. map |
.x1 equ word[ebp-10] |
.y1 equ word[ebp-12] |
.x2 equ word[ebp-14] |
.y2 equ word[ebp-16] |
.x3 equ word[ebp-18] |
.y3 equ word[ebp-20] |
; edge deltas per scanline (12 = edge 1->2, 13 = edge 1->3, 23 = edge 2->3) |
.dx12 equ dword[ebp-24] |
.dz12 equ [ebp-28] |
.dbx12 equ dword[ebp-32] |
.dby12 equ [ebp-36] |
.dex12 equ dword[ebp-40] |
.dey12 equ [ebp-44] |
.dx13 equ dword[ebp-48] |
.dz13 equ [ebp-52] |
.dbx13 equ dword[ebp-56] |
.dby13 equ [ebp-60] |
.dex13 equ dword[ebp-64] |
.dey13 equ [ebp-68] |
.dx23 equ dword[ebp-72] |
.dz23 equ [ebp-76] |
.dbx23 equ dword[ebp-80] |
.dby23 equ [ebp-84] |
.dex23 equ dword[ebp-88] |
.dey23 equ [ebp-92] |
.cx1 equ dword[ebp-96] ; current variables |
.cz1 equ [ebp-100] |
.cx2 equ dword[ebp-104] |
.cz2 equ [ebp-108] |
.cbx1 equ dword[ebp-112] |
.cby1 equ [ebp-116] |
.cex1 equ dword[ebp-120] |
.cey1 equ [ebp-124] |
.cbx2 equ dword[ebp-128] |
.cby2 equ [ebp-132] |
.cex2 equ dword[ebp-136] |
.cey2 equ [ebp-140] |
; ebp points at the return address, so the first stack argument is at ebp+4 |
mov ebp,esp |
push edx ; store bump map |
push esi ; store e. map |
; sub esp,120 |
; Order the vertices by ascending y (low word of eax/ebx/ecx); every swap |
; also exchanges the matching bump coords, env coords and z on the stack. |
.sort3: ; sort triangle coordinates... |
cmp ax,bx |
jle .sort1 |
xchg eax,ebx |
mov edx,dword[.b_x1] |
xchg edx,dword[.b_x2] |
mov dword[.b_x1],edx |
mov edx,dword[.e_x1] |
xchg edx,dword[.e_x2] |
mov dword[.e_x1],edx |
mov dx,.z1 |
xchg dx,.z2 |
mov .z1,dx |
.sort1: |
cmp bx,cx |
jle .sort2 |
xchg ebx,ecx |
mov edx,dword[.b_x2] |
xchg edx,dword[.b_x3] |
mov dword[.b_x2],edx |
mov edx,dword[.e_x2] |
xchg edx,dword[.e_x3] |
mov dword[.e_x2],edx |
mov dx,.z2 |
xchg dx,.z3 |
mov .z2,dx |
jmp .sort3 |
.sort2: |
push eax ; store triangle coords in variables |
push ebx |
push ecx |
; Early out: skip the triangle when all three x or all three y are negative |
mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that |
and edx,ebx ; if *all* of them are negative a sign flag is raised |
and edx,ecx |
and edx,eax |
test edx,80008000h ; Check both X&Y at once |
jne .loop23_done |
; mov edx,eax ; eax,ebx,ecx are ORd together into edx which means that |
; or edx,ebx ; if any *one* of them is negative a sign flag is raised |
; or edx,ecx |
; test edx,80000000h ; Check only X |
; jne .loop23_done |
; cmp .x1,SIZE_X ; { |
; jg .loop23_done |
; cmp .x2,SIZE_X ; This can be optimized with effort |
; jg .loop23_done |
; cmp .x3,SIZE_X |
; jg .loop23_done ; { |
; ---- edge 1->2: six dwords (dx, dz, dbx, dby, dex, dey) go on the stack ---- |
mov bx,.y2 ; calc delta 12 |
sub bx,.y1 |
jnz .bt_dx12_make |
; edge has no height: all six deltas are zero |
mov ecx,6 |
xor edx,edx |
@@: |
push edx ;dword 0 |
loop @b |
jmp .bt_dx12_done |
.bt_dx12_make: |
mov ax,.x2 |
sub ax,.x1 |
cwde |
movsx ebx,bx ; ebx = y2 - y1, the divisor for every delta of this edge |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dx12,eax |
push eax |
mov ax,.z2 |
sub ax,.z1 |
cwde |
shl eax,CATMULL_SHIFT |
cdq |
idiv ebx |
push eax |
if Ext>=SSE |
; The four texture deltas are computed with one packed multiply. |
; xmm3 = rcpss(dy / [i255d]) in all four lanes. |
; NOTE(review): the scalar branch scales by 1 shl ROUND, this branch by the |
; integer stored at i255d (value not visible here) - confirm this is intended. |
sub esp,16 ; room for dbx12, dby12, dex12, dey12 |
cvtsi2ss xmm3,ebx ;rcps |
; mov eax,255 |
cvtsi2ss xmm4,[i255d] |
divss xmm3,xmm4 |
rcpss xmm3,xmm3 |
; mulss xmm3,xmm4 |
shufps xmm3,xmm3,0 |
movd mm0,[.b_x1] |
movd mm1,[.b_x2] |
movd mm2,[.e_x1] |
movd mm3,[.e_x2] |
pxor mm4,mm4 |
punpcklwd mm0,mm4 ; words are zero-extended to dwords |
punpcklwd mm1,mm4 |
punpcklwd mm2,mm4 |
punpcklwd mm3,mm4 |
psubd mm1,mm0 ; mm1 = (b_x2-b_x1, b_y2-b_y1) |
psubd mm3,mm2 ; mm3 = (e_x2-e_x1, e_y2-e_y1) |
; cvtpi2ps xmm0,mm0 |
; movlhps xmm0,xmm0 |
; cvtpi2ps xmm0,mm2 |
cvtpi2ps xmm1,mm1 |
movlhps xmm1,xmm1 |
cvtpi2ps xmm1,mm3 ; xmm1 = (dex, dey, dbx, dby) before scaling |
; subps xmm1,xmm0 |
; cvtpi2ps xmm0,mm3 |
; divps xmm1,xmm3 |
mulps xmm1,xmm3 |
shufps xmm1,xmm1,10110001b ; swap inside each pair -> (dey, dex, dby, dbx) |
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords |
movhlps xmm1,xmm1 |
cvtps2pi mm1,xmm1 |
movq .dey12,mm0 |
movq .dby12,mm1 |
else |
mov ax,word[.b_x2] |
sub ax,word[.b_x1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dbx12,eax |
push eax |
mov ax,word[.b_y2] |
sub ax,word[.b_y1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dby12,eax |
push eax |
mov ax,word[.e_x2] |
sub ax,word[.e_x1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dex12,eax |
push eax |
mov ax,word[.e_y2] |
sub ax,word[.e_y1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dey12,eax |
push eax |
end if |
.bt_dx12_done: |
; ---- edge 1->3: same scheme as edge 1->2 ---- |
mov bx,.y3 ; calc delta13 |
sub bx,.y1 |
jnz .bt_dx13_make |
mov ecx,6 |
xor edx,edx |
@@: |
push edx ;dword 0 |
loop @b |
jmp .bt_dx13_done |
.bt_dx13_make: |
mov ax,.x3 |
sub ax,.x1 |
cwde |
movsx ebx,bx |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dx13,eax |
push eax |
mov ax,.z3 |
sub ax,.z1 |
cwde |
shl eax,CATMULL_SHIFT |
cdq |
idiv ebx |
; mov .dz13,eax |
push eax |
if Ext>=SSE |
sub esp,16 |
cvtsi2ss xmm3,ebx ;rcps |
; mov eax,255 |
cvtsi2ss xmm4,[i255d] |
divss xmm3,xmm4 |
rcpss xmm3,xmm3 |
; mulss xmm3,xmm4 |
shufps xmm3,xmm3,0 |
movd mm0,[.b_x1] |
movd mm1,[.b_x3] |
movd mm2,[.e_x1] |
movd mm3,[.e_x3] |
pxor mm4,mm4 |
punpcklwd mm0,mm4 |
punpcklwd mm1,mm4 |
punpcklwd mm2,mm4 |
punpcklwd mm3,mm4 |
psubd mm1,mm0 |
psubd mm3,mm2 |
; cvtpi2ps xmm0,mm0 |
; movlhps xmm0,xmm0 |
; cvtpi2ps xmm0,mm2 |
cvtpi2ps xmm1,mm1 |
movlhps xmm1,xmm1 |
cvtpi2ps xmm1,mm3 |
; subps xmm1,xmm0 |
; cvtpi2ps xmm0,mm3 |
; divps xmm1,xmm3 |
mulps xmm1,xmm3 |
shufps xmm1,xmm1,10110001b |
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords |
movhlps xmm1,xmm1 |
cvtps2pi mm1,xmm1 |
movq .dey13,mm0 |
movq .dby13,mm1 |
else |
mov ax,word[.b_x3] |
sub ax,word[.b_x1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dbx13,eax |
push eax |
mov ax,word[.b_y3] |
sub ax,word[.b_y1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dby13,eax |
push eax |
mov ax,word[.e_x3] |
sub ax,word[.e_x1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dex13,eax |
push eax |
mov ax,word[.e_y3] |
sub ax,word[.e_y1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dey13,eax |
push eax |
end if |
.bt_dx13_done: |
; ---- edge 2->3: same scheme as edge 1->2 ---- |
mov bx,.y3 ; calc delta23 |
sub bx,.y2 |
jnz .bt_dx23_make |
mov ecx,6 |
xor edx,edx |
@@: |
push edx ;dword 0 |
loop @b |
jmp .bt_dx23_done |
.bt_dx23_make: |
mov ax,.x3 |
sub ax,.x2 |
cwde |
movsx ebx,bx |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dx23,eax |
push eax |
mov ax,.z3 |
sub ax,.z2 |
cwde |
shl eax,CATMULL_SHIFT |
cdq |
idiv ebx |
; mov .dz23,eax |
push eax |
; sub esp,40 |
if Ext>=SSE |
sub esp,16 |
cvtsi2ss xmm3,ebx ;rcps |
; mov eax,255 |
cvtsi2ss xmm4,[i255d] |
divss xmm3,xmm4 |
rcpss xmm3,xmm3 |
; mulss xmm3,xmm4 |
shufps xmm3,xmm3,0 |
movd mm0,[.b_x2] |
movd mm1,[.b_x3] |
movd mm2,[.e_x2] |
movd mm3,[.e_x3] |
pxor mm4,mm4 |
punpcklwd mm0,mm4 |
punpcklwd mm1,mm4 |
punpcklwd mm2,mm4 |
punpcklwd mm3,mm4 |
psubd mm1,mm0 |
psubd mm3,mm2 |
; cvtpi2ps xmm0,mm0 |
; movlhps xmm0,xmm0 |
; cvtpi2ps xmm0,mm2 |
cvtpi2ps xmm1,mm1 |
movlhps xmm1,xmm1 |
cvtpi2ps xmm1,mm3 |
; subps xmm1,xmm0 |
; cvtpi2ps xmm0,mm3 |
; divps xmm1,xmm3 |
mulps xmm1,xmm3 |
shufps xmm1,xmm1,10110001b |
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords |
movhlps xmm1,xmm1 |
cvtps2pi mm1,xmm1 |
movq .dey23,mm0 |
movq .dby23,mm1 |
else |
mov ax,word[.b_x3] |
sub ax,word[.b_x2] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dbx23,eax |
push eax |
mov ax,word[.b_y3] |
sub ax,word[.b_y2] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dby23,eax |
push eax |
mov ax,word[.e_x3] |
sub ax,word[.e_x2] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dex23,eax |
push eax |
mov ax,word[.e_y3] |
sub ax,word[.e_y2] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dey23,eax |
push eax |
end if |
.bt_dx23_done: |
; Reserve the 12 dwords of "current" variables (.cx1 .. .cey2) and start |
; both sides of the span at vertex 1. |
sub esp,48 |
movsx eax,.x1 |
shl eax,ROUND |
mov .cx1,eax |
mov .cx2,eax |
; push eax |
; push eax |
movsx eax,word[.b_x1] |
shl eax,ROUND |
mov .cbx1,eax |
mov .cbx2,eax |
; push eax |
; push eax |
movsx eax,word[.b_y1] |
shl eax,ROUND |
mov .cby1,eax |
mov .cby2,eax |
; push eax |
; push eax |
movsx eax,word[.e_x1] |
shl eax,ROUND |
mov .cex1,eax |
mov .cex2,eax |
; push eax |
; push eax |
movsx eax,word[.e_y1] |
shl eax,ROUND |
mov .cey1,eax |
mov .cey2,eax |
; push eax |
; push eax |
movsx eax,.z1 |
shl eax,CATMULL_SHIFT |
mov .cz1,eax |
mov .cz2,eax |
; push eax |
; push eax |
; ---- upper part: scanlines y1 .. y2-1, side 1 follows edge 1->3 and |
; ---- side 2 follows edge 1->2; ecx = current scanline |
movsx ecx,.y1 |
cmp cx,.y2 |
jge .loop12_done |
.loop12: |
call .call_bump_line |
if Ext >= SSE2 |
; each 16-byte load covers the four adjacent dwords ey, ex, by, bx |
movups xmm0,.cey2 |
movups xmm1,.cey1 |
movups xmm2,.dey12 |
movups xmm3,.dey13 |
paddd xmm0,xmm2 |
paddd xmm1,xmm3 |
movups .cey2,xmm0 |
movups .cey1,xmm1 |
; each 8-byte load covers the adjacent dwords z and x |
movq mm4,.cz1 |
movq mm5,.cz2 |
paddd mm4,.dz13 |
paddd mm5,.dz12 |
movq .cz1,mm4 |
movq .cz2,mm5 |
end if |
if (Ext = MMX) | (Ext = SSE) |
movq mm0,.cby2 |
movq mm1,.cby1 |
movq mm2,.cey2 |
movq mm3,.cey1 |
movq mm4,.cz1 |
movq mm5,.cz2 |
paddd mm0,.dby12 |
paddd mm1,.dby13 |
paddd mm2,.dey12 |
paddd mm3,.dey13 |
paddd mm4,.dz13 |
paddd mm5,.dz12 |
movq .cby2,mm0 |
movq .cby1,mm1 |
movq .cey1,mm3 |
movq .cey2,mm2 |
movq .cz1,mm4 |
movq .cz2,mm5 |
else if Ext = NON |
mov edx,.dbx13 |
add .cbx1,edx |
mov eax,.dbx12 |
add .cbx2,eax |
mov ebx,.dby13 |
add .cby1,ebx |
mov edx,.dby12 |
add .cby2,edx |
mov eax,.dex13 |
add .cex1,eax |
mov ebx,.dex12 |
add .cex2,ebx |
mov edx,.dey13 |
add .cey1,edx |
mov eax,.dey12 |
add .cey2,eax |
mov eax,.dx13 |
add .cx1,eax |
mov ebx,.dx12 |
add .cx2,ebx |
mov ebx,.dz13 |
add .cz1,ebx |
mov edx,.dz12 |
add .cz2,edx |
end if |
inc ecx |
cmp cx,.y2 |
jl .loop12 |
.loop12_done: |
; ---- lower part: scanlines y2 .. y3-1; side 2 restarts at vertex 2 and |
; ---- follows edge 2->3, side 1 keeps following edge 1->3 |
movsx ecx,.y2 |
cmp cx,.y3 |
jge .loop23_done |
movsx eax,.z2 |
shl eax,CATMULL_SHIFT |
mov .cz2,eax |
movsx eax,.x2 |
shl eax,ROUND |
mov .cx2,eax |
; NOTE(review): vertex 2 texture coords are zero-extended here while the |
; vertex 1 coords above are sign-extended - confirm coords are never negative. |
movzx eax,word[.b_x2] |
shl eax,ROUND |
mov .cbx2,eax |
movzx eax,word[.b_y2] |
shl eax,ROUND |
mov .cby2,eax |
movzx eax,word[.e_x2] |
shl eax,ROUND |
mov .cex2,eax |
movzx eax,word[.e_y2] |
shl eax,ROUND |
mov .cey2,eax |
.loop23: |
call .call_bump_line |
if Ext >= SSE2 |
movups xmm0,.cey2 |
movups xmm1,.cey1 |
movups xmm2,.dey23 |
movups xmm3,.dey13 |
paddd xmm0,xmm2 |
paddd xmm1,xmm3 |
movups .cey2,xmm0 |
movups .cey1,xmm1 |
movq mm4,.cz1 |
movq mm5,.cz2 |
paddd mm4,.dz13 |
paddd mm5,.dz23 |
movq .cz1,mm4 |
movq .cz2,mm5 |
end if |
if (Ext = MMX) | (Ext = SSE) |
movq mm0,.cby2 |
movq mm1,.cby1 |
movq mm2,.cey2 |
movq mm3,.cey1 |
movq mm4,.cz1 |
movq mm5,.cz2 |
paddd mm0,.dby23 |
paddd mm1,.dby13 |
paddd mm2,.dey23 |
paddd mm3,.dey13 |
paddd mm4,.dz13 |
paddd mm5,.dz23 |
movq .cby2,mm0 |
movq .cby1,mm1 |
movq .cey1,mm3 |
movq .cey2,mm2 |
movq .cz1,mm4 |
movq .cz2,mm5 |
else if Ext = NON |
mov eax,.dx13 |
add .cx1,eax |
mov ebx,.dx23 |
add .cx2,ebx |
mov edx,.dbx13 |
add .cbx1,edx |
mov eax,.dbx23 |
add .cbx2,eax |
mov ebx,.dby13 |
add .cby1,ebx |
mov edx,.dby23 |
add .cby2,edx |
mov eax,.dex13 |
add .cex1,eax |
mov ebx,.dex23 |
add .cex2,ebx |
mov edx,.dey13 |
add .cey1,edx |
mov eax,.dey23 |
add .cey2,eax |
mov ebx,.dz13 |
add .cz1,ebx |
mov edx,.dz23 |
add .cz2,edx |
end if |
inc ecx |
cmp cx,.y3 |
jl .loop23 |
.loop23_done: |
; drop all locals and remove the 34 bytes of stack arguments |
mov esp,ebp |
ret 34 |
; Draws one scanline of the triangle: passes the current span state of |
; bump_triangle_z to bump_line_z. |
; in: ecx - scanline y, edi - pointer to screen buffer, ebp - frame of |
; bump_triangle_z |
; All general purpose registers are preserved by pushad/popad; this includes |
; ebp, which bump_line_z overwrites. |
; The 14 dwords pushed below are the 56 bytes removed by "ret 56" in |
; bump_line_z, so the push order must match its argument layout. |
.call_bump_line: |
; push ebp |
; push ecx |
pushad |
push dword .cz1 |
push dword .cz2 |
push .z_buff |
push .t_emap |
push .t_bmap |
push dword .cey2 |
push .cex2 |
push dword .cby2 |
push .cbx2 |
push dword .cey1 |
push .cex1 |
push dword .cby1 |
push .cbx1 |
push ecx |
; x coordinates are passed as integers: drop the ROUND fraction bits |
mov eax,.cx1 |
sar eax,ROUND |
mov ebx,.cx2 |
sar ebx,ROUND |
call bump_line_z |
popad |
ret |
bump_line_z: |
;-------------- Draws one horizontal span of a bump mapped triangle with |
;-------------- a per pixel Z-buffer test. |
;--------------in: eax - x1 |
;-------------- ebx - x2 |
;-------------- edi - pointer to screen buffer |
;-------------- out: none; the 56 bytes of stack arguments are removed |
;-------------- on return. ebp and the other registers used here are |
;-------------- not preserved. |
;stack - another parameters : |
.y equ dword [ebp+4] |
.bx1 equ [ebp+8] ; --- |
.by1 equ dword [ebp+12] ; | |
.ex1 equ [ebp+16] ; | |
.ey1 equ dword [ebp+20] ; |> bump and env coords |
.bx2 equ [ebp+24] ; |> shifted shl ROUND |
.by2 equ dword [ebp+28] ; | |
.ex2 equ [ebp+32] ; | |
.ey2 equ dword [ebp+36] ; --- |
.bmap equ dword [ebp+40] |
.emap equ dword [ebp+44] |
.z_buff equ dword [ebp+48] |
.z2 equ dword [ebp+52] ; -- |> z coords shifted |
.z1 equ dword [ebp+56] ; -- shl CATMULL_SHIFT |
; locals, created by the pushes / sub esp below in exactly this order |
.x1 equ dword [ebp-4] |
.x2 equ dword [ebp-8] |
.dbx equ dword [ebp-12] |
.dby equ [ebp-16] |
.dex equ dword [ebp-20] |
.dey equ [ebp-24] |
.dz equ dword [ebp-28] |
.cbx equ dword [ebp-32] |
.cby equ [ebp-36] |
.cex equ dword [ebp-40] |
.cey equ [ebp-44] |
.cz equ dword [ebp-48] |
.czbuff equ dword [ebp-52] |
.temp1 equ ebp-60 |
.temp2 equ ebp-68 |
.temp3 equ ebp-76 |
.temp4 equ ebp-84 |
.temp5 equ ebp-92 |
mov ebp,esp |
; reject scanlines outside 0 .. size_y_var-1 |
mov ecx,.y |
or ecx,ecx |
jl .bl_end |
; mov dx,[size_x_var] |
; dec dx |
cmp cx,[size_y_var] ;SIZE_Y |
jge .bl_end |
; make x1 < x2, swapping both end point parameter sets; empty span -> exit |
cmp eax,ebx |
jl .bl_ok |
je .bl_end |
xchg eax,ebx |
if Ext=NON |
mov edx,.bx1 |
xchg edx,.bx2 |
mov .bx1,edx |
mov edx,.by1 |
xchg edx,.by2 |
mov .by1,edx |
mov edx,.ex1 |
xchg edx,.ex2 |
mov .ex1,edx |
mov edx,.ey1 |
xchg edx,.ey2 |
mov .ey1,edx |
end if |
if Ext = MMX |
movq mm0,.bx1 |
movq mm1,.ex1 |
movq mm2,.bx2 |
movq mm3,.ex2 |
movq .bx2,mm0 |
movq .ex2,mm1 |
movq .bx1,mm2 |
movq .ex1,mm3 |
end if |
if Ext >= SSE |
; bx, by, ex, ey of one end point are four adjacent dwords |
movups xmm0,.bx1 |
movups xmm1,.bx2 |
movups .bx2,xmm0 |
movups .bx1,xmm1 |
end if |
mov edx,.z1 |
xchg edx,.z2 |
mov .z1,edx |
.bl_ok: |
push eax |
push ebx ;store x1, x2 |
; reject spans that lie completely outside the screen in x |
movzx edx,word[size_x_var] |
dec edx |
cmp .x1,edx ;SIZE_X |
jge .bl_end |
cmp .x2,0 |
jle .bl_end |
mov ebx,.x2 |
sub ebx,.x1 ; ebx = span length, > 0 here |
if Ext >= SSE |
; four per pixel deltas with one packed divide |
sub esp,16 ; room for .dbx, .dby, .dex, .dey |
cvtsi2ss xmm3,ebx ;rcps |
shufps xmm3,xmm3,0 |
cvtpi2ps xmm0,.bx1 ;mm0 |
movlhps xmm0,xmm0 |
cvtpi2ps xmm0,.ex1 ;mm2 |
cvtpi2ps xmm1,.bx2 ;mm1 |
movlhps xmm1,xmm1 |
cvtpi2ps xmm1,.ex2 ;mm3 |
subps xmm1,xmm0 |
divps xmm1,xmm3 |
shufps xmm1,xmm1,10110001b ; swap inside each pair -> (dey, dex, dby, dbx) |
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords |
movhlps xmm1,xmm1 |
cvtps2pi mm1,xmm1 |
movq .dey,mm0 |
movq .dby,mm1 |
else |
mov eax,.bx2 ; calc .dbx |
sub eax,.bx1 |
cdq |
idiv ebx |
push eax |
mov eax,.by2 ; calc .dby |
sub eax,.by1 |
cdq |
idiv ebx |
push eax |
mov eax,.ex2 ; calc .dex |
sub eax,.ex1 |
cdq |
idiv ebx |
push eax |
mov eax,.ey2 ; calc .dey |
sub eax,.ey1 |
cdq |
idiv ebx |
push eax |
end if |
mov eax,.z2 ; calc .dz |
sub eax,.z1 |
cdq |
idiv ebx |
push eax |
cmp .x1,0 ; set correctly begin variable |
jge @f ; CLIPPING ON FUNCTION |
; cutting triangle exceeds screen |
; advance every start value by delta * (-x1), then start at x = 0 |
mov ebx,.x1 |
neg ebx |
imul ebx ; eax = .dz * abs(.x1) |
add .z1,eax |
mov .x1,0 |
mov eax,.dbx |
imul ebx |
add .bx1,eax |
mov eax,.dby |
imul ebx |
add .by1,eax |
mov eax,.dex |
imul ebx |
add .ex1,eax |
mov eax,.dey |
imul ebx |
add .ey1,eax |
@@: |
; clamp the right end to size_x_var-1 |
movzx edx,word[size_x_var] |
dec edx |
cmp .x2,edx ;SIZE_X |
jl @f |
mov .x2,edx ;SIZE_X |
@@: |
movzx eax,word[size_x_var] ;SIZE_X ;calc memory begin in buffers |
mov ebx,.y |
mul ebx |
mov ebx,.x1 |
add eax,ebx |
mov ebx,eax |
lea eax,[eax*3] ; 3 bytes per screen pixel |
add edi,eax |
mov esi,.z_buff ; z-buffer filled with dd variables |
shl ebx,2 |
add esi,ebx |
mov ecx,.x2 |
sub ecx,.x1 ; ecx = number of pixels to draw |
; init current variables |
push dword .bx1 |
push .by1 |
push dword .ex1 |
push .ey1 |
push .z1 ; current z shl CATMULL_SHIFT |
push esi |
;if Ext = SSE2 |
; movups xmm1,.dey |
;end if |
if Ext>=MMX |
; mm0 = (cby, cbx), mm1 = (cey, cex), mm2/mm3 = matching deltas |
movq mm0,.cby |
movq mm1,.cey |
movq mm2,.dby |
movq mm3,.dey |
end if |
if Ext >= SSE2 |
; loop constants for the packed neighbour fetch: |
; xmm1 = TEXTURE_SIZE mask, xmm2 = (-1, 1, -TEX_X, TEX_X), xmm3 = .bmap |
mov eax,TEXTURE_SIZE |
movd xmm1,eax |
shufps xmm1,xmm1,0 |
push dword TEX_X |
push dword -TEX_X |
push dword 1 |
push dword -1 |
movups xmm2,[esp] |
movd xmm3,.bmap |
shufps xmm3,xmm3,0 |
end if |
;align 16 |
.draw: |
; if TEX = SHIFTING ;bump drawing only in shifting mode |
; Z test: the pixel is skipped unless its z is below the Z-buffer value |
mov esi,.czbuff ; .czbuff current address in buffer |
mov ebx,.cz ; .cz - cur z position |
cmp ebx,dword[esi] |
jge .skip |
if Ext>=MMX |
movq mm6,mm0 |
psrld mm6,ROUND |
movd eax,mm6 |
psrlq mm6,32 |
movd esi,mm6 |
else |
mov eax,.cby |
sar eax,ROUND |
mov esi,.cbx |
sar esi,ROUND |
end if |
shl eax,TEX_SHIFT ;- |
add esi,eax ;- ; esi - current bump map index |
; The packed constants are prepared under "Ext >= SSE2" above, so the |
; packed fetch is selected by the same condition; both branches compute |
; the same eax / edx. |
if Ext >= SSE2 |
movd xmm0,esi |
shufps xmm0,xmm0,0 |
paddd xmm0,xmm2 |
pand xmm0,xmm1 |
paddd xmm0,xmm3 |
movd ebx,xmm0 |
movzx eax,byte[ebx] |
; |
; shufps xmm0,xmm0,11100001b |
psrldq xmm0,4 |
movd ebx,xmm0 |
movzx ebx,byte[ebx] |
sub eax,ebx |
; |
; shufps xmm0,xmm0,11111110b |
psrldq xmm0,4 |
movd ebx,xmm0 |
movzx edx, byte [ebx] |
; |
; shufps xmm0,xmm0,11111111b |
psrldq xmm0,4 |
movd ebx,xmm0 |
movzx ebx, byte [ebx] |
sub edx,ebx |
; |
else |
mov ebx,esi |
dec ebx |
and ebx,TEXTURE_SIZE |
add ebx,.bmap |
movzx eax,byte [ebx] |
mov ebx,esi |
inc ebx |
and ebx,TEXTURE_SIZE |
add ebx,.bmap |
movzx ebx,byte [ebx] |
sub eax,ebx |
mov ebx,esi |
sub ebx,TEX_X |
and ebx,TEXTURE_SIZE |
add ebx,.bmap |
movzx edx,byte [ebx] |
mov ebx,esi |
add ebx,TEX_X |
and ebx,TEXTURE_SIZE |
add ebx,.bmap |
movzx ebx,byte [ebx] |
sub edx,ebx |
end if |
; eax - horizontal sub |
; edx - vertical sub |
if Ext = NON |
mov ebx,.cex ;.cex - current env map X |
sar ebx,ROUND |
add eax,ebx ; eax - modified x coord |
mov ebx,.cey ;.cey - current env map y |
sar ebx,ROUND |
add edx,ebx ; edx - modified y coord |
else |
movq mm6,mm1 ; mm6 - copy of cur env coords |
psrld mm6,ROUND |
movd ebx,mm6 |
psrlq mm6,32 |
add eax,ebx |
movd ebx,mm6 |
add edx,ebx |
end if |
; Environment lookup is valid only for 0 <= x < TEX_X and 0 <= y < TEX_Y. |
; A coordinate equal to TEX_X / TEX_Y must be rejected as well, otherwise |
; the texel is fetched from the next row or from behind the last row. |
or eax,eax |
jl .black |
cmp eax,TEX_X |
jge .black |
or edx,edx |
jl .black |
cmp edx,TEX_Y |
jge .black |
shl edx,TEX_SHIFT |
add edx,eax |
lea esi,[edx*3] ; 3 bytes per environment map texel |
add esi,.emap |
lodsd |
jmp .put_pixel |
.black: |
xor eax,eax |
.put_pixel: |
; stores 4 bytes and steps back one, i.e. advances edi by 3 |
stosd |
dec edi |
mov ebx,.cz |
mov esi,.czbuff |
mov dword[esi],ebx |
jmp .no_skip |
.skip: |
add edi,3 |
.no_skip: |
add .czbuff,4 |
;if Ext = SSE2 |
; movups xmm0,.cey |
; paddd xmm0,xmm1 |
; movups .cey,xmm0 |
; |
;end if |
if Ext >= MMX |
paddd mm0,mm2 |
paddd mm1,mm3 |
end if |
if Ext=NON |
mov eax,.dbx |
add .cbx,eax |
mov eax,.dby |
add .cby,eax |
mov eax,.dex |
add .cex,eax |
mov eax,.dey |
add .cey,eax |
end if |
mov eax,.dz |
add .cz,eax |
dec ecx |
jnz .draw |
; end if |
.bl_end: |
; drop all locals and remove the 56 bytes of stack arguments |
mov esp,ebp |
ret 56 |
;SIZE_X equ 350 |
;SIZE_Y equ 350 |
ROUND equ 8 |
;TEX_X equ 512 |
;TEX_Y equ 512 |
;TEXTURE_SIZE EQU (512*512)-1 |
;TEX_SHIFT EQU 9 |
CATMULL_SHIFT equ 8 |
;TEXTURE_SIZE EQU (TEX_X * TEX_Y)-1 |
;Ext = NON |
;MMX = 1 |
;NON = 0 |
;------- Big thanks to Majuma (www.majuma.xt.pl) for absolutely great--- |
;------- DOS 13h mode demos -------------------------------------------- |
;------- Procedure draws bump triangle using Catmull Z-buffer algorithm- |
;------- (Z coordinate interpolation)----------------------------------- |
bump_triangle_z: |
;------------------in - eax - x1 shl 16 + y1 ----------- |
;---------------------- ebx - x2 shl 16 + y2 ----------- |
;---------------------- ecx - x3 shl 16 + y3 ----------- |
;---------------------- edx - pointer to bump map ------ |
;---------------------- esi - pointer to environment map |
;---------------------- edi - pointer to screen buffer-- |
;---------------------- stack : bump coordinates-------- |
;---------------------- environment coordinates- |
;---------------------- Z position coordinates-- |
;---------------------- pointer to Z buffer----- |
;-- Z-buffer - filled with coordinates as dword -------- |
;-- (Z coor. as word) shl CATMULL_SHIFT ---------------- |
.b_x1 equ ebp+4 ; procedure don't save registers !!! |
.b_y1 equ ebp+6 ; each coordinate as word |
.b_x2 equ ebp+8 |
.b_y2 equ ebp+10 |
.b_x3 equ ebp+12 |
.b_y3 equ ebp+14 |
.e_x1 equ ebp+16 |
.e_y1 equ ebp+18 |
.e_x2 equ ebp+20 |
.e_y2 equ ebp+22 |
.e_x3 equ ebp+24 |
.e_y3 equ ebp+26 |
.z1 equ word[ebp+28] |
.z2 equ word[ebp+30] |
.z3 equ word[ebp+32] |
.z_buff equ dword[ebp+34] ; pointer to Z-buffer |
.t_bmap equ dword[ebp-4] ; pointer to bump map |
.t_emap equ dword[ebp-8] ; pointer to e. map |
.x1 equ word[ebp-10] |
.y1 equ word[ebp-12] |
.x2 equ word[ebp-14] |
.y2 equ word[ebp-16] |
.x3 equ word[ebp-18] |
.y3 equ word[ebp-20] |
.dx12 equ dword[ebp-24] |
.dz12 equ [ebp-28] |
.dbx12 equ dword[ebp-32] |
.dby12 equ [ebp-36] |
.dex12 equ dword[ebp-40] |
.dey12 equ [ebp-44] |
.dx13 equ dword[ebp-48] |
.dz13 equ [ebp-52] |
.dbx13 equ dword[ebp-56] |
.dby13 equ [ebp-60] |
.dex13 equ dword[ebp-64] |
.dey13 equ [ebp-68] |
.dx23 equ dword[ebp-72] |
.dz23 equ [ebp-76] |
.dbx23 equ dword[ebp-80] |
.dby23 equ [ebp-84] |
.dex23 equ dword[ebp-88] |
.dey23 equ [ebp-92] |
.cx1 equ dword[ebp-96] ; current variables |
.cz1 equ [ebp-100] |
.cx2 equ dword[ebp-104] |
.cz2 equ [ebp-108] |
.cbx1 equ dword[ebp-112] |
.cby1 equ [ebp-116] |
.cex1 equ dword[ebp-120] |
.cey1 equ [ebp-124] |
.cbx2 equ dword[ebp-128] |
.cby2 equ [ebp-132] |
.cex2 equ dword[ebp-136] |
.cey2 equ [ebp-140] |
mov ebp,esp |
push edx ; store bump map |
push esi ; store e. map |
; sub esp,120 |
.sort3: ; sort triangle coordinates... |
cmp ax,bx |
jle .sort1 |
xchg eax,ebx |
mov edx,dword[.b_x1] |
xchg edx,dword[.b_x2] |
mov dword[.b_x1],edx |
mov edx,dword[.e_x1] |
xchg edx,dword[.e_x2] |
mov dword[.e_x1],edx |
mov dx,.z1 |
xchg dx,.z2 |
mov .z1,dx |
.sort1: |
cmp bx,cx |
jle .sort2 |
xchg ebx,ecx |
mov edx,dword[.b_x2] |
xchg edx,dword[.b_x3] |
mov dword[.b_x2],edx |
mov edx,dword[.e_x2] |
xchg edx,dword[.e_x3] |
mov dword[.e_x2],edx |
mov dx,.z2 |
xchg dx,.z3 |
mov .z2,dx |
jmp .sort3 |
.sort2: |
push eax ; store triangle coords in variables |
push ebx |
push ecx |
mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that |
and edx,ebx ; if *all* of them are negative a sign flag is raised |
and edx,ecx |
and edx,eax |
test edx,80008000h ; Check both X&Y at once |
jne .loop23_done |
; mov edx,eax ; eax,ebx,ecx are ORd together into edx which means that |
; or edx,ebx ; if any *one* of them is negative a sign flag is raised |
; or edx,ecx |
; test edx,80000000h ; Check only X |
; jne .loop23_done |
; cmp .x1,SIZE_X ; { |
; jg .loop23_done |
; cmp .x2,SIZE_X ; This can be optimized with effort |
; jg .loop23_done |
; cmp .x3,SIZE_X |
; jg .loop23_done ; { |
mov bx,.y2 ; calc delta 12 |
sub bx,.y1 |
jnz .bt_dx12_make |
mov ecx,6 |
xor edx,edx |
@@: |
push edx ;dword 0 |
loop @b |
jmp .bt_dx12_done |
.bt_dx12_make: |
mov ax,.x2 |
sub ax,.x1 |
cwde |
movsx ebx,bx |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dx12,eax |
push eax |
mov ax,.z2 |
sub ax,.z1 |
cwde |
shl eax,CATMULL_SHIFT |
cdq |
idiv ebx |
push eax |
if Ext>=SSE |
sub esp,16 |
cvtsi2ss xmm3,ebx ;rcps |
; mov eax,255 |
cvtsi2ss xmm4,[i255d] |
divss xmm3,xmm4 |
rcpss xmm3,xmm3 |
; mulss xmm3,xmm4 |
shufps xmm3,xmm3,0 |
movd mm0,[.b_x1] |
movd mm1,[.b_x2] |
movd mm2,[.e_x1] |
movd mm3,[.e_x2] |
pxor mm4,mm4 |
punpcklwd mm0,mm4 |
punpcklwd mm1,mm4 |
punpcklwd mm2,mm4 |
punpcklwd mm3,mm4 |
psubd mm1,mm0 |
psubd mm3,mm2 |
; cvtpi2ps xmm0,mm0 |
; movlhps xmm0,xmm0 |
; cvtpi2ps xmm0,mm2 |
cvtpi2ps xmm1,mm1 |
movlhps xmm1,xmm1 |
cvtpi2ps xmm1,mm3 |
; subps xmm1,xmm0 |
; cvtpi2ps xmm0,mm3 |
; divps xmm1,xmm3 |
mulps xmm1,xmm3 |
shufps xmm1,xmm1,10110001b |
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords |
movhlps xmm1,xmm1 |
cvtps2pi mm1,xmm1 |
movq .dey12,mm0 |
movq .dby12,mm1 |
else |
mov ax,word[.b_x2] |
sub ax,word[.b_x1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dbx12,eax |
push eax |
mov ax,word[.b_y2] |
sub ax,word[.b_y1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dby12,eax |
push eax |
mov ax,word[.e_x2] |
sub ax,word[.e_x1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dex12,eax |
push eax |
mov ax,word[.e_y2] |
sub ax,word[.e_y1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dey12,eax |
push eax |
end if |
.bt_dx12_done: |
mov bx,.y3 ; calc delta13 |
sub bx,.y1 |
jnz .bt_dx13_make |
mov ecx,6 |
xor edx,edx |
@@: |
push edx ;dword 0 |
loop @b |
jmp .bt_dx13_done |
.bt_dx13_make: |
mov ax,.x3 |
sub ax,.x1 |
cwde |
movsx ebx,bx |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dx13,eax |
push eax |
mov ax,.z3 |
sub ax,.z1 |
cwde |
shl eax,CATMULL_SHIFT |
cdq |
idiv ebx |
; mov .dz13,eax |
push eax |
if Ext>=SSE |
sub esp,16 |
cvtsi2ss xmm3,ebx ;rcps |
; mov eax,255 |
cvtsi2ss xmm4,[i255d] |
divss xmm3,xmm4 |
rcpss xmm3,xmm3 |
; mulss xmm3,xmm4 |
shufps xmm3,xmm3,0 |
movd mm0,[.b_x1] |
movd mm1,[.b_x3] |
movd mm2,[.e_x1] |
movd mm3,[.e_x3] |
pxor mm4,mm4 |
punpcklwd mm0,mm4 |
punpcklwd mm1,mm4 |
punpcklwd mm2,mm4 |
punpcklwd mm3,mm4 |
psubd mm1,mm0 |
psubd mm3,mm2 |
; cvtpi2ps xmm0,mm0 |
; movlhps xmm0,xmm0 |
; cvtpi2ps xmm0,mm2 |
cvtpi2ps xmm1,mm1 |
movlhps xmm1,xmm1 |
cvtpi2ps xmm1,mm3 |
; subps xmm1,xmm0 |
; cvtpi2ps xmm0,mm3 |
; divps xmm1,xmm3 |
mulps xmm1,xmm3 |
shufps xmm1,xmm1,10110001b |
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords |
movhlps xmm1,xmm1 |
cvtps2pi mm1,xmm1 |
movq .dey13,mm0 |
movq .dby13,mm1 |
else |
mov ax,word[.b_x3] |
sub ax,word[.b_x1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dbx13,eax |
push eax |
mov ax,word[.b_y3] |
sub ax,word[.b_y1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dby13,eax |
push eax |
mov ax,word[.e_x3] |
sub ax,word[.e_x1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dex13,eax |
push eax |
mov ax,word[.e_y3] |
sub ax,word[.e_y1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dey13,eax |
push eax |
end if |
.bt_dx13_done: |
mov bx,.y3 ; calc delta23 |
sub bx,.y2 |
jnz .bt_dx23_make |
mov ecx,6 |
xor edx,edx |
@@: |
push edx ;dword 0 |
loop @b |
jmp .bt_dx23_done |
.bt_dx23_make: |
mov ax,.x3 |
sub ax,.x2 |
cwde |
movsx ebx,bx |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dx23,eax |
push eax |
mov ax,.z3 |
sub ax,.z2 |
cwde |
shl eax,CATMULL_SHIFT |
cdq |
idiv ebx |
; mov .dz23,eax |
push eax |
; sub esp,40 |
if Ext>=SSE |
sub esp,16 |
cvtsi2ss xmm3,ebx ;rcps |
; mov eax,255 |
cvtsi2ss xmm4,[i255d] |
divss xmm3,xmm4 |
rcpss xmm3,xmm3 |
; mulss xmm3,xmm4 |
shufps xmm3,xmm3,0 |
movd mm0,[.b_x2] |
movd mm1,[.b_x3] |
movd mm2,[.e_x2] |
movd mm3,[.e_x3] |
pxor mm4,mm4 |
punpcklwd mm0,mm4 |
punpcklwd mm1,mm4 |
punpcklwd mm2,mm4 |
punpcklwd mm3,mm4 |
psubd mm1,mm0 |
psubd mm3,mm2 |
; cvtpi2ps xmm0,mm0 |
; movlhps xmm0,xmm0 |
; cvtpi2ps xmm0,mm2 |
cvtpi2ps xmm1,mm1 |
movlhps xmm1,xmm1 |
cvtpi2ps xmm1,mm3 |
; subps xmm1,xmm0 |
; cvtpi2ps xmm0,mm3 |
; divps xmm1,xmm3 |
mulps xmm1,xmm3 |
shufps xmm1,xmm1,10110001b |
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords |
movhlps xmm1,xmm1 |
cvtps2pi mm1,xmm1 |
movq .dey23,mm0 |
movq .dby23,mm1 |
else |
mov ax,word[.b_x3] |
sub ax,word[.b_x2] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dbx23,eax |
push eax |
mov ax,word[.b_y3] |
sub ax,word[.b_y2] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dby23,eax |
push eax |
mov ax,word[.e_x3] |
sub ax,word[.e_x2] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dex23,eax |
push eax |
mov ax,word[.e_y3] |
sub ax,word[.e_y2] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dey23,eax |
push eax |
end if |
.bt_dx23_done: |
sub esp,48 |
movsx eax,.x1 |
shl eax,ROUND |
mov .cx1,eax |
mov .cx2,eax |
; push eax |
; push eax |
movsx eax,word[.b_x1] |
shl eax,ROUND |
mov .cbx1,eax |
mov .cbx2,eax |
; push eax |
; push eax |
movsx eax,word[.b_y1] |
shl eax,ROUND |
mov .cby1,eax |
mov .cby2,eax |
; push eax |
; push eax |
movsx eax,word[.e_x1] |
shl eax,ROUND |
mov .cex1,eax |
mov .cex2,eax |
; push eax |
; push eax |
movsx eax,word[.e_y1] |
shl eax,ROUND |
mov .cey1,eax |
mov .cey2,eax |
; push eax |
; push eax |
movsx eax,.z1 |
shl eax,CATMULL_SHIFT |
mov .cz1,eax |
mov .cz2,eax |
; push eax |
; push eax |
movsx ecx,.y1 |
cmp cx,.y2 |
jge .loop12_done |
.loop12: |
call .call_bump_line |
if Ext >= SSE2 |
movups xmm0,.cey2 |
movups xmm1,.cey1 |
movups xmm2,.dey12 |
movups xmm3,.dey13 |
paddd xmm0,xmm2 |
paddd xmm1,xmm3 |
movups .cey2,xmm0 |
movups .cey1,xmm1 |
movq mm4,.cz1 |
movq mm5,.cz2 |
paddd mm4,.dz13 |
paddd mm5,.dz12 |
movq .cz1,mm4 |
movq .cz2,mm5 |
end if |
if (Ext = MMX) | (Ext = SSE) |
movq mm0,.cby2 |
movq mm1,.cby1 |
movq mm2,.cey2 |
movq mm3,.cey1 |
movq mm4,.cz1 |
movq mm5,.cz2 |
paddd mm0,.dby12 |
paddd mm1,.dby13 |
paddd mm2,.dey12 |
paddd mm3,.dey13 |
paddd mm4,.dz13 |
paddd mm5,.dz12 |
movq .cby2,mm0 |
movq .cby1,mm1 |
movq .cey1,mm3 |
movq .cey2,mm2 |
movq .cz1,mm4 |
movq .cz2,mm5 |
else if Ext = NON |
mov edx,.dbx13 |
add .cbx1,edx |
mov eax,.dbx12 |
add .cbx2,eax |
mov ebx,.dby13 |
add .cby1,ebx |
mov edx,.dby12 |
add .cby2,edx |
mov eax,.dex13 |
add .cex1,eax |
mov ebx,.dex12 |
add .cex2,ebx |
mov edx,.dey13 |
add .cey1,edx |
mov eax,.dey12 |
add .cey2,eax |
mov eax,.dx13 |
add .cx1,eax |
mov ebx,.dx12 |
add .cx2,ebx |
mov ebx,.dz13 |
add .cz1,ebx |
mov edx,.dz12 |
add .cz2,edx |
end if |
inc ecx |
cmp cx,.y2 |
jl .loop12 |
.loop12_done: |
movsx ecx,.y2 |
cmp cx,.y3 |
jge .loop23_done |
movsx eax,.z2 |
shl eax,CATMULL_SHIFT |
mov .cz2,eax |
movsx eax,.x2 |
shl eax,ROUND |
mov .cx2,eax |
movzx eax,word[.b_x2] |
shl eax,ROUND |
mov .cbx2,eax |
movzx eax,word[.b_y2] |
shl eax,ROUND |
mov .cby2,eax |
movzx eax,word[.e_x2] |
shl eax,ROUND |
mov .cex2,eax |
movzx eax,word[.e_y2] |
shl eax,ROUND |
mov .cey2,eax |
.loop23: |
call .call_bump_line |
if Ext >= SSE2 |
movups xmm0,.cey2 |
movups xmm1,.cey1 |
movups xmm2,.dey23 |
movups xmm3,.dey13 |
paddd xmm0,xmm2 |
paddd xmm1,xmm3 |
movups .cey2,xmm0 |
movups .cey1,xmm1 |
movq mm4,.cz1 |
movq mm5,.cz2 |
paddd mm4,.dz13 |
paddd mm5,.dz23 |
movq .cz1,mm4 |
movq .cz2,mm5 |
end if |
if (Ext = MMX) | (Ext = SSE) |
movq mm0,.cby2 |
movq mm1,.cby1 |
movq mm2,.cey2 |
movq mm3,.cey1 |
movq mm4,.cz1 |
movq mm5,.cz2 |
paddd mm0,.dby23 |
paddd mm1,.dby13 |
paddd mm2,.dey23 |
paddd mm3,.dey13 |
paddd mm4,.dz13 |
paddd mm5,.dz23 |
movq .cby2,mm0 |
movq .cby1,mm1 |
movq .cey1,mm3 |
movq .cey2,mm2 |
movq .cz1,mm4 |
movq .cz2,mm5 |
else if Ext = NON |
mov eax,.dx13 |
add .cx1,eax |
mov ebx,.dx23 |
add .cx2,ebx |
mov edx,.dbx13 |
add .cbx1,edx |
mov eax,.dbx23 |
add .cbx2,eax |
mov ebx,.dby13 |
add .cby1,ebx |
mov edx,.dby23 |
add .cby2,edx |
mov eax,.dex13 |
add .cex1,eax |
mov ebx,.dex23 |
add .cex2,ebx |
mov edx,.dey13 |
add .cey1,edx |
mov eax,.dey23 |
add .cey2,eax |
mov ebx,.dz13 |
add .cz1,ebx |
mov edx,.dz23 |
add .cz2,edx |
end if |
inc ecx |
cmp cx,.y3 |
jl .loop23 |
.loop23_done: |
mov esp,ebp |
ret 34 |
;-- Local helper: draws one scanline by calling bump_line_z with the ---- |
;-- current edge values of the enclosing triangle procedure. ----------- |
;-- in:  ecx - scanline number (pushed last; bump_line_z reads it as .y) |
;--      edi - left untouched here, so bump_line_z receives it as is --- |
;-- out: all general purpose registers are restored by popad ----------- |
.call_bump_line: |
; push ebp |
; push ecx |
pushad |
; 14 argument dwords follow. They are pushed in the reverse order of |
; bump_line_z's [ebp+4]..[ebp+56] list; bump_line_z releases them itself |
; with "ret 56". |
push dword .cz1 |
push dword .cz2 |
push .z_buff |
push .t_emap |
push .t_bmap |
push dword .cey2 |
push .cex2 |
push dword .cby2 |
push .cbx2 |
push dword .cey1 |
push .cex1 |
push dword .cby1 |
push .cbx1 |
push ecx |
; eax / ebx = the two current edge x values with the ROUND fraction |
; bits shifted out (arithmetic shift keeps the sign). |
mov eax,.cx1 |
sar eax,ROUND |
mov ebx,.cx2 |
sar ebx,ROUND |
call bump_line_z |
popad |
ret |
;----------------------------------------------------------------------- |
; bump_line_z - draws one horizontal span of a bump mapped triangle and |
; tests / updates the Z-buffer for every pixel. |
; The frame is created with "mov ebp,esp" only (ebp is not saved), so |
; [ebp] holds the return address and stack arguments start at [ebp+4]. |
; Locals at negative offsets are created later by push / sub esp. |
; The 14 argument dwords are released by the final "ret 56". |
; eax, ebx, ecx, edx, esi, edi and ebp are modified and not restored. |
;----------------------------------------------------------------------- |
bump_line_z: |
;--------------in: eax - x1 |
;-------------- ebx - x2 |
;-------------- edi - pointer to screen buffer |
;stack - another parameters : |
.y equ dword [ebp+4] |
.bx1 equ [ebp+8] ; --- |
.by1 equ dword [ebp+12] ; | |
.ex1 equ [ebp+16] ; | |
.ey1 equ dword [ebp+20] ; |> bump and env coords |
.bx2 equ [ebp+24] ; |> shifted shl ROUND |
.by2 equ dword [ebp+28] ; | |
.ex2 equ [ebp+32] ; | |
.ey2 equ dword [ebp+36] ; --- |
.bmap equ dword [ebp+40] |
.emap equ dword [ebp+44] |
.z_buff equ dword [ebp+48] |
.z2 equ dword [ebp+52] ; -- |> z coords shifted |
.z1 equ dword [ebp+56] ; -- shl CATMULL_SHIFT |
; locals: span ends, per-pixel deltas, then current (running) values |
.x1 equ dword [ebp-4] |
.x2 equ dword [ebp-8] |
.dbx equ dword [ebp-12] |
.dby equ [ebp-16] |
.dex equ dword [ebp-20] |
.dey equ [ebp-24] |
.dz equ dword [ebp-28] |
.cbx equ dword [ebp-32] |
.cby equ [ebp-36] |
.cex equ dword [ebp-40] |
.cey equ [ebp-44] |
.cz equ dword [ebp-48] |
.czbuff equ dword [ebp-52] |
; .temp1 - .temp5 are not referenced between here and .bl_end |
.temp1 equ ebp-60 |
.temp2 equ ebp-68 |
.temp3 equ ebp-76 |
.temp4 equ ebp-84 |
.temp5 equ ebp-92 |
mov ebp,esp |
; reject the line when y < 0 or y >= [size_y_var] (16 bit compare) |
mov ecx,.y |
or ecx,ecx |
jl .bl_end |
; mov dx,[size_x_var] |
; dec dx |
cmp cx,[size_y_var] ;SIZE_Y |
jge .bl_end |
; x1 < x2: go on; x1 = x2: nothing to draw; x1 > x2: exchange both ends |
cmp eax,ebx |
jl .bl_ok |
je .bl_end |
xchg eax,ebx |
; exchange the bump and env coordinates of both ends - one dword at a |
; time, one x/y pair per movq, or all four dwords per movups |
if Ext=NON |
mov edx,.bx1 |
xchg edx,.bx2 |
mov .bx1,edx |
mov edx,.by1 |
xchg edx,.by2 |
mov .by1,edx |
mov edx,.ex1 |
xchg edx,.ex2 |
mov .ex1,edx |
mov edx,.ey1 |
xchg edx,.ey2 |
mov .ey1,edx |
end if |
if Ext = MMX |
movq mm0,.bx1 |
movq mm1,.ex1 |
movq mm2,.bx2 |
movq mm3,.ex2 |
movq .bx2,mm0 |
movq .ex2,mm1 |
movq .bx1,mm2 |
movq .ex1,mm3 |
end if |
if Ext >= SSE |
movups xmm0,.bx1 |
movups xmm1,.bx2 |
movups .bx2,xmm0 |
movups .bx1,xmm1 |
end if |
; the depth values of the two ends are exchanged as well |
mov edx,.z1 |
xchg edx,.z2 |
mov .z1,edx |
; Here eax = x1 < ebx = x2. The two pushes create the locals .x1 / .x2. |
.bl_ok: |
push eax |
push ebx ;store x1, x2 |
; leave when the span starts at or beyond [size_x_var]-1 or ends at or |
; before column 0 |
movzx edx,word[size_x_var] |
dec edx |
cmp .x1,edx ;SIZE_X |
jge .bl_end |
cmp .x2,0 |
jle .bl_end |
; ebx = span length in pixels, the divisor for all per-pixel deltas |
mov ebx,.x2 |
sub ebx,.x1 |
if Ext >= SSE |
; reserve the .dbx .dby .dex .dey slots and fill them with four |
; differences divided in one divps |
sub esp,16 |
cvtsi2ss xmm3,ebx ;rcps |
shufps xmm3,xmm3,0 |
; each cvtpi2ps converts one x/y dword pair into the two low lanes |
cvtpi2ps xmm0,.bx1 ;mm0 |
movlhps xmm0,xmm0 |
cvtpi2ps xmm0,.ex1 ;mm2 |
cvtpi2ps xmm1,.bx2 ;mm1 |
movlhps xmm1,xmm1 |
cvtpi2ps xmm1,.ex2 ;mm3 |
subps xmm1,xmm0 |
divps xmm1,xmm3 |
; exchange x and y inside each pair so the qword stores below put y at |
; the lower address (.dey / .dby) and x above it (.dex / .dbx) |
shufps xmm1,xmm1,10110001b |
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords |
movhlps xmm1,xmm1 |
cvtps2pi mm1,xmm1 |
movq .dey,mm0 |
movq .dby,mm1 |
else |
; integer version: each quotient is pushed, creating the slots in the |
; order .dbx, .dby, .dex, .dey |
mov eax,.bx2 ; calc .dbx |
sub eax,.bx1 |
cdq |
idiv ebx |
push eax |
mov eax,.by2 ; calc .dby |
sub eax,.by1 |
cdq |
idiv ebx |
push eax |
mov eax,.ex2 ; calc .dex |
sub eax,.ex1 |
cdq |
idiv ebx |
push eax |
mov eax,.ey2 ; calc .dey |
sub eax,.ey1 |
cdq |
idiv ebx |
push eax |
end if |
; depth delta per pixel; the push creates .dz and eax keeps the value |
mov eax,.z2 ; calc .dz |
sub eax,.z1 |
cdq |
idiv ebx |
push eax |
; Left clipping: when .x1 is negative every start value is advanced by |
; delta * (-.x1) and .x1 becomes 0. eax still holds .dz from the |
; preceding division, ebx = -.x1 is the multiplier for all products. |
cmp .x1,0 ; set correctly begin variable |
jge @f ; CLIPPING ON FUNCTION |
; cutting triangle exceedes screen |
mov ebx,.x1 |
neg ebx |
imul ebx ; eax = .dz * abs(.x1) |
add .z1,eax |
mov .x1,0 |
mov eax,.dbx |
imul ebx |
add .bx1,eax |
mov eax,.dby |
imul ebx |
add .by1,eax |
mov eax,.dex |
imul ebx |
add .ex1,eax |
mov eax,.dey |
imul ebx |
add .ey1,eax |
@@: |
; Right clipping: .x2 is limited to [size_x_var]-1 |
movzx edx,word[size_x_var] |
dec edx |
cmp .x2,edx ;SIZE_X |
jl @f |
mov .x2,edx ;SIZE_X |
@@: |
; pixel index = [size_x_var] * y + x1; the screen buffer advances |
; 3 bytes per pixel, the Z-buffer 4 bytes per pixel |
movzx eax,word[size_x_var] ;SIZE_X ;calc memory begin in buffers |
mov ebx,.y |
mul ebx |
mov ebx,.x1 |
add eax,ebx |
mov ebx,eax |
lea eax,[eax*3] |
add edi,eax |
mov esi,.z_buff ; z-buffer filled with dd variables |
shl ebx,2 |
add esi,ebx |
; ecx = number of pixels to draw |
mov ecx,.x2 |
sub ecx,.x1 |
; init current variables |
; the six pushes create .cbx .cby .cex .cey .cz .czbuff in that order |
push dword .bx1 |
push .by1 |
push dword .ex1 |
push .ey1 |
push .z1 ; current z shl CATMULL_SHIFT |
push esi |
;if Ext = SSE2 |
; movups xmm1,.dey |
;end if |
; keep the running bump / env coordinates and their deltas in MMX |
; registers; each qword holds y in the low dword and x in the high one |
if Ext>=MMX |
movq mm0,.cby |
movq mm1,.cey |
movq mm2,.dby |
movq mm3,.dey |
end if |
; constants for the four neighbour fetch in the pixel loop: |
; xmm1 = TEXTURE_SIZE in every lane, xmm2 = (-1, 1, -TEX_X, TEX_X) read |
; from the four dwords just pushed, xmm3 = .bmap in every lane |
if Ext >= SSE2 |
mov eax,TEXTURE_SIZE |
movd xmm1,eax |
shufps xmm1,xmm1,0 |
push dword TEX_X |
push dword -TEX_X |
push dword 1 |
push dword -1 |
movups xmm2,[esp] |
movd xmm3,.bmap |
shufps xmm3,xmm3,0 |
end if |
;align 16 |
; Per-pixel loop of the span. |
; in:  ecx - number of pixels left, edi - current screen address |
;      .cz / .czbuff - current depth and current Z-buffer address |
;      Ext >= MMX : mm0 = .cby/.cbx, mm1 = .cey/.cex (y in the low dword), |
;                   mm2 = .dby/.dbx, mm3 = .dey/.dex |
;      Ext >= SSE2: xmm1 = TEXTURE_SIZE in every lane, |
;                   xmm2 = (-1, 1, -TEX_X, TEX_X), xmm3 = .bmap in every lane |
.draw: |
; if TEX = SHIFTING ;bump drawing only in shifting mode |
; depth test: the pixel is skipped unless .cz is below the stored value |
mov esi,.czbuff ; .czbuff current address in buffer |
mov ebx,.cz ; .cz - cur z position |
cmp ebx,dword[esi] |
jge .skip |
; eax = bump map y, esi = bump map x (ROUND fraction bits removed) |
if Ext>=MMX |
movq mm6,mm0 |
psrld mm6,ROUND |
movd eax,mm6 |
psrlq mm6,32 |
movd esi,mm6 |
else |
mov eax,.cby |
sar eax,ROUND |
mov esi,.cbx |
sar esi,ROUND |
end if |
shl eax,TEX_SHIFT ;- |
add esi,eax ;- ; esi - current bump map index |
; eax = bump[index-1] - bump[index+1], edx = bump[index-TEX_X] - |
; bump[index+TEX_X]; every index is masked with TEXTURE_SIZE first |
if Ext = SSE2 |
movd xmm0,esi |
shufps xmm0,xmm0,0 |
paddd xmm0,xmm2 |
pand xmm0,xmm1 |
paddd xmm0,xmm3 |
movd ebx,xmm0 |
movzx eax,byte[ebx] |
; |
; shufps xmm0,xmm0,11100001b |
psrldq xmm0,4 |
movd ebx,xmm0 |
movzx ebx,byte[ebx] |
sub eax,ebx |
; |
; shufps xmm0,xmm0,11111110b |
psrldq xmm0,4 |
movd ebx,xmm0 |
movzx edx, byte [ebx] |
; |
; shufps xmm0,xmm0,11111111b |
psrldq xmm0,4 |
movd ebx,xmm0 |
movzx ebx, byte [ebx] |
sub edx,ebx |
; |
else |
mov ebx,esi |
dec ebx |
and ebx,TEXTURE_SIZE |
add ebx,.bmap |
movzx eax,byte [ebx] |
mov ebx,esi |
inc ebx |
and ebx,TEXTURE_SIZE |
add ebx,.bmap |
movzx ebx,byte [ebx] |
sub eax,ebx |
mov ebx,esi |
sub ebx,TEX_X |
and ebx,TEXTURE_SIZE |
add ebx,.bmap |
movzx edx,byte [ebx] |
mov ebx,esi |
add ebx,TEX_X |
and ebx,TEXTURE_SIZE |
add ebx,.bmap |
movzx ebx,byte [ebx] |
sub edx,ebx |
end if |
; eax - horizontal sub |
; edx - vertical sub |
if Ext = NON |
mov ebx,.cex ;.cex - current env map X |
sar ebx,ROUND |
add eax,ebx ; eax - modified x coord |
mov ebx,.cey ;.cey - current env map y |
sar ebx,ROUND |
add edx,ebx ; edx - modified y coord |
else |
; FIX: mm1 was loaded from .cey, so its low dword is the env map y and |
; its high dword the env map x. The y part must go to edx and the x part |
; to eax, exactly as in the Ext = NON branch above (they were exchanged). |
movq mm6,mm1 ; mm6 - copy of cur env coords |
psrld mm6,ROUND |
movd ebx,mm6 ; ebx = current env map y |
add edx,ebx ; edx - modified y coord |
psrlq mm6,32 |
movd ebx,mm6 ; ebx = current env map x |
add eax,ebx ; eax - modified x coord |
end if |
; FIX: valid env map coordinates are 0..TEX_X-1 and 0..TEX_Y-1, so a |
; coordinate equal to TEX_X / TEX_Y has to be rejected too (jge instead |
; of jg); otherwise the fetch lands in the next row or behind the map. |
or eax,eax |
jl .black |
cmp eax,TEX_X |
jge .black |
or edx,edx |
jl .black |
cmp edx,TEX_Y |
jge .black |
; env map texel address = .emap + (y * TEX_X + x) * 3 |
shl edx,TEX_SHIFT |
add edx,eax |
lea esi,[edx*3] |
add esi,.emap |
lodsd |
jmp .put_pixel |
.black: |
xor eax,eax |
.put_pixel: |
; stosd stores 4 bytes and advances edi by 4; dec edi brings the step |
; back to 3 bytes per pixel |
stosd |
dec edi |
; record the depth of the pixel just drawn |
mov ebx,.cz |
mov esi,.czbuff |
mov dword[esi],ebx |
jmp .no_skip |
.skip: |
add edi,3 |
.no_skip: |
; advance the Z-buffer address and every interpolated value by one pixel |
add .czbuff,4 |
;if Ext = SSE2 |
; movups xmm0,.cey |
; paddd xmm0,xmm1 |
; movups .cey,xmm0 |
; |
;end if |
if Ext >= MMX |
paddd mm0,mm2 |
paddd mm1,mm3 |
end if |
if Ext=NON |
mov eax,.dbx |
add .cbx,eax |
mov eax,.dby |
add .cby,eax |
mov eax,.dex |
add .cex,eax |
mov eax,.dey |
add .cey,eax |
end if |
mov eax,.dz |
add .cz,eax |
dec ecx |
jnz .draw |
; end if |
; common exit: drop all locals and release the 14 argument dwords |
.bl_end: |
mov esp,ebp |
ret 56 |
/programs/demos/view3ds/bump_tex.inc |
---|
1,1817 → 1,1817 |
;CATMULL_SHIFT equ 8 |
;TEXTURE_SIZE EQU (TEX_X * TEX_Y)-1 |
;ROUND equ 8 |
;Ext = NON |
;MMX = 1 |
;NON = 0 |
;------- Big thanks to Majuma (www.majuma.xt.pl) for absolutely great--- |
;------- DOS 13h mode demos -------------------------------------------- |
;------- The procedure draws a bump mapped triangle with a texture, ---- |
;------- using the Catmull Z-buffer algorithm (Z coordinate ------------ |
;------- interpolation). Texture pixel is calculated: col1*col2/256 ---- |
;----------------------------------------------------------------------- |
; Frame: ebp = esp on entry (ebp itself is not saved), so [ebp] is the |
; return address and the caller's stack arguments start at [ebp+4]. |
; Everything at negative offsets is created by push / sub esp further |
; down, in the order of the equ lists below. |
;----------------------------------------------------------------------- |
bump_tex_triangle_z: |
;------------------in - eax - x1 shl 16 + y1 ----------- |
;---------------------- ebx - x2 shl 16 + y2 ----------- |
;---------------------- ecx - x3 shl 16 + y3 ----------- |
;---------------------- edx - pointer to bump map------- |
;---------------------- esi - pointer to env map-------- |
;---------------------- edi - pointer to screen buffer-- |
;---------------------- stack : bump coordinates-------- |
;---------------------- environment coordinates- |
;---------------------- Z position coordinates-- |
;---------------------- pointer to Z buffer----- |
;---------------------- pointer to texture------ |
;---------------------- texture coordinates----- |
;-- Z-buffer - filled with coordinates as dword -------- |
;-- (Z coor. as word) shl CATMULL_SHIFT ---------------- |
; stack arguments; each x/y pair of words shares one dword |
.b_x1 equ ebp+4 ; procedure don't save registers !!! |
.b_y1 equ ebp+6 ; each coordinate as word |
.b_x2 equ ebp+8 |
.b_y2 equ ebp+10 ; b - bump map coords |
.b_x3 equ ebp+12 ; e - env map coords |
.b_y3 equ ebp+14 |
.e_x1 equ ebp+16 |
.e_y1 equ ebp+18 |
.e_x2 equ ebp+20 |
.e_y2 equ ebp+22 |
.e_x3 equ ebp+24 |
.e_y3 equ ebp+26 |
.z1 equ word[ebp+28] |
.z2 equ word[ebp+30] |
.z3 equ word[ebp+32] |
.z_buff equ dword[ebp+34] ; pointer to Z-buffer |
.tex_ptr equ dword[ebp+38] ; ptr to texture |
.t_x1 equ ebp+42 ; texture coords |
.t_y1 equ ebp+44 |
.t_x2 equ ebp+46 |
.t_y2 equ ebp+48 |
.t_x3 equ ebp+50 |
.t_y3 equ ebp+52 |
; locals created by the pushes of edx and esi at the end of this block |
.t_bmap equ dword[ebp-4] ; pointer to bump map |
.t_emap equ dword[ebp-8] ; pointer to env map |
; vertex coordinates: words inside three dwords below the map pointers |
.x1 equ word[ebp-10] |
.y1 equ word[ebp-12] |
.x2 equ word[ebp-14] |
.y2 equ word[ebp-16] |
.x3 equ word[ebp-18] |
.y3 equ word[ebp-20] |
; per-edge deltas: 8 dwords for each of the edges 1-2, 1-3 and 2-3 |
; (the "if 0" variant with edi based 1-2 slots is never assembled) |
if 0 ;Ext <= SSE2 |
.dx12 equ dword[edi-4] |
.dz12 equ [edi-8] |
.dbx12 equ dword[edi-12] |
.dby12 equ [edi-16] |
.dex12 equ dword[edi-20] |
.dey12 equ [edi-24] |
.dtx12 equ dword[edi-28] |
.dty12 equ [edi-32] |
.dx13 equ dword[ebp-52-4*1] |
.dz13 equ [ebp-52-4*2] |
.dbx13 equ dword[ebp-52-4*3] |
.dby13 equ [ebp-52-4*4] |
.dex13 equ dword[ebp-52-4*5] |
.dey13 equ [ebp-52-4*6] |
.dtx13 equ dword[ebp-52-4*7] |
.dty13 equ [ebp-52-4*8] |
.dx23 equ dword[ebp-(52+4*9)] |
.dz23 equ [ebp-(52+4*10)] |
.dbx23 equ dword[ebp-(52+4*11)] |
.dby23 equ [ebp-(52+4*12)] |
.dex23 equ dword[ebp-(52+4*13)] |
.dey23 equ [ebp-(52+4*14)] |
.dtx23 equ dword[ebp-(52+4*15)] |
.dty23 equ [ebp-(52+4*16)] |
else |
.dx12 equ dword[ebp-24] |
.dz12 equ [ebp-28] |
.dbx12 equ dword[ebp-32] |
.dby12 equ [ebp-36] |
.dex12 equ dword[ebp-40] |
.dey12 equ [ebp-44] |
.dtx12 equ dword[ebp-48] |
.dty12 equ [ebp-52] |
.dx13 equ dword[ebp-52-4*1] |
.dz13 equ [ebp-52-4*2] |
.dbx13 equ dword[ebp-52-4*3] |
.dby13 equ [ebp-52-4*4] |
.dex13 equ dword[ebp-52-4*5] |
.dey13 equ [ebp-52-4*6] |
.dtx13 equ dword[ebp-52-4*7] |
.dty13 equ [ebp-52-4*8] |
.dx23 equ dword[ebp-(52+4*9)] |
.dz23 equ [ebp-(52+4*10)] |
.dbx23 equ dword[ebp-(52+4*11)] |
.dby23 equ [ebp-(52+4*12)] |
.dex23 equ dword[ebp-(52+4*13)] |
.dey23 equ [ebp-(52+4*14)] |
.dtx23 equ dword[ebp-(52+4*15)] |
.dty23 equ [ebp-(52+4*16)] |
end if |
; current (running) values, 16 dwords, in one of two layouts: |
;  Ext < SSE  - values of the first and the second edge alternate in |
;               x/z or x/y pairs |
;  otherwise  - all eight values of the first edge, then all eight of |
;               the second one, so that a 16 byte access starting at |
;               .cby1 / .cty1 / .cby2 / .cty2 covers four values of one edge |
if Ext < SSE |
.cx1 equ dword[ebp-(52+4*17)] ; current variables |
.cz1 equ [ebp-(52+4*18)] |
.cx2 equ dword[ebp-(52+4*19)] |
.cz2 equ [ebp-(52+4*20)] |
.cbx1 equ dword[ebp-(52+4*21)] |
.cby1 equ [ebp-(52+4*22)] |
.cbx2 equ dword[ebp-(52+4*23)] |
.cby2 equ [ebp-(52+4*24)] |
.cex1 equ dword[ebp-(52+4*25)] |
.cey1 equ [ebp-(52+4*26)] |
.cex2 equ dword[ebp-(52+4*27)] |
.cey2 equ [ebp-(52+4*28)] |
.ctx1 equ dword[ebp-(52+4*29)] |
.cty1 equ [ebp-(52+4*30)] |
.ctx2 equ dword[ebp-(52+4*31)] |
.cty2 equ [ebp-(52+4*32)] |
else |
.cx1 equ dword[ebp-(52+4*17)] ; current variables |
.cz1 equ [ebp-(52+4*18)] |
.cbx1 equ dword[ebp-(52+4*19)] |
.cby1 equ [ebp-(52+4*20)] |
.cex1 equ dword[ebp-(52+4*21)] |
.cey1 equ [ebp-(52+4*22)] |
.ctx1 equ dword[ebp-(52+4*23)] |
.cty1 equ [ebp-(52+4*24)] |
.cx2 equ dword[ebp-(52+4*25)] |
.cz2 equ [ebp-(52+4*26)] |
.cbx2 equ dword[ebp-(52+4*27)] |
.cby2 equ [ebp-(52+4*28)] |
.cex2 equ dword[ebp-(52+4*29)] |
.cey2 equ [ebp-(52+4*30)] |
.ctx2 equ dword[ebp-(52+4*31)] |
.cty2 equ [ebp-(52+4*32)] |
end if |
; prologue: the two pushes create .t_bmap and .t_emap |
cld |
mov ebp,esp |
push edx ; store bump map |
push esi ; store e. map |
; sub esp,120 |
; Order the vertices so that y1 <= y2 <= y3 (signed compare of the low |
; words of eax, ebx, ecx). Whenever two registers are exchanged, the |
; matching bump, env and texture coordinate dwords (x and y word at |
; once) and the z words on the stack are exchanged too. |
.sort3: ; sort triangle coordinates... |
cmp ax,bx |
jle .sort1 |
xchg eax,ebx |
mov edx,dword[.b_x1] |
xchg edx,dword[.b_x2] |
mov dword[.b_x1],edx |
mov edx,dword[.e_x1] |
xchg edx,dword[.e_x2] |
mov dword[.e_x1],edx |
mov edx,dword[.t_x1] |
xchg edx,dword[.t_x2] |
mov dword[.t_x1],edx |
mov dx,.z1 |
xchg dx,.z2 |
mov .z1,dx |
.sort1: |
cmp bx,cx |
jle .sort2 |
xchg ebx,ecx |
mov edx,dword[.b_x2] |
xchg edx,dword[.b_x3] |
mov dword[.b_x2],edx |
mov edx,dword[.e_x2] |
xchg edx,dword[.e_x3] |
mov dword[.e_x2],edx |
mov edx,dword[.t_x2] |
xchg edx,dword[.t_x3] |
mov dword[.t_x2],edx |
mov dx,.z2 |
xchg dx,.z3 |
mov .z2,dx |
; vertices 2 and 3 were exchanged - compare 1 and 2 again |
jmp .sort3 |
.sort2: |
; the three pushes create the locals .x1/.y1, .x2/.y2 and .x3/.y3 |
push eax ; store triangle coords in variables |
push ebx |
push ecx |
; leave when bit 15 (sign of y) or bit 31 (sign of x) is set in all |
; three registers |
mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that |
and edx,ebx ; if *all* of them are negative a sign flag is raised |
and edx,ecx |
and edx,eax |
test edx,80008000h ; Check both X&Y at once |
jne .loop23_done |
; mov edx,eax ; eax,ebx,ecx are ORd together into edx which means that |
; or edx,ebx ; if any *one* of them is negative a sign flag is raised |
; or edx,ecx |
; test edx,80000000h ; Check only X |
; jne .loop23_done |
; cmp .x1,SIZE_X ; { |
; jg .loop23_done |
; cmp .x2,SIZE_X ; This can be optimized with effort |
; jg .loop23_done |
; cmp .x3,SIZE_X |
; jg .loop23_done ; { |
; Deltas along edge 1-2. They fill the eight dword slots .dx12 .dz12 |
; .dbx12 .dby12 .dex12 .dey12 .dtx12 .dty12 directly below the vertex |
; coordinates. bx = y2 - y1 is the divisor. |
mov bx,.y2 ; calc delta 12 |
sub bx,.y1 |
jnz .bt_dx12_make |
; y1 = y2: all eight deltas are zero |
if 0 ;Ext >= SSE2 |
pxor xmm0,xmm0 |
movups .dty12,xmm0 |
movups .dey12,xmm0 |
sub esp,16 |
else |
mov ecx,8 |
xor edx,edx |
@@: |
push edx ;dword 0 |
loop @b |
end if |
jmp .bt_dx12_done |
.bt_dx12_make: |
movsx ebx,bx |
if Ext>=SSE |
; reserve the eight slots, then fill them with qword stores |
sub esp,32 |
; mov eax,256 |
; xmm3 = (y2 - y1) / [i255d] in every lane; dividing a coordinate |
; difference by it gives difference * [i255d] / (y2 - y1). |
; NOTE(review): i255d is defined elsewhere - presumably the fixed point |
; scale that corresponds to "shl ROUND" in the integer branch; confirm. |
cvtsi2ss xmm4,[i255d] |
cvtsi2ss xmm3,ebx ;rcps |
if 0 ;Ext >= SSE2 |
mov edi,ebp |
sub edi,512 |
or edi,0x0000000f |
end if |
divss xmm3,xmm4 |
shufps xmm3,xmm3,0 |
; load the x/y word pairs, widen the words to dwords with zero high |
; halves and subtract vertex 1 from vertex 2 |
movd mm0,[.b_x1] |
movd mm1,[.b_x2] |
movd mm2,[.e_x1] |
movd mm3,[.e_x2] |
pxor mm4,mm4 |
punpcklwd mm0,mm4 |
punpcklwd mm1,mm4 |
punpcklwd mm2,mm4 |
punpcklwd mm3,mm4 |
psubd mm1,mm0 |
psubd mm3,mm2 |
cvtpi2ps xmm1,mm1 |
movlhps xmm1,xmm1 |
cvtpi2ps xmm1,mm3 |
divps xmm1,xmm3 ;xmm1--> | dby | dbx | dey | dex | |
shufps xmm1,xmm1,10110001b |
;xmm1--> | dbx | dby | dex | dey | |
;1 movups .dey12,xmm1 |
cvtps2pi mm0,xmm1 ;mm0,xmm1 ; mm0 -> 2 delta dwords |
movhlps xmm1,xmm1 |
cvtps2pi mm1,xmm1 ;mm1,xmm1 |
movq .dey12,mm0 |
movq .dby12,mm1 |
;------------- |
; pxor mm0,mm0 |
; pxor mm1,mm1 |
;/ pinsrw mm0,.z1,1 |
;/ pinsrw mm0,.x1,0 |
;/ pinsrw mm1,.z2,1 |
;/ pinsrw mm1,.x2,0 |
; eax = z2 - z1 and edx = x2 - x1, both sign extended; mm4 is still zero |
mov ax,.z2 |
sub ax,.z1 |
cwde |
mov dx,.x2 |
sub dx,.x1 |
movsx edx,dx |
;/ movd mm1,eax |
;/ punpcklwd mm0,mm4 |
;/ punpcklwd mm1,mm4 |
; cvtpi2ps xmm1,mm1 |
; cvtpi2ps xmm2,mm0 |
; subps xmm1,xmm2 |
;/ psubd mm1,mm0 |
movd mm2,[.t_x1] |
movd mm3,[.t_x2] |
punpcklwd mm2,mm4 |
punpcklwd mm3,mm4 |
psubd mm3,mm2 |
;/ cvtpi2ps xmm1,mm1 |
; build | dx | dz | in the two high lanes, then put the texture |
; differences into the two low lanes |
cvtsi2ss xmm1,eax |
movlhps xmm1,xmm1 |
cvtsi2ss xmm1,edx |
; movss xmm1,xmm4 |
shufps xmm1,xmm1,00101111b |
cvtpi2ps xmm1,mm3 |
divps xmm1,xmm3 ; xmm1--> | dx | dz | dty | dtx | |
shufps xmm1,xmm1,11100001b |
; xmm1--> | dx | dz | dtx | dty | |
;1 movlps .dty12,xmm1 |
;1 movhps .dz12,xmm1 |
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords | dtx | dty | |
movhlps xmm1,xmm1 |
cvtps2pi mm1,xmm1 |
movq .dty12,mm0 |
movq .dz12,mm1 |
;---- |
; mov ax,.z2 |
; sub ax,.z1 |
; cwde |
; mov bx,.x2 |
; sub bx,.x1 |
; movsx ebx,bx |
; movd mm1,eax |
; psllq mm1,32 |
; movd mm1,ebx |
;; push ebx |
;; push eax |
;; movq mm1,[esp] |
;; add esp,8 |
;;; mov ax,.z1 |
;;; mov bx,.z2 |
;;; shl eax,16 |
;;; shl ebx,16 |
;;; mov ax,.x1 |
;;; mov bx,.x2 |
; movd mm2,[.t_x1] |
; movd mm3,[.t_x2] |
;; movd mm0,eax |
;; movd mm1,ebx |
; pxor mm4,mm4 |
;; punpcklwd mm0,mm4 |
;; punpcklwd mm1,mm4 |
; punpcklwd mm2,mm4 |
; punpcklwd mm3,mm4 |
;; psubd mm1,mm0 |
; psubd mm3,mm2 |
; cvtpi2ps xmm1,mm1 |
; movlhps xmm1,xmm1 |
; cvtpi2ps xmm1,mm3 |
; divps xmm1,xmm3 ; xmm1--> | dz | dx | dty | dtx | |
; shufps xmm1,xmm1,10110001b |
; xmm1--> | dx | dz | dtx | dty | |
; cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords | dtx | dty | |
; movhlps xmm1,xmm1 |
; cvtps2pi mm1,xmm1 ; mm1 --> 2 delta dwords | dx | dz | |
; movq .dty12,mm0 |
; movq .dz12,mm1 |
else |
; integer version: every delta is (difference shl ROUND) / (y2 - y1), |
; the z delta uses CATMULL_SHIFT; each push creates the next slot |
mov ax,.x2 |
sub ax,.x1 |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dx12,eax |
push eax |
mov ax,.z2 |
sub ax,.z1 |
cwde |
shl eax,CATMULL_SHIFT |
cdq |
idiv ebx |
push eax |
mov ax,word[.b_x2] |
sub ax,word[.b_x1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dbx12,eax |
push eax |
mov ax,word[.b_y2] |
sub ax,word[.b_y1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dby12,eax |
push eax |
mov ax,word[.e_x2] |
sub ax,word[.e_x1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dex12,eax |
push eax |
mov ax,word[.e_y2] |
sub ax,word[.e_y1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dey12,eax |
push eax |
mov ax,word[.t_x2] |
sub ax,word[.t_x1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dtx12,eax |
push eax |
mov ax,word[.t_y2] |
sub ax,word[.t_y1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dty12,eax |
push eax |
end if |
; Deltas along edge 1-3: the same scheme as for edge 1-2, filling the |
; next eight dword slots (.dx13 ... .dty13). bx = y3 - y1 is the divisor. |
.bt_dx12_done: |
mov bx,.y3 ; calc delta13 |
sub bx,.y1 |
jnz .bt_dx13_make |
; y1 = y3: all eight deltas are zero |
mov ecx,8 |
xor edx,edx |
@@: |
push edx ;dword 0 |
loop @b |
jmp .bt_dx13_done |
.bt_dx13_make: |
movsx ebx,bx |
if Ext>=SSE |
; reserve the eight slots, then fill them with qword stores |
sub esp,32 |
; mov eax,256 |
; xmm3 = (y3 - y1) / [i255d] in every lane, the common divisor |
cvtsi2ss xmm4,[i255d] |
cvtsi2ss xmm3,ebx ;rcps |
divss xmm3,xmm4 |
shufps xmm3,xmm3,0 |
; bump and env differences (vertex 3 - vertex 1), words widened to |
; dwords with zero high halves |
movd mm0,[.b_x1] |
movd mm1,[.b_x3] |
movd mm2,[.e_x1] |
movd mm3,[.e_x3] |
pxor mm4,mm4 |
punpcklwd mm0,mm4 |
punpcklwd mm1,mm4 |
punpcklwd mm2,mm4 |
punpcklwd mm3,mm4 |
psubd mm1,mm0 |
psubd mm3,mm2 |
cvtpi2ps xmm1,mm1 |
movlhps xmm1,xmm1 |
cvtpi2ps xmm1,mm3 |
divps xmm1,xmm3 ;xmm1--> | dby | dbx | dey | dex | |
shufps xmm1,xmm1,10110001b |
;xmm1--> | dbx | dby | dex | dey | |
;1 movups .dey13,xmm1 |
cvtps2pi mm0,xmm1 ;mm0,xmm1 ; mm0 -> 2 delta dwords |
movhlps xmm1,xmm1 |
cvtps2pi mm1,xmm1 ;mm1,xmm1 |
movq .dey13,mm0 |
movq .dby13,mm1 |
; eax = z3 - z1, edx = x3 - x1 (sign extended); mm4 is still zero |
mov ax,.z3 |
sub ax,.z1 |
cwde |
mov dx,.x3 |
sub dx,.x1 |
movsx edx,dx |
movd mm2,[.t_x1] |
movd mm3,[.t_x3] |
punpcklwd mm2,mm4 |
punpcklwd mm3,mm4 |
psubd mm3,mm2 |
; | dx | dz | go to the two high lanes, the texture differences to the |
; two low lanes |
cvtsi2ss xmm1,eax |
movlhps xmm1,xmm1 |
cvtsi2ss xmm1,edx |
shufps xmm1,xmm1,00101111b |
cvtpi2ps xmm1,mm3 |
divps xmm1,xmm3 ; xmm1--> | dx | dz | dty | dtx | |
shufps xmm1,xmm1,11100001b |
; xmm1--> | dx | dz | dtx | dty | |
;1 movlps .dty13,xmm1 |
;1 movhps .dz13,xmm1 |
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords | dtx | dty | |
movhlps xmm1,xmm1 |
cvtps2pi mm1,xmm1 |
movq .dty13,mm0 |
movq .dz13,mm1 |
else |
; integer version: (difference shl ROUND) / (y3 - y1), CATMULL_SHIFT |
; for z; each push creates the next slot |
mov ax,.x3 |
sub ax,.x1 |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dx13,eax |
push eax |
mov ax,.z3 |
sub ax,.z1 |
cwde |
shl eax,CATMULL_SHIFT |
cdq |
idiv ebx |
; mov .dz13,eax |
push eax |
mov ax,word[.b_x3] |
sub ax,word[.b_x1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dbx13,eax |
push eax |
mov ax,word[.b_y3] |
sub ax,word[.b_y1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dby13,eax |
push eax |
mov ax,word[.e_x3] |
sub ax,word[.e_x1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dex13,eax |
push eax |
mov ax,word[.e_y3] |
sub ax,word[.e_y1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dey13,eax |
push eax |
mov ax,word[.t_x3] |
sub ax,word[.t_x1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dtx13,eax |
push eax |
mov ax,word[.t_y3] |
sub ax,word[.t_y1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dty13,eax |
push eax |
end if |
; Deltas along edge 2-3: the same scheme again, filling the last eight |
; delta slots (.dx23 ... .dty23). bx = y3 - y2 is the divisor. |
.bt_dx13_done: |
mov bx,.y3 ; calc delta23 |
sub bx,.y2 |
jnz .bt_dx23_make |
; y2 = y3: all eight deltas are zero |
mov ecx,8 |
xor edx,edx |
@@: |
push edx ;dword 0 |
loop @b |
jmp .bt_dx23_done |
.bt_dx23_make: |
movsx ebx,bx |
if Ext>=SSE |
; reserve the eight slots, then fill them with qword stores |
sub esp,32 |
; mov eax,256 |
; xmm3 = (y3 - y2) / [i255d] in every lane, the common divisor |
cvtsi2ss xmm4,[i255d] |
cvtsi2ss xmm3,ebx ;rcps |
divss xmm3,xmm4 |
shufps xmm3,xmm3,0 |
; bump and env differences (vertex 3 - vertex 2), words widened to |
; dwords with zero high halves |
movd mm0,[.b_x2] |
movd mm1,[.b_x3] |
movd mm2,[.e_x2] |
movd mm3,[.e_x3] |
pxor mm4,mm4 |
punpcklwd mm0,mm4 |
punpcklwd mm1,mm4 |
punpcklwd mm2,mm4 |
punpcklwd mm3,mm4 |
psubd mm1,mm0 |
psubd mm3,mm2 |
cvtpi2ps xmm1,mm1 |
movlhps xmm1,xmm1 |
cvtpi2ps xmm1,mm3 |
divps xmm1,xmm3 ;xmm1--> | dby | dbx | dey | dex | |
shufps xmm1,xmm1,10110001b |
;xmm1--> | dbx | dby | dex | dey | |
;1 movups .dey23,xmm1 |
cvtps2pi mm0,xmm1 ;mm0,xmm1 ; mm0 -> 2 delta dwords |
movhlps xmm1,xmm1 |
cvtps2pi mm1,xmm1 ;mm1,xmm1 |
movq .dey23,mm0 |
movq .dby23,mm1 |
; eax = z3 - z2, edx = x3 - x2 (sign extended); mm4 is still zero |
mov ax,.z3 |
sub ax,.z2 |
cwde |
mov dx,.x3 |
sub dx,.x2 |
movsx edx,dx |
movd mm2,[.t_x2] |
movd mm3,[.t_x3] |
punpcklwd mm2,mm4 |
punpcklwd mm3,mm4 |
psubd mm3,mm2 |
; | dx | dz | go to the two high lanes, the texture differences to the |
; two low lanes |
cvtsi2ss xmm1,eax |
movlhps xmm1,xmm1 |
cvtsi2ss xmm1,edx |
shufps xmm1,xmm1,00101111b |
cvtpi2ps xmm1,mm3 |
divps xmm1,xmm3 ; xmm1--> | dx | dz | dty | dtx | |
shufps xmm1,xmm1,11100001b |
; xmm1--> | dx | dz | dtx | dty | |
; movlps .dty23,xmm1 |
; movhps .dz23,xmm1 |
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords | dtx | dty | |
movhlps xmm1,xmm1 |
cvtps2pi mm1,xmm1 ; mm1 --> 2 delta dwords | dx | dz | |
movq .dty23,mm0 |
movq .dz23,mm1 |
else |
; integer version: (difference shl ROUND) / (y3 - y2), CATMULL_SHIFT |
; for z; each push creates the next slot |
mov ax,.x3 |
sub ax,.x2 |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dx23,eax |
push eax |
mov ax,.z3 |
sub ax,.z2 |
cwde |
shl eax,CATMULL_SHIFT |
cdq |
idiv ebx |
; mov .dz23,eax |
push eax |
mov ax,word[.b_x3] |
sub ax,word[.b_x2] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dbx23,eax |
push eax |
mov ax,word[.b_y3] |
sub ax,word[.b_y2] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dby23,eax |
push eax |
mov ax,word[.e_x3] |
sub ax,word[.e_x2] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dex23,eax |
push eax |
mov ax,word[.e_y3] |
sub ax,word[.e_y2] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dey23,eax |
push eax |
mov ax,word[.t_x3] |
sub ax,word[.t_x2] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dtx23,eax |
push eax |
mov ax,word[.t_y3] |
sub ax,word[.t_y2] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dty23,eax |
push eax |
end if |
; sub esp,40 |
; Reserve the 16 "current" dwords and start both edges at vertex 1: |
; every ...1 / ...2 pair receives the vertex 1 value shifted left by |
; ROUND (CATMULL_SHIFT for z). |
.bt_dx23_done: |
sub esp,64 |
movsx eax,.x1 |
shl eax,ROUND |
mov .cx1,eax |
mov .cx2,eax |
; push eax |
; push eax |
movsx ebx,word[.b_x1] |
shl ebx,ROUND |
mov .cbx1,ebx |
mov .cbx2,ebx |
; push ebx |
; push ebx |
movsx ecx,word[.b_y1] |
shl ecx,ROUND |
mov .cby1,ecx |
mov .cby2,ecx |
; push ecx |
; push ecx |
movsx edx,word[.e_x1] |
shl edx,ROUND |
mov .cex1,edx |
mov .cex2,edx |
; push edx |
; push edx |
movsx eax,word[.e_y1] |
shl eax,ROUND |
mov .cey1,eax |
mov .cey2,eax |
; push eax |
; push eax |
movsx ebx,.z1 |
shl ebx,CATMULL_SHIFT |
mov .cz1,ebx |
mov .cz2,ebx |
; push ebx |
; push ebx |
; sub esp,16 |
movsx ecx,word[.t_x1] |
shl ecx,ROUND |
mov .ctx1,ecx |
mov .ctx2,ecx |
;push ecx |
;push ecx |
movsx edx,word[.t_y1] |
shl edx,ROUND |
mov .cty1,edx |
mov .cty2,edx |
; push edx |
; push edx |
if Ext >= SSE2 |
; Each 16 byte load takes four consecutive dwords, e.g. the one at .cby1 |
; covers .cby1 .cbx1 .cz1 .cx1 and the one at .cty1 covers .cty1 .ctx1 |
; .cey1 .cex1; the delta loads are grouped the same way. |
movups xmm0,.cby1 |
movups xmm1,.cty1 |
movups xmm2,.cby2 |
movups xmm3,.cty2 |
movups xmm4,.dby13 |
movups xmm5,.dty13 |
movups xmm6,.dby12 |
movups xmm7,.dty12 |
; Copies kept in sse_repository, addressed through edi. The stores use |
; movaps, which requires a 16 byte aligned address. |
; NOTE(review): sse_repository is defined elsewhere - confirm that it is |
; 16 byte aligned and at least 144 bytes long for the offsets used |
; here (.sdty12 sits at +128, +112 is left unused). |
.scby1 equ [edi] |
.scty1 equ [edi+16] |
.scby2 equ [edi+32] |
.scty2 equ [edi+48] |
.sdby13 equ [edi+64] |
.sdty13 equ [edi+80] |
.sdby12 equ [edi+96] |
.sdty12 equ [edi+128] |
; edi (screen buffer pointer) is saved around the use as a base register |
push edi |
mov edi,sse_repository |
movaps .scby1,xmm0 |
movaps .scty1,xmm1 |
movaps .scby2,xmm2 |
movaps .scty2,xmm3 |
movaps .sdby13,xmm4 |
movaps .sdty13,xmm5 |
movaps .sdby12,xmm6 |
movaps .sdty12,xmm7 |
pop edi |
end if |
; Upper part of the triangle: scanlines y1 .. y2-1. After every line the |
; ...1 values advance by the edge 1-3 deltas and the ...2 values by the |
; edge 1-2 deltas. Skipped completely when y1 >= y2. |
movsx ecx,.y1 |
cmp cx,.y2 |
jge .loop12_done |
.loop12: |
;if Ext >= SSE2 |
; fxsave [sse_repository] |
;end if |
call .call_line |
if Ext >= SSE2 |
; reload the current values (xmm0 - xmm3 were shuffled in .call_line), |
; add the deltas kept in sse_repository and store the results; the |
; updated xmm0 - xmm3 are the input of the next .call_line |
; fxrstor [sse_repository] |
movups xmm0,.cby1 |
movups xmm1,.cty1 |
movups xmm2,.cby2 |
movups xmm3,.cty2 |
; movups xmm4,.dby13 |
; movups xmm5,.dty13 |
; movups xmm6,.dby12 |
; movups xmm7,.dty12 |
; paddd xmm0,xmm4 |
; paddd xmm1,xmm5 |
; paddd xmm2,xmm6 |
; paddd xmm3,xmm7 |
push edi |
mov edi,sse_repository |
paddd xmm0,.sdby13 |
paddd xmm1,.sdty13 |
paddd xmm2,.sdby12 |
paddd xmm3,.sdty12 |
pop edi |
movups .cby1,xmm0 |
movups .cty1,xmm1 |
movups .cby2,xmm2 |
movups .cty2,xmm3 |
end if |
if (Ext = MMX) | (Ext = SSE) |
; two dwords per register: each qword covers a y/x (or z/x) pair |
movq mm0,.cby2 |
movq mm1,.cby1 |
movq mm2,.cey2 |
movq mm3,.cey1 |
movq mm4,.cty1 |
movq mm5,.cty2 |
movq mm6,.cz1 |
movq mm7,.cz2 |
paddd mm0,.dby12 |
paddd mm1,.dby13 |
paddd mm2,.dey12 |
paddd mm3,.dey13 |
paddd mm4,.dty13 |
paddd mm5,.dty12 |
paddd mm6,.dz13 |
paddd mm7,.dz12 |
movq .cby2,mm0 |
movq .cby1,mm1 |
movq .cey1,mm3 |
movq .cey2,mm2 |
movq .cty1,mm4 |
movq .cty2,mm5 |
movq .cz1,mm6 |
movq .cz2,mm7 |
end if |
if Ext = NON |
; plain integer version, one dword at a time |
mov edx,.dbx13 |
add .cbx1,edx |
mov eax,.dbx12 |
add .cbx2,eax |
mov ebx,.dby13 |
add .cby1,ebx |
mov edx,.dby12 |
add .cby2,edx |
mov eax,.dex13 |
add .cex1,eax |
mov ebx,.dex12 |
add .cex2,ebx |
mov edx,.dey13 |
add .cey1,edx |
mov eax,.dey12 |
add .cey2,eax |
mov eax,.dtx13 |
add .ctx1,eax |
mov ebx,.dtx12 |
add .ctx2,ebx |
mov edx,.dty13 |
add .cty1,edx |
mov eax,.dty12 |
add .cty2,eax |
mov eax,.dx13 |
add .cx1,eax |
mov ebx,.dx12 |
add .cx2,ebx |
mov ebx,.dz13 |
add .cz1,ebx |
mov edx,.dz12 |
add .cz2,edx |
end if |
; next scanline, until y2 is reached |
inc ecx |
cmp cx,.y2 |
jl .loop12 |
; Lower part of the triangle: scanlines y2 .. y3-1. The ...2 values are |
; restarted at vertex 2 and then advance by the edge 2-3 deltas; the |
; ...1 values keep advancing by the edge 1-3 deltas. |
.loop12_done: |
movsx ecx,.y2 |
cmp cx,.y3 |
jge .loop23_done |
movsx eax,.z2 |
shl eax,CATMULL_SHIFT |
mov .cz2,eax |
movsx ebx,.x2 |
shl ebx,ROUND |
mov .cx2,ebx |
; NOTE(review): the bump / env / texture words of vertex 2 are zero |
; extended (movzx) here, while the vertex 1 values are loaded with movsx |
; in .bt_dx23_done; the results differ only for negative coordinates - |
; confirm that this is intended. |
movzx edx,word[.b_x2] |
shl edx,ROUND |
mov .cbx2,edx |
movzx eax,word[.b_y2] |
shl eax,ROUND |
mov .cby2,eax |
movzx ebx,word[.e_x2] |
shl ebx,ROUND |
mov .cex2,ebx |
movzx edx,word[.e_y2] |
shl edx,ROUND |
mov .cey2,edx |
movzx eax,word[.t_x2] |
shl eax,ROUND |
mov .ctx2,eax |
movzx ebx,word[.t_y2] |
shl ebx,ROUND |
mov .cty2,ebx |
if Ext >= SSE2 |
; refresh the second edge registers and store the restarted values and |
; the edge 2-3 deltas in sse_repository |
movups xmm2,.cby2 |
movups xmm3,.cty2 |
; movups xmm4,.dby13 |
; movups xmm5,.dty13 |
movups xmm6,.dby23 |
movups xmm7,.dty23 |
; .scby1 equ [edi] |
; .scty1 equ [edi+16] |
; .scby2 equ [edi+32] |
; .scty2 equ [edi+48] |
; .sdby13 equ [edi+64] |
; .sdty13 equ [edi+80] |
; NOTE(review): with these offsets sse_repository has to be at least |
; 208 bytes long and 16 byte aligned (movaps) - it is defined elsewhere, |
; confirm. |
.sdby23 equ [edi+160] |
.sdty23 equ [edi+192] |
push edi |
mov edi,sse_repository |
; movaps .scby1,xmm0 |
; movaps .scty1,xmm1 |
movaps .scby2,xmm2 |
movaps .scty2,xmm3 |
; movaps .sdby13,xmm4 |
; movaps .sdty13,xmm5 |
movaps .sdby23,xmm6 |
movaps .sdty23,xmm7 |
pop edi |
end if |
.loop23: |
;if Ext >= SSE2 |
; fxsave [sse_repository] |
;end if |
call .call_line |
if Ext >= SSE2 |
; reload the current values, add the deltas kept in sse_repository and |
; store the results; xmm0 - xmm3 feed the next .call_line |
movups xmm0,.cby1 |
movups xmm1,.cty1 |
movups xmm2,.cby2 |
movups xmm3,.cty2 |
push edi |
mov edi,sse_repository |
paddd xmm0,.sdby13 |
paddd xmm1,.sdty13 |
paddd xmm2,.sdby23 |
paddd xmm3,.sdty23 |
pop edi |
movups .cby1,xmm0 |
movups .cty1,xmm1 |
movups .cby2,xmm2 |
movups .cty2,xmm3 |
; fxrstor [sse_repository] |
; movups xmm0,.cby1 |
; movups xmm1,.cty1 |
; movups xmm2,.cby2 |
; movups xmm3,.cty2 |
; movups xmm4,.dby13 |
; movups xmm5,.dty13 |
; movups xmm6,.dby23 |
; movups xmm7,.dty23 |
; paddd xmm0,xmm4 |
; paddd xmm1,xmm5 |
; paddd xmm2,xmm6 |
; paddd xmm3,xmm7 |
; movups .cby1,xmm0 |
; movups .cty1,xmm1 |
; movups .cby2,xmm2 |
; movups .cty2,xmm3 |
; |
end if |
if (Ext = MMX) | (Ext = SSE) |
; two dwords per register: each qword covers a y/x (or z/x) pair |
movq mm0,.cby2 |
movq mm1,.cby1 |
movq mm2,.cey2 |
movq mm3,.cey1 |
movq mm4,.cty1 |
movq mm5,.cty2 |
movq mm6,.cz1 |
movq mm7,.cz2 |
paddd mm0,.dby23 |
paddd mm1,.dby13 |
paddd mm2,.dey23 |
paddd mm3,.dey13 |
paddd mm4,.dty13 |
paddd mm5,.dty23 |
paddd mm6,.dz13 |
paddd mm7,.dz23 |
movq .cby2,mm0 |
movq .cby1,mm1 |
movq .cey2,mm2 |
movq .cey1,mm3 |
movq .cty1,mm4 |
movq .cty2,mm5 |
movq .cz1,mm6 |
movq .cz2,mm7 |
end if |
; plain integer version, one dword at a time |
If Ext = NON |
mov edx,.dbx13 |
add .cbx1,edx |
mov eax,.dbx23 |
add .cbx2,eax |
mov ebx,.dby13 |
add .cby1,ebx |
mov edx,.dby23 |
add .cby2,edx |
mov eax,.dex13 |
add .cex1,eax |
mov ebx,.dex23 |
add .cex2,ebx |
mov edx,.dey13 |
add .cey1,edx |
mov eax,.dey23 |
add .cey2,eax |
mov eax,.dx13 |
add .cx1,eax |
mov ebx,.dx23 |
add .cx2,ebx |
mov ebx,.dz13 |
add .cz1,ebx |
mov edx,.dz23 |
add .cz2,edx |
mov eax,.dtx13 |
add .ctx1,eax |
mov ebx,.dtx23 |
add .ctx2,ebx |
mov edx,.dty13 |
add .cty1,edx |
mov eax,.dty23 |
add .cty2,eax |
end if |
; next scanline, until y3 is reached |
inc ecx |
cmp cx,.y3 |
jl .loop23 |
; common exit (also used by the early reject): drop all locals and |
; release the 50 bytes of stack arguments |
.loop23_done: |
mov esp,ebp |
ret 50 |
.call_line: |
pushad |
; xmm0= cby1,cbx1,cz1,cx1 |
; xmm1= cty1,ctx1,cey1,cex1 |
if Ext >= SSE2 |
sub esp,8 |
shufps xmm1,xmm1,10110001b |
shufps xmm3,xmm3,10110001b |
movlps [esp],xmm1 |
else |
push dword .cty1 |
push .ctx1 |
end if |
push dword .cz1 |
if Ext>=SSE2 |
sub esp,8 |
movlps [esp],xmm3 |
else |
push dword .cty2 |
push .ctx2 |
end if |
push dword .cz2 |
if Ext>=SSE2 |
sub esp,32 |
movhps [esp+24],xmm3 |
shufps xmm2,xmm2,10110001b |
movlps [esp+16],xmm2 |
movhps [esp+8],xmm1 |
shufps xmm0,xmm0,10110001b |
movlps [esp],xmm0 ;================================ |
else |
push dword .cey2 |
push .cex2 |
push dword .cby2 |
push .cbx2 |
push dword .cey1 |
push .cex1 |
push dword .cby1 |
push .cbx1 |
end if |
push .tex_ptr |
push .z_buff |
push .t_emap |
push .t_bmap |
push ecx |
mov eax,.cx1 |
sar eax,ROUND |
mov ebx,.cx2 |
sar ebx,ROUND |
call bump_tex_line_z |
popad |
;end if |
ret |
;----------------------------------------------------------------------- |
; bump_tex_line_z - draws one horizontal span of a bump mapped, |
; environment mapped and textured triangle, with a Z-buffer test per pixel. |
; The code path (integer / MMX / SSE / SSE2) is chosen at assembly time |
; by the Ext constant. |
; in:  eax - x1, ebx - x2 (span end points; swapped here when x1 > x2) |
;      edi - pointer to screen buffer (3 bytes per pixel) |
;      stack - parameters listed below (76 bytes, removed by 'ret 76') |
; out: none |
; Registers, including ebp, are NOT preserved; the caller is expected to |
; save what it needs. MMX/SSE registers are used when Ext <> NON. |
;----------------------------------------------------------------------- |
bump_tex_line_z: |
;--------------in: eax - x1 |
;-------------- ebx - x2 |
;-------------- edi - pointer to screen buffer |
;stack - another parameters : |
.y equ dword [ebp+4] |
.bmap equ dword [ebp+8] ; bump map pointer |
.emap equ dword [ebp+12] ; env map pointer |
.z_buff equ dword [ebp+16] ; z buffer |
.tex_map equ dword [ebp+20] ; texture pointer |
.bx1 equ [ebp+24] ; --- |
.by1 equ [ebp+28] ; | |
.ex1 equ [ebp+32] ; | |
.ey1 equ [ebp+36] ; | |
.bx2 equ [ebp+40] ; | |
.by2 equ [ebp+44] ; |> b. map and e. map coords |
.ex2 equ [ebp+48] ; |> shifted shl ROUND |
.ey2 equ [ebp+52] ; --- |
.z2 equ [ebp+56] |
.tx2 equ [ebp+60] |
.ty2 equ [ebp+64] |
.z1 equ [ebp+68] |
.tx1 equ [ebp+72] |
.ty1 equ [ebp+76] |
; locals - created below by pushes / 'sub esp', in exactly this order |
.x1 equ [ebp-4] |
.x2 equ [ebp-8] |
.dbx equ [ebp-12] |
.dby equ [ebp-16] |
.dex equ [ebp-20] |
.dey equ [ebp-24] |
.dz equ [ebp-28] |
.dtx equ [ebp-32] |
.dty equ [ebp-36] |
.cbx equ [ebp-40] |
.cby equ [ebp-44] |
.cex equ [ebp-48] |
.cey equ [ebp-52] |
.cz equ [ebp-56] |
.czbuff equ [ebp-60] |
.ctx equ [ebp-64] |
.cty equ [ebp-68] |
.c_scr equ [ebp-72] |
.temp1 equ ebp-80 |
.temp2 equ ebp-88 |
.temp3 equ ebp-76 |
.temp4 equ ebp-84 |
.temp5 equ ebp-92 |
mov ebp,esp |
; reject spans that lie outside the screen vertically |
mov ecx,.y |
or ecx,ecx |
jl .bl_end |
movzx edx,word[size_y_var] |
cmp ecx,edx ;SIZE_Y |
jge .bl_end |
; make sure x1 < x2: empty span -> exit, reversed span -> swap both ends |
cmp eax,ebx |
jl .bl_ok |
je .bl_end |
if Ext=NON |
mov edx,.bx1 |
xchg edx,.bx2 |
mov .bx1,edx |
mov edx,.by1 |
xchg edx,.by2 |
mov .by1,edx |
mov edx,.ex1 |
xchg edx,.ex2 |
mov .ex1,edx |
mov edx,.ey1 |
xchg edx,.ey2 |
mov .ey1,edx |
mov edx,.tx1 |
xchg edx,.tx2 |
mov .tx1,edx |
mov edx,.ty1 |
xchg edx,.ty2 |
mov .ty1,edx |
end if |
if Ext = MMX |
; each movq moves an (x,y) pair of adjacent dwords |
movq mm0,.bx1 |
movq mm1,.bx2 |
movq mm2,.ex1 |
movq mm3,.ex2 |
movq mm4,.tx1 |
movq mm5,.tx2 |
movq .bx2,mm0 |
movq .bx1,mm1 |
movq .ex1,mm3 |
movq .ex2,mm2 |
movq .tx1,mm5 |
movq .tx2,mm4 |
end if |
if Ext>=SSE |
; 16 bytes at .bx1 = bx1,by1,ex1,ey1; 16 bytes at .bx2 = bx2,by2,ex2,ey2 |
movups xmm0,.bx1 |
movups xmm1,.bx2 |
movups .bx1,xmm1 |
movups .bx2,xmm0 |
movq mm0,.tx1 |
movq mm1,.tx2 |
movq .tx1,mm1 |
movq .tx2,mm0 |
end if |
;if Ext>=SSE2 |
; movaps xmm4,xmm0 |
; movaps xmm0,xmm2 |
; movaps xmm2,xmm4 |
; movaps xmm5,xmm1 |
; movaps xmm1,xmm3 |
; movaps xmm3,xmm5 |
;else |
xchg eax,ebx |
mov edx,.z1 |
xchg edx,.z2 |
mov .z1,edx |
;end if |
.bl_ok: |
;if Ext >= SSE2 |
; shufps xmm0,xmm0,11100001b |
; shufps xmm2,xmm2,11100001b |
; movlps .bx1,xmm0 |
; movlps .bx2,xmm2 |
; shufps xmm0,xmm0,00011011b |
; shufps xmm2,xmm2,00011011b |
; movd eax,xmm0 |
; movd ebx,xmm2 |
; shufps xmm0,xmm0,11000110b |
; shufps xmm2,xmm2,11000110b |
; movd .z1,xmm0 |
; movd .z2,xmm2 |
; shufps xmm1,xmm1,10110001b |
; shufps xmm3,xmm3,10110001b |
; movlps .ex1,xmm1 |
; movlps .ex2,xmm2 |
; movhps .tx1,xmm1 |
; movhps .tx2,xmm2 |
; xchg eax,ebx |
; mov edx,.z1 |
; xchg edx,.z2 |
; mov .z1,edx |
;end if |
push eax |
push ebx ;store x1, x2 |
; reject spans that lie completely left or right of the screen |
movzx ebx,word[size_x_var] |
; mov eax,.x1 |
cmp dword .x1,ebx ;dword .x1,SIZE_X |
jge .bl_end |
cmp dword .x2,0 |
jle .bl_end |
; ebx = span length, the divisor for all per-pixel deltas |
mov ebx,.x2 |
sub ebx,.x1 |
if Ext>=SSE |
sub esp,28 ; room for the 7 delta dwords .dbx ... .dty |
cvtsi2ss xmm3,ebx ;rcps |
shufps xmm3,xmm3,0 |
; float using SSE variant ::--> |
; movups xmm0,.bx1 ; new |
; movups xmm1,.bx2 ; new |
cvtpi2ps xmm0,.bx1 ;mm0 ; variant fixed point |
movlhps xmm0,xmm0 |
cvtpi2ps xmm0,.ex1 ;mm2 |
cvtpi2ps xmm1,.bx2 ;mm1 |
movlhps xmm1,xmm1 |
cvtpi2ps xmm1,.ex2 ;mm3 |
subps xmm1,xmm0 |
divps xmm1,xmm3 |
shufps xmm1,xmm1,10110001b ; swap x/y inside each pair to match the local layout |
; movups .dey,xmm1 ; new |
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords |
movhlps xmm1,xmm1 |
cvtps2pi mm1,xmm1 |
movq .dey,mm0 ; writes .dey and .dex |
movq .dby,mm1 ; writes .dby and .dbx |
movd mm2,.z1 |
movd mm3,.z2 |
cvtpi2ps xmm0,.tx1 ;mm0 |
movlhps xmm0,xmm0 |
cvtpi2ps xmm0,mm2 |
cvtpi2ps xmm1,.tx2 ;mm1 |
movlhps xmm1,xmm1 |
cvtpi2ps xmm1,mm3 |
; movups xmm0,,z1 ; new |
; movups xmm1,.z2 ; new |
subps xmm1,xmm0 |
divps xmm1,xmm3 |
; movups .dz,xmm1 ;new |
shufps xmm1,xmm1,10110100b |
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords |
movhlps xmm1,xmm1 |
cvtps2pi mm1,xmm1 |
movd .dz,mm0 |
movq .dty,mm1 ; writes .dty and .dtx |
else |
; integer variant: each quotient is pushed, creating .dbx ... .dty in order |
mov eax,.bx2 ; calc .dbx |
sub eax,.bx1 |
cdq |
idiv ebx |
push eax |
mov eax,.by2 ; calc .dby |
sub eax,.by1 |
cdq |
idiv ebx |
push eax |
mov eax,.ex2 ; calc .dex |
sub eax,.ex1 |
cdq |
idiv ebx |
push eax |
mov eax,.ey2 ; calc .dey |
sub eax,.ey1 |
cdq |
idiv ebx |
push eax |
mov eax,.z2 ; calc .dz |
sub eax,.z1 |
cdq |
idiv ebx |
push eax |
mov eax,.tx2 ; calc .dtx |
sub eax,.tx1 |
cdq |
idiv ebx |
push eax |
mov eax,.ty2 ; calc .dty |
sub eax,.ty1 |
cdq |
idiv ebx |
push eax |
end if |
cmp dword .x1,0 ; set correctly begin variable |
jge @f ; CLIPPING ON FUNCTION |
; cutting triangle exceedes screen |
; x1 < 0: advance every start value by delta * (-x1) and start at column 0 |
mov ebx,.x1 |
neg ebx |
;if Ext >= SSE |
; cvtsi2ss xmm0,ebx |
; shufps xmm0,xmm0,0 |
; movups xmm1,.dey |
; mulps xmm1,xmm0 |
; shufps xmm1,xmm1,00011011b |
; movups xmm2,.bx1 |
; addps xmm2,xmm1 |
; movups .bx1,xmm2 |
mov eax,.dz |
imul ebx ; eax = .dz * abs(.x1) |
add .z1,eax |
mov dword .x1,0 |
mov eax,.dbx |
imul ebx |
add .bx1,eax |
mov eax,.dby |
imul ebx |
add .by1,eax |
mov eax,.dex |
imul ebx |
add .ex1,eax |
mov eax,.dey |
imul ebx |
add .ey1,eax |
mov eax,.dtx |
imul ebx |
add .tx1,eax |
mov eax,.dty |
imul ebx |
add .ty1,eax |
@@: |
; clip the right end of the span to the screen width |
; mov ebx,.x2 |
movzx eax,word[size_x_var] |
; cmp dword .x2,SIZE_X |
cmp dword .x2,eax ; eax,ebx |
jl @f |
mov dword .x2,eax ;SIZE_X |
@@: |
movzx eax,word[size_x_var] ;SIZE_X ;calc memory begin in buffers |
mul .y |
add eax,.x1 ; eax = y * width + x1 = index of the first pixel |
lea esi,[4*eax] |
add esi,.z_buff ; z-buffer filled with dd variables |
lea eax,[eax*3] ; screen buffer holds 3 bytes per pixel |
add edi,eax |
mov ecx,.x2 |
sub ecx,.x1 ; ecx = number of pixels to draw |
; init current variables |
push dword .bx1 ; current b, e and t shifted shl ROUND .cbx |
push dword .by1 ; .cby |
push dword .ex1 ; .cex |
push dword .ey1 ; .cey |
push dword .z1 ; current z shl CATMULL_SHIFT ; .cz |
push esi ; .czbuff |
push dword .tx1 ; .ctx |
push dword .ty1 ; .cty |
push edi ; .c_scr |
if Ext = SSE2 |
; constants for the 4 bump map neighbour fetches done at once: |
; xmm1 = index mask, xmm2 = offsets (-1, +1, -TEX_X, +TEX_X), xmm3 = bump map base |
mov eax,TEXTURE_SIZE |
movd xmm1,eax |
shufps xmm1,xmm1,0 |
push dword TEX_X |
push dword -TEX_X |
push dword 1 |
push dword -1 |
movups xmm2,[esp] |
movd xmm3,.bmap |
shufps xmm3,xmm3,0 |
end if |
if Ext>=MMX |
; keep the current (y,x) coordinate pairs in MMX registers for the whole loop |
movq mm7,.cty |
movq mm6,.cby |
movq mm5,.cey |
; movq mm4,.dtyq |
; movq mm3,.dbyq |
end if |
.draw: |
; if TEX = SHIFTING ;bump drawing only in shifting mode |
; Z test: draw only when the current z is smaller than the stored one |
mov esi,.czbuff ; .czbuff current address in buffer |
mov ebx,.cz ; .cz - cur z position |
cmp ebx,dword[esi] |
jge .skip |
if Ext=NON |
mov eax,.cby |
shr eax,ROUND |
mov esi,.cbx |
shr esi,ROUND |
else |
movq mm1,mm6 |
psrld mm1,ROUND |
movd eax,mm1 |
psrlq mm1,32 |
movd esi,mm1 |
end if |
shl eax,TEX_SHIFT |
add esi,eax ;- ; esi - current bump map index |
; eax = bump[i-1] - bump[i+1], edx = bump[i-TEX_X] - bump[i+TEX_X] |
if Ext = SSE2 |
movd xmm0,esi |
shufps xmm0,xmm0,0 |
paddd xmm0,xmm2 |
pand xmm0,xmm1 |
paddd xmm0,xmm3 |
movd ebx,xmm0 |
movzx eax,byte[ebx] |
; |
; shufps xmm0,xmm0,11100001b |
psrldq xmm0,4 |
movd ebx,xmm0 |
movzx ebx,byte[ebx] |
sub eax,ebx |
; |
; shufps xmm0,xmm0,11111110b |
psrldq xmm0,4 |
movd ebx,xmm0 |
movzx edx, byte [ebx] |
; |
; shufps xmm0,xmm0,11111111b |
psrldq xmm0,4 |
movd ebx,xmm0 |
movzx ebx, byte [ebx] |
sub edx,ebx |
; |
else |
; mov ebx,esi |
; dec ebx |
lea ebx,[esi-1] |
and ebx,TEXTURE_SIZE |
add ebx,.bmap |
movzx eax,byte [ebx] |
; mov ebx,esi |
; inc ebx |
lea ebx,[esi+1] |
and ebx,TEXTURE_SIZE |
add ebx,.bmap |
movzx ebx,byte [ebx] |
sub eax,ebx |
; mov ebx,esi |
; sub ebx,TEX_X |
lea ebx,[esi-TEX_X] |
and ebx,TEXTURE_SIZE |
add ebx,.bmap |
movzx edx,byte [ebx] |
; mov ebx,esi |
; add ebx,TEX_X |
lea ebx,[esi+TEX_X] |
and ebx,TEXTURE_SIZE |
add ebx,.bmap |
movzx ebx,byte [ebx] |
sub edx,ebx |
end if |
; eax - horizontal sub modificated x coord |
; edx - vertical sub modificated y coord |
if Ext=NON |
mov ebx,.cex ;.cex - current env map X |
shr ebx,ROUND |
add eax,ebx |
mov ebx,.cey ;.cey - current env map y |
shr ebx,ROUND |
add edx,ebx |
else |
movq mm1,mm5 ; mm5 - copy of cur env coords |
psrld mm1,ROUND |
movd ebx,mm1 |
psrlq mm1,32 |
add eax,ebx |
movd ebx,mm1 |
add edx,ebx |
; movq qword[.temp1],mm3 |
; add eax,dword [.temp1] |
; add edx,dword [.temp1+4] |
end if |
; disturbed env map coordinates outside the map give a black pixel |
; NOTE(review): 'jg' lets eax = TEX_X and edx = TEX_Y through; if TEX_X/TEX_Y |
; are the map dimensions these index one column/row past the map - confirm |
; whether 'jge' is intended. Left unchanged here. |
or eax,eax |
jl .black |
cmp eax,TEX_X |
jg .black |
or edx,edx |
jl .black |
cmp edx,TEX_Y |
jg .black |
; author's note (translated from Polish): disturbance in the env map = |
; disturbance in the texture; suggestion: instead of a black pixel, draw |
; the undisturbed one. |
shl edx,TEX_SHIFT |
add edx,eax |
lea esi,[edx*3] |
add esi,.emap ; |
lodsd ; eax = env map texel (reads 4 bytes, the low 3 are used) |
if Ext=NON |
mov edx,.cty |
shr edx,ROUND ; sar |
mov edi,.ctx |
shr edi,ROUND ; sar |
else |
movq mm1,mm7 |
psrld mm1,ROUND |
movd edx,mm1 |
psrlq mm1,32 |
movd edi,mm1 |
end if |
shl edx,TEX_SHIFT |
add edi,edx |
and edi,TEXTURE_SIZE |
lea esi,[edi*3] |
add esi,.tex_map |
; output colour = env colour * texture colour / 256, per byte channel |
if Ext=NON |
mov edx,eax |
lodsd |
push ax |
mul dl |
mov dl,ah |
pop ax |
shr ax,8 |
mul dh |
mov al,dl |
mov edi,.c_scr |
stosw |
shr edx,16 |
shr eax,16 |
mul dl |
shr ax,8 |
stosb |
else |
movd mm0,eax |
pxor mm1,mm1 |
punpcklbw mm0,mm1 |
movd mm2,[esi] |
punpcklbw mm2,mm1 |
pmullw mm0,mm2 |
psrlw mm0,8 |
packuswb mm0,mm1 |
mov edi,.c_scr |
; Store exactly 3 bytes, like the Ext=NON path does. A 4-byte movd would |
; overwrite the first byte of the following pixel (and run one byte past |
; the buffer at its very last pixel). |
movd eax,mm0 |
mov [edi],ax |
shr eax,16 |
mov [edi+2],al |
end if |
jmp .actual_zbuff ; actualize z buffer |
@@: |
.black: |
; black pixel: write 3 zero bytes only, so the neighbouring pixel is untouched |
xor eax,eax |
mov edi,.c_scr |
mov [edi],ax |
mov [edi+2],al |
.actual_zbuff: |
mov eax,.cz |
mov edi,.czbuff |
stosd |
.skip: |
; step to the next pixel: 4 bytes in the Z-buffer, 3 bytes on screen |
add dword .czbuff,4 |
add dword .c_scr,3 |
if Ext=NON |
mov eax,.dbx |
add .cbx,eax |
mov ebx,.dby |
add .cby,ebx |
mov edx,.dex |
add .cex,edx |
mov eax,.dey |
add .cey,eax |
mov ebx,.dtx |
add .ctx,ebx |
mov edx,.dty |
add .cty,edx |
else |
paddd mm7,.dty |
paddd mm6,.dby |
paddd mm5,.dey |
end if |
mov eax,.dz |
add .cz,eax |
dec ecx |
jnz .draw |
.bl_end: |
mov esp,ebp ; drop all locals |
ret 76 ; return and remove the 76 bytes of stack parameters |
;Ext = MMX |
; else |
; movq mm5, qword[.temp1] ;- |
; paddd mm5, qword[.temp5] ; .temp5 == low dword = TEX_X, high dword = -TEX_X |
; pand mm5, qword[.temp3] ; .temp3 == low = high dword = TEX_SIZE |
; paddd mm5, qword[.temp4] ; .temp4 == low = high dword = offset .bmap |
; movd ebx,mm5 |
; psrlq mm5,32 |
; end if |
;CATMULL_SHIFT equ 8 |
;TEXTURE_SIZE EQU (TEX_X * TEX_Y)-1 |
;ROUND equ 8 |
;Ext = NON |
;MMX = 1 |
;NON = 0 |
;------- Big thanks to Majuma (www.majuma.xt.pl) for absolutely great--- |
;------- DOS 13h mode demos -------------------------------------------- |
;------- The procedure draws a bump mapped triangle with a texture, ----- |
;------- using the Catmull Z-buffer algorithm (Z coordinate interpolation) |
;------- Each texture pixel is calculated as col1*col2/256 -------------- |
; bump_tex_triangle_z - rasterizes one triangle span by span: the vertices |
; are sorted by y, per-scanline deltas are computed for the three edges, |
; and bump_tex_line_z is called once per scanline through .call_line. |
; The code path (integer / MMX / SSE / SSE2) is chosen at assembly time |
; by the Ext constant. The 50 bytes of stack arguments are removed by 'ret 50'. |
bump_tex_triangle_z: |
;------------------in - eax - x1 shl 16 + y1 ----------- |
;---------------------- ebx - x2 shl 16 + y2 ----------- |
;---------------------- ecx - x3 shl 16 + y3 ----------- |
;---------------------- edx - pointer to bump map------- |
;---------------------- esi - pointer to env map-------- |
;---------------------- edi - pointer to screen buffer-- |
;---------------------- stack : bump coordinates-------- |
;---------------------- environment coordinates- |
;---------------------- Z position coordinates-- |
;---------------------- pointer to Z buffer----- |
;---------------------- pointer to texture------ |
;---------------------- texture coordinates----- |
;-- Z-buffer - filled with coordinates as dword -------- |
;-- (Z coor. as word) shl CATMULL_SHIFT ---------------- |
; stack arguments (ebp = esp at entry, so [ebp] holds the return address) |
.b_x1 equ ebp+4 ; procedure don't save registers !!! |
.b_y1 equ ebp+6 ; each coordinate as word |
.b_x2 equ ebp+8 |
.b_y2 equ ebp+10 ; b - bump map coords |
.b_x3 equ ebp+12 ; e - env map coords |
.b_y3 equ ebp+14 |
.e_x1 equ ebp+16 |
.e_y1 equ ebp+18 |
.e_x2 equ ebp+20 |
.e_y2 equ ebp+22 |
.e_x3 equ ebp+24 |
.e_y3 equ ebp+26 |
.z1 equ word[ebp+28] |
.z2 equ word[ebp+30] |
.z3 equ word[ebp+32] |
.z_buff equ dword[ebp+34] ; pointer to Z-buffer |
.tex_ptr equ dword[ebp+38] ; ptr to texture |
.t_x1 equ ebp+42 ; texture coords |
.t_y1 equ ebp+44 |
.t_x2 equ ebp+46 |
.t_y2 equ ebp+48 |
.t_x3 equ ebp+50 |
.t_y3 equ ebp+52 |
; locals - the slots below are created by the pushes / 'sub esp' further down, |
; so their addresses depend on the exact order of those instructions |
.t_bmap equ dword[ebp-4] ; pointer to bump map |
.t_emap equ dword[ebp-8] ; pointer to env map |
.x1 equ word[ebp-10] |
.y1 equ word[ebp-12] |
.x2 equ word[ebp-14] |
.y2 equ word[ebp-16] |
.x3 equ word[ebp-18] |
.y3 equ word[ebp-20] |
; edge deltas: 8 dwords per edge (12, 13, 23); the 'if 0' variant is disabled |
if 0 ;Ext <= SSE2 |
.dx12 equ dword[edi-4] |
.dz12 equ [edi-8] |
.dbx12 equ dword[edi-12] |
.dby12 equ [edi-16] |
.dex12 equ dword[edi-20] |
.dey12 equ [edi-24] |
.dtx12 equ dword[edi-28] |
.dty12 equ [edi-32] |
.dx13 equ dword[ebp-52-4*1] |
.dz13 equ [ebp-52-4*2] |
.dbx13 equ dword[ebp-52-4*3] |
.dby13 equ [ebp-52-4*4] |
.dex13 equ dword[ebp-52-4*5] |
.dey13 equ [ebp-52-4*6] |
.dtx13 equ dword[ebp-52-4*7] |
.dty13 equ [ebp-52-4*8] |
.dx23 equ dword[ebp-(52+4*9)] |
.dz23 equ [ebp-(52+4*10)] |
.dbx23 equ dword[ebp-(52+4*11)] |
.dby23 equ [ebp-(52+4*12)] |
.dex23 equ dword[ebp-(52+4*13)] |
.dey23 equ [ebp-(52+4*14)] |
.dtx23 equ dword[ebp-(52+4*15)] |
.dty23 equ [ebp-(52+4*16)] |
else |
.dx12 equ dword[ebp-24] |
.dz12 equ [ebp-28] |
.dbx12 equ dword[ebp-32] |
.dby12 equ [ebp-36] |
.dex12 equ dword[ebp-40] |
.dey12 equ [ebp-44] |
.dtx12 equ dword[ebp-48] |
.dty12 equ [ebp-52] |
.dx13 equ dword[ebp-52-4*1] |
.dz13 equ [ebp-52-4*2] |
.dbx13 equ dword[ebp-52-4*3] |
.dby13 equ [ebp-52-4*4] |
.dex13 equ dword[ebp-52-4*5] |
.dey13 equ [ebp-52-4*6] |
.dtx13 equ dword[ebp-52-4*7] |
.dty13 equ [ebp-52-4*8] |
.dx23 equ dword[ebp-(52+4*9)] |
.dz23 equ [ebp-(52+4*10)] |
.dbx23 equ dword[ebp-(52+4*11)] |
.dby23 equ [ebp-(52+4*12)] |
.dex23 equ dword[ebp-(52+4*13)] |
.dey23 equ [ebp-(52+4*14)] |
.dtx23 equ dword[ebp-(52+4*15)] |
.dty23 equ [ebp-(52+4*16)] |
end if |
; current edge values: 16 dwords. For Ext < SSE the two edges are interleaved; |
; for Ext >= SSE all edge-1 values come first, followed by all edge-2 values, |
; in the same order as the delta slots (so 16-byte groups line up). |
if Ext < SSE |
.cx1 equ dword[ebp-(52+4*17)] ; current variables |
.cz1 equ [ebp-(52+4*18)] |
.cx2 equ dword[ebp-(52+4*19)] |
.cz2 equ [ebp-(52+4*20)] |
.cbx1 equ dword[ebp-(52+4*21)] |
.cby1 equ [ebp-(52+4*22)] |
.cbx2 equ dword[ebp-(52+4*23)] |
.cby2 equ [ebp-(52+4*24)] |
.cex1 equ dword[ebp-(52+4*25)] |
.cey1 equ [ebp-(52+4*26)] |
.cex2 equ dword[ebp-(52+4*27)] |
.cey2 equ [ebp-(52+4*28)] |
.ctx1 equ dword[ebp-(52+4*29)] |
.cty1 equ [ebp-(52+4*30)] |
.ctx2 equ dword[ebp-(52+4*31)] |
.cty2 equ [ebp-(52+4*32)] |
else |
.cx1 equ dword[ebp-(52+4*17)] ; current variables |
.cz1 equ [ebp-(52+4*18)] |
.cbx1 equ dword[ebp-(52+4*19)] |
.cby1 equ [ebp-(52+4*20)] |
.cex1 equ dword[ebp-(52+4*21)] |
.cey1 equ [ebp-(52+4*22)] |
.ctx1 equ dword[ebp-(52+4*23)] |
.cty1 equ [ebp-(52+4*24)] |
.cx2 equ dword[ebp-(52+4*25)] |
.cz2 equ [ebp-(52+4*26)] |
.cbx2 equ dword[ebp-(52+4*27)] |
.cby2 equ [ebp-(52+4*28)] |
.cex2 equ dword[ebp-(52+4*29)] |
.cey2 equ [ebp-(52+4*30)] |
.ctx2 equ dword[ebp-(52+4*31)] |
.cty2 equ [ebp-(52+4*32)] |
end if |
cld |
mov ebp,esp |
push edx ; store bump map |
push esi ; store e. map |
; sub esp,120 |
; Sort the vertices by ascending y (y is the low word of eax/ebx/ecx). |
; Every swap also exchanges the matching bump, env, texture and z arguments. |
.sort3: ; sort triangle coordinates... |
cmp ax,bx |
jle .sort1 |
xchg eax,ebx |
mov edx,dword[.b_x1] |
xchg edx,dword[.b_x2] |
mov dword[.b_x1],edx |
mov edx,dword[.e_x1] |
xchg edx,dword[.e_x2] |
mov dword[.e_x1],edx |
mov edx,dword[.t_x1] |
xchg edx,dword[.t_x2] |
mov dword[.t_x1],edx |
mov dx,.z1 |
xchg dx,.z2 |
mov .z1,dx |
.sort1: |
cmp bx,cx |
jle .sort2 |
xchg ebx,ecx |
mov edx,dword[.b_x2] |
xchg edx,dword[.b_x3] |
mov dword[.b_x2],edx |
mov edx,dword[.e_x2] |
xchg edx,dword[.e_x3] |
mov dword[.e_x2],edx |
mov edx,dword[.t_x2] |
xchg edx,dword[.t_x3] |
mov dword[.t_x2],edx |
mov dx,.z2 |
xchg dx,.z3 |
mov .z2,dx |
jmp .sort3 ; vertices 2 and 3 were swapped, so re-check 1 against 2 |
.sort2: |
; these three pushes create .x1/.y1, .x2/.y2 and .x3/.y3 |
push eax ; store triangle coords in variables |
push ebx |
push ecx |
; skip the triangle when all three x or all three y coordinates are negative |
mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that |
and edx,ebx ; if *all* of them are negative a sign flag is raised |
and edx,ecx |
and edx,eax |
test edx,80008000h ; Check both X&Y at once |
jne .loop23_done |
; mov edx,eax ; eax,ebx,ecx are ORd together into edx which means that |
; or edx,ebx ; if any *one* of them is negative a sign flag is raised |
; or edx,ecx |
; test edx,80000000h ; Check only X |
; jne .loop23_done |
; cmp .x1,SIZE_X ; { |
; jg .loop23_done |
; cmp .x2,SIZE_X ; This can be optimized with effort |
; jg .loop23_done |
; cmp .x3,SIZE_X |
; jg .loop23_done ; { |
; ---- deltas for edge 1-2: bx = y2 - y1; a flat edge gets 8 zero dwords ---- |
mov bx,.y2 ; calc delta 12 |
sub bx,.y1 |
jnz .bt_dx12_make |
if 0 ;Ext >= SSE2 |
pxor xmm0,xmm0 |
movups .dty12,xmm0 |
movups .dey12,xmm0 |
sub esp,16 |
else |
mov ecx,8 |
xor edx,edx |
@@: |
push edx ;dword 0 |
loop @b |
end if |
jmp .bt_dx12_done |
.bt_dx12_make: |
movsx ebx,bx |
if Ext>=SSE |
sub esp,32 ; room for the 8 delta dwords of this edge |
; mov eax,256 |
; xmm3 = dy / [i255d] in all four lanes, so dividing by it also scales the result |
; NOTE(review): i255d is defined elsewhere - presumably the integer 255; confirm |
cvtsi2ss xmm4,[i255d] |
cvtsi2ss xmm3,ebx ;rcps |
if 0 ;Ext >= SSE2 |
mov edi,ebp |
sub edi,512 |
or edi,0x0000000f |
end if |
divss xmm3,xmm4 |
shufps xmm3,xmm3,0 |
; the word coordinate pairs are zero-extended to dwords (punpcklwd with zero) |
movd mm0,[.b_x1] |
movd mm1,[.b_x2] |
movd mm2,[.e_x1] |
movd mm3,[.e_x2] |
pxor mm4,mm4 |
punpcklwd mm0,mm4 |
punpcklwd mm1,mm4 |
punpcklwd mm2,mm4 |
punpcklwd mm3,mm4 |
psubd mm1,mm0 |
psubd mm3,mm2 |
cvtpi2ps xmm1,mm1 |
movlhps xmm1,xmm1 |
cvtpi2ps xmm1,mm3 |
divps xmm1,xmm3 ;xmm1--> | dby | dbx | dey | dex | |
shufps xmm1,xmm1,10110001b |
;xmm1--> | dbx | dby | dex | dey | |
;1 movups .dey12,xmm1 |
cvtps2pi mm0,xmm1 ;mm0,xmm1 ; mm0 -> 2 delta dwords |
movhlps xmm1,xmm1 |
cvtps2pi mm1,xmm1 ;mm1,xmm1 |
movq .dey12,mm0 |
movq .dby12,mm1 |
;------------- |
; pxor mm0,mm0 |
; pxor mm1,mm1 |
;/ pinsrw mm0,.z1,1 |
;/ pinsrw mm0,.x1,0 |
;/ pinsrw mm1,.z2,1 |
;/ pinsrw mm1,.x2,0 |
; eax = z2 - z1, edx = x2 - x1 (both sign-extended) |
mov ax,.z2 |
sub ax,.z1 |
cwde |
mov dx,.x2 |
sub dx,.x1 |
movsx edx,dx |
;/ movd mm1,eax |
;/ punpcklwd mm0,mm4 |
;/ punpcklwd mm1,mm4 |
; cvtpi2ps xmm1,mm1 |
; cvtpi2ps xmm2,mm0 |
; subps xmm1,xmm2 |
;/ psubd mm1,mm0 |
movd mm2,[.t_x1] |
movd mm3,[.t_x2] |
punpcklwd mm2,mm4 |
punpcklwd mm3,mm4 |
psubd mm3,mm2 |
;/ cvtpi2ps xmm1,mm1 |
cvtsi2ss xmm1,eax |
movlhps xmm1,xmm1 |
cvtsi2ss xmm1,edx |
; movss xmm1,xmm4 |
shufps xmm1,xmm1,00101111b |
cvtpi2ps xmm1,mm3 |
divps xmm1,xmm3 ; xmm1--> | dx | dz | dty | dtx | |
shufps xmm1,xmm1,11100001b |
; xmm1--> | dx | dz | dtx | dty | |
;1 movlps .dty12,xmm1 |
;1 movhps .dz12,xmm1 |
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords | dtx | dty | |
movhlps xmm1,xmm1 |
cvtps2pi mm1,xmm1 |
movq .dty12,mm0 |
movq .dz12,mm1 |
;---- |
; mov ax,.z2 |
; sub ax,.z1 |
; cwde |
; mov bx,.x2 |
; sub bx,.x1 |
; movsx ebx,bx |
; movd mm1,eax |
; psllq mm1,32 |
; movd mm1,ebx |
;; push ebx |
;; push eax |
;; movq mm1,[esp] |
;; add esp,8 |
;;; mov ax,.z1 |
;;; mov bx,.z2 |
;;; shl eax,16 |
;;; shl ebx,16 |
;;; mov ax,.x1 |
;;; mov bx,.x2 |
; movd mm2,[.t_x1] |
; movd mm3,[.t_x2] |
;; movd mm0,eax |
;; movd mm1,ebx |
; pxor mm4,mm4 |
;; punpcklwd mm0,mm4 |
;; punpcklwd mm1,mm4 |
; punpcklwd mm2,mm4 |
; punpcklwd mm3,mm4 |
;; psubd mm1,mm0 |
; psubd mm3,mm2 |
; cvtpi2ps xmm1,mm1 |
; movlhps xmm1,xmm1 |
; cvtpi2ps xmm1,mm3 |
; divps xmm1,xmm3 ; xmm1--> | dz | dx | dty | dtx | |
; shufps xmm1,xmm1,10110001b |
; xmm1--> | dx | dz | dtx | dty | |
; cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords | dtx | dty | |
; movhlps xmm1,xmm1 |
; cvtps2pi mm1,xmm1 ; mm1 --> 2 delta dwords | dx | dz | |
; movq .dty12,mm0 |
; movq .dz12,mm1 |
else |
; integer variant: delta = ((v2 - v1) shl shift) / dy; each result is pushed, |
; which creates .dx12, .dz12, .dbx12 ... .dty12 in that order |
mov ax,.x2 |
sub ax,.x1 |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dx12,eax |
push eax |
mov ax,.z2 |
sub ax,.z1 |
cwde |
shl eax,CATMULL_SHIFT |
cdq |
idiv ebx |
push eax |
mov ax,word[.b_x2] |
sub ax,word[.b_x1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dbx12,eax |
push eax |
mov ax,word[.b_y2] |
sub ax,word[.b_y1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dby12,eax |
push eax |
mov ax,word[.e_x2] |
sub ax,word[.e_x1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dex12,eax |
push eax |
mov ax,word[.e_y2] |
sub ax,word[.e_y1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dey12,eax |
push eax |
mov ax,word[.t_x2] |
sub ax,word[.t_x1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dtx12,eax |
push eax |
mov ax,word[.t_y2] |
sub ax,word[.t_y1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dty12,eax |
push eax |
end if |
.bt_dx12_done: |
; ---- deltas for edge 1-3: same scheme as for edge 1-2 ---- |
mov bx,.y3 ; calc delta13 |
sub bx,.y1 |
jnz .bt_dx13_make |
mov ecx,8 |
xor edx,edx |
@@: |
push edx ;dword 0 |
loop @b |
jmp .bt_dx13_done |
.bt_dx13_make: |
movsx ebx,bx |
if Ext>=SSE |
sub esp,32 |
; mov eax,256 |
cvtsi2ss xmm4,[i255d] |
cvtsi2ss xmm3,ebx ;rcps |
divss xmm3,xmm4 |
shufps xmm3,xmm3,0 |
movd mm0,[.b_x1] |
movd mm1,[.b_x3] |
movd mm2,[.e_x1] |
movd mm3,[.e_x3] |
pxor mm4,mm4 |
punpcklwd mm0,mm4 |
punpcklwd mm1,mm4 |
punpcklwd mm2,mm4 |
punpcklwd mm3,mm4 |
psubd mm1,mm0 |
psubd mm3,mm2 |
cvtpi2ps xmm1,mm1 |
movlhps xmm1,xmm1 |
cvtpi2ps xmm1,mm3 |
divps xmm1,xmm3 ;xmm1--> | dby | dbx | dey | dex | |
shufps xmm1,xmm1,10110001b |
;xmm1--> | dbx | dby | dex | dey | |
;1 movups .dey13,xmm1 |
cvtps2pi mm0,xmm1 ;mm0,xmm1 ; mm0 -> 2 delta dwords |
movhlps xmm1,xmm1 |
cvtps2pi mm1,xmm1 ;mm1,xmm1 |
movq .dey13,mm0 |
movq .dby13,mm1 |
mov ax,.z3 |
sub ax,.z1 |
cwde |
mov dx,.x3 |
sub dx,.x1 |
movsx edx,dx |
movd mm2,[.t_x1] |
movd mm3,[.t_x3] |
punpcklwd mm2,mm4 |
punpcklwd mm3,mm4 |
psubd mm3,mm2 |
cvtsi2ss xmm1,eax |
movlhps xmm1,xmm1 |
cvtsi2ss xmm1,edx |
shufps xmm1,xmm1,00101111b |
cvtpi2ps xmm1,mm3 |
divps xmm1,xmm3 ; xmm1--> | dx | dz | dty | dtx | |
shufps xmm1,xmm1,11100001b |
; xmm1--> | dx | dz | dtx | dty | |
;1 movlps .dty13,xmm1 |
;1 movhps .dz13,xmm1 |
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords | dtx | dty | |
movhlps xmm1,xmm1 |
cvtps2pi mm1,xmm1 |
movq .dty13,mm0 |
movq .dz13,mm1 |
else |
mov ax,.x3 |
sub ax,.x1 |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dx13,eax |
push eax |
mov ax,.z3 |
sub ax,.z1 |
cwde |
shl eax,CATMULL_SHIFT |
cdq |
idiv ebx |
; mov .dz13,eax |
push eax |
mov ax,word[.b_x3] |
sub ax,word[.b_x1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dbx13,eax |
push eax |
mov ax,word[.b_y3] |
sub ax,word[.b_y1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dby13,eax |
push eax |
mov ax,word[.e_x3] |
sub ax,word[.e_x1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dex13,eax |
push eax |
mov ax,word[.e_y3] |
sub ax,word[.e_y1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dey13,eax |
push eax |
mov ax,word[.t_x3] |
sub ax,word[.t_x1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dtx13,eax |
push eax |
mov ax,word[.t_y3] |
sub ax,word[.t_y1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dty13,eax |
push eax |
end if |
.bt_dx13_done: |
; ---- deltas for edge 2-3: same scheme as for edge 1-2 ---- |
mov bx,.y3 ; calc delta23 |
sub bx,.y2 |
jnz .bt_dx23_make |
mov ecx,8 |
xor edx,edx |
@@: |
push edx ;dword 0 |
loop @b |
jmp .bt_dx23_done |
.bt_dx23_make: |
movsx ebx,bx |
if Ext>=SSE |
sub esp,32 |
; mov eax,256 |
cvtsi2ss xmm4,[i255d] |
cvtsi2ss xmm3,ebx ;rcps |
divss xmm3,xmm4 |
shufps xmm3,xmm3,0 |
movd mm0,[.b_x2] |
movd mm1,[.b_x3] |
movd mm2,[.e_x2] |
movd mm3,[.e_x3] |
pxor mm4,mm4 |
punpcklwd mm0,mm4 |
punpcklwd mm1,mm4 |
punpcklwd mm2,mm4 |
punpcklwd mm3,mm4 |
psubd mm1,mm0 |
psubd mm3,mm2 |
cvtpi2ps xmm1,mm1 |
movlhps xmm1,xmm1 |
cvtpi2ps xmm1,mm3 |
divps xmm1,xmm3 ;xmm1--> | dby | dbx | dey | dex | |
shufps xmm1,xmm1,10110001b |
;xmm1--> | dbx | dby | dex | dey | |
;1 movups .dey23,xmm1 |
cvtps2pi mm0,xmm1 ;mm0,xmm1 ; mm0 -> 2 delta dwords |
movhlps xmm1,xmm1 |
cvtps2pi mm1,xmm1 ;mm1,xmm1 |
movq .dey23,mm0 |
movq .dby23,mm1 |
mov ax,.z3 |
sub ax,.z2 |
cwde |
mov dx,.x3 |
sub dx,.x2 |
movsx edx,dx |
movd mm2,[.t_x2] |
movd mm3,[.t_x3] |
punpcklwd mm2,mm4 |
punpcklwd mm3,mm4 |
psubd mm3,mm2 |
cvtsi2ss xmm1,eax |
movlhps xmm1,xmm1 |
cvtsi2ss xmm1,edx |
shufps xmm1,xmm1,00101111b |
cvtpi2ps xmm1,mm3 |
divps xmm1,xmm3 ; xmm1--> | dx | dz | dty | dtx | |
shufps xmm1,xmm1,11100001b |
; xmm1--> | dx | dz | dtx | dty | |
; movlps .dty23,xmm1 |
; movhps .dz23,xmm1 |
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords | dtx | dty | |
movhlps xmm1,xmm1 |
cvtps2pi mm1,xmm1 ; mm1 --> 2 delta dwords | dx | dz | |
movq .dty23,mm0 |
movq .dz23,mm1 |
else |
mov ax,.x3 |
sub ax,.x2 |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dx23,eax |
push eax |
mov ax,.z3 |
sub ax,.z2 |
cwde |
shl eax,CATMULL_SHIFT |
cdq |
idiv ebx |
; mov .dz23,eax |
push eax |
mov ax,word[.b_x3] |
sub ax,word[.b_x2] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dbx23,eax |
push eax |
mov ax,word[.b_y3] |
sub ax,word[.b_y2] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dby23,eax |
push eax |
mov ax,word[.e_x3] |
sub ax,word[.e_x2] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dex23,eax |
push eax |
mov ax,word[.e_y3] |
sub ax,word[.e_y2] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dey23,eax |
push eax |
mov ax,word[.t_x3] |
sub ax,word[.t_x2] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dtx23,eax |
push eax |
mov ax,word[.t_y3] |
sub ax,word[.t_y2] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dty23,eax |
push eax |
end if |
; sub esp,40 |
.bt_dx23_done: |
; reserve the 16 "current value" dwords and start both edges at vertex 1 |
sub esp,64 |
movsx eax,.x1 |
shl eax,ROUND |
mov .cx1,eax |
mov .cx2,eax |
; push eax |
; push eax |
movsx ebx,word[.b_x1] |
shl ebx,ROUND |
mov .cbx1,ebx |
mov .cbx2,ebx |
; push ebx |
; push ebx |
movsx ecx,word[.b_y1] |
shl ecx,ROUND |
mov .cby1,ecx |
mov .cby2,ecx |
; push ecx |
; push ecx |
movsx edx,word[.e_x1] |
shl edx,ROUND |
mov .cex1,edx |
mov .cex2,edx |
; push edx |
; push edx |
movsx eax,word[.e_y1] |
shl eax,ROUND |
mov .cey1,eax |
mov .cey2,eax |
; push eax |
; push eax |
movsx ebx,.z1 |
shl ebx,CATMULL_SHIFT |
mov .cz1,ebx |
mov .cz2,ebx |
; push ebx |
; push ebx |
; sub esp,16 |
movsx ecx,word[.t_x1] |
shl ecx,ROUND |
mov .ctx1,ecx |
mov .ctx2,ecx |
;push ecx |
;push ecx |
movsx edx,word[.t_y1] |
shl edx,ROUND |
mov .cty1,edx |
mov .cty2,edx |
; push edx |
; push edx |
if Ext >= SSE2 |
; load the current values into xmm0-xmm3 (.call_line reads them from there) |
; and copy values and deltas into sse_repository |
movups xmm0,.cby1 |
movups xmm1,.cty1 |
movups xmm2,.cby2 |
movups xmm3,.cty2 |
movups xmm4,.dby13 |
movups xmm5,.dty13 |
movups xmm6,.dby12 |
movups xmm7,.dty12 |
; slots inside sse_repository, addressed through edi |
.scby1 equ [edi] |
.scty1 equ [edi+16] |
.scby2 equ [edi+32] |
.scty2 equ [edi+48] |
.sdby13 equ [edi+64] |
.sdty13 equ [edi+80] |
.sdby12 equ [edi+96] |
.sdty12 equ [edi+128] |
push edi ; edi (screen buffer pointer) is borrowed as the base register |
mov edi,sse_repository |
; NOTE(review): movaps faults on unaligned addresses, so sse_repository |
; is assumed to be 16-byte aligned - confirm at its definition |
movaps .scby1,xmm0 |
movaps .scty1,xmm1 |
movaps .scby2,xmm2 |
movaps .scty2,xmm3 |
movaps .sdby13,xmm4 |
movaps .sdty13,xmm5 |
movaps .sdby12,xmm6 |
movaps .sdty12,xmm7 |
pop edi |
end if |
; ---- upper part: scanlines y1 .. y2-1, between edges 1-3 and 1-2 ---- |
movsx ecx,.y1 |
cmp cx,.y2 |
jge .loop12_done |
.loop12: |
;if Ext >= SSE2 |
; fxsave [sse_repository] |
;end if |
call .call_line |
; advance all current values by one scanline |
if Ext >= SSE2 |
; fxrstor [sse_repository] |
movups xmm0,.cby1 |
movups xmm1,.cty1 |
movups xmm2,.cby2 |
movups xmm3,.cty2 |
; movups xmm4,.dby13 |
; movups xmm5,.dty13 |
; movups xmm6,.dby12 |
; movups xmm7,.dty12 |
; paddd xmm0,xmm4 |
; paddd xmm1,xmm5 |
; paddd xmm2,xmm6 |
; paddd xmm3,xmm7 |
push edi |
mov edi,sse_repository |
paddd xmm0,.sdby13 |
paddd xmm1,.sdty13 |
paddd xmm2,.sdby12 |
paddd xmm3,.sdty12 |
pop edi |
movups .cby1,xmm0 |
movups .cty1,xmm1 |
movups .cby2,xmm2 |
movups .cty2,xmm3 |
end if |
if (Ext = MMX) | (Ext = SSE) |
; each movq/paddd handles a pair of adjacent dwords (e.g. .cby1 and .cbx1) |
movq mm0,.cby2 |
movq mm1,.cby1 |
movq mm2,.cey2 |
movq mm3,.cey1 |
movq mm4,.cty1 |
movq mm5,.cty2 |
movq mm6,.cz1 |
movq mm7,.cz2 |
paddd mm0,.dby12 |
paddd mm1,.dby13 |
paddd mm2,.dey12 |
paddd mm3,.dey13 |
paddd mm4,.dty13 |
paddd mm5,.dty12 |
paddd mm6,.dz13 |
paddd mm7,.dz12 |
movq .cby2,mm0 |
movq .cby1,mm1 |
movq .cey1,mm3 |
movq .cey2,mm2 |
movq .cty1,mm4 |
movq .cty2,mm5 |
movq .cz1,mm6 |
movq .cz2,mm7 |
end if |
if Ext = NON |
mov edx,.dbx13 |
add .cbx1,edx |
mov eax,.dbx12 |
add .cbx2,eax |
mov ebx,.dby13 |
add .cby1,ebx |
mov edx,.dby12 |
add .cby2,edx |
mov eax,.dex13 |
add .cex1,eax |
mov ebx,.dex12 |
add .cex2,ebx |
mov edx,.dey13 |
add .cey1,edx |
mov eax,.dey12 |
add .cey2,eax |
mov eax,.dtx13 |
add .ctx1,eax |
mov ebx,.dtx12 |
add .ctx2,ebx |
mov edx,.dty13 |
add .cty1,edx |
mov eax,.dty12 |
add .cty2,eax |
mov eax,.dx13 |
add .cx1,eax |
mov ebx,.dx12 |
add .cx2,ebx |
mov ebx,.dz13 |
add .cz1,ebx |
mov edx,.dz12 |
add .cz2,edx |
end if |
inc ecx |
cmp cx,.y2 |
jl .loop12 |
.loop12_done: |
; ---- lower part: scanlines y2 .. y3-1, between edges 1-3 and 2-3 ---- |
movsx ecx,.y2 |
cmp cx,.y3 |
jge .loop23_done |
; restart the second edge exactly at vertex 2 |
; NOTE(review): the b/e/t coordinates are zero-extended (movzx) here, while |
; the vertex 1 values above were sign-extended (movsx) - confirm intent |
movsx eax,.z2 |
shl eax,CATMULL_SHIFT |
mov .cz2,eax |
movsx ebx,.x2 |
shl ebx,ROUND |
mov .cx2,ebx |
movzx edx,word[.b_x2] |
shl edx,ROUND |
mov .cbx2,edx |
movzx eax,word[.b_y2] |
shl eax,ROUND |
mov .cby2,eax |
movzx ebx,word[.e_x2] |
shl ebx,ROUND |
mov .cex2,ebx |
movzx edx,word[.e_y2] |
shl edx,ROUND |
mov .cey2,edx |
movzx eax,word[.t_x2] |
shl eax,ROUND |
mov .ctx2,eax |
movzx ebx,word[.t_y2] |
shl ebx,ROUND |
mov .cty2,ebx |
if Ext >= SSE2 |
; reload the edge-2 registers and store the edge 2-3 deltas in sse_repository |
movups xmm2,.cby2 |
movups xmm3,.cty2 |
; movups xmm4,.dby13 |
; movups xmm5,.dty13 |
movups xmm6,.dby23 |
movups xmm7,.dty23 |
; .scby1 equ [edi] |
; .scty1 equ [edi+16] |
; .scby2 equ [edi+32] |
; .scty2 equ [edi+48] |
; .sdby13 equ [edi+64] |
; .sdty13 equ [edi+80] |
.sdby23 equ [edi+160] |
.sdty23 equ [edi+192] |
push edi |
mov edi,sse_repository |
; movaps .scby1,xmm0 |
; movaps .scty1,xmm1 |
movaps .scby2,xmm2 |
movaps .scty2,xmm3 |
; movaps .sdby13,xmm4 |
; movaps .sdty13,xmm5 |
movaps .sdby23,xmm6 |
movaps .sdty23,xmm7 |
pop edi |
end if |
.loop23: |
;if Ext >= SSE2 |
; fxsave [sse_repository] |
;end if |
call .call_line |
; advance all current values by one scanline |
if Ext >= SSE2 |
movups xmm0,.cby1 |
movups xmm1,.cty1 |
movups xmm2,.cby2 |
movups xmm3,.cty2 |
push edi |
mov edi,sse_repository |
paddd xmm0,.sdby13 |
paddd xmm1,.sdty13 |
paddd xmm2,.sdby23 |
paddd xmm3,.sdty23 |
pop edi |
movups .cby1,xmm0 |
movups .cty1,xmm1 |
movups .cby2,xmm2 |
movups .cty2,xmm3 |
; fxrstor [sse_repository] |
; movups xmm0,.cby1 |
; movups xmm1,.cty1 |
; movups xmm2,.cby2 |
; movups xmm3,.cty2 |
; movups xmm4,.dby13 |
; movups xmm5,.dty13 |
; movups xmm6,.dby23 |
; movups xmm7,.dty23 |
; paddd xmm0,xmm4 |
; paddd xmm1,xmm5 |
; paddd xmm2,xmm6 |
; paddd xmm3,xmm7 |
; movups .cby1,xmm0 |
; movups .cty1,xmm1 |
; movups .cby2,xmm2 |
; movups .cty2,xmm3 |
; |
end if |
if (Ext = MMX) | (Ext = SSE) |
movq mm0,.cby2 |
movq mm1,.cby1 |
movq mm2,.cey2 |
movq mm3,.cey1 |
movq mm4,.cty1 |
movq mm5,.cty2 |
movq mm6,.cz1 |
movq mm7,.cz2 |
paddd mm0,.dby23 |
paddd mm1,.dby13 |
paddd mm2,.dey23 |
paddd mm3,.dey13 |
paddd mm4,.dty13 |
paddd mm5,.dty23 |
paddd mm6,.dz13 |
paddd mm7,.dz23 |
movq .cby2,mm0 |
movq .cby1,mm1 |
movq .cey2,mm2 |
movq .cey1,mm3 |
movq .cty1,mm4 |
movq .cty2,mm5 |
movq .cz1,mm6 |
movq .cz2,mm7 |
end if |
If Ext = NON |
mov edx,.dbx13 |
add .cbx1,edx |
mov eax,.dbx23 |
add .cbx2,eax |
mov ebx,.dby13 |
add .cby1,ebx |
mov edx,.dby23 |
add .cby2,edx |
mov eax,.dex13 |
add .cex1,eax |
mov ebx,.dex23 |
add .cex2,ebx |
mov edx,.dey13 |
add .cey1,edx |
mov eax,.dey23 |
add .cey2,eax |
mov eax,.dx13 |
add .cx1,eax |
mov ebx,.dx23 |
add .cx2,ebx |
mov ebx,.dz13 |
add .cz1,ebx |
mov edx,.dz23 |
add .cz2,edx |
mov eax,.dtx13 |
add .ctx1,eax |
mov ebx,.dtx23 |
add .ctx2,ebx |
mov edx,.dty13 |
add .cty1,edx |
mov eax,.dty23 |
add .cty2,eax |
end if |
inc ecx |
cmp cx,.y3 |
jl .loop23 |
.loop23_done: |
mov esp,ebp ; drop all locals |
ret 50 ; return and remove the 50 bytes of stack arguments |
; .call_line - pushes the current values of both edges as the stack |
; parameters of bump_tex_line_z and draws scanline ecx. All general purpose |
; registers are preserved by pushad/popad. For Ext >= SSE2 the values are |
; taken from xmm0-xmm3, which the caller must have loaded beforehand. |
.call_line: |
pushad |
; xmm0= cby1,cbx1,cz1,cx1 |
; xmm1= cty1,ctx1,cey1,cex1 |
if Ext >= SSE2 |
sub esp,8 |
shufps xmm1,xmm1,10110001b ; swap x/y inside each pair so x lands at the lower address |
shufps xmm3,xmm3,10110001b |
movlps [esp],xmm1 |
else |
push dword .cty1 |
push .ctx1 |
end if |
push dword .cz1 |
if Ext>=SSE2 |
sub esp,8 |
movlps [esp],xmm3 |
else |
push dword .cty2 |
push .ctx2 |
end if |
push dword .cz2 |
if Ext>=SSE2 |
sub esp,32 |
movhps [esp+24],xmm3 |
shufps xmm2,xmm2,10110001b |
movlps [esp+16],xmm2 |
movhps [esp+8],xmm1 |
shufps xmm0,xmm0,10110001b |
movlps [esp],xmm0 ;================================ |
else |
push dword .cey2 |
push .cex2 |
push dword .cby2 |
push .cbx2 |
push dword .cey1 |
push .cex1 |
push dword .cby1 |
push .cbx1 |
end if |
push .tex_ptr |
push .z_buff |
push .t_emap |
push .t_bmap |
push ecx ; y of the scanline |
; eax, ebx = span end points in whole pixels |
mov eax,.cx1 |
sar eax,ROUND |
mov ebx,.cx2 |
sar ebx,ROUND |
call bump_tex_line_z |
popad |
;end if |
ret |
bump_tex_line_z: |
;---------------------------------------------------------------------- |
; Draws one horizontal span of a bump mapped, environment mapped and |
; textured triangle, with a per pixel z-buffer test. |
; For every pixel that passes the z test the bump map gradient |
; (left-right and up-down neighbours) is added to the interpolated |
; environment map coordinates; the env map texel found there is |
; multiplied channel by channel with the texture texel and stored as a |
; 3 byte pixel. Perturbed coordinates outside the env map give black. |
; The span is dropped when y is outside 0..size_y_var-1, when x1 = x2, |
; or when it lies completely outside 0..size_x_var; otherwise it is |
; clipped to that range. If x1 > x2 both end points are exchanged. |
; No register is preserved (ebp is overwritten as frame pointer); the |
; MMX registers are used when Ext <> NON and no emms is executed here. |
; The 19 dword stack arguments are removed on return (ret 76). |
; NOTE(review): string instructions assume DF = 0 - confirm with callers. |
;---------------------------------------------------------------------- |
;--------------in: eax - x1 |
;-------------- ebx - x2 |
;-------------- edi - pointer to screen buffer |
;stack - another parameters : |
.y equ dword [ebp+4] |
.bmap equ dword [ebp+8] ; bump map pointer |
.emap equ dword [ebp+12] ; env map pointer |
.z_buff equ dword [ebp+16] ; z buffer |
.tex_map equ dword [ebp+20] ; texture pointer |
.bx1 equ [ebp+24] ; --- |
.by1 equ [ebp+28] ; | |
.ex1 equ [ebp+32] ; | |
.ey1 equ [ebp+36] ; | |
.bx2 equ [ebp+40] ; | |
.by2 equ [ebp+44] ; |> b. map and e. map coords |
.ex2 equ [ebp+48] ; |> shifted shl ROUND |
.ey2 equ [ebp+52] ; --- |
.z2 equ [ebp+56] |
.tx2 equ [ebp+60] |
.ty2 equ [ebp+64] |
.z1 equ [ebp+68] |
.tx1 equ [ebp+72] |
.ty1 equ [ebp+76] |
; locals - created below by pushes, so the push order fixes the layout |
.x1 equ [ebp-4] |
.x2 equ [ebp-8] |
.dbx equ [ebp-12] |
.dby equ [ebp-16] |
.dex equ [ebp-20] |
.dey equ [ebp-24] |
.dz equ [ebp-28] |
.dtx equ [ebp-32] |
.dty equ [ebp-36] |
.cbx equ [ebp-40] |
.cby equ [ebp-44] |
.cex equ [ebp-48] |
.cey equ [ebp-52] |
.cz equ [ebp-56] |
.czbuff equ [ebp-60] |
.ctx equ [ebp-64] |
.cty equ [ebp-68] |
.c_scr equ [ebp-72] |
.temp1 equ ebp-80 |
.temp2 equ ebp-88 |
.temp3 equ ebp-76 |
.temp4 equ ebp-84 |
.temp5 equ ebp-92 |
mov ebp,esp |
; reject lines above or below the screen |
mov ecx,.y |
or ecx,ecx |
jl .bl_end |
movzx edx,word[size_y_var] |
cmp ecx,edx ;SIZE_Y |
jge .bl_end |
; make x1 < x2; an empty span (x1 = x2) draws nothing |
cmp eax,ebx |
jl .bl_ok |
je .bl_end |
; x1 > x2 : exchange all "1" and "2" end point arguments |
if Ext=NON |
mov edx,.bx1 |
xchg edx,.bx2 |
mov .bx1,edx |
mov edx,.by1 |
xchg edx,.by2 |
mov .by1,edx |
mov edx,.ex1 |
xchg edx,.ex2 |
mov .ex1,edx |
mov edx,.ey1 |
xchg edx,.ey2 |
mov .ey1,edx |
mov edx,.tx1 |
xchg edx,.tx2 |
mov .tx1,edx |
mov edx,.ty1 |
xchg edx,.ty2 |
mov .ty1,edx |
end if |
if Ext = MMX |
movq mm0,.bx1 |
movq mm1,.bx2 |
movq mm2,.ex1 |
movq mm3,.ex2 |
movq mm4,.tx1 |
movq mm5,.tx2 |
movq .bx2,mm0 |
movq .bx1,mm1 |
movq .ex1,mm3 |
movq .ex2,mm2 |
movq .tx1,mm5 |
movq .tx2,mm4 |
end if |
if Ext>=SSE |
movups xmm0,.bx1 |
movups xmm1,.bx2 |
movups .bx1,xmm1 |
movups .bx2,xmm0 |
movq mm0,.tx1 |
movq mm1,.tx2 |
movq .tx1,mm1 |
movq .tx2,mm0 |
end if |
;if Ext>=SSE2 |
; movaps xmm4,xmm0 |
; movaps xmm0,xmm2 |
; movaps xmm2,xmm4 |
; movaps xmm5,xmm1 |
; movaps xmm1,xmm3 |
; movaps xmm3,xmm5 |
;else |
xchg eax,ebx |
mov edx,.z1 |
xchg edx,.z2 |
mov .z1,edx |
;end if |
.bl_ok: |
;if Ext >= SSE2 |
; shufps xmm0,xmm0,11100001b |
; shufps xmm2,xmm2,11100001b |
; movlps .bx1,xmm0 |
; movlps .bx2,xmm2 |
; shufps xmm0,xmm0,00011011b |
; shufps xmm2,xmm2,00011011b |
; movd eax,xmm0 |
; movd ebx,xmm2 |
; shufps xmm0,xmm0,11000110b |
; shufps xmm2,xmm2,11000110b |
; movd .z1,xmm0 |
; movd .z2,xmm2 |
; shufps xmm1,xmm1,10110001b |
; shufps xmm3,xmm3,10110001b |
; movlps .ex1,xmm1 |
; movlps .ex2,xmm2 |
; movhps .tx1,xmm1 |
; movhps .tx2,xmm2 |
; xchg eax,ebx |
; mov edx,.z1 |
; xchg edx,.z2 |
; mov .z1,edx |
;end if |
push eax |
push ebx ;store x1, x2 |
; reject spans completely right or left of the screen |
movzx ebx,word[size_x_var] |
; mov eax,.x1 |
cmp dword .x1,ebx ;dword .x1,SIZE_X |
jge .bl_end |
cmp dword .x2,0 |
jle .bl_end |
; ebx = span length (x2 - x1 > 0), divisor for all per pixel deltas |
mov ebx,.x2 |
sub ebx,.x1 |
if Ext>=SSE |
; reserve .dbx .. .dty (7 dwords) and compute them in float |
sub esp,28 |
cvtsi2ss xmm3,ebx ;rcps |
shufps xmm3,xmm3,0 |
; float using SSE variant ::--> |
; movups xmm0,.bx1 ; new |
; movups xmm1,.bx2 ; new |
cvtpi2ps xmm0,.bx1 ;mm0 ; variant fixed point |
movlhps xmm0,xmm0 |
cvtpi2ps xmm0,.ex1 ;mm2 |
cvtpi2ps xmm1,.bx2 ;mm1 |
movlhps xmm1,xmm1 |
cvtpi2ps xmm1,.ex2 ;mm3 |
; xmm0 = ex1,ey1,bx1,by1 xmm1 = ex2,ey2,bx2,by2 |
subps xmm1,xmm0 |
divps xmm1,xmm3 |
shufps xmm1,xmm1,10110001b |
; movups .dey,xmm1 ; new |
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords |
movhlps xmm1,xmm1 |
cvtps2pi mm1,xmm1 |
movq .dey,mm0 |
movq .dby,mm1 |
movd mm2,.z1 |
movd mm3,.z2 |
cvtpi2ps xmm0,.tx1 ;mm0 |
movlhps xmm0,xmm0 |
cvtpi2ps xmm0,mm2 |
cvtpi2ps xmm1,.tx2 ;mm1 |
movlhps xmm1,xmm1 |
cvtpi2ps xmm1,mm3 |
; movups xmm0,,z1 ; new |
; movups xmm1,.z2 ; new |
subps xmm1,xmm0 |
divps xmm1,xmm3 |
; movups .dz,xmm1 ;new |
shufps xmm1,xmm1,10110100b |
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords |
movhlps xmm1,xmm1 |
cvtps2pi mm1,xmm1 |
movd .dz,mm0 |
movq .dty,mm1 |
else |
; integer variant: every push creates the next delta local |
mov eax,.bx2 ; calc .dbx |
sub eax,.bx1 |
cdq |
idiv ebx |
push eax |
mov eax,.by2 ; calc .dby |
sub eax,.by1 |
cdq |
idiv ebx |
push eax |
mov eax,.ex2 ; calc .dex |
sub eax,.ex1 |
cdq |
idiv ebx |
push eax |
mov eax,.ey2 ; calc .dey |
sub eax,.ey1 |
cdq |
idiv ebx |
push eax |
mov eax,.z2 ; calc .dz |
sub eax,.z1 |
cdq |
idiv ebx |
push eax |
mov eax,.tx2 ; calc .dtx |
sub eax,.tx1 |
cdq |
idiv ebx |
push eax |
mov eax,.ty2 ; calc .dty |
sub eax,.ty1 |
cdq |
idiv ebx |
push eax |
end if |
cmp dword .x1,0 ; set correctly begin variable |
jge @f ; CLIPPING ON FUNCTION |
; cutting triangle exceedes screen |
; x1 < 0 : advance every start value by -x1 steps, then start at x = 0 |
mov ebx,.x1 |
neg ebx |
;if Ext >= SSE |
; cvtsi2ss xmm0,ebx |
; shufps xmm0,xmm0,0 |
; movups xmm1,.dey |
; mulps xmm1,xmm0 |
; shufps xmm1,xmm1,00011011b |
; movups xmm2,.bx1 |
; addps xmm2,xmm1 |
; movups .bx1,xmm2 |
mov eax,.dz |
imul ebx ; eax = .dz * abs(.x1) |
add .z1,eax |
mov dword .x1,0 |
mov eax,.dbx |
imul ebx |
add .bx1,eax |
mov eax,.dby |
imul ebx |
add .by1,eax |
mov eax,.dex |
imul ebx |
add .ex1,eax |
mov eax,.dey |
imul ebx |
add .ey1,eax |
mov eax,.dtx |
imul ebx |
add .tx1,eax |
mov eax,.dty |
imul ebx |
add .ty1,eax |
@@: |
; clamp x2 to the screen width |
; mov ebx,.x2 |
movzx eax,word[size_x_var] |
; cmp dword .x2,SIZE_X |
cmp dword .x2,eax ; eax,ebx |
jl @f |
mov dword .x2,eax ;SIZE_X |
@@: |
movzx eax,word[size_x_var] ;SIZE_X ;calc memory begin in buffers |
mul .y |
add eax,.x1 |
lea esi,[4*eax] |
add esi,.z_buff ; z-buffer filled with dd variables |
lea eax,[eax*3] |
add edi,eax |
mov ecx,.x2 |
sub ecx,.x1 |
; ecx = number of pixels to process (> 0 here) |
; init current variables |
push dword .bx1 ; current b, e and t shifted shl ROUND .cbx |
push dword .by1 ; .cby |
push dword .ex1 ; .cex |
push dword .ey1 ; .cey |
push dword .z1 ; current z shl CATMULL_SHIFT ; .cz |
push esi ; .czbuff |
push dword .tx1 ; .ctx |
push dword .ty1 ; .cty |
push edi ; .c_scr |
if Ext = SSE2 |
; xmm1 = index mask, xmm2 = neighbour offsets (-1,1,-TEX_X,TEX_X), |
; xmm3 = bump map base address in all four lanes |
mov eax,TEXTURE_SIZE |
movd xmm1,eax |
shufps xmm1,xmm1,0 |
push dword TEX_X |
push dword -TEX_X |
push dword 1 |
push dword -1 |
movups xmm2,[esp] |
movd xmm3,.bmap |
shufps xmm3,xmm3,0 |
end if |
if Ext>=MMX |
; each register holds a pair: low dword = y part, high dword = x part |
movq mm7,.cty |
movq mm6,.cby |
movq mm5,.cey |
; movq mm4,.dtyq |
; movq mm3,.dbyq |
end if |
.draw: |
; if TEX = SHIFTING ;bump drawing only in shifting mode |
mov esi,.czbuff ; .czbuff current address in buffer |
mov ebx,.cz ; .cz - cur z position |
cmp ebx,dword[esi] |
jge .skip |
if Ext=NON |
mov eax,.cby |
shr eax,ROUND |
mov esi,.cbx |
shr esi,ROUND |
else |
movq mm1,mm6 |
psrld mm1,ROUND |
movd eax,mm1 |
psrlq mm1,32 |
movd esi,mm1 |
end if |
shl eax,TEX_SHIFT |
add esi,eax ;- ; esi - current bump map index |
if Ext = SSE2 |
movd xmm0,esi |
shufps xmm0,xmm0,0 |
paddd xmm0,xmm2 |
pand xmm0,xmm1 |
paddd xmm0,xmm3 |
movd ebx,xmm0 |
movzx eax,byte[ebx] |
; |
; shufps xmm0,xmm0,11100001b |
psrldq xmm0,4 |
movd ebx,xmm0 |
movzx ebx,byte[ebx] |
sub eax,ebx |
; |
; shufps xmm0,xmm0,11111110b |
psrldq xmm0,4 |
movd ebx,xmm0 |
movzx edx, byte [ebx] |
; |
; shufps xmm0,xmm0,11111111b |
psrldq xmm0,4 |
movd ebx,xmm0 |
movzx ebx, byte [ebx] |
sub edx,ebx |
; |
else |
; mov ebx,esi |
; dec ebx |
lea ebx,[esi-1] |
and ebx,TEXTURE_SIZE |
add ebx,.bmap |
movzx eax,byte [ebx] |
; mov ebx,esi |
; inc ebx |
lea ebx,[esi+1] |
and ebx,TEXTURE_SIZE |
add ebx,.bmap |
movzx ebx,byte [ebx] |
sub eax,ebx |
; mov ebx,esi |
; sub ebx,TEX_X |
lea ebx,[esi-TEX_X] |
and ebx,TEXTURE_SIZE |
add ebx,.bmap |
movzx edx,byte [ebx] |
; mov ebx,esi |
; add ebx,TEX_X |
lea ebx,[esi+TEX_X] |
and ebx,TEXTURE_SIZE |
add ebx,.bmap |
movzx ebx,byte [ebx] |
sub edx,ebx |
end if |
; eax - horizontal sub modificated x coord |
; edx - vertical sub modificated y coord |
if Ext=NON |
mov ebx,.cex ;.cex - current env map X |
shr ebx,ROUND |
add eax,ebx |
mov ebx,.cey ;.cey - current env map y |
shr ebx,ROUND |
add edx,ebx |
else |
; mm5 was loaded from .cey, so its low dword is the env map y and its |
; high dword the env map x - add them to edx / eax respectively, the |
; same pairing the Ext=NON branch above uses |
movq mm1,mm5 ; mm5 - copy of cur env coords |
psrld mm1,ROUND |
movd ebx,mm1 ; ebx = .cey shr ROUND |
psrlq mm1,32 |
add edx,ebx ; vertical + env map y |
movd ebx,mm1 ; ebx = .cex shr ROUND |
add eax,ebx ; horizontal + env map x |
; movq qword[.temp1],mm3 |
; add eax,dword [.temp1] |
; add edx,dword [.temp1+4] |
end if |
or eax,eax |
jl .black |
cmp eax,TEX_X |
jg .black |
or edx,edx |
jl .black |
cmp edx,TEX_Y |
jg .black |
shl edx,TEX_SHIFT ; disturbance in the env map = disturbance in the texture |
add edx,eax ; suggestion: instead of a black pixel draw |
lea esi,[edx*3] ; an undisturbed one. |
add esi,.emap ; |
lodsd |
; eax = env map texel (low 3 bytes used) |
if Ext=NON |
mov edx,.cty |
shr edx,ROUND ; sar |
mov edi,.ctx |
shr edi,ROUND ; sar |
else |
movq mm1,mm7 |
psrld mm1,ROUND |
movd edx,mm1 |
psrlq mm1,32 |
movd edi,mm1 |
end if |
shl edx,TEX_SHIFT |
add edi,edx |
and edi,TEXTURE_SIZE |
lea esi,[edi*3] |
add esi,.tex_map |
if Ext=NON |
; per channel: result = (env * texture) shr 8 |
mov edx,eax |
lodsd |
push ax |
mul dl |
mov dl,ah |
pop ax |
shr ax,8 |
mul dh |
mov al,dl |
mov edi,.c_scr |
stosw |
shr edx,16 |
shr eax,16 |
mul dl |
shr ax,8 |
stosb |
else |
movd mm0,eax |
pxor mm1,mm1 |
punpcklbw mm0,mm1 |
movd mm2,[esi] |
punpcklbw mm2,mm1 |
pmullw mm0,mm2 |
psrlw mm0,8 |
packuswb mm0,mm1 |
mov edi,.c_scr |
; store exactly 3 bytes: a dword store would put a junk 4th byte into |
; the first byte of the next pixel (kept if that pixel fails its z |
; test) and run past the buffer end on the very last pixel |
movd eax,mm0 |
stosw |
shr eax,16 |
stosb |
end if |
jmp .actual_zbuff ; actualize z buffer |
@@: |
.black: |
; perturbed env coordinates fell outside the map - 3 byte black pixel |
xor eax,eax |
mov edi,.c_scr |
stosw |
stosb |
.actual_zbuff: |
mov eax,.cz |
mov edi,.czbuff |
stosd |
.skip: |
; step to the next pixel: buffers, map coordinates and z |
add dword .czbuff,4 |
add dword .c_scr,3 |
if Ext=NON |
mov eax,.dbx |
add .cbx,eax |
mov ebx,.dby |
add .cby,ebx |
mov edx,.dex |
add .cex,edx |
mov eax,.dey |
add .cey,eax |
mov ebx,.dtx |
add .ctx,ebx |
mov edx,.dty |
add .cty,edx |
else |
paddd mm7,.dty |
paddd mm6,.dby |
paddd mm5,.dey |
end if |
mov eax,.dz |
add .cz,eax |
dec ecx |
jnz .draw |
.bl_end: |
mov esp,ebp |
ret 76 |
;Ext = MMX |
; else |
; movq mm5, qword[.temp1] ;- |
; paddd mm5, qword[.temp5] ; .temp5 == low dword = TEX_X, high dword = -TEX_X |
; pand mm5, qword[.temp3] ; .temp3 == low = high dword = TEX_SIZE |
; paddd mm5, qword[.temp4] ; .temp4 == low = high dword = offset .bmap |
; movd ebx,mm5 |
; psrlq mm5,32 |
; end if |
/programs/demos/view3ds/data.inc |
---|
1,5 → 1,7 |
; DATA AREA ************************************ |
if Ext > SSE2 |
isSSE3 db 1 |
end if |
i3 dw 3 |
i6 dd 6 |
i12 dd 12 |
6,6 → 8,7 |
i36 dd 36 |
i256 dw 256 |
i255d dd 255 |
f1: |
dot_max dd 1.0 ; dot product max and min |
dot_min dd 0.0 |
env_const dd 1.05 |
25,7 → 28,7 |
y_offset dw SIZE_Y / 2 |
z_offset dw 0 |
rsscale dd 175.0 ; next real scale |
vect_x dw SIZE_X / 2 |
vect_x: dw SIZE_X / 2 |
vect_y dw SIZE_Y / 2 |
vect_z dw 0 |
size_y_var: |
110,9 → 113,9 |
dd ? |
db 7 |
db 'catmull ' |
db 'ray shadow' |
db 2 |
catmull_flag db 1 |
ray_shd_flag db 0 |
dd onoff_f |
db 8 |
165,7 → 168,7 |
db 16 |
db 'fire ' |
db 3 |
db 2 |
fire_flag db 0 |
dd blur_f |
350,7 → 353,7 |
if Ext=SSE3 |
db ' (SSE3)' |
end if |
db ' 0.073',0 |
db ' 0.074',0 |
labellen: |
STRdata db '-1 ' |
lab_vert: |
425,8 → 428,43 |
;if Ext >= SSE3 |
align 16 |
point_light_coords: |
dd 50.0 |
dd 50.0 |
dd -215.0 |
dd 0.0 |
align 16 |
dd 815.0 |
dd 815.0 |
dd -215.0 |
dd 0.0 |
dd 1500.0 |
dd 1500.0 |
dd -215.0 |
dd 0.0 |
if 0 |
aabb1: |
.0 dd 1.0,1.0,1.0,0 |
.1 dd -1.0,1.0,1.0,0 |
.2 dd 1.0,-1.0,1.0,0 |
.3 dd -1.0,-1.0,1.0,0 |
.4 dd 1.0,1.0,-1.0,0 |
.5 dd -1.0,1.0,-1.0,0 |
.6 dd 1.0,-1.0,-1.0,0 |
.7 dd -1.0,-1.0,-1.0,0 |
end if |
sign_mask: |
times 4 dd 0x80000000 |
f05xz: dd 0, 0, - 1.0 ,0 |
sign_z: |
dd -1,-1,0x7fffffff,0 |
abs_mask: |
dd 0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff |
emboss_bias: |
442,9 → 480,13 |
times 4 dd 510.0 |
the_one: |
times 4 dd 1.0 |
aprox dd 0.0001 |
eps: times 4 dd 0.00000 |
epsone dd 1.0001 |
aprox dd 0.0001 |
epsminus dd -0.0001 |
file_info: |
dd 0 |
dd 0 |
463,22 → 505,13 |
workarea rb 180 |
EndFile dd ? |
align 8 |
sinbeta dd ?;+32 |
sinbeta dd ?; |
cosbeta dd ? |
xsub dw ? |
zsub dw ?;+40 |
zsub dw ? |
ysub dw ? |
xx1 dw ? |
yy1 dw ? |
zz1 dw ?;+48 xx1 + 4 |
xx2 dw ? |
yy2 dw ? |
zz2 dw ? ; xx1 + 10 |
xx3 dw ?;+56 |
yy3 dw ? |
zz3 dw ? ; xx1 + 16 |
col1 dd ? |
col2 dd ? |
col3 dd ? |
487,13 → 520,9 |
points_count_var dd ? ; |
triangles_count_var dd ? ; dont change order |
edges_count dd ? ; |
tex_points_ptr dd ? |
point_index1 dd ? ;-\ |
point_index2 dd ? ; } don't change order |
point_index3 dd ? ;-/ |
temp_col dw ? |
temp1 dd ? ; > dont change |
temp2 dd ? ; > order |
high dd ? |
rand_seed dw ? |
align 8 |
510,18 → 539,14 |
matrix rb 36 |
cos_tab rd 360 |
sin_tab rd 360 |
align 16 |
lights_aligned: |
lights_aligned_end = $ + 16 * 12 |
rb 16 * 12 |
points_count = 180000/6*3 |
triangles_count = 180000 / 6 ;($-triangles)/6 |
align 16 |
label trizdd dword |
label trizdq qword |
triangles_with_z rw triangles_count*4 + 2 ; triangles triple dw + z position |
align 16 |
vectors rb 24 |
align 16 |
bumpmap rb TEXTURE_SIZE + 1 |
align 16 |
535,25 → 560,19 |
align 16 |
color_map rb (TEXTURE_SIZE +100) * 3 |
align 16 |
tex_points rb points_count * 4 ; bump_map and texture coords |
; each point word x, word y |
align 16 |
lights_aligned: |
lights_aligned_end = $ + 16 * 12 |
rb 16 * 12 |
; tex_points rb points_count * 4 ; bump_map and texture coords |
; ; each point word x, word y |
;align 16 |
; lights_aligned: |
; lights_aligned_end = $ + 16 * 12 |
; rb 16 * 12 |
if Ext >= SSE2 |
sse_repository rb 1024 |
end if |
; SourceFile: ; source file temporally in screen area |
; workarea dd ? |
; screen rb SIZE_X * SIZE_Y * 3 ; screen buffer |
;align 16 |
; Z_buffer rb SIZE_X * SIZE_Y * 4 |
procinfo: |
rb 1024 ; process info |
rb 2048 ; process info |
I_Param rb 256 |
memStack: |
rb 2000 |
/programs/demos/view3ds/flat_cat.inc |
---|
1,399 → 1,399 |
CATMULL_SHIFT equ 16 |
flat_triangle_z: |
; Draws a single colour triangle, one horizontal line at a time, by |
; calling flat_line_z; z is interpolated along the edges and handed to |
; flat_line_z for every line. |
; procedure drawing triangle with Z coordinate interpolation ------ |
; (Catmull algorithm)-------------------------------------------- |
; ----------------in - eax - x1 shl 16 + y1 ---------------------- |
; -------------------- ebx - x2 shl 16 + y2 ---------------------- |
; -------------------- ecx - x3 shl 16 + y3 ---------------------- |
; -------------------- edx - color 0x00RRGGBB -------------------- |
; -------------------- esi - pointer to Z-buffer ----------------- |
; -------------------- edi - pointer to screen buffer------------- |
; -------------------- stack : z coordinates |
; -------------------- Z-buffer : each z variable as dword |
; -------------------- (Z coor. as word) shl CATMULL_SHIFT |
; ebp is overwritten as frame pointer and no register is saved here; |
; the three word z arguments are removed on return (ret 6). |
.z1 equ word[ebp+4] |
.z2 equ word[ebp+6] ; each z coordinate as word integer |
.z3 equ word[ebp+8] |
; locals: .col and the packed x/y words come from the pushes below, |
; the remaining ones from the "sub esp" after the sort |
.col equ dword[ebp-4] |
.x1 equ word[ebp-6] |
.y1 equ word[ebp-8] |
.x2 equ word[ebp-10] |
.y2 equ word[ebp-12] |
.x3 equ word[ebp-14] |
.y3 equ word[ebp-16] |
.dx12 equ dword[ebp-20] |
;.dz12 equ dword[ebp-24] |
.dx13 equ dword[ebp-24] |
.dz13 equ dword[ebp-28] |
.dz12 equ dword[ebp-32] |
;.dz13 equ dword[ebp-32] |
.dx23 equ dword[ebp-36] |
.dz13M equ [ebp-40] |
.dz23 equ dword[ebp-44] |
.zz1 equ dword[ebp-48] |
.zz2 equ dword[ebp-52] |
.zz2M equ qword[ebp-52] |
.dz12M equ qword[ebp-32] |
.dz23M equ qword[ebp-44] |
;if Ext>=MMX |
; emms |
;end if |
mov ebp,esp |
push edx ; store edx in variable .col |
; sort the vertices so that y1 <= y2 <= y3 (y is the low word of each |
; register); the z arguments are exchanged together with the points |
.sort2: |
cmp ax,bx |
jle .sort1 |
xchg eax,ebx |
mov dx,.z1 |
xchg dx,.z2 |
mov .z1,dx |
.sort1: |
cmp bx,cx |
jle .sort3 |
xchg ebx,ecx |
mov dx,.z2 |
xchg dx,.z3 |
mov .z2,dx |
jmp .sort2 |
.sort3: |
push eax ; store triangle coordinates in user friendly variables |
push ebx |
push ecx |
; trivial reject: leave when all three x or all three y are negative |
mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that |
and edx,ebx ; if *all* of them are negative a sign flag is raised |
and edx,ecx |
and edx,eax |
test edx,80008000h ; Check both X&Y at once |
jne .ft_loop2_end |
; cmp ax,SIZE_Y |
; jle @f |
; cmp bx,SIZE_Y |
; jle @f |
; cmp cx,SIZE_Y |
; jge @f |
; ror eax,16 |
; ror ebx,16 |
; ror ecx,16 |
; cmp ax,SIZE_X |
; jle @f |
; cmp bx,SIZE_X |
; jle @f |
; cmp cx,SIZE_X |
; jle @f |
; jmp .ft_loop2_end |
;@@: |
; reserve the delta / current value locals (down to ebp-52 and below) |
sub esp,52-12 |
; edge 1-2: x step (shl ROUND) and z step (shl CATMULL_SHIFT) per line; |
; both are zero when the edge is horizontal (avoids division by zero) |
mov bx,.y2 ; calc delta 12 |
sub bx,.y1 |
jnz .ft_dx12_make |
mov .dx12,0 |
mov .dz12,0 |
jmp .ft_dx12_done |
.ft_dx12_make: |
mov ax,.x2 |
sub ax,.x1 |
cwde |
movsx ebx,bx |
shl eax,ROUND |
cdq |
idiv ebx |
mov .dx12,eax |
mov ax,.z2 |
sub ax,.z1 |
cwde |
shl eax,CATMULL_SHIFT |
cdq |
idiv ebx |
mov .dz12,eax |
.ft_dx12_done: |
; edge 1-3, same scheme |
mov bx,.y3 ; calc delta 13 |
sub bx,.y1 |
jnz .ft_dx13_make |
mov .dx13,0 |
mov .dz13,0 |
mov dword .dz13M,0 |
jmp .ft_dx13_done |
.ft_dx13_make: |
mov ax,.x3 |
sub ax,.x1 |
cwde |
movsx ebx,bx |
shl eax,ROUND |
cdq |
idiv ebx |
mov .dx13,eax |
mov ax,.z3 |
sub ax,.z1 |
cwde |
shl eax,CATMULL_SHIFT |
cdq |
idiv ebx |
mov .dz13,eax |
mov dword .dz13M,eax |
.ft_dx13_done: |
; edge 2-3, same scheme |
mov bx,.y3 ; calc delta 23 |
sub bx,.y2 |
jnz .gt_dx23_make |
mov .dx23,0 |
mov .dz23,0 |
jmp .gt_dx23_done |
.gt_dx23_make: |
mov ax,.x3 |
sub ax,.x2 |
cwde |
movsx ebx,bx |
shl eax,ROUND |
cdq |
idiv ebx |
mov .dx23,eax |
mov ax,.z3 |
sub ax,.z2 |
cwde |
shl eax,CATMULL_SHIFT |
cdq |
idiv ebx |
mov .dz23,eax |
.gt_dx23_done: |
; both edge walkers start at vertex 1: eax/.zz1 follow edge 1-3, |
; ebx/.zz2 follow edge 1-2 |
movsx edx,.z1 |
shl edx,CATMULL_SHIFT |
mov .zz1,edx |
mov .zz2,edx |
movsx eax,.x1 |
shl eax,ROUND ; eax - x1 |
mov ebx,eax ; ebx - x2 |
;if Ext>=MMX |
; movq mm0,.zz2M |
;end if |
; upper part: lines y1 .. y2-1 |
mov cx,.y1 |
cmp cx,.y2 |
jge .ft_loop1_end |
.ft_loop1: |
; pushad/popad keep eax, ebx, ecx, esi, edi and ebp across the call; |
; 18 bytes of arguments are pushed for flat_line_z |
pushad |
push .col |
push cx ; y |
sar ebx,ROUND |
push bx ; x2 |
sar eax,ROUND |
push ax ; x1 |
;if Ext>=MMX |
; sub esp,8 |
; movq [esp],mm0 |
;else |
push .zz2 ; z2 shl CATMULL_SHIFT |
push .zz1 ; z1 shl CATMULL_SHIFT |
;end if |
call flat_line_z |
popad |
add eax,.dx13 |
add ebx,.dx12 |
;if Ext>=MMX |
; paddd mm0,.dz12M |
;else |
mov edx,.dz13 |
add .zz1,edx |
mov edx,.dz12 |
add .zz2,edx |
;end if |
inc cx |
cmp cx,.y2 |
jl .ft_loop1 |
.ft_loop1_end: |
; lower part: restart the second walker at vertex 2 and follow edge 2-3; |
; eax/.zz1 simply continue along edge 1-3 |
movsx edx,.z2 |
shl edx,CATMULL_SHIFT |
mov .zz2,edx |
movsx ebx,.x2 |
shl ebx,ROUND |
;if Ext>=MMX |
; movq mm0,.zz2M |
;; push .dz13 ; exchange |
;; pop .dz12 |
;; push .dz23 ; exchange |
;; pop .dz13 |
;end if |
mov cx,.y2 |
cmp cx,.y3 |
jge .ft_loop2_end |
.ft_loop2: |
pushad |
push .col |
push cx |
sar ebx,ROUND |
push bx |
sar eax,ROUND |
push ax ; x1 |
;if Ext>=MMX |
; sub esp,8 |
; movq [esp],mm0 |
;else |
push .zz2 ; z2 shl CATMULL_SHIFT |
push .zz1 ; z1 shl CATMULL_SHIFT |
;end if |
call flat_line_z |
popad |
add eax,.dx13 |
add ebx,.dx23 |
;if Ext>=MMX |
; paddd mm0,.dz23M |
;else |
mov edx,.dz13 |
add .zz1,edx |
mov edx,.dz23 |
add .zz2,edx |
; mov edx,.dz13 |
; add .zz1,edx |
; mov edx,.dz12 |
; add .zz2,edx |
;end if |
inc cx |
cmp cx,.y3 |
jl .ft_loop2 |
.ft_loop2_end: |
; drop all locals and pop the three word z arguments |
mov esp,ebp |
ret 6 |
flat_line_z: |
;---------------- |
; Draws one horizontal single colour span with a per pixel z-buffer |
; test: a pixel is written (3 bytes) and its z stored only when the |
; interpolated z is lower than the value already in the z-buffer. |
; The span is dropped when y is outside 0..size_y_var-1, when x1 = x2 or |
; when it lies completely outside the screen; otherwise it is clipped |
; to 0..size_x_var. If x1 > x2 the end points (and z1/z2) are exchanged. |
;-------------in edi - pointer to screen buffer ---------------------------------- |
;--------------- esi - pointer to z-buffer (each Z variable dword)----------------- |
;----------stack - (each z coordinate shifted shl CATMULL_SHIFT)------------------ |
; ebp is overwritten as frame pointer and no register is saved here; |
; the 18 bytes of stack arguments are removed on return (ret 18). |
; NOTE(review): stosw/stosb assume DF = 0 - confirm with callers. |
.z1 equ dword [ebp+4] |
.z2 equ dword [ebp+8] |
.x1 equ word [ebp+12] |
.x2 equ word [ebp+14] |
.y equ word [ebp+16] |
.col equ dword [ebp+18] |
.dz equ dword [ebp-4] ; created by the "push eax" after idiv |
mov ebp,esp |
;; sub esp,4 |
; reject lines above or below the screen |
mov ax,.y |
or ax,ax |
jl .fl_quit |
mov bx,[size_y_var] |
dec bx |
cmp ax,bx ;[size_y_var] |
; cmp ax,SIZE_Y-1 |
jg .fl_quit |
; cmp .x1,0 |
; jge .fl_ok1 |
; cmp .x2,0 |
; jl .fl_quit |
; .fl_ok1: |
; cmp .x1,SIZE_X |
; jle .fl_ok2 |
; cmp .x2,SIZE_X |
; jg .fl_quit |
; .fl_ok2: |
; make x1 < x2; an empty span draws nothing |
mov ax,.x1 |
cmp ax,.x2 |
je .fl_quit |
jl .fl_ok |
xchg ax,.x2 |
mov .x1,ax |
mov edx,.z1 |
xchg edx,.z2 |
mov .z1,edx |
.fl_ok: |
; reject spans completely right or left of the screen |
mov bx,[size_x_var] |
dec bx |
cmp .x1,bx ;SIZE_X-1 |
jg .fl_quit |
cmp .x2,0 |
jle .fl_quit |
; dz = (z2 - z1) / (x2 - x1), kept on the stack as .dz |
mov eax,.z2 |
sub eax,.z1 |
cdq |
mov bx,.x2 |
sub bx,.x1 |
movsx ebx,bx |
idiv ebx |
;; mov .dz,eax ; calculated delta - shifted .dz |
push eax |
; left clipping: advance z1 by -x1 steps and start at x = 0 |
cmp .x1,0 |
jge @f |
movsx ebx,.x1 |
neg ebx |
imul ebx |
add .z1,eax |
mov .x1,0 |
@@: |
; right clipping: clamp x2 to the screen width |
movzx edx,word[size_x_var] |
cmp .x2,dx ;[size_x_var] ;SIZE_X |
jl @f |
mov .x2,dx ;[size_x_var] ;SIZE_X |
@@: |
; movzx edx,[size_x_var] ;SIZE_X |
movsx eax,.y |
mul edx ; edi = edi + (SIZE_X * y + x1)*3 |
movsx edx,.x1 |
add eax,edx |
push eax |
lea eax,[eax*3] |
add edi,eax ; esi = esi + (SIZE_X * y + x1)*4 |
pop eax |
shl eax,2 |
add esi,eax |
mov cx,.x2 |
sub cx,.x1 |
movzx ecx,cx |
mov eax,.col |
mov ebx,.z1 ; ebx : curr. z |
mov edx,.dz |
; all pixels but the last go through .ddraw, the last one through |
; .draw_last |
dec ecx |
jecxz .draw_last |
.ddraw: |
cmp ebx,dword[esi] |
; cmovl [edi],eax |
; cmovl [esi],ebx |
jge @f |
; store exactly the 3 colour bytes: a dword store would also overwrite |
; the first byte of the next pixel, which stays damaged whenever that |
; pixel then fails its own z test |
mov word[edi],ax |
ror eax,16 |
mov byte[edi+2],al |
ror eax,16 ; eax = colour again |
mov dword[esi],ebx |
@@: |
add edi,3 |
add esi,4 |
add ebx,edx |
loop .ddraw |
.draw_last: |
cmp ebx,dword[esi] |
jge .fl_quit |
stosw |
shr eax,16 |
stosb |
mov dword[esi],ebx |
.fl_quit: |
mov esp,ebp |
ret 18 |
CATMULL_SHIFT equ 16 |
flat_triangle_z: |
; Draws a single colour triangle, one horizontal line at a time, by |
; calling flat_line_z; z is interpolated along the edges and handed to |
; flat_line_z for every line. |
; procedure drawing triangle with Z coordinate interpolation ------ |
; (Catmull algorithm)-------------------------------------------- |
; ----------------in - eax - x1 shl 16 + y1 ---------------------- |
; -------------------- ebx - x2 shl 16 + y2 ---------------------- |
; -------------------- ecx - x3 shl 16 + y3 ---------------------- |
; -------------------- edx - color 0x00RRGGBB -------------------- |
; -------------------- esi - pointer to Z-buffer ----------------- |
; -------------------- edi - pointer to screen buffer------------- |
; -------------------- stack : z coordinates |
; -------------------- Z-buffer : each z variable as dword |
; -------------------- (Z coor. as word) shl CATMULL_SHIFT |
; ebp is overwritten as frame pointer and no register is saved here; |
; the three word z arguments are removed on return (ret 6). |
.z1 equ word[ebp+4] |
.z2 equ word[ebp+6] ; each z coordinate as word integer |
.z3 equ word[ebp+8] |
; locals: .col and the packed x/y words come from the pushes below, |
; the remaining ones from the "sub esp" after the sort |
.col equ dword[ebp-4] |
.x1 equ word[ebp-6] |
.y1 equ word[ebp-8] |
.x2 equ word[ebp-10] |
.y2 equ word[ebp-12] |
.x3 equ word[ebp-14] |
.y3 equ word[ebp-16] |
.dx12 equ dword[ebp-20] |
;.dz12 equ dword[ebp-24] |
.dx13 equ dword[ebp-24] |
.dz13 equ dword[ebp-28] |
.dz12 equ dword[ebp-32] |
;.dz13 equ dword[ebp-32] |
.dx23 equ dword[ebp-36] |
.dz13M equ [ebp-40] |
.dz23 equ dword[ebp-44] |
.zz1 equ dword[ebp-48] |
.zz2 equ dword[ebp-52] |
.zz2M equ qword[ebp-52] |
.dz12M equ qword[ebp-32] |
.dz23M equ qword[ebp-44] |
;if Ext>=MMX |
; emms |
;end if |
mov ebp,esp |
push edx ; store edx in variable .col |
; sort the vertices so that y1 <= y2 <= y3 (y is the low word of each |
; register); the z arguments are exchanged together with the points |
.sort2: |
cmp ax,bx |
jle .sort1 |
xchg eax,ebx |
mov dx,.z1 |
xchg dx,.z2 |
mov .z1,dx |
.sort1: |
cmp bx,cx |
jle .sort3 |
xchg ebx,ecx |
mov dx,.z2 |
xchg dx,.z3 |
mov .z2,dx |
jmp .sort2 |
.sort3: |
push eax ; store triangle coordinates in user friendly variables |
push ebx |
push ecx |
; trivial reject: leave when all three x or all three y are negative |
mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that |
and edx,ebx ; if *all* of them are negative a sign flag is raised |
and edx,ecx |
and edx,eax |
test edx,80008000h ; Check both X&Y at once |
jne .ft_loop2_end |
; cmp ax,SIZE_Y |
; jle @f |
; cmp bx,SIZE_Y |
; jle @f |
; cmp cx,SIZE_Y |
; jge @f |
; ror eax,16 |
; ror ebx,16 |
; ror ecx,16 |
; cmp ax,SIZE_X |
; jle @f |
; cmp bx,SIZE_X |
; jle @f |
; cmp cx,SIZE_X |
; jle @f |
; jmp .ft_loop2_end |
;@@: |
; reserve the delta / current value locals (down to ebp-52 and below) |
sub esp,52-12 |
; edge 1-2: x step (shl ROUND) and z step (shl CATMULL_SHIFT) per line; |
; both are zero when the edge is horizontal (avoids division by zero) |
mov bx,.y2 ; calc delta 12 |
sub bx,.y1 |
jnz .ft_dx12_make |
mov .dx12,0 |
mov .dz12,0 |
jmp .ft_dx12_done |
.ft_dx12_make: |
mov ax,.x2 |
sub ax,.x1 |
cwde |
movsx ebx,bx |
shl eax,ROUND |
cdq |
idiv ebx |
mov .dx12,eax |
mov ax,.z2 |
sub ax,.z1 |
cwde |
shl eax,CATMULL_SHIFT |
cdq |
idiv ebx |
mov .dz12,eax |
.ft_dx12_done: |
; edge 1-3, same scheme |
mov bx,.y3 ; calc delta 13 |
sub bx,.y1 |
jnz .ft_dx13_make |
mov .dx13,0 |
mov .dz13,0 |
mov dword .dz13M,0 |
jmp .ft_dx13_done |
.ft_dx13_make: |
mov ax,.x3 |
sub ax,.x1 |
cwde |
movsx ebx,bx |
shl eax,ROUND |
cdq |
idiv ebx |
mov .dx13,eax |
mov ax,.z3 |
sub ax,.z1 |
cwde |
shl eax,CATMULL_SHIFT |
cdq |
idiv ebx |
mov .dz13,eax |
mov dword .dz13M,eax |
.ft_dx13_done: |
; edge 2-3, same scheme |
mov bx,.y3 ; calc delta 23 |
sub bx,.y2 |
jnz .gt_dx23_make |
mov .dx23,0 |
mov .dz23,0 |
jmp .gt_dx23_done |
.gt_dx23_make: |
mov ax,.x3 |
sub ax,.x2 |
cwde |
movsx ebx,bx |
shl eax,ROUND |
cdq |
idiv ebx |
mov .dx23,eax |
mov ax,.z3 |
sub ax,.z2 |
cwde |
shl eax,CATMULL_SHIFT |
cdq |
idiv ebx |
mov .dz23,eax |
.gt_dx23_done: |
; both edge walkers start at vertex 1: eax/.zz1 follow edge 1-3, |
; ebx/.zz2 follow edge 1-2 |
movsx edx,.z1 |
shl edx,CATMULL_SHIFT |
mov .zz1,edx |
mov .zz2,edx |
movsx eax,.x1 |
shl eax,ROUND ; eax - x1 |
mov ebx,eax ; ebx - x2 |
;if Ext>=MMX |
; movq mm0,.zz2M |
;end if |
; upper part: lines y1 .. y2-1 |
mov cx,.y1 |
cmp cx,.y2 |
jge .ft_loop1_end |
.ft_loop1: |
; pushad/popad keep eax, ebx, ecx, esi, edi and ebp across the call; |
; 18 bytes of arguments are pushed for flat_line_z |
pushad |
push .col |
push cx ; y |
sar ebx,ROUND |
push bx ; x2 |
sar eax,ROUND |
push ax ; x1 |
;if Ext>=MMX |
; sub esp,8 |
; movq [esp],mm0 |
;else |
push .zz2 ; z2 shl CATMULL_SHIFT |
push .zz1 ; z1 shl CATMULL_SHIFT |
;end if |
call flat_line_z |
popad |
add eax,.dx13 |
add ebx,.dx12 |
;if Ext>=MMX |
; paddd mm0,.dz12M |
;else |
mov edx,.dz13 |
add .zz1,edx |
mov edx,.dz12 |
add .zz2,edx |
;end if |
inc cx |
cmp cx,.y2 |
jl .ft_loop1 |
.ft_loop1_end: |
; lower part: restart the second walker at vertex 2 and follow edge 2-3; |
; eax/.zz1 simply continue along edge 1-3 |
movsx edx,.z2 |
shl edx,CATMULL_SHIFT |
mov .zz2,edx |
movsx ebx,.x2 |
shl ebx,ROUND |
;if Ext>=MMX |
; movq mm0,.zz2M |
;; push .dz13 ; exchange |
;; pop .dz12 |
;; push .dz23 ; exchange |
;; pop .dz13 |
;end if |
mov cx,.y2 |
cmp cx,.y3 |
jge .ft_loop2_end |
.ft_loop2: |
pushad |
push .col |
push cx |
sar ebx,ROUND |
push bx |
sar eax,ROUND |
push ax ; x1 |
;if Ext>=MMX |
; sub esp,8 |
; movq [esp],mm0 |
;else |
push .zz2 ; z2 shl CATMULL_SHIFT |
push .zz1 ; z1 shl CATMULL_SHIFT |
;end if |
call flat_line_z |
popad |
add eax,.dx13 |
add ebx,.dx23 |
;if Ext>=MMX |
; paddd mm0,.dz23M |
;else |
mov edx,.dz13 |
add .zz1,edx |
mov edx,.dz23 |
add .zz2,edx |
; mov edx,.dz13 |
; add .zz1,edx |
; mov edx,.dz12 |
; add .zz2,edx |
;end if |
inc cx |
cmp cx,.y3 |
jl .ft_loop2 |
.ft_loop2_end: |
; drop all locals and pop the three word z arguments |
mov esp,ebp |
ret 6 |
flat_line_z: |
;---------------- |
; Draws one horizontal single colour span with a per pixel z-buffer |
; test: a pixel is written (3 bytes) and its z stored only when the |
; interpolated z is lower than the value already in the z-buffer. |
; The span is dropped when y is outside 0..size_y_var-1, when x1 = x2 or |
; when it lies completely outside the screen; otherwise it is clipped |
; to 0..size_x_var. If x1 > x2 the end points (and z1/z2) are exchanged. |
;-------------in edi - pointer to screen buffer ---------------------------------- |
;--------------- esi - pointer to z-buffer (each Z variable dword)----------------- |
;----------stack - (each z coordinate shifted shl CATMULL_SHIFT)------------------ |
; ebp is overwritten as frame pointer and no register is saved here; |
; the 18 bytes of stack arguments are removed on return (ret 18). |
; NOTE(review): stosw/stosb assume DF = 0 - confirm with callers. |
.z1 equ dword [ebp+4] |
.z2 equ dword [ebp+8] |
.x1 equ word [ebp+12] |
.x2 equ word [ebp+14] |
.y equ word [ebp+16] |
.col equ dword [ebp+18] |
.dz equ dword [ebp-4] ; created by the "push eax" after idiv |
mov ebp,esp |
;; sub esp,4 |
; reject lines above or below the screen |
mov ax,.y |
or ax,ax |
jl .fl_quit |
mov bx,[size_y_var] |
dec bx |
cmp ax,bx ;[size_y_var] |
; cmp ax,SIZE_Y-1 |
jg .fl_quit |
; cmp .x1,0 |
; jge .fl_ok1 |
; cmp .x2,0 |
; jl .fl_quit |
; .fl_ok1: |
; cmp .x1,SIZE_X |
; jle .fl_ok2 |
; cmp .x2,SIZE_X |
; jg .fl_quit |
; .fl_ok2: |
; make x1 < x2; an empty span draws nothing |
mov ax,.x1 |
cmp ax,.x2 |
je .fl_quit |
jl .fl_ok |
xchg ax,.x2 |
mov .x1,ax |
mov edx,.z1 |
xchg edx,.z2 |
mov .z1,edx |
.fl_ok: |
; reject spans completely right or left of the screen |
mov bx,[size_x_var] |
dec bx |
cmp .x1,bx ;SIZE_X-1 |
jg .fl_quit |
cmp .x2,0 |
jle .fl_quit |
; dz = (z2 - z1) / (x2 - x1), kept on the stack as .dz |
mov eax,.z2 |
sub eax,.z1 |
cdq |
mov bx,.x2 |
sub bx,.x1 |
movsx ebx,bx |
idiv ebx |
;; mov .dz,eax ; calculated delta - shifted .dz |
push eax |
; left clipping: advance z1 by -x1 steps and start at x = 0 |
cmp .x1,0 |
jge @f |
movsx ebx,.x1 |
neg ebx |
imul ebx |
add .z1,eax |
mov .x1,0 |
@@: |
; right clipping: clamp x2 to the screen width |
movzx edx,word[size_x_var] |
cmp .x2,dx ;[size_x_var] ;SIZE_X |
jl @f |
mov .x2,dx ;[size_x_var] ;SIZE_X |
@@: |
; movzx edx,[size_x_var] ;SIZE_X |
movsx eax,.y |
mul edx ; edi = edi + (SIZE_X * y + x1)*3 |
movsx edx,.x1 |
add eax,edx |
push eax |
lea eax,[eax*3] |
add edi,eax ; esi = esi + (SIZE_X * y + x1)*4 |
pop eax |
shl eax,2 |
add esi,eax |
mov cx,.x2 |
sub cx,.x1 |
movzx ecx,cx |
mov eax,.col |
mov ebx,.z1 ; ebx : curr. z |
mov edx,.dz |
; all pixels but the last go through .ddraw, the last one through |
; .draw_last |
dec ecx |
jecxz .draw_last |
.ddraw: |
cmp ebx,dword[esi] |
; cmovl [edi],eax |
; cmovl [esi],ebx |
jge @f |
; store exactly the 3 colour bytes: a dword store would also overwrite |
; the first byte of the next pixel, which stays damaged whenever that |
; pixel then fails its own z test |
mov word[edi],ax |
ror eax,16 |
mov byte[edi+2],al |
ror eax,16 ; eax = colour again |
mov dword[esi],ebx |
@@: |
add edi,3 |
add esi,4 |
add ebx,edx |
loop .ddraw |
.draw_last: |
cmp ebx,dword[esi] |
jge .fl_quit |
stosw |
shr eax,16 |
stosb |
mov dword[esi],ebx |
.fl_quit: |
mov esp,ebp |
ret 18 |
/programs/demos/view3ds/grd_cat.inc |
---|
1,704 → 1,704 |
ROUND equ 8 |
CATMULL_SHIFT equ 8 |
gouraud_triangle_z: |
;----procedure drawing gouraud triangle with z coordinate |
;----interpolation ( Catmull alghoritm )----------------- |
;------------------in - eax - x1 shl 16 + y1 ------------ |
;---------------------- ebx - x2 shl 16 + y2 ------------ |
;---------------------- ecx - x3 shl 16 + y3 ------------ |
;---------------------- esi - pointer to Z-buffer-------- |
;---------------------- Z-buffer filled with dd variables |
;---------------------- shifted CATMULL_SHIFT------------ |
;---------------------- edi - pointer to screen buffer--- |
;---------------------- stack : colors------------------- |
;----------------- procedure don't save registers !!----- |
.col1r equ ebp+4 ; each color as word |
.col1g equ ebp+6 ; each z coordinate as word |
.col1b equ ebp+8 |
.z1 equ ebp+10 |
.col2r equ ebp+12 |
.col2g equ ebp+14 |
.col2b equ ebp+16 |
.z2 equ ebp+18 |
.col3r equ ebp+20 |
.col3g equ ebp+22 |
.col3b equ ebp+24 |
.z3 equ ebp+26 |
.x1 equ word[ebp-2] |
.y1 equ word[ebp-4] |
.x2 equ word[ebp-6] |
.y2 equ word[ebp-8] |
.x3 equ word[ebp-10] |
.y3 equ word[ebp-12] |
.dx12 equ dword[ebp-16] |
.dz12 equ dword[ebp-20] |
.dc12r equ dword[ebp-24] |
.dc12g equ dword[ebp-28] |
.dc12b equ dword[ebp-32] |
.dx13 equ dword[ebp-36] |
.dz13 equ dword[ebp-40] |
.dc13r equ dword[ebp-44] |
.dc13g equ dword[ebp-48] |
.dc13b equ dword[ebp-52] |
.dx23 equ dword[ebp-56] |
.dz23 equ dword[ebp-60] |
.dc23r equ dword[ebp-64] |
.dc23g equ dword[ebp-68] |
.dc23b equ dword[ebp-72] |
.zz1 equ dword[ebp-76] |
.c1r equ dword[ebp-80] |
.c1g equ dword[ebp-84] |
.c1b equ dword[ebp-88] |
.zz2 equ dword[ebp-92] |
.c2r equ dword[ebp-96] |
.c2g equ dword[ebp-100] |
.c2b equ dword[ebp-104] |
;.zz1 equ dword[ebp-100] |
;.zz2 equ dword[ebp-104] |
.c1bM equ [ebp-88] |
.c2bM equ [ebp-104] |
.c1rM equ [ebp-80] |
.c2rM equ [ebp-96] |
.dc23bM equ [ebp-72] |
.dc13bM equ [ebp-52] |
.dc12bM equ [ebp-32] |
.dc12rM equ [ebp-24] |
.dc13rM equ [ebp-44] |
.dc23rM equ [ebp-64] |
if Ext=MMX |
emms |
end if |
mov ebp,esp |
; sub esp,84 |
.sort3: ; sort triangle coordinates... |
cmp ax,bx |
jle .sort1 |
xchg eax,ebx |
mov edx,dword[.col1r] |
xchg edx,dword[.col2r] |
mov dword[.col1r],edx |
mov edx,dword[.col1b] |
xchg edx,dword[.col2b] |
mov dword[.col1b],edx |
.sort1: |
cmp bx,cx |
jle .sort2 |
xchg ebx,ecx |
mov edx,dword[.col2r] |
xchg edx,dword[.col3r] |
mov dword[.col2r],edx |
mov edx,dword[.col2b] |
xchg edx,dword[.col3b] |
mov dword[.col2b],edx |
jmp .sort3 |
.sort2: |
push eax ; store in variables |
push ebx |
push ecx |
mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that |
and edx,ebx ; if *all* of them are negative a sign flag is raised |
and edx,ecx |
and edx,eax |
test edx,80008000h ; Check both X&Y at once |
jne .gt_loop2_end |
mov bx,.y2 ; calc deltas |
sub bx,.y1 |
jnz .gt_dx12_make |
; mov .dx12,0 |
; mov .dz12,0 |
; mov .dc12r,0 |
; mov .dc12g,0 |
; mov .dc12b,0 |
mov ecx,5 |
@@: |
push dword 0 |
loop @b |
jmp .gt_dx12_done |
.gt_dx12_make: |
mov ax,.x2 |
sub ax,.x1 |
cwde |
movsx ebx,bx |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dx12,eax |
push eax |
mov ax,word[.z2] |
sub ax,word[.z1] |
cwde |
shl eax,CATMULL_SHIFT |
cdq |
idiv ebx |
push eax |
mov ax,word[.col2r] |
sub ax,word[.col1r] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dc12r,eax |
push eax |
mov ax,word[.col2g] |
sub ax,word[.col1g] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dc12g,eax |
push eax |
mov ax,word[.col2b] ;;--- |
sub ax,word[.col1b] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dc12b,eax |
push eax |
.gt_dx12_done: |
mov bx,.y3 ; calc deltas |
sub bx,.y1 |
jnz .gt_dx13_make |
; mov .dx13,0 |
; mov .dz13,0 |
; mov .dc13r,0 |
; mov .dc13g,0 |
; mov .dc13b,0 |
mov ecx,5 |
@@: |
push dword 0 |
loop @b |
jmp .gt_dx13_done |
.gt_dx13_make: |
mov ax,.x3 |
sub ax,.x1 |
cwde |
movsx ebx,bx |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dx13,eax |
push eax |
mov ax,word[.z3] |
sub ax,word[.z1] |
cwde |
shl eax,CATMULL_SHIFT |
cdq |
idiv ebx |
push eax |
mov ax,word[.col3r] |
sub ax,word[.col1r] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dc13r,eax |
push eax |
mov ax,word[.col3g] |
sub ax,word[.col1g] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dc13g,eax |
push eax |
mov ax,word[.col3b] |
sub ax,word[.col1b] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dc13b,eax |
push eax |
.gt_dx13_done: |
mov bx,.y3 ; calc deltas |
sub bx,.y2 |
jnz .gt_dx23_make |
; mov .dx23,0 |
; mov .dz23,0 |
; mov .dc23r,0 |
; mov .dc23g,0 |
; mov .dc23b,0 |
mov ecx,5 |
@@: |
push dword 0 |
loop @b |
jmp .gt_dx23_done |
.gt_dx23_make: |
mov ax,.x3 |
sub ax,.x2 |
cwde |
movsx ebx,bx |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dx23,eax |
push eax |
mov ax,word[.z3] |
sub ax,word[.z2] |
cwde |
shl eax,CATMULL_SHIFT |
cdq |
idiv ebx |
push eax |
mov ax,word[.col3r] |
sub ax,word[.col2r] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dc23r,eax |
push eax |
mov ax,word[.col3g] |
sub ax,word[.col2g] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dc23g,eax |
push eax |
mov ax,word[.col3b] |
sub ax,word[.col2b] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dc23b,eax |
push eax |
.gt_dx23_done: |
sub esp,32 |
movsx eax,.x1 ; eax - cur x1 |
shl eax,ROUND ; ebx - cur x2 |
mov ebx,eax |
movsx edx,word[.z1] |
shl edx,CATMULL_SHIFT |
mov .zz1,edx |
mov .zz2,edx |
movzx edx,word[.col1r] |
shl edx,ROUND |
mov .c1r,edx |
mov .c2r,edx |
movzx edx,word[.col1g] |
shl edx,ROUND |
mov .c1g,edx |
mov .c2g,edx |
movzx edx,word[.col1b] |
shl edx,ROUND |
mov .c1b,edx |
mov .c2b,edx |
mov cx,.y1 |
cmp cx,.y2 |
jge .gt_loop1_end |
.gt_loop1: |
pushad |
; macro .debug |
mov edx,.c2r ; c2r,c2g,c2b,c1r,c1g,c1b - current colors |
sar edx,ROUND |
push dx |
mov edx,.c2g |
sar edx,ROUND |
push dx |
mov edx,.c2b |
sar edx,ROUND |
push dx |
sar ebx,ROUND ; x2 |
push bx |
mov edx,.c1r |
sar edx,ROUND |
push dx |
mov edx,.c1g |
sar edx,ROUND |
push dx |
mov edx,.c1b |
sar edx,ROUND |
push dx |
sar eax,ROUND |
push ax ; x1 |
push cx ; y |
push .zz2 |
push .zz1 |
call gouraud_line_z |
popad |
if Ext >= MMX |
movq mm0,.c1bM |
paddd mm0,qword .dc13bM |
movq .c1bM,mm0 |
movq mm1,.c2bM |
paddd mm1,qword .dc12bM |
movq .c2bM,mm1 |
movq mm0,.c1rM |
paddd mm0,qword .dc13rM |
movq .c1rM,mm0 |
movq mm1,.c2rM |
paddd mm1,qword .dc12rM |
movq .c2rM,mm1 |
else |
mov edx,.dc13r |
add .c1r,edx |
mov edx,.dc13g |
add .c1g,edx |
mov edx,.dc13b |
add .c1b,edx |
mov edx,.dc12r |
add .c2r,edx |
mov edx,.dc12g |
add .c2g,edx |
mov edx,.dc12b |
add .c2b,edx |
mov edx,.dz13 |
add .zz1,edx |
mov edx,.dz12 |
add .zz2,edx |
end if |
add eax,.dx13 |
add ebx,.dx12 |
inc cx |
cmp cx,.y2 |
jl .gt_loop1 |
.gt_loop1_end: |
mov cx,.y2 |
cmp cx,.y3 |
jge .gt_loop2_end |
movsx ebx,.x2 ; eax - cur x1 |
shl ebx,ROUND ; ebx - cur x2 |
movsx edx,word[.z2] |
shl edx,CATMULL_SHIFT |
mov .zz2,edx |
movzx edx,word[.col2r] |
shl edx,ROUND |
mov .c2r,edx |
movzx edx,word[.col2g] |
shl edx,ROUND |
mov .c2g,edx |
movzx edx,word[.col2b] |
shl edx,ROUND |
mov .c2b,edx |
.gt_loop2: |
pushad |
; macro .debug |
mov edx,.c2r ; c2r,c2g,c2b,c1r,c1g,c1b - current colors |
sar edx,ROUND |
push dx |
mov edx,.c2g |
sar edx,ROUND |
push dx |
mov edx,.c2b |
sar edx,ROUND |
push dx |
sar ebx,ROUND ; x2 |
push bx |
mov edx,.c1r |
sar edx,ROUND |
push dx |
mov edx,.c1g |
sar edx,ROUND |
push dx |
mov edx,.c1b |
sar edx,ROUND |
push dx |
sar eax,ROUND |
push ax ; x1 |
push cx ; y |
push .zz2 |
push .zz1 |
call gouraud_line_z |
popad |
if Ext >= MMX |
movq mm0,.c1bM |
paddd mm0,qword .dc13bM |
movq .c1bM,mm0 |
movq mm1,.c2bM |
paddd mm1,qword .dc23bM |
movq .c2bM,mm1 |
movq mm0,.c1rM |
paddd mm0,qword .dc13rM |
movq .c1rM,mm0 |
movq mm1,.c2rM |
paddd mm1,qword .dc23rM |
movq .c2rM,mm1 |
else |
mov edx,.dc13r |
add .c1r,edx |
mov edx,.dc13g |
add .c1g,edx |
mov edx,.dc13b |
add .c1b,edx |
mov edx,.dc23r |
add .c2r,edx |
mov edx,.dc23g |
add .c2g,edx |
mov edx,.dc23b |
add .c2b,edx |
mov edx,.dz13 |
add .zz1,edx |
mov edx,.dz23 |
add .zz2,edx |
end if |
add eax,.dx13 |
add ebx,.dx23 |
inc cx |
cmp cx,.y3 |
jl .gt_loop2 |
.gt_loop2_end: |
mov esp,ebp |
ret 24 |
gouraud_line_z: |
;----------------- procedure drawing gouraud line |
;----------------- with z coordinate interpolation |
;----------------- esi - pointer to Z_buffer |
;----------------- edi - pointer to screen buffer |
;----------------- stack: |
; Draws one horizontal span on row .y between columns .x1 and .x2. |
; z and the three colour channels are interpolated linearly along the span. |
; A pixel is stored only when its z is less than the Z-buffer entry (signed |
; compare); that entry is then overwritten with the new z. |
; Screen pixels take 3 bytes each, Z-buffer entries take 4 bytes each. |
; ebp is loaded from esp without being saved; eax, ebx, ecx, edx, esi and edi |
; are overwritten. The 26 bytes of stack arguments are removed by ret 26. |
; Stack arguments ([ebp] holds the return address): |
.z1 equ dword[ebp+4] ; z coordinate shifted left CATMULL_SHIFT |
.z2 equ dword[ebp+8] |
.y equ word[ebp+12] |
; the following are plain addresses; the operand size is given at each use |
.x1 equ ebp+14 |
.c1b equ ebp+16 |
.c1g equ ebp+18 |
.c1r equ ebp+20 |
.x2 equ ebp+22 |
.c2b equ ebp+24 |
.c2g equ ebp+26 |
.c2r equ ebp+28 |
; Locals: the first four are created by the pushes in the delta section, |
; the remaining four by the later sub esp,16. |
.dz equ dword[ebp-4] |
.dc_b equ dword[ebp-8] |
.dc_g equ dword[ebp-12] |
.dc_r equ dword[ebp-16] |
.c_z equ dword[ebp-20] |
.cb equ dword[ebp-24] |
.cg equ dword[ebp-28] |
.cr equ dword[ebp-32] |
;.cg2 equ dword[ebp-36] |
; address-only aliases of the locals above, used by the MMX loads |
.crM equ ebp-32 |
.cgM equ ebp-28 |
.cbM equ ebp-24 |
.dc_rM equ ebp-16 |
.dc_gM equ ebp-12 |
.dc_bM equ ebp-8 |
mov ebp,esp |
; --- vertical clipping: continue only when 0 <= y < size_y_var-1 --- |
mov ax,.y |
or ax,ax |
jl .gl_quit |
mov bx,[size_y_var] |
dec bx |
cmp ax,bx ;SIZE_Y |
jge .gl_quit |
; --- make x1 the left end; nothing is drawn when x1 = x2 --- |
; Each dword exchange below moves two adjacent words at once: |
; x together with blue, then green together with red; z is swapped last. |
mov eax,dword[.x1] |
cmp ax,word[.x2] |
je .gl_quit |
jl @f |
xchg eax,dword[.x2] |
mov dword[.x1],eax |
mov eax,dword[.c1g] |
xchg eax,dword[.c2g] |
mov dword[.c1g],eax |
mov eax,.z1 |
xchg eax,.z2 |
mov .z1,eax |
@@: |
; --- quit when x1 >= size_x_var-1 or x2 <= 0 --- |
mov bx,[size_x_var] |
dec bx |
cmp word[.x1],bx ;SIZE_X |
jge .gl_quit |
cmp word[.x2],0 |
jle .gl_quit |
; --- per-pixel deltas; the pushes create .dz, .dc_b, .dc_g, .dc_r in that order --- |
mov eax,.z2 |
sub eax,.z1 |
cdq |
mov bx,word[.x2] ; dz = (z2-z1)/(x2-x1) |
sub bx,word[.x1] |
movsx ebx,bx |
idiv ebx |
push eax |
mov ax,word[.c2b] |
sub ax,word[.c1b] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
push eax |
mov ax,word[.c2g] |
sub ax,word[.c1g] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
push eax |
mov ax,word[.c2r] |
sub ax,word[.c1r] |
cwde |
shl eax,ROUND ; dc_r = (c2r-c1r)/(x2-x1), scaled by ROUND bits |
cdq |
idiv ebx |
push eax |
cmp word[.x1],0 ; clip at the left edge |
jg @f |
; x1 <= 0: advance z and the colours by -x1 steps, then start at column 0 |
mov eax,.dz |
movsx ebx,word[.x1] |
neg ebx |
imul ebx |
add .z1,eax |
mov word[.x1],0 |
mov eax,.dc_r |
imul ebx |
sar eax,ROUND |
add word[.c1r],ax |
mov eax,.dc_g |
imul ebx |
sar eax,ROUND |
add word[.c1g],ax |
mov eax,.dc_b |
imul ebx |
sar eax,ROUND |
add word[.c1b],ax |
@@: |
; clamp x2 to size_x_var-1 |
mov bx,[size_x_var] |
dec bx |
cmp word[.x2],bx ;SIZE_X |
jl @f |
mov word[.x2],bx ;SIZE_X |
@@: |
; reserve .c_z, .cb, .cg, .cr, then compute the pixel index |
; eax = y*size_x_var + x1; edi advances by index*3, esi by index*4 |
sub esp,16 ; calculate memory begin |
movzx edx,word[size_x_var] ;SIZE_X ; in buffers |
movzx eax,.y |
mul edx |
movzx edx,word[.x1] |
add eax,edx |
push eax |
lea eax,[eax*3] |
add edi,eax |
pop eax |
shl eax,2 |
add esi,eax |
; ecx = x2 - x1 = number of pixels to process |
mov cx,word[.x2] |
sub cx,word[.x1] |
movzx ecx,cx |
mov ebx,.z1 ; ebx - current z shl CATMULL_SHIFT |
;if Ext >= SSE |
; mov .cz,edx |
;end if |
mov edx,.dz ; edx - delta z |
; start colours, shifted left by ROUND bits |
movzx eax,word[.c1r] |
shl eax,ROUND |
mov .cr,eax |
movzx eax,word[.c1g] |
shl eax,ROUND |
mov .cg,eax |
movzx eax,word[.c1b] |
shl eax,ROUND |
mov .cb,eax |
if Ext = MMX |
; mov .c_z,edx |
; mm5 = .dc_r,.dc_g and mm4 = .cr,.cg (two dwords each); mm6 = 0 for packing |
movd mm2,[.dc_bM] ; delta color blue MMX |
movd mm3,[.cbM] ; current blue MMX |
movq mm5,[.dc_rM] |
movq mm4,[.crM] |
pxor mm6,mm6 |
end if |
.ddraw: |
;if Ext = MMX |
; movq mm0,mm3 |
; psrsq mm0,32 |
; movd ebx,mm0 |
;end if |
; z test: the pixel is skipped unless ebx < stored z |
cmp ebx,dword[esi] ; esi - z_buffer |
jge @f ; edi - Screen buffer |
if Ext = MMX |
; shift the colours down by ROUND and pack them into three bytes |
movq mm0,mm3 ; mm0, mm1 - temp registers |
psrld mm0,ROUND |
movq mm1,mm4 |
psrld mm1,ROUND |
packssdw mm1,mm0 |
packuswb mm1,mm6 |
; movd [edi],mm1 |
movd eax,mm1 |
stosw |
shr eax,16 |
stosb |
else |
; bytes are stored in the order .cr, .cg, .cb |
mov eax,.cr |
sar eax,ROUND |
stosb |
mov eax,.cg |
sar eax,ROUND |
stosb |
mov eax,.cb |
sar eax,ROUND |
stosb |
end if |
mov dword[esi],ebx |
;if Ext = NON |
jmp .no_skip |
;end if |
@@: |
; hidden pixel: step over it in the screen buffer |
add edi,3 |
.no_skip: |
add esi,4 |
;if Ext=NON |
add ebx,edx |
;end if |
if Ext=MMX |
paddd mm3,mm2 |
paddd mm4,mm5 |
else |
mov eax,.dc_g |
add .cg,eax |
mov eax,.dc_b |
add .cb,eax |
mov eax,.dc_r |
add .cr,eax |
end if |
loop .ddraw |
.gl_quit: |
; drop all locals and return, removing the 26 bytes of arguments |
mov esp,ebp |
ret 26 |
; ROUND - left shift applied to x and colour values before they are divided |
; CATMULL_SHIFT - left shift applied to z values before they are divided |
ROUND equ 8 |
CATMULL_SHIFT equ 8 |
gouraud_triangle_z: |
;----procedure drawing gouraud triangle with z coordinate |
;----interpolation ( Catmull algorithm )----------------- |
;------------------in - eax - x1 shl 16 + y1 ------------ |
;---------------------- ebx - x2 shl 16 + y2 ------------ |
;---------------------- ecx - x3 shl 16 + y3 ------------ |
;---------------------- esi - pointer to Z-buffer-------- |
;---------------------- Z-buffer filled with dd variables |
;---------------------- shifted CATMULL_SHIFT------------ |
;---------------------- edi - pointer to screen buffer--- |
;---------------------- stack : colors------------------- |
;----------------- procedure doesn't save registers !!--- |
; The vertices are sorted by ascending y, per-row deltas are computed for the |
; three edges, and the triangle is filled in two passes: .gt_loop1 covers rows |
; y1 .. y2-1 using edges 1-3 and 1-2, .gt_loop2 covers rows y2 .. y3-1 using |
; edges 1-3 and 2-3. Every row is drawn by gouraud_line_z. |
; ebp is loaded from esp without being saved; ret 24 removes the 24 bytes of |
; stack arguments. |
; NOTE(review): no emms is visible after the MMX additions in this routine - |
; confirm that MMX state is cleared elsewhere before FPU code runs. |
; Stack arguments (plain addresses; the operand size is given at each use): |
.col1r equ ebp+4 ; each color as word |
.col1g equ ebp+6 ; each z coordinate as word |
.col1b equ ebp+8 |
.z1 equ ebp+10 |
.col2r equ ebp+12 |
.col2g equ ebp+14 |
.col2b equ ebp+16 |
.z2 equ ebp+18 |
.col3r equ ebp+20 |
.col3g equ ebp+22 |
.col3b equ ebp+24 |
.z3 equ ebp+26 |
; Locals created by push eax / ebx / ecx at .sort2 (x in the high word, y in the low word): |
.x1 equ word[ebp-2] |
.y1 equ word[ebp-4] |
.x2 equ word[ebp-6] |
.y2 equ word[ebp-8] |
.x3 equ word[ebp-10] |
.y3 equ word[ebp-12] |
; Per-row deltas, created by the pushes in the delta sections (order matters): |
.dx12 equ dword[ebp-16] |
.dz12 equ dword[ebp-20] |
.dc12r equ dword[ebp-24] |
.dc12g equ dword[ebp-28] |
.dc12b equ dword[ebp-32] |
.dx13 equ dword[ebp-36] |
.dz13 equ dword[ebp-40] |
.dc13r equ dword[ebp-44] |
.dc13g equ dword[ebp-48] |
.dc13b equ dword[ebp-52] |
.dx23 equ dword[ebp-56] |
.dz23 equ dword[ebp-60] |
.dc23r equ dword[ebp-64] |
.dc23g equ dword[ebp-68] |
.dc23b equ dword[ebp-72] |
; Current values on the two active edges, reserved by sub esp,32: |
.zz1 equ dword[ebp-76] |
.c1r equ dword[ebp-80] |
.c1g equ dword[ebp-84] |
.c1b equ dword[ebp-88] |
.zz2 equ dword[ebp-92] |
.c2r equ dword[ebp-96] |
.c2g equ dword[ebp-100] |
.c2b equ dword[ebp-104] |
;.zz1 equ dword[ebp-100] |
;.zz2 equ dword[ebp-104] |
; Unsized aliases for the MMX path; each 8-byte access covers two adjacent |
; dwords: blue with green, and red with z. |
.c1bM equ [ebp-88] |
.c2bM equ [ebp-104] |
.c1rM equ [ebp-80] |
.c2rM equ [ebp-96] |
.dc23bM equ [ebp-72] |
.dc13bM equ [ebp-52] |
.dc12bM equ [ebp-32] |
.dc12rM equ [ebp-24] |
.dc13rM equ [ebp-44] |
.dc23rM equ [ebp-64] |
if Ext=MMX |
emms |
end if |
mov ebp,esp |
; sub esp,84 |
; Sort the vertices so that y1 <= y2 <= y3. Each dword exchange moves two |
; adjacent words: r together with g, then b together with z. |
.sort3: ; sort triangle coordinates... |
cmp ax,bx |
jle .sort1 |
xchg eax,ebx |
mov edx,dword[.col1r] |
xchg edx,dword[.col2r] |
mov dword[.col1r],edx |
mov edx,dword[.col1b] |
xchg edx,dword[.col2b] |
mov dword[.col1b],edx |
.sort1: |
cmp bx,cx |
jle .sort2 |
xchg ebx,ecx |
mov edx,dword[.col2r] |
xchg edx,dword[.col3r] |
mov dword[.col2r],edx |
mov edx,dword[.col2b] |
xchg edx,dword[.col3b] |
mov dword[.col2b],edx |
jmp .sort3 |
.sort2: |
push eax ; store in variables |
push ebx |
push ecx |
; early exit when all three x or all three y coordinates are negative |
mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that |
and edx,ebx ; if *all* of them are negative a sign flag is raised |
and edx,ecx |
and edx,eax |
test edx,80008000h ; Check both X&Y at once |
jne .gt_loop2_end |
; --- edge 1-2: deltas per row; five zeros are pushed when y2 = y1 --- |
mov bx,.y2 ; calc deltas |
sub bx,.y1 |
jnz .gt_dx12_make |
; mov .dx12,0 |
; mov .dz12,0 |
; mov .dc12r,0 |
; mov .dc12g,0 |
; mov .dc12b,0 |
mov ecx,5 |
@@: |
push dword 0 |
loop @b |
jmp .gt_dx12_done |
.gt_dx12_make: |
mov ax,.x2 |
sub ax,.x1 |
cwde |
movsx ebx,bx |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dx12,eax |
push eax |
mov ax,word[.z2] |
sub ax,word[.z1] |
cwde |
shl eax,CATMULL_SHIFT |
cdq |
idiv ebx |
push eax |
mov ax,word[.col2r] |
sub ax,word[.col1r] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dc12r,eax |
push eax |
mov ax,word[.col2g] |
sub ax,word[.col1g] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dc12g,eax |
push eax |
mov ax,word[.col2b] ;;--- |
sub ax,word[.col1b] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dc12b,eax |
push eax |
.gt_dx12_done: |
; --- edge 1-3: deltas per row; five zeros are pushed when y3 = y1 --- |
mov bx,.y3 ; calc deltas |
sub bx,.y1 |
jnz .gt_dx13_make |
; mov .dx13,0 |
; mov .dz13,0 |
; mov .dc13r,0 |
; mov .dc13g,0 |
; mov .dc13b,0 |
mov ecx,5 |
@@: |
push dword 0 |
loop @b |
jmp .gt_dx13_done |
.gt_dx13_make: |
mov ax,.x3 |
sub ax,.x1 |
cwde |
movsx ebx,bx |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dx13,eax |
push eax |
mov ax,word[.z3] |
sub ax,word[.z1] |
cwde |
shl eax,CATMULL_SHIFT |
cdq |
idiv ebx |
push eax |
mov ax,word[.col3r] |
sub ax,word[.col1r] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dc13r,eax |
push eax |
mov ax,word[.col3g] |
sub ax,word[.col1g] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dc13g,eax |
push eax |
mov ax,word[.col3b] |
sub ax,word[.col1b] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dc13b,eax |
push eax |
.gt_dx13_done: |
; --- edge 2-3: deltas per row; five zeros are pushed when y3 = y2 --- |
mov bx,.y3 ; calc deltas |
sub bx,.y2 |
jnz .gt_dx23_make |
; mov .dx23,0 |
; mov .dz23,0 |
; mov .dc23r,0 |
; mov .dc23g,0 |
; mov .dc23b,0 |
mov ecx,5 |
@@: |
push dword 0 |
loop @b |
jmp .gt_dx23_done |
.gt_dx23_make: |
mov ax,.x3 |
sub ax,.x2 |
cwde |
movsx ebx,bx |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dx23,eax |
push eax |
mov ax,word[.z3] |
sub ax,word[.z2] |
cwde |
shl eax,CATMULL_SHIFT |
cdq |
idiv ebx |
push eax |
mov ax,word[.col3r] |
sub ax,word[.col2r] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dc23r,eax |
push eax |
mov ax,word[.col3g] |
sub ax,word[.col2g] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dc23g,eax |
push eax |
mov ax,word[.col3b] |
sub ax,word[.col2b] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dc23b,eax |
push eax |
.gt_dx23_done: |
; reserve .zz1 .. .c2b (8 dwords) and start both edges at vertex 1 |
sub esp,32 |
movsx eax,.x1 ; eax - cur x1 |
shl eax,ROUND ; ebx - cur x2 |
mov ebx,eax |
movsx edx,word[.z1] |
shl edx,CATMULL_SHIFT |
mov .zz1,edx |
mov .zz2,edx |
movzx edx,word[.col1r] |
shl edx,ROUND |
mov .c1r,edx |
mov .c2r,edx |
movzx edx,word[.col1g] |
shl edx,ROUND |
mov .c1g,edx |
mov .c2g,edx |
movzx edx,word[.col1b] |
shl edx,ROUND |
mov .c1b,edx |
mov .c2b,edx |
mov cx,.y1 |
cmp cx,.y2 |
jge .gt_loop1_end |
; --- upper part: rows y1 .. y2-1, edges 1-3 (eax, .c1*, .zz1) and 1-2 (ebx, .c2*, .zz2) --- |
.gt_loop1: |
; pushad/popad keep all registers, including ebp, intact across the call |
pushad |
; macro .debug |
; arguments for gouraud_line_z are pushed last-to-first |
mov edx,.c2r ; c2r,c2g,c2b,c1r,c1g,c1b - current colors |
sar edx,ROUND |
push dx |
mov edx,.c2g |
sar edx,ROUND |
push dx |
mov edx,.c2b |
sar edx,ROUND |
push dx |
sar ebx,ROUND ; x2 |
push bx |
mov edx,.c1r |
sar edx,ROUND |
push dx |
mov edx,.c1g |
sar edx,ROUND |
push dx |
mov edx,.c1b |
sar edx,ROUND |
push dx |
sar eax,ROUND |
push ax ; x1 |
push cx ; y |
push .zz2 |
push .zz1 |
call gouraud_line_z |
popad |
; step colours and z along both edges |
if Ext >= MMX |
; each movq/paddd pair handles two dwords: blue+green, then red+z |
movq mm0,.c1bM |
paddd mm0,qword .dc13bM |
movq .c1bM,mm0 |
movq mm1,.c2bM |
paddd mm1,qword .dc12bM |
movq .c2bM,mm1 |
movq mm0,.c1rM |
paddd mm0,qword .dc13rM |
movq .c1rM,mm0 |
movq mm1,.c2rM |
paddd mm1,qword .dc12rM |
movq .c2rM,mm1 |
else |
mov edx,.dc13r |
add .c1r,edx |
mov edx,.dc13g |
add .c1g,edx |
mov edx,.dc13b |
add .c1b,edx |
mov edx,.dc12r |
add .c2r,edx |
mov edx,.dc12g |
add .c2g,edx |
mov edx,.dc12b |
add .c2b,edx |
mov edx,.dz13 |
add .zz1,edx |
mov edx,.dz12 |
add .zz2,edx |
end if |
add eax,.dx13 |
add ebx,.dx12 |
inc cx |
cmp cx,.y2 |
jl .gt_loop1 |
.gt_loop1_end: |
; --- lower part: rows y2 .. y3-1; the second edge restarts at vertex 2 --- |
mov cx,.y2 |
cmp cx,.y3 |
jge .gt_loop2_end |
movsx ebx,.x2 ; eax - cur x1 |
shl ebx,ROUND ; ebx - cur x2 |
movsx edx,word[.z2] |
shl edx,CATMULL_SHIFT |
mov .zz2,edx |
movzx edx,word[.col2r] |
shl edx,ROUND |
mov .c2r,edx |
movzx edx,word[.col2g] |
shl edx,ROUND |
mov .c2g,edx |
movzx edx,word[.col2b] |
shl edx,ROUND |
mov .c2b,edx |
.gt_loop2: |
pushad |
; macro .debug |
mov edx,.c2r ; c2r,c2g,c2b,c1r,c1g,c1b - current colors |
sar edx,ROUND |
push dx |
mov edx,.c2g |
sar edx,ROUND |
push dx |
mov edx,.c2b |
sar edx,ROUND |
push dx |
sar ebx,ROUND ; x2 |
push bx |
mov edx,.c1r |
sar edx,ROUND |
push dx |
mov edx,.c1g |
sar edx,ROUND |
push dx |
mov edx,.c1b |
sar edx,ROUND |
push dx |
sar eax,ROUND |
push ax ; x1 |
push cx ; y |
push .zz2 |
push .zz1 |
call gouraud_line_z |
popad |
if Ext >= MMX |
movq mm0,.c1bM |
paddd mm0,qword .dc13bM |
movq .c1bM,mm0 |
movq mm1,.c2bM |
paddd mm1,qword .dc23bM |
movq .c2bM,mm1 |
movq mm0,.c1rM |
paddd mm0,qword .dc13rM |
movq .c1rM,mm0 |
movq mm1,.c2rM |
paddd mm1,qword .dc23rM |
movq .c2rM,mm1 |
else |
mov edx,.dc13r |
add .c1r,edx |
mov edx,.dc13g |
add .c1g,edx |
mov edx,.dc13b |
add .c1b,edx |
mov edx,.dc23r |
add .c2r,edx |
mov edx,.dc23g |
add .c2g,edx |
mov edx,.dc23b |
add .c2b,edx |
mov edx,.dz13 |
add .zz1,edx |
mov edx,.dz23 |
add .zz2,edx |
end if |
add eax,.dx13 |
add ebx,.dx23 |
inc cx |
cmp cx,.y3 |
jl .gt_loop2 |
.gt_loop2_end: |
; drop all locals and return, removing the 24 bytes of arguments |
mov esp,ebp |
ret 24 |
gouraud_line_z: |
;----------------- procedure drawing gouraud line |
;----------------- with z coordinate interpolation |
;----------------- esi - pointer to Z_buffer |
;----------------- edi - pointer to screen buffer |
;----------------- stack: |
; Draws one horizontal span on row .y between columns .x1 and .x2. |
; z and the three colour channels are interpolated linearly along the span. |
; A pixel is stored only when its z is less than the Z-buffer entry (signed |
; compare); that entry is then overwritten with the new z. |
; Screen pixels take 3 bytes each, Z-buffer entries take 4 bytes each. |
; ebp is loaded from esp without being saved; eax, ebx, ecx, edx, esi and edi |
; are overwritten. The 26 bytes of stack arguments are removed by ret 26. |
; Stack arguments ([ebp] holds the return address): |
.z1 equ dword[ebp+4] ; z coordinate shifted left CATMULL_SHIFT |
.z2 equ dword[ebp+8] |
.y equ word[ebp+12] |
; the following are plain addresses; the operand size is given at each use |
.x1 equ ebp+14 |
.c1b equ ebp+16 |
.c1g equ ebp+18 |
.c1r equ ebp+20 |
.x2 equ ebp+22 |
.c2b equ ebp+24 |
.c2g equ ebp+26 |
.c2r equ ebp+28 |
; Locals: the first four are created by the pushes in the delta section, |
; the remaining four by the later sub esp,16. |
.dz equ dword[ebp-4] |
.dc_b equ dword[ebp-8] |
.dc_g equ dword[ebp-12] |
.dc_r equ dword[ebp-16] |
.c_z equ dword[ebp-20] |
.cb equ dword[ebp-24] |
.cg equ dword[ebp-28] |
.cr equ dword[ebp-32] |
;.cg2 equ dword[ebp-36] |
; address-only aliases of the locals above, used by the MMX loads |
.crM equ ebp-32 |
.cgM equ ebp-28 |
.cbM equ ebp-24 |
.dc_rM equ ebp-16 |
.dc_gM equ ebp-12 |
.dc_bM equ ebp-8 |
mov ebp,esp |
; --- vertical clipping: continue only when 0 <= y < size_y_var-1 --- |
mov ax,.y |
or ax,ax |
jl .gl_quit |
mov bx,[size_y_var] |
dec bx |
cmp ax,bx ;SIZE_Y |
jge .gl_quit |
; --- make x1 the left end; nothing is drawn when x1 = x2 --- |
; Each dword exchange below moves two adjacent words at once: |
; x together with blue, then green together with red; z is swapped last. |
mov eax,dword[.x1] |
cmp ax,word[.x2] |
je .gl_quit |
jl @f |
xchg eax,dword[.x2] |
mov dword[.x1],eax |
mov eax,dword[.c1g] |
xchg eax,dword[.c2g] |
mov dword[.c1g],eax |
mov eax,.z1 |
xchg eax,.z2 |
mov .z1,eax |
@@: |
; --- quit when x1 >= size_x_var-1 or x2 <= 0 --- |
mov bx,[size_x_var] |
dec bx |
cmp word[.x1],bx ;SIZE_X |
jge .gl_quit |
cmp word[.x2],0 |
jle .gl_quit |
; --- per-pixel deltas; the pushes create .dz, .dc_b, .dc_g, .dc_r in that order --- |
mov eax,.z2 |
sub eax,.z1 |
cdq |
mov bx,word[.x2] ; dz = (z2-z1)/(x2-x1) |
sub bx,word[.x1] |
movsx ebx,bx |
idiv ebx |
push eax |
mov ax,word[.c2b] |
sub ax,word[.c1b] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
push eax |
mov ax,word[.c2g] |
sub ax,word[.c1g] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
push eax |
mov ax,word[.c2r] |
sub ax,word[.c1r] |
cwde |
shl eax,ROUND ; dc_r = (c2r-c1r)/(x2-x1), scaled by ROUND bits |
cdq |
idiv ebx |
push eax |
cmp word[.x1],0 ; clip at the left edge |
jg @f |
; x1 <= 0: advance z and the colours by -x1 steps, then start at column 0 |
mov eax,.dz |
movsx ebx,word[.x1] |
neg ebx |
imul ebx |
add .z1,eax |
mov word[.x1],0 |
mov eax,.dc_r |
imul ebx |
sar eax,ROUND |
add word[.c1r],ax |
mov eax,.dc_g |
imul ebx |
sar eax,ROUND |
add word[.c1g],ax |
mov eax,.dc_b |
imul ebx |
sar eax,ROUND |
add word[.c1b],ax |
@@: |
; clamp x2 to size_x_var-1 |
mov bx,[size_x_var] |
dec bx |
cmp word[.x2],bx ;SIZE_X |
jl @f |
mov word[.x2],bx ;SIZE_X |
@@: |
; reserve .c_z, .cb, .cg, .cr, then compute the pixel index |
; eax = y*size_x_var + x1; edi advances by index*3, esi by index*4 |
sub esp,16 ; calculate memory begin |
movzx edx,word[size_x_var] ;SIZE_X ; in buffers |
movzx eax,.y |
mul edx |
movzx edx,word[.x1] |
add eax,edx |
push eax |
lea eax,[eax*3] |
add edi,eax |
pop eax |
shl eax,2 |
add esi,eax |
; ecx = x2 - x1 = number of pixels to process |
mov cx,word[.x2] |
sub cx,word[.x1] |
movzx ecx,cx |
mov ebx,.z1 ; ebx - current z shl CATMULL_SHIFT |
;if Ext >= SSE |
; mov .cz,edx |
;end if |
mov edx,.dz ; edx - delta z |
; start colours, shifted left by ROUND bits |
movzx eax,word[.c1r] |
shl eax,ROUND |
mov .cr,eax |
movzx eax,word[.c1g] |
shl eax,ROUND |
mov .cg,eax |
movzx eax,word[.c1b] |
shl eax,ROUND |
mov .cb,eax |
if Ext = MMX |
; mov .c_z,edx |
; mm5 = .dc_r,.dc_g and mm4 = .cr,.cg (two dwords each); mm6 = 0 for packing |
movd mm2,[.dc_bM] ; delta color blue MMX |
movd mm3,[.cbM] ; current blue MMX |
movq mm5,[.dc_rM] |
movq mm4,[.crM] |
pxor mm6,mm6 |
end if |
.ddraw: |
;if Ext = MMX |
; movq mm0,mm3 |
; psrsq mm0,32 |
; movd ebx,mm0 |
;end if |
; z test: the pixel is skipped unless ebx < stored z |
cmp ebx,dword[esi] ; esi - z_buffer |
jge @f ; edi - Screen buffer |
if Ext = MMX |
; shift the colours down by ROUND and pack them into three bytes |
movq mm0,mm3 ; mm0, mm1 - temp registers |
psrld mm0,ROUND |
movq mm1,mm4 |
psrld mm1,ROUND |
packssdw mm1,mm0 |
packuswb mm1,mm6 |
; movd [edi],mm1 |
movd eax,mm1 |
stosw |
shr eax,16 |
stosb |
else |
; bytes are stored in the order .cr, .cg, .cb |
mov eax,.cr |
sar eax,ROUND |
stosb |
mov eax,.cg |
sar eax,ROUND |
stosb |
mov eax,.cb |
sar eax,ROUND |
stosb |
end if |
mov dword[esi],ebx |
;if Ext = NON |
jmp .no_skip |
;end if |
@@: |
; hidden pixel: step over it in the screen buffer |
add edi,3 |
.no_skip: |
add esi,4 |
;if Ext=NON |
add ebx,edx |
;end if |
if Ext=MMX |
paddd mm3,mm2 |
paddd mm4,mm5 |
else |
mov eax,.dc_g |
add .cg,eax |
mov eax,.dc_b |
add .cb,eax |
mov eax,.dc_r |
add .cr,eax |
end if |
loop .ddraw |
.gl_quit: |
; drop all locals and return, removing the 26 bytes of arguments |
mov esp,ebp |
ret 26 |
/programs/demos/view3ds/grd_tex.inc |
---|
1,1016 → 1,1016 |
; CATMULL_SHIFT - left shift applied to z values before they are divided |
; ROUND - left shift applied to x, colour and texture values before division |
CATMULL_SHIFT equ 8 |
ROUND equ 8 |
;NON=0 |
;MMX=1 |
;Ext=MMX |
;TEX_SIZE=0x3fff |
;SIZE_X equ 512 |
;SIZE_Y equ 512 |
;ROUND = 8 |
;TEX_SHIFT equ 6 |
; procedure drawing textured triangle with Gouraud shading |
; Z-buffer algorithm included, Z coord interpolation ---- |
; The color is set this way -- (col1 * col2)/256 -------- |
;------------------in - eax - x1 shl 16 + y1 ------------ |
;---------------------- ebx - x2 shl 16 + y2 ------------ |
;---------------------- ecx - x3 shl 16 + y3 ------------ |
;---------------------- esi - pointer to Z-buffer-------- |
;---------------------- edx - pointer to texture--------- |
;---------------------- Z-buffer filled with dd variables |
;---------------------- shifted CATMULL_SHIFT------------ |
;---------------------- edi - pointer to screen buffer--- |
;---------------------- stack : colors------------------- |
; The vertices are sorted by ascending y, per-row deltas are computed for the |
; three edges, and the triangle is filled in two passes: .loop_1 covers rows |
; y1 .. y2-1 using edges 1-3 and 1-2, .loop_2 covers rows y2 .. y3-1 using |
; edges 1-3 and 2-3. Every row is drawn by horizontal_tex_grd_line. |
; ebp is loaded from esp without being saved and the other general registers |
; are overwritten; ret 36 removes the 36 bytes of stack arguments. |
tex_plus_grd_triangle: |
; parameters : |
.tex_y3 equ [ebp+38] ; 36 bytes through stack |
.tex_x3 equ [ebp+36] |
.tex_y2 equ [ebp+34] |
.tex_x2 equ [ebp+32] |
.tex_y1 equ [ebp+30] |
.tex_x1 equ [ebp+28] |
.z3 equ [ebp+26] |
.col3b equ [ebp+24] |
.col3g equ [ebp+22] |
.col3r equ [ebp+20] |
.z2 equ [ebp+18] |
.col2b equ [ebp+16] |
.col2g equ [ebp+14] |
.col2r equ [ebp+12] |
.z1 equ [ebp+10] |
.col1b equ [ebp+8] |
.col1g equ [ebp+6] |
.col1r equ [ebp+4] |
; local variables: |
; the first three are created by push edx esi edi |
.tex_ptr equ dword[ebp-4] |
.z_ptr equ dword[ebp-8] |
.scr_buff equ dword[ebp-12] |
; created by push eax ebx ecx at .sort2 (x in the high word, y in the low word) |
.x1 equ word[ebp-14] ;dw ? |
.y1 equ word[ebp-16] ;dw ? |
.x2 equ word[ebp-18] ;dw ? |
.y2 equ word[ebp-20] ;dw ? |
.x3 equ word[ebp-22] ;dw ? |
.y3 equ word[ebp-24] ;dw ? |
; per-row deltas, created by the pushes in the delta zone (order matters) |
.dx12 equ dword[ebp-28] ;dd ? |
.tex_dx12 equ dword[ebp-32] ;dd ? |
.tex_dy12 equ [ebp-36] ;dd ? |
.dz12 equ dword[ebp-40] ;dd ? |
.dc12r equ [ebp-44] ;dd ? |
.dc12g equ dword[ebp-48] ;dd ? |
.dc12b equ [ebp-52] ;dd ? |
.dx23 equ dword[ebp-56] ;dd ? |
.tex_dx23 equ dword[ebp-60] ;dd ? |
.tex_dy23 equ [ebp-64] ;dd ? |
.dz23 equ dword[ebp-68] ;dd ? |
.dc23r equ [ebp-72] ;dd ? |
.dc23g equ dword[ebp-76] ;dd ? |
.dc23b equ [ebp-80] ;dd ? |
.dx13 equ dword[ebp-84] ;dd ? |
.tex_dx13 equ dword[ebp-88] ;dd ? |
.tex_dy13 equ [ebp-92] ;dd ? |
.dz13 equ dword[ebp-96] ;dd ? |
.dc13r equ [ebp-100] ;dd ? |
.dc13g equ dword[ebp-104] ;dd ? |
.dc13b equ [ebp-108] ;dd ? |
; current values on the two active edges: 12 dwords reserved by sub esp,12*4 |
.scan_x1 equ dword[ebp-112] ;dd ? |
.scan_y1 equ [ebp-116] ;dd ? |
.zz1 equ dword[ebp-120] ;dd ? |
.cur1r equ [ebp-124] ;dd ? |
.cur1g equ dword[ebp-128] ;dd ? |
.cur1b equ [ebp-132] ;dd ? |
.scan_x2 equ dword[ebp-136] ;dd ? |
.scan_y2 equ [ebp-140] ;dd ? |
.zz2 equ dword[ebp-144] ;dd ? |
.cur2r equ [ebp-148] ;dd ? |
.cur2g equ dword[ebp-152] ;dd ? |
.cur2b equ [ebp-156] ;dd ? |
mov ebp,esp |
; mov .tex_ptr,edx |
; mov .z_ptr,esi |
; mov .scr_buff,edi |
push edx esi edi |
; push esi |
; push edi |
; early exit when all three x or all three y coordinates are negative |
mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that |
and edx,ebx ; if *all* of them are negative a sign flag is raised |
and edx,ecx |
and edx,eax |
test edx,80008000h ; Check both X&Y at once |
jne .loop2_end |
; Sort the vertices so that y1 <= y2 <= y3, moving the colours, z and |
; texture coordinates together with each vertex. |
.sort3: |
cmp ax,bx |
jle .sort1 |
xchg eax,ebx |
if Ext>=MMX |
movq mm0, .col1r ; exchange r, g, b, z |
movq mm1, .col2r |
movq .col1r ,mm1 |
movq .col2r ,mm0 |
else |
mov edx,dword .col1r ; exchange both r and g |
xchg edx,dword .col2r |
mov dword .col1r ,edx |
mov edx,dword .col1b ; b and z |
xchg edx,dword .col2b |
mov dword .col1b ,edx |
end if |
mov edx,dword .tex_x1 ; exchange tex_x and tex_y |
xchg edx,dword .tex_x2 |
mov dword .tex_x1 ,edx |
.sort1: |
cmp bx,cx |
jle .sort2 |
xchg ebx,ecx |
if Ext>=MMX |
movq mm0, .col2r ; exchange r, g, b, z |
movq mm1, .col3r |
movq .col3r ,mm0 |
movq .col2r ,mm1 |
else |
mov edx,dword .col2r ; r, g |
xchg edx,dword .col3r |
mov dword .col2r,edx |
mov edx,dword .col2b ; b, z |
xchg edx,dword .col3b |
mov dword .col2b,edx |
end if |
mov edx,dword .tex_x2 ; exchange tex_x and tex_y |
xchg edx,dword .tex_x3 |
mov dword .tex_x2,edx |
jmp .sort3 |
.sort2: |
push eax ebx ecx ; store in variables |
; push ebx |
; push ecx |
;****************** delta computing zone ************** |
;+++++++++ first zone |
; edge 1-2: seven zero deltas are pushed when y2 = y1 |
mov bx,.y2 ; calc delta12 |
sub bx,.y1 |
jnz .dx12_make |
mov ecx,7 |
@@: |
push dword 0 |
loop @b |
jmp .dx12_done |
.dx12_make: |
mov ax,.x2 |
sub ax,.x1 |
cwde |
movsx ebx,bx |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dx12,eax |
push eax |
if 0 ; Ext=SSE |
movd mm0,.col1r ; 2 words r, g |
pxor mm1,mm1 |
punpcklwd mm0,mm1 |
cvtpi2ps xmm0,mm0 |
movlhps xmm0,xmm0 |
movd mm0,.col1g ; 2 words b, z |
punpcklwd mm0,mm1 |
cvtpi2ps xmm0,mm0 |
; xmm0=four float double words |
divss xmm0,.pack3 |
;convert and insert mm0 to lower xmm1 .. |
end if |
mov ax,word .tex_x2 |
sub ax,word .tex_x1 |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .tex_dx12,eax |
push eax |
mov ax,word .tex_y2 |
sub ax,word .tex_y1 |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .tex_dy12,eax |
push eax |
mov ax,word .z2 |
sub ax,word .z1 |
cwde |
shl eax,CATMULL_SHIFT |
cdq |
idiv ebx |
; mov .dz12,eax |
push eax ; .dz12 |
mov ax,word .col2r |
sub ax,word .col1r |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dc12r,eax |
push eax |
mov ax,word .col2g |
sub ax,word .col1g |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dc12g,eax |
push eax |
mov ax,word .col2b ;;--- |
sub ax,word .col1b |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dc12b,eax |
push eax |
;+++++++++++++++++ second zone +++++++++++++ |
.dx12_done: |
; edge 2-3: seven zero deltas are pushed when y3 = y2 |
mov bx,.y3 ; calc delta23 |
sub bx,.y2 |
jnz .dx23_make |
mov ecx,7 |
@@: |
push dword 0 |
loop @b |
jmp .dx23_done |
.dx23_make: |
mov ax,.x3 |
sub ax,.x2 |
cwde |
movsx ebx,bx |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dx23,eax |
push eax |
mov ax,word .tex_x3 |
sub ax,word .tex_x2 |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .tex_dx23,eax |
push eax |
mov ax,word .tex_y3 |
sub ax,word .tex_y2 |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .tex_dy23,eax |
push eax |
mov ax,word .z3 |
sub ax,word .z2 |
cwde ; |
shl eax,CATMULL_SHIFT ; 2222222 |
cdq ; 2 2 |
idiv ebx ; 2 |
; mov .dz23,eax ; 2 |
push eax ; .dz23 ; 2 |
; 2 |
mov ax,word .col3r ; 2 |
sub ax,word .col2r ; 2222222 |
cwde ; second delta |
shl eax,ROUND ; |
cdq ; |
idiv ebx ; |
; mov .dc23r,eax ; |
push eax |
mov ax,word .col3g |
sub ax,word .col2g |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dc23g,eax |
push eax |
mov ax,word .col3b ;;--- |
sub ax,word .col2b |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dc23b,eax |
push eax |
.dx23_done: |
;++++++++++++++++++third zone++++++++++++++++++++++++ |
; edge 1-3: seven zero deltas are pushed when y3 = y1 |
mov bx,.y3 ; calc delta13 |
sub bx,.y1 |
jnz .dx13_make |
mov ecx,7 |
@@: |
push dword 0 |
loop @b |
jmp .dx13_done |
.dx13_make: |
mov ax,.x3 |
sub ax,.x1 |
cwde |
movsx ebx,bx |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dx13,eax |
push eax |
mov ax,word .tex_x3 ; triangle b |
sub ax,word .tex_x1 |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .tex_dx13,eax |
push eax |
mov ax,word .tex_y3 |
sub ax,word .tex_y1 |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .tex_dy13,eax |
push eax |
mov ax,word .z3 |
sub ax,word .z1 ; 333333333 |
cwde ; 3 3 |
shl eax,CATMULL_SHIFT ; 3 |
cdq ; 3 |
idiv ebx ; 3 |
; mov .dz13,eax ; 3 |
push eax ; .dz13 ; 3 |
; 3 |
mov ax,word .col3r ; 3333333333 |
sub ax,word .col1r ; 3 |
cwde ; 3 |
shl eax,ROUND ; 3 |
cdq ; 3 |
idiv ebx ; 3 |
; mov .dc13r,eax ; 3 3 |
push eax ; 33333333 |
mov ax,word .col3g |
sub ax,word .col1g |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dc13g,eax |
push eax |
mov ax,word .col3b ;;--- |
sub ax,word .col1b |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dc13b,eax |
push eax |
.dx13_done: |
; <<<<<<< ::delta zone end+++++++++++++++++++++ >>>>>>>> |
; Reserve exactly the 12 dword locals .scan_x1 .. .cur2b (ebp-112 .. ebp-156). |
; An odd byte count here would leave esp misaligned for every later push and call. |
sub esp,12*4 |
; both edges start at vertex 1 |
movsx eax,.x1 ; eax - cur x1 |
shl eax,ROUND ; ebx - cur x2 |
mov ebx,eax |
movsx edx,word .z1 |
shl edx,CATMULL_SHIFT |
mov .zz1,edx |
mov .zz2,edx |
movzx edi,word .col1r |
shl edi,ROUND |
mov .cur1r,edi |
mov .cur2r,edi |
movzx esi,word .col1g |
shl esi,ROUND |
mov .cur1g,esi |
mov .cur2g,esi |
movzx edx,word .col1b |
shl edx,ROUND |
mov .cur1b,edx |
mov .cur2b,edx |
movzx edi,word .tex_x1 |
shl edi,ROUND |
mov .scan_x1,edi |
mov .scan_x2,edi |
movzx edx,word .tex_y1 |
shl edx,ROUND |
mov .scan_y1,edx |
mov .scan_y2,edx |
mov cx,.y1 |
cmp cx,.y2 |
jge .loop1_end |
; --- upper part: rows y1 .. y2-1, edges 1-3 (eax, *1 values) and 1-2 (ebx, *2 values) --- |
.loop_1: |
; push eax ebx ebp |
; pushad/popad keep all registers, including ebp, intact across the call |
pushad |
; 62 bytes of arguments for horizontal_tex_grd_line, pushed last-to-first |
push .tex_ptr |
push .scr_buff |
push .z_ptr |
push cx |
push .zz2 |
push .scan_x2 |
push dword .scan_y2 |
push dword .cur2r |
push .cur2g |
push dword .cur2b |
push .zz1 |
push .scan_x1 |
push dword .scan_y1 |
push dword .cur1r |
push .cur1g |
push dword .cur1b |
sar eax,ROUND ; eax = x1 as integer |
sar ebx,ROUND ; ebx = x2 as integer |
call horizontal_tex_grd_line |
; pop ebp ebx eax |
popad |
; step every interpolated value one row down along both edges |
if (Ext = MMX)|(Ext=SSE) |
; each 8-byte operation covers two adjacent dwords: b+g, r+z, tex_y+tex_x |
movq mm0,.cur1b |
movq mm1,.cur1r |
movq mm2,.scan_y1 |
movq mm3,.cur2b |
movq mm4,.cur2r |
movq mm5,.scan_y2 |
paddd mm0,.dc13b |
paddd mm1,.dc13r |
paddd mm2,.tex_dy13 |
paddd mm3,.dc12b |
paddd mm4,.dc12r |
paddd mm5,.tex_dy12 |
movq .cur1b,mm0 |
movq .cur1r,mm1 |
movq .scan_y1,mm2 |
movq .cur2b,mm3 |
movq .cur2r,mm4 |
movq .scan_y2,mm5 |
end if |
if Ext >= SSE2 |
; each 16-byte operation covers b, g, r and z at once |
movups xmm0,.cur1b |
movups xmm1,.dc13b |
movups xmm2,.cur2b |
movups xmm3,.dc12b |
movq mm2,.scan_y1 |
movq mm5,.scan_y2 |
paddd xmm0,xmm1 |
paddd xmm2,xmm3 |
paddd mm2,.tex_dy13 |
paddd mm5,.tex_dy12 |
movq .scan_y1,mm2 |
movq .scan_y2,mm5 |
movups .cur1b,xmm0 |
movups .cur2b,xmm2 |
end if |
if Ext = NON |
mov edx,.dc13b |
add .cur1b,edx |
mov esi,.dc13g |
add .cur1g,esi |
mov edi,.dc13r |
add .cur1r,edi |
mov edx,.dz13 |
add .zz1,edx |
mov edx,.tex_dx13 |
add .scan_x1,edx |
mov esi,.tex_dy13 |
add .scan_y1,esi |
mov edi,.dc12b |
add .cur2b,edi |
mov esi,.dc12g |
add .cur2g,esi |
mov edx,.dc12r |
add .cur2r,edx |
mov edi,.tex_dx12 |
add .scan_x2,edi |
mov esi,.tex_dy12 |
add .scan_y2,esi |
mov edx,.dz12 |
add .zz2,edx |
end if |
add eax,.dx13 |
add ebx,.dx12 |
inc cx |
cmp cx,.y2 |
jl .loop_1 |
.loop1_end: |
; --- lower part: rows y2 .. y3-1; the second edge restarts at vertex 2 --- |
movzx ecx,.y2 |
cmp cx,.y3 |
jge .loop2_end |
movsx ebx,.x2 ; eax - cur x1 |
shl ebx,ROUND ; ebx - cur x2 |
movsx edx,word .z2 |
shl edx,CATMULL_SHIFT |
; mov .zz1,edx |
mov .zz2,edx |
movzx edi,word .col2r |
shl edi,ROUND |
; mov .cur1r,edi |
mov .cur2r,edi |
movzx esi,word .col2g |
shl esi,ROUND |
; mov .cur1g,esi |
mov .cur2g,esi |
movzx edx,word .col2b |
shl edx,ROUND |
; mov .cur1b,edx |
mov .cur2b,edx |
movzx edi,word .tex_x2 |
shl edi,ROUND |
; mov .scan_x1,edi |
mov .scan_x2,edi |
movzx edx,word .tex_y2 |
shl edx,ROUND |
; mov .scan_y1,edx |
mov .scan_y2,edx |
.loop_2: |
pushad |
push .tex_ptr |
push .scr_buff |
push .z_ptr |
push cx |
push .zz2 |
push .scan_x2 |
push dword .scan_y2 |
push dword .cur2r |
push .cur2g |
push dword .cur2b |
push .zz1 |
push .scan_x1 |
push dword .scan_y1 |
push dword .cur1r |
push .cur1g |
push dword .cur1b |
sar eax,ROUND |
sar ebx,ROUND |
call horizontal_tex_grd_line |
popad |
if (Ext = MMX)|(Ext=SSE) |
movq mm0,.cur1b |
movq mm1,.cur1r |
movq mm2,.scan_y1 |
movq mm3,.cur2b |
movq mm4,.cur2r |
movq mm5,.scan_y2 |
paddd mm0,.dc13b |
paddd mm1,.dc13r |
paddd mm2,.tex_dy13 |
paddd mm3,.dc23b |
paddd mm4,.dc23r |
paddd mm5,.tex_dy23 |
movq .cur1b,mm0 |
movq .cur1r,mm1 |
movq .scan_y1,mm2 |
movq .cur2b,mm3 |
movq .cur2r,mm4 |
movq .scan_y2,mm5 |
end if |
if Ext >= SSE2 |
movups xmm0,.cur1b |
movups xmm1,.dc13b |
movups xmm2,.cur2b |
movups xmm3,.dc23b |
movq mm2,.scan_y1 |
movq mm5,.scan_y2 |
paddd xmm0,xmm1 |
paddd xmm2,xmm3 |
paddd mm2,.tex_dy13 |
paddd mm5,.tex_dy23 |
movq .scan_y1,mm2 |
movq .scan_y2,mm5 |
movups .cur1b,xmm0 |
movups .cur2b,xmm2 |
end if |
if Ext = NON |
mov edx,.dc13b |
add .cur1b,edx |
mov esi,.dc13g |
add .cur1g,esi |
mov edi,.dc13r |
add .cur1r,edi |
mov edx,.tex_dx13 |
add .scan_x1,edx |
mov esi,.tex_dy13 |
add .scan_y1,esi |
mov edx,.dz13 |
add .zz1,edx |
mov edi,.dc23b |
add .cur2b,edi |
mov esi,.dc23g |
add .cur2g,esi |
mov edx,.dc23r |
add .cur2r,edx |
mov edi,.tex_dx23 |
add .scan_x2,edi |
mov esi,.tex_dy23 |
add .scan_y2,esi |
mov edx,.dz23 |
add .zz2,edx |
end if |
add eax,.dx13 |
add ebx,.dx23 |
inc cx |
cmp cx,.y3 |
jl .loop_2 |
.loop2_end: |
; drop all locals and return, removing the 36 bytes of arguments |
mov esp,ebp |
ret 36 |
horizontal_tex_grd_line: |
; Draws one horizontal span of a textured, Gouraud shaded line with a |
; Z-buffer test. Texel byte 0 is multiplied by .cr, byte 1 by .cg and |
; byte 2 by .cb (each colour shifted right by ROUND first); every product |
; is shifted right by 8 before it is stored to the screen buffer. |
;in: |
; eax : x1, ebx : x2 |
; stack : 62 bytes of arguments (see the equates below), released by |
; 'ret 42+20' |
; ebp is overwritten (used as frame base) and is not restored here. |
; --- arguments; [ebp] holds the return address --- |
.tex_ptr equ [ebp+62] |
.screen equ [ebp+58] |
.z_buffer equ [ebp+54] |
.y equ [ebp+52] |
.z2 equ [ebp+48] |
.tex_x2 equ [ebp+44] |
.tex_y2 equ [ebp+40] |
.r2 equ [ebp+36] |
.g2 equ [ebp+32] |
.b2 equ [ebp+28] |
.z1 equ [ebp+24] |
.tex_x1 equ [ebp+20] |
.tex_y1 equ [ebp+16] |
.r1 equ [ebp+12] |
.g1 equ [ebp+8] |
.b1 equ [ebp+4] |
; --- locals; they are created further down by pushes in this order --- |
.x1 equ word[ebp-2] |
.x2 equ word[ebp-4] |
.dz equ dword[ebp-8] |
.db equ dword[ebp-12] |
.dg equ dword[ebp-16] |
.dr equ dword[ebp-20] |
.dtex_x equ dword[ebp-24] |
.dtex_y equ dword[ebp-28] |
; --- current per-pixel values, pushed just before the .ddraw loop --- |
.c_ty equ [ebp-32] |
.c_tx equ [ebp-36] |
.cb equ [ebp-40] |
.cg equ [ebp-44] |
.cr equ [ebp-48] |
; .t_col is declared but never referenced by this procedure |
.t_col equ [ebp-52] |
; --- qword views of adjacent delta slots, used by the MMX paddd --- |
.dtex_yM equ qword[ebp-28] |
.drM equ qword[ebp-20] |
.dbM equ qword[ebp-12] |
; no 'push ebp': esp at entry becomes the frame base |
mov ebp,esp |
; sub esp,30 |
; reject scanlines outside 0 <= y < [size_y_var] |
mov cx,word .y |
or cx,cx |
jl .quit_l |
cmp cx,word[size_y_var] ;SIZE_Y |
jge .quit_l |
; x1 == x2 -> nothing to draw; x1 > x2 -> swap the end points together |
; with all their attributes (b, g, r, tex_y, tex_x, z) |
cmp ax,bx |
je .quit_l |
jl @f |
xchg eax,ebx |
if Ext=NON |
mov ecx,dword .r1 |
xchg ecx, .r2 |
mov dword .r1, ecx |
mov ecx,dword .g1 |
xchg ecx, .g2 |
mov dword .g1, ecx |
mov ecx,dword .b1 |
xchg ecx, .b2 |
mov dword .b1, ecx |
mov ecx,dword .tex_x1 |
xchg ecx, .tex_x2 |
mov dword .tex_x1, ecx |
mov ecx,dword .tex_y1 |
xchg ecx, .tex_y2 |
mov dword .tex_y1, ecx |
mov ecx,dword .z1 |
xchg ecx, .z2 |
mov dword .z1, ecx |
end if |
if (Ext=MMX) |
; three 8-byte swaps cover the six adjacent dword arguments |
movq mm0,.b1 ; b, g |
movq mm1,.b2 |
movq .b1, mm1 |
movq .b2, mm0 |
movq mm2,.r1 ; r, y |
movq mm3,.r2 |
movq .r1,mm3 |
movq .r2,mm2 |
movq mm4,.tex_x1 ; x, z |
movq mm5,.tex_x2 |
movq .tex_x1,mm5 |
movq .tex_x2,mm4 |
end if |
if Ext>=SSE |
; one 16-byte swap (b, g, r, tex_y) plus one 8-byte swap (tex_x, z) |
movups xmm0,.b1 |
movups xmm1,.b2 |
movups .b1,xmm1 |
movups .b2,xmm0 |
movq mm4,.tex_x1 ; x, z |
movq mm5,.tex_x2 |
movq .tex_x1,mm5 |
movq .tex_x2,mm4 |
end if |
@@: |
; reject spans with x2 <= 0 or x1 >= [size_x_var] |
or bx,bx |
jle .quit_l |
cmp ax,word[size_x_var] ;SIZE_X |
jge .quit_l |
; the two pushes below create the .x1 and .x2 locals |
push ax |
push bx |
; every delta is (end - start) / (x2 - x1), signed division; |
; ebx keeps the divisor for the whole delta zone |
mov eax,.z2 ; delta zone************ |
sub eax,.z1 |
cdq |
mov bx,.x2 |
sub bx,.x1 |
movsx ebx,bx |
idiv ebx |
push eax ; .dz |
mov eax,.b2 |
sub eax,.b1 |
cdq |
idiv ebx |
push eax ; .db |
mov eax,.g2 |
sub eax,.g1 |
cdq |
idiv ebx |
push eax ; .dg |
mov eax,.r2 |
sub eax,.r1 |
cdq |
idiv ebx |
push eax ; .dr |
mov eax,.tex_x2 |
sub eax,.tex_x1 |
cdq |
idiv ebx |
push eax ; .dtex_x |
mov eax,.tex_y2 |
sub eax,.tex_y1 |
cdq |
idiv ebx |
push eax ; .dtex_y |
; left clipping: when x1 <= 0 every start value is advanced by |
; delta * (-x1) and x1 is set to 0 |
cmp .x1,0 |
jg @f |
mov eax,.dz ; clipping |
movsx ebx,.x1 |
neg ebx |
imul ebx |
add .z1,eax |
mov .x1,0 |
mov eax,.dr |
imul ebx |
add .r1,eax |
;if Ext=NON |
mov eax,.dg |
imul ebx |
add .g1,eax |
mov eax,.db |
imul ebx |
add .b1,eax |
mov eax,.dtex_x |
imul ebx |
add .tex_x1,eax |
mov eax,.dtex_y |
imul ebx |
add .tex_y1,eax |
@@: |
; right clipping: x2 is limited to [size_x_var] |
movsx edx,word[size_x_var] ;SIZE_X |
cmp .x2,dx |
jl @f |
mov .x2,dx |
@@: |
; calc line start address in screen and Z buffer |
; esi = z_buffer + 4*(y*size_x + x1), edi = screen + 3*(y*size_x + x1) |
movsx eax,word .y |
mul edx |
movsx edx,.x1 |
add eax,edx |
mov esi,eax |
shl esi,2 |
add esi,.z_buffer |
lea eax,[eax*3] |
mov edi,.screen |
add edi,eax |
; ecx = pixel count = x2 - x1 |
mov cx,.x2 |
sub cx,.x1 |
movzx ecx,cx |
; init current variables |
; push order creates .c_ty, .c_tx, .cb, .cg, .cr |
push dword .tex_y1 |
;if Ext=NON |
push dword .tex_x1 |
push dword .b1 |
push dword .g1 |
push dword .r1 |
if Ext>=MMX |
movq mm4,.cr ; lo -> r,g |
movq mm6,.cb ; hi -> b, tex_x |
pxor mm0,mm0 |
end if |
; ebx = current z along the span |
mov ebx,.z1 |
.ddraw: |
; the pixel is drawn only when current z < value stored in the Z buffer |
cmp ebx,dword[esi] |
jge @f |
; eax = tex_ptr + 3*((((c_ty shr ROUND) shl TEX_SHIFT) + (c_tx shr ROUND)) |
; and TEXTURE_SIZE) |
mov eax,.c_ty |
; if ROUND<TEX_SHIFT |
; shl eax,TEX_SHIFT-ROUND |
; end if |
; if ROUND>TEX_SHIFT |
; shr eax,ROUND-TEX_SHIFT |
; end if |
shr eax,ROUND |
shl Eax,TEX_SHIFT |
mov edx,.c_tx ; calc texture pixel mem addres |
shr edx,ROUND |
add eax,edx |
and eax,TEXTURE_SIZE ; cutting |
lea eax,[3*eax] |
add eax,.tex_ptr |
; store the new depth |
mov dword[esi],ebx |
if Ext = NON |
; load the texel dword and rearrange it: al = byte 0, the upper word of |
; eax holds bytes 1 and 2; then each byte is scaled and stored by stosb |
mov eax,dword[eax] |
; mov .tex_col,eax |
push ax |
shl eax,8 |
pop ax |
mov edx,.cr |
sar edx,ROUND |
mul dl ; al*dl |
shr ax,8 |
stosb |
ror eax,16 |
push ax |
mov edx,.cg |
sar edx,ROUND |
mul dl |
shr ax,8 |
stosb |
pop ax |
shr ax,8 |
mov edx,.cb |
sar edx,ROUND |
mul dl |
shr ax,8 |
stosb |
; stosb already advanced edi by 3, so skip the 'add edi,3' below |
jmp .no_skip |
else |
; MMX: unpack 4 texel bytes to words, multiply by the packed colour |
; words (cr, cg, cb and the high dword of mm6), keep the high byte of |
; each product. movd stores 4 bytes although a pixel is 3 bytes wide. |
movd mm1,[eax] |
punpcklbw mm1,mm0 |
movq mm3,mm4 ;.cr ; lo -> r,g |
movq mm5,mm6 ;.cb ; lo -> b,tex_x |
psrld mm3,ROUND ; |
psrld mm5,ROUND ; |
packssdw mm3,mm5 |
pmullw mm1,mm3 |
psrlw mm1,8 |
packuswb mm1,mm0 |
movd [edi],mm1 |
end if |
; repeats the depth store done above; for Ext = NON these lines are not |
; reached because of the 'jmp .no_skip' inside the branch above |
mov dword[esi],ebx |
if Ext = NON |
jmp .no_skip |
end if |
@@: |
; skipped pixel (and the MMX drawing path falling through): advance the |
; screen pointer by one 3-byte pixel |
add edi,3 |
.no_skip: |
; step the Z-buffer pointer, z, the texture coordinates and the colours |
add esi,4 |
add ebx,.dz |
mov eax,.dtex_x |
add .c_tx, eax |
mov edx,.dtex_y |
add .c_ty, edx |
if Ext=NON |
mov eax,.dr |
add .cr,eax |
mov edx,.dg |
add .cg,edx |
mov eax,.db |
add .cb,eax |
else |
; mm4 += (.dr, .dg); mm6 += (.db, .dz) |
paddd mm4,.drM |
paddd mm6,.dbM |
;; paddd mm7,.dtex_y ; mm4 - b, g |
;; movq .c_tx,mm7 |
; mm6 - r, x |
end if ; mm7 - y, x |
dec ecx |
jnz .ddraw |
.quit_l: |
; drop all locals and release the 62 bytes of arguments |
mov esp,ebp |
ret 42+20 ; horizontal line |
CATMULL_SHIFT equ 8 |
ROUND equ 8 |
;NON=0 |
;MMX=1 |
;Ext=MMX |
;TEX_SIZE=0x3fff |
;SIZE_X equ 512 |
;SIZE_Y equ 512 |
;ROUND = 8 |
;TEX_SHIFT equ 6 |
; procedure drawing textured triangle with Gouraud shading |
; Z-buffer algorithm included, Z coord interpolation ---- |
; The color is set this way ---- (col1 * col2)/256 ------ |
;------------------in - eax - x1 shl 16 + y1 ------------ |
;---------------------- ebx - x2 shl 16 + y2 ------------ |
;---------------------- ecx - x3 shl 16 + y3 ------------ |
;---------------------- esi - pointer to Z-buffer-------- |
;---------------------- edx - pointer to texture--------- |
;---------------------- Z-buffer filled with dd variables |
;---------------------- shifted CATMULL_SHIFT------------ |
;---------------------- edi - pointer to screen buffer--- |
;---------------------- stack : colors------------------- |
tex_plus_grd_triangle: |
; Rasterizes a textured, Gouraud shaded triangle with Z-buffering. |
; The vertices are sorted by y, per-scanline deltas are computed for the |
; three edges (1-2, 2-3, 1-3) and horizontal_tex_grd_line is called once |
; per scanline: first for y1..y2-1 (edges 1-3 and 1-2), then for |
; y2..y3-1 (edges 1-3 and 2-3). |
; in: eax, ebx, ecx - x shl 16 + y of the three vertices |
; edx - texture pointer, esi - Z-buffer pointer, edi - screen buffer |
; stack - 36 bytes of word parameters (see below), released by 'ret 36' |
; ebp is overwritten (used as frame base) and is not restored here. |
; parameters : |
.tex_y3 equ [ebp+38] ; 36 bytes through stack |
.tex_x3 equ [ebp+36] |
.tex_y2 equ [ebp+34] |
.tex_x2 equ [ebp+32] |
.tex_y1 equ [ebp+30] |
.tex_x1 equ [ebp+28] |
.z3 equ [ebp+26] |
.col3b equ [ebp+24] |
.col3g equ [ebp+22] |
.col3r equ [ebp+20] |
.z2 equ [ebp+18] |
.col2b equ [ebp+16] |
.col2g equ [ebp+14] |
.col2r equ [ebp+12] |
.z1 equ [ebp+10] |
.col1b equ [ebp+8] |
.col1g equ [ebp+6] |
.col1r equ [ebp+4] |
; local variables: |
; the slots down to .dc13b are created by pushes, in exactly this order; |
; the twelve slots from .scan_x1 to .cur2b are reserved by 'sub esp,48' |
.tex_ptr equ dword[ebp-4] |
.z_ptr equ dword[ebp-8] |
.scr_buff equ dword[ebp-12] |
.x1 equ word[ebp-14] ;dw ? ;equ word[ebp-10] |
.y1 equ word[ebp-16] ;dw ? ;equ word[ebp-12] |
.x2 equ word[ebp-18] ;dw ? ;equ word[ebp-14] |
.y2 equ word[ebp-20] ;dw ? ;equ word[ebp-16] |
.x3 equ word[ebp-22] ;dw ? ;equ word[ebp-18] |
.y3 equ word[ebp-24] ;dw ? ;equ word[ebp-20] |
.dx12 equ dword[ebp-28] ;dd ? |
.tex_dx12 equ dword[ebp-32] ;dd ? |
.tex_dy12 equ [ebp-36] ;dd ? |
.dz12 equ dword[ebp-40] ;dd ? |
.dc12r equ [ebp-44] ;dd ? |
.dc12g equ dword[ebp-48] ;dd ? |
.dc12b equ [ebp-52] ;dd ? |
.dx23 equ dword[ebp-56] ;dd ? |
.tex_dx23 equ dword[ebp-60] ;dd ? |
.tex_dy23 equ [ebp-64] ;dd ? |
.dz23 equ dword[ebp-68] ;dd ? |
.dc23r equ [ebp-72] ;dd ? |
.dc23g equ dword[ebp-76] ;dd ? |
.dc23b equ [ebp-80] ;dd ? |
.dx13 equ dword[ebp-84] ;dd ? |
.tex_dx13 equ dword[ebp-88] ;dd ? |
.tex_dy13 equ [ebp-92] ;dd ? |
.dz13 equ dword[ebp-96] ;dd ? |
.dc13r equ [ebp-100] ;dd ? |
.dc13g equ dword[ebp-104] ;dd ? |
.dc13b equ [ebp-108] ;dd ? |
.scan_x1 equ dword[ebp-112] ;dd ? |
.scan_y1 equ [ebp-116] ;dd ? |
.zz1 equ dword[ebp-120] ;dd ? |
.cur1r equ [ebp-124] ;dd ? |
.cur1g equ dword[ebp-128] ;dd ? |
.cur1b equ [ebp-132] ;dd ? |
.scan_x2 equ dword[ebp-136] ;dd ? |
.scan_y2 equ [ebp-140] ;dd ? |
.zz2 equ dword[ebp-144] ;dd ? |
.cur2r equ [ebp-148] ;dd ? |
.cur2g equ dword[ebp-152] ;dd ? |
.cur2b equ [ebp-156] ;dd ? |
; no 'push ebp': esp at entry becomes the frame base |
mov ebp,esp |
; mov .tex_ptr,edx |
; mov .z_ptr,esi |
; mov .scr_buff,edi |
; the three pushes create .tex_ptr, .z_ptr and .scr_buff |
push edx esi edi |
; push esi |
; push edi |
mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that |
and edx,ebx ; if *all* of them are negative a sign flag is raised |
and edx,ecx |
and edx,eax |
test edx,80008000h ; Check both X&Y at once |
jne .loop2_end |
; sort the vertices so that y1 <= y2 <= y3; colours, z and texture |
; coordinates are exchanged together with the packed x/y registers |
.sort3: |
cmp ax,bx |
jle .sort1 |
xchg eax,ebx |
if Ext>=MMX |
movq mm0, .col1r ; exchange r, g, b, z |
movq mm1, .col2r |
movq .col1r ,mm1 |
movq .col2r ,mm0 |
else |
mov edx,dword .col1r ; exchange both r and g |
xchg edx,dword .col2r |
mov dword .col1r ,edx |
mov edx,dword .col1b ; b and z |
xchg edx,dword .col2b |
mov dword .col1b ,edx |
end if |
; one dword exchange swaps tex_x and tex_y at once |
mov edx,dword .tex_x1 |
xchg edx,dword .tex_x2 |
mov dword .tex_x1 ,edx |
.sort1: |
cmp bx,cx |
jle .sort2 |
xchg ebx,ecx |
if Ext>=MMX |
movq mm0, .col2r ; exchange r, g, b, z |
movq mm1, .col3r |
movq .col3r ,mm0 |
movq .col2r ,mm1 |
else |
mov edx,dword .col2r ; r, g |
xchg edx,dword .col3r |
mov dword .col2r,edx |
mov edx,dword .col2b ; b, z |
xchg edx,dword .col3b |
mov dword .col2b,edx |
end if |
mov edx,dword .tex_x2 |
xchg edx,dword .tex_x3 |
mov dword .tex_x2,edx |
jmp .sort3 |
.sort2: |
; the three pushes create .x1/.y1, .x2/.y2 and .x3/.y3 |
push eax ebx ecx ; store in variables |
; push ebx |
; push ecx |
;****************** delta computng zone ************** |
; every delta is ((end - start) shl shift) / (y_end - y_start); when the |
; edge has zero height seven zero dwords are pushed instead |
;+++++++++ first zone |
mov bx,.y2 ; calc delta12 |
sub bx,.y1 |
jnz .dx12_make |
mov ecx,7 |
@@: |
push dword 0 |
loop @b |
jmp .dx12_done |
.dx12_make: |
mov ax,.x2 |
sub ax,.x1 |
cwde |
movsx ebx,bx |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dx12,eax |
push eax |
if 0 ; Ext=SSE |
movd mm0,.col1r ; 2 words r, g |
pxor mm1,mm1 |
punpcklwd mm0,mm1 |
cvtpi2ps xmm0,mm0 |
movlhps xmm0,xmm0 |
movd mm0,.col1g ; 2 words b, z |
punpcklwd mm0,mm1 |
cvtpi2ps xmm0,mm0 |
; xmm0=four float double words |
divss xmm0,.pack3 |
;convert and insert mm0 to lower xmm1 .. |
end if |
mov ax,word .tex_x2 |
sub ax,word .tex_x1 |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .tex_dx12r,eax |
push eax |
mov ax,word .tex_y2 |
sub ax,word .tex_y1 |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .tex_dx12,eax |
push eax |
mov ax,word .z2 |
sub ax,word .z1 |
cwde |
shl eax,CATMULL_SHIFT |
cdq |
idiv ebx |
; mov .dz12,eax |
push eax ; .dza12 |
mov ax,word .col2r |
sub ax,word .col1r |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dc12r,eax |
push eax |
mov ax,word .col2g |
sub ax,word .col1g |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dc12g,eax |
push eax |
mov ax,word .col2b ;;--- |
sub ax,word .col1b |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dc12b,eax |
push eax |
;+++++++++++++++++ second zone +++++++++++++ |
.dx12_done: |
mov bx,.y3 ; calc delta23 |
sub bx,.y2 |
jnz .dx23_make |
mov ecx,7 |
@@: |
push dword 0 |
loop @b |
jmp .dx23_done |
.dx23_make: |
mov ax,.x3 |
sub ax,.x2 |
cwde |
movsx ebx,bx |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dx23,eax |
push eax |
mov ax,word .tex_x3 |
sub ax,word .tex_x2 |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .tex_dx23,eax |
push eax |
mov ax,word .tex_y3 |
sub ax,word .tex_y2 |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .tex_dy23,eax |
push eax |
mov ax,word .z3 |
sub ax,word .z2 |
cwde ; |
shl eax,CATMULL_SHIFT ; 2222222 |
cdq ; 2 2 |
idiv ebx ; 2 |
; mov .dz23,eax ; 2 |
push eax ; .dza12 ; 2 |
; 2 |
mov ax,word .col3r ; 2 |
sub ax,word .col2r ; 2222222 |
cwde ; second delta |
shl eax,ROUND ; |
cdq ; |
idiv ebx ; |
; mov .dc23r,eax ; |
push eax |
mov ax,word .col3g |
sub ax,word .col2g |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dc23g,eax |
push eax |
mov ax,word .col3b ;;--- |
sub ax,word .col2b |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dc23b,eax |
push eax |
.dx23_done: |
;++++++++++++++++++third zone++++++++++++++++++++++++ |
mov bx,.y3 ; calc delta13 |
sub bx,.y1 |
jnz .dx13_make |
mov ecx,7 |
@@: |
push dword 0 |
loop @b |
jmp .dx13_done |
.dx13_make: |
mov ax,.x3 |
sub ax,.x1 |
cwde |
movsx ebx,bx |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dx13,eax |
push eax |
mov ax,word .tex_x3 ; triangle b |
sub ax,word .tex_x1 |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .tex_dx13r,eax |
push eax |
mov ax,word .tex_y3 |
sub ax,word .tex_y1 |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .tex_dy13,eax |
push eax |
mov ax,word .z3 |
sub ax,word .z1 ; 333333333 |
cwde ; 3 3 |
shl eax,CATMULL_SHIFT ; 3 |
cdq ; 3 |
idiv ebx ; 3 |
; mov .dz13,eax ; 3 |
push eax ; .dza12 ; 3 |
; 3 |
mov ax,word .col3r ; 3333333333 |
sub ax,word .col1r ; 3 |
cwde ; 3 |
shl eax,ROUND ; 3 |
cdq ; 3 |
idiv ebx ; 3 |
; mov .dc13r,eax ; 3 3 |
push eax ; 33333333 |
mov ax,word .col3g |
sub ax,word .col1g |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dc13g,eax |
push eax |
mov ax,word .col3b ;;--- |
sub ax,word .col1b |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dc13b,eax |
push eax |
.dx13_done: |
; <<<<<<< ::delta zone end+++++++++++++++++++++ >>>>>>>> |
; esp is at ebp-108 here (.dc13b); reserve exactly the twelve dword |
; slots .scan_x1 .. .cur2b (ebp-112 .. ebp-156). The former value 55 |
; left esp on an odd address for every following push and call. |
sub esp,48 ;(12*4) |
; both edge walkers start at vertex 1 |
movsx eax,.x1 ; eax - cur x1 |
shl eax,ROUND ; ebx - cur x2 |
mov ebx,eax |
movsx edx,word .z1 |
shl edx,CATMULL_SHIFT |
mov .zz1,edx |
mov .zz2,edx |
movzx edi,word .col1r |
shl edi,ROUND |
mov .cur1r,edi |
mov .cur2r,edi |
movzx esi,word .col1g |
shl esi,ROUND |
mov .cur1g,esi |
mov .cur2g,esi |
movzx edx,word .col1b |
shl edx,ROUND |
mov .cur1b,edx |
mov .cur2b,edx |
movzx edi,word .tex_x1 |
shl edi,ROUND |
mov .scan_x1,edi |
mov .scan_x2,edi |
movzx edx,word .tex_y1 |
shl edx,ROUND |
mov .scan_y1,edx |
mov .scan_y2,edx |
; upper part: scanlines y1 .. y2-1, skipped when y1 >= y2 |
mov cx,.y1 |
cmp cx,.y2 |
jge .loop1_end |
.loop_1: |
; push eax ebx ebp |
; pushad/popad preserve eax, ebx, cx and ebp across the call; the 62 |
; bytes pushed below are the arguments of horizontal_tex_grd_line |
pushad |
push .tex_ptr |
push .scr_buff |
push .z_ptr |
push cx |
push .zz2 |
push .scan_x2 |
push dword .scan_y2 |
push dword .cur2r |
push .cur2g |
push dword .cur2b |
push .zz1 |
push .scan_x1 |
push dword .scan_y1 |
push dword .cur1r |
push .cur1g |
push dword .cur1b |
sar eax,ROUND |
sar ebx,ROUND |
call horizontal_tex_grd_line |
; pop ebp ebx eax |
popad |
; advance both edge walkers by one scanline; the MMX/SSE variants add |
; two or four adjacent dword slots per instruction |
if (Ext = MMX)|(Ext=SSE) |
movq mm0,.cur1b |
movq mm1,.cur1r |
movq mm2,.scan_y1 |
movq mm3,.cur2b |
movq mm4,.cur2r |
movq mm5,.scan_y2 |
paddd mm0,.dc13b |
paddd mm1,.dc13r |
paddd mm2,.tex_dy13 |
paddd mm3,.dc12b |
paddd mm4,.dc12r |
paddd mm5,.tex_dy12 |
movq .cur1b,mm0 |
movq .cur1r,mm1 |
movq .scan_y1,mm2 |
movq .cur2b,mm3 |
movq .cur2r,mm4 |
movq .scan_y2,mm5 |
end if |
if Ext >= SSE2 |
movups xmm0,.cur1b |
movups xmm1,.dc13b |
movups xmm2,.cur2b |
movups xmm3,.dc12b |
movq mm2,.scan_y1 |
movq mm5,.scan_y2 |
paddd xmm0,xmm1 |
paddd xmm2,xmm3 |
paddd mm2,.tex_dy13 |
paddd mm5,.tex_dy12 |
movq .scan_y1,mm2 |
movq .scan_y2,mm5 |
movups .cur1b,xmm0 |
movups .cur2b,xmm2 |
end if |
if Ext = NON |
mov edx,.dc13b |
add .cur1b,edx |
mov esi,.dc13g |
add .cur1g,esi |
mov edi,.dc13r |
add .cur1r,edi |
mov edx,.dz13 |
add .zz1,edx |
mov edx,.tex_dx13 |
add .scan_x1,edx |
mov esi,.tex_dy13 |
add .scan_y1,esi |
mov edi,.dc12b |
add .cur2b,edi |
mov esi,.dc12g |
add .cur2g,esi |
mov edx,.dc12r |
add .cur2r,edx |
mov edi,.tex_dx12 |
add .scan_x2,edi |
mov esi,.tex_dy12 |
add .scan_y2,esi |
mov edx,.dz12 |
add .zz2,edx |
end if |
add eax,.dx13 |
add ebx,.dx12 |
inc cx |
cmp cx,.y2 |
jl .loop_1 |
.loop1_end: |
; lower part: scanlines y2 .. y3-1; the second walker restarts at |
; vertex 2, the first one keeps running along edge 1-3 |
movzx ecx,.y2 |
cmp cx,.y3 |
jge .loop2_end |
movsx ebx,.x2 ; eax - cur x1 |
shl ebx,ROUND ; ebx - cur x2 |
movsx edx,word .z2 |
shl edx,CATMULL_SHIFT |
; mov .zz1,edx |
mov .zz2,edx |
movzx edi,word .col2r |
shl edi,ROUND |
; mov .cur1r,edi |
mov .cur2r,edi |
movzx esi,word .col2g |
shl esi,ROUND |
; mov .cur1g,esi |
mov .cur2g,esi |
movzx edx,word .col2b |
shl edx,ROUND |
; mov .cur1b,edx |
mov .cur2b,edx |
movzx edi,word .tex_x2 |
shl edi,ROUND |
; mov .scan_x1,edi |
mov .scan_x2,edi |
movzx edx,word .tex_y2 |
shl edx,ROUND |
; mov .scan_y1,edx |
mov .scan_y2,edx |
.loop_2: |
pushad |
push .tex_ptr |
push .scr_buff |
push .z_ptr |
push cx |
push .zz2 |
push .scan_x2 |
push dword .scan_y2 |
push dword .cur2r |
push .cur2g |
push dword .cur2b |
push .zz1 |
push .scan_x1 |
push dword .scan_y1 |
push dword .cur1r |
push .cur1g |
push dword .cur1b |
sar eax,ROUND |
sar ebx,ROUND |
call horizontal_tex_grd_line |
popad |
if (Ext = MMX)|(Ext=SSE) |
movq mm0,.cur1b |
movq mm1,.cur1r |
movq mm2,.scan_y1 |
movq mm3,.cur2b |
movq mm4,.cur2r |
movq mm5,.scan_y2 |
paddd mm0,.dc13b |
paddd mm1,.dc13r |
paddd mm2,.tex_dy13 |
paddd mm3,.dc23b |
paddd mm4,.dc23r |
paddd mm5,.tex_dy23 |
movq .cur1b,mm0 |
movq .cur1r,mm1 |
movq .scan_y1,mm2 |
movq .cur2b,mm3 |
movq .cur2r,mm4 |
movq .scan_y2,mm5 |
end if |
if Ext >= SSE2 |
movups xmm0,.cur1b |
movups xmm1,.dc13b |
movups xmm2,.cur2b |
movups xmm3,.dc23b |
movq mm2,.scan_y1 |
movq mm5,.scan_y2 |
paddd xmm0,xmm1 |
paddd xmm2,xmm3 |
paddd mm2,.tex_dy13 |
paddd mm5,.tex_dy23 |
movq .scan_y1,mm2 |
movq .scan_y2,mm5 |
movups .cur1b,xmm0 |
movups .cur2b,xmm2 |
end if |
if Ext = NON |
mov edx,.dc13b |
add .cur1b,edx |
mov esi,.dc13g |
add .cur1g,esi |
mov edi,.dc13r |
add .cur1r,edi |
mov edx,.tex_dx13 |
add .scan_x1,edx |
mov esi,.tex_dy13 |
add .scan_y1,esi |
mov edx,.dz13 |
add .zz1,edx |
mov edi,.dc23b |
add .cur2b,edi |
mov esi,.dc23g |
add .cur2g,esi |
mov edx,.dc23r |
add .cur2r,edx |
mov edi,.tex_dx23 |
add .scan_x2,edi |
mov esi,.tex_dy23 |
add .scan_y2,esi |
mov edx,.dz23 |
add .zz2,edx |
end if |
add eax,.dx13 |
add ebx,.dx23 |
inc cx |
cmp cx,.y3 |
jl .loop_2 |
.loop2_end: |
; drop all locals and release the 36 bytes of parameters |
mov esp,ebp |
ret 36 |
horizontal_tex_grd_line: |
; Draws one horizontal span of a textured, Gouraud shaded line with a |
; Z-buffer test. Texel byte 0 is multiplied by .cr, byte 1 by .cg and |
; byte 2 by .cb (each colour shifted right by ROUND first); every product |
; is shifted right by 8 before it is stored to the screen buffer. |
;in: |
; eax : x1, ebx : x2 |
; stack : 62 bytes of arguments (see the equates below), released by |
; 'ret 42+20' |
; ebp is overwritten (used as frame base) and is not restored here. |
; --- arguments; [ebp] holds the return address --- |
.tex_ptr equ [ebp+62] |
.screen equ [ebp+58] |
.z_buffer equ [ebp+54] |
.y equ [ebp+52] |
.z2 equ [ebp+48] |
.tex_x2 equ [ebp+44] |
.tex_y2 equ [ebp+40] |
.r2 equ [ebp+36] |
.g2 equ [ebp+32] |
.b2 equ [ebp+28] |
.z1 equ [ebp+24] |
.tex_x1 equ [ebp+20] |
.tex_y1 equ [ebp+16] |
.r1 equ [ebp+12] |
.g1 equ [ebp+8] |
.b1 equ [ebp+4] |
; --- locals; they are created further down by pushes in this order --- |
.x1 equ word[ebp-2] |
.x2 equ word[ebp-4] |
.dz equ dword[ebp-8] |
.db equ dword[ebp-12] |
.dg equ dword[ebp-16] |
.dr equ dword[ebp-20] |
.dtex_x equ dword[ebp-24] |
.dtex_y equ dword[ebp-28] |
; --- current per-pixel values, pushed just before the .ddraw loop --- |
.c_ty equ [ebp-32] |
.c_tx equ [ebp-36] |
.cb equ [ebp-40] |
.cg equ [ebp-44] |
.cr equ [ebp-48] |
; .t_col is declared but never referenced by this procedure |
.t_col equ [ebp-52] |
; --- qword views of adjacent delta slots, used by the MMX paddd --- |
.dtex_yM equ qword[ebp-28] |
.drM equ qword[ebp-20] |
.dbM equ qword[ebp-12] |
; no 'push ebp': esp at entry becomes the frame base |
mov ebp,esp |
; sub esp,30 |
; reject scanlines outside 0 <= y < [size_y_var] |
mov cx,word .y |
or cx,cx |
jl .quit_l |
cmp cx,word[size_y_var] ;SIZE_Y |
jge .quit_l |
; x1 == x2 -> nothing to draw; x1 > x2 -> swap the end points together |
; with all their attributes (b, g, r, tex_y, tex_x, z) |
cmp ax,bx |
je .quit_l |
jl @f |
xchg eax,ebx |
if Ext=NON |
mov ecx,dword .r1 |
xchg ecx, .r2 |
mov dword .r1, ecx |
mov ecx,dword .g1 |
xchg ecx, .g2 |
mov dword .g1, ecx |
mov ecx,dword .b1 |
xchg ecx, .b2 |
mov dword .b1, ecx |
mov ecx,dword .tex_x1 |
xchg ecx, .tex_x2 |
mov dword .tex_x1, ecx |
mov ecx,dword .tex_y1 |
xchg ecx, .tex_y2 |
mov dword .tex_y1, ecx |
mov ecx,dword .z1 |
xchg ecx, .z2 |
mov dword .z1, ecx |
end if |
if (Ext=MMX) |
; three 8-byte swaps cover the six adjacent dword arguments |
movq mm0,.b1 ; b, g |
movq mm1,.b2 |
movq .b1, mm1 |
movq .b2, mm0 |
movq mm2,.r1 ; r, y |
movq mm3,.r2 |
movq .r1,mm3 |
movq .r2,mm2 |
movq mm4,.tex_x1 ; x, z |
movq mm5,.tex_x2 |
movq .tex_x1,mm5 |
movq .tex_x2,mm4 |
end if |
if Ext>=SSE |
; one 16-byte swap (b, g, r, tex_y) plus one 8-byte swap (tex_x, z) |
movups xmm0,.b1 |
movups xmm1,.b2 |
movups .b1,xmm1 |
movups .b2,xmm0 |
movq mm4,.tex_x1 ; x, z |
movq mm5,.tex_x2 |
movq .tex_x1,mm5 |
movq .tex_x2,mm4 |
end if |
@@: |
; reject spans with x2 <= 0 or x1 >= [size_x_var] |
or bx,bx |
jle .quit_l |
cmp ax,word[size_x_var] ;SIZE_X |
jge .quit_l |
; the two pushes below create the .x1 and .x2 locals |
push ax |
push bx |
; every delta is (end - start) / (x2 - x1), signed division; |
; ebx keeps the divisor for the whole delta zone |
mov eax,.z2 ; delta zone************ |
sub eax,.z1 |
cdq |
mov bx,.x2 |
sub bx,.x1 |
movsx ebx,bx |
idiv ebx |
push eax ; .dz |
mov eax,.b2 |
sub eax,.b1 |
cdq |
idiv ebx |
push eax ; .db |
mov eax,.g2 |
sub eax,.g1 |
cdq |
idiv ebx |
push eax ; .dg |
mov eax,.r2 |
sub eax,.r1 |
cdq |
idiv ebx |
push eax ; .dr |
mov eax,.tex_x2 |
sub eax,.tex_x1 |
cdq |
idiv ebx |
push eax ; .dtex_x |
mov eax,.tex_y2 |
sub eax,.tex_y1 |
cdq |
idiv ebx |
push eax ; .dtex_y |
; left clipping: when x1 <= 0 every start value is advanced by |
; delta * (-x1) and x1 is set to 0 |
cmp .x1,0 |
jg @f |
mov eax,.dz ; clipping |
movsx ebx,.x1 |
neg ebx |
imul ebx |
add .z1,eax |
mov .x1,0 |
mov eax,.dr |
imul ebx |
add .r1,eax |
;if Ext=NON |
mov eax,.dg |
imul ebx |
add .g1,eax |
mov eax,.db |
imul ebx |
add .b1,eax |
mov eax,.dtex_x |
imul ebx |
add .tex_x1,eax |
mov eax,.dtex_y |
imul ebx |
add .tex_y1,eax |
@@: |
; right clipping: x2 is limited to [size_x_var] |
movsx edx,word[size_x_var] ;SIZE_X |
cmp .x2,dx |
jl @f |
mov .x2,dx |
@@: |
; calc line start address in screen and Z buffer |
; esi = z_buffer + 4*(y*size_x + x1), edi = screen + 3*(y*size_x + x1) |
movsx eax,word .y |
mul edx |
movsx edx,.x1 |
add eax,edx |
mov esi,eax |
shl esi,2 |
add esi,.z_buffer |
lea eax,[eax*3] |
mov edi,.screen |
add edi,eax |
; ecx = pixel count = x2 - x1 |
mov cx,.x2 |
sub cx,.x1 |
movzx ecx,cx |
; init current variables |
; push order creates .c_ty, .c_tx, .cb, .cg, .cr |
push dword .tex_y1 |
;if Ext=NON |
push dword .tex_x1 |
push dword .b1 |
push dword .g1 |
push dword .r1 |
if Ext>=MMX |
movq mm4,.cr ; lo -> r,g |
movq mm6,.cb ; hi -> b, tex_x |
pxor mm0,mm0 |
end if |
; ebx = current z along the span |
mov ebx,.z1 |
.ddraw: |
; the pixel is drawn only when current z < value stored in the Z buffer |
cmp ebx,dword[esi] |
jge @f |
; eax = tex_ptr + 3*((((c_ty shr ROUND) shl TEX_SHIFT) + (c_tx shr ROUND)) |
; and TEXTURE_SIZE) |
mov eax,.c_ty |
; if ROUND<TEX_SHIFT |
; shl eax,TEX_SHIFT-ROUND |
; end if |
; if ROUND>TEX_SHIFT |
; shr eax,ROUND-TEX_SHIFT |
; end if |
shr eax,ROUND |
shl Eax,TEX_SHIFT |
mov edx,.c_tx ; calc texture pixel mem addres |
shr edx,ROUND |
add eax,edx |
and eax,TEXTURE_SIZE ; cutting |
lea eax,[3*eax] |
add eax,.tex_ptr |
; store the new depth |
mov dword[esi],ebx |
if Ext = NON |
; load the texel dword and rearrange it: al = byte 0, the upper word of |
; eax holds bytes 1 and 2; then each byte is scaled and stored by stosb |
mov eax,dword[eax] |
; mov .tex_col,eax |
push ax |
shl eax,8 |
pop ax |
mov edx,.cr |
sar edx,ROUND |
mul dl ; al*dl |
shr ax,8 |
stosb |
ror eax,16 |
push ax |
mov edx,.cg |
sar edx,ROUND |
mul dl |
shr ax,8 |
stosb |
pop ax |
shr ax,8 |
mov edx,.cb |
sar edx,ROUND |
mul dl |
shr ax,8 |
stosb |
; stosb already advanced edi by 3, so skip the 'add edi,3' below |
jmp .no_skip |
else |
; MMX: unpack 4 texel bytes to words, multiply by the packed colour |
; words (cr, cg, cb and the high dword of mm6), keep the high byte of |
; each product. movd stores 4 bytes although a pixel is 3 bytes wide. |
movd mm1,[eax] |
punpcklbw mm1,mm0 |
movq mm3,mm4 ;.cr ; lo -> r,g |
movq mm5,mm6 ;.cb ; lo -> b,tex_x |
psrld mm3,ROUND ; |
psrld mm5,ROUND ; |
packssdw mm3,mm5 |
pmullw mm1,mm3 |
psrlw mm1,8 |
packuswb mm1,mm0 |
movd [edi],mm1 |
end if |
; repeats the depth store done above; for Ext = NON these lines are not |
; reached because of the 'jmp .no_skip' inside the branch above |
mov dword[esi],ebx |
if Ext = NON |
jmp .no_skip |
end if |
@@: |
; skipped pixel (and the MMX drawing path falling through): advance the |
; screen pointer by one 3-byte pixel |
add edi,3 |
.no_skip: |
; step the Z-buffer pointer, z, the texture coordinates and the colours |
add esi,4 |
add ebx,.dz |
mov eax,.dtex_x |
add .c_tx, eax |
mov edx,.dtex_y |
add .c_ty, edx |
if Ext=NON |
mov eax,.dr |
add .cr,eax |
mov edx,.dg |
add .cg,edx |
mov eax,.db |
add .cb,eax |
else |
; mm4 += (.dr, .dg); mm6 += (.db, .dz) |
paddd mm4,.drM |
paddd mm6,.dbM |
;; paddd mm7,.dtex_y ; mm4 - b, g |
;; movq .c_tx,mm7 |
; mm6 - r, x |
end if ; mm7 - y, x |
dec ecx |
jnz .ddraw |
.quit_l: |
; drop all locals and release the 62 bytes of arguments |
mov esp,ebp |
ret 42+20 ; horizontal line |
/programs/demos/view3ds/history.txt |
---|
1,11 → 1,16 |
View3ds 0.073 - may 2021 |
1. Introduced a procedure for finding non-redundant edges. |
2. Some info about the object is now written out: vertex, triangle and unique |
edge counts. |
----------------------------------------------------------------------------------- |
View3ds 0.072 - march 2021 |
1. New displaying model - texturing with bilinear filtering and transparency |
simultaneously. Note that filtering is done only inside the polygon. For better |
simultaneously. Note that filtering is done only inside the polygon. For better |
image quality, float texture coordinates need to be passed |
as arguments to the single triangle rendering proc. |
2. Optimizations. |
3. The SSE3 version runs correctly on SSE2 CPUs, but the real Phong, glass and |
3. The SSE3 version runs correctly on SSE2 CPUs, but the real Phong, glass and |
transparent texturing with filtering rendering models are disabled. |
----------------------------------------------------------------------------------- |
/programs/demos/view3ds/readme.txt |
---|
1,20 → 1,16 |
View3ds 0.073 - tiny viewer for .3ds and .asc files with several graphics |
View3ds 0.074 - tiny viewer for .3ds and .asc files with several graphics |
effects implemented. |
What's new? |
1. I introduced procedure for searching nonredundand edges. |
2. Writing some info about object: vertices, triangles unique edges |
count. |
1. Fixed the emboss bug in the grd lines displaying model. |
2. Fixed the problem of grd lines exceeding the screen. |
3. New rendering model - ray-cast shadows, with an appropriate button to |
switch this option on. Note that this is not a real-time model, especially when |
a complex object is computed. I made an effort to introduce an accelerating |
structure - AABB (Axis Aligned Bounding Boxes) - but it is disabled |
for now, as it seems to work incorrectly (slowly). |
1. New displaying model - texturing with bilinear filtering and transparency |
simultanusly. Note that filtering is done only inside polygon. To better |
quality of image there is a need to use floats coordinates of texture to pass |
as arguments to single triangle rendering proc. |
2. Optimizations. |
3. SSE3 version runs correct on SSE2 cpus, but real phong, glass and |
transparented texturing with filtering rendering models are disabled. |
Buttons description: |
1. rotary: choosing rotary axle: x, y, x+y. |
2. shd. model: choosing shading model: flat, grd (smooth), env (spherical |
26,20 → 22,20 |
ptex (real Phong + texturing + transparency). |
3. speed: idle, full. |
4,5. zoom in, out: no comment. |
6. catmull: disabled |
6. ray shadow: calc ray casted shadows. |
7. culling: backface culling on/ off. |
8. rand. light: Randomize 3 unlinear lights( so called Phong's illumination). |
9. Blur: blur N times; N=0,1,2,3,4,5 |
10.11,12,13. lossless operations (rotary 90, 180 degrees). |
12. emboss: Do emboss effect( flat bumps ), use 'bumps deep' button to do edges |
12. emboss: Do emboss effect( flat bumps ), use 'bumps deep' button to do edges |
more deep. |
13. fire: do motion blur ( looks like fire ). |
14. move: changes meaning x,y,z +/- buttons -> obj: moving object, camr: moving |
14. move: changes meaning x,y,z +/- buttons -> obj: moving object, camr: moving |
camera, wave: x,y +/- increase, decrease wave effect frequency and amplitude. |
15. generate: Generates some objects: node, Thorn Crown, heart... |
16. bumps: random, according to texture. |
17. bumps deep -> create bumps deeper or lighter. |
18. re-map tex -> re-map texture and bump map coordinates, to change spherical |
18. re-map tex -> re-map texture and bump map coordinates, to change spherical |
mapping around axle use 'xchg' and 'mirror' buttons, then press 're-map tex' button. |
19. bright + -> increase picture brightness. |
20. bright - -> decrease picture brightness. |
46,8 → 42,8 |
21. wav effect -> do effect based sine function. |
22. editor -> sets the editing option. If it is "on", red bars are drawn for each |
vertex. Pressing and moving the left mouse button (the cursor must be on a handler) changes |
the vertex position. When the left mouse button is released, the current position is applied. You may also |
the vertex position. When the left mouse button is released, the current position is applied. You may also |
decrease the overall handler count by enabling culling (using the appropriate button) - some |
back handlers become hidden. |
Maciej Guba V 2021 |
Maciej Guba IX 2021 |
/programs/demos/view3ds/tex_cat.inc |
---|
1,611 → 1,611 |
;TEX_X = 512 |
;TEX_Y = 512 |
;ROUND equ 8 |
;SIZE_X = 512 |
;SIZE_Y = 512 |
;TEX_SHIFT = 9 |
CATMULL_SHIFT equ 8 |
;------------------------------------------------------------------------ |
;- Procedure drawing textured triangle using Catmull Z-buffer algorithm - |
;------------------------------------------------------------------------ |
tex_triangle_z: |
; Rasterizes a textured triangle with Z-buffering: the vertices are |
; sorted by y, per-scanline deltas are computed for the edges 1-2, 1-3 |
; and 2-3, and textured_line_z is called once per scanline. |
; 18 bytes of stack parameters are released by 'ret 18'. |
; ebp is overwritten (used as frame base) and is not restored here. |
;----------in - eax - x1 shl 16 + y1 |
;-------------- ebx - x2 shl 16 + y2 |
;---------------ecx - x3 shl 16 + y3 |
;---------------edx - pointer to Z-buffer |
;---------------esi - pointer to texture buffer |
;---------------edi - pointer to screen buffer |
;-------------stack - texture coordinates |
;------------------ - z coordinates |
; parameters; the .tex_* names are bare addresses and are used in [ ] |
.tex_x1 equ ebp+4 |
.tex_y1 equ ebp+6 |
.tex_x2 equ ebp+8 |
.tex_y2 equ ebp+10 |
.tex_x3 equ ebp+12 |
.tex_y3 equ ebp+14 |
.z1 equ word[ebp+16] |
.z2 equ word[ebp+18] |
.z3 equ word[ebp+20] |
; locals; every slot is created by a push, in exactly this order |
.tex_ptr equ dword[ebp-4] ; pointer to texture |
.z_ptr equ dword[ebp-8] ; pointer to z-buffer |
.x1 equ word[ebp-10] |
.y1 equ word[ebp-12] |
.x2 equ word[ebp-14] |
.y2 equ word[ebp-16] |
.x3 equ word[ebp-18] |
.y3 equ word[ebp-20] |
.dx12 equ dword[ebp-24] |
.tex_dx12 equ dword[ebp-28] |
.tex_dy12 equ dword[ebp-32] |
.dz12 equ dword[ebp-36] |
.dx13 equ dword[ebp-40] |
.tex_dx13 equ dword[ebp-44] |
.tex_dy13 equ dword[ebp-48] |
.dz13 equ dword[ebp-52] |
.dx23 equ dword[ebp-56] |
.tex_dx23 equ dword[ebp-60] |
.tex_dy23 equ dword[ebp-64] |
.dz23 equ dword[ebp-68] |
.scan_x1 equ dword[ebp-72] |
.scan_x2 equ dword[ebp-76] |
.scan_y1 equ dword[ebp-80] |
.scan_y2 equ dword[ebp-84] |
.cz1 equ dword[ebp-88] |
.cz2 equ dword[ebp-92] |
; no 'push ebp': esp at entry becomes the frame base |
mov ebp,esp |
push esi ; store memory pointers |
push edx |
; sort the vertices so that y1 <= y2 <= y3; texture coordinates (one |
; dword = tex_x and tex_y) and z are exchanged along with them |
.tt_sort3: |
cmp ax,bx ;sort all parameters |
jle .tt_sort1 |
xchg eax,ebx |
mov edx,dword [.tex_x1] |
xchg edx,dword [.tex_x2] |
mov dword[.tex_x1],edx |
mov dx,.z1 |
xchg dx,.z2 |
mov .z1,dx |
.tt_sort1: |
cmp bx,cx |
jle .tt_sort2 |
xchg ebx,ecx |
mov edx,dword [.tex_x2] |
xchg edx,dword [.tex_x3] |
mov dword [.tex_x2],edx |
mov dx,.z2 |
xchg dx,.z3 |
mov .z2,dx |
jmp .tt_sort3 |
.tt_sort2: |
push eax ; and store to user friendly variables |
push ebx |
push ecx |
mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that |
and edx,ebx ; if *all* of them are negative a sign flag is raised |
and edx,ecx |
and edx,eax |
test edx,80008000h ; Check both X&Y at once |
jne .tt_loop2_end |
; cmp ax,SIZE_Y |
; jl @f |
; cmp bx,SIZE_Y |
; jl @f |
; cmp cx,SIZE_Y |
; jl @f |
; leftover of the disabled screen-size check: the rotated registers are |
; overwritten by the loads right after the '@@' label |
ror eax,16 |
ror ebx,16 |
ror ecx,16 |
; cmp ax,SIZE_X |
; jl @f |
; cmp bx,SIZE_X |
; jl @f |
; cmp cx,SIZE_X |
; jl @f |
; jmp .tt_loop2_end |
@@: |
; quit when any texture coordinate is negative or not below TEX_X/TEX_Y; |
; each dword is tested twice, before and after 'ror 16', so that both |
; of its 16-bit halves are compared |
mov eax,dword[.tex_x1] ; texture coords must be in [0..TEX_X(Y)] |
mov ebx,dword[.tex_x2] |
mov ecx,dword[.tex_x3] |
mov edx,eax |
or edx,ebx |
or edx,ecx |
test edx,80008000h |
jne .tt_loop2_end |
cmp ax,TEX_X |
jge .tt_loop2_end |
cmp bx,TEX_X |
jge .tt_loop2_end |
cmp cx,TEX_X |
jge .tt_loop2_end |
ror eax,16 |
ror ebx,16 |
ror ecx,16 |
cmp ax,TEX_Y |
jge .tt_loop2_end |
cmp bx,TEX_Y |
jge .tt_loop2_end |
cmp cx,TEX_Y |
jge .tt_loop2_end |
; deltas for edge 1-2; when y2 = y1 four zero dwords are pushed instead |
; NOTE(review): the upper half of ebx comes from the sign of y2, not of |
; y2-y1, so for a negative y2 the divisor looks wrong. Those rows seem |
; never to be drawn (textured_line_z rejects y < 0) - confirm. |
movsx ebx,.y2 ; calc delta |
sub bx,.y1 |
jnz .tt_dx12_make |
xor edx,edx |
mov ecx,4 |
@@: |
push edx |
loop @b |
jmp .tt_dx12_done |
.tt_dx12_make: |
mov ax,.x2 |
sub ax,.x1 |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dx12,eax ; dx12 = (x2-x1)/(y2-y1) |
push eax |
mov ax,word[.tex_x2] |
sub ax,word[.tex_x1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov [.tex_dx12],eax ; tex_dx12 = (tex_x2-tex_x1)/(y2-y1) |
push eax |
mov ax,word[.tex_y2] |
sub ax,word[.tex_y1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov [.tex_dy12],eax ; tex_dy12 = (tex_y2-tex_y1)/(y2-y1) |
push eax |
mov ax,.z2 |
sub ax,.z1 |
cwde |
shl eax,CATMULL_SHIFT |
cdq |
idiv ebx |
push eax |
.tt_dx12_done: |
; deltas for edge 1-3 |
movsx ebx,.y3 ; calc delta |
sub bx,.y1 |
jnz .tt_dx13_make |
xor edx,edx |
mov ecx,4 |
@@: |
push edx |
loop @b |
jmp .tt_dx13_done |
.tt_dx13_make: |
mov ax,.x3 |
sub ax,.x1 |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dx12,eax ; dx13 = (x3-x1)/(y3-y1) |
push eax |
mov ax,word[.tex_x3] |
sub ax,word[.tex_x1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov [.tex_dx12],eax ; tex_dx13 = (tex_x3-tex_x1)/(y3-y1) |
push eax |
mov ax,word[.tex_y3] |
sub ax,word[.tex_y1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov [.tex_dy12],eax ; tex_dy13 = (tex_y3-tex_y1)/(y3-y1) |
push eax |
mov ax,.z3 |
sub ax,.z1 |
cwde |
shl eax,CATMULL_SHIFT |
cdq |
idiv ebx |
push eax |
.tt_dx13_done: |
; deltas for edge 2-3; here the divisor is zero-extended before idiv |
mov bx,.y3 ; calc delta |
sub bx,.y2 |
jnz .tt_dx23_make |
xor edx,edx |
mov ecx,4 |
@@: |
push edx |
loop @b |
jmp .tt_dx23_done |
.tt_dx23_make: |
mov ax,.x3 |
sub ax,.x2 |
cwde |
shl eax,ROUND |
cdq |
movzx ebx,bx |
idiv ebx |
; mov .dx23,eax ; dx23 = (x3-x2)/(y3-y2) |
push eax |
mov ax,word[.tex_x3] |
sub ax,word[.tex_x2] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov [.tex_dx23],eax ; tex_dx23 = (tex_x3-tex_x2)/(y3-y2) |
push eax |
mov ax,word[.tex_y3] |
sub ax,word[.tex_y2] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov [.tex_dy23],eax ; tex_dy23 = (tex_y3-tex_y2)/(y3-y2) |
push eax |
mov ax,.z3 |
sub ax,.z2 |
cwde |
shl eax,CATMULL_SHIFT |
cdq |
idiv ebx |
push eax |
.tt_dx23_done: |
; both edge walkers start at vertex 1; the six pushes below create |
; .scan_x1, .scan_x2, .scan_y1, .scan_y2, .cz1 and .cz2 |
movsx eax,.x1 ;eax - cur x1 |
shl eax,ROUND ;ebx - cur x2 |
mov ebx,eax |
movsx edx, word[.tex_x1] |
shl edx,ROUND |
; mov [.scan_x1],edx |
; mov [.scan_x2],edx |
push edx |
push edx |
movsx edx, word[.tex_y1] |
shl edx,ROUND |
; mov [.scan_y1],edx |
; mov [.scan_y2],edx |
push edx |
push edx |
movsx edx,.z1 |
shl edx,CATMULL_SHIFT |
push edx |
push edx |
; upper part: scanlines y1 .. y2-1, skipped when y1 >= y2 |
mov cx,.y1 |
cmp cx,.y2 |
jge .tt_loop1_end |
.tt_loop1: |
; pushad/popad preserve eax, ebx, cx and ebp across the call; the 38 |
; bytes pushed below are the arguments of textured_line_z |
pushad |
push .z_ptr |
push .cz1 ; z coords shifted shl catmull_shift |
push .cz2 |
push .scan_y2 |
push .scan_x2 |
push .scan_y1 |
push .scan_x1 |
push esi ;[.tex_ptr] |
push cx |
sar ebx,ROUND |
push bx |
sar eax,ROUND |
push ax |
call textured_line_z |
popad |
; advance both edge walkers by one scanline |
mov edx,.dz13 |
add .cz1,edx |
mov edx,.dz12 |
add .cz2,edx |
mov edx, .tex_dx13 |
add .scan_x1, edx |
mov edx, .tex_dx12 |
add .scan_x2, edx |
mov edx, .tex_dy13 |
add .scan_y1, edx |
mov edx, .tex_dy12 |
add .scan_y2, edx |
add eax, .dx13 |
add ebx, .dx12 |
inc cx |
cmp cx,.y2 |
jl .tt_loop1 |
.tt_loop1_end: |
; lower part: scanlines y2 .. y3-1; the second walker restarts at |
; vertex 2, the first one keeps running along edge 1-3 |
mov cx,.y2 |
cmp cx,.y3 |
jge .tt_loop2_end |
movsx ebx,.x2 |
shl ebx,ROUND |
movsx edx,.z2 |
shl edx,CATMULL_SHIFT |
mov .cz2,edx |
movzx edx, word [.tex_x2] |
shl edx,ROUND |
mov .scan_x2,edx |
movzx edx, word[.tex_y2] |
shl edx,ROUND |
mov .scan_y2,edx |
.tt_loop2: |
pushad |
push .z_ptr |
push .cz1 ; z coords shifted shl catmull_shift |
push .cz2 |
push .scan_y2 |
push .scan_x2 |
push .scan_y1 |
push .scan_x1 |
push esi ;[.tex_ptr] |
push cx |
sar ebx,ROUND |
push bx |
sar eax,ROUND |
push ax |
call textured_line_z |
popad |
mov edx,.dz13 |
add .cz1,edx |
mov edx,.dz23 |
add .cz2,edx |
mov edx, .tex_dx13 |
add .scan_x1, edx |
mov edx, .tex_dx23 |
add .scan_x2, edx |
mov edx, .tex_dy13 |
add .scan_y1, edx |
mov edx, .tex_dy23 |
add .scan_y2, edx |
add eax, .dx13 |
add ebx, .dx23 |
inc cx |
cmp cx,.y3 |
jl .tt_loop2 |
.tt_loop2_end: |
.tt_end: |
; drop all locals and release the 18 bytes of parameters |
mov esp,ebp |
ret 18 |
textured_line_z: |
; Draws one horizontal textured span with a Z-buffer test: for every |
; pixel whose current z is less than the stored Z-buffer value the depth |
; is updated and a texel is copied to the screen buffer. |
; 38 bytes of stack arguments are released by 'ret 30+8'. |
; ebp is overwritten (used as frame base) and is not restored here. |
;-----in -edi screen buffer pointer |
;------------ stack: |
.x1 equ word [ebp+4] |
.x2 equ word [ebp+6] |
.y equ word [ebp+8] |
.tex_ptr equ dword [ebp+10] |
.tex_x1 equ ebp+14 |
.tex_y1 equ ebp+18 |
.tex_x2 equ ebp+22 |
.tex_y2 equ ebp+26 |
.z2 equ dword [ebp+30] ;z1, z2 coords shifted shl CATMULL_SHIFT |
.z1 equ dword [ebp+34] |
.z_ptr equ dword [ebp+38] |
; locals; .tex_dy .. .c_tex_x are created by pushes, in this order |
.tex_dy equ dword [ebp-4] |
.tex_dx equ dword [ebp-8] |
.dz equ dword [ebp-12] |
.cz equ dword [ebp-16] |
.c_tex_x equ dword [ebp-20] ; current tex x |
; .m_sft1 and .m_sft2 are not referenced by this procedure; .tex_dxM is |
; used only by the commented-out MMX variant |
.m_sft1 equ ebp-28 |
.m_sft2 equ ebp-32 |
; .c_tex_xM equ ebp+14 |
.tex_dxM equ ebp-8 |
; no 'push ebp': esp at entry becomes the frame base |
mov ebp,esp |
; reject scanlines with y < 0 or y >= [size_y_var]-1 |
mov ax,.y |
or ax,ax |
jl .tl_quit |
mov bx,[size_y_var] |
dec bx |
cmp ax,bx ;SIZE_Y |
jge .tl_quit |
; x1 == x2 -> nothing to draw; x1 > x2 -> swap the end points together |
; with their texture coordinates and z |
mov ax,.x1 |
cmp ax,.x2 |
je .tl_quit |
jl .tl_ok |
xchg ax,.x2 ; sort params |
mov .x1,ax |
if Ext >= MMX |
; one 8-byte move exchanges tex_x and tex_y of an end point at once |
movq mm0,[.tex_x1] |
movq mm1,[.tex_x2] |
movq [.tex_x2],mm0 |
movq [.tex_x1],mm1 |
else |
mov eax,dword[.tex_x1] |
xchg eax,dword[.tex_x2] |
mov dword[.tex_x1],eax |
mov eax,dword[.tex_y1] |
xchg eax,dword[.tex_y2] |
mov dword[.tex_y1],eax |
end if |
mov eax,.z1 |
xchg eax,.z2 |
mov .z1,eax |
.tl_ok: |
; cx = [size_x_var]-1 is the right limit; reject spans with |
; x1 >= limit or x2 <= 0 |
mov cx,[size_x_var] |
dec cx |
cmp .x1,cx ;SIZE_X |
jge .tl_quit |
cmp .x2,0 |
jle .tl_quit |
; per-pixel deltas = (end - start) / (x2 - x1); the three pushes create |
; .tex_dy, .tex_dx and .dz |
mov bx,.x2 |
sub bx,.x1 |
movsx ebx,bx |
mov eax,dword[.tex_y2] ; calc .dty |
sub eax,dword[.tex_y1] |
cdq |
idiv ebx |
push eax |
mov eax,dword[.tex_x2] ; calc .dtx |
sub eax,dword[.tex_x1] |
cdq |
idiv ebx |
push eax |
mov eax,.z2 ; calc .dz |
sub eax,.z1 |
cdq |
idiv ebx |
push eax |
; left clipping: eax still holds .dz from the division above |
cmp .x1,0 ; clipping |
jg @f |
movsx ebx,.x1 |
neg ebx |
imul ebx ; eax = .dz * abs(.x1) |
add .z1,eax |
mov .x1,0 |
mov eax,.tex_dy |
imul ebx |
add dword[.tex_y1],eax |
mov eax,.tex_dx |
imul ebx |
add dword[.tex_x1],eax |
@@: |
; right clipping: x2 is limited to cx = [size_x_var]-1 |
cmp .x2,cx ;SIZE_X |
jl @f |
mov .x2,cx ;SIZE_X |
@@: |
; edi += 3*(y*size_x + x1), .z_ptr += 4*(y*size_x + x1) |
movsx ebx,.y ; calc mem begin in buffers |
movzx eax,word[size_x_var] ;SIZE_X |
mul ebx |
movsx ebx,.x1 |
add eax,ebx |
mov ebx,eax |
lea eax,[eax*3] |
add edi,eax ; edi - scr buff |
shl ebx,2 |
add .z_ptr,ebx ; z buffer pointer |
; ecx = pixel count = x2 - x1 |
mov cx,.x2 |
sub cx,.x1 |
movzx ecx,cx |
;if Ext >= MMX |
; movq mm0,[.tex_x1] |
; movq mm4,mm0 |
; movq mm1,qword[.tex_dxM] |
; mov ebx,.z1 |
; mov eax,.dz |
;else |
; ebx = current tex y; the two pushes create .cz and .c_tex_x |
mov eax,dword[.tex_x1] |
mov ebx,dword[.tex_y1] |
push .z1 ; .cz |
push eax ;.c_tex_x |
;end if |
; edx = current Z-buffer pointer |
mov edx,.z_ptr |
.tl_loop: |
;if Ext >= MMX |
; cmp ebx,[edx] ; ebx - current z |
; jge @f |
; movq mm2,mm0 |
; psrad mm2,ROUND |
; movq mm3,mm2 |
; psrlq mm2,32-TEX_SHIFT |
; paddd mm3,mm2 |
; movd esi,mm3 |
; mov dword[edx],ebx ; renew z buffer |
;else |
; eax - temp |
mov eax,.cz ; ebx - cur tex y shl ROUND |
cmp eax,[edx] ; ecx - l.lenght |
jge @f ; ebx - cur tex_y ; edx - temp |
mov esi,ebx ; edi - scr buff |
sar esi,ROUND ; esi - tex_ptr temp |
shl esi,TEX_SHIFT ; .z_ptr - cur pointer to z buff |
mov eax,.c_tex_x ; .cz - cur z coord shl CATMULL_SHIFT |
sar eax,ROUND |
add esi,eax |
mov eax,.cz |
mov dword[edx],eax ; renew z buffer |
;end if |
; esi = tex_ptr + 3*(texel index and TEXTURE_SIZE); movsd copies 4 |
; bytes and 'dec edi' leaves edi advanced by one 3-byte pixel |
and esi,TEXTURE_SIZE |
lea esi,[esi*3] |
add esi,.tex_ptr |
movsd |
dec edi |
jmp .no_skip |
@@: |
; pixel hidden: only step the screen pointer |
add edi,3 |
.no_skip: |
; step the Z-buffer pointer, z and both texture coordinates |
add edx,4 |
;if Ext >= MMX |
; add ebx,eax |
; paddd mm0,mm1 |
;else |
mov eax,.dz |
add .cz,eax |
mov eax,.tex_dx |
add .c_tex_x,eax |
add ebx,.tex_dy |
;end if |
loop .tl_loop |
.tl_quit: |
; drop all locals and release the 38 bytes of arguments |
mov esp,ebp |
ret 30+8 |
;TEX_X = 512 |
;TEX_Y = 512 |
;ROUND equ 8 |
;SIZE_X = 512 |
;SIZE_Y = 512 |
;TEX_SHIFT = 9 |
CATMULL_SHIFT equ 8 |
;------------------------------------------------------------------------ |
;- Procedure drawing textured triangle using Catmull Z-buffer algorithm - |
;------------------------------------------------------------------------ |
tex_triangle_z: |
;----------in - eax - x1 shl 16 + y1 |
;-------------- ebx - x2 shl 16 + y2 |
;---------------ecx - x3 shl 16 + y3 |
;---------------edx - pointer to Z-buffer |
;---------------esi - pointer to texture buffer |
;---------------edi - pointer to screen buffer |
;-------------stack - texture coordinates |
;------------------ - z coordinates |
;-------------(18 bytes of stack parameters, released by ret 18) |
;-------------ebp is overwritten without being saved |
.tex_x1 equ ebp+4 |
.tex_y1 equ ebp+6 |
.tex_x2 equ ebp+8 |
.tex_y2 equ ebp+10 |
.tex_x3 equ ebp+12 |
.tex_y3 equ ebp+14 |
.z1 equ word[ebp+16] |
.z2 equ word[ebp+18] |
.z3 equ word[ebp+20] |
; The locals below are created by the pushes in the code, in this order; |
; nothing is reserved with sub esp. |
.tex_ptr equ dword[ebp-4] ; pointer to texture |
.z_ptr equ dword[ebp-8] ; pointer to z-buffer |
.x1 equ word[ebp-10] |
.y1 equ word[ebp-12] |
.x2 equ word[ebp-14] |
.y2 equ word[ebp-16] |
.x3 equ word[ebp-18] |
.y3 equ word[ebp-20] |
.dx12 equ dword[ebp-24] |
.tex_dx12 equ dword[ebp-28] |
.tex_dy12 equ dword[ebp-32] |
.dz12 equ dword[ebp-36] |
.dx13 equ dword[ebp-40] |
.tex_dx13 equ dword[ebp-44] |
.tex_dy13 equ dword[ebp-48] |
.dz13 equ dword[ebp-52] |
.dx23 equ dword[ebp-56] |
.tex_dx23 equ dword[ebp-60] |
.tex_dy23 equ dword[ebp-64] |
.dz23 equ dword[ebp-68] |
.scan_x1 equ dword[ebp-72] |
.scan_x2 equ dword[ebp-76] |
.scan_y1 equ dword[ebp-80] |
.scan_y2 equ dword[ebp-84] |
.cz1 equ dword[ebp-88] |
.cz2 equ dword[ebp-92] |
mov ebp,esp |
push esi ; store memory pointers |
push edx |
; Sort the three vertices by y (low word of eax/ebx/ecx) so that |
; y1 <= y2 <= y3; texture coordinates and z values are swapped along. |
.tt_sort3: |
cmp ax,bx ;sort all parameters |
jle .tt_sort1 |
xchg eax,ebx |
mov edx,dword [.tex_x1] |
xchg edx,dword [.tex_x2] |
mov dword[.tex_x1],edx |
mov dx,.z1 |
xchg dx,.z2 |
mov .z1,dx |
.tt_sort1: |
cmp bx,cx |
jle .tt_sort2 |
xchg ebx,ecx |
mov edx,dword [.tex_x2] |
xchg edx,dword [.tex_x3] |
mov dword [.tex_x2],edx |
mov dx,.z2 |
xchg dx,.z3 |
mov .z2,dx |
jmp .tt_sort3 |
.tt_sort2: |
push eax ; and store to user friendly variables |
push ebx |
push ecx |
mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that |
and edx,ebx ; if *all* of them are negative a sign flag is raised |
and edx,ecx |
and edx,eax |
test edx,80008000h ; Check both X&Y at once |
jne .tt_loop2_end |
; cmp ax,SIZE_Y |
; jl @f |
; cmp bx,SIZE_Y |
; jl @f |
; cmp cx,SIZE_Y |
; jl @f |
; NOTE: the rotated values are not consumed - eax, ebx and ecx are |
; reloaded with the texture coordinates right below. |
ror eax,16 |
ror ebx,16 |
ror ecx,16 |
; cmp ax,SIZE_X |
; jl @f |
; cmp bx,SIZE_X |
; jl @f |
; cmp cx,SIZE_X |
; jl @f |
; jmp .tt_loop2_end |
@@: |
mov eax,dword[.tex_x1] ; texture coords must be in [0..TEX_X(Y)] |
mov ebx,dword[.tex_x2] |
mov ecx,dword[.tex_x3] |
mov edx,eax |
or edx,ebx |
or edx,ecx |
test edx,80008000h |
jne .tt_loop2_end |
cmp ax,TEX_X |
jge .tt_loop2_end |
cmp bx,TEX_X |
jge .tt_loop2_end |
cmp cx,TEX_X |
jge .tt_loop2_end |
ror eax,16 |
ror ebx,16 |
ror ecx,16 |
cmp ax,TEX_Y |
jge .tt_loop2_end |
cmp bx,TEX_Y |
jge .tt_loop2_end |
cmp cx,TEX_Y |
jge .tt_loop2_end |
; ---- deltas along edge 1-2: four dwords pushed (.dx12 .. .dz12) ---- |
mov bx,.y2 ; calc delta: bx = y2 - y1 (>= 0 after sorting) |
sub bx,.y1 |
jnz .tt_dx12_make |
xor edx,edx |
mov ecx,4 |
@@: |
push edx |
loop @b |
jmp .tt_dx12_done |
.tt_dx12_make: |
; FIX: the divisor used to be built with movsx ebx,.y2 / sub bx,.y1, |
; which left the sign extension of y2 in the upper half of ebx and gave |
; a wrong divisor whenever y2 was negative. The 16-bit difference is now |
; zero-extended, as the 2-3 edge below already does. |
movzx ebx,bx |
mov ax,.x2 |
sub ax,.x1 |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dx12,eax ; dx12 = (x2-x1)/(y2-y1) |
push eax |
mov ax,word[.tex_x2] |
sub ax,word[.tex_x1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov [.tex_dx12],eax ; tex_dx12 = (tex_x2-tex_x1)/(y2-y1) |
push eax |
mov ax,word[.tex_y2] |
sub ax,word[.tex_y1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov [.tex_dy12],eax ; tex_dy12 = (tex_y2-tex_y1)/(y2-y1) |
push eax |
mov ax,.z2 |
sub ax,.z1 |
cwde |
shl eax,CATMULL_SHIFT |
cdq |
idiv ebx |
push eax |
.tt_dx12_done: |
; ---- deltas along edge 1-3: four dwords pushed (.dx13 .. .dz13) ---- |
mov bx,.y3 ; calc delta: bx = y3 - y1 (>= 0 after sorting) |
sub bx,.y1 |
jnz .tt_dx13_make |
xor edx,edx |
mov ecx,4 |
@@: |
push edx |
loop @b |
jmp .tt_dx13_done |
.tt_dx13_make: |
; FIX: same divisor correction as for edge 1-2. |
movzx ebx,bx |
mov ax,.x3 |
sub ax,.x1 |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dx13,eax ; dx13 = (x3-x1)/(y3-y1) |
push eax |
mov ax,word[.tex_x3] |
sub ax,word[.tex_x1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov [.tex_dx13],eax ; tex_dx13 = (tex_x3-tex_x1)/(y3-y1) |
push eax |
mov ax,word[.tex_y3] |
sub ax,word[.tex_y1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov [.tex_dy13],eax ; tex_dy13 = (tex_y3-tex_y1)/(y3-y1) |
push eax |
mov ax,.z3 |
sub ax,.z1 |
cwde |
shl eax,CATMULL_SHIFT |
cdq |
idiv ebx |
push eax |
.tt_dx13_done: |
; ---- deltas along edge 2-3: four dwords pushed (.dx23 .. .dz23) ---- |
mov bx,.y3 ; calc delta |
sub bx,.y2 |
jnz .tt_dx23_make |
xor edx,edx |
mov ecx,4 |
@@: |
push edx |
loop @b |
jmp .tt_dx23_done |
.tt_dx23_make: |
mov ax,.x3 |
sub ax,.x2 |
cwde |
shl eax,ROUND |
cdq |
movzx ebx,bx |
idiv ebx |
; mov .dx23,eax ; dx23 = (x3-x2)/(y3-y2) |
push eax |
mov ax,word[.tex_x3] |
sub ax,word[.tex_x2] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov [.tex_dx23],eax ; tex_dx23 = (tex_x3-tex_x2)/(y3-y2) |
push eax |
mov ax,word[.tex_y3] |
sub ax,word[.tex_y2] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov [.tex_dy23],eax ; tex_dy23 = (tex_y3-tex_y2)/(y3-y2) |
push eax |
mov ax,.z3 |
sub ax,.z2 |
cwde |
shl eax,CATMULL_SHIFT |
cdq |
idiv ebx |
push eax |
.tt_dx23_done: |
; Running values start at vertex 1; the six pushes below create |
; .scan_x1/.scan_x2, .scan_y1/.scan_y2 and .cz1/.cz2. |
movsx eax,.x1 ;eax - cur x1 |
shl eax,ROUND ;ebx - cur x2 |
mov ebx,eax |
movsx edx, word[.tex_x1] |
shl edx,ROUND |
; mov [.scan_x1],edx |
; mov [.scan_x2],edx |
push edx |
push edx |
movsx edx, word[.tex_y1] |
shl edx,ROUND |
; mov [.scan_y1],edx |
; mov [.scan_y2],edx |
push edx |
push edx |
movsx edx,.z1 |
shl edx,CATMULL_SHIFT |
push edx |
push edx |
; ---- upper part: scanlines y1 .. y2-1, between edges 1-3 and 1-2 ---- |
mov cx,.y1 |
cmp cx,.y2 |
jge .tt_loop1_end |
.tt_loop1: |
; pushad/popad keep eax, ebx, ecx, esi and edi across the call; the |
; pushes follow the parameter layout expected by textured_line_z. |
pushad |
push .z_ptr |
push .cz1 ; z coords shifted shl catmull_shift |
push .cz2 |
push .scan_y2 |
push .scan_x2 |
push .scan_y1 |
push .scan_x1 |
push esi ;[.tex_ptr] |
push cx |
sar ebx,ROUND |
push bx |
sar eax,ROUND |
push ax |
call textured_line_z |
popad |
mov edx,.dz13 |
add .cz1,edx |
mov edx,.dz12 |
add .cz2,edx |
mov edx, .tex_dx13 |
add .scan_x1, edx |
mov edx, .tex_dx12 |
add .scan_x2, edx |
mov edx, .tex_dy13 |
add .scan_y1, edx |
mov edx, .tex_dy12 |
add .scan_y2, edx |
add eax, .dx13 |
add ebx, .dx12 |
inc cx |
cmp cx,.y2 |
jl .tt_loop1 |
.tt_loop1_end: |
; ---- lower part: scanlines y2 .. y3-1, between edges 1-3 and 2-3 ---- |
; The second running point is restarted at vertex 2. |
mov cx,.y2 |
cmp cx,.y3 |
jge .tt_loop2_end |
movsx ebx,.x2 |
shl ebx,ROUND |
movsx edx,.z2 |
shl edx,CATMULL_SHIFT |
mov .cz2,edx |
movzx edx, word [.tex_x2] |
shl edx,ROUND |
mov .scan_x2,edx |
movzx edx, word[.tex_y2] |
shl edx,ROUND |
mov .scan_y2,edx |
.tt_loop2: |
pushad |
push .z_ptr |
push .cz1 ; z coords shifted shl catmull_shift |
push .cz2 |
push .scan_y2 |
push .scan_x2 |
push .scan_y1 |
push .scan_x1 |
push esi ;[.tex_ptr] |
push cx |
sar ebx,ROUND |
push bx |
sar eax,ROUND |
push ax |
call textured_line_z |
popad |
mov edx,.dz13 |
add .cz1,edx |
mov edx,.dz23 |
add .cz2,edx |
mov edx, .tex_dx13 |
add .scan_x1, edx |
mov edx, .tex_dx23 |
add .scan_x2, edx |
mov edx, .tex_dy13 |
add .scan_y1, edx |
mov edx, .tex_dy23 |
add .scan_y2, edx |
add eax, .dx13 |
add ebx, .dx23 |
inc cx |
cmp cx,.y3 |
jl .tt_loop2 |
.tt_loop2_end: |
.tt_end: |
; Drop all pushed locals and release the 18 bytes of stack parameters. |
mov esp,ebp |
ret 18 |
;------------------------------------------------------------------------ |
;- Draws one horizontal textured span with a per-pixel Z-buffer test ---- |
;------------------------------------------------------------------------ |
textured_line_z: |
;-----in -edi screen buffer pointer |
;------------ stack: (38 bytes of parameters, released by ret 30+8) |
;-----ebp is overwritten without being saved; eax, ebx, ecx, edx, esi |
;-----and edi are used as scratch |
.x1 equ word [ebp+4] |
.x2 equ word [ebp+6] |
.y equ word [ebp+8] |
.tex_ptr equ dword [ebp+10] |
.tex_x1 equ ebp+14 |
.tex_y1 equ ebp+18 |
.tex_x2 equ ebp+22 |
.tex_y2 equ ebp+26 |
.z2 equ dword [ebp+30] ;z1, z2 coords shifted shl CATMULL_SHIFT |
.z1 equ dword [ebp+34] |
.z_ptr equ dword [ebp+38] |
; The locals below are not reserved up front: each one comes into |
; existence when the code pushes it, in exactly this order. |
.tex_dy equ dword [ebp-4] |
.tex_dx equ dword [ebp-8] |
.dz equ dword [ebp-12] |
.cz equ dword [ebp-16] |
.c_tex_x equ dword [ebp-20] ; current tex x |
.m_sft1 equ ebp-28 |
.m_sft2 equ ebp-32 |
; .c_tex_xM equ ebp+14 |
.tex_dxM equ ebp-8 |
; No frame is pushed: ebp mirrors esp, so [ebp] is the return address. |
mov ebp,esp |
; Reject the line unless 0 <= y < size_y_var-1. |
mov ax,.y |
or ax,ax |
jl .tl_quit |
mov bx,[size_y_var] |
dec bx |
cmp ax,bx ;SIZE_Y |
jge .tl_quit |
; Reject empty spans; otherwise make x1 < x2, swapping the texture and z |
; end points together with the x coordinates. |
mov ax,.x1 |
cmp ax,.x2 |
je .tl_quit |
jl .tl_ok |
xchg ax,.x2 ; sort params |
mov .x1,ax |
if Ext >= MMX |
; Each movq moves a (tex_x, tex_y) pair, so two loads and two stores |
; exchange both texture end points. |
movq mm0,[.tex_x1] |
movq mm1,[.tex_x2] |
movq [.tex_x2],mm0 |
movq [.tex_x1],mm1 |
else |
mov eax,dword[.tex_x1] |
xchg eax,dword[.tex_x2] |
mov dword[.tex_x1],eax |
mov eax,dword[.tex_y1] |
xchg eax,dword[.tex_y2] |
mov dword[.tex_y1],eax |
end if |
mov eax,.z1 |
xchg eax,.z2 |
mov .z1,eax |
.tl_ok: |
; cx = size_x_var-1 from here on; spans with x1 >= cx or x2 <= 0 are |
; dropped. |
mov cx,[size_x_var] |
dec cx |
cmp .x1,cx ;SIZE_X |
jge .tl_quit |
cmp .x2,0 |
jle .tl_quit |
; Per-pixel deltas = (end - start) / (x2 - x1). The three pushes create |
; .tex_dy, .tex_dx and .dz. |
mov bx,.x2 |
sub bx,.x1 |
movsx ebx,bx |
mov eax,dword[.tex_y2] ; calc .dty |
sub eax,dword[.tex_y1] |
cdq |
idiv ebx |
push eax |
mov eax,dword[.tex_x2] ; calc .dtx |
sub eax,dword[.tex_x1] |
cdq |
idiv ebx |
push eax |
mov eax,.z2 ; calc .dz |
sub eax,.z1 |
cdq |
idiv ebx |
push eax |
; Left clipping: when x1 <= 0 the start values are advanced by -x1 steps |
; and x1 becomes 0. eax still holds .dz from the division above. |
cmp .x1,0 ; clipping |
jg @f |
movsx ebx,.x1 |
neg ebx |
imul ebx ; eax = .dz * abs(.x1) |
add .z1,eax |
mov .x1,0 |
mov eax,.tex_dy |
imul ebx |
add dword[.tex_y1],eax |
mov eax,.tex_dx |
imul ebx |
add dword[.tex_x1],eax |
@@: |
; Right clipping: x2 is limited to size_x_var-1 (cx). |
cmp .x2,cx ;SIZE_X |
jl @f |
mov .x2,cx ;SIZE_X |
@@: |
; Pixel index = y * size_x_var + x1; the screen buffer is stepped by |
; 3 bytes per pixel and the z buffer by 4 bytes per pixel. |
movsx ebx,.y ; calc mem begin in buffers |
movzx eax,word[size_x_var] ;SIZE_X |
mul ebx |
movsx ebx,.x1 |
add eax,ebx |
mov ebx,eax |
lea eax,[eax*3] |
add edi,eax ; edi - scr buff |
shl ebx,2 |
add .z_ptr,ebx ; z buffer pointer |
; ecx = number of pixels to process (x2 - x1). |
mov cx,.x2 |
sub cx,.x1 |
movzx ecx,cx |
;if Ext >= MMX |
; movq mm0,[.tex_x1] |
; movq mm4,mm0 |
; movq mm1,qword[.tex_dxM] |
; mov ebx,.z1 |
; mov eax,.dz |
;else |
mov eax,dword[.tex_x1] |
mov ebx,dword[.tex_y1] |
push .z1 ; .cz |
push eax ;.c_tex_x |
;end if |
mov edx,.z_ptr |
.tl_loop: |
;if Ext >= MMX |
; cmp ebx,[edx] ; ebx - current z |
; jge @f |
; movq mm2,mm0 |
; psrad mm2,ROUND |
; movq mm3,mm2 |
; psrlq mm2,32-TEX_SHIFT |
; paddd mm3,mm2 |
; movd esi,mm3 |
; mov dword[edx],ebx ; renew z buffer |
;else |
; eax - temp |
mov eax,.cz ; ebx - cur tex y shl ROUND |
cmp eax,[edx] ; ecx - pixels left in the line |
jge @f ; edx - cur pointer to z buff; pixel skipped when .cz >= stored z |
mov esi,ebx ; edi - scr buff |
sar esi,ROUND ; esi - tex_ptr temp |
shl esi,TEX_SHIFT ; esi = tex_y * texture row length |
mov eax,.c_tex_x ; .cz - cur z coord shl CATMULL_SHIFT |
sar eax,ROUND |
add esi,eax |
mov eax,.cz |
mov dword[edx],eax ; renew z buffer |
;end if |
; The texel index is masked with TEXTURE_SIZE and scaled by 3 bytes. |
and esi,TEXTURE_SIZE |
lea esi,[esi*3] |
add esi,.tex_ptr |
; movsd copies 4 bytes; dec edi leaves a net 3 byte step per pixel. |
; NOTE(review): this relies on the direction flag being clear - there is |
; no cld in this procedure; confirm that callers guarantee it. |
movsd |
dec edi |
jmp .no_skip |
@@: |
add edi,3 |
.no_skip: |
add edx,4 |
;if Ext >= MMX |
; add ebx,eax |
; paddd mm0,mm1 |
;else |
mov eax,.dz |
add .cz,eax |
mov eax,.tex_dx |
add .c_tex_x,eax |
add ebx,.tex_dy |
;end if |
loop .tl_loop |
.tl_quit: |
; Drop the pushed locals and release the 38 bytes of parameters. |
mov esp,ebp |
ret 30+8 |
/programs/demos/view3ds/two_tex.inc |
---|
1,1105 → 1,1105 |
;SIZE_X equ 350 |
;SIZE_Y equ 350 |
;ROUND equ 8 |
;TEX_X equ 512 |
;TEX_Y equ 512 |
;TEXTURE_SIZE EQU (512*512)-1 |
;TEX_SHIFT EQU 9 |
;CATMULL_SHIFT equ 8 |
;TEXTURE_SIZE EQU (TEX_X * TEX_Y)-1 |
;Ext = SSE |
;SSE = 3 |
;MMX = 1 |
;NON = 0 |
;use32 |
;------- Big thanks to Majuma (www.majuma.xt.pl) for absolutely great--- |
;------- DOS 13h mode demos -------------------------------------------- |
;------- Procedure draws triangle with two overlapped textures, I use -- |
;--------Catmull Z-buffer algorithm- (Z coordinate interpolation)------- |
;--------Each output pixel is computed as: col1*col2/256 ---------------- |
two_tex_triangle_z: |
;------------------in - eax - x1 shl 16 + y1 ----------- |
;---------------------- ebx - x2 shl 16 + y2 ----------- |
;---------------------- ecx - x3 shl 16 + y3 ----------- |
;---------------------- edx - pointer to b. texture----- |
;---------------------- esi - pointer to e. texture----- |
;---------------------- edi - pointer to screen buffer-- |
;---------------------- stack : b. tex coordinates------ |
;---------------------- e. tex coordinates------ |
;---------------------- Z position coordinates-- |
;---------------------- pointer to Z buffer----- |
;-- Z-buffer - filled with coordinates as dword -------- |
;-- (Z coor. as word) shl CATMULL_SHIFT ---------------- |
;-- 34 bytes of stack parameters are released by ret 34; |
;-- ebp is overwritten without being saved ------------- |
.b_x1 equ ebp+4 ; procedure don't save registers !!! |
.b_y1 equ ebp+6 ; each coordinate as word |
.b_x2 equ ebp+8 |
.b_y2 equ ebp+10 ; b - first texture |
.b_x3 equ ebp+12 |
.b_y3 equ ebp+14 ; e - second texture |
.e_x1 equ ebp+16 |
.e_y1 equ ebp+18 |
.e_x2 equ ebp+20 |
.e_y2 equ ebp+22 |
.e_x3 equ ebp+24 |
.e_y3 equ ebp+26 |
.z1 equ word[ebp+28] |
.z2 equ word[ebp+30] |
.z3 equ word[ebp+32] |
.z_buff equ dword[ebp+34] ; pointer to Z-buffer |
; The locals below are created by pushes / sub esp in the code, in this |
; order. |
.t_bmap equ dword[ebp-4] ; pointer to b. texture |
.t_emap equ dword[ebp-8] ; pointer to e. texture |
.x1 equ word[ebp-10] |
.y1 equ word[ebp-12] |
.x2 equ word[ebp-14] |
.y2 equ word[ebp-16] |
.x3 equ word[ebp-18] |
.y3 equ word[ebp-20] |
.dx12 equ dword[ebp-24] |
.dbx12 equ dword[ebp-28] |
.dby12 equ dword[ebp-32] |
.dby12q equ [ebp-32] |
.dex12 equ dword[ebp-36] |
.dey12 equ dword[ebp-40] |
.dey12q equ [ebp-40] |
.dz12 equ dword[ebp-44] |
.dx13 equ dword[ebp-48] |
.dbx13 equ dword[ebp-52] |
.dby13 equ dword[ebp-56] |
.dby13q equ [ebp-56] |
.dex13 equ dword[ebp-60] |
.dey13 equ dword[ebp-64] |
.dey13q equ [ebp-64] |
.dz13 equ dword[ebp-68] |
.dx23 equ dword[ebp-72] |
.dbx23 equ dword[ebp-76] |
.dby23 equ dword[ebp-80] |
.dby23q equ [ebp-80] |
.dex23 equ dword[ebp-84] |
.dey23 equ dword[ebp-88] |
.dey23q equ [ebp-88] |
.dz23 equ dword[ebp-92] |
.cx1 equ dword[ebp-96] ; current variables |
.cx2 equ dword[ebp-100] |
.cbx1 equ dword[ebp-104] |
.cby1 equ [ebp-108] |
.cex1 equ dword[ebp-112] |
.cey1 equ [ebp-116] |
.cbx2 equ dword[ebp-120] |
.cby2 equ [ebp-124] |
.cex2 equ dword[ebp-128] |
.cey2 equ [ebp-132] |
.cz1 equ dword[ebp-136] |
.cz2 equ dword[ebp-140] |
if Ext >= MMX |
emms |
else |
cld |
end if |
mov ebp,esp |
push edx esi ; store b. texture and e. texture pointers |
; push esi ; store e. map |
; sub esp,120 |
; Sort the vertices by y (low word of eax/ebx/ecx) so that |
; y1 <= y2 <= y3; both texture coordinate sets and z follow the swap. |
.sort3: ; sort triangle coordinates... |
cmp ax,bx |
jle .sort1 |
xchg eax,ebx |
mov edx,dword[.b_x1] |
xchg edx,dword[.b_x2] |
mov dword[.b_x1],edx |
mov edx,dword[.e_x1] |
xchg edx,dword[.e_x2] |
mov dword[.e_x1],edx |
mov dx,.z1 |
xchg dx,.z2 |
mov .z1,dx |
.sort1: |
cmp bx,cx |
jle .sort2 |
xchg ebx,ecx |
mov edx,dword[.b_x2] |
xchg edx,dword[.b_x3] |
mov dword[.b_x2],edx |
mov edx,dword[.e_x2] |
xchg edx,dword[.e_x3] |
mov dword[.e_x2],edx |
mov dx,.z2 |
xchg dx,.z3 |
mov .z2,dx |
jmp .sort3 |
.sort2: |
push eax ebx ecx ; store triangle coords in variables |
; push ebx |
; push ecx |
mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that |
and edx,ebx ; if *all* of them are negative a sign flag is raised |
and edx,ecx |
and edx,eax |
test edx,80008000h ; Check both X&Y at once |
jne .loop23_done |
; mov edx,eax ; eax,ebx,ecx are ORd together into edx which means that |
; or edx,ebx ; if any *one* of them is negative a sign flag is raised |
; or edx,ecx |
; test edx,80000000h ; Check only X |
; jne .loop23_done |
; cmp .x1,SIZE_X ; { |
; jg .loop23_done |
; cmp .x2,SIZE_X ; This can be optimized with effort |
; jg .loop23_done |
; cmp .x3,SIZE_X |
; jg .loop23_done ; { |
; ---- deltas along edge 1-2: six dwords (.dx12 .. .dz12) ---- |
mov bx,.y2 ; calc delta 12 |
sub bx,.y1 |
jnz .bt_dx12_make |
mov ecx,6 |
xor edx,edx |
@@: |
push edx ;dword 0 |
loop @b |
jmp .bt_dx12_done |
.bt_dx12_make: |
mov ax,.x2 |
sub ax,.x1 |
cwde |
movsx ebx,bx |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dx12,eax |
push eax |
if Ext=SSE |
; The four texture deltas are computed at once in xmm1 and stored into |
; the 16 bytes reserved here. |
sub esp,16 |
cvtsi2ss xmm3,ebx ;rcps |
; mov eax,255 |
cvtsi2ss xmm4,[i255d] ;eax |
divss xmm3,xmm4 |
rcpss xmm3,xmm3 |
; mulss xmm3,xmm4 |
shufps xmm3,xmm3,0 |
movd mm0,[.b_x1] |
movd mm1,[.b_x2] |
movd mm2,[.e_x1] |
movd mm3,[.e_x2] |
; psubsw mm3,mm2 |
; psubsw mm1,mm0 |
pxor mm4,mm4 |
punpcklwd mm0,mm4 |
punpcklwd mm1,mm4 |
punpcklwd mm2,mm4 |
punpcklwd mm3,mm4 |
; pslld mm0,ROUND |
; pslld mm1,ROUND |
; pslld mm2,ROUND |
; pslld mm3,ROUND |
cvtpi2ps xmm0,mm0 |
movlhps xmm0,xmm0 |
cvtpi2ps xmm0,mm2 |
cvtpi2ps xmm1,mm1 |
movlhps xmm1,xmm1 |
cvtpi2ps xmm1,mm3 |
subps xmm1,xmm0 |
; pxor mm4,mm4 |
; movq mm5,mm1 |
; movq mm6,mm1 |
; pcmpeqb mm5,mm4 |
; psubd mm1,mm0 |
; psubd mm3,mm2 |
; movq mm0,[.b_x1] ; bx1 by1 bx2 by2 |
; movq mm1,[.e_x1] ; ex1 ey1 ex2 ey2 |
; pxor |
; punpcklhd mm0,mm1 ; lwd ; |
; psubw mm1,mm0 ; mm1, mm0 |
; pxor mm2,mm2 |
; pmovmaskb eax,mm1 |
; and eax,10101010b |
; pcmpgtw mm2,mm1 |
; punpcklwd mm1,mm2 |
; psllw mm0,ROUND |
; psllw mm1,ROUND |
; movq mm2,mm0 |
; psrlq mm0,32 |
; cvtpi2ps xmm0,mm1 |
; movlhps xmm0,xmm0 |
; cvtpi2ps xmm0,mm3 |
; divps xmm1,xmm3 |
mulps xmm1,xmm3 |
shufps xmm1,xmm1,10110001b |
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords |
movhlps xmm1,xmm1 |
cvtps2pi mm1,xmm1 |
movq .dey12q,mm0 |
movq .dby12q,mm1 |
; movd .dex12,mm0 |
; psrlq mm0,32 |
; movd .dey12,mm0 |
; movhlps xmm1,xmm1 |
; cvtps2pi mm0,xmm1 |
; movd .dbx12,mm0 |
; psrlq mm0,32 |
; movd .dby12,mm0 |
else |
mov ax,word[.b_x2] |
sub ax,word[.b_x1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dbx12,eax |
push eax |
mov ax,word[.b_y2] |
sub ax,word[.b_y1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dby12,eax |
push eax |
; mov eax,.dbx12 |
; mov ebx,.dby12 |
; int3 |
mov ax,word[.e_x2] |
sub ax,word[.e_x1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dex12,eax |
push eax |
mov ax,word[.e_y2] |
sub ax,word[.e_y1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dey12,eax |
push eax |
end if |
mov ax,.z2 |
sub ax,.z1 |
cwde |
shl eax,CATMULL_SHIFT |
cdq |
idiv ebx |
push eax |
.bt_dx12_done: |
; ---- deltas along edge 1-3: six dwords (.dx13 .. .dz13) ---- |
mov bx,.y3 ; calc delta13 |
sub bx,.y1 |
jnz .bt_dx13_make |
mov ecx,6 |
xor edx,edx |
@@: |
push edx ;dword 0 |
loop @b |
jmp .bt_dx13_done |
.bt_dx13_make: |
mov ax,.x3 |
sub ax,.x1 |
cwde |
movsx ebx,bx |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dx13,eax |
push eax |
if Ext=SSE |
cvtsi2ss xmm3,ebx |
; mov eax,255 |
cvtsi2ss xmm4,[i255d] |
divss xmm3,xmm4 |
rcpss xmm3,xmm3 |
; mulss xmm3,xmm4 |
shufps xmm3,xmm3,0 |
sub esp,16 |
movd mm0,[.b_x1] |
movd mm1,[.b_x3] |
movd mm2,[.e_x1] |
movd mm3,[.e_x3] |
pxor mm4,mm4 |
punpcklwd mm0,mm4 |
punpcklwd mm1,mm4 |
punpcklwd mm2,mm4 |
punpcklwd mm3,mm4 |
cvtpi2ps xmm0,mm0 |
movlhps xmm0,xmm0 |
cvtpi2ps xmm0,mm2 |
cvtpi2ps xmm1,mm1 |
movlhps xmm1,xmm1 |
cvtpi2ps xmm1,mm3 |
subps xmm1,xmm0 |
; divps xmm1,xmm3 |
mulps xmm1,xmm3 |
shufps xmm1,xmm1,10110001b |
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords |
movhlps xmm1,xmm1 |
cvtps2pi mm1,xmm1 |
movq .dey13q,mm0 |
movq .dby13q,mm1 |
else |
mov ax,word[.b_x3] |
sub ax,word[.b_x1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dbx13,eax |
push eax |
mov ax,word[.b_y3] |
sub ax,word[.b_y1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dby13,eax |
push eax |
mov ax,word[.e_x3] |
sub ax,word[.e_x1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dex13,eax |
push eax |
mov ax,word[.e_y3] |
sub ax,word[.e_y1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dey13,eax |
push eax |
end if |
mov ax,.z3 |
sub ax,.z1 |
cwde |
shl eax,CATMULL_SHIFT |
cdq |
idiv ebx |
; mov .dz13,eax |
push eax |
.bt_dx13_done: |
; ---- deltas along edge 2-3: six dwords (.dx23 .. .dz23) ---- |
mov bx,.y3 ; calc delta23 |
sub bx,.y2 |
jnz .bt_dx23_make |
mov ecx,6 |
xor edx,edx |
@@: |
push edx ;dword 0 |
loop @b |
jmp .bt_dx23_done |
.bt_dx23_make: |
mov ax,.x3 |
sub ax,.x2 |
cwde |
movsx ebx,bx |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dx23,eax |
push eax |
if Ext=SSE |
cvtsi2ss xmm3,ebx |
; mov eax,255 |
cvtsi2ss xmm4,[i255d] ;eax |
divss xmm3,xmm4 |
shufps xmm3,xmm3,0 |
sub esp,16 |
movd mm0,[.b_x2] |
movd mm1,[.b_x3] |
movd mm2,[.e_x2] |
movd mm3,[.e_x3] |
pxor mm4,mm4 |
punpcklwd mm0,mm4 |
punpcklwd mm1,mm4 |
punpcklwd mm2,mm4 |
punpcklwd mm3,mm4 |
cvtpi2ps xmm0,mm0 |
movlhps xmm0,xmm0 |
cvtpi2ps xmm0,mm2 |
cvtpi2ps xmm1,mm1 |
movlhps xmm1,xmm1 |
cvtpi2ps xmm1,mm3 |
subps xmm1,xmm0 |
divps xmm1,xmm3 |
shufps xmm1,xmm1,10110001b |
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords |
movhlps xmm1,xmm1 |
cvtps2pi mm1,xmm1 |
movq .dey23q,mm0 |
movq .dby23q,mm1 |
else |
mov ax,word[.b_x3] |
sub ax,word[.b_x2] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dbx23,eax |
push eax |
mov ax,word[.b_y3] |
sub ax,word[.b_y2] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dby23,eax |
push eax |
mov ax,word[.e_x3] |
sub ax,word[.e_x2] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dex23,eax |
push eax |
mov ax,word[.e_y3] |
sub ax,word[.e_y2] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dey23,eax |
push eax |
end if |
mov ax,.z3 |
sub ax,.z2 |
cwde |
shl eax,CATMULL_SHIFT |
cdq |
idiv ebx |
; mov .dz23,eax |
push eax |
; sub esp,40 |
.bt_dx23_done: |
; ---- running values, all started at vertex 1 ---- |
movsx eax,.x1 |
shl eax,ROUND |
; mov .cx1,eax |
; mov .cx2,eax |
push eax eax |
; push eax |
; FIX: the 32 bytes holding .cbx1 .. .cey2 are reserved before they are |
; written. Previously sub esp,32 came after the eight stores, so they |
; landed below the stack pointer. The final layout is unchanged. |
sub esp,32 |
movsx eax,word[.b_x1] |
shl eax,ROUND |
mov .cbx1,eax |
mov .cbx2,eax |
; push eax eax |
; push eax |
movsx eax,word[.b_y1] |
shl eax,ROUND |
mov .cby1,eax |
mov .cby2,eax |
; push eax eax |
; push eax |
movsx eax,word[.e_x1] |
shl eax,ROUND |
mov .cex1,eax |
mov .cex2,eax |
; push eax eax |
;push eax |
movsx eax,word[.e_y1] |
shl eax,ROUND |
mov .cey1,eax |
mov .cey2,eax |
; push eax eax |
;push eax |
movsx eax,.z1 |
shl eax,CATMULL_SHIFT |
; mov .cz1,eax |
; mov .cz2,eax |
push eax eax |
;push eax |
; ---- upper part: scanlines y1 .. y2-1, edges 1-3 and 1-2 ---- |
movsx ecx,.y1 |
cmp cx,.y2 |
jge .loop12_done |
.loop12: |
call .call_line |
mov eax,.dx13 |
add .cx1,eax |
mov ebx,.dx12 |
add .cx2,ebx |
if Ext>= SSE2 |
; One 16-byte add advances cey, cex, cby and cbx of a running point. |
movups xmm0,.cey1 |
movups xmm1,.cey2 |
movups xmm2,.dey12q |
movups xmm3,.dey13q |
paddd xmm0,xmm3 |
paddd xmm1,xmm2 |
movups .cey1,xmm0 |
movups .cey2,xmm1 |
else if (Ext = MMX) | (Ext=SSE) |
movq mm0,.cby2 ; with this optimization object |
movq mm1,.cby1 ; looks bit annoying |
movq mm2,.cey2 |
movq mm3,.cey1 |
paddd mm0,.dby12q |
paddd mm1,.dby13q |
paddd mm2,.dey12q |
paddd mm3,.dey13q |
movq .cby2,mm0 |
movq .cby1,mm1 |
movq .cey1,mm3 |
movq .cey2,mm2 |
else |
mov edx,.dbx13 |
add .cbx1,edx |
mov eax,.dbx12 |
add .cbx2,eax |
mov ebx,.dby13 |
add .cby1,ebx |
mov edx,.dby12 |
add .cby2,edx |
mov eax,.dex13 |
add .cex1,eax |
mov ebx,.dex12 |
add .cex2,ebx |
mov edx,.dey13 |
add .cey1,edx |
mov eax,.dey12 |
add .cey2,eax |
end if |
mov ebx,.dz13 |
add .cz1,ebx |
mov edx,.dz12 |
add .cz2,edx |
inc ecx |
cmp cx,.y2 |
jl .loop12 |
.loop12_done: |
; ---- lower part: scanlines y2 .. y3-1, edges 1-3 and 2-3 ---- |
; The second running point is restarted at vertex 2. |
movsx ecx,.y2 |
cmp cx,.y3 |
jge .loop23_done |
movsx eax,.z2 |
shl eax,CATMULL_SHIFT |
mov .cz2,eax |
movsx eax,.x2 |
shl eax,ROUND |
mov .cx2,eax |
movzx eax,word[.b_x2] |
shl eax,ROUND |
mov .cbx2,eax |
movzx eax,word[.b_y2] |
shl eax,ROUND |
mov .cby2,eax |
movzx eax,word[.e_x2] |
shl eax,ROUND |
mov .cex2,eax |
movzx eax,word[.e_y2] |
shl eax,ROUND |
mov .cey2,eax |
.loop23: |
call .call_line |
;if Ext = NON |
mov eax,.dx13 |
add .cx1,eax |
mov ebx,.dx23 |
add .cx2,ebx |
if Ext>= SSE2 |
movups xmm0,.cey1 |
movups xmm1,.cey2 |
movups xmm2,.dey23q |
movups xmm3,.dey13q |
paddd xmm0,xmm3 |
paddd xmm1,xmm2 |
movups .cey1,xmm0 |
movups .cey2,xmm1 |
else if (Ext = MMX) | ( Ext = SSE) |
movq mm0,.cby2 ; with this mmx optimization object looks bit |
movq mm1,.cby1 ; annoying |
movq mm2,.cey2 |
movq mm3,.cey1 |
paddd mm0,.dby23q |
paddd mm1,.dby13q |
paddd mm2,.dey23q |
paddd mm3,.dey13q |
movq .cby2,mm0 |
movq .cby1,mm1 |
movq .cey2,mm2 |
movq .cey1,mm3 |
else |
mov edx,.dbx13 |
add .cbx1,edx |
mov eax,.dbx23 |
add .cbx2,eax |
mov ebx,.dby13 |
add .cby1,ebx |
mov edx,.dby23 |
add .cby2,edx |
mov eax,.dex13 |
add .cex1,eax |
mov ebx,.dex23 |
add .cex2,ebx |
mov edx,.dey13 |
add .cey1,edx |
mov eax,.dey23 |
add .cey2,eax |
end if |
mov ebx,.dz13 |
add .cz1,ebx |
mov edx,.dz23 |
add .cz2,edx |
;else |
; movq mm0,.db13q |
; movq mm1,.cbx1q |
inc ecx |
cmp cx,.y3 |
jl .loop23 |
.loop23_done: |
; Drop all locals and release the 34 bytes of stack parameters. |
mov esp,ebp |
ret 34 |
; .call_line - local helper: passes the current scanline (ecx = y, |
; x1/x2 in eax/ebx, 56 bytes on the stack) to two_tex_line_z. All |
; general registers are preserved by pushad/popad; ebp must still be |
; this procedure's frame because the operands are ebp-relative. |
.call_line: |
pushad |
push .cz1 |
push .cz2 |
push .z_buff |
push .t_bmap |
push .t_emap |
push dword .cey2 |
push .cex2 |
push dword .cey1 |
push .cex1 |
push dword .cby2 |
push .cbx2 |
push dword .cby1 |
push .cbx1 |
push ecx |
mov eax,.cx1 |
sar eax,ROUND |
mov ebx,.cx2 |
sar ebx,ROUND |
call two_tex_line_z |
popad |
ret |
;------------------------------------------------------------------------ |
;- Draws one horizontal span of two multiplied textures with a per-pixel |
;- Z-buffer test. 56 bytes of stack parameters are released by ret 56; |
;- ebp is overwritten without being saved. |
;------------------------------------------------------------------------ |
two_tex_line_z: |
;--------------in: eax - x1 |
;-------------- ebx - x2 |
;-------------- edi - pointer to screen buffer |
;stack - another parameters : |
.y equ dword [ebp+4] |
.bx1 equ [ebp+8] ; --- |
.by1 equ [ebp+12] ; | |
.bx2 equ [ebp+16] ; | |
.by2 equ [ebp+20] ; |> b. texture and e. texture coords |
.ex1 equ [ebp+24] ; |> shifted shl ROUND |
.ey1 equ [ebp+28] ; | |
.ex2 equ [ebp+32] ; | |
.ey2 equ [ebp+36] ; --- |
.emap equ [ebp+40] ; e. texture pointer (added to the offset built from .cex/.cey) |
.bmap equ [ebp+44] ; b. texture pointer (added to the offset built from .cbx/.cby) |
.z_buff equ dword [ebp+48] |
.z2 equ dword [ebp+52] ; -- |> z coords shifted |
.z1 equ dword [ebp+56] ; -- shl CATMULL_SHIFT |
; The locals below are created by the pushes / sub esp in the code, in |
; this order. |
.x1 equ dword [ebp-4] |
.x2 equ dword [ebp-8] |
.dbx equ [ebp-12] |
.dex equ [ebp-16] |
.dby equ [ebp-20] |
.dey equ [ebp-24] |
.dz equ dword [ebp-28] |
.cbx equ [ebp-32] |
.cex equ [ebp-36] |
.cby equ [ebp-40] |
.cey equ [ebp-44] |
.cz equ dword [ebp-48] |
.czbuff equ dword [ebp-52] |
mov ebp,esp |
; Reject the line unless 0 <= y < size_y_var-1. |
mov ecx,.y |
or ecx,ecx |
jl .bl_end |
mov dx,word[size_y_var] |
dec dx |
cmp cx,dx ;word[size_y_var] ;SIZE_Y |
jge .bl_end |
; Reject empty spans; otherwise make x1 < x2, swapping both texture |
; coordinate pairs and z together with the x coordinates. |
cmp eax,ebx |
jl @f |
je .bl_end |
xchg eax,ebx |
if Ext=NON |
mov edx,.bx1 |
xchg edx,.bx2 |
mov .bx1,edx |
mov edx,.by1 |
xchg edx,.by2 |
mov .by1,edx |
mov edx,.ex1 |
xchg edx,.ex2 |
mov .ex1,edx |
mov edx,.ey1 |
xchg edx,.ey2 |
mov .ey1,edx |
else |
; Each movq moves an (x, y) pair of one texture end point. |
movq mm0,.bx1 |
movq mm1,.ex1 |
movq mm2,.bx2 |
movq mm3,.ex2 |
movq .bx2,mm0 |
movq .ex2,mm1 |
movq .bx1,mm2 |
movq .ex1,mm3 |
end if |
mov edx,.z1 |
xchg edx,.z2 |
mov .z1,edx |
@@: |
push eax ebx ;store x1, x2 |
; Drop spans with x1 > size_x_var-1 or x2 <= 0. |
mov ebx,.x1 |
movzx edx,word[size_x_var] |
dec edx |
cmp ebx,edx |
; cmp bx,word[size_x_var] ;SIZE_X |
jg .bl_end |
cmp .x2,0 |
jle .bl_end |
; ebx = span length; every per-pixel delta is (end - start) / ebx. |
mov ebx,.x2 |
sub ebx,.x1 |
if Ext >= SSE |
; The four texture deltas are computed at once and stored into the |
; 16 bytes reserved here (.dbx, .dex, .dby, .dey). |
sub esp,16 |
cvtsi2ss xmm3,ebx ;rcps |
shufps xmm3,xmm3,0 |
; movq mm0,.bx1q |
; movq mm1,.bx2q |
; movq mm2,.ex1q |
; movq mm3,.ex2q |
; psubd mm1,mm0 |
; psubd mm3,mm2 |
; cvtpi2ps xmm1,mm1 |
; movlhps xmm1,xmm1 |
; cvtpi2ps xmm1,mm3 |
cvtpi2ps xmm0,.bx1 ;mm0 ; bx1; by1 |
movlhps xmm0,xmm0 |
cvtpi2ps xmm0,.ex1 ;mm2 ; ex1; ey1 |
cvtpi2ps xmm1,.bx2 ;mm1 ; bx2; by2 |
movlhps xmm1,xmm1 |
cvtpi2ps xmm1,.ex2 ;mm3 ; ex2; ey2 |
subps xmm1,xmm0 |
; hi lo |
divps xmm1,xmm3 ; xmm1 -> dby; dbx; dey; dex |
shufps xmm1,xmm1,11011000b |
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords |
movhlps xmm1,xmm1 |
cvtps2pi mm1,xmm1 |
movq .dex,mm0 ; hi - lo -> dbx, dex |
movq .dey,mm1 ; hi - lo -> dby, dey |
else |
; The push order defines the slots: .dbx, .dex, .dby, .dey. |
mov eax,.bx2 ; calc .dbx |
sub eax,.bx1 |
cdq |
idiv ebx |
push eax |
mov eax,.ex2 ; calc .dex |
sub eax,.ex1 |
cdq |
idiv ebx |
push eax |
mov eax,.by2 ; calc .dby |
sub eax,.by1 |
cdq |
idiv ebx |
push eax |
mov eax,.ey2 ; calc .dey |
sub eax,.ey1 |
cdq |
idiv ebx |
push eax |
end if |
mov eax,.z2 ; calc .dz |
sub eax,.z1 |
cdq |
idiv ebx |
push eax |
; Left clipping: for x1 < 0 all start values are advanced by -x1 steps |
; and x1 becomes 0. eax still holds .dz from the division above. |
cmp .x1,0 ; set correctly begin variable |
jge @f ; CLIPPING ON FUNCTION |
; cutting triangle exceedes screen |
mov ebx,.x1 |
neg ebx |
imul ebx ; eax = .dz * abs(.x1) |
add .z1,eax |
mov .x1,0 |
mov eax,.dbx |
imul ebx |
add .bx1,eax |
mov eax,.dby |
imul ebx |
add .by1,eax |
mov eax,.dex |
imul ebx |
add .ex1,eax |
mov eax,.dey |
imul ebx |
add .ey1,eax |
@@: |
; Right clipping: x2 is limited to size_x_var. eax keeps size_x_var and |
; is reused as the row length in the multiplication below. |
movzx eax,word[size_x_var] ;SIZE_X ;word[size_x_var] |
mov ebx,.x2 |
cmp eax,ebx |
jg @f |
mov .x2,eax |
@@: |
; movd mm0,eax |
; movd mm1,.x2 |
; pminsw mm0,mm1 |
; movd .x2,mm0 |
; cmp .x2,SIZE_X ;eax | |
; jl @f |> this dont work idk cause |
; mov .x2,SIZE_X ;eax | |
@@: |
; movzx eax,word[size_x_var] ;calc memory begin in buffers |
; Pixel index = y * size_x_var + x1; 3 bytes per pixel in the screen |
; buffer, 4 bytes per pixel in the z buffer. |
mov ebx,.y |
mul ebx |
mov ebx,.x1 |
add eax,ebx |
mov ebx,eax |
lea eax,[eax*3] |
add edi,eax ; edi - screen |
mov esi,.z_buff ; z-buffer filled with dd variables |
shl ebx,2 |
add esi,ebx ; esi - Z buffer |
; ecx = number of pixels to process. |
mov ecx,.x2 |
sub ecx,.x1 |
; init current variables |
; The six pushes create .cbx, .cex, .cby, .cey, .cz and .czbuff. |
push dword .bx1 ;.by1 .ex1 .ey1 .z1 esi |
push dword .ex1 |
push dword .by1 |
push dword .ey1 |
push .z1 ; current z shl CATMULL_SHIFT |
push esi |
if Ext >= MMX |
; mm0 stays zero for the byte unpack/pack; mm3/mm4 carry the running |
; texture coordinates, edx the running z-buffer pointer. |
pxor mm0,mm0 |
movq mm3,.cex ; hi - lo -> cbx; cex |
movq mm4,.cey ; hi - lo -> cby; cey |
; movq mm5,mm3 |
; movq mm6,mm4 |
; psrad mm5,ROUND |
; psrad mm6,ROUND |
; movq .ceyq,mm5 |
; movq .cbyq,mm6 |
mov edx,.czbuff |
else |
cld |
end if |
.draw: |
; if TEX = SHIFTING ;bump drawing only in shifting mode |
; Z test: the pixel is skipped when .cz >= the stored z value. |
if Ext=NON |
mov esi,.czbuff ; .czbuff current address in buffer |
mov ebx,.cz ; .cz - cur z position |
cmp ebx,dword[esi] |
else |
mov ebx,.cz |
cmp ebx,dword[edx] |
end if |
jge .skip |
; Texel address = ((y shr ROUND) shl TEX_SHIFT + (x shr ROUND)) * 3 + |
; texture base, computed for both textures. |
if Ext=NON |
mov eax,.cby |
mov esi,.cbx |
sar eax,ROUND |
sar esi,ROUND |
shl eax,TEX_SHIFT ;- |
add esi,eax |
lea esi,[esi*3] ;- ; esi - current b. texture addres |
add esi,.bmap |
mov ebx,.cex ;.cex - current env map X |
mov eax,.cey ;.cey - current env map y |
sar ebx,ROUND |
sar eax,ROUND |
shl eax,TEX_SHIFT |
add ebx,eax |
lea ebx,[ebx*3] |
add ebx,.emap |
else |
; Both addresses are built in one MMX register: low dword from the e. |
; coordinates, high dword from the b. coordinates; the qword at .emap |
; supplies both base pointers. |
movq mm5,mm4 ;.cey |
psrad mm5,ROUND |
pslld mm5,TEX_SHIFT |
movq mm6,mm3 ;.cex |
psrad mm6,ROUND |
paddd mm5,mm6 |
movq mm6,mm5 |
paddd mm5,mm5 |
paddd mm5,mm6 |
paddd mm5,.emap |
movd esi,mm5 |
psrlq mm5,32 |
movd ebx,mm5 |
end if |
if Ext>=MMX |
; Per byte: (texel1 * texel2) shr 8; 4 bytes are written to [edi] and |
; edi is advanced by the add edi,3 at .skip. |
movd mm1,[esi] |
movd mm2,[ebx] |
punpcklbw mm1,mm0 |
punpcklbw mm2,mm0 |
pmullw mm1,mm2 |
psrlw mm1,8 |
packuswb mm1,mm0 |
movd [edi],mm1 |
mov ebx,.cz |
mov dword[edx],ebx |
else |
; Three bytes, each (byte at [esi] * byte at [ebx]) shr 8; stosb |
; advances edi, so .skip is bypassed afterwards. |
cld ; esi, ebx - current texel addresses of the two textures |
lodsb |
mov dl,[ebx] |
mul dl |
shr ax,8 |
stosb |
inc ebx |
lodsb |
mov dl,[ebx] |
mul dl |
shr ax,8 |
stosb |
inc ebx |
lodsb |
mov dl,[ebx] |
mul dl |
shr ax,8 |
stosb |
mov ebx,.cz |
mov esi,.czbuff |
mov dword[esi],ebx |
jmp .no_skip |
end if |
.skip: |
add edi,3 |
; Advance the z-buffer pointer, the texture coordinates and z. |
if Ext = NON |
.no_skip: |
add .czbuff,4 |
mov eax,.dbx |
add .cbx,eax |
mov eax,.dby |
add .cby,eax |
mov eax,.dex |
add .cex,eax |
mov eax,.dey |
add .cey,eax |
else |
add edx,4 |
paddd mm3,.dex |
paddd mm4,.dey |
; movq mm5,mm3 |
; movq mm6,mm4 |
; psrad mm5,ROUND |
; psrad mm6,ROUND |
; movq .cex,mm3 |
; movq .cey,mm4 |
end if |
mov eax,.dz |
add .cz,eax |
if Ext = NON |
dec ecx |
jnz .draw |
else |
loop .draw |
end if |
.bl_end: |
; Drop the locals and release the 56 bytes of stack parameters. |
mov esp,ebp |
ret 56 |
;SIZE_X equ 350 |
;SIZE_Y equ 350 |
;ROUND equ 8 |
;TEX_X equ 512 |
;TEX_Y equ 512 |
;TEXTURE_SIZE EQU (512*512)-1 |
;TEX_SHIFT EQU 9 |
;CATMULL_SHIFT equ 8 |
;TEXTURE_SIZE EQU (TEX_X * TEX_Y)-1 |
;Ext = SSE |
;SSE = 3 |
;MMX = 1 |
;NON = 0 |
;use32 |
;------- Big thanks to Majuma (www.majuma.xt.pl) for absolutely great--- |
;------- DOS 13h mode demos -------------------------------------------- |
;------- Procedure draws triangle with two overlapped textures, I use -- |
;--------Catmull Z-buffer algorithm- (Z coordinate interpolation)------- |
;--------Each output pixel is computed as: col1*col2/256 ---------------- |
two_tex_triangle_z: |
;------------------in - eax - x1 shl 16 + y1 ----------- |
;---------------------- ebx - x2 shl 16 + y2 ----------- |
;---------------------- ecx - x3 shl 16 + y3 ----------- |
;---------------------- edx - pointer to b. texture----- |
;---------------------- esi - pointer to e. texture----- |
;---------------------- edi - pointer to screen buffer-- |
;---------------------- stack : b. tex coordinates------ |
;---------------------- e. tex coordinates------ |
;---------------------- Z position coordinates-- |
;---------------------- pointer to Z buffer----- |
;-- Z-buffer - filled with coordinates as dword -------- |
;-- (Z coor. as word) shl CATMULL_SHIFT ---------------- |
.b_x1 equ ebp+4 ; procedure don't save registers !!! |
.b_y1 equ ebp+6 ; each coordinate as word |
.b_x2 equ ebp+8 |
.b_y2 equ ebp+10 ; b - first texture |
.b_x3 equ ebp+12 |
.b_y3 equ ebp+14 ; e - second texture |
.e_x1 equ ebp+16 |
.e_y1 equ ebp+18 |
.e_x2 equ ebp+20 |
.e_y2 equ ebp+22 |
.e_x3 equ ebp+24 |
.e_y3 equ ebp+26 |
.z1 equ word[ebp+28] |
.z2 equ word[ebp+30] |
.z3 equ word[ebp+32] |
.z_buff equ dword[ebp+34] ; pointer to Z-buffer |
.t_bmap equ dword[ebp-4] ; pointer to b. texture |
.t_emap equ dword[ebp-8] ; pointer to e. texture |
.x1 equ word[ebp-10] |
.y1 equ word[ebp-12] |
.x2 equ word[ebp-14] |
.y2 equ word[ebp-16] |
.x3 equ word[ebp-18] |
.y3 equ word[ebp-20] |
.dx12 equ dword[ebp-24] |
.dbx12 equ dword[ebp-28] |
.dby12 equ dword[ebp-32] |
.dby12q equ [ebp-32] |
.dex12 equ dword[ebp-36] |
.dey12 equ dword[ebp-40] |
.dey12q equ [ebp-40] |
.dz12 equ dword[ebp-44] |
.dx13 equ dword[ebp-48] |
.dbx13 equ dword[ebp-52] |
.dby13 equ dword[ebp-56] |
.dby13q equ [ebp-56] |
.dex13 equ dword[ebp-60] |
.dey13 equ dword[ebp-64] |
.dey13q equ [ebp-64] |
.dz13 equ dword[ebp-68] |
.dx23 equ dword[ebp-72] |
.dbx23 equ dword[ebp-76] |
.dby23 equ dword[ebp-80] |
.dby23q equ [ebp-80] |
.dex23 equ dword[ebp-84] |
.dey23 equ dword[ebp-88] |
.dey23q equ [ebp-88] |
.dz23 equ dword[ebp-92] |
.cx1 equ dword[ebp-96] ; current variables |
.cx2 equ dword[ebp-100] |
.cbx1 equ dword[ebp-104] |
.cby1 equ [ebp-108] |
.cex1 equ dword[ebp-112] |
.cey1 equ [ebp-116] |
.cbx2 equ dword[ebp-120] |
.cby2 equ [ebp-124] |
.cex2 equ dword[ebp-128] |
.cey2 equ [ebp-132] |
.cz1 equ dword[ebp-136] |
.cz2 equ dword[ebp-140] |
if Ext >= MMX |
emms |
else |
cld |
end if |
mov ebp,esp |
push edx esi ; store bump map |
; push esi ; store e. map |
; sub esp,120 |
.sort3: ; sort triangle coordinates... |
cmp ax,bx |
jle .sort1 |
xchg eax,ebx |
mov edx,dword[.b_x1] |
xchg edx,dword[.b_x2] |
mov dword[.b_x1],edx |
mov edx,dword[.e_x1] |
xchg edx,dword[.e_x2] |
mov dword[.e_x1],edx |
mov dx,.z1 |
xchg dx,.z2 |
mov .z1,dx |
.sort1: |
cmp bx,cx |
jle .sort2 |
xchg ebx,ecx |
mov edx,dword[.b_x2] |
xchg edx,dword[.b_x3] |
mov dword[.b_x2],edx |
mov edx,dword[.e_x2] |
xchg edx,dword[.e_x3] |
mov dword[.e_x2],edx |
mov dx,.z2 |
xchg dx,.z3 |
mov .z2,dx |
jmp .sort3 |
.sort2: |
push eax ebx ecx ; store triangle coords in variables |
; push ebx |
; push ecx |
mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that |
and edx,ebx ; if *all* of them are negative a sign flag is raised |
and edx,ecx |
and edx,eax |
test edx,80008000h ; Check both X&Y at once |
jne .loop23_done |
; mov edx,eax ; eax,ebx,ecx are ORd together into edx which means that |
; or edx,ebx ; if any *one* of them is negative a sign flag is raised |
; or edx,ecx |
; test edx,80000000h ; Check only X |
; jne .loop23_done |
; cmp .x1,SIZE_X ; { |
; jg .loop23_done |
; cmp .x2,SIZE_X ; This can be optimized with effort |
; jg .loop23_done |
; cmp .x3,SIZE_X |
; jg .loop23_done ; { |
mov bx,.y2 ; calc delta 12 |
sub bx,.y1 |
jnz .bt_dx12_make |
mov ecx,6 |
xor edx,edx |
@@: |
push edx ;dword 0 |
loop @b |
jmp .bt_dx12_done |
.bt_dx12_make: |
mov ax,.x2 |
sub ax,.x1 |
cwde |
movsx ebx,bx |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dx12,eax |
push eax |
if Ext=SSE |
sub esp,16 |
cvtsi2ss xmm3,ebx ;rcps |
; mov eax,255 |
cvtsi2ss xmm4,[i255d] ;eax |
divss xmm3,xmm4 |
rcpss xmm3,xmm3 |
; mulss xmm3,xmm4 |
shufps xmm3,xmm3,0 |
movd mm0,[.b_x1] |
movd mm1,[.b_x2] |
movd mm2,[.e_x1] |
movd mm3,[.e_x2] |
; psubsw mm3,mm2 |
; psubsw mm1,mm0 |
pxor mm4,mm4 |
punpcklwd mm0,mm4 |
punpcklwd mm1,mm4 |
punpcklwd mm2,mm4 |
punpcklwd mm3,mm4 |
; pslld mm0,ROUND |
; pslld mm1,ROUND |
; pslld mm2,ROUND |
; pslld mm3,ROUND |
cvtpi2ps xmm0,mm0 |
movlhps xmm0,xmm0 |
cvtpi2ps xmm0,mm2 |
cvtpi2ps xmm1,mm1 |
movlhps xmm1,xmm1 |
cvtpi2ps xmm1,mm3 |
subps xmm1,xmm0 |
; pxor mm4,mm4 |
; movq mm5,mm1 |
; movq mm6,mm1 |
; pcmpeqb mm5,mm4 |
; psubd mm1,mm0 |
; psubd mm3,mm2 |
; movq mm0,[.b_x1] ; bx1 by1 bx2 by2 |
; movq mm1,[.e_x1] ; ex1 ey1 ex2 ey2 |
; pxor |
; punpcklhd mm0,mm1 ; lwd ; |
; psubw mm1,mm0 ; mm1, mm0 |
; pxor mm2,mm2 |
; pmovmaskb eax,mm1 |
; and eax,10101010b |
; pcmpgtw mm2,mm1 |
; punpcklwd mm1,mm2 |
; psllw mm0,ROUND |
; psllw mm1,ROUND |
; movq mm2,mm0 |
; psrlq mm0,32 |
; cvtpi2ps xmm0,mm1 |
; movlhps xmm0,xmm0 |
; cvtpi2ps xmm0,mm3 |
; divps xmm1,xmm3 |
mulps xmm1,xmm3 |
shufps xmm1,xmm1,10110001b |
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords |
movhlps xmm1,xmm1 |
cvtps2pi mm1,xmm1 |
movq .dey12q,mm0 |
movq .dby12q,mm1 |
; movd .dex12,mm0 |
; psrlq mm0,32 |
; movd .dey12,mm0 |
; movhlps xmm1,xmm1 |
; cvtps2pi mm0,xmm1 |
; movd .dbx12,mm0 |
; psrlq mm0,32 |
; movd .dby12,mm0 |
else |
mov ax,word[.b_x2] |
sub ax,word[.b_x1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dbx12,eax |
push eax |
mov ax,word[.b_y2] |
sub ax,word[.b_y1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dby12,eax |
push eax |
; mov eax,.dbx12 |
; mov ebx,.dby12 |
; int3 |
mov ax,word[.e_x2] |
sub ax,word[.e_x1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dex12,eax |
push eax |
mov ax,word[.e_y2] |
sub ax,word[.e_y1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dey12,eax |
push eax |
end if |
mov ax,.z2 |
sub ax,.z1 |
cwde |
shl eax,CATMULL_SHIFT |
cdq |
idiv ebx |
push eax |
.bt_dx12_done: |
mov bx,.y3 ; calc delta13 |
sub bx,.y1 |
jnz .bt_dx13_make |
mov ecx,6 |
xor edx,edx |
@@: |
push edx ;dword 0 |
loop @b |
jmp .bt_dx13_done |
.bt_dx13_make: |
mov ax,.x3 |
sub ax,.x1 |
cwde |
movsx ebx,bx |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dx13,eax |
push eax |
if Ext=SSE |
cvtsi2ss xmm3,ebx |
; mov eax,255 |
cvtsi2ss xmm4,[i255d] |
divss xmm3,xmm4 |
rcpss xmm3,xmm3 |
; mulss xmm3,xmm4 |
shufps xmm3,xmm3,0 |
sub esp,16 |
movd mm0,[.b_x1] |
movd mm1,[.b_x3] |
movd mm2,[.e_x1] |
movd mm3,[.e_x3] |
pxor mm4,mm4 |
punpcklwd mm0,mm4 |
punpcklwd mm1,mm4 |
punpcklwd mm2,mm4 |
punpcklwd mm3,mm4 |
cvtpi2ps xmm0,mm0 |
movlhps xmm0,xmm0 |
cvtpi2ps xmm0,mm2 |
cvtpi2ps xmm1,mm1 |
movlhps xmm1,xmm1 |
cvtpi2ps xmm1,mm3 |
subps xmm1,xmm0 |
; divps xmm1,xmm3 |
mulps xmm1,xmm3 |
shufps xmm1,xmm1,10110001b |
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords |
movhlps xmm1,xmm1 |
cvtps2pi mm1,xmm1 |
movq .dey13q,mm0 |
movq .dby13q,mm1 |
else |
mov ax,word[.b_x3] |
sub ax,word[.b_x1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dbx13,eax |
push eax |
mov ax,word[.b_y3] |
sub ax,word[.b_y1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dby13,eax |
push eax |
mov ax,word[.e_x3] |
sub ax,word[.e_x1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dex13,eax |
push eax |
mov ax,word[.e_y3] |
sub ax,word[.e_y1] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dey13,eax |
push eax |
end if |
mov ax,.z3 |
sub ax,.z1 |
cwde |
shl eax,CATMULL_SHIFT |
cdq |
idiv ebx |
; mov .dz13,eax |
push eax |
.bt_dx13_done: |
mov bx,.y3 ; calc delta23 |
sub bx,.y2 |
jnz .bt_dx23_make |
mov ecx,6 |
xor edx,edx |
@@: |
push edx ;dword 0 |
loop @b |
jmp .bt_dx23_done |
.bt_dx23_make: |
mov ax,.x3 |
sub ax,.x2 |
cwde |
movsx ebx,bx |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dx23,eax |
push eax |
if Ext=SSE |
cvtsi2ss xmm3,ebx |
; mov eax,255 |
cvtsi2ss xmm4,[i255d] ;eax |
divss xmm3,xmm4 |
shufps xmm3,xmm3,0 |
sub esp,16 |
movd mm0,[.b_x2] |
movd mm1,[.b_x3] |
movd mm2,[.e_x2] |
movd mm3,[.e_x3] |
pxor mm4,mm4 |
punpcklwd mm0,mm4 |
punpcklwd mm1,mm4 |
punpcklwd mm2,mm4 |
punpcklwd mm3,mm4 |
cvtpi2ps xmm0,mm0 |
movlhps xmm0,xmm0 |
cvtpi2ps xmm0,mm2 |
cvtpi2ps xmm1,mm1 |
movlhps xmm1,xmm1 |
cvtpi2ps xmm1,mm3 |
subps xmm1,xmm0 |
divps xmm1,xmm3 |
shufps xmm1,xmm1,10110001b |
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords |
movhlps xmm1,xmm1 |
cvtps2pi mm1,xmm1 |
movq .dey23q,mm0 |
movq .dby23q,mm1 |
else |
mov ax,word[.b_x3] |
sub ax,word[.b_x2] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dbx23,eax |
push eax |
mov ax,word[.b_y3] |
sub ax,word[.b_y2] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dby23,eax |
push eax |
mov ax,word[.e_x3] |
sub ax,word[.e_x2] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dex23,eax |
push eax |
mov ax,word[.e_y3] |
sub ax,word[.e_y2] |
cwde |
shl eax,ROUND |
cdq |
idiv ebx |
; mov .dey23,eax |
push eax |
end if |
mov ax,.z3 |
sub ax,.z2 |
cwde |
shl eax,CATMULL_SHIFT |
cdq |
idiv ebx |
; mov .dz23,eax |
push eax |
; sub esp,40 |
.bt_dx23_done: |
movsx eax,.x1 |
shl eax,ROUND |
; mov .cx1,eax |
; mov .cx2,eax |
push eax eax |
; push eax |
movsx eax,word[.b_x1] |
shl eax,ROUND |
mov .cbx1,eax |
mov .cbx2,eax |
; push eax eax |
; push eax |
movsx eax,word[.b_y1] |
shl eax,ROUND |
mov .cby1,eax |
mov .cby2,eax |
; push eax eax |
; push eax |
movsx eax,word[.e_x1] |
shl eax,ROUND |
mov .cex1,eax |
mov .cex2,eax |
; push eax eax |
;push eax |
movsx eax,word[.e_y1] |
shl eax,ROUND |
mov .cey1,eax |
mov .cey2,eax |
sub esp,32 |
; push eax eax |
;push eax |
movsx eax,.z1 |
shl eax,CATMULL_SHIFT |
; mov .cz1,eax |
; mov .cz2,eax |
push eax eax |
;push eax |
movsx ecx,.y1 |
cmp cx,.y2 |
jge .loop12_done |
.loop12: |
call .call_line |
mov eax,.dx13 |
add .cx1,eax |
mov ebx,.dx12 |
add .cx2,ebx |
if Ext>= SSE2 |
movups xmm0,.cey1 |
movups xmm1,.cey2 |
movups xmm2,.dey12q |
movups xmm3,.dey13q |
paddd xmm0,xmm3 |
paddd xmm1,xmm2 |
movups .cey1,xmm0 |
movups .cey2,xmm1 |
else if (Ext = MMX) | (Ext=SSE) |
movq mm0,.cby2 ; with this optimization object |
movq mm1,.cby1 ; looks bit annoying |
movq mm2,.cey2 |
movq mm3,.cey1 |
paddd mm0,.dby12q |
paddd mm1,.dby13q |
paddd mm2,.dey12q |
paddd mm3,.dey13q |
movq .cby2,mm0 |
movq .cby1,mm1 |
movq .cey1,mm3 |
movq .cey2,mm2 |
else |
mov edx,.dbx13 |
add .cbx1,edx |
mov eax,.dbx12 |
add .cbx2,eax |
mov ebx,.dby13 |
add .cby1,ebx |
mov edx,.dby12 |
add .cby2,edx |
mov eax,.dex13 |
add .cex1,eax |
mov ebx,.dex12 |
add .cex2,ebx |
mov edx,.dey13 |
add .cey1,edx |
mov eax,.dey12 |
add .cey2,eax |
end if |
mov ebx,.dz13 |
add .cz1,ebx |
mov edx,.dz12 |
add .cz2,edx |
inc ecx |
cmp cx,.y2 |
jl .loop12 |
.loop12_done: |
movsx ecx,.y2 |
cmp cx,.y3 |
jge .loop23_done |
movsx eax,.z2 |
shl eax,CATMULL_SHIFT |
mov .cz2,eax |
movsx eax,.x2 |
shl eax,ROUND |
mov .cx2,eax |
movzx eax,word[.b_x2] |
shl eax,ROUND |
mov .cbx2,eax |
movzx eax,word[.b_y2] |
shl eax,ROUND |
mov .cby2,eax |
movzx eax,word[.e_x2] |
shl eax,ROUND |
mov .cex2,eax |
movzx eax,word[.e_y2] |
shl eax,ROUND |
mov .cey2,eax |
.loop23: |
call .call_line |
;if Ext = NON |
mov eax,.dx13 |
add .cx1,eax |
mov ebx,.dx23 |
add .cx2,ebx |
if Ext>= SSE2 |
movups xmm0,.cey1 |
movups xmm1,.cey2 |
movups xmm2,.dey23q |
movups xmm3,.dey13q |
paddd xmm0,xmm3 |
paddd xmm1,xmm2 |
movups .cey1,xmm0 |
movups .cey2,xmm1 |
else if (Ext = MMX) | ( Ext = SSE) |
movq mm0,.cby2 ; with this mmx optimization object looks bit |
movq mm1,.cby1 ; annoying |
movq mm2,.cey2 |
movq mm3,.cey1 |
paddd mm0,.dby23q |
paddd mm1,.dby13q |
paddd mm2,.dey23q |
paddd mm3,.dey13q |
movq .cby2,mm0 |
movq .cby1,mm1 |
movq .cey2,mm2 |
movq .cey1,mm3 |
else |
mov edx,.dbx13 |
add .cbx1,edx |
mov eax,.dbx23 |
add .cbx2,eax |
mov ebx,.dby13 |
add .cby1,ebx |
mov edx,.dby23 |
add .cby2,edx |
mov eax,.dex13 |
add .cex1,eax |
mov ebx,.dex23 |
add .cex2,ebx |
mov edx,.dey13 |
add .cey1,edx |
mov eax,.dey23 |
add .cey2,eax |
end if |
mov ebx,.dz13 |
add .cz1,ebx |
mov edx,.dz23 |
add .cz2,edx |
;else |
; movq mm0,.db13q |
; movq mm1,.cbx1q |
inc ecx |
cmp cx,.y3 |
jl .loop23 |
.loop23_done: |
mov esp,ebp |
ret 34 |
.call_line: |
; Local helper: draws the current scanline by calling two_tex_line_z. |
; ecx is pushed last and becomes the callee's y parameter; eax and ebx are |
; loaded with .cx1 and .cx2 shifted right by ROUND (the callee's x1, x2). |
; The 14 dwords pushed below are the 56 bytes of stack parameters that |
; two_tex_line_z removes with "ret 56"; they are pushed in the reverse of |
; the order of the callee's [ebp+4]..[ebp+56] equates. |
; pushad/popad keep all general registers intact for the caller, including |
; ebp, which two_tex_line_z overwrites with its own frame pointer. |
pushad |
push .cz1 |
push .cz2 |
push .z_buff |
push .t_bmap |
push .t_emap |
push dword .cey2 |
push .cex2 |
push dword .cey1 |
push .cex1 |
push dword .cby2 |
push .cbx2 |
push dword .cby1 |
push .cbx1 |
push ecx |
mov eax,.cx1 |
sar eax,ROUND |
mov ebx,.cx2 |
sar ebx,ROUND |
call two_tex_line_z |
popad |
ret |
two_tex_line_z: |
; Draws one horizontal, z-buffered span. Every pixel that passes the depth |
; test is written as the per-channel product of the two texture samples, |
; shifted right by 8. |
; The frame pointer is set with "mov ebp,esp" and is not restored here, so |
; ebp (and the other general registers used below) are clobbered. |
; The 56 bytes of stack parameters are removed by the final "ret 56". |
;--------------in: eax - x1 |
;-------------- ebx - x2 |
;-------------- edi - pointer to screen buffer |
;stack - other parameters : |
.y equ dword [ebp+4] |
.bx1 equ [ebp+8] ; --- |
.by1 equ [ebp+12] ; | |
.bx2 equ [ebp+16] ; | |
.by2 equ [ebp+20] ; |> b. texture and e. texture coords |
.ex1 equ [ebp+24] ; |> shifted shl ROUND |
.ey1 equ [ebp+28] ; | |
.ex2 equ [ebp+32] ; | |
.ey2 equ [ebp+36] ; --- |
.emap equ [ebp+40] ; e. texture offset |
.bmap equ [ebp+44] ; b. texture offset |
.z_buff equ dword [ebp+48] |
.z2 equ dword [ebp+52] ; -- |> z coords shifted |
.z1 equ dword [ebp+56] ; -- shl CATMULL_SHIFT |
; The locals below ebp are created by the pushes / "sub esp" further down, |
; so these offsets are only valid while that push order is kept. |
.x1 equ dword [ebp-4] |
.x2 equ dword [ebp-8] |
.dbx equ [ebp-12] |
.dex equ [ebp-16] |
.dby equ [ebp-20] |
.dey equ [ebp-24] |
.dz equ dword [ebp-28] |
.cbx equ [ebp-32] |
.cex equ [ebp-36] |
.cby equ [ebp-40] |
.cey equ [ebp-44] |
.cz equ dword [ebp-48] |
.czbuff equ dword [ebp-52] |
mov ebp,esp |
; Reject the span when y < 0 or y >= size_y_var-1. |
mov ecx,.y |
or ecx,ecx |
jl .bl_end |
mov dx,word[size_y_var] |
dec dx |
cmp cx,dx ;word[size_y_var] ;SIZE_Y |
jge .bl_end |
; Order the end points so that x1 < x2. An empty span (x1 = x2) is skipped; |
; when x1 > x2 both ends are exchanged together with their texture |
; coordinates and z values. |
cmp eax,ebx |
jl @f |
je .bl_end |
xchg eax,ebx |
if Ext=NON |
mov edx,.bx1 |
xchg edx,.bx2 |
mov .bx1,edx |
mov edx,.by1 |
xchg edx,.by2 |
mov .by1,edx |
mov edx,.ex1 |
xchg edx,.ex2 |
mov .ex1,edx |
mov edx,.ey1 |
xchg edx,.ey2 |
mov .ey1,edx |
else |
; each movq moves an adjacent x/y pair (two dwords) at once |
movq mm0,.bx1 |
movq mm1,.ex1 |
movq mm2,.bx2 |
movq mm3,.ex2 |
movq .bx2,mm0 |
movq .ex2,mm1 |
movq .bx1,mm2 |
movq .ex1,mm3 |
end if |
mov edx,.z1 |
xchg edx,.z2 |
mov .z1,edx |
@@: |
push eax ebx ;store x1, x2 |
; Skip spans that lie completely outside the screen in x |
; (x1 > size_x_var-1 or x2 <= 0). |
mov ebx,.x1 |
movzx edx,word[size_x_var] |
dec edx |
cmp ebx,edx |
; cmp bx,word[size_x_var] ;SIZE_X |
jg .bl_end |
cmp .x2,0 |
jle .bl_end |
; ebx = x2 - x1, the divisor for all per-pixel deltas |
mov ebx,.x2 |
sub ebx,.x1 |
if Ext >= SSE |
; Reserve the four delta slots (.dbx .dex .dby .dey) and fill them with |
; (end - start) / (x2 - x1), computed in packed single-precision floats. |
sub esp,16 |
cvtsi2ss xmm3,ebx ;rcps |
shufps xmm3,xmm3,0 |
; movq mm0,.bx1q |
; movq mm1,.bx2q |
; movq mm2,.ex1q |
; movq mm3,.ex2q |
; psubd mm1,mm0 |
; psubd mm3,mm2 |
; cvtpi2ps xmm1,mm1 |
; movlhps xmm1,xmm1 |
; cvtpi2ps xmm1,mm3 |
cvtpi2ps xmm0,.bx1 ;mm0 ; bx1; by1 |
movlhps xmm0,xmm0 |
cvtpi2ps xmm0,.ex1 ;mm2 ; ex1; ey1 |
cvtpi2ps xmm1,.bx2 ;mm1 ; bx2; by2 |
movlhps xmm1,xmm1 |
cvtpi2ps xmm1,.ex2 ;mm3 ; ex2; ey2 |
subps xmm1,xmm0 |
; hi lo |
divps xmm1,xmm3 ; xmm1 -> dby; dbx; dey; dex |
; reorder to dby; dey; dbx; dex so that each 64-bit half matches the |
; memory layout of the (.dex,.dbx) and (.dey,.dby) slot pairs |
shufps xmm1,xmm1,11011000b |
cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords |
movhlps xmm1,xmm1 |
cvtps2pi mm1,xmm1 |
movq .dex,mm0 ; hi - lo -> dbx, dex |
movq .dey,mm1 ; hi - lo -> dby, dey |
else |
; Integer variant: the push order creates .dbx, .dex, .dby, .dey. |
mov eax,.bx2 ; calc .dbx |
sub eax,.bx1 |
cdq |
idiv ebx |
push eax |
mov eax,.ex2 ; calc .dex |
sub eax,.ex1 |
cdq |
idiv ebx |
push eax |
mov eax,.by2 ; calc .dby |
sub eax,.by1 |
cdq |
idiv ebx |
push eax |
mov eax,.ey2 ; calc .dey |
sub eax,.ey1 |
cdq |
idiv ebx |
push eax |
end if |
mov eax,.z2 ; calc .dz |
sub eax,.z1 |
cdq |
idiv ebx |
push eax |
cmp .x1,0 ; set correctly begin variable |
jge @f ; CLIPPING ON FUNCTION |
; the span starts left of the screen: advance every start value by |
; delta * (-x1) and set x1 to 0 (eax still holds .dz from the idiv above) |
mov ebx,.x1 |
neg ebx |
imul ebx ; eax = .dz * abs(.x1) |
add .z1,eax |
mov .x1,0 |
mov eax,.dbx |
imul ebx |
add .bx1,eax |
mov eax,.dby |
imul ebx |
add .by1,eax |
mov eax,.dex |
imul ebx |
add .ex1,eax |
mov eax,.dey |
imul ebx |
add .ey1,eax |
@@: |
; Clamp x2 to size_x_var. |
movzx eax,word[size_x_var] ;SIZE_X ;word[size_x_var] |
mov ebx,.x2 |
cmp eax,ebx |
jg @f |
mov .x2,eax |
@@: |
; movd mm0,eax |
; movd mm1,.x2 |
; pminsw mm0,mm1 |
; movd .x2,mm0 |
; cmp .x2,SIZE_X ;eax | |
; jl @f |> this dont work idk cause |
; mov .x2,SIZE_X ;eax | |
@@: |
; movzx eax,word[size_x_var] ;calc memory begin in buffers |
; eax still holds size_x_var from the clamp above: |
; pixel index = size_x_var * y + x1 (mul also overwrites edx) |
mov ebx,.y |
mul ebx |
mov ebx,.x1 |
add eax,ebx |
mov ebx,eax |
; screen buffer uses 3 bytes per pixel, the z-buffer 4 bytes per pixel |
lea eax,[eax*3] |
add edi,eax ; edi - screen |
mov esi,.z_buff ; z-buffer filled with dd variables |
shl ebx,2 |
add esi,ebx ; esi - Z buffer |
; ecx = number of pixels to process |
mov ecx,.x2 |
sub ecx,.x1 |
; init current variables |
; (push order creates .cbx, .cex, .cby, .cey, .cz, .czbuff) |
push dword .bx1 ;.by1 .ex1 .ey1 .z1 esi |
push dword .ex1 |
push dword .by1 |
push dword .ey1 |
push .z1 ; current z shl CATMULL_SHIFT |
push esi |
if Ext >= MMX |
; mm0 = 0 (used for byte->word unpacking), mm3/mm4 hold the current |
; texture coordinates, edx = current z-buffer address |
pxor mm0,mm0 |
movq mm3,.cex ; hi - lo -> cbx; cex |
movq mm4,.cey ; hi - lo -> cby; cey |
; movq mm5,mm3 |
; movq mm6,mm4 |
; psrad mm5,ROUND |
; psrad mm6,ROUND |
; movq .ceyq,mm5 |
; movq .cbyq,mm6 |
mov edx,.czbuff |
else |
cld |
end if |
.draw: |
; Depth test: the pixel is drawn only when .cz is (signed) less than the |
; value stored in the z-buffer. |
; if TEX = SHIFTING ;bump drawing only in shifting mode |
if Ext=NON |
mov esi,.czbuff ; .czbuff current address in buffer |
mov ebx,.cz ; .cz - cur z position |
cmp ebx,dword[esi] |
else |
mov ebx,.cz |
cmp ebx,dword[edx] |
end if |
jge .skip |
; Texel address = base + 3 * (((y sar ROUND) shl TEX_SHIFT) + (x sar ROUND)) |
if Ext=NON |
mov eax,.cby |
mov esi,.cbx |
sar eax,ROUND |
sar esi,ROUND |
shl eax,TEX_SHIFT ;- |
add esi,eax |
lea esi,[esi*3] ;- ; esi - current b. texture address |
add esi,.bmap |
mov ebx,.cex ;.cex - current env map X |
mov eax,.cey ;.cey - current env map y |
sar ebx,ROUND |
sar eax,ROUND |
shl eax,TEX_SHIFT |
add ebx,eax |
lea ebx,[ebx*3] |
add ebx,.emap |
else |
; both texel addresses are computed at once: low dword = e. texture, |
; high dword = b. texture; .emap and .bmap are adjacent dwords, so a |
; single paddd adds both base addresses |
movq mm5,mm4 ;.cey |
psrad mm5,ROUND |
pslld mm5,TEX_SHIFT |
movq mm6,mm3 ;.cex |
psrad mm6,ROUND |
paddd mm5,mm6 |
movq mm6,mm5 |
paddd mm5,mm5 |
paddd mm5,mm6 |
paddd mm5,.emap |
movd esi,mm5 |
psrlq mm5,32 |
movd ebx,mm5 |
end if |
if Ext>=MMX |
; multiply the two texels channel by channel and keep the high byte |
movd mm1,[esi] |
movd mm2,[ebx] |
punpcklbw mm1,mm0 |
punpcklbw mm2,mm0 |
pmullw mm1,mm2 |
psrlw mm1,8 |
packuswb mm1,mm0 |
; note: this stores 4 bytes although edi advances by only 3 per pixel |
movd [edi],mm1 |
mov ebx,.cz |
mov dword[edx],ebx |
else |
cld ; esi - tex b. |
lodsb ; ebx - tex e. |
mov dl,[ebx] |
mul dl |
shr ax,8 |
stosb |
inc ebx |
lodsb |
mov dl,[ebx] |
mul dl |
shr ax,8 |
stosb |
inc ebx |
lodsb |
mov dl,[ebx] |
mul dl |
shr ax,8 |
stosb |
mov ebx,.cz |
mov esi,.czbuff |
mov dword[esi],ebx |
jmp .no_skip |
end if |
.skip: |
; advance the screen pointer by one pixel; when the MMX store above was |
; assembled, drawn pixels fall through to here as well |
add edi,3 |
; Step the z-buffer pointer, texture coordinates and z to the next pixel. |
if Ext = NON |
.no_skip: |
add .czbuff,4 |
mov eax,.dbx |
add .cbx,eax |
mov eax,.dby |
add .cby,eax |
mov eax,.dex |
add .cex,eax |
mov eax,.dey |
add .cey,eax |
else |
add edx,4 |
paddd mm3,.dex |
paddd mm4,.dey |
; movq mm5,mm3 |
; movq mm6,mm4 |
; psrad mm5,ROUND |
; psrad mm6,ROUND |
; movq .cex,mm3 |
; movq .cey,mm4 |
end if |
mov eax,.dz |
add .cz,eax |
if Ext = NON |
dec ecx |
jnz .draw |
else |
loop .draw |
end if |
.bl_end: |
; discard all locals and return, removing the 56 bytes of stack parameters |
mov esp,ebp |
ret 56 |
/programs/demos/view3ds/view3ds.asm |
---|
1,5 → 1,5 |
; application : View3ds ver. 0.071 - tiny .3ds and .asc files viewer |
; application : View3ds ver. 0.074 - tiny .3ds and .asc files viewer |
; with a few graphics effects demonstration. |
; compiler : FASM |
; system : KolibriOS |
38,6 → 38,9 |
SSE2 = 3 |
SSE3 = 4 |
Ext = SSE3 ;Ext={ NON | MMX | SSE | SSE2 | SSE3 } |
; For now only the SSE2 and SSE3 versions work correctly. If you have an older CPU, |
; use an older version of the app. Probably ver 005 will be OK, but it needs |
; re-editing to support new Kolibri features. |
; 0 for short names (Menuet-compatible), 1 for long names (Kolibri features) |
USE_LFN = 1 ; App is Kolibri only now. |
54,6 → 57,8 |
dd 0x0 ; I_Icon |
START: ; start of execution |
rdtsc |
mov [rand_seed],ax |
cld |
push dword (SIZE_Y shr 3) * 3 |
fninit |
90,9 → 95,7 |
; set point(0,0,0) in center and calc all coords |
; to be in <-1.0,1.0> |
call normalize_all_light_vectors |
if Ext >= SSE3 |
call copy_lights ; to aligned float |
end if |
call init_triangles_normals2 |
call init_point_normals |
call init_envmap2 |
100,17 → 103,25 |
call generate_texture2 |
call init_sincos_tab |
call do_color_buffer ; intit color_map |
if Ext >= SSE3 |
call init_point_lights |
mov [fire_flag],0 ; proteza |
end if |
mov edi,bumpmap |
call calc_bumpmap |
call calc_bumpmap_coords ; bump and texture mapping |
call do_edges_list |
call draw_window |
;mov [draw_win_at_first],0 |
;mov eax,40 ; set events mask |
;mov ebx,1100000000000000000000000100111b |
;int 0x40 |
if Ext > SSE2 |
mov eax,1 |
cpuid |
bt ecx,0 ; is sse3 on board? |
jc @f |
mov [max_dr_flg],12 |
mov [isSSE3],0 |
@@: |
end if |
still: |
cmp [edit_flag],1 |
jne @f |
122,7 → 133,16 |
mov ebx,111b |
.int: |
int 0x40 |
if Ext > SSE2 |
cmp [ray_shd_flag],1 |
jne @f |
cmp [isSSE3],1 |
jne @f |
mov eax,10 |
jmp .intt |
end if |
@@: |
mov eax,23 |
mov ebx,TIMEOUT |
cmp [speed_flag],0 |
134,6 → 154,7 |
mov eax,10 |
@@: |
.intt: |
int 0x40 |
cmp eax,1 ; redraw event ? |
263,8 → 284,9 |
jne .next_m5 ; 'grd ' 1 |
call make_random_lights ; 'env ' 2 |
call normalize_all_light_vectors ; 'bump' 3 |
call copy_lights |
if Ext >= SSE3 |
call copy_lights |
call init_point_lights ; for ex. ray casting |
end if |
call do_color_buffer ; intit color_map ; 'tex ' 4 |
396,7 → 418,7 |
cmp [move_flag],0 |
jne @f |
.x_minus: |
sub [vect_x],10 |
sub word[vect_x],10 |
jmp .next2 |
@@: |
cmp [move_flag],1 |
414,7 → 436,7 |
cmp [move_flag],0 |
jne @f |
.x_plus: |
add [vect_x],10 |
add word[vect_x],10 |
jmp .next3 |
@@: |
cmp [move_flag],1 |
522,8 → 544,6 |
.no_sort: |
cmp [dr_flag],7 ; fill if 2tex and texgrd |
jge @f |
cmp [catmull_flag],0 ;non fill if Catmull = off |
je .non_f |
cmp [dr_flag],6 ; non fill if dots |
je .non_f |
@@: |
534,46 → 554,74 |
call draw_dots |
jmp .blurrr |
@@: |
if Ext > SSE2 |
cmp [ray_shd_flag],1 ;non fill if Catmull = off |
jne @f |
cmp [isSSE3],1 |
jne @f |
mov ax,100 |
jmp .dr |
@@: |
end if |
movzx ax,[dr_flag] |
.dr: |
call draw_triangles ; draw all triangles from the list |
cmp [edit_flag],0 |
jz .no_edit |
call clear_vertices_index |
call draw_handlers |
movzx eax,[dr_flag] |
movzx ebx,[ray_shd_flag] |
shl ebx,10 |
or eax,ebx |
call draw_handlers |
; call edit |
.no_edit: |
.blurrr: |
cmp [sinus_flag],0 |
je @f |
call do_sinus |
movzx eax,[dr_flag] |
movzx ebx,[ray_shd_flag] |
shl ebx,10 |
or eax,ebx |
cmp [sinus_flag],0 |
je .no_sin |
movzx eax,[dr_flag] |
movzx ebx,[ray_shd_flag] |
shl ebx,10 |
or eax,ebx |
call do_sinus |
; jmp .finito |
.no_sin: |
@@: |
cmp [fire_flag],0 |
jne @f |
movzx ecx,[fire_flag] |
cmp [fire_flag],1 |
je @f |
cmp [blur_flag],0 |
je .no_blur ; no blur, no fire |
movzx ecx,[blur_flag] |
@@: |
movzx eax,[dr_flag] |
movzx ebx,[ray_shd_flag] |
shl ebx,10 |
or eax,ebx |
call blur_screen ; blur and fire |
jmp .no_blur |
@@: |
cmp [emboss_flag],0 |
jne .emb ; if emboss=true -> no fire |
movzx ecx,[fire_flag] |
call blur_screen ; blur and fire |
; jmp .finito |
.no_blur: ; no blur, no fire |
cmp [emboss_flag],0 |
je @f |
.emb: |
movzx eax,[dr_flag] |
movzx ebx,[ray_shd_flag] |
shl ebx,10 |
or eax,ebx |
call do_emboss |
.finito: |
@@: |
@@: |
cmp [inc_bright_flag],0 ; increase brightness |
je .no_inc_bright |
movzx ebx,[inc_bright_flag] |
706,7 → 754,9 |
mov eax,7 ; put image |
mov ebx,[screen_ptr] |
mov ecx,[size_y_var] |
mov edx,[offset_y] |
mov edx,[offset_y] |
cmp [ray_shd_flag],1 |
jge .ff |
cmp [dr_flag],11 |
jge .ff |
int 0x40 |
764,6 → 814,7 |
include '3stencil.inc' |
include '3glass.inc' |
include '3glass_tex.inc' |
include '3ray_shd.inc' |
end if |
clear_vertices_index: |
mov edi,[vertices_index_ptr] |
1075,7 → 1126,7 |
fldpi |
fadd st,st |
mov esi,[points_ptr] |
mov edi,tex_points |
mov edi,[tex_points_ptr] |
mov ecx,[points_count_var] |
inc ecx |
; cmp [map_tex_flag],1 |
1389,7 → 1440,8 |
mov esp,ebp |
pop ebp |
ret |
if Ext >= SSE3 |
if Ext >= SSE2 |
init_point_normals: |
.z equ dword [ebp-8] |
.y equ dword [ebp-12] |
1397,7 → 1449,6 |
.point_number equ dword [ebp-28] |
.hit_faces equ dword [ebp-32] |
fninit |
push ebp |
mov ebp,esp |
sub esp,64 |
1438,19 → 1489,25 |
jne .ipn_check_face |
cvtsi2ss xmm6,.hit_faces |
movaps xmm7,.x |
rcpss xmm6,xmm6 |
shufps xmm6,xmm6,11000000b |
mulps xmm7,xmm6 |
movaps xmm6,xmm7 |
mulps xmm6,xmm6 |
andps xmm6,[zero_hgst_dd] |
haddps xmm6,xmm6 |
haddps xmm6,xmm6 |
rsqrtps xmm6,xmm6 |
mulps xmm7,xmm6 |
movlps [edi],xmm7 |
movhlps xmm7,xmm7 |
movss [edi+8],xmm7 |
call normalize_vector |
; movaps xmm6,xmm7 |
; mulps xmm6,xmm6 |
; andps xmm6,[zero_hgst_dd] |
; haddps xmm6,xmm6 |
; haddps xmm6,xmm6 |
; rsqrtps xmm6,xmm6 |
; mulps xmm7,xmm6 |
; movlps [edi],xmm7 |
; movhlps xmm7,xmm7 |
; movss [edi+8],xmm7 |
add edi,12 |
inc .point_number |
mov edx,.point_number |
1576,11 → 1633,9 |
pop ecx |
sub ecx,1 |
jnz @b |
; cmp dword[ebp],-1 |
; jne @b |
ret |
if Ext >= SSE3 |
copy_lights: ; after normalising ! |
mov esi,lights |
mov edi,lights_aligned |
1610,8 → 1665,8 |
pop ecx |
loop .again |
ret |
end if |
clrscr: |
mov edi,[screen_ptr] |
movzx ecx,word[size_x_var] |
1654,7 → 1709,37 |
draw_triangles: |
; in: eax - render draw model |
.tri_no equ dword[ebp-60] |
.point_index3 equ [ebp-8] |
.point_index2 equ [ebp-12] |
.point_index1 equ [ebp-16] |
.yy3 equ [ebp-18] |
.xx3 equ [ebp-20] |
.yy2 equ [ebp-22] |
.xx2 equ [ebp-24] |
.yy1 equ [ebp-26] |
.xx1 equ [ebp-28] |
.zz3 equ [ebp-30] |
.zz2 equ [ebp-32] |
.zz1 equ [ebp-34] |
.index3x12 equ [ebp-38] |
.index2x12 equ [ebp-42] |
.index1x12 equ [ebp-46] |
.temp1 equ dword[ebp-50] |
.temp2 equ dword[ebp-54] |
.dr_flag equ word[ebp-56] |
push ebp |
mov ebp,esp |
sub esp,60 |
; movzx ax,[dr_flag] |
mov .dr_flag,ax |
emms |
; update translated list MMX required |
cmp [vertex_edit_no],0 |
1671,7 → 1756,8 |
movd dword[eax],mm1 |
@@: |
if Ext >= SSE3 |
cmp [dr_flag],13 |
cmp .dr_flag,13 |
jnge .no_stencil |
mov esi,[triangles_ptr] |
mov ecx,[triangles_count_var] |
1734,124 → 1820,57 |
je .draw_smooth_line |
mov esi,[triangles_ptr] |
mov ecx,[triangles_count_var] |
xor ecx,ecx ;mov ecx,[triangles_count_var] |
.again_dts: |
; push ebp |
push esi |
push ecx |
mov ebp,[points_translated_ptr] |
if Ext >= SSE2 |
mov eax,dword[esi] |
mov [point_index1],eax |
lea eax,[eax*3] |
add eax,eax |
push ebp |
add ebp,eax |
mov eax,[ebp] |
; cmp [vertex_edit_no],0 |
; jne @f |
; |
; @@: |
mov dword[xx1],eax |
mov eax,[ebp+4] |
mov [zz1],ax |
mov .tri_no,ecx |
pop ebp |
mov eax,[esi] |
mov ebx,[esi+4] |
mov ecx,[esi+8] |
mov .point_index1,eax |
mov .point_index2,ebx |
mov .point_index3,ecx |
imul eax,[i12] |
imul ebx,[i12] |
imul ecx,[i12] |
mov .index1x12,eax |
mov .index2x12,ebx |
mov .index3x12,ecx |
mov eax,dword[esi+4] |
mov [point_index2],eax |
lea eax,[eax*3] |
add eax,eax |
push ebp |
add ebp,eax |
mov eax,[ebp] |
mov dword[xx2],eax |
mov eax,[ebp+4] |
mov [zz2],ax |
pop ebp |
shr eax,1 |
shr ebx,1 |
shr ecx,1 |
add eax,[points_translated_ptr] |
add ebx,[points_translated_ptr] |
add ecx,[points_translated_ptr] |
push word[eax+4] |
push word[ebx+4] |
push word[ecx+4] |
pop word .zz3 |
pop word .zz2 |
pop word .zz1 |
mov eax,[eax] |
mov ebx,[ebx] |
mov ecx,[ecx] |
ror eax,16 |
ror ebx,16 |
ror ecx,16 |
mov .xx1,eax |
mov .xx2,ebx |
mov .xx3,ecx |
mov eax,dword[esi+8] ; xyz3 = [ebp+[esi+4]*6] |
mov [point_index3],eax |
lea eax,[eax*3] |
add eax,eax |
; push ebp |
add ebp,eax |
mov eax,[ebp] |
mov dword[xx3],eax |
mov eax,[ebp+4] |
mov [zz3],ax |
else |
movq mm0,[esi] ; don't know MMX |
mov qword[point_index1],mm0 |
; shr eax,16 |
; mov [point_index2],ax |
mov eax,dword[esi+8] |
mov [point_index3],eax |
movdqu xmm0,[esi] |
paddd xmm0,xmm0 |
movdqa xmm1,xmm0 |
paddd xmm0,xmm0 |
paddd xmm0,xmm1 |
movd eax,xmm0 |
psrldq xmm0,4 |
movd ebx,xmm0 |
psrldq xmm0,4 |
movd ecx,xmm0 |
and eax,0FFFFh |
and ebx,0FFFFh |
and ecx,0FFFFh |
movq mm0,[ebp+eax] |
movq mm1,[ebp+ebx] |
movq mm2,[ebp+ecx] |
movq qword[xx1],mm0 |
movq qword[xx2],mm1 |
movq qword[xx3],mm2 |
; emms |
end if ; ********************************* |
if 0 |
cmp [vertex_edit_no],0 |
jne .no_edit |
mov ax,[vertex_edit_no] |
dec ax |
cmp ax,[point_index1] |
jne @f |
movd mm0,[edit_start_x] |
psubw mm0,[edit_end_x] |
movd mm1,dword[xx1] |
paddw mm1,mm0 |
movd dword[xx1],mm1 |
jmp .no_edit |
@@: |
cmp ax,[point_index2] |
jne @f |
movd mm0,[edit_start_x] |
psubw mm0,[edit_end_x] |
movd mm1,dword[xx2] |
paddw mm1,mm0 |
movd dword[xx2],mm1 |
jmp .no_edit |
@@: |
cmp ax,[point_index3] |
jne @f |
movd mm0,[edit_start_x] |
psubw mm0,[edit_end_x] |
movd mm1,dword[xx3] |
paddw mm1,mm0 |
movd dword[xx3],mm1 |
jmp .no_edit |
@@: |
.no_edit: |
end if |
push esi ; |
; push esi |
fninit ; DO culling AT FIRST |
cmp [culling_flag],1 ; (if culling_flag = 1) |
jne .no_culling |
mov esi,point_index1 ; ********************************* |
lea esi,.point_index1 ; ********************************* |
mov ecx,3 ; |
@@: |
mov eax,dword[esi] |
1858,76 → 1877,77 |
lea eax,[eax*3] |
shl eax,2 |
add eax,[points_normals_rot_ptr] |
; lea eax,[eax+point_normals_rotated] |
fld dword[eax+8] ; ***************************** |
ftst ; CHECKING OF Z COOFICIENT OF |
fstsw ax ; NORMAL VECTOR |
sahf |
jb @f |
ffree st |
mov eax,[eax+8] |
bt eax,31 |
jc @f |
; ***************************** |
; CHECKING OF Z COEFFICIENT OF |
; NORMAL VECTOR |
add esi,4 |
loop @b |
jmp .end_draw ; non visable |
@@: |
ffree st ;is visable |
.no_culling: |
cmp [dr_flag],0 ; draw type flag |
cmp .dr_flag,0 ; draw type flag |
je .flat_draw |
cmp [dr_flag],2 |
cmp .dr_flag,2 |
je .env_mapping |
cmp [dr_flag],3 |
cmp .dr_flag,3 |
je .bump_mapping |
cmp [dr_flag],4 |
cmp .dr_flag,4 |
je .tex_mapping |
cmp [dr_flag],5 |
cmp .dr_flag,5 |
je .rainbow |
cmp [dr_flag],7 |
cmp .dr_flag,7 |
je .grd_tex |
cmp [dr_flag],8 |
cmp .dr_flag,8 |
je .two_tex |
cmp [dr_flag],9 |
cmp .dr_flag,9 |
je .bump_tex |
cmp [dr_flag],10 |
cmp .dr_flag,10 |
je .cubic_env_mapping |
cmp [dr_flag],11 |
cmp .dr_flag,11 |
je .draw_smooth_line |
if Ext >= SSE3 |
cmp [dr_flag],12 |
cmp .dr_flag,12 |
je .r_phg |
cmp [dr_flag],13 |
cmp .dr_flag,13 |
je .glass |
cmp [dr_flag],14 |
cmp .dr_flag,14 |
je .glass_tex |
end if ; **************** |
mov esi,point_index3 ; do Gouraud shading |
cmp .dr_flag,100 |
je .ray_shd |
end if |
push ebp ; **************** |
lea esi,.index3x12 ; do Gouraud shading |
lea edi,.zz3 |
mov ecx,3 |
.again_grd_draw: |
mov eax,dword[esi] |
shl eax,2 |
lea eax,[eax*3] |
add eax,[points_normals_rot_ptr] |
; texture x=(rotated point normal -> x * 255)+255 |
fld dword[eax] ; x cooficient of normal vector |
fimul [correct_tex] |
fiadd [correct_tex] |
fistp [temp1] |
fistp .temp1 |
; texture y=(rotated point normal -> y * 255)+255 |
fld dword[eax+4] ; y cooficient |
fimul [correct_tex] |
fiadd [correct_tex] |
fistp [temp2] |
fistp .temp2 |
mov eax,[temp2] |
mov ebx,[temp1] |
mov eax,.temp2 |
mov ebx,.temp1 |
and ebx,0xfffffff |
shl eax,TEX_SHIFT |
add eax,ebx |
lea eax,[eax*3+color_map] |
mov eax,dword[eax] |
; cmp [catmull_flag],1 ; put on stack z coordinate if necessary |
; jne @f |
lea edx,[ecx*3] |
push word[edx*2+xx1-2] ; zz1 ,2 ,3 |
; @@: |
push word[edi] ; zz1 ,2 ,3 |
ror eax,16 ; eax -0xxxrrggbb -> 0xggbbxxrr |
xor ah,ah |
push ax ;r |
1938,98 → 1958,55 |
push ax ;b |
sub esi,4 |
sub edi,2 |
dec cx |
jnz .again_grd_draw |
jmp .both_draw |
; movzx edi,[point_index3] ;gouraud shading according to light vector |
; lea edi,[edi*3] |
; lea edi,[4*edi+point_normals_rotated] ; edi - normal |
; mov esi,light_vector |
; call dot_product |
; fabs |
; fimul [orginal_color_r] |
; fistp [temp_col] |
; and [temp_col],0x00ff |
; push [temp_col] |
; push [temp_col] |
; push [temp_col] |
.rainbow: |
push ebp |
push word .zz3 |
; movzx edi,[point_index2] |
; lea edi,[edi*3] |
; lea edi,[4*edi+point_normals_rotated] ; edi - normal |
; mov esi,light_vector |
; call dot_product |
; fabs |
; fimul [orginal_color_r] |
; fistp [temp_col] |
; and [temp_col],0x00ff |
; push [temp_col] |
; push [temp_col] |
; push [temp_col] |
; movzx edi,[point_index1] |
; lea edi,[edi*3] |
; lea edi,[4*edi+point_normals_rotated] ; edi - normal |
; mov esi,light_vector |
; call dot_product |
; fabs |
; fimul [orginal_color_r] |
; fistp [temp_col] |
; and [temp_col],0x00ff |
; push [temp_col] |
; push [temp_col] |
; push [temp_col] |
.rainbow: |
; cmp [catmull_flag],1 ; put on stack z coordinate if necessary |
; jne @f |
push [zz3] |
@@: |
mov eax,dword[yy3] |
mov eax, .xx3 |
ror eax,16 |
mov ebx,0x00ff00ff |
and eax,ebx |
push eax |
neg al |
push ax |
push [zz2] |
push word .zz2 |
mov eax,dword[yy2] |
mov eax, .xx2 |
ror eax,16 |
and eax,ebx |
push eax |
neg al |
push ax |
push [zz1] |
push word .zz1 |
mov eax,dword[yy1] |
mov eax, .xx1 |
ror eax,16 |
and eax,ebx |
push eax |
neg al |
push ax |
.both_draw: |
mov eax,dword[xx1] |
ror eax,16 |
mov ebx,dword[xx2] |
ror ebx,16 |
mov ecx,dword[xx3] |
ror ecx,16 |
mov eax, .xx1 |
mov ebx, .xx2 |
mov ecx, .xx3 |
mov edi,[screen_ptr] |
mov esi,[Zbuffer_ptr] |
call gouraud_triangle_z |
pop ebp |
jmp .end_draw |
.flat_draw: ;************************** |
fninit ; FLAT DRAWING |
mov eax,[point_index1] |
mov ebx,[point_index2] |
mov ecx,[point_index3] |
shl eax,2 |
shl ebx,2 |
shl ecx,2 |
lea eax,[eax*3] ;+point_normals_rotated] |
mov eax,.index1x12 |
mov ebx,.index2x12 |
mov ecx,.index3x12 |
add eax,[points_normals_rot_ptr] |
lea ebx,[ebx*3] ;+point_normals_rotated] |
add ebx,[points_normals_rot_ptr] |
lea ecx,[ecx*3] ;+point_normals_rotated] |
add ecx,[points_normals_rot_ptr] |
fld dword[eax] ; x cooficient of normal vector |
fadd dword[ebx] |
2037,7 → 2014,7 |
fidiv [i3] |
fimul [correct_tex] |
fiadd [correct_tex] |
fistp [temp1] ;dword[esp-4] ; x temp variables |
fistp .temp1 ;dword[esp-4] ; x temp variables |
fld dword[eax+4] ; y cooficient of normal vector |
fadd dword[ebx+4] |
fadd dword[ecx+4] |
2044,12 → 2021,12 |
fidiv [i3] |
fimul [correct_tex] |
fiadd [correct_tex] |
fistp [temp2] ;dword[esp-8] ; y |
mov edx,[temp2] ;dword[esp-8] |
fistp .temp2 ;dword[esp-8] ; y |
mov edx,.temp2 ;dword[esp-8] |
and edx,0xfffffff |
and [temp1],0xfffffff |
and .temp1,0xfffffff |
shl edx,TEX_SHIFT |
add edx,[temp1] ;dword[esp-4] |
add edx,.temp1 ;dword[esp-4] |
lea eax,[3*edx] |
add eax,color_map |
2071,34 → 2048,32 |
; shl eax,8 |
; mov edx,eax |
mov eax,dword[xx1] |
ror eax,16 |
mov ebx,dword[xx2] |
ror ebx,16 |
mov ecx,dword[xx3] |
ror ecx,16 |
mov eax,dword .xx1 |
mov ebx,dword .xx2 |
mov ecx,dword .xx3 |
mov edi,[screen_ptr] |
mov esi,[Zbuffer_ptr] |
push word[zz3] |
push word[zz2] |
push word[zz1] |
push ebp |
push word .zz3 |
push word .zz2 |
push word .zz1 |
call flat_triangle_z |
pop ebp |
jmp .end_draw |
.env_mapping: |
push [zz3] |
push [zz2] |
push [zz1] |
push ebp |
push word .zz3 |
push word .zz2 |
push word .zz1 |
mov esi,point_index1 |
lea esi, .index1x12 |
sub esp,12 |
mov edi,esp |
mov ecx,3 |
@@: |
mov eax,dword[esi] |
lea eax,[eax*3] |
shl eax,2 |
add eax,[points_normals_rot_ptr] ;point_normals_rotated |
; texture x=(rotated point normal -> x * 255)+255 |
fld dword[eax] |
2115,33 → 2090,29 |
add esi,4 |
loop @b |
mov eax,dword[xx1] |
ror eax,16 |
mov ebx,dword[xx2] |
ror ebx,16 |
mov ecx,dword[xx3] |
ror ecx,16 |
mov eax, .xx1 |
mov ebx,dword .xx2 |
mov ecx,dword .xx3 |
mov edi,[screen_ptr] |
mov esi,envmap |
mov edx,[Zbuffer_ptr] |
call tex_triangle_z |
pop ebp |
jmp .end_draw |
;+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
.cubic_env_mapping: |
push [zz3] |
push [zz2] |
push [zz1] |
push ebp |
push word .zz3 |
push word .zz2 |
push word .zz1 |
mov esi,point_index1 |
lea esi,.index1x12 |
sub esp,12 |
mov edi,esp |
mov ecx,3 |
@@: |
mov eax,dword[esi] |
lea eax,[eax*3] |
shl eax,2 |
add eax,[points_normals_rot_ptr] |
fld dword[eax] |
2171,37 → 2142,32 |
add esi,4 |
loop @b |
mov eax,dword[xx1] |
ror eax,16 |
mov ebx,dword[xx2] |
ror ebx,16 |
mov ecx,dword[xx3] |
ror ecx,16 |
mov eax, .xx1 |
mov ebx, .xx2 |
mov ecx, .xx3 |
mov edi,[screen_ptr] |
mov esi,envmap_cub |
mov edx,[Zbuffer_ptr] |
call tex_triangle_z |
pop ebp |
jmp .end_draw |
;+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
.bump_mapping: |
push ebp |
push [Zbuffer_ptr] |
push [zz3] |
push [zz2] |
push [zz1] |
push word .zz3 |
push word .zz2 |
push word .zz1 |
mov esi,point_index1 |
lea esi,.index1x12 |
sub esp,12 |
mov edi,esp |
mov ecx,3 |
@@: |
mov eax,dword[esi] |
lea eax,[eax*3] |
shl eax,2 |
add eax,[points_normals_rot_ptr] ;point_normals_rotated |
; texture x=(rotated point normal -> x * 255)+255 |
fld dword[eax] |
2218,70 → 2184,58 |
add esi,4 |
loop @b |
mov esi,[point_index3] ; bump map coords |
mov esi, .point_index3 ; bump map coords |
shl esi,2 |
add esi,tex_points |
add esi,[tex_points_ptr] |
push dword[esi] |
mov esi,[point_index2] |
mov esi, .point_index2 |
shl esi,2 |
add esi,tex_points |
; lea esi,[esi*3] |
; lea esi,[points+2+esi*2] |
add esi,[tex_points_ptr] |
push dword[esi] |
; push dword[xx2] |
mov esi,[point_index1] |
mov esi, .point_index1 |
shl esi,2 |
add esi,tex_points |
; lea esi,[esi*3] |
; lea esi,[points+2+esi*2] |
add esi,[tex_points_ptr] |
push dword[esi] |
; push dword[xx1] |
mov eax,dword[xx1] |
ror eax,16 |
mov ebx,dword[xx2] |
ror ebx,16 |
mov ecx,dword[xx3] |
ror ecx,16 |
mov eax,dword .xx1 |
mov ebx,dword .xx2 |
mov ecx,dword .xx3 |
mov edi,[screen_ptr] |
mov esi,envmap |
mov edx,bumpmap ;BUMP_MAPPING |
call bump_triangle_z |
pop ebp |
jmp .end_draw |
; ------------------------------------------------------------------- |
; .tex_mapping - textured-triangle path. |
; Pushes the three zz values and one texture-coordinate dword per |
; vertex, loads the register arguments and calls tex_triangle_z, then |
; jumps to the shared .end_draw tail. |
; Register arguments at the call, as set below: |
; eax, ebx, ecx = xx1, xx2, xx3 |
; edi = [screen_ptr], esi = texmap, edx = [Zbuffer_ptr] |
; NOTE(review): this span looks like a rendered diff in which each |
; superseded instruction (global operands such as [zz3], [point_index3], |
; tex_points, [xx1] + ror) is listed directly before its replacement |
; (dot-prefixed names such as .zz3, .point_index3, and [tex_points_ptr]). |
; Only one instruction of each pair is expected in the real file - |
; confirm against the repository before assembling. |
; ------------------------------------------------------------------- |
.tex_mapping: |
push [zz3] |
push [zz2] |
push [zz1] |
; ebp is saved here and restored by the pop ebp after the call. |
push ebp |
push word .zz3 |
push word .zz2 |
push word .zz1 |
; @@: |
; Texture coordinates, vertex 3 first: esi = point index * 4 plus the |
; base of the texture-coordinate table; the dword found there is pushed. |
mov esi,[point_index3] ; tex map coords |
mov esi, .point_index3 ; tex map coords |
shl esi,2 |
add esi,tex_points |
add esi,[tex_points_ptr] |
push dword[esi] |
; Same lookup for vertex 2. |
mov esi,[point_index2] |
mov esi, .point_index2 |
shl esi,2 |
add esi,tex_points |
add esi,[tex_points_ptr] |
push dword[esi] |
; Same lookup for vertex 1. |
mov esi,[point_index1] |
mov esi, .point_index1 |
shl esi,2 |
add esi,tex_points |
add esi,[tex_points_ptr] |
push dword[esi] |
; Load xx1..xx3 into eax/ebx/ecx. The first six lines read the globals |
; and rotate each by 16 bits; the last three read the dot-prefixed names |
; directly. |
mov eax,dword[xx1] |
ror eax,16 |
mov ebx,dword[xx2] |
ror ebx,16 |
mov ecx,dword[xx3] |
ror ecx,16 |
mov eax,dword .xx1 |
mov ebx,dword .xx2 |
mov ecx,dword .xx3 |
mov edi,[screen_ptr] |
mov esi,texmap |
; The two identical loads below are presumably the old and new diff rows |
; of the same instruction - TODO confirm. |
mov edx,[Zbuffer_ptr] |
mov edx,[Zbuffer_ptr] |
call tex_triangle_z |
pop ebp |
jmp .end_draw |
; .ray: |
; grd_triangle according to points index |
2319,49 → 2273,43 |
.grd_tex: ; smooth shading + texture |
push ebp |
mov ebp,esp |
sub esp,4 |
push ebp |
mov esi,[point_index3] ; tex map coords |
mov esi, .point_index3 ; tex map coords |
shl esi,2 |
add esi,tex_points |
add esi,[tex_points_ptr] |
push dword[esi] ; texture coords as first |
mov esi,[point_index2] ; group of parameters |
mov esi, .point_index2 ; group of parameters |
shl esi,2 |
add esi,tex_points |
add esi,[tex_points_ptr] |
push dword[esi] |
mov esi,[point_index1] |
mov esi, .point_index1 |
shl esi,2 |
add esi,tex_points |
add esi,[tex_points_ptr] |
push dword[esi] |
mov esi,point_index3 |
lea esi, .index3x12 |
lea edi, .zz3 |
mov ecx,3 |
.aagain_grd_draw: |
.aagain_grd_draw: |
lea edx,[ecx*3] |
push word[edx*2+xx1-2] ; zz1 ,2 ,3 |
push word[edi] ; zz1 ,2 ,3 |
fninit |
mov eax,dword[esi] |
shl eax,2 |
lea eax,[eax*3] ;+point_normals_rotated] |
add eax,[points_normals_rot_ptr] |
; texture x=(rotated point normal -> x * 255)+255 |
fld dword[eax] ; x coefficient of normal vector |
fimul [correct_tex] |
fiadd [correct_tex] |
fistp [temp1] ;word[ebp-2] |
fistp .temp1 ;word[ebp-2] |
; texture y=(rotated point normal -> y * 255)+255 |
fld dword[eax+4] ; y coefficient |
fimul [correct_tex] |
fiadd [correct_tex] |
fistp [temp2] ;word[ebp-4] |
fistp .temp2 ;word[ebp-4] |
mov eax,[temp2] ;word[ebp-4] |
mov ebx,[temp1] ;word[ebp-2] |
and ebx,0xfffffff ; some onjects need thid 'and' |
mov eax,.temp2 |
mov ebx,.temp1 |
and ebx,0xfffffff ; some objects need this 'and' |
shl eax,TEX_SHIFT |
add eax,ebx |
lea eax,[eax*3] |
2376,17 → 2324,14 |
push ax ;g |
shr eax,24 |
push ax ;b |
sub edi,2 |
sub esi,4 |
dec cx |
jnz .aagain_grd_draw |
mov eax,dword[xx1] |
ror eax,16 |
mov ebx,dword[xx2] |
ror ebx,16 |
mov ecx,dword[xx3] |
ror ecx,16 |
mov eax, .xx1 |
mov ebx, .xx2 |
mov ecx, .xx3 |
mov edi,[screen_ptr] |
mov edx,texmap |
mov esi,[Zbuffer_ptr] |
2394,31 → 2339,30 |
call tex_plus_grd_triangle |
pop ebp |
mov esp,ebp |
pop ebp |
jmp .end_draw |
.two_tex: |
push ebp |
push [Zbuffer_ptr] |
push word[zz3] |
push word[zz2] |
push word[zz1] |
push word .zz3 |
push word .zz2 |
push word .zz1 |
mov esi,[point_index3] ; tex map coords |
mov esi, .point_index3 ; tex map coords |
shl esi,2 |
add esi,tex_points |
add esi,[tex_points_ptr] |
push dword[esi] |
mov esi,[point_index2] |
mov esi, .point_index2 |
shl esi,2 |
add esi,tex_points |
add esi,[tex_points_ptr] |
push dword[esi] |
mov esi,[point_index1] |
mov esi, .point_index1 |
shl esi,2 |
add esi,tex_points |
add esi,[tex_points_ptr] |
push dword[esi] |
mov esi,point_index1 ; env coords |
lea esi, .point_index1 ; env coords |
sub esp,12 |
mov edi,esp |
mov ecx,3 |
2443,50 → 2387,46 |
add esi,4 |
loop @b |
mov eax,dword[xx1] |
ror eax,16 |
mov ebx,dword[xx2] |
ror ebx,16 |
mov ecx,dword[xx3] |
ror ecx,16 |
mov eax, .xx1 |
mov ebx, .xx2 |
mov ecx, .xx3 |
mov edi,[screen_ptr] |
mov esi,texmap |
mov edx,envmap |
call two_tex_triangle_z |
pop ebp |
jmp .end_draw |
.bump_tex: |
mov esi,[point_index3] ; tex map coords |
push ebp |
mov esi, .point_index3 ; tex map coords |
shl esi,2 |
add esi,tex_points |
add esi,[tex_points_ptr] |
push dword[esi] |
mov esi,[point_index2] |
mov esi, .point_index2 |
shl esi,2 |
add esi,tex_points |
add esi,[tex_points_ptr] |
push dword[esi] |
mov esi,[point_index1] |
mov esi, .point_index1 |
shl esi,2 |
add esi,tex_points |
add esi,[tex_points_ptr] |
push dword[esi] |
push dword texmap |
push [Zbuffer_ptr] |
xor edi,edi |
push word[zz3] |
push word[zz2] |
push word[zz1] |
push word .zz3 |
push word .zz2 |
push word .zz1 |
mov esi,point_index1 ; env coords |
lea esi, .index1x12 ; env coords |
sub esp,12 |
mov edi,esp |
mov ecx,3 |
@@: |
mov eax,dword[esi] |
lea eax,[eax*3] |
shl eax,2 |
add eax,[points_normals_rot_ptr] |
; texture x=(rotated point normal -> x * 255)+255 |
fld dword[eax] |
2503,40 → 2443,28 |
add esi,4 |
loop @b |
; push dword 1 shl 16 + 1 ; emap coords |
; push dword 127 shl 16 + 1 |
; push dword 127 shl 16 + 127 |
mov esi,[point_index3] ; bump map coords |
mov esi, .point_index3 ; bump map coords |
shl esi,2 |
add esi,tex_points |
add esi,[tex_points_ptr] |
push dword[esi] |
mov esi,[point_index2] |
mov esi, .point_index2 |
shl esi,2 |
add esi,tex_points |
add esi,[tex_points_ptr] |
push dword[esi] |
mov esi,[point_index1] |
mov esi, .point_index1 |
shl esi,2 |
add esi,tex_points |
add esi,[tex_points_ptr] |
push dword[esi] |
; push dword 1 shl 16 + 127 |
; push dword 127 shl 16 + 127 |
; push dword 1 shl 16 + 1 ; bump coords |
mov eax,dword[xx1] |
ror eax,16 |
mov ebx,dword[xx2] |
ror ebx,16 |
mov ecx,dword[xx3] |
ror ecx,16 |
mov eax,dword .xx1 |
mov ebx,dword .xx2 |
mov ecx,dword .xx3 |
mov edi,[screen_ptr] |
mov esi,envmap |
mov edx,bumpmap |
call bump_tex_triangle_z |
pop ebp |
jmp .end_draw |
2549,12 → 2477,9 |
pshufd xmm5,xmm5,01110011b |
mov eax,[point_index1] |
mov ebx,[point_index2] |
mov ecx,[point_index3] |
imul eax,[i12] |
imul ebx,[i12] |
imul ecx,[i12] |
mov eax, .index1x12 |
mov ebx, .index2x12 |
mov ecx, .index3x12 |
add eax,[points_normals_rot_ptr] |
add ebx,[points_normals_rot_ptr] |
add ecx,[points_normals_rot_ptr] |
2566,12 → 2491,9 |
andps xmm2,[zero_hgst_dd] |
xorps xmm3,xmm3 |
mov eax,[point_index1] |
mov ebx,[point_index2] |
mov ecx,[point_index3] |
imul eax,[i12] |
imul ebx,[i12] |
imul ecx,[i12] |
mov eax, .index1x12 |
mov ebx, .index2x12 |
mov ecx, .index3x12 |
add eax,[points_rotated_ptr] |
add ebx,[points_rotated_ptr] |
add ecx,[points_rotated_ptr] |
2584,12 → 2506,9 |
mov eax,dword[xx1] |
ror eax,16 |
mov ebx,dword[xx2] |
ror ebx,16 |
mov ecx,dword[xx3] |
ror ecx,16 |
mov eax,dword .xx1 |
mov ebx,dword .xx2 |
mov ecx,dword .xx3 |
mov edi,[screen_ptr] |
mov esi,[Zbuffer_ptr] |
2603,12 → 2522,9 |
pshufd xmm5,xmm5,01110011b |
mov eax,[point_index1] |
mov ebx ,[point_index2] |
mov ecx,[point_index3] |
imul eax,[i12] |
imul ebx,[i12] |
imul ecx,[i12] |
mov eax, .index1x12 |
mov ebx, .index2x12 |
mov ecx, .index3x12 |
add eax,[points_normals_rot_ptr] |
add ebx,[points_normals_rot_ptr] |
add ecx,[points_normals_rot_ptr] |
2620,12 → 2536,9 |
andps xmm2,[zero_hgst_dd] |
xorps xmm3,xmm3 |
mov eax,[point_index1] |
mov ebx,[point_index2] |
mov ecx,[point_index3] |
imul eax,[i12] |
imul ebx,[i12] |
imul ecx,[i12] |
mov eax, .index1x12 |
mov ebx, .index2x12 |
mov ecx, .index3x12 |
add eax,[points_rotated_ptr] |
add ebx,[points_rotated_ptr] |
add ecx,[points_rotated_ptr] |
2638,12 → 2551,9 |
mov eax,dword[xx1] |
ror eax,16 |
mov ebx,dword[xx2] |
ror ebx,16 |
mov ecx,dword[xx3] |
ror ecx,16 |
mov eax, .xx1 |
mov ebx, .xx2 |
mov ecx, .xx3 |
mov edi,[screen_ptr] |
mov edx,[Zbuffer_ptr] |
mov esi,[Zbuffer_ptr] |
2657,12 → 2567,9 |
punpcklwd xmm5,[the_zero] |
pshufd xmm5,xmm5,01110011b |
mov eax,[point_index1] |
mov ebx,[point_index2] |
mov ecx,[point_index3] |
imul eax,[i12] |
imul ebx,[i12] |
imul ecx,[i12] |
mov eax, .index1x12 |
mov ebx, .index2x12 |
mov ecx, .index3x12 |
add eax,[points_normals_rot_ptr] |
add ebx,[points_normals_rot_ptr] |
add ecx,[points_normals_rot_ptr] |
2674,12 → 2581,9 |
andps xmm2,[zero_hgst_dd] |
xorps xmm3,xmm3 |
mov eax,[point_index1] |
mov ebx,[point_index2] |
mov ecx,[point_index3] |
imul eax,[i12] |
imul ebx,[i12] |
imul ecx,[i12] |
mov eax, .index1x12 |
mov ebx, .index2x12 |
mov ecx, .index3x12 |
add eax,[points_rotated_ptr] |
add ebx,[points_rotated_ptr] |
add ecx,[points_rotated_ptr] |
2690,17 → 2594,17 |
add esp,12 |
andps xmm4,[zero_hgst_dd] |
mov esi,[point_index3] ; tex map coords |
mov esi,.point_index3 ; tex map coords |
shl esi,2 |
add esi,tex_points |
add esi,[tex_points_ptr] |
push dword[esi] |
mov esi,[point_index2] |
mov esi,.point_index2 |
shl esi,2 |
add esi,tex_points |
add esi,[tex_points_ptr] |
push dword[esi] |
mov esi,[point_index1] |
mov esi,.point_index1 |
shl esi,2 |
add esi,tex_points |
add esi,[tex_points_ptr] |
push dword[esi] |
movups xmm6,[esp] |
add esp,12 |
2714,31 → 2618,84 |
por xmm6,xmm7 |
mov eax,dword[xx1] |
ror eax,16 |
mov ebx,dword[xx2] |
ror ebx,16 |
mov ecx,dword[xx3] |
ror ecx,16 |
mov eax,dword .xx1 |
mov ebx,dword .xx2 |
mov ecx,dword .xx3 |
mov edx,texmap |
mov edi,[screen_ptr] |
mov esi,[Zbuffer_ptr] |
call glass_tex_tri |
jmp .end_draw |
; ------------------------------------------------------------------- |
; .ray_shd - gathers per-vertex data for the current triangle into SSE |
; and MMX registers and calls ray_shad. There is no jump after the call; |
; execution continues with whatever follows this section. |
; Registers at the call, as set below: |
; xmm0, xmm1, xmm2 = 16 bytes read at points_normals_rot_ptr + |
; .index1x12/.index2x12/.index3x12, and-ed with zero_hgst_dd |
; xmm3 = 0 |
; xmm4 = the dwords at offset 8 of the three points_rotated_ptr |
; entries (vertex 1 in the lowest lane), and-ed with zero_hgst_dd |
; xmm5 = built from the dword at size_y_var |
; mm7 = .tri_no |
; eax, ebx, ecx = .xx1, .xx2, .xx3 |
; edx = texmap, edi = [screen_ptr], esi = [Zbuffer_ptr] |
; NOTE(review): the dot-prefixed names are presumably equ aliases for |
; stack locals defined earlier in the routine, and .indexNx12 is |
; presumably point index * 12 (byte offset of a 3-dword vector) - confirm. |
; ------------------------------------------------------------------- |
.ray_shd: |
emms |
; xmm5: words of the size_y_var dword interleaved with words from |
; the_zero, then the dwords rearranged by the pshufd selector. |
movd xmm5,[size_y_var] |
punpcklwd xmm5,[the_zero] |
pshufd xmm5,xmm5,01110011b |
; Rotated normals of the three vertices. |
mov eax, .index1x12 |
mov ebx, .index2x12 |
mov ecx, .index3x12 |
add eax,[points_normals_rot_ptr] |
add ebx,[points_normals_rot_ptr] |
add ecx,[points_normals_rot_ptr] |
; Each unaligned load reads 16 bytes; zero_hgst_dd is presumably a mask |
; that clears the highest dword, which lies past a 3-dword vector - |
; confirm against its definition. |
movups xmm0,[eax] |
movups xmm1,[ebx] |
movups xmm2,[ecx] |
andps xmm0,[zero_hgst_dd] |
andps xmm1,[zero_hgst_dd] |
andps xmm2,[zero_hgst_dd] |
xorps xmm3,xmm3 |
; Disabled debug code: breakpoint unless .tri_no is zero. |
; mov ebx,.tri_no |
; cmp ebx,0 |
; je @f |
; int3 |
; @@: |
; Rotated positions of the three vertices. |
mov eax, .index1x12 |
mov ebx, .index2x12 |
mov ecx, .index3x12 |
add eax,[points_rotated_ptr] |
add ebx,[points_rotated_ptr] |
add ecx,[points_rotated_ptr] |
; The dword at offset 8 of each point is pushed in the order 3, 2, 1 so |
; that vertex 1 ends up at [esp]; the stack is used as a staging buffer. |
push dword[ecx+8] |
push dword[ebx+8] |
push dword[eax+8] |
; This load reads 16 bytes: the three pushed dwords plus the dword that |
; was already on the stack above them. Only the 12 pushed bytes are |
; released afterwards. |
movups xmm4,[esp] |
add esp,12 |
andps xmm4,[zero_hgst_dd] |
; mm7 is written after the emms at the top of this section. |
movd mm7,.tri_no |
; mm7 - initialised |
mov eax,dword .xx1 |
mov ebx,dword .xx2 |
mov ecx,dword .xx3 |
mov edx,texmap |
mov edi,[screen_ptr] |
mov esi,[Zbuffer_ptr] |
call ray_shad |
end if |
; ------------------------------------------------------------------- |
; .end_draw - shared tail that the shading paths jump to after their |
; draw call. It restores ecx and esi from the stack, advances esi by |
; 12 bytes and decides whether to branch back to .again_dts. |
; NOTE(review): this span appears to interleave two revisions of the |
; tail (one counting ecx up to [triangles_count_var], one counting ecx |
; down to zero). As listed, the jnz after "jmp .eend" and the final ret |
; are unreachable, and the first jnz skips the second pop. Confirm the |
; actual instruction sequence against the repository. |
; ------------------------------------------------------------------- |
.end_draw: |
; pop ebp |
pop ecx |
pop esi |
; presumably steps to the next triangle record - confirm the 12-byte |
; record size against the caller. |
add esi,12 |
inc ecx |
cmp ecx,[triangles_count_var] |
jnz .again_dts |
pop ecx |
dec ecx |
; .eend releases the frame and returns. |
jmp .eend |
jnz .again_dts |
ret |
.draw_smooth_line: |
2789,7 → 2746,8 |
sub esp,16 |
movups [esp],xmm1 |
add esi,4 |
loop .aga_n |
dec ecx |
jnz .aga_n |
movups xmm0,[esp] |
movups xmm1,[esp+16] |
2807,11 → 2765,17 |
movhps xmm7,[edx] |
pshufd xmm7,xmm7,11101000b |
movdqa xmm6,xmm7 |
movdqa xmm3,xmm7 |
movdqa xmm4,xmm7 |
movd xmm5,[size_y_var] |
pshuflw xmm5,xmm5,00010001b |
pcmpeqw xmm3,xmm5 |
pcmpeqw xmm4,[the_zero] |
pcmpgtw xmm7,xmm5 |
pcmpgtw xmm6,[the_zero] |
pxor xmm7,xmm6 |
pxor xmm3,xmm4 |
pxor xmm7,xmm3 |
pmovmskb eax,xmm7 |
cmp al,-1 |
jnz .skp |
2851,15 → 2815,25 |
cmp ecx,[edges_count] |
jnz .again_s_line |
ret |
; .eend - exit path: releases 60 bytes of stack, restores ebp from the |
; stack and returns. |
; NOTE(review): the 60 presumably matches the local-variable area |
; reserved at the routine's entry, which is outside this view - confirm |
; the two stay in sync. |
.eend: |
add esp,60 |
pop ebp |
ret |
draw_handlers: |
; in eax - render model |
push ebp |
mov ebp,esp |
2866,14 → 2840,15 |
.counter equ ebp-16 |
.xres3m18 equ ebp-8 |
.xres2m12 equ ebp-12 |
.dr_model equ dword[ebp-4] |
; init counter |
sub esp,12 |
push dword 0 |
mov .dr_model,eax |
movzx eax,word[size_x_var] |
cmp [dr_flag],12 |
cmp .dr_model,12 |
jge @f |
lea ebx,[eax*3] |
sub ebx,18 |
2931,7 → 2906,7 |
add eax,ebx |
push eax |
lea edi,[eax*3] |
cmp [dr_flag],12 |
cmp .dr_model,12 |
jl @f |
add edi,[esp] |
@@: |
2956,7 → 2931,7 |
mov byte[edi+2],0xff ;al |
mov word[eax],dx |
add eax,2 |
cmp [dr_flag],12 |
cmp .dr_model,12 |
jl @f |
add edi,4 |
loop .do |
3226,7 → 3201,6 |
.exit: |
mov dword[edi],-1 |
ret |
alloc_mem_for_tp: |
mov eax, 68 |
cmp [re_alloc_flag],1 |
3293,7 → 3267,15 |
mov [points_rotated_ptr], eax |
mov eax, 68 |
mov ebx, 12 |
mov ecx, [points_count_var] |
shl ecx,2 |
mov edx,[tex_points_ptr] |
int 0x40 |
mov [tex_points_ptr], eax |
mov eax, 68 |
mov ecx, [points_count_var] |
inc ecx |
shl ecx, 3 |
mov edx,[points_translated_ptr] |
3302,7 → 3284,6 |
ret |
read_from_disk: |
mov eax, 68 |
mov ebx, 11 |
3348,11 → 3329,11 |
mov edi,menu |
.again: |
mov eax,8 ; function 8 : define and draw button |
mov bx,[size_x_var] |
movzx ebx,word[size_x_var] |
shl ebx,16 |
add ebx,(10)*65536+62 ; [x start] *65536 + [x size] |
movzx ecx,byte[edi] ; button id = position+2 |
sub cl,2 |
sub ecx,2 |
lea ecx,[ecx*5] |
lea ecx,[ecx*3] |
add ecx,25 |
3364,10 → 3345,10 |
; BUTTON LABEL |
mov eax,4 ; function 4 : write text to window |
movzx ebx,byte[edi] |
sub bl,2 ; button id, according to position |
sub ebx,2 ; button id, according to position |
lea ebx,[ebx*3] |
lea ebx,[ebx*5] |
mov cx,[size_x_var] |
movzx ecx,word[size_x_var] |
shl ecx,16 |
add ebx,ecx |
add ebx,(12)*65536+28 ; [x start] *65536 + [y start] |
3459,6 → 3440,9 |
; ******* WINDOW DEFINITIONS AND DRAW ******** |
; ********************************************* |
draw_window: |
movzx eax,[fire_flag] |
push eax |
; int3 |
mov eax,12 ; function 12:tell os about windowdraw |
mov ebx,1 ; 1, start of draw |
int 0x40 |
3492,6 → 3476,7 |
; add edx,130*65536+60 ; [x start] *65536 + [y start] |
; mov esi,0x00ddeeff ; font 1 & color ( 0xF0RRGGBB ) |
; int 0x40 |
call write_info |
; ADD VECTOR LABEL ; add vector buttons - 30 ++ |
3644,6 → 3629,8 |
mov eax,12 ; function 12:tell os about windowdraw |
mov ebx,2 ; 2, end of draw |
int 0x40 |
pop eax |
mov [fire_flag],al |
ret |