0,0 → 1,762 |
; Bilinear filtering, real Phong shading and a glass-like look, all in parallel. |
; Thanks to authors of 3dica tutorial. |
; Implemented in FASM by Maciej Guba. |
; http://macgub.j.pl |
|
; Number of fractional bits of the fixed-point x coordinates that |
; glass_tex_tri steps along the triangle edges (shl before idiv, sar before use). |
ROUND2 equ 10 |
|
glass_tex_tri: |
;----Procedure renders a Phong shaded triangle with z coord |
;----interpolation ( Catmull algorithm ), each pixel is --- |
;----covered by texture using bilinear filtering.---------- |
;----The normal vector is normalized in every pixel ------- |
;----( per pixel work is done by glass_tex_line ).--------- |
;------------------in - eax - x1 shl 16 + y1 ------------ |
;---------------------- ebx - x2 shl 16 + y2 ------------ |
;---------------------- ecx - x3 shl 16 + y3 ------------ |
;---------------------- esi - pointer to stencil buffer-- |
;---------------------- filled with dd float variables- |
;---------------------- edi - pointer to screen buffer--- |
;---------------------- edx - pointer to texture--------- |
;---------------------- xmm0 - 1st normal vector -------- |
;---------------------- xmm1 - 2nd normal vector -------- |
;---------------------- xmm2 - 3rd normal vector -------- |
;---------------------- xmm3 - normalized light vector -- |
;---------------------- xmm4 - lo -> hi z1, z2, z3 coords |
;---------------------- as dwords floats --------------- |
;---------------------- xmm5 - lo -> hi y_min, y_max, --- |
;---------------------- x_min, x_max as dword integers - |
;---------------------- xmm6 - lo -> hi tx1, ty1, tx2, -- |
;---------------------- ty2, tx3, ty3 as word, xres as-- |
;---------------------- dword integers------------------ |
;---------------------- stack - no parameters ----------- |
;-------------------------------------------------------- |
;----------------- procedure doesn't save registers !! -- |
;----------------- ( only ebp is restored ) ------------- |
|
|
|
|
; Reserve 512 bytes of locals and make ebp a 16 byte aligned base |
; below the saved ebp, so movaps / movdqa can be used on the locals. |
push ebp |
mov ebp,esp |
sub esp,512 |
sub ebp,16 |
and ebp,0xfffffff0 |
|
; vertex normals and light vector ( 16 bytes each ) |
.1_nv equ [ebp-16] |
.2_nv equ [ebp-32] |
.3_nv equ [ebp-48] |
.l_v equ [ebp-64] |
; z1..z3 are written with one movaps from xmm4 |
.z3 equ [ebp-72] |
.z2 equ [ebp-76] |
.z1 equ [ebp-80] |
; each x/y pair is stored as one dword ( x in the high word ) |
.x1 equ [ebp-82] |
.y1 equ [ebp-84] |
.x2 equ [ebp-86] |
.y2 equ [ebp-88] |
.x3 equ [ebp-90] |
.y3 equ [ebp-92] |
.Zbuf equ [ebp-96] |
; clip limits, written with one movdqa from xmm5 |
.x_max equ [ebp-100] |
.x_min equ [ebp-104] |
.y_max equ [ebp-108] |
.y_min equ [ebp-112] |
.screen equ [ebp-116] |
; per scanline x steps of the three edges, fixed point with ROUND2 fraction bits |
.dx12 equ [ebp-120] |
.dx13 equ [ebp-124] |
.dx23 equ [ebp-128] |
; per scanline normal steps of the three edges |
.dn12 equ [ebp-144] |
.dn13 equ [ebp-160] |
.dn23 equ [ebp-176] |
|
.cnv1 equ [ebp-192] ; cur normal vectors |
.cnv2 equ [ebp-208] |
; tx1..ty3 and x_res are written with one movaps from xmm6 |
.x_res equ [ebp-212] |
.ty3 equ [ebp-214] |
.tx3 equ [ebp-216] |
.ty2 equ [ebp-218] |
.tx2 equ [ebp-220] |
.ty1 equ [ebp-222] |
.tx1 equ [ebp-224] |
; per scanline steps of tx, ty, z ( floats ) - each group is used as one 16 byte vector |
.dz12 equ [ebp-232] |
.dty12 equ [ebp-236] |
.dtx12 equ [ebp-240] |
.dz13 equ [ebp-248] |
.dty13 equ [ebp-252] |
.dtx13 equ [ebp-256] |
.dz23 equ [ebp-264] |
.dty23 equ [ebp-268] |
.dtx23 equ [ebp-272] |
; current tx, ty, z on both active edges ( floats ) |
.cz1 equ [ebp-280] |
.cty1 equ [ebp-284] |
.ctx1 equ [ebp-288] |
.cz2 equ [ebp-296] |
.cty2 equ [ebp-300] |
.ctx2 equ [ebp-304] |
.tx_ptr equ [ebp-308] |
|
|
emms |
; movd .x_res,xmm7 |
; Sort the vertices by ascending y ( signed compare of the low words ); |
; z, texture coords and normals are swapped together with the coordinates. |
.sort3: ; sort triangle coordinates... |
cmp ax,bx |
jle .sort1 |
xchg eax,ebx |
shufps xmm4,xmm4,11100001b |
shufps xmm6,xmm6,11100001b |
movaps xmm7,xmm0 |
movaps xmm0,xmm1 |
movaps xmm1,xmm7 |
|
|
.sort1: |
cmp bx,cx |
jle .sort2 |
xchg ebx,ecx |
shufps xmm4,xmm4,11011000b |
shufps xmm6,xmm6,11011000b |
movaps xmm7,xmm1 |
movaps xmm1,xmm2 |
movaps xmm2,xmm7 |
|
jmp .sort3 |
|
.sort2: |
; movq .tx1,xmm6 |
; pshufd xmm6,xmm6,01001110b |
; movd .tx3,xmm6 |
movaps .tx1,xmm6 |
movaps .z1,xmm4 |
mov .y1,eax |
mov .y2,ebx |
mov .y3,ecx |
|
movdqa .y_min,xmm5 |
if 1 ; check if at least a fragment |
packssdw xmm5,xmm5 ; of triangle is in visible area |
pshuflw xmm5,xmm5,11011000b |
; xmm7 = words y3, x3, y2, x2, y1, x1 followed by both halves of the float z1 |
movdqu xmm7,.y3 |
movdqa xmm6,xmm5 |
pshufd xmm5,xmm5,0 ; xmm5 lo-hi -> broadcasted y_min, x_min |
pshufd xmm6,xmm6,01010101b ;xmm6 -> brd y_max x_max |
movdqa xmm4,xmm7 |
pcmpgtw xmm7,xmm5 |
pcmpgtw xmm4,xmm6 |
; a word is set when the coordinate is above the minimum but not above the maximum |
pxor xmm7,xmm4 |
pmovmskb eax,xmm7 |
; pmovmskb gives one bit per byte ( 16 bits ); odd bits 1..11 are the high |
; bytes of the six coordinate words. The former mask 0x00aaaaaa also kept |
; bits 13 and 15, so the raw bits of z1 could influence the test. |
; NOTE(review): a triangle that overlaps the window while none of its six |
; coordinates lies inside the limits is rejected here too - confirm this is acceptable. |
and eax,0x0aaa |
or eax,eax |
jz .rpt_loop2_end |
end if |
movaps .1_nv,xmm0 |
movaps .2_nv,xmm1 |
movaps .3_nv,xmm2 |
movaps .l_v,xmm3 |
mov .Zbuf,esi |
mov .screen,edi |
mov .tx_ptr,edx |
|
|
|
; ---- edge 1 -> 2 : steps per scanline, all zero when y2 = y1 ---- |
mov bx,.y2 ; calc deltas |
sub bx,.y1 |
jnz .rpt_dx12_make |
|
xorps xmm7,xmm7 |
mov dword .dx12,0 |
movaps .dtx12,xmm7 |
movaps .dn12,xmm7 |
jmp .rpt_dx12_done |
|
.rpt_dx12_make: |
; dx12 = ((x2 - x1) shl ROUND2) / (y2 - y1) |
mov ax,.x2 |
sub ax,.x1 |
cwde |
movsx ebx,bx |
shl eax,ROUND2 |
cdq |
idiv ebx |
mov .dx12,eax |
|
; xmm6 = (y2 - y1) as float in all four lanes |
cvtsi2ss xmm6,ebx |
shufps xmm6,xmm6,0 |
movss xmm5,.z2 |
subss xmm5,.z1 |
divss xmm5,xmm6 |
movss .dz12,xmm5 |
|
; texture coords: words are zero extended to dwords before the subtraction |
movd xmm0,.tx1 |
movd xmm2,.tx2 |
pxor xmm1,xmm1 |
punpcklwd xmm0,xmm1 |
punpcklwd xmm2,xmm1 |
psubd xmm2,xmm0 |
; cvtdq2ps xmm0,xmm0 |
cvtdq2ps xmm2,xmm2 |
; movlps .ctx1,xmm0 |
; movlps .ctx2,xmm2 |
; subps xmm2,xmm0 |
divps xmm2,xmm6 |
movlps .dtx12,xmm2 |
|
movaps xmm0,.2_nv |
subps xmm0,.1_nv |
divps xmm0,xmm6 |
movaps .dn12,xmm0 |
|
|
.rpt_dx12_done: |
|
; ---- edge 1 -> 3 : steps per scanline, all zero when y3 = y1 ---- |
mov bx,.y3 ; calc deltas |
sub bx,.y1 |
jnz .rpt_dx13_make |
|
xorps xmm7,xmm7 |
mov dword .dx13,0 |
movaps .dtx13,xmm7 |
movaps .dn13,xmm7 |
jmp .rpt_dx13_done |
|
.rpt_dx13_make: |
mov ax,.x3 |
sub ax,.x1 |
cwde |
movsx ebx,bx |
shl eax,ROUND2 |
cdq |
idiv ebx |
mov .dx13,eax |
|
|
cvtsi2ss xmm6,ebx |
shufps xmm6,xmm6,0 |
|
movss xmm5,.z3 |
subss xmm5,.z1 |
divss xmm5,xmm6 |
movss .dz13,xmm5 |
|
movd xmm0,.tx1 |
movd xmm2,.tx3 |
pxor xmm1,xmm1 |
punpcklwd xmm0,xmm1 |
punpcklwd xmm2,xmm1 |
psubd xmm2,xmm0 |
; cvtdq2ps xmm0,xmm0 |
cvtdq2ps xmm2,xmm2 |
; subps xmm2,xmm0 |
divps xmm2,xmm6 |
movlps .dtx13,xmm2 |
|
|
|
movaps xmm0,.3_nv |
subps xmm0,.1_nv |
divps xmm0,xmm6 |
movaps .dn13,xmm0 |
|
.rpt_dx13_done: |
|
; ---- edge 2 -> 3 : steps per scanline, all zero when y3 = y2 ---- |
mov bx,.y3 ; calc deltas |
sub bx,.y2 |
jnz .rpt_dx23_make |
|
xorps xmm7,xmm7 |
mov dword .dx23,0 |
movaps .dtx23,xmm7 |
movaps .dn23,xmm7 |
jmp .rpt_dx23_done |
|
.rpt_dx23_make: |
mov ax,.x3 |
sub ax,.x2 |
cwde |
movsx ebx,bx |
shl eax,ROUND2 |
cdq |
idiv ebx |
mov .dx23,eax |
|
cvtsi2ss xmm6,ebx |
shufps xmm6,xmm6,0 |
movss xmm5,.z3 |
subss xmm5,.z2 |
divss xmm5,xmm6 |
movss .dz23,xmm5 |
|
movd xmm0,.tx2 |
movd xmm2,.tx3 |
pxor xmm1,xmm1 |
punpcklwd xmm0,xmm1 |
punpcklwd xmm2,xmm1 |
psubd xmm2,xmm0 |
; cvtdq2ps xmm0,xmm0 |
cvtdq2ps xmm2,xmm2 |
; movlps .ctx1,xmm0 |
; movlps .ctx2,xmm2 |
; subps xmm2,xmm0 |
divps xmm2,xmm6 |
movlps .dtx23,xmm2 |
|
|
|
|
movaps xmm0,.3_nv |
subps xmm0,.2_nv |
divps xmm0,xmm6 |
movaps .dn23,xmm0 |
|
.rpt_dx23_done: |
|
; Both edge walkers start at vertex 1: |
; eax - cur x on edge 1-3, ebx - cur x on edge 1-2 ( fixed point ) |
movsx eax,word .x1 |
shl eax,ROUND2 |
mov ebx,eax |
mov edx,.z1 |
movd xmm1,.tx1 |
pxor xmm2,xmm2 |
punpcklwd xmm1,xmm2 |
cvtdq2ps xmm1,xmm1 |
|
mov .cz1,edx |
mov .cz2,edx |
movaps xmm0,.1_nv |
movlps .ctx1,xmm1 |
movlps .ctx2,xmm1 |
movaps .cnv1,xmm0 |
movaps .cnv2,xmm0 |
|
; mov edx,.dx13 |
; cmp edx,.dx12 |
; jg .second_cause |
|
movsx ecx,word .y1 |
cmp cx,.y2 |
|
jge .rpt_loop1_end |
|
; ---- upper part: scanlines y1 .. y2-1, between edge 1-3 and edge 1-2 ---- |
.rpt_loop1: |
pushad |
|
; set up the register interface of glass_tex_line |
movaps xmm2,.y_min |
movaps xmm0,.cnv1 |
movaps xmm1,.cnv2 |
; movlps xmm3,.cz1 ; cz1, cz2 both |
movaps xmm3,.ctx1 |
movaps xmm5,.ctx2 |
movaps xmm4,.l_v |
movd xmm6,.x_res |
sar ebx,ROUND2 |
sar eax,ROUND2 |
mov edx,.tx_ptr |
mov edi,.screen |
|
mov esi,.Zbuf |
|
call glass_tex_line |
|
popad |
; advance both edges by one scanline ( tx, ty and z share one vector ) |
movaps xmm0,.cnv1 |
movaps xmm1,.cnv2 |
; movss xmm2,.cz1 |
; movss xmm3,.cz2 |
movaps xmm2,.ctx1 |
movaps xmm3,.ctx2 |
addps xmm0,.dn13 |
addps xmm1,.dn12 |
addps xmm2,.dtx13 |
addps xmm3,.dtx12 |
add eax,.dx13 |
add ebx,.dx12 |
|
movaps .cnv1,xmm0 |
movaps .cnv2,xmm1 |
; movss .cz1,xmm2 |
; movss .cz2,xmm3 |
movaps .ctx1,xmm2 |
movaps .ctx2,xmm3 |
add ecx,1 |
cmp cx,.y2 |
jl .rpt_loop1 |
|
|
; jmp .rpt_loop2_end |
|
|
.rpt_loop1_end: |
movsx ecx,word .y2 |
cmp cx,.y3 |
jge .rpt_loop2_end |
|
; restart the second walker at vertex 2, the first one continues on edge 1-3 |
movsx ebx,word .x2 ; eax - cur x1 |
shl ebx,ROUND2 ; ebx - cur x2 |
push dword .z2 |
pop dword .cz2 |
movd xmm1,.tx2 |
pxor xmm2,xmm2 |
punpcklwd xmm1,xmm2 |
cvtdq2ps xmm1,xmm1 |
movlps .ctx2,xmm1 |
movaps xmm0,.2_nv |
movaps .cnv2,xmm0 |
|
|
; ---- lower part: scanlines y2 .. y3-1, between edge 1-3 and edge 2-3 ---- |
.rpt_loop2: |
pushad |
|
movaps xmm2,.y_min |
movaps xmm0,.cnv1 |
movaps xmm1,.cnv2 |
movaps xmm3,.ctx1 |
movaps xmm5,.ctx2 |
movaps xmm4,.l_v |
sar ebx,ROUND2 |
sar eax,ROUND2 |
mov edx,.tx_ptr |
mov edi,.screen |
mov esi,.Zbuf |
movd xmm6,.x_res |
call glass_tex_line |
|
popad |
movaps xmm0,.cnv1 |
movaps xmm1,.cnv2 |
; movss xmm2,.cz1 |
; movss xmm3,.cz2 |
movaps xmm2,.ctx1 |
movaps xmm3,.ctx2 |
addps xmm0,.dn13 |
addps xmm1,.dn23 |
; addss xmm2,.dz13 |
; addss xmm3,.dz23 |
addps xmm2,.dtx13 |
addps xmm3,.dtx23 |
|
add eax,.dx13 |
add ebx,.dx23 |
|
movaps .cnv1,xmm0 |
movaps .cnv2,xmm1 |
movaps .ctx1,xmm2 |
movaps .ctx2,xmm3 |
|
; movss .cz1,xmm2 |
; movss .cz2,xmm3 |
|
add ecx,1 |
cmp cx,.y3 |
jl .rpt_loop2 |
|
.second_cause: ;dx13 > dx12 |
|
.rpt_loop2_end: |
|
; release the locals and restore the caller's ebp |
add esp,512 |
pop ebp |
|
ret |
align 16 |
glass_tex_line: |
; Draws one horizontal line of the triangle: for every pixel the |
; interpolated normal is normalized and lit by all lights of the |
; global lights_aligned table, the texel is fetched with bilinear |
; filtering, and the product is added ( with byte saturation ) to |
; the pixel already present in the screen buffer. |
; in: |
; xmm0 - normal vector 1 |
; xmm1 - normal vect 2 |
; xmm3 - lo -> hi tx1, ty1, z1 coords as dwords float |
; xmm5 - lo -> hi tx2, ty2, z2 coords as dwords float |
; xmm2 - lo -> hi y_min, y_max, x_min, x_max |
; as dword integers |
; xmm4 - normalized light vector |
; eax - x1 |
; ebx - x2 |
; ecx - y |
; edi - screen buffer |
; esi - stencil buffer filled with dd floats |
; edx - texture pointer (handle) |
; xmm6 - lowest dword x_res as integer |
; Registers are not preserved ( only ebp is restored ). |
|
; 352 instead of the former 350 keeps esp dword aligned; the locals are |
; addressed through the 16 byte aligned ebp and need less than that. |
push ebp |
mov ebp,esp |
sub esp,352 |
sub ebp,16 |
and ebp,0xfffffff0 |
|
.n1 equ [ebp-16] |
.n2 equ [ebp-32] |
.lv equ [ebp-48] |
.lx1 equ [ebp-52] |
.lx2 equ [ebp-56] |
; .z2 equ [ebp-60] |
; .z1 equ [ebp-64] |
.screen equ [ebp-68] |
.zbuff equ [ebp-72] |
; clip limits as words, written with one movq after packssdw |
.x_max equ [ebp-74] |
.x_min equ [ebp-76] |
.y_max equ [ebp-78] |
.y_min equ [ebp-80] |
.dn equ [ebp-96] |
.x_res equ [ebp-100] |
.y equ [ebp-104] |
.cnv equ [ebp-128] |
; tx, ty, z of both line ends and their per pixel step, one 16 byte vector each |
.z1 equ [ebp-136] |
.ty1 equ [ebp-140] |
.tx1 equ [ebp-144] |
.z2 equ [ebp-152] |
.ty2 equ [ebp-156] |
.tx2 equ [ebp-160] |
.cz equ [ebp-168] |
.cty equ [ebp-172] |
.ctx equ [ebp-176] |
.dz equ [ebp-184] |
.dty equ [ebp-188] |
.dtx equ [ebp-192] |
.yd equ [ebp-196] |
.xd equ [ebp-200] |
; fractional parts of the texture coords, used as bilinear weights |
.yf equ [ebp-204] |
.xf equ [ebp-208] |
.w4 equ [ebp-212] |
.w3 equ [ebp-216] |
.w2 equ [ebp-220] |
.w1 equ [ebp-224] |
.p4 equ [ebp-228] |
.p3 equ [ebp-232] |
.p2 equ [ebp-236] |
.p1 equ [ebp-240] |
|
|
.tx_ptr equ [ebp-244] |
|
; movaps xmm7,xmm3 |
; movaps xmm3,xmm5 |
; movaps xmm5,xmm7 |
|
|
mov .y,ecx |
packssdw xmm2,xmm2 |
; movaps xmm7,xmm2 |
; movhps xmm2,[the_zero] |
; pshuflw xmm2,xmm2,11111000b |
; pshufd xmm2,xmm2,11111100b |
; movlps xmm7,[the_zero] |
; pshufhw xmm7,xmm7,11111111b |
; movlps xmm7,[the_zero] |
; psrldq xmm7,4 |
; por xmm2,xmm7 |
movq .y_min,xmm2 |
; reject lines outside y_min <= y < y_max |
cmp cx,.y_min |
jl .end_line |
cmp cx,.y_max |
jge .end_line ; |
|
; empty line -> nothing to do; otherwise make eax the left end |
; and swap the per end attributes along with it |
cmp eax,ebx |
je .end_line |
jl @f |
xchg eax,ebx |
movaps xmm7,xmm0 |
movaps xmm0,xmm1 |
movaps xmm1,xmm7 |
movaps xmm7,xmm3 |
movaps xmm3,xmm5 |
movaps xmm5,xmm7 |
@@: |
|
; reject lines lying completely right or left of the clip window |
cmp ax,.x_max |
jge .end_line |
cmp bx,.x_min |
jle .end_line |
movaps .lv,xmm4 |
movaps .n1,xmm0 |
movaps .n2,xmm1 |
mov .lx1,eax |
mov .lx2,ebx |
movaps .tx1,xmm3 |
movaps .tx2,xmm5 |
movd .x_res,xmm6 |
mov .tx_ptr,edx |
; per pixel steps: ( end - start ) / ( x2 - x1 ) |
sub ebx,eax |
cvtsi2ss xmm7,ebx |
shufps xmm7,xmm7,0 |
subps xmm1,xmm0 |
divps xmm1,xmm7 |
movaps .dn,xmm1 |
subps xmm5,xmm3 |
divps xmm5,xmm7 |
movaps .dtx,xmm5 |
|
|
|
; left clipping: advance normal and tx, ty, z by ( x_min - x1 ) steps |
mov ebx,.lx1 |
cmp bx,.x_min ; clipping on function4 |
jge @f |
movzx eax,word .x_min |
sub eax,ebx |
cvtsi2ss xmm7,eax |
shufps xmm7,xmm7,0 |
mulps xmm5,xmm7 |
mulps xmm1,xmm7 |
addps xmm5,.tx1 |
addps xmm1,.n1 |
movsx eax,word .x_min |
movaps .tx1,xmm5 |
movaps .n1,xmm1 |
mov dword .lx1,eax |
|
@@: |
; right clipping: the line ends at x_max at the latest |
movzx eax,word .x_max |
cmp .lx2,eax |
jl @f |
mov .lx2,eax |
@@: |
; edi / esi -> first pixel: ( y * x_res + x1 ) * 4 bytes in both buffers |
mov eax,.x_res |
mul dword .y |
add eax,.lx1 |
shl eax,2 |
add edi,eax |
add esi,eax |
|
; ecx - number of pixels to draw, xmm2 - cur tx, ty, z |
mov ecx,.lx2 |
sub ecx,.lx1 |
; movaps xmm0,.n1 |
movaps xmm2,.tx1 |
; xorps xmm1,xmm1 |
align 16 |
.ddraw: |
; movhlps xmm7,xmm2 |
; cmpnltss xmm7,dword[esi] |
; movd eax,xmm7 |
; or eax,eax |
; jnz .skip |
; xmm5 collects the brightest light contribution of this pixel |
xorps xmm5,xmm5 |
; movhlps xmm7,xmm2 |
; movss [esi],xmm7 |
; cnv = n1 * rsqrt( sum of the squares of all four lanes of n1 ) |
movaps xmm7,.n1 ;xmm0 |
mulps xmm7,xmm7 ; normalize |
haddps xmm7,xmm7 |
haddps xmm7,xmm7 |
rsqrtps xmm7,xmm7 |
mulps xmm7,.n1 ;xmm0 |
; andps xmm7,[abs_z_coof] |
movaps .cnv,xmm7 |
|
; split the clamped texture coords: xmm7 - integer parts, .xf/.yf - fractions |
movaps xmm6,xmm2 |
minps xmm6,[tex_m2] ; float TEX_X-2,TEX_Y-2 |
cvttps2dq xmm7,xmm6 |
cvtdq2ps xmm4,xmm7 |
subps xmm6,xmm4 |
movlps .xf,xmm6 |
; movaps xmm5,.lv |
; walk the light table, 64 bytes per entry |
mov eax,lights_aligned ; global |
align 16 |
.again_col: |
; xmm0 = dot product of the light entry vector and the pixel normal, in all lanes |
movaps xmm0,[eax] ; calc multple lights |
mulps xmm0,.cnv ;.lv ; last dword should be zeroed |
haddps xmm0,xmm0 |
haddps xmm0,xmm0 |
; andps xmm0,[abs_val] ;calc absolute value |
if 1 |
; stencil |
; the dot^16 term below is added only when the stencil value lies within |
; +- [aprox] of the interpolated z; otherwise xmm4 stays zero |
movhlps xmm6,xmm2 |
movhlps xmm4,xmm2 |
addss xmm6,[aprox] |
subss xmm4,[aprox] |
cmpnltss xmm6,dword[esi] |
cmpnltss xmm4,dword[esi] |
xorps xmm6,xmm4 |
xorps xmm4,xmm4 |
movd ebx,xmm6 |
cmp ebx,-1 |
jne .no_reflective |
end if |
; xmm4 = dot^16 * [eax+48] ( presumably the specular colour - confirm against the light table ) |
movaps xmm4,xmm0 |
mulps xmm4,xmm4 |
mulps xmm4,xmm4 |
mulps xmm4,xmm4 |
mulps xmm4,xmm4 |
mulps xmm4,[eax+48] |
|
.no_reflective: |
; xmm4 += max( dot, 0 ) * [eax+16] + [eax+32]; keep the per channel maximum over all lights |
maxps xmm0,[the_zero] |
; movaps xmm1,xmm0 |
mulps xmm0,[eax+16] |
addps xmm4,xmm0 |
addps xmm4,[eax+32] |
maxps xmm5,xmm4 |
add eax,64 |
cmp eax,lights_aligned_end |
jnz .again_col |
minps xmm5,[mask_255f] |
|
; texture coords work |
; eax -> texel ( tx, ty ), ebx -> texel one row below; texels are 3 bytes wide |
movd eax,xmm7 |
psrldq xmm7,4 |
movd ebx,xmm7 |
shl ebx,TEX_SHIFT |
add eax,ebx |
lea eax,[eax*3] |
add eax,.tx_ptr |
mov ebx,eax |
add ebx,TEX_X*3 |
movd xmm7,[eax] |
movd xmm6,[eax+3] |
movd xmm4,[ebx] |
movd xmm3,[ebx+3] |
; interleave so that the four samples of each channel become adjacent bytes |
punpcklbw xmm7,xmm6 ;xmm7 r1 r2 g1 g2 b1 b2 |
punpcklbw xmm4,xmm3 ;xmm4 r3 r4 g3 g4 b3 b4 |
punpcklwd xmm7,xmm4 ;xmm7 r1 r2 r3 r4 g1 g2 g3 g4 ... |
movdqa xmm6,xmm7 |
movdqa xmm4,xmm7 |
psrldq xmm6,4 |
psrldq xmm4,8 |
|
; widen the bytes of each channel to dwords |
punpcklbw xmm7,[the_zero] ; broadcasted 0 |
punpcklbw xmm6,[the_zero] |
punpcklbw xmm4,[the_zero] |
punpcklwd xmm7,[the_zero] |
punpcklwd xmm6,[the_zero] |
punpcklwd xmm4,[the_zero] |
|
|
; calc w ......... |
; xmm3 = (1-xf)(1-yf), xf(1-yf), (1-xf)yf, xf*yf - weights of the four texels |
movlps xmm3,[the_one] ; broadcasted dword 1.0 |
cvtdq2ps xmm7,xmm7 |
subps xmm3,.xf |
cvtdq2ps xmm6,xmm6 |
movhps xmm3,.xf |
cvtdq2ps xmm4,xmm4 |
movaps xmm1,xmm3 ; 1-xf, 1-yf, xf, yf |
shufps xmm3,xmm3,10001000b |
shufps xmm1,xmm1,11110101b |
mulps xmm3,xmm1 |
|
; weighted sum per channel, then gather the three channels into xmm7 |
mulps xmm7,xmm3 |
mulps xmm6,xmm3 |
mulps xmm4,xmm3 |
haddps xmm7,xmm7 ; r |
haddps xmm6,xmm6 ; g |
haddps xmm4,xmm4 ; b |
haddps xmm7,xmm7 ; r |
haddps xmm6,xmm6 ; g |
haddps xmm4,xmm4 ; b |
movlhps xmm7,xmm6 |
shufps xmm7,xmm7,11101000b |
movlhps xmm7,xmm4 |
|
; light * texel, scaled back by 256, then added with saturation to the screen pixel |
mulps xmm5,xmm7 |
cvtps2dq xmm5,xmm5 |
psrld xmm5,8 |
movd xmm6,[edi] |
packssdw xmm5,xmm5 |
packuswb xmm5,xmm5 |
paddusb xmm5,xmm6 |
movd [edi],xmm5 |
.skip: |
; next pixel: step both buffers, the normal and tx, ty, z |
add edi,4 |
add esi,4 |
; addps xmm0,.dn |
movaps xmm0,.n1 ; cur normal |
addps xmm0,.dn |
addps xmm2,.dtx |
movaps .n1,xmm0 |
sub ecx,1 |
jnz .ddraw |
|
.end_line: |
; must match the sub esp in the prologue |
add esp,352 |
pop ebp |
|
ret |
Property changes: |
Added: svn:eol-style |
+native |
\ No newline at end of property |