; Glass-like triangle rendering by Maciej Guba.
; http://macgub.hekko.pl, macgub3@wp.pl
|
ROUND2 equ 10                   ; fractional bits of the fixed-point edge x values

;--------------------------------------------------------------------------
; glass_tri - render a glass-like triangle, one scanline at a time.
; Author's notes: z coordinate is interpolated (Catmull algorithm); the
; normal vector is normalized for every pixel (that happens in glass_line).
;
; in:   eax  - x1 shl 16 + y1
;       ebx  - x2 shl 16 + y2
;       ecx  - x3 shl 16 + y3
;       edx  - pointer to stencil buffer (handed on to glass_line)
;       esi  - pointer to Z-buffer filled with dword floats
;              (not stored here: the write to .Zbuf is commented out)
;       edi  - pointer to screen buffer
;       xmm0 - 1st normal vector
;       xmm1 - 2nd normal vector
;       xmm2 - 3rd normal vector
;       xmm3 - normalized light vector
;       xmm4 - lo -> hi: z1, z2, z3 as dword floats
;       xmm5 - lo -> hi: y_min, y_max, x_min, x_max as dword integers
;       stack - no parameters
; out:  nothing
; The procedure does not save registers (only ebp/esp are restored).
;--------------------------------------------------------------------------
glass_tri:
        push    ebp
        mov     ebp,esp
        sub     esp,512                 ; space for the locals below
        sub     ebp,16
        and     ebp,0xfffffff0          ; 16-byte aligned frame base, so the
                                        ; movaps/movdqa accesses to locals are legal

  .1_nv         equ [ebp-16]
  .2_nv         equ [ebp-32]
  .3_nv         equ [ebp-48]
  .l_v          equ [ebp-64]
  .z3           equ [ebp-72]
  .z2           equ [ebp-76]
  .z1           equ [ebp-80]
  .x1           equ [ebp-82]
  .y1           equ [ebp-84]
  .x2           equ [ebp-86]
  .y2           equ [ebp-88]
  .x3           equ [ebp-90]
  .y3           equ [ebp-92]
  .Zbuf         equ [ebp-96]
  .x_max        equ [ebp-100]
  .x_min        equ [ebp-104]
  .y_max        equ [ebp-108]
  .y_min        equ [ebp-112]
  .screen       equ [ebp-116]
  .dx12         equ [ebp-120]
  .dx13         equ [ebp-124]
  .dx23         equ [ebp-128]
  .dn12         equ [ebp-144]
  .dn13         equ [ebp-160]
  .dn23         equ [ebp-176]
  .dz12         equ [ebp-180]
  .dz13         equ [ebp-184]
  .dz23         equ [ebp-188]

  .cnv1         equ [ebp-208]           ; current normal vectors
  .cnv2         equ [ebp-224]
  .cz2          equ [ebp-228]
  .cz1          equ [ebp-232]
  .stencil_buff equ [ebp-236]

  ; Sort the vertices by y (low word of eax/ebx/ecx), keeping the z values
  ; in xmm4 and the normals in xmm0..xmm2 in step with every swap.
  .sort3:
        cmp     ax,bx
        jle     .sort1
        xchg    eax,ebx
        shufps  xmm4,xmm4,11100001b     ; swap z1 <-> z2
        movaps  xmm6,xmm0
        movaps  xmm0,xmm1
        movaps  xmm1,xmm6

  .sort1:
        cmp     bx,cx
        jle     .sort2
        xchg    ebx,ecx
        shufps  xmm4,xmm4,11011000b     ; swap z2 <-> z3
        movaps  xmm6,xmm1
        movaps  xmm1,xmm2
        movaps  xmm2,xmm6

        jmp     .sort3

  .sort2:
        movaps  .z1,xmm4                ; stores z1, z2, z3 (and the unused 4th lane)
        mov     .y1,eax                 ; one dword store fills .y1 (low) and .x1 (high)
        mov     .y2,ebx
        mov     .y3,ecx
        mov     .stencil_buff,edx

        movdqa  .y_min,xmm5             ; stores y_min, y_max, x_min, x_max

if 1    ; skip the triangle when no part of it can be in the visible area
        ; Conservative trivial rejection. The triangle is skipped only when
        ; it lies completely on one side of the clip rectangle; the limits
        ; used are the ones glass_line rejects scanlines/spans with.
        ; (The former packed test kept the compare bits of two words that
        ; belong to the float z1, and also dropped triangles that straddle
        ; the rectangle with all three vertices outside of it.)
        movsx   eax,word .y3            ; largest y after the sort
        cmp     eax,.y_min
        jle     .rpt_loop2_end          ; every scanline y < y3 is above y_min
        movsx   eax,word .y1            ; smallest y after the sort
        cmp     eax,.y_max
        jge     .rpt_loop2_end          ; every scanline is at or below y_max

        movsx   eax,word .x1
        cmp     eax,.x_min
        jg      .gt_x_right
        movsx   eax,word .x2
        cmp     eax,.x_min
        jg      .gt_x_right
        movsx   eax,word .x3
        cmp     eax,.x_min
        jle     .rpt_loop2_end          ; all x <= x_min: entirely to the left
  .gt_x_right:
        movsx   eax,word .x1
        cmp     eax,.x_max
        jl      .gt_visible
        movsx   eax,word .x2
        cmp     eax,.x_max
        jl      .gt_visible
        movsx   eax,word .x3
        cmp     eax,.x_max
        jge     .rpt_loop2_end          ; all x >= x_max: entirely to the right
  .gt_visible:
end if
        movaps  .1_nv,xmm0
        movaps  .2_nv,xmm1
        movaps  .3_nv,xmm2
        movaps  .l_v,xmm3
      ; mov     .Zbuf,esi
        mov     .screen,edi

        ; ---- per-scanline deltas along edge 1-2 ----
        mov     bx,.y2
        sub     bx,.y1
        jnz     .rpt_dx12_make

        xorps   xmm7,xmm7               ; flat edge: all deltas are zero
        mov     dword .dx12,0
        mov     dword .dz12,0
        movaps  .dn12,xmm7
        jmp     .rpt_dx12_done

  .rpt_dx12_make:
        mov     ax,.x2
        sub     ax,.x1
        cwde
        movsx   ebx,bx
        shl     eax,ROUND2              ; to fixed point before dividing
        cdq
        idiv    ebx
        mov     .dx12,eax

        cvtsi2ss xmm6,ebx               ; xmm6 = (float)(y2 - y1)
        movss   xmm5,.z2
        subss   xmm5,.z1
        divss   xmm5,xmm6
        movss   .dz12,xmm5

        movaps  xmm0,.2_nv
        subps   xmm0,.1_nv
        shufps  xmm6,xmm6,0             ; broadcast the divisor
        divps   xmm0,xmm6
        movaps  .dn12,xmm0

  .rpt_dx12_done:

        ; ---- per-scanline deltas along edge 1-3 ----
        mov     bx,.y3
        sub     bx,.y1
        jnz     .rpt_dx13_make

        xorps   xmm7,xmm7
        mov     dword .dx13,0
        mov     dword .dz13,0
        movaps  .dn13,xmm7
        jmp     .rpt_dx13_done

  .rpt_dx13_make:
        mov     ax,.x3
        sub     ax,.x1
        cwde
        movsx   ebx,bx
        shl     eax,ROUND2
        cdq
        idiv    ebx
        mov     .dx13,eax

        cvtsi2ss xmm6,ebx               ; xmm6 = (float)(y3 - y1)
        movss   xmm5,.z3
        subss   xmm5,.z1
        divss   xmm5,xmm6
        movss   .dz13,xmm5

        movaps  xmm0,.3_nv
        subps   xmm0,.1_nv
        shufps  xmm6,xmm6,0
        divps   xmm0,xmm6
        movaps  .dn13,xmm0

  .rpt_dx13_done:

        ; ---- per-scanline deltas along edge 2-3 ----
        mov     bx,.y3
        sub     bx,.y2
        jnz     .rpt_dx23_make

        xorps   xmm7,xmm7
        mov     dword .dx23,0
        mov     dword .dz23,0
        movaps  .dn23,xmm7
        jmp     .rpt_dx23_done

  .rpt_dx23_make:
        mov     ax,.x3
        sub     ax,.x2
        cwde
        movsx   ebx,bx
        shl     eax,ROUND2
        cdq
        idiv    ebx
        mov     .dx23,eax

        cvtsi2ss xmm6,ebx               ; xmm6 = (float)(y3 - y2)
        movss   xmm5,.z3
        subss   xmm5,.z2
        divss   xmm5,xmm6
        movss   .dz23,xmm5

        movaps  xmm0,.3_nv
        subps   xmm0,.2_nv
        shufps  xmm6,xmm6,0
        divps   xmm0,xmm6
        movaps  .dn23,xmm0

  .rpt_dx23_done:

        ; Both edge walkers start at vertex 1:
        ;   eax / .cz1 / .cnv1 follow edge 1-3,
        ;   ebx / .cz2 / .cnv2 follow edge 1-2 (and later edge 2-3).
        movsx   eax,word .x1
        shl     eax,ROUND2
        mov     ebx,eax
        mov     edx,.z1
        mov     .cz1,edx
        mov     .cz2,edx
        movaps  xmm0,.1_nv
        movaps  .cnv1,xmm0
        movaps  .cnv2,xmm0

        movsx   ecx,word .y1
        cmp     cx,.y2
        jge     .rpt_loop1_end          ; upper part has no scanlines

  ; ---- upper part: scanlines y1 .. y2-1 ----
  .rpt_loop1:
        pushad                          ; glass_line does not preserve registers

        movaps  xmm2,.y_min
        movaps  xmm0,.cnv1
        movaps  xmm1,.cnv2
        movlps  xmm3,.cz1               ; loads .cz1 and .cz2 together
        movaps  xmm4,.l_v
        sar     ebx,ROUND2              ; fixed point -> integer x
        sar     eax,ROUND2
        mov     edx,.stencil_buff
        mov     edi,.screen
      ; mov     esi,.Zbuf

        call    glass_line

        popad
        movaps  xmm0,.cnv1              ; step both edges to the next scanline
        movaps  xmm1,.cnv2
        movss   xmm2,.cz1
        movss   xmm3,.cz2
        addps   xmm0,.dn13
        addps   xmm1,.dn12
        addss   xmm2,.dz13
        addss   xmm3,.dz12
        add     eax,.dx13
        add     ebx,.dx12

        movaps  .cnv1,xmm0
        movaps  .cnv2,xmm1
        movss   .cz1,xmm2
        movss   .cz2,xmm3

        add     ecx,1
        cmp     cx,.y2
        jl      .rpt_loop1

  .rpt_loop1_end:
        movsx   ecx,word .y2
        cmp     cx,.y3
        jge     .rpt_loop2_end          ; lower part has no scanlines

        ; Restart the second walker at vertex 2; eax keeps the current
        ; position on edge 1-3.
        movsx   ebx,word .x2            ; eax - cur x1
        shl     ebx,ROUND2              ; ebx - cur x2
        push    dword .z2
        pop     dword .cz2
        movaps  xmm0,.2_nv
        movaps  .cnv2,xmm0

  ; ---- lower part: scanlines y2 .. y3-1 ----
  .rpt_loop2:
        pushad

        movaps  xmm2,.y_min
        movaps  xmm0,.cnv1
        movaps  xmm1,.cnv2
        movlps  xmm3,.cz1
        movaps  xmm4,.l_v
        sar     ebx,ROUND2
        sar     eax,ROUND2
        mov     edx,.stencil_buff
        mov     edi,.screen
      ; mov     esi,.Zbuf

        call    glass_line

        popad
        movaps  xmm0,.cnv1
        movaps  xmm1,.cnv2
        movss   xmm2,.cz1
        movss   xmm3,.cz2
        addps   xmm0,.dn13
        addps   xmm1,.dn23
        addss   xmm2,.dz13
        addss   xmm3,.dz23
        add     eax,.dx13
        add     ebx,.dx23

        movaps  .cnv1,xmm0
        movaps  .cnv2,xmm1
        movss   .cz1,xmm2
        movss   .cz2,xmm3

        add     ecx,1
        cmp     cx,.y3
        jl      .rpt_loop2

  .rpt_loop2_end:

        add     esp,512
        pop     ebp

        ret
align 16
;--------------------------------------------------------------------------
; glass_line - shade one horizontal span of a glass-like triangle and blend
; it into the screen buffer with a saturating byte add.
;
; in:   xmm0 - normal vector 1 (belongs to x1)
;       xmm1 - normal vector 2 (belongs to x2)
;       xmm3 - lo -> hi: z1, z2 as dword floats
;       xmm2 - lo -> hi: y_min, y_max, x_min, x_max as dword integers
;       xmm4 - normalized light vector (stored in .lv, not read afterwards;
;              the lights come from the global lights_aligned list)
;       eax  - x1
;       ebx  - x2
;       ecx  - y
;       edx  - stencil buffer pointer
;       edi  - screen buffer
;       esi  - z buffer ===> not needed in glass rendering
; out:  nothing
; Registers are not preserved (only ebp/esp are restored).
; Reads the globals xres_var, lights_aligned, lights_aligned_end, the_zero
; and mask_255f, which are defined outside this block.
;--------------------------------------------------------------------------
glass_line:
        push    ebp
        mov     ebp,esp
        sub     esp,256                 ; space for the locals below
        sub     ebp,16
        and     ebp,0xfffffff0          ; 16-byte aligned frame base for movaps

  .n1           equ [ebp-16]
  .n2           equ [ebp-32]
  .lv           equ [ebp-48]
  .lx1          equ [ebp-52]
  .lx2          equ [ebp-56]
  .z2           equ [ebp-60]
  .z1           equ [ebp-64]
  .screen       equ [ebp-68]
  .zbuff        equ [ebp-72]
  .x_max        equ [ebp-74]
  .x_min        equ [ebp-76]
  .y_max        equ [ebp-78]
  .y_min        equ [ebp-80]
  .dn           equ [ebp-96]
  .dz           equ [ebp-100]
  .y            equ [ebp-104]
  .cnv          equ [ebp-128]
  .col_sum_b    equ [ebp-136]
  .col_sum_g    equ [ebp-140]
  .col_sum_r    equ [ebp-144]
  .cur_col      equ [ebp-160]
  .stencil_buf  equ [ebp-164]

        mov     .y,ecx
        packssdw xmm2,xmm2              ; clip limits -> signed (saturated) words
        movq    .y_min,xmm2             ; fills .y_min, .y_max, .x_min, .x_max
        cmp     cx,.y_min
        jl      .end_rp_line            ; scanline above the clip area
        cmp     cx,.y_max
        jge     .end_rp_line            ; scanline below the clip area

        cmp     eax,ebx
        je      .end_rp_line            ; empty span
        jl      @f
        xchg    eax,ebx                 ; make eax the left end; swap the
        movaps  xmm7,xmm0               ; normals and the z values with it
        movaps  xmm0,xmm1
        movaps  xmm1,xmm7
        shufps  xmm3,xmm3,11100001b
  @@:

        cmp     ax,.x_max
        jge     .end_rp_line            ; span starts right of the clip area
        cmp     bx,.x_min
        jle     .end_rp_line            ; span ends left of the clip area
        movaps  .lv,xmm4
        movaps  .n1,xmm0
        movaps  .n2,xmm1
        mov     .lx1,eax
        mov     .lx2,ebx
        mov     .stencil_buf,edx
        movlps  .z1,xmm3                ; stores .z1 and .z2

        ; per-pixel deltas of the normal (.dn) and of z (.dz)
        sub     ebx,eax
        cvtsi2ss xmm7,ebx               ; xmm7 = (float)(x2 - x1)
        shufps  xmm7,xmm7,0
        subps   xmm1,xmm0
        divps   xmm1,xmm7
        movaps  .dn,xmm1
        psrldq  xmm3,4                  ; bring z2 into the low lane
        subss   xmm3,.z1
        divss   xmm3,xmm7
        movss   .dz,xmm3

        ; Left clipping: when the span starts left of x_min, advance z and
        ; the normal by (x_min - x1) steps and start drawing at x_min.
        mov     ebx,.lx1
        cmp     bx,.x_min
        jge     @f
        movsx   eax,word .x_min         ; signed word (packssdw output), as
                                        ; in the signed compares around it
        sub     eax,ebx
        cvtsi2ss xmm7,eax
        shufps  xmm7,xmm7,0
        mulss   xmm3,xmm7
        mulps   xmm1,xmm7
        addss   xmm3,.z1
        addps   xmm1,.n1
        movsx   eax,word .x_min
        movss   .z1,xmm3
        movaps  .n1,xmm1
        mov     dword .lx1,eax

  @@:
        ; Right clipping: the span must not run past x_max.
        movsx   eax,word .x_max         ; signed word, compared as signed below
        cmp     .lx2,eax
        jl      @f
        mov     .lx2,eax
  @@:
        ; byte offset of the first pixel = (xres_var * y + lx1) * 4
        movzx   eax,word[xres_var]
        mul     dword .y                ; clobbers edx (stencil ptr is in .stencil_buf)

        add     eax,.lx1
        shl     eax,2
        add     edi,eax                 ; edi -> first screen pixel of the span
        mov     ebx,eax
        add     ebx,.stencil_buf        ; ebx -> matching stencil entry (only
                                        ; read by the disabled code below)

        mov     ecx,.lx2
        sub     ecx,.lx1                ; ecx = number of pixels to draw

        movaps  xmm0,.n1                ; xmm0 = running normal
        movss   xmm2,.z1                ; xmm2 = running z
align 16
  .ddraw:
        movaps  xmm7,xmm0
        mulps   xmm7,xmm7               ; normalize
        haddps  xmm7,xmm7
        haddps  xmm7,xmm7               ; sum of squares in every lane
        rsqrtps xmm7,xmm7               ; approximate 1/sqrt
        mulps   xmm7,xmm0
        maxps   xmm7,[the_zero]
        movups  .cnv,xmm7

        mov     edx,lights_aligned      ; lights_aligned - global variable
        xorps   xmm1,xmm1               ; instead global can be used .lv - light vect.
                                        ; xmm1 collects the per-channel maximum

  .again_col:
        movups  xmm7,.cnv
        mulps   xmm7,[edx]              ; dot product of the normal with the
        haddps  xmm7,xmm7               ; vector at [edx], broadcast to all lanes
        haddps  xmm7,xmm7
if 0
        cmp     [bump_flag],1           ; on/off temporaly
                                        ; depend on bump button
        je      @f
        ; stencil
        movss   xmm5,xmm2
        movss   xmm6,xmm2
        addss   xmm5,[aprox]
        subss   xmm6,[aprox]
        ; Stencil buffer for now not work as I expected,
        ; moreover - it not work at all.
        cmpnltss xmm5,dword[ebx]
        cmpnltss xmm6,dword[ebx]
        xorps   xmm5,xmm6
        xorps   xmm6,xmm6
        movd    eax,xmm5
        cmp     eax,-1
        jne     .no_reflective
end if
  @@:
        movaps  xmm6,xmm7
        mulps   xmm6,xmm6
        mulps   xmm6,xmm6

        mulps   xmm6,xmm6               ; xmm6 = dot^8
        mulps   xmm6,[edx+48]           ; presumably the specular colour - TODO confirm
  .no_reflective:
        mulps   xmm7,[edx+16]           ; presumably the diffuse colour - TODO confirm
        addps   xmm7,xmm6
        addps   xmm7,[edx+32]           ; presumably the ambient colour - TODO confirm
        minps   xmm7,[mask_255f]        ; global

        maxps   xmm1,xmm7
        add     edx,64                  ; size of one light in aligned list
        cmp     edx,lights_aligned_end
        jb      .again_col              ; addresses are unsigned: jb, not jl
        cvtps2dq xmm1,xmm1
        movd    xmm6,[edi]              ; pixel already on screen
        packssdw xmm1,xmm1
        packuswb xmm1,xmm1              ; to 4 saturated bytes
        paddusb xmm1,xmm6               ; saturating add onto the old pixel
        movd    [edi],xmm1

  .skip:
        add     edi,4
        add     ebx,4                   ; stencil_buff
        addps   xmm0,.dn
        addss   xmm2,.dz
        sub     ecx,1
        jnz     .ddraw

  .end_rp_line:
        add     esp,256
        pop     ebp

        ret