/programs/demos/3DS/3GLASS.INC |
---|
0,0 → 1,550 |
; Glass like rendering triangle by Maciej Guba. |
; http://macgub.hekko.pl, macgub3@wp.pl |
ROUND2 equ 10 |
glass_tri: |
;----procedure render glass like triangle with z coord -- |
;----interpolation ( Catmull algorithm )----------------- |
;----I normalize normal vector in every pixel ----------- |
;------------------in - eax - x1 shl 16 + y1 ------------ |
;---------------------- ebx - x2 shl 16 + y2 ------------ |
;---------------------- ecx - x3 shl 16 + y3 ------------ |
;---------------------- edx - ptr to stencil_buff ------- |
;---------------------- esi - pointer to Z-buffer filled- |
;---------------------- with dd float variables-------- |
;---------------------- (not stored by this routine, --- |
;---------------------- the .Zbuf store is commented out) |
;---------------------- edi - pointer to screen buffer--- |
;---------------------- xmm0 - 1st normal vector -------- |
;---------------------- xmm1 - 2nd normal vector -------- |
;---------------------- xmm2 - 3rd normal vector -------- |
;---------------------- xmm3 - normalized light vector -- |
;---------------------- xmm4 - lo -> hi z1, z2, z3 coords |
;---------------------- as dwords floats --------------- |
;---------------------- xmm5 - lo -> hi y_min, y_max, --- |
;---------------------- x_min, x_max as dword integers - |
;---------------------- stack - no parameters ----------- |
;-------------------------------------------------------- |
;----------------- procedure doesn't save registers !! -- |
; Flow: sort the three vertices by y, reject the triangle |
; when no vertex coordinate lies inside the clip window, |
; compute per-scanline deltas for x (fixed point, ROUND2 |
; fractional bits), z and the normal along each edge, then |
; call glass_line once per scanline for the upper part |
; (edges 1-3 / 1-2) and the lower part (edges 1-3 / 2-3). |
push ebp |
mov ebp,esp |
sub esp,512 |
; ebp is moved down and rounded to a 16 byte boundary so |
; the locals below can be accessed with movaps / movdqa. |
sub ebp,16 |
and ebp,0xfffffff0 |
.1_nv equ [ebp-16] |
.2_nv equ [ebp-32] |
.3_nv equ [ebp-48] |
.l_v equ [ebp-64] |
.z3 equ [ebp-72] |
.z2 equ [ebp-76] |
.z1 equ [ebp-80] |
; x/y of each vertex are adjacent words, so one dword |
; store of "x shl 16 + y" fills both (see .sort2). |
.x1 equ [ebp-82] |
.y1 equ [ebp-84] |
.x2 equ [ebp-86] |
.y2 equ [ebp-88] |
.x3 equ [ebp-90] |
.y3 equ [ebp-92] |
.Zbuf equ [ebp-96] |
.x_max equ [ebp-100] |
.x_min equ [ebp-104] |
.y_max equ [ebp-108] |
.y_min equ [ebp-112] |
.screen equ [ebp-116] |
.dx12 equ [ebp-120] |
.dx13 equ [ebp-124] |
.dx23 equ [ebp-128] |
.dn12 equ [ebp-144] |
.dn13 equ [ebp-160] |
.dn23 equ [ebp-176] |
.dz12 equ [ebp-180] |
.dz13 equ [ebp-184] |
.dz23 equ [ebp-188] |
.cnv1 equ [ebp-208] ; cur normal vectors |
.cnv2 equ [ebp-224] |
.cz2 equ [ebp-228] |
.cz1 equ [ebp-232] |
.stencil_buff equ [ebp-236] |
; Sort so that y1 <= y2 <= y3 (y is the low word of each |
; register). Every swap also swaps the matching z lanes |
; of xmm4 and the matching normal registers. |
.sort3: ; sort triangle coordinates... |
cmp ax,bx |
jle .sort1 |
xchg eax,ebx |
shufps xmm4,xmm4,11100001b |
movaps xmm6,xmm0 |
movaps xmm0,xmm1 |
movaps xmm1,xmm6 |
.sort1: |
cmp bx,cx |
jle .sort2 |
xchg ebx,ecx |
shufps xmm4,xmm4,11011000b |
movaps xmm6,xmm1 |
movaps xmm1,xmm2 |
movaps xmm2,xmm6 |
jmp .sort3 |
.sort2: |
movaps .z1,xmm4 |
mov .y1,eax |
mov .y2,ebx |
mov .y3,ecx |
mov .stencil_buff, edx |
movdqa .y_min,xmm5 |
if 1 ; check if at least a fragment |
packssdw xmm5,xmm5 ; of triangle is in visible area |
pshuflw xmm5,xmm5,11011000b |
; 16 bytes from .y3: words y3,x3,y2,x2,y1,x1 followed by |
; the four bytes of .z1 (words 6 and 7). |
movdqu xmm7,.y3 |
movdqa xmm6,xmm5 |
pshufd xmm5,xmm5,0 ; xmm5 lo-hi -> broadcasted y_min, x_min |
pshufd xmm6,xmm6,01010101b ;xmm6 -> brd y_max x_max |
movdqa xmm4,xmm7 |
pcmpgtw xmm7,xmm5 |
pcmpgtw xmm4,xmm6 |
pxor xmm7,xmm4 |
pmovmskb eax,xmm7 |
; Keep only the high-byte bits of words 0..5 (the vertex |
; coordinates). The previous mask 0x00aaaaaa also kept |
; bits 13 and 15, i.e. the raw halves of the z1 float, |
; which made the rejection depend on z1's bit pattern. |
and eax,0x00000aaa |
or eax,eax |
jz .rpt_loop2_end |
end if |
movaps .1_nv,xmm0 |
movaps .2_nv,xmm1 |
movaps .3_nv,xmm2 |
movaps .l_v,xmm3 |
; mov .Zbuf,esi |
mov .screen,edi |
; Edge 1-2 deltas per scanline: dx12 is fixed point with |
; ROUND2 fractional bits, dz12 / dn12 are floats. All are |
; zero when y1 = y2 (avoids the division by zero). |
mov bx,.y2 ; calc deltas |
sub bx,.y1 |
jnz .rpt_dx12_make |
xorps xmm7,xmm7 |
mov dword .dx12,0 |
mov dword .dz12,0 |
movaps .dn12,xmm7 |
jmp .rpt_dx12_done |
.rpt_dx12_make: |
mov ax,.x2 |
sub ax,.x1 |
cwde |
movsx ebx,bx |
shl eax,ROUND2 |
cdq |
idiv ebx |
mov .dx12,eax |
cvtsi2ss xmm6,ebx |
movss xmm5,.z2 |
subss xmm5,.z1 |
divss xmm5,xmm6 |
movss .dz12,xmm5 |
movaps xmm0,.2_nv |
subps xmm0,.1_nv |
shufps xmm6,xmm6,0 |
divps xmm0,xmm6 |
movaps .dn12,xmm0 |
.rpt_dx12_done: |
; Edge 1-3 deltas, same scheme. |
mov bx,.y3 ; calc deltas |
sub bx,.y1 |
jnz .rpt_dx13_make |
xorps xmm7,xmm7 |
mov dword .dx13,0 |
mov dword .dz13,0 |
movaps .dn13,xmm7 |
jmp .rpt_dx13_done |
.rpt_dx13_make: |
mov ax,.x3 |
sub ax,.x1 |
cwde |
movsx ebx,bx |
shl eax,ROUND2 |
cdq |
idiv ebx |
mov .dx13,eax |
cvtsi2ss xmm6,ebx |
movss xmm5,.z3 |
subss xmm5,.z1 |
divss xmm5,xmm6 |
movss .dz13,xmm5 |
movaps xmm0,.3_nv |
subps xmm0,.1_nv |
shufps xmm6,xmm6,0 |
divps xmm0,xmm6 |
movaps .dn13,xmm0 |
.rpt_dx13_done: |
; Edge 2-3 deltas, same scheme. |
mov bx,.y3 ; calc deltas |
sub bx,.y2 |
jnz .rpt_dx23_make |
xorps xmm7,xmm7 |
mov dword .dx23,0 |
mov dword .dz23,0 |
movaps .dn23,xmm7 |
jmp .rpt_dx23_done |
.rpt_dx23_make: |
mov ax,.x3 |
sub ax,.x2 |
cwde |
movsx ebx,bx |
shl eax,ROUND2 |
cdq |
idiv ebx |
mov .dx23,eax |
cvtsi2ss xmm6,ebx |
movss xmm5,.z3 |
subss xmm5,.z2 |
divss xmm5,xmm6 |
movss .dz23,xmm5 |
movaps xmm0,.3_nv |
subps xmm0,.2_nv |
shufps xmm6,xmm6,0 |
divps xmm0,xmm6 |
movaps .dn23,xmm0 |
.rpt_dx23_done: |
; Both edge walkers start at vertex 1: |
; eax - cur x on edge 1-3, ebx - cur x on edge 1-2 |
; (fixed point), .cz1/.cz2 and .cnv1/.cnv2 likewise. |
movsx eax,word .x1 |
shl eax,ROUND2 |
mov ebx,eax |
mov edx,.z1 |
mov .cz1,edx |
mov .cz2,edx |
movaps xmm0,.1_nv |
movaps .cnv1,xmm0 |
movaps .cnv2,xmm0 |
movsx ecx,word .y1 |
cmp cx,.y2 |
jge .rpt_loop1_end |
; Upper part: scanlines y1 .. y2-1. |
.rpt_loop1: |
; pushad/popad keep the fixed point eax/ebx and ecx |
; intact across the call; glass_line gets integer x. |
pushad |
movaps xmm2,.y_min |
movaps xmm0,.cnv1 |
movaps xmm1,.cnv2 |
movlps xmm3,.cz1 |
movaps xmm4,.l_v |
sar ebx,ROUND2 |
sar eax,ROUND2 |
mov edx,.stencil_buff |
mov edi,.screen |
; mov esi,.Zbuf |
call glass_line |
popad |
; Step both edges to the next scanline. |
movaps xmm0,.cnv1 |
movaps xmm1,.cnv2 |
movss xmm2,.cz1 |
movss xmm3,.cz2 |
addps xmm0,.dn13 |
addps xmm1,.dn12 |
addss xmm2,.dz13 |
addss xmm3,.dz12 |
add eax,.dx13 |
add ebx,.dx12 |
movaps .cnv1,xmm0 |
movaps .cnv2,xmm1 |
movss .cz1,xmm2 |
movss .cz2,xmm3 |
add ecx,1 |
cmp cx,.y2 |
jl .rpt_loop1 |
.rpt_loop1_end: |
; Lower part: scanlines y2 .. y3-1. The second walker is |
; restarted at vertex 2 and now follows edge 2-3. |
movsx ecx,word .y2 |
cmp cx,.y3 |
jge .rpt_loop2_end |
movsx ebx,word .x2 ; eax - cur x1 |
shl ebx,ROUND2 ; ebx - cur x2 |
push dword .z2 |
pop dword .cz2 |
movaps xmm0,.2_nv |
movaps .cnv2,xmm0 |
.rpt_loop2: |
pushad |
movaps xmm2,.y_min |
movaps xmm0,.cnv1 |
movaps xmm1,.cnv2 |
movlps xmm3,.cz1 |
movaps xmm4,.l_v |
sar ebx,ROUND2 |
sar eax,ROUND2 |
mov edx,.stencil_buff |
mov edi,.screen |
; mov esi,.Zbuf |
call glass_line |
popad |
movaps xmm0,.cnv1 |
movaps xmm1,.cnv2 |
movss xmm2,.cz1 |
movss xmm3,.cz2 |
addps xmm0,.dn13 |
addps xmm1,.dn23 |
addss xmm2,.dz13 |
addss xmm3,.dz23 |
add eax,.dx13 |
add ebx,.dx23 |
movaps .cnv1,xmm0 |
movaps .cnv2,xmm1 |
movss .cz1,xmm2 |
movss .cz2,xmm3 |
add ecx,1 |
cmp cx,.y3 |
jl .rpt_loop2 |
.rpt_loop2_end: |
add esp,512 |
pop ebp |
ret |
align 16 |
glass_line: |
; Draws one horizontal span of a glass-like triangle: the |
; normal is interpolated and re-normalized per pixel, the |
; colour is computed from the lights_aligned list and added |
; (with byte saturation) to the pixel already on screen. |
; in: |
; xmm0 - normal vector 1 |
; xmm1 - normal vect 2 |
; xmm3 - lo -> hi z1, z2 coords as dwords floats |
; xmm2 - lo -> hi y_min, y_max, x_min, x_max |
; as dword integers |
; xmm4 - normalized light vector |
; eax - x1 |
; ebx - x2 |
; ecx - y |
; edx - stencil buff ptr |
; edi - screen buffer |
; esi - z buffer ===> not needed in glass rendering |
push ebp |
mov ebp,esp |
sub esp,256 |
; ebp is rounded down to a 16 byte boundary so the locals |
; can be accessed with movaps. |
sub ebp,16 |
and ebp,0xfffffff0 |
.n1 equ [ebp-16] |
.n2 equ [ebp-32] |
.lv equ [ebp-48] |
.lx1 equ [ebp-52] |
.lx2 equ [ebp-56] |
.z2 equ [ebp-60] |
.z1 equ [ebp-64] |
.screen equ [ebp-68] |
.zbuff equ [ebp-72] |
.x_max equ [ebp-74] |
.x_min equ [ebp-76] |
.y_max equ [ebp-78] |
.y_min equ [ebp-80] |
.dn equ [ebp-96] |
.dz equ [ebp-100] |
.y equ [ebp-104] |
.cnv equ [ebp-128] |
.col_sum_b equ [ebp-136] |
.col_sum_g equ [ebp-140] |
.col_sum_r equ [ebp-144] |
.cur_col equ [ebp-160] |
.stencil_buf equ [ebp-164] |
mov .y,ecx |
; Pack the clip window to signed words; the movq stores |
; y_min, y_max, x_min, x_max at .y_min .. .x_max. |
packssdw xmm2,xmm2 |
movq .y_min,xmm2 |
; Reject lines outside y_min <= y < y_max. |
cmp cx,.y_min |
jl .end_rp_line |
cmp cx,.y_max |
jge .end_rp_line ; |
; Empty span -> nothing to draw; otherwise order the ends |
; so that eax < ebx, swapping normals and z lanes as well. |
cmp eax,ebx |
je .end_rp_line |
jl @f |
xchg eax,ebx |
movaps xmm7,xmm0 |
movaps xmm0,xmm1 |
movaps xmm1,xmm7 |
shufps xmm3,xmm3,11100001b |
@@: |
; Reject spans entirely right or left of the window. |
cmp ax,.x_max |
jge .end_rp_line |
cmp bx,.x_min |
jle .end_rp_line |
movaps .lv,xmm4 |
movaps .n1,xmm0 |
movaps .n2,xmm1 |
mov .lx1,eax |
mov .lx2,ebx |
mov .stencil_buf,edx |
movlps .z1,xmm3 |
; Per-pixel deltas: .dn = (n2 - n1) / (x2 - x1), |
; .dz = (z2 - z1) / (x2 - x1). |
sub ebx,eax |
cvtsi2ss xmm7,ebx |
shufps xmm7,xmm7,0 |
subps xmm1,xmm0 |
divps xmm1,xmm7 |
movaps .dn,xmm1 |
psrldq xmm3,4 |
subss xmm3,.z1 |
divss xmm3,xmm7 |
movss .dz,xmm3 |
; Left clip: when x1 < x_min, advance z1 and n1 by |
; (x_min - x1) steps and start the span at x_min. |
; NOTE(review): x_min is zero-extended here but sign- |
; extended a few lines below; equivalent only while |
; x_min is non-negative - confirm against callers. |
mov ebx,.lx1 |
cmp bx,.x_min ; clipping on function4 |
jge @f |
movzx eax,word .x_min |
sub eax,ebx |
cvtsi2ss xmm7,eax |
shufps xmm7,xmm7,0 |
mulss xmm3,xmm7 |
mulps xmm1,xmm7 |
addss xmm3,.z1 |
addps xmm1,.n1 |
movsx eax,word .x_min |
movss .z1,xmm3 |
movaps .n1,xmm1 |
mov dword .lx1,eax |
@@: |
; Right clip: lx2 = min(lx2, x_max). |
movzx eax,word .x_max |
cmp .lx2,eax |
jl @f |
mov .lx2,eax |
@@: |
; Byte offset of the first pixel: (xres_var * y + lx1) * 4. |
; The same offset is applied to the screen pointer (edi) |
; and to the stencil buffer pointer (ebx). |
movzx eax,word[xres_var] |
mul dword .y |
add eax,.lx1 |
shl eax,2 |
add edi,eax |
mov ebx,eax |
add ebx,.stencil_buf |
; ecx = pixel count, xmm0 = cur normal, xmm2 = cur z. |
mov ecx,.lx2 |
sub ecx,.lx1 |
movaps xmm0,.n1 |
movss xmm2,.z1 |
align 16 |
.ddraw: |
; Normalize the interpolated normal with the approximate |
; reciprocal square root, then clamp components at the_zero. |
movaps xmm7,xmm0 |
mulps xmm7,xmm7 ; normalize |
haddps xmm7,xmm7 |
haddps xmm7,xmm7 |
rsqrtps xmm7,xmm7 |
mulps xmm7,xmm0 |
maxps xmm7,[the_zero] |
movups .cnv,xmm7 |
mov edx,lights_aligned ; lights_aligned - global variable |
xorps xmm1,xmm1 ; instead global can be used .lv - light vect. |
; One iteration per 64 byte light record. The dot product |
; of the normal with [edx] is broadcast in xmm7. |
; NOTE(review): [edx+16] / [edx+32] / [edx+48] look like |
; per-light colour terms (scaled by dot, constant, scaled |
; by dot^8) - confirm against copy_lights. |
.again_col: |
movups xmm7,.cnv |
mulps xmm7,[edx] |
haddps xmm7,xmm7 |
haddps xmm7,xmm7 |
if 0 |
cmp [bump_flag],1 ; on/off temporaly |
; depend on bump button |
je @f |
; stencil |
movss xmm5,xmm2 |
movss xmm6,xmm2 |
addss xmm5,[aprox] |
subss xmm6,[aprox] |
; Stencil buffer for now not work as I expected, |
; moreover - it not work at all. |
cmpnltss xmm5,dword[ebx] |
cmpnltss xmm6,dword[ebx] |
xorps xmm5,xmm6 |
xorps xmm6,xmm6 |
movd eax,xmm5 |
cmp eax,-1 |
jne .no_reflective |
end if |
@@: |
; xmm6 = dot^8 * [edx+48] |
movaps xmm6,xmm7 |
mulps xmm6,xmm6 |
mulps xmm6,xmm6 |
mulps xmm6,xmm6 |
mulps xmm6,[edx+48] |
.no_reflective: |
; colour = min(dot * [edx+16] + xmm6 + [edx+32], 255.0); |
; xmm1 keeps the per-channel maximum over all lights. |
mulps xmm7,[edx+16] |
addps xmm7,xmm6 |
addps xmm7,[edx+32] |
minps xmm7,[mask_255f] ; global |
maxps xmm1,xmm7 |
add edx,64 ; size of one light in aligned list |
cmp edx,lights_aligned_end |
jl .again_col |
; Convert to packed bytes and add, with unsigned byte |
; saturation, to the pixel already in the screen buffer. |
cvtps2dq xmm1,xmm1 |
movd xmm6,[edi] |
packssdw xmm1,xmm1 |
packuswb xmm1,xmm1 |
paddusb xmm1,xmm6 |
movd [edi],xmm1 |
.skip: |
; Next pixel: advance pointers, normal and z. |
add edi,4 |
add ebx,4 ; stencil_buff |
addps xmm0,.dn |
addss xmm2,.dz |
sub ecx,1 |
jnz .ddraw |
.end_rp_line: |
add esp,256 |
pop ebp |
ret |
/programs/demos/3DS/3R_PHG.INC |
---|
0,0 → 1,528 |
; Real Phong shading implemented in flat assembler |
; by Maciej Guba. |
; http://macgub.vxm.pl |
ROUND2 equ 10 |
real_phong_tri_z: |
;----procedure render Phongs shaded triangle with z coord |
;----interpolation ( Catmull algorithm )----------------- |
;----I normalize normal vector in every pixel ----------- |
;------------------in - eax - x1 shl 16 + y1 ------------ |
;---------------------- ebx - x2 shl 16 + y2 ------------ |
;---------------------- ecx - x3 shl 16 + y3 ------------ |
;---------------------- esi - pointer to Z-buffer filled- |
;---------------------- with dd float variables-------- |
;---------------------- edi - pointer to screen buffer--- |
;---------------------- xmm0 - 1st normal vector -------- |
;---------------------- xmm1 - 2nd normal vector -------- |
;---------------------- xmm2 - 3rd normal vector -------- |
;---------------------- xmm3 - normalized light vector -- |
;---------------------- xmm4 - lo -> hi z1, z2, z3 coords |
;---------------------- as dwords floats --------------- |
;---------------------- xmm5 - lo -> hi y_min, y_max, --- |
;---------------------- x_min, x_max as dword integers - |
;---------------------- stack - no parameters ----------- |
;-------------------------------------------------------- |
;----------------- procedure doesn't save registers !! -- |
; Flow: sort the three vertices by y, reject the triangle |
; when no vertex coordinate lies inside the clip window, |
; compute per-scanline deltas for x (fixed point, ROUND2 |
; fractional bits), z and the normal along each edge, then |
; call real_phong_line_z once per scanline for the upper |
; part (edges 1-3 / 1-2) and the lower part (1-3 / 2-3). |
push ebp |
mov ebp,esp |
sub esp,512 |
; ebp is moved down and rounded to a 16 byte boundary so |
; the locals below can be accessed with movaps / movdqa. |
sub ebp,16 |
and ebp,0xfffffff0 |
.1_nv equ [ebp-16] |
.2_nv equ [ebp-32] |
.3_nv equ [ebp-48] |
.l_v equ [ebp-64] |
.z3 equ [ebp-72] |
.z2 equ [ebp-76] |
.z1 equ [ebp-80] |
; x/y of each vertex are adjacent words, so one dword |
; store of "x shl 16 + y" fills both (see .sort2). |
.x1 equ [ebp-82] |
.y1 equ [ebp-84] |
.x2 equ [ebp-86] |
.y2 equ [ebp-88] |
.x3 equ [ebp-90] |
.y3 equ [ebp-92] |
.Zbuf equ [ebp-96] |
.x_max equ [ebp-100] |
.x_min equ [ebp-104] |
.y_max equ [ebp-108] |
.y_min equ [ebp-112] |
.screen equ [ebp-116] |
.dx12 equ [ebp-120] |
.dx13 equ [ebp-124] |
.dx23 equ [ebp-128] |
.dn12 equ [ebp-144] |
.dn13 equ [ebp-160] |
.dn23 equ [ebp-176] |
.dz12 equ [ebp-180] |
.dz13 equ [ebp-184] |
.dz23 equ [ebp-188] |
.cnv1 equ [ebp-208] ; cur normal vectors |
.cnv2 equ [ebp-224] |
.cz2 equ [ebp-228] |
.cz1 equ [ebp-232] |
; Sort so that y1 <= y2 <= y3 (y is the low word of each |
; register). Every swap also swaps the matching z lanes |
; of xmm4 and the matching normal registers. |
.sort3: ; sort triangle coordinates... |
cmp ax,bx |
jle .sort1 |
xchg eax,ebx |
shufps xmm4,xmm4,11100001b |
movaps xmm6,xmm0 |
movaps xmm0,xmm1 |
movaps xmm1,xmm6 |
.sort1: |
cmp bx,cx |
jle .sort2 |
xchg ebx,ecx |
shufps xmm4,xmm4,11011000b |
movaps xmm6,xmm1 |
movaps xmm1,xmm2 |
movaps xmm2,xmm6 |
jmp .sort3 |
.sort2: |
movaps .z1,xmm4 |
mov .y1,eax |
mov .y2,ebx |
mov .y3,ecx |
movdqa .y_min,xmm5 |
if 1 ; check if at least a fragment |
packssdw xmm5,xmm5 ; of triangle is in visible area |
pshuflw xmm5,xmm5,11011000b |
; 16 bytes from .y3: words y3,x3,y2,x2,y1,x1 followed by |
; the four bytes of .z1 (words 6 and 7). |
movdqu xmm7,.y3 |
movdqa xmm6,xmm5 |
pshufd xmm5,xmm5,0 ; xmm5 lo-hi -> broadcasted y_min, x_min |
pshufd xmm6,xmm6,01010101b ;xmm6 -> brd y_max x_max |
movdqa xmm4,xmm7 |
pcmpgtw xmm7,xmm5 |
pcmpgtw xmm4,xmm6 |
pxor xmm7,xmm4 |
pmovmskb eax,xmm7 |
; Keep only the high-byte bits of words 0..5 (the vertex |
; coordinates). The previous mask 0x00aaaaaa also kept |
; bits 13 and 15, i.e. the raw halves of the z1 float, |
; which made the rejection depend on z1's bit pattern. |
and eax,0x00000aaa |
or eax,eax |
jz .rpt_loop2_end |
end if |
movaps .1_nv,xmm0 |
movaps .2_nv,xmm1 |
movaps .3_nv,xmm2 |
movaps .l_v,xmm3 |
mov .Zbuf,esi |
mov .screen,edi |
; Edge 1-2 deltas per scanline: dx12 is fixed point with |
; ROUND2 fractional bits, dz12 / dn12 are floats. All are |
; zero when y1 = y2 (avoids the division by zero). |
mov bx,.y2 ; calc deltas |
sub bx,.y1 |
jnz .rpt_dx12_make |
xorps xmm7,xmm7 |
mov dword .dx12,0 |
mov dword .dz12,0 |
movaps .dn12,xmm7 |
jmp .rpt_dx12_done |
.rpt_dx12_make: |
mov ax,.x2 |
sub ax,.x1 |
cwde |
movsx ebx,bx |
shl eax,ROUND2 |
cdq |
idiv ebx |
mov .dx12,eax |
cvtsi2ss xmm6,ebx |
movss xmm5,.z2 |
subss xmm5,.z1 |
divss xmm5,xmm6 |
movss .dz12,xmm5 |
movaps xmm0,.2_nv |
subps xmm0,.1_nv |
shufps xmm6,xmm6,0 |
divps xmm0,xmm6 |
movaps .dn12,xmm0 |
.rpt_dx12_done: |
; Edge 1-3 deltas, same scheme. |
mov bx,.y3 ; calc deltas |
sub bx,.y1 |
jnz .rpt_dx13_make |
xorps xmm7,xmm7 |
mov dword .dx13,0 |
mov dword .dz13,0 |
movaps .dn13,xmm7 |
jmp .rpt_dx13_done |
.rpt_dx13_make: |
mov ax,.x3 |
sub ax,.x1 |
cwde |
movsx ebx,bx |
shl eax,ROUND2 |
cdq |
idiv ebx |
mov .dx13,eax |
cvtsi2ss xmm6,ebx |
movss xmm5,.z3 |
subss xmm5,.z1 |
divss xmm5,xmm6 |
movss .dz13,xmm5 |
movaps xmm0,.3_nv |
subps xmm0,.1_nv |
shufps xmm6,xmm6,0 |
divps xmm0,xmm6 |
movaps .dn13,xmm0 |
.rpt_dx13_done: |
; Edge 2-3 deltas, same scheme. |
mov bx,.y3 ; calc deltas |
sub bx,.y2 |
jnz .rpt_dx23_make |
xorps xmm7,xmm7 |
mov dword .dx23,0 |
mov dword .dz23,0 |
movaps .dn23,xmm7 |
jmp .rpt_dx23_done |
.rpt_dx23_make: |
mov ax,.x3 |
sub ax,.x2 |
cwde |
movsx ebx,bx |
shl eax,ROUND2 |
cdq |
idiv ebx |
mov .dx23,eax |
cvtsi2ss xmm6,ebx |
movss xmm5,.z3 |
subss xmm5,.z2 |
divss xmm5,xmm6 |
movss .dz23,xmm5 |
movaps xmm0,.3_nv |
subps xmm0,.2_nv |
shufps xmm6,xmm6,0 |
divps xmm0,xmm6 |
movaps .dn23,xmm0 |
.rpt_dx23_done: |
; Both edge walkers start at vertex 1: |
; eax - cur x on edge 1-3, ebx - cur x on edge 1-2 |
; (fixed point), .cz1/.cz2 and .cnv1/.cnv2 likewise. |
movsx eax,word .x1 |
shl eax,ROUND2 |
mov ebx,eax |
mov edx,.z1 |
mov .cz1,edx |
mov .cz2,edx |
movaps xmm0,.1_nv |
movaps .cnv1,xmm0 |
movaps .cnv2,xmm0 |
movsx ecx,word .y1 |
cmp cx,.y2 |
jge .rpt_loop1_end |
; Upper part: scanlines y1 .. y2-1. |
.rpt_loop1: |
; pushad/popad keep the fixed point eax/ebx and ecx |
; intact across the call; the line routine gets integer x. |
pushad |
movaps xmm2,.y_min |
movaps xmm0,.cnv1 |
movaps xmm1,.cnv2 |
movlps xmm3,.cz1 |
movaps xmm4,.l_v |
sar ebx,ROUND2 |
sar eax,ROUND2 |
mov edi,.screen |
mov esi,.Zbuf |
call real_phong_line_z |
popad |
; Step both edges to the next scanline. |
movaps xmm0,.cnv1 |
movaps xmm1,.cnv2 |
movss xmm2,.cz1 |
movss xmm3,.cz2 |
addps xmm0,.dn13 |
addps xmm1,.dn12 |
addss xmm2,.dz13 |
addss xmm3,.dz12 |
add eax,.dx13 |
add ebx,.dx12 |
movaps .cnv1,xmm0 |
movaps .cnv2,xmm1 |
movss .cz1,xmm2 |
movss .cz2,xmm3 |
add ecx,1 |
cmp cx,.y2 |
jl .rpt_loop1 |
.rpt_loop1_end: |
; Lower part: scanlines y2 .. y3-1. The second walker is |
; restarted at vertex 2 and now follows edge 2-3. |
movsx ecx,word .y2 |
cmp cx,.y3 |
jge .rpt_loop2_end |
movsx ebx,word .x2 ; eax - cur x1 |
shl ebx,ROUND2 ; ebx - cur x2 |
push dword .z2 |
pop dword .cz2 |
movaps xmm0,.2_nv |
movaps .cnv2,xmm0 |
.rpt_loop2: |
pushad |
movaps xmm2,.y_min |
movaps xmm0,.cnv1 |
movaps xmm1,.cnv2 |
movlps xmm3,.cz1 |
movaps xmm4,.l_v |
sar ebx,ROUND2 |
sar eax,ROUND2 |
mov edi,.screen |
mov esi,.Zbuf |
call real_phong_line_z |
popad |
movaps xmm0,.cnv1 |
movaps xmm1,.cnv2 |
movss xmm2,.cz1 |
movss xmm3,.cz2 |
addps xmm0,.dn13 |
addps xmm1,.dn23 |
addss xmm2,.dz13 |
addss xmm3,.dz23 |
add eax,.dx13 |
add ebx,.dx23 |
movaps .cnv1,xmm0 |
movaps .cnv2,xmm1 |
movss .cz1,xmm2 |
movss .cz2,xmm3 |
add ecx,1 |
cmp cx,.y3 |
jl .rpt_loop2 |
.rpt_loop2_end: |
add esp,512 |
pop ebp |
ret |
align 16 |
real_phong_line_z: |
; Draws one horizontal span of a Phong shaded triangle with |
; a per-pixel Z-buffer test: the normal is interpolated and |
; re-normalized per pixel and the colour is computed from |
; the lights_aligned list. |
; in: |
; xmm0 - normal vector 1 |
; xmm1 - normal vect 2 |
; xmm3 - lo -> hi z1, z2 coords as dwords floats |
; xmm2 - lo -> hi y_min, y_max, x_min, x_max |
; as dword integers |
; xmm4 - normalized light vector |
; eax - x1 |
; ebx - x2 |
; ecx - y |
; edi - screen buffer |
; esi - z buffer filled with dd floats |
push ebp |
mov ebp,esp |
sub esp,160 |
; ebp is rounded down to a 16 byte boundary so the locals |
; can be accessed with movaps. |
sub ebp,16 |
and ebp,0xfffffff0 |
.n1 equ [ebp-16] |
.n2 equ [ebp-32] |
.lv equ [ebp-48] |
.lx1 equ [ebp-52] |
.lx2 equ [ebp-56] |
.z2 equ [ebp-60] |
.z1 equ [ebp-64] |
.screen equ [ebp-68] |
.zbuff equ [ebp-72] |
.x_max equ [ebp-74] |
.x_min equ [ebp-76] |
.y_max equ [ebp-78] |
.y_min equ [ebp-80] |
.dn equ [ebp-96] |
.dz equ [ebp-100] |
.y equ [ebp-104] |
.cnv equ [ebp-128] |
mov .y,ecx |
; Pack the clip window to signed words; the movq stores |
; y_min, y_max, x_min, x_max at .y_min .. .x_max. |
packssdw xmm2,xmm2 |
movq .y_min,xmm2 |
; Reject lines outside y_min <= y < y_max. |
cmp cx,.y_min |
jl .end_rp_line |
cmp cx,.y_max |
jge .end_rp_line ; |
; Empty span -> nothing to draw; otherwise order the ends |
; so that eax < ebx, swapping normals and z lanes as well. |
cmp eax,ebx |
je .end_rp_line |
jl @f |
xchg eax,ebx |
movaps xmm7,xmm0 |
movaps xmm0,xmm1 |
movaps xmm1,xmm7 |
shufps xmm3,xmm3,11100001b |
@@: |
; Reject spans entirely right or left of the window. |
cmp ax,.x_max |
jge .end_rp_line |
cmp bx,.x_min |
jle .end_rp_line |
movaps .lv,xmm4 |
movaps .n1,xmm0 |
movaps .n2,xmm1 |
mov .lx1,eax |
mov .lx2,ebx |
movlps .z1,xmm3 |
; Per-pixel deltas: .dn = (n2 - n1) / (x2 - x1), |
; .dz = (z2 - z1) / (x2 - x1). |
sub ebx,eax |
cvtsi2ss xmm7,ebx |
shufps xmm7,xmm7,0 |
subps xmm1,xmm0 |
divps xmm1,xmm7 |
movaps .dn,xmm1 |
psrldq xmm3,4 |
subss xmm3,.z1 |
divss xmm3,xmm7 |
movss .dz,xmm3 |
; Left clip: when x1 < x_min, advance z1 and n1 by |
; (x_min - x1) steps and start the span at x_min. |
; NOTE(review): x_min is zero-extended here but sign- |
; extended a few lines below; equivalent only while |
; x_min is non-negative - confirm against callers. |
mov ebx,.lx1 |
cmp bx,.x_min ; clipping on function4 |
jge @f |
movzx eax,word .x_min |
sub eax,ebx |
cvtsi2ss xmm7,eax |
shufps xmm7,xmm7,0 |
mulss xmm3,xmm7 |
mulps xmm1,xmm7 |
addss xmm3,.z1 |
addps xmm1,.n1 |
movsx eax,word .x_min |
movss .z1,xmm3 |
movaps .n1,xmm1 |
mov dword .lx1,eax |
@@: |
; Right clip: lx2 = min(lx2, x_max). |
movzx eax,word .x_max |
cmp .lx2,eax |
jl @f |
mov .lx2,eax |
@@: |
; Byte offset of the first pixel: (size_x_var * y + lx1) * 4, |
; applied to both the screen (edi) and Z-buffer (esi). |
movzx eax,word[size_x_var] |
mul dword .y |
; mov edx,.x1 |
add eax,.lx1 |
shl eax,2 |
add edi,eax |
add esi,eax |
; ecx = pixel count, xmm0 = cur normal, xmm2 = cur z. |
mov ecx,.lx2 |
sub ecx,.lx1 |
movaps xmm0,.n1 |
movss xmm2,.z1 |
align 16 |
.ddraw: |
; Z test: skip the pixel unless cur z < stored z |
; (cmpnltss yields all ones when z is not less). |
movss xmm7,xmm2 |
cmpnltss xmm7,dword[esi] |
movd eax,xmm7 |
or eax,eax |
jnz .skip |
movss [esi],xmm2 |
; Normalize the interpolated normal with the approximate |
; reciprocal square root. |
movaps xmm7,xmm0 |
mulps xmm7,xmm7 ; normalize |
haddps xmm7,xmm7 |
haddps xmm7,xmm7 |
rsqrtps xmm7,xmm7 |
mulps xmm7,xmm0 |
movaps .cnv,xmm7 |
mov edx,lights_aligned ; lights - global variable |
xorps xmm1,xmm1 ; instead global can be used .lv - light vect. |
; One iteration per 64 byte light record: |
; dot = ([edx] with the highest dword masked off) . normal |
; colour = min(dot * [edx+16] + dot^8 * [edx+48], 255.0) |
; xmm1 keeps the per-channel maximum over all lights. |
; NOTE(review): [edx+16] / [edx+48] look like per-light |
; colour terms - confirm against copy_lights. |
@@: |
movaps xmm6,[edx+16] |
movaps xmm5,[edx] |
movaps xmm3,[edx+48] |
andps xmm5,[zero_hgst_dd] ; global |
mulps xmm5,.cnv ;.lv ; last dword should be zeroed |
haddps xmm5,xmm5 |
haddps xmm5,xmm5 |
; mulps xmm5,[env_const2] |
; maxps xmm5,[dot_min] |
; minps xmm5,[dot_max] |
movaps xmm7,xmm5 |
; mulps xmm7,[env_const2] |
; mulps xmm7,[env_const2] |
; maxps xmm7,[dot_min] |
; minps xmm7,[dot_max] |
mulps xmm7,xmm7 |
mulps xmm7,xmm7 |
mulps xmm5,xmm6 |
mulps xmm7,xmm7 |
mulps xmm7,xmm3 |
addps xmm5,xmm7 |
minps xmm5,[mask_255f] ; global |
maxps xmm1,xmm5 |
; movq xmm3,[edx+20] ; minimal color |
; punpcklwd xmm3,[minimum0] |
; cvtdq2ps xmm3,xmm3 |
; maxps xmm1,xmm3 |
add edx,64 |
cmp edx,lights_aligned_end ; global |
jnz @b |
; Convert to packed bytes and store the pixel. |
cvtps2dq xmm1,xmm1 |
packssdw xmm1,xmm1 |
packuswb xmm1,xmm1 |
movd [edi],xmm1 |
.skip: |
; Next pixel: advance pointers, normal and z. |
add edi,4 |
add esi,4 |
addps xmm0,.dn |
addss xmm2,.dz |
sub ecx,1 |
jnz .ddraw |
.end_rp_line: |
add esp,160 |
pop ebp |
ret |
/programs/demos/3DS/A_PROCS.INC |
---|
76,11 → 76,20 |
mul edx |
; shl eax,9 |
add eax,dword .x |
lea ebx,[eax*3] |
cmp [dr_flag],12 ; 32 bit col cause |
jne @f |
add ebx,eax |
@@: |
mov eax,[esi] |
mov [edi+ebx],eax |
.skip: |
add esi,3 |
cmp [dr_flag],12 |
jne @f |
inc esi |
@@: |
inc dword .x |
movzx edx,word[size_x_var] |
cmp dword .x,edx ;SIZE_X |
97,9 → 106,12 |
movzx ecx,word[size_x_var] |
movzx eax,word[size_y_var] |
imul ecx,eax |
cmp [dr_flag],12 |
je @f |
lea ecx,[ecx*3] |
shr ecx,2 |
; mov ecx,SIZE_X*SIZE_Y*3/4 |
@@: |
cld |
rep movsd |
156,23 → 168,42 |
sub ecx,ebx |
mov esi,[screen_ptr] |
mov edi,[Zbuffer_ptr] |
cmp [dr_flag],12 |
je @f |
lea ebx,[ebx*3] |
jmp .f |
@@: |
shl ebx,2 |
.f: |
mov edx,esi |
add esi,ebx |
lea ebx,[ebx+esi] |
pxor xmm0,xmm0 |
push eax |
@@: |
.emb: |
cmp [dr_flag],12 |
je @f |
movlps xmm1,[esi+3] |
movhps xmm1,[esi+6] |
punpcklbw xmm1,xmm0 |
movlps xmm2,[esi-3] |
movhps xmm2,[esi] |
punpcklbw xmm2,xmm0 |
movlps xmm3,[ebx] |
movhps xmm3,[ebx+3] |
movlps xmm4,[edx] |
movhps xmm4,[edx+3] |
jmp .ff |
@@: |
movlps xmm1,[esi+4] |
movhps xmm1,[esi+8] |
movlps xmm2,[esi-4] |
movhps xmm2,[esi] |
movlps xmm3,[ebx] |
movhps xmm3,[ebx+4] |
movlps xmm4,[edx] |
movhps xmm4,[edx+4] |
.ff: |
punpcklbw xmm1,xmm0 |
punpcklbw xmm2,xmm0 |
punpcklbw xmm3,xmm0 |
punpcklbw xmm4,xmm0 |
psubsw xmm1,xmm2 |
199,7 → 230,12 |
movd eax,xmm1 |
movzx eax,al |
; cmp [dr_flag],12 |
; je @f |
lea eax,[eax*3+envmap_cub] |
; jmp .fff |
;@@: |
mov eax,[eax] |
mov [edi],eax ;xmm1 |
psrldq xmm1,8 |
209,6 → 245,12 |
mov eax,[eax] |
mov [edi+4],eax |
cmp [dr_flag],12 |
jne @f |
add esi,2 |
add ebx,2 |
add edx,2 |
@@: |
add edi,8 |
add esi,6 |
215,17 → 257,23 |
add ebx,6 |
add edx,6 |
sub ecx,2 |
jnc @b |
jnc .emb |
pop ecx ;,eax |
mov edi,[screen_ptr] |
mov esi,[Zbuffer_ptr] |
cmp [dr_flag],12 |
je .e |
@@: |
movsd |
dec edi |
loop @b |
.e: |
rep movsd |
end if |
ret |
;align 16 |
/programs/demos/3DS/B_PROCS.INC |
---|
749,10 → 749,13 |
push ecx |
mov edi,[screen_ptr] |
movzx ecx,word[size_x_var] ;SIZE_X*3/4 |
cmp [dr_flag],12 |
je @f |
lea ecx,[ecx*3+1] |
shr ecx,2 |
@@: |
; mov ecx,SIZE_X*3/4 |
xor eax,eax |
rep stosd |
if 1 |
760,9 → 763,16 |
movzx ecx,word[size_y_var] |
sub ecx,3 |
imul ecx,ebx |
cmp [dr_flag],12 ; 32 bit per pix cause |
je @f |
lea ecx,[ecx*3] |
shr ecx,4 |
lea ebx,[ebx*3] |
jmp .blr |
@@: |
shr ecx,2 |
shl ebx,2 |
; mov ecx,(SIZE_X*(SIZE_Y-3))*3/16 |
.blr: |
@@: |
771,15 → 781,19 |
mov ecx,edi |
sub ecx,ebx |
movups xmm1,[ecx] |
cmp [dr_flag],12 |
je @f |
movups xmm2,[edi-3] |
movups xmm3,[edi+3] |
jmp .f |
@@: |
movups xmm2,[edi-4] |
movups xmm3,[edi+4] |
.f: |
pavgb xmm0,xmm1 |
pavgb xmm2,xmm3 |
pavgb xmm0,xmm2 |
psubusb xmm0,xmm5 ; importand if fire |
movups [edi],xmm0 |
add edi,16 |
add esi,16 |
788,12 → 802,16 |
end if |
xor eax,eax |
movzx ecx,word[size_x_var] |
cmp [dr_flag],12 |
je @f |
lea ecx,[ecx*3] |
shr ecx,2 |
@@: |
; mov ecx,SIZE_X*3/4 |
rep stosd |
pop ecx |
loop .again_blur |
dec ecx |
jnz .again_blur |
mov esp,ebp |
pop ebp |
end if |
/programs/demos/3DS/DATA.INC |
---|
1,6 → 1,7 |
; DATA AREA ************************************ |
i3 dw 3 |
i12 dd 12 |
i256 dw 256 |
i255d dd 255 |
dot_max dd 1.0 ; dot product max and min |
64,6 → 65,7 |
dw 0 |
edit_end_y dw 0 |
mouse_state dd 0 |
menu: |
db 2 ; button number = index |
db 'rotary ' ; label |
73,7 → 75,11 |
db 3 |
db 'shd. model' |
if Ext >= SSE3 |
db 13 |
else |
db 12 |
end if |
dr_flag db 0 ; 6 - dots |
dd shd_f |
242,6 → 248,9 |
;; dd color_component_f |
db -1 ; end mark |
259,6 → 268,7 |
db 'btex' |
db 'cenv' |
db 'grdl' |
db 'rphg' |
spd_f: |
db 'idle' |
db 'full' |
344,7 → 354,10 |
if Ext=SSE2 |
db ' (SSE2)' |
end if |
db ' 0.069b',0 |
if Ext=SSE3 |
db ' (SSE3)' |
end if |
db ' 0.070',0 |
labellen: |
STRdata db '-1 ' |
420,9 → 433,19 |
;=============================================== |
lightsend: |
align 16 |
emboss_bias: |
dw 128, 128, 128, 128, 128, 128, 128, 128 |
zero_hgst_dd: |
dd -1, -1, -1, 0 |
mask_255f: |
times 4 dd 255.0 |
the_zero: |
times 4 dd 0.0 |
I_END: |
if USE_LFN = 0 |
559,6 → 582,11 |
tex_points rb points_count * 4 ; bump_map and texture coords |
; each point word x, word y |
align 16 |
lights_aligned: |
lights_aligned_end = $ + 16 * 12 |
rb 16 * 12 |
if Ext >= SSE2 |
sse_repository rb 1024 |
end if |
571,6 → 599,7 |
procinfo: |
rb 1024 ; process info |
I_Param rb 256 |
memStack rb 4000 ;memory area for stack |
memStack: |
rb 2000 |
align 16 |
screen: |
/programs/demos/3DS/History.txt |
---|
1,4 → 1,13 |
View3ds 0.069 - May 2020 |
1. KPacked files support by Leency. |
2. 32bit vertices indexes and ability to load whole RAM limited objects. |
(Above 65535 vertices and triangles), (by me). |
3. I switched off the painter's algorithm mode (depth sorting). In this app's implementation it has |
a limited vertex count and produces a lower quality image than the Z-buffer Catmull algorithm. |
In addition, switching it off reduces the app size (by me). |
----------------------------------------------------------------------------------- |
View3ds 0.068 - XI 2016 |
1. Editing option - new 'editor' button. |
2. For now I disable perspective correction, to make implementation |
/programs/demos/3DS/README.TXT |
---|
1,14 → 1,11 |
View3ds 0.069 - tiny viewer to .3ds and .asc files with several graphics |
View3ds 0.070 - tiny viewer to .3ds and .asc files with several graphics |
effects implementation. |
What's new? |
1. KPacked files support by Leency. |
1. 32bit vertices indexes and ability to load whole RAM limited objects. |
(Above 65535 vertices and triangles), (by me). |
2. I switch off painters algotithm mode (depth sorting). In app impelementetion it has |
limited vertices count and produce less quality image than Z buffer Catmull algo. |
In addition this switch off reduces app size, (by me). |
1. Some keys support by Leency. |
2. New display model - real Phong: true (not faked) normal vector interpolation, normalising it per pixel and calculating |
the dot product (one for each light). It requires SSE3. (by me) |
Buttons description: |
1. rotary: choosing rotary axle: x, y, x+y. |
17,7 → 14,7 |
pos (position shading depend), dots (app draws only points - nodes of object), |
txgrd (texture mapping + smooth shading), 2tex (texture mapping + spherical |
environment mapping), bmap (bump + texture mapping), cenv (cubic environment |
mapping), grdl (Gouraud lines - edges only). |
mapping), grdl (Gouraud lines - edges only), rphg (real Phong). |
3. speed: idle, full. |
4,5. zoom in, out: no comment. |
6. catmull: disabled |
42,4 → 39,4 |
is released apply current position. You may also decrease whole handlers count by enable culling (using |
appropriate button) - some back handlers become hidden. |
Maciej Guba V 2020 |
Maciej Guba VII 2020 |
/programs/demos/3DS/VIEW3DS.ASM |
---|
1,11 → 1,11 |
; application : View3ds ver. 0.069 - tiny .3ds and .asc files viewer |
; application : View3ds ver. 0.070 - tiny .3ds and .asc files viewer |
; with a few graphics effects demonstration. |
; compiler : FASM |
; system : KolibriOS |
; author : Macgub aka Maciej Guba |
; email : macgub3@wp.pl |
; web : www.macgub.hekko.pl |
; web : http://macgub.vxm.pl |
; Feel free to use this intro in your own distribution of KolibriOS. |
; Special greetings to KolibriOS team . |
; I hope because my demos Christian Belive will be near to each of You. |
36,6 → 36,7 |
MMX = 1 |
SSE = 2 |
SSE2 = 3 |
SSE3 = 4 |
Ext = SSE2 ;Ext={ NON | MMX | SSE | SSE2 } |
; 0 for short names (Menuet-compatible), 1 for long names (Kolibri features) |
54,9 → 55,6 |
START: ; start of execution |
cld |
; mov eax,14 ; window size according to cur res ... |
; int 0x40 |
; sub eax,150 shl 16 + 150 |
mov eax, 500 shl 16 + 600 ; ... or set manually |
mov [size_y_var],ax |
shr ax,1 |
94,15 → 92,10 |
je .gen |
jmp .malloc |
.gen: |
if USE_LFN |
mov [triangles_count_var],1000 |
mov [points_count_var],1000 |
call alloc_mem_for_tp |
end if |
call generate_object |
jmp .opt |
.asc: |
mov [triangles_count_var],10000 |
mov [triangles_count_var],10000 ; to do: read asc header |
mov [points_count_var],10000 |
call alloc_mem_for_tp |
call read_asc |
115,12 → 108,14 |
.opt: |
; call alloc_buffer_mem ; alloc memfor screnn and z buffer |
call optimize_object1 ; proc in file b_procs.asm |
; set point(0,0,0) in center and calc all coords |
; to be in <-1.0,1.0> |
call normalize_all_light_vectors |
if Ext >= SSE3 |
call copy_lights ; to aligned float |
end if |
call init_triangles_normals2 |
call init_point_normals |
call init_envmap2 |
290,6 → 285,9 |
jne .next_m5 ; 'grd ' 1 |
call make_random_lights ; 'env ' 2 |
call normalize_all_light_vectors ; 'bump' 3 |
if Ext >= SSE3 |
call copy_lights |
end if |
call do_color_buffer ; intit color_map ; 'tex ' 4 |
; cmp [emboss_flag],1 ; 'pos ' 5 |
; je @f ; 'dots' 6 |
332,13 → 330,13 |
.next_m: |
cmp ah,18 |
jne .next_m2 |
if USE_LFN |
mov [re_alloc_flag],1 ; reallocate memory |
mov [triangles_count_var],1000 |
mov [points_count_var],1000 |
call alloc_mem_for_tp |
mov [re_alloc_flag],0 |
end if |
mov bl,[generator_flag] |
; or bl,bl |
; jz .next_m2 |
536,11 → 534,6 |
mov ecx,[points_count_var] |
call rotary |
; RDTSC |
; pop ebx |
; sub eax,ebx |
; sub eax,41 |
; push eax |
mov esi,[points_rotated_ptr] |
mov edi,[points_translated_ptr] |
547,10 → 540,6 |
mov ecx,[points_count_var] |
call translate_points |
; cmp [dr_flag],5 |
; jne @f |
; call calc_attenuation_light |
; @@: |
cmp [fire_flag],0 |
jne @f |
call clrscr ; clear the screen |
568,8 → 557,6 |
@@: |
call fill_Z_buffer ; make background |
.non_f: |
; RDTSC |
; push eax |
cmp [dr_flag],6 |
jne @f |
call draw_dots |
742,13 → 729,22 |
loop .dc |
pop eax |
mov eax,7 ; put image |
mov ebx,[screen_ptr] |
mov ecx,[size_y_var] |
; mov ecx,SIZE_X shl 16 + SIZE_Y |
mov edx,[offset_y] ;5 shl 16 + 25 |
cmp [dr_flag],12 |
je .ff |
int 0x40 |
jmp .f |
.ff: |
mov eax,65 |
mov esi,32 |
xor ebp,ebp |
int 0x40 |
.f: |
mov eax,13 |
mov bx,[size_x_var] |
add ebx,18 |
773,9 → 769,7 |
int 40h |
; addsubps xmm0,xmm0 |
jmp still |
;-------------------------------------------------------------------------------- |
797,6 → 791,9 |
include "GRD_TEX.INC" |
include "TWO_TEX.INC" |
include "ASC.INC" |
if Ext >= SSE3 |
include "3r_phg.inc" |
end if |
clear_vertices_index: |
mov edi,[vertices_index_ptr] |
movzx eax,word[size_x_var] |
825,9 → 822,15 |
movzx edx,word[size_x_var] |
imul edx,ecx |
add ebx,edx |
push ebx |
lea ecx,[ebx*2] |
lea ebx,[ebx*3] |
cmp [dr_flag],12 |
jne @f |
add ebx,[esp] |
@@: |
add esp,4 |
add ebx,[screen_ptr] |
mov ebx,[ebx] |
and ebx,0x00ffffff |
868,18 → 871,6 |
; left button pressed |
; macro check_bar |
; { |
; movzx ebx,word[.x_coord] |
; movzx ecx,word[.y_coord] |
; imul ebx,ecx |
; lea ecx,[ebx*2] |
; lea ebx,[ebx*3] |
; add ebx,[screen_ptr] |
; mov ebx,[ebx] |
; and ebx,0x00ffffff |
; cmp ebx,0x00ff0000 ; is handle bar ? |
; } |
check_bar |
jne .no_edit |
985,7 → 976,7 |
mul ecx |
mov [.temp],eax |
lea ecx,[eax*3] |
lea ecx,[eax*4] ; more mem for r_phg cause |
add ecx,256 |
mov eax,68 |
mov ebx,20 |
1020,28 → 1011,6 |
if 0 |
;old Menuet style alloc |
movzx ecx,word[size_x_var] |
movzx eax,word[size_y_var] |
add eax,200 |
mul ecx |
lea ecx,[eax*3] |
add ecx,16 |
and ecx,0xfffffff0 |
push ecx |
shl eax,2 |
add ecx,eax |
add ecx,MEM_END |
mov ebx,1 |
mov eax,64 ; allocate mem - resize app mem |
int 0x40 |
mov [screen_ptr],MEM_END |
mov [Zbuffer_ptr],MEM_END |
pop ecx |
add [Zbuffer_ptr],ecx |
end if |
ret |
update_flags: |
; updates flags and writing flag description |
1063,6 → 1032,7 |
add edi,17 |
cmp byte[edi],-1 |
jne .ch_another |
jmp .no_write |
.write: |
; clreol {pascal never dies} |
; * eax = 13 - function number |
1447,39 → 1417,6 |
mov esp,ebp |
pop ebp |
ret |
if 0 |
; Disabled code (if 0): compute one normal vector per triangle. |
; Walks the records at `triangles` in 12-byte steps until the first dword of |
; a record equals -1, and advances the output pointer, which starts at |
; `triangles_normals`, by 12 bytes per record. |
; For every record two vectors are built with make_vector in the `vectors` |
; scratch area, then cross_product and normalize_vector are called. |
; Overwrites ebx, ebp, esi and edi; ebp is not saved. What the called helpers |
; clobber is not visible here. |
; NOTE(review): `movzx edi,dword[...]` below uses a dword source; movzx only |
; has byte and word source forms, so this presumably needs to become a plain |
; mov before the block can be re-enabled - verify with the assembler. |
init_triangles_normals: |
mov ebx,triangles_normals |
mov ebp,triangles |
@@: |
; save the output pointer; ebx is reused as the make_vector destination |
push ebx |
mov ebx,vectors |
; index * 3 * 2 = index * 6: each point entry is addressed as 6 bytes |
mov esi,dword[ebp] ; first point index |
lea esi,[esi*3] |
lea esi,[points+esi*2] ; esi - pointer to 1st 3d point |
movzx edi,dword[ebp+4] ; second point index |
lea edi,[edi*3] |
lea edi,[points+edi*2] ; edi - pointer to 2nd 3d point |
call make_vector |
; second vector goes 12 bytes after the first one, from point 2 to point 3 |
add ebx,12 |
mov esi,edi |
movzx edi,dword[ebp+8] ; third point index |
lea edi,[edi*3] |
lea edi,[points+edi*2] |
call make_vector |
mov edi,ebx ; edi - pointer to 2nd vector |
mov esi,ebx |
sub esi,12 ; esi - pointer to 1st vector |
; restore the output pointer |
; NOTE(review): presumably cross_product stores its result at ebx - confirm. |
pop ebx |
call cross_product |
mov edi,ebx |
call normalize_vector |
; next triangle record and next output slot |
add ebp,12 |
add ebx,12 |
cmp dword[ebp],-1 |
jne @b |
ret |
end if |
init_point_normals: |
.x equ dword [ebp-4] |
1599,326 → 1536,46 |
; cmp dword[ebp],-1 |
; jne @b |
ret |
if 0 ; ind 64 but |
;================================================================= |
; Build a list of 8-byte records in triangles_with_z, one per triangle, then |
; sort it with quicksort followed by insertsort and jump to end_sort. |
; Each record holds the three word vertex indices copied from the list at |
; [triangles_ptr], followed by a word key. |
; The key is the 16-bit wrapping sum of the words at offset 4 of the three |
; referenced 6-byte entries at [points_translated_ptr]. |
; NOTE(review): offset 4 is presumably the z coordinate - confirm. The sum is |
; not divided by 3; the division is commented out below. |
; The source list ends at the first record whose leading dword equals -1. |
; Overwrites eax, ecx, edx, esi, edi and ebp here, plus whatever quicksort and |
; insertsort use. |
sort_triangles: |
mov esi,[triangles_ptr] |
mov edi,triangles_with_z |
mov ebp,[points_translated_ptr] |
make_triangle_with_z: ;makes list with triangles and z position |
; index * 3, then * 2 in the address = 6 bytes per point entry |
movzx eax,word[esi] |
lea eax,[eax*3] |
movzx ecx,word[ebp+eax*2+4] |
movzx eax,word[esi+2] |
lea eax,[eax*3] |
add cx,word[ebp+eax*2+4] |
movzx eax,word[esi+4] |
lea eax,[eax*3] |
add cx,word[ebp+eax*2+4] |
mov ax,cx |
; cwd |
; idiv word[i3] |
; movsd + movsw copy the 6 bytes of indices; stosw appends the key in ax |
movsd ; store vertex coordinates |
movsw |
stosw ; middle vertex coordinate 'z' in triangles_with_z list |
cmp dword[esi],-1 |
jne make_triangle_with_z |
movsd ; copy end mark |
; eax = low offset 4, edx = offset relative to trizdd of the dword 4 bytes |
; below the end mark; edx is also kept in [high] for the insertsort call. |
; NOTE(review): presumably trizdd aliases triangles_with_z, so these offsets |
; select the second dword (third index + key) of the first and last record. |
mov eax,4 |
lea edx,[edi-8-trizdd] |
; lea edx, [edi-8] |
; sub edx,[triangles_w_z_ptr] |
mov [high],edx |
call quicksort |
mov eax,4 |
mov edx,[high] |
call insertsort |
jmp end_sort |
quicksort: |
mov ecx,edx |
sub ecx,eax |
cmp ecx,32 |
jc .exit |
lea ecx,[eax+edx] |
shr ecx,4 |
lea ecx,[ecx*8-4]; |
; mov edi,[triangles_w_z_ptr] |
; mov ebx,[edi+eax] |
; mov esi,[edi+ecx] |
; mov edi,[edi+edx] |
mov ebx,[trizdd+eax]; trizdd[l] |
mov esi,[trizdd+ecx]; trizdd[i] |
mov edi,[trizdd+edx]; trizdd[h] |
cmp ebx,esi |
jg @f ; direction NB! you need to negate these to invert the order |
if Ext=NON |
mov [trizdd+eax],esi |
mov [trizdd+ecx],ebx |
mov ebx,[trizdd+eax-4] |
mov esi,[trizdd+ecx-4] |
mov [trizdd+eax-4],esi |
mov [trizdd+ecx-4],ebx |
mov ebx,[trizdd+eax] |
mov esi,[trizdd+ecx] |
else |
; push ebx |
; mov ebx,[triangles_w_z_ptr] |
; movq mm0,[ebx+eax-4] |
; movq mm1,[ebx+ecx-4] |
; movq [ebx+ecx-4],mm0 |
; movq [ebx+eax-4],mm1 |
; pop ebx |
movq mm0,[trizdq+eax-4] |
movq mm1,[trizdq+ecx-4] |
movq [trizdq+ecx-4],mm0 |
movq [trizdq+eax-4],mm1 |
xchg ebx,esi |
end if |
if Ext >= SSE3 |
copy_lights: ; after normalising ! |
mov esi,lights |
mov edi,lights_aligned |
mov ecx,3 |
.again: |
push ecx |
mov ecx,3 |
cld |
rep movsd |
xor eax,eax |
stosd |
mov ecx,3 |
.b: |
push ecx |
mov ecx,3 |
@@: |
cmp ebx,edi |
jg @f ; direction |
if Ext=NON |
mov [trizdd+eax],edi |
mov [trizdd+edx],ebx |
mov ebx,[trizdd+eax-4] |
mov edi,[trizdd+edx-4] |
mov [trizdd+eax-4],edi |
mov [trizdd+edx-4],ebx |
mov ebx,[trizdd+eax] |
mov edi,[trizdd+edx] |
else |
; push ebx |
; mov ebx,[triangles_w_z_ptr] |
; movq mm0,[ebx+eax-4] |
; movq mm1,[ebx+edx-4] |
; movq [ebx+edx-4],mm0 |
; movq [ebx+eax-4],mm1 |
movq mm0,[trizdq+eax-4] |
movq mm1,[trizdq+edx-4] |
movq [trizdq+edx-4],mm0 |
movq [trizdq+eax-4],mm1 |
; pop ebx |
xchg ebx,edi |
movzx ebx,byte[esi] |
cvtsi2ss xmm0,ebx |
movss [edi],xmm0 |
inc esi |
add edi,4 |
loop @b |
stosd |
pop ecx |
loop .b |
inc esi ; skip shiness |
pop ecx |
loop .again |
ret |
end if |
@@: |
cmp esi,edi |
jg @f ; direction |
if Ext=NON |
mov [trizdd+ecx],edi |
mov [trizdd+edx],esi |
mov esi,[trizdd+ecx-4] |
mov edi,[trizdd+edx-4] |
mov [trizdd+ecx-4],edi |
mov [trizdd+edx-4],esi |
else |
; push ebx |
; mov ebx,[triangles_w_z_ptr] |
; movq mm0,[ebx+ecx-4] |
; movq mm1,[ebx+edx-4] |
; movq [ebx+edx-4],mm0 |
; movq [ebx+ecx-4],mm1 |
; pop ebx |
movq mm0,[trizdq+ecx-4] |
movq mm1,[trizdq+edx-4] |
movq [trizdq+edx-4],mm0 |
movq [trizdq+ecx-4],mm1 |
xchg ebx,esi |
end if |
@@: |
mov ebp,eax ; direction |
add ebp,8 ; j |
if Ext=NON |
mov esi,[trizdd+ebp] |
mov edi,[trizdd+ecx] |
mov [trizdd+ebp],edi |
mov [trizdd+ecx],esi |
mov esi,[trizdd+ebp-4] |
mov edi,[trizdd+ecx-4] |
mov [trizdd+ecx-4],esi |
mov [trizdd+ebp-4],edi |
else |
; push ebx |
; mov ebx,[triangles_w_z_ptr] |
; movq mm0,[ebx+ebp-4] |
; movq mm1,[ebx+ecx-4] |
; movq [ebx+ecx-4],mm0 |
; movq [ebx+ebp-4],mm1 |
; pop ebx |
movq mm0,[trizdq+ebp-4] |
movq mm1,[trizdq+ecx-4] |
movq [trizdq+ecx-4],mm0 |
movq [trizdq+ebp-4],mm1 |
end if |
mov ecx,edx ; i; direction |
mov ebx,[trizdd+ebp]; trizdd[j] |
; mov ebx, [triangles_w_z_ptr] |
; add ebx, ebp |
; push eax |
; mov eax, [triangles_w_z_ptr] |
.loop: |
sub ecx,8 ; direction |
cmp [trizdd+ecx],ebx |
; cmp [eax+ecx],ebx |
jl .loop ; direction |
@@: |
add ebp,8 ; direction |
cmp [trizdd+ebp],ebx |
; cmp [eax+ebp],ebx |
jg @b ; direction |
cmp ebp,ecx |
jge @f ; direction |
if Ext=NON |
mov esi,[trizdd+ecx] |
mov edi,[trizdd+ebp] |
mov [trizdd+ebp],esi |
mov [trizdd+ecx],edi |
mov edi,[trizdd+ecx-4] |
mov esi,[trizdd+ebp-4] |
mov [trizdd+ebp-4],edi |
mov [trizdd+ecx-4],esi |
else |
; movq mm0,[eax+ecx-4] |
; movq mm1,[eax+ebp-4] |
; movq [eax+ebp-4],mm0 |
; movq [eax+ecx-4],mm1 |
movq mm0,[trizdq+ecx-4] |
movq mm1,[trizdq+ebp-4] |
movq [trizdq+ebp-4],mm0 |
movq [trizdq+ecx-4],mm1 |
end if |
jmp .loop |
; pop eax |
@@: |
if Ext=NON |
mov esi,[trizdd+ecx] |
mov edi,[trizdd+eax+8] |
mov [trizdd+eax+8],esi |
mov [trizdd+ecx],edi |
mov edi,[trizdd+ecx-4] |
mov esi,[trizdd+eax+4] |
mov [trizdd+eax+4],edi |
mov [trizdd+ecx-4],esi |
else |
; push edx |
; mov edx,[triangles_w_z_ptr] |
; movq mm0,[edx+ecx-4] |
; movq mm1,[edx+eax+4]; dir |
; movq [edx+eax+4],mm0; dir |
; movq [edx+ecx-4],mm1 |
; pop edx |
movq mm0,[trizdq+ecx-4] |
movq mm1,[trizdq+eax+4]; dir |
movq [trizdq+eax+4],mm0; dir |
movq [trizdq+ecx-4],mm1 |
end if |
add ecx,8 |
push ecx edx |
mov edx,ebp |
call quicksort |
pop edx eax |
call quicksort |
.exit: |
ret |
; Insertion sort over 8-byte records addressed relative to trizdd. |
; In:  eax = offset of the first key dword, edx = offset of the last key dword |
;      (offsets step by 8; a record occupies bytes offset-4 .. offset+3). |
; The key is the dword at [trizdd+offset], compared as signed. A record is |
; moved towards lower offsets while the preceding key is not greater than its |
; own key, so larger keys end up at lower offsets. |
; Ext=NON moves records with two dword moves via ecx/ebp; the other variant |
; moves them with movq via mm0/mm1 and addresses them through trizdq. |
; NOTE(review): presumably trizdq is the same base as trizdd - confirm. |
; Overwrites ebx, esi, edi and either ecx/ebp or mm0/mm1; no emms is issued. |
insertsort: |
mov esi,eax |
.start: |
; advance to the next record; finished once esi is above edx (unsigned) |
add esi,8 |
cmp esi,edx |
ja .exit |
; ebx = key of the record being inserted |
mov ebx,[trizdd+esi] |
; mov ebx,[triangles_w_z_ptr] |
; add ebx,esi |
; keep the rest of the record (ecx) or the whole record (mm1) |
if Ext=NON |
mov ecx,[trizdd+esi-4] |
else |
; push ebx |
; mov ebx,[triangles_w_z_ptr] |
; movq mm1,[ebx+esi-4] |
movq mm1,[trizdq+esi-4] |
; pop ebx |
end if |
mov edi,esi |
@@: |
; stop at the low bound or when the preceding key is greater than ebx |
cmp edi,eax |
jna @f |
; push eax |
; mov eax,[triangles_w_z_ptr] |
; cmp [eax+edi-8],ebx |
; pop eax |
cmp [trizdd+edi-8],ebx |
jg @f ; direction |
; move the preceding record up by one slot (8 bytes) |
if Ext=NON |
mov ebp,[trizdd+edi-8] |
mov [trizdd+edi],ebp |
mov ebp,[trizdd+edi-12] |
mov [trizdd+edi-4],ebp |
else |
; push eax |
; mov eax,[triangles_w_z_ptr] |
; movq mm0,[eax+edi-12] |
; movq [eax+edi-4],mm0 |
movq mm0,[trizdq+edi-12] |
movq [trizdq+edi-4],mm0 |
; pop eax |
end if |
sub edi,8 |
jmp @b |
@@: |
; drop the saved record into the free slot at edi |
if Ext=NON |
mov [trizdd+edi],ebx |
mov [trizdd+edi-4],ecx |
else |
; push eax |
; mov eax,[triangles_w_z_ptr] |
; movq [eax+edi-4],mm1 |
movq [trizdq+edi-4],mm1 |
; pop eax |
end if |
jmp .start |
.exit: |
ret |
; Copy the records from triangles_with_z back to the list at [triangles_ptr]. |
; Each 8-byte source record becomes a 6-byte destination record, so the |
; trailing key word is dropped. Copying stops at the first source dword equal |
; to -1, which is then copied as the end mark. |
; Reached with `jmp end_sort` from sort_triangles, so the final ret returns to |
; the caller of sort_triangles. |
; Overwrites esi and edi, and mm0 when Ext is not NON. |
end_sort: |
; translate triangles_with_z to sorted_triangles |
mov esi,triangles_with_z |
; mov esi,[triangles_w_z_ptr] |
; mov edi,sorted_triangles |
mov edi,[triangles_ptr] |
again_copy: |
if Ext=NON |
; copy 6 bytes, then skip the 2-byte key in the source |
movsd |
movsw |
add esi,2 |
else |
; 8 bytes are written but edi advances only 6: the 2 surplus bytes are |
; overwritten by the next record or by the end mark |
movq mm0,[esi] |
movq [edi],mm0 |
add esi,8 |
add edi,6 |
end if |
cmp dword[esi],-1 |
jne again_copy |
; NOTE(review): emms is commented out, so the MMX state is left as it is |
; after the movq path - confirm that no x87 code depends on it. |
; if Ext=MMX |
; emms |
; end if |
movsd ; copy end mark too |
ret |
end if ; 64 ind |
clrscr: |
mov edi,[screen_ptr] |
movzx ecx,word[size_x_var] |
movzx eax,word[size_y_var] |
imul ecx,eax |
lea ecx,[ecx*3] |
shr ecx,2 |
xor eax,eax |
if Ext=NON |
rep stosd |
2130,7 → 1787,10 |
je .cubic_env_mapping |
cmp [dr_flag],11 |
je .draw_smooth_line |
; **************** |
if Ext >= SSE3 |
cmp [dr_flag],12 |
je .r_phg |
end if ; **************** |
mov esi,point_index3 ; do Gouraud shading |
mov ecx,3 |
.again_grd_draw: |
2953,8 → 2613,73 |
push [xx2] |
call smooth_line |
jmp .end_draw |
@@: |
if Ext >= SSE3 |
; dr_flag = 12 path: load the registers used for the real_phong_tri_z call |
; for the current triangle, call it and continue at .end_draw. |
; Values set here: xmm0-xmm2 (vertex normals), xmm3 = 0, xmm4 (three dwords |
; read from offset 8 of the rotated points), xmm5 (size-derived dwords), |
; eax/ebx/ecx (xx1..xx3 with halves swapped), edi = screen buffer, |
; esi = Z-buffer. |
.r_phg: |
; With the_zero holding zero words, xmm5 becomes lo -> hi: |
; 0, word[size_y_var], 0, word[size_y_var+2]. |
; NOTE(review): presumably y_min, y_max, x_min, x_max with size_x_var stored |
; right after size_y_var - confirm against the data layout. |
; NOTE(review): punpcklwd with a memory operand needs the_zero to be 16-byte |
; aligned - confirm. |
movd xmm5,[size_y_var] |
punpcklwd xmm5,[the_zero] |
pshufd xmm5,xmm5,01110011b |
; eax/ebx/ecx = address of each vertex entry in the rotated-normals array |
; (index * [i12] + base) |
mov eax,[point_index1] |
mov ebx,[point_index2] |
mov ecx,[point_index3] |
imul eax,[i12] |
imul ebx,[i12] |
imul ecx,[i12] |
add eax,[points_normals_rot_ptr] |
add ebx,[points_normals_rot_ptr] |
add ecx,[points_normals_rot_ptr] |
; each load reads 16 bytes; the and with zero_hgst_dd follows |
; NOTE(review): presumably zero_hgst_dd clears the highest dword, which |
; belongs to the next array entry - confirm the mask value. |
movups xmm0,[eax] |
movups xmm1,[ebx] |
movups xmm2,[ecx] |
andps xmm0,[zero_hgst_dd] |
andps xmm1,[zero_hgst_dd] |
andps xmm2,[zero_hgst_dd] |
; xmm3 = 0 |
xorps xmm3,xmm3 |
; same index scaling, this time into the rotated-points array |
mov eax,[point_index1] |
mov ebx,[point_index2] |
mov ecx,[point_index3] |
imul eax,[i12] |
imul ebx,[i12] |
imul ecx,[i12] |
add eax,[points_rotated_ptr] |
add ebx,[points_rotated_ptr] |
add ecx,[points_rotated_ptr] |
; gather the dword at offset 8 of each point on the stack so that one load |
; gives xmm4 = lo -> hi [eax+8], [ebx+8], [ecx+8]; the 4th dword is whatever |
; lies above the pushed values and is passed through the same and mask |
push dword[ecx+8] |
push dword[ebx+8] |
push dword[eax+8] |
movups xmm4,[esp] |
add esp,12 |
andps xmm4,[zero_hgst_dd] |
; swap the 16-bit halves of the dwords read at xx1, xx2 and xx3 |
; NOTE(review): presumably this yields x shl 16 + y for each vertex - confirm |
; against the layout of xx1..xx3. |
mov eax,dword[xx1] |
ror eax,16 |
mov ebx,dword[xx2] |
ror ebx,16 |
mov ecx,dword[xx3] |
ror ecx,16 |
mov edi,[screen_ptr] |
mov esi,[Zbuffer_ptr] |
call real_phong_tri_z |
jmp .end_draw |
end if |
.end_draw: |
pop esi |
add esi,12 |
2990,6 → 2715,8 |
push dword 0 |
movzx eax,word[size_x_var] |
cmp [dr_flag],12 |
je @f |
lea ebx,[eax*3] |
sub ebx,18 |
add eax,eax |
2996,7 → 2723,18 |
sub eax,12 |
mov [.xres3m18],ebx |
mov [.xres2m12],eax |
jmp .f |
@@: |
lea ebx,[eax*4] |
sub ebx,4*6 |
add eax,eax |
sub eax,3*4 |
mov [.xres3m18],ebx |
mov [.xres2m12],eax |
.f: |
mov esi,[points_translated_ptr] |
.loop: |
push esi |
3033,7 → 2771,13 |
; sub eax,3 |
imul eax,edx |
add eax,ebx |
push eax |
lea edi,[eax*3] |
cmp [dr_flag],12 |
jne @f |
add edi,[esp] |
@@: |
add esp,4 |
lea eax,[eax*2] |
; draw bar 6x6 |
add edi,[screen_ptr] |
3049,13 → 2793,20 |
push ecx |
mov ecx,6 |
@@: |
.do: |
mov word[edi],0x0000 ;ax |
mov byte[edi+2],0xff ;al |
mov word[eax],dx |
add eax,2 |
cmp [dr_flag],12 |
jne @f |
add edi,4 |
loop .do |
jmp .ad |
@@: |
add edi,3 |
loop @b |
loop .do |
.ad: |
add edi,[.xres3m18] |
add eax,[.xres2m12] |
pop ecx |
3077,6 → 2828,10 |
fill_Z_buffer: |
mov eax,0x70000000 |
cmp [dr_flag],12 |
jne @f |
mov eax,60000.1 |
@@: |
mov edi,[Zbuffer_ptr] |
movzx ecx,word[size_x_var] |
movzx ebx,word[size_y_var] |