Rev 1979 | Only display areas with differences | Regard whitespace | Details | Blame | Last modification | View Log | RSS feed
Rev 1979 | Rev 2192 | ||
---|---|---|---|
1 | ;SIZE_X equ 350 |
1 | ;SIZE_X equ 350 |
2 | ;SIZE_Y equ 350 |
2 | ;SIZE_Y equ 350 |
3 | ;ROUND equ 8 |
3 | ;ROUND equ 8 |
4 | ;TEX_X equ 512 |
4 | ;TEX_X equ 512 |
5 | ;TEX_Y equ 512 |
5 | ;TEX_Y equ 512 |
6 | ;TEXTURE_SIZE EQU (512*512)-1 |
6 | ;TEXTURE_SIZE EQU (512*512)-1 |
7 | ;TEX_SHIFT EQU 9 |
7 | ;TEX_SHIFT EQU 9 |
8 | 8 | ||
9 | ;CATMULL_SHIFT equ 8 |
9 | ;CATMULL_SHIFT equ 8 |
10 | ;TEXTURE_SIZE EQU (TEX_X * TEX_Y)-1 |
10 | ;TEXTURE_SIZE EQU (TEX_X * TEX_Y)-1 |
11 | ;Ext = SSE |
11 | ;Ext = SSE |
12 | ;SSE = 3 |
12 | ;SSE = 3 |
13 | ;MMX = 1 |
13 | ;MMX = 1 |
14 | ;NON = 0 |
14 | ;NON = 0 |
15 | ;use32 |
15 | ;use32 |
16 | ;------- Big thanks to Majuma (www.majuma.xt.pl) for absolutely great--- |
16 | ;------- Big thanks to Majuma (www.majuma.xt.pl) for absolutely great--- |
17 | ;------- DOS 13h mode demos -------------------------------------------- |
17 | ;------- DOS 13h mode demos -------------------------------------------- |
18 | ;------- Procedure draws triangle with two overlapped textures, I use -- |
18 | ;------- Procedure draws triangle with two overlapped textures, I use -- |
19 | ;--------Catmull Z-buffer algorithm- (Z coordinate interpolation)------- |
19 | ;--------Catmull Z-buffer algorithm- (Z coordinate interpolation)------- |
20 | ;--------I calc the texture pixel this way: col1*col2/256 -------------- |
20 | ;--------I calc the texture pixel this way: col1*col2/256 -------------- |
21 | two_tex_triangle_z: |
21 | two_tex_triangle_z: |
22 | ;------------------in - eax - x1 shl 16 + y1 ----------- |
22 | ;------------------in - eax - x1 shl 16 + y1 ----------- |
23 | ;---------------------- ebx - x2 shl 16 + y2 ----------- |
23 | ;---------------------- ebx - x2 shl 16 + y2 ----------- |
24 | ;---------------------- ecx - x3 shl 16 + y3 ----------- |
24 | ;---------------------- ecx - x3 shl 16 + y3 ----------- |
25 | ;---------------------- edx - pointer to b. texture----- |
25 | ;---------------------- edx - pointer to b. texture----- |
26 | ;---------------------- esi - pointer to e. texture----- |
26 | ;---------------------- esi - pointer to e. texture----- |
27 | ;---------------------- edi - pointer to screen buffer-- |
27 | ;---------------------- edi - pointer to screen buffer-- |
28 | ;---------------------- stack : b. tex coordinates------ |
28 | ;---------------------- stack : b. tex coordinates------ |
29 | ;---------------------- e. tex coordinates------ |
29 | ;---------------------- e. tex coordinates------ |
30 | ;---------------------- Z position coordinates-- |
30 | ;---------------------- Z position coordinates-- |
31 | ;---------------------- pointer to Z buffer----- |
31 | ;---------------------- pointer to Z buffer----- |
32 | ;-- Z-buffer - filled with coordinates as dword -------- |
32 | ;-- Z-buffer - filled with coordinates as dword -------- |
33 | ;-- (Z coor. as word) shl CATMULL_SHIFT ---------------- |
33 | ;-- (Z coor. as word) shl CATMULL_SHIFT ---------------- |
34 | .b_x1 equ ebp+4 ; procedure don't save registers !!! |
34 | .b_x1 equ ebp+4 ; procedure don't save registers !!! |
35 | .b_y1 equ ebp+6 ; each coordinate as word |
35 | .b_y1 equ ebp+6 ; each coordinate as word |
36 | .b_x2 equ ebp+8 |
36 | .b_x2 equ ebp+8 |
37 | .b_y2 equ ebp+10 ; b - first texture |
37 | .b_y2 equ ebp+10 ; b - first texture |
38 | .b_x3 equ ebp+12 |
38 | .b_x3 equ ebp+12 |
39 | .b_y3 equ ebp+14 ; e - second texture |
39 | .b_y3 equ ebp+14 ; e - second texture |
40 | .e_x1 equ ebp+16 |
40 | .e_x1 equ ebp+16 |
41 | .e_y1 equ ebp+18 |
41 | .e_y1 equ ebp+18 |
42 | .e_x2 equ ebp+20 |
42 | .e_x2 equ ebp+20 |
43 | .e_y2 equ ebp+22 |
43 | .e_y2 equ ebp+22 |
44 | .e_x3 equ ebp+24 |
44 | .e_x3 equ ebp+24 |
45 | .e_y3 equ ebp+26 |
45 | .e_y3 equ ebp+26 |
46 | .z1 equ word[ebp+28] |
46 | .z1 equ word[ebp+28] |
47 | .z2 equ word[ebp+30] |
47 | .z2 equ word[ebp+30] |
48 | .z3 equ word[ebp+32] |
48 | .z3 equ word[ebp+32] |
49 | .z_buff equ dword[ebp+34] ; pointer to Z-buffer |
49 | .z_buff equ dword[ebp+34] ; pointer to Z-buffer |
50 | 50 | ||
51 | 51 | ||
52 | .t_bmap equ dword[ebp-4] ; pointer to b. texture |
52 | .t_bmap equ dword[ebp-4] ; pointer to b. texture |
53 | .t_emap equ dword[ebp-8] ; pointer to e. texture |
53 | .t_emap equ dword[ebp-8] ; pointer to e. texture |
54 | .x1 equ word[ebp-10] |
54 | .x1 equ word[ebp-10] |
55 | .y1 equ word[ebp-12] |
55 | .y1 equ word[ebp-12] |
56 | .x2 equ word[ebp-14] |
56 | .x2 equ word[ebp-14] |
57 | .y2 equ word[ebp-16] |
57 | .y2 equ word[ebp-16] |
58 | .x3 equ word[ebp-18] |
58 | .x3 equ word[ebp-18] |
59 | .y3 equ word[ebp-20] |
59 | .y3 equ word[ebp-20] |
60 | 60 | ||
61 | .dx12 equ dword[ebp-24] |
61 | .dx12 equ dword[ebp-24] |
62 | .dbx12 equ dword[ebp-28] |
62 | .dbx12 equ dword[ebp-28] |
63 | .dby12 equ dword[ebp-32] |
63 | .dby12 equ dword[ebp-32] |
64 | .dby12q equ [ebp-32] |
64 | .dby12q equ [ebp-32] |
65 | .dex12 equ dword[ebp-36] |
65 | .dex12 equ dword[ebp-36] |
66 | .dey12 equ dword[ebp-40] |
66 | .dey12 equ dword[ebp-40] |
67 | .dey12q equ [ebp-40] |
67 | .dey12q equ [ebp-40] |
68 | .dz12 equ dword[ebp-44] |
68 | .dz12 equ dword[ebp-44] |
69 | 69 | ||
70 | .dx13 equ dword[ebp-48] |
70 | .dx13 equ dword[ebp-48] |
71 | .dbx13 equ dword[ebp-52] |
71 | .dbx13 equ dword[ebp-52] |
72 | .dby13 equ dword[ebp-56] |
72 | .dby13 equ dword[ebp-56] |
73 | .dby13q equ [ebp-56] |
73 | .dby13q equ [ebp-56] |
74 | .dex13 equ dword[ebp-60] |
74 | .dex13 equ dword[ebp-60] |
75 | .dey13 equ dword[ebp-64] |
75 | .dey13 equ dword[ebp-64] |
76 | .dey13q equ [ebp-64] |
76 | .dey13q equ [ebp-64] |
77 | .dz13 equ dword[ebp-68] |
77 | .dz13 equ dword[ebp-68] |
78 | 78 | ||
79 | .dx23 equ dword[ebp-72] |
79 | .dx23 equ dword[ebp-72] |
80 | .dbx23 equ dword[ebp-76] |
80 | .dbx23 equ dword[ebp-76] |
81 | .dby23 equ dword[ebp-80] |
81 | .dby23 equ dword[ebp-80] |
82 | .dby23q equ [ebp-80] |
82 | .dby23q equ [ebp-80] |
83 | .dex23 equ dword[ebp-84] |
83 | .dex23 equ dword[ebp-84] |
84 | .dey23 equ dword[ebp-88] |
84 | .dey23 equ dword[ebp-88] |
85 | .dey23q equ [ebp-88] |
85 | .dey23q equ [ebp-88] |
86 | .dz23 equ dword[ebp-92] |
86 | .dz23 equ dword[ebp-92] |
87 | 87 | ||
88 | .cx1 equ dword[ebp-96] ; current variables |
88 | .cx1 equ dword[ebp-96] ; current variables |
89 | .cx2 equ dword[ebp-100] |
89 | .cx2 equ dword[ebp-100] |
90 | ;.cbx1q equ [ebp-104] |
90 | .cbx1 equ dword[ebp-104] |
91 | .cbx1 equ dword[ebp-104] |
- | |
92 | .cby1 equ [ebp-108] |
91 | .cby1 equ [ebp-108] |
93 | ;.cbx2q [ebp-112] |
92 | .cex1 equ dword[ebp-112] |
94 | .cbx2 equ dword[ebp-112] |
- | |
95 | .cby2 equ [ebp-116] |
93 | .cey1 equ [ebp-116] |
96 | ;.cex1q equ [ebp-120] |
94 | .cbx2 equ dword[ebp-120] |
97 | .cex1 equ dword[ebp-120] |
- | |
98 | .cey1 equ [ebp-124] |
95 | .cby2 equ [ebp-124] |
99 | ;.cex2q equ [ebp-128] |
96 | .cex2 equ dword[ebp-128] |
100 | .cex2 equ dword[ebp-128] |
- | |
101 | .cey2 equ [ebp-132] |
97 | .cey2 equ [ebp-132] |
102 | 98 | ||
103 | .cz1 equ dword[ebp-136] |
99 | .cz1 equ dword[ebp-136] |
104 | .cz2 equ dword[ebp-140] |
100 | .cz2 equ dword[ebp-140] |
105 | 101 | ||
106 | if Ext >= MMX |
102 | if Ext >= MMX |
107 | emms |
103 | emms |
108 | else |
104 | else |
109 | cld |
105 | cld |
110 | end if |
106 | end if |
111 | mov ebp,esp |
107 | mov ebp,esp |
112 | push edx esi ; store bump map |
108 | push edx esi ; store bump map |
113 | ; push esi ; store e. map |
109 | ; push esi ; store e. map |
114 | ; sub esp,120 |
110 | ; sub esp,120 |
115 | .sort3: ; sort triangle coordinates... |
111 | .sort3: ; sort triangle coordinates... |
116 | cmp ax,bx |
112 | cmp ax,bx |
117 | jle .sort1 |
113 | jle .sort1 |
118 | xchg eax,ebx |
114 | xchg eax,ebx |
119 | mov edx,dword[.b_x1] |
115 | mov edx,dword[.b_x1] |
120 | xchg edx,dword[.b_x2] |
116 | xchg edx,dword[.b_x2] |
121 | mov dword[.b_x1],edx |
117 | mov dword[.b_x1],edx |
122 | mov edx,dword[.e_x1] |
118 | mov edx,dword[.e_x1] |
123 | xchg edx,dword[.e_x2] |
119 | xchg edx,dword[.e_x2] |
124 | mov dword[.e_x1],edx |
120 | mov dword[.e_x1],edx |
125 | mov dx,.z1 |
121 | mov dx,.z1 |
126 | xchg dx,.z2 |
122 | xchg dx,.z2 |
127 | mov .z1,dx |
123 | mov .z1,dx |
128 | .sort1: |
124 | .sort1: |
129 | cmp bx,cx |
125 | cmp bx,cx |
130 | jle .sort2 |
126 | jle .sort2 |
131 | xchg ebx,ecx |
127 | xchg ebx,ecx |
132 | mov edx,dword[.b_x2] |
128 | mov edx,dword[.b_x2] |
133 | xchg edx,dword[.b_x3] |
129 | xchg edx,dword[.b_x3] |
134 | mov dword[.b_x2],edx |
130 | mov dword[.b_x2],edx |
135 | mov edx,dword[.e_x2] |
131 | mov edx,dword[.e_x2] |
136 | xchg edx,dword[.e_x3] |
132 | xchg edx,dword[.e_x3] |
137 | mov dword[.e_x2],edx |
133 | mov dword[.e_x2],edx |
138 | mov dx,.z2 |
134 | mov dx,.z2 |
139 | xchg dx,.z3 |
135 | xchg dx,.z3 |
140 | mov .z2,dx |
136 | mov .z2,dx |
141 | jmp .sort3 |
137 | jmp .sort3 |
142 | .sort2: |
138 | .sort2: |
143 | push eax ebx ecx ; store triangle coords in variables |
139 | push eax ebx ecx ; store triangle coords in variables |
144 | ; push ebx |
140 | ; push ebx |
145 | ; push ecx |
141 | ; push ecx |
146 | 142 | ||
147 | mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that |
143 | mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that |
148 | and edx,ebx ; if *all* of them are negative a sign flag is raised |
144 | and edx,ebx ; if *all* of them are negative a sign flag is raised |
149 | and edx,ecx |
145 | and edx,ecx |
150 | and edx,eax |
146 | and edx,eax |
151 | test edx,80008000h ; Check both X&Y at once |
147 | test edx,80008000h ; Check both X&Y at once |
152 | jne .loop23_done |
148 | jne .loop23_done |
153 | ; mov edx,eax ; eax,ebx,ecx are ORd together into edx which means that |
149 | ; mov edx,eax ; eax,ebx,ecx are ORd together into edx which means that |
154 | ; or edx,ebx ; if any *one* of them is negative a sign flag is raised |
150 | ; or edx,ebx ; if any *one* of them is negative a sign flag is raised |
155 | ; or edx,ecx |
151 | ; or edx,ecx |
156 | ; test edx,80000000h ; Check only X |
152 | ; test edx,80000000h ; Check only X |
157 | ; jne .loop23_done |
153 | ; jne .loop23_done |
158 | 154 | ||
159 | ; cmp .x1,SIZE_X ; { |
155 | ; cmp .x1,SIZE_X ; { |
160 | ; jg .loop23_done |
156 | ; jg .loop23_done |
161 | ; cmp .x2,SIZE_X ; This can be optimized with effort |
157 | ; cmp .x2,SIZE_X ; This can be optimized with effort |
162 | ; jg .loop23_done |
158 | ; jg .loop23_done |
163 | ; cmp .x3,SIZE_X |
159 | ; cmp .x3,SIZE_X |
164 | ; jg .loop23_done ; { |
160 | ; jg .loop23_done ; { |
165 | 161 | ||
166 | 162 | ||
167 | mov bx,.y2 ; calc delta 12 |
163 | mov bx,.y2 ; calc delta 12 |
168 | sub bx,.y1 |
164 | sub bx,.y1 |
169 | jnz .bt_dx12_make |
165 | jnz .bt_dx12_make |
170 | mov ecx,6 |
166 | mov ecx,6 |
171 | xor edx,edx |
167 | xor edx,edx |
172 | @@: |
168 | @@: |
173 | push edx ;dword 0 |
169 | push edx ;dword 0 |
174 | loop @b |
170 | loop @b |
175 | jmp .bt_dx12_done |
171 | jmp .bt_dx12_done |
176 | .bt_dx12_make: |
172 | .bt_dx12_make: |
177 | mov ax,.x2 |
173 | mov ax,.x2 |
178 | sub ax,.x1 |
174 | sub ax,.x1 |
179 | cwde |
175 | cwde |
180 | movsx ebx,bx |
176 | movsx ebx,bx |
181 | shl eax,ROUND |
177 | shl eax,ROUND |
182 | cdq |
178 | cdq |
183 | idiv ebx |
179 | idiv ebx |
184 | ; mov .dx12,eax |
180 | ; mov .dx12,eax |
185 | push eax |
181 | push eax |
186 | 182 | ||
187 | if Ext=SSE |
183 | if Ext=SSE |
188 | 184 | ||
189 | sub esp,16 |
185 | sub esp,16 |
190 | cvtsi2ss xmm3,ebx ;rcps |
186 | cvtsi2ss xmm3,ebx ;rcps |
191 | ; mov eax,255 |
187 | ; mov eax,255 |
192 | cvtsi2ss xmm4,[i255d] ;eax |
188 | cvtsi2ss xmm4,[i255d] ;eax |
193 | divss xmm3,xmm4 |
189 | divss xmm3,xmm4 |
194 | rcpss xmm3,xmm3 |
190 | rcpss xmm3,xmm3 |
195 | ; mulss xmm3,xmm4 |
191 | ; mulss xmm3,xmm4 |
196 | shufps xmm3,xmm3,0 |
192 | shufps xmm3,xmm3,0 |
197 | 193 | ||
198 | movd mm0,[.b_x1] |
194 | movd mm0,[.b_x1] |
199 | movd mm1,[.b_x2] |
195 | movd mm1,[.b_x2] |
200 | movd mm2,[.e_x1] |
196 | movd mm2,[.e_x1] |
201 | movd mm3,[.e_x2] |
197 | movd mm3,[.e_x2] |
202 | ; psubsw mm3,mm2 |
198 | ; psubsw mm3,mm2 |
203 | ; psubsw mm1,mm0 |
199 | ; psubsw mm1,mm0 |
204 | pxor mm4,mm4 |
200 | pxor mm4,mm4 |
205 | punpcklwd mm0,mm4 |
201 | punpcklwd mm0,mm4 |
206 | punpcklwd mm1,mm4 |
202 | punpcklwd mm1,mm4 |
207 | punpcklwd mm2,mm4 |
203 | punpcklwd mm2,mm4 |
208 | punpcklwd mm3,mm4 |
204 | punpcklwd mm3,mm4 |
209 | ; pslld mm0,ROUND |
205 | ; pslld mm0,ROUND |
210 | ; pslld mm1,ROUND |
206 | ; pslld mm1,ROUND |
211 | ; pslld mm2,ROUND |
207 | ; pslld mm2,ROUND |
212 | ; pslld mm3,ROUND |
208 | ; pslld mm3,ROUND |
213 | cvtpi2ps xmm0,mm0 |
209 | cvtpi2ps xmm0,mm0 |
214 | movlhps xmm0,xmm0 |
210 | movlhps xmm0,xmm0 |
215 | cvtpi2ps xmm0,mm2 |
211 | cvtpi2ps xmm0,mm2 |
216 | cvtpi2ps xmm1,mm1 |
212 | cvtpi2ps xmm1,mm1 |
217 | movlhps xmm1,xmm1 |
213 | movlhps xmm1,xmm1 |
218 | cvtpi2ps xmm1,mm3 |
214 | cvtpi2ps xmm1,mm3 |
219 | subps xmm1,xmm0 |
215 | subps xmm1,xmm0 |
220 | 216 | ||
221 | ; pxor mm4,mm4 |
217 | ; pxor mm4,mm4 |
222 | ; movq mm5,mm1 |
218 | ; movq mm5,mm1 |
223 | ; movq mm6,mm1 |
219 | ; movq mm6,mm1 |
224 | ; pcmpeqb mm5,mm4 |
220 | ; pcmpeqb mm5,mm4 |
225 | ; psubd mm1,mm0 |
221 | ; psubd mm1,mm0 |
226 | ; psubd mm3,mm2 |
222 | ; psubd mm3,mm2 |
227 | 223 | ||
228 | ; movq mm0,[.b_x1] ; bx1 by1 bx2 by2 |
224 | ; movq mm0,[.b_x1] ; bx1 by1 bx2 by2 |
229 | ; movq mm1,[.e_x1] ; ex1 ey1 ex2 ey2 |
225 | ; movq mm1,[.e_x1] ; ex1 ey1 ex2 ey2 |
230 | ; pxor |
226 | ; pxor |
231 | ; punpcklhd mm0,mm1 ; lwd ; |
227 | ; punpcklhd mm0,mm1 ; lwd ; |
232 | ; psubw mm1,mm0 ; mm1, mm0 |
228 | ; psubw mm1,mm0 ; mm1, mm0 |
233 | ; pxor mm2,mm2 |
229 | ; pxor mm2,mm2 |
234 | ; pmovmaskb eax,mm1 |
230 | ; pmovmaskb eax,mm1 |
235 | ; and eax,10101010b |
231 | ; and eax,10101010b |
236 | ; pcmpgtw mm2,mm1 |
232 | ; pcmpgtw mm2,mm1 |
237 | ; punpcklwd mm1,mm2 |
233 | ; punpcklwd mm1,mm2 |
238 | ; psllw mm0,ROUND |
234 | ; psllw mm0,ROUND |
239 | ; psllw mm1,ROUND |
235 | ; psllw mm1,ROUND |
240 | ; movq mm2,mm0 |
236 | ; movq mm2,mm0 |
241 | ; psrlq mm0,32 |
237 | ; psrlq mm0,32 |
242 | 238 | ||
243 | ; cvtpi2ps xmm0,mm1 |
239 | ; cvtpi2ps xmm0,mm1 |
244 | ; movlhps xmm0,xmm0 |
240 | ; movlhps xmm0,xmm0 |
245 | ; cvtpi2ps xmm0,mm3 |
241 | ; cvtpi2ps xmm0,mm3 |
246 | ; divps xmm1,xmm3 |
242 | ; divps xmm1,xmm3 |
247 | mulps xmm1,xmm3 |
243 | mulps xmm1,xmm3 |
248 | shufps xmm1,xmm1,10110001b |
244 | shufps xmm1,xmm1,10110001b |
249 | cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords |
245 | cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords |
250 | movhlps xmm1,xmm1 |
246 | movhlps xmm1,xmm1 |
251 | cvtps2pi mm1,xmm1 |
247 | cvtps2pi mm1,xmm1 |
252 | movq .dey12q,mm0 |
248 | movq .dey12q,mm0 |
253 | movq .dby12q,mm1 |
249 | movq .dby12q,mm1 |
254 | 250 | ||
255 | ; movd .dex12,mm0 |
251 | ; movd .dex12,mm0 |
256 | ; psrlq mm0,32 |
252 | ; psrlq mm0,32 |
257 | ; movd .dey12,mm0 |
253 | ; movd .dey12,mm0 |
258 | ; movhlps xmm1,xmm1 |
254 | ; movhlps xmm1,xmm1 |
259 | ; cvtps2pi mm0,xmm1 |
255 | ; cvtps2pi mm0,xmm1 |
260 | ; movd .dbx12,mm0 |
256 | ; movd .dbx12,mm0 |
261 | ; psrlq mm0,32 |
257 | ; psrlq mm0,32 |
262 | ; movd .dby12,mm0 |
258 | ; movd .dby12,mm0 |
263 | 259 | ||
264 | else |
260 | else |
265 | mov ax,word[.b_x2] |
261 | mov ax,word[.b_x2] |
266 | sub ax,word[.b_x1] |
262 | sub ax,word[.b_x1] |
267 | cwde |
263 | cwde |
268 | shl eax,ROUND |
264 | shl eax,ROUND |
269 | cdq |
265 | cdq |
270 | idiv ebx |
266 | idiv ebx |
271 | ; mov .dbx12,eax |
267 | ; mov .dbx12,eax |
272 | push eax |
268 | push eax |
273 | 269 | ||
274 | mov ax,word[.b_y2] |
270 | mov ax,word[.b_y2] |
275 | sub ax,word[.b_y1] |
271 | sub ax,word[.b_y1] |
276 | cwde |
272 | cwde |
277 | shl eax,ROUND |
273 | shl eax,ROUND |
278 | cdq |
274 | cdq |
279 | idiv ebx |
275 | idiv ebx |
280 | ; mov .dby12,eax |
276 | ; mov .dby12,eax |
281 | push eax |
277 | push eax |
282 | 278 | ||
283 | ; mov eax,.dbx12 |
279 | ; mov eax,.dbx12 |
284 | ; mov ebx,.dby12 |
280 | ; mov ebx,.dby12 |
285 | ; int3 |
281 | ; int3 |
286 | 282 | ||
287 | mov ax,word[.e_x2] |
283 | mov ax,word[.e_x2] |
288 | sub ax,word[.e_x1] |
284 | sub ax,word[.e_x1] |
289 | cwde |
285 | cwde |
290 | shl eax,ROUND |
286 | shl eax,ROUND |
291 | cdq |
287 | cdq |
292 | idiv ebx |
288 | idiv ebx |
293 | ; mov .dex12,eax |
289 | ; mov .dex12,eax |
294 | push eax |
290 | push eax |
295 | 291 | ||
296 | mov ax,word[.e_y2] |
292 | mov ax,word[.e_y2] |
297 | sub ax,word[.e_y1] |
293 | sub ax,word[.e_y1] |
298 | cwde |
294 | cwde |
299 | shl eax,ROUND |
295 | shl eax,ROUND |
300 | cdq |
296 | cdq |
301 | idiv ebx |
297 | idiv ebx |
302 | ; mov .dey12,eax |
298 | ; mov .dey12,eax |
303 | push eax |
299 | push eax |
304 | 300 | ||
305 | end if |
301 | end if |
306 | mov ax,.z2 |
302 | mov ax,.z2 |
307 | sub ax,.z1 |
303 | sub ax,.z1 |
308 | cwde |
304 | cwde |
309 | shl eax,CATMULL_SHIFT |
305 | shl eax,CATMULL_SHIFT |
310 | cdq |
306 | cdq |
311 | idiv ebx |
307 | idiv ebx |
312 | push eax |
308 | push eax |
313 | .bt_dx12_done: |
309 | .bt_dx12_done: |
314 | 310 | ||
315 | mov bx,.y3 ; calc delta13 |
311 | mov bx,.y3 ; calc delta13 |
316 | sub bx,.y1 |
312 | sub bx,.y1 |
317 | jnz .bt_dx13_make |
313 | jnz .bt_dx13_make |
318 | mov ecx,6 |
314 | mov ecx,6 |
319 | xor edx,edx |
315 | xor edx,edx |
320 | @@: |
316 | @@: |
321 | push edx ;dword 0 |
317 | push edx ;dword 0 |
322 | loop @b |
318 | loop @b |
323 | jmp .bt_dx13_done |
319 | jmp .bt_dx13_done |
324 | .bt_dx13_make: |
320 | .bt_dx13_make: |
325 | mov ax,.x3 |
321 | mov ax,.x3 |
326 | sub ax,.x1 |
322 | sub ax,.x1 |
327 | cwde |
323 | cwde |
328 | movsx ebx,bx |
324 | movsx ebx,bx |
329 | shl eax,ROUND |
325 | shl eax,ROUND |
330 | cdq |
326 | cdq |
331 | idiv ebx |
327 | idiv ebx |
332 | ; mov .dx13,eax |
328 | ; mov .dx13,eax |
333 | push eax |
329 | push eax |
334 | 330 | ||
335 | if Ext=SSE |
331 | if Ext=SSE |
336 | 332 | ||
337 | cvtsi2ss xmm3,ebx |
333 | cvtsi2ss xmm3,ebx |
338 | ; mov eax,255 |
334 | ; mov eax,255 |
339 | cvtsi2ss xmm4,[i255d] |
335 | cvtsi2ss xmm4,[i255d] |
340 | divss xmm3,xmm4 |
336 | divss xmm3,xmm4 |
341 | rcpss xmm3,xmm3 |
337 | rcpss xmm3,xmm3 |
342 | ; mulss xmm3,xmm4 |
338 | ; mulss xmm3,xmm4 |
343 | shufps xmm3,xmm3,0 |
339 | shufps xmm3,xmm3,0 |
344 | sub esp,16 |
340 | sub esp,16 |
345 | 341 | ||
346 | movd mm0,[.b_x1] |
342 | movd mm0,[.b_x1] |
347 | movd mm1,[.b_x3] |
343 | movd mm1,[.b_x3] |
348 | movd mm2,[.e_x1] |
344 | movd mm2,[.e_x1] |
349 | movd mm3,[.e_x3] |
345 | movd mm3,[.e_x3] |
350 | 346 | ||
351 | pxor mm4,mm4 |
347 | pxor mm4,mm4 |
352 | punpcklwd mm0,mm4 |
348 | punpcklwd mm0,mm4 |
353 | punpcklwd mm1,mm4 |
349 | punpcklwd mm1,mm4 |
354 | punpcklwd mm2,mm4 |
350 | punpcklwd mm2,mm4 |
355 | punpcklwd mm3,mm4 |
351 | punpcklwd mm3,mm4 |
356 | 352 | ||
357 | cvtpi2ps xmm0,mm0 |
353 | cvtpi2ps xmm0,mm0 |
358 | movlhps xmm0,xmm0 |
354 | movlhps xmm0,xmm0 |
359 | cvtpi2ps xmm0,mm2 |
355 | cvtpi2ps xmm0,mm2 |
360 | cvtpi2ps xmm1,mm1 |
356 | cvtpi2ps xmm1,mm1 |
361 | movlhps xmm1,xmm1 |
357 | movlhps xmm1,xmm1 |
362 | cvtpi2ps xmm1,mm3 |
358 | cvtpi2ps xmm1,mm3 |
363 | subps xmm1,xmm0 |
359 | subps xmm1,xmm0 |
364 | 360 | ||
365 | ; divps xmm1,xmm3 |
361 | ; divps xmm1,xmm3 |
366 | mulps xmm1,xmm3 |
362 | mulps xmm1,xmm3 |
367 | shufps xmm1,xmm1,10110001b |
363 | shufps xmm1,xmm1,10110001b |
368 | cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords |
364 | cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords |
369 | movhlps xmm1,xmm1 |
365 | movhlps xmm1,xmm1 |
370 | cvtps2pi mm1,xmm1 |
366 | cvtps2pi mm1,xmm1 |
371 | movq .dey13q,mm0 |
367 | movq .dey13q,mm0 |
372 | movq .dby13q,mm1 |
368 | movq .dby13q,mm1 |
373 | 369 | ||
374 | else |
370 | else |
375 | 371 | ||
376 | mov ax,word[.b_x3] |
372 | mov ax,word[.b_x3] |
377 | sub ax,word[.b_x1] |
373 | sub ax,word[.b_x1] |
378 | cwde |
374 | cwde |
379 | shl eax,ROUND |
375 | shl eax,ROUND |
380 | cdq |
376 | cdq |
381 | idiv ebx |
377 | idiv ebx |
382 | ; mov .dbx13,eax |
378 | ; mov .dbx13,eax |
383 | push eax |
379 | push eax |
384 | 380 | ||
385 | mov ax,word[.b_y3] |
381 | mov ax,word[.b_y3] |
386 | sub ax,word[.b_y1] |
382 | sub ax,word[.b_y1] |
387 | cwde |
383 | cwde |
388 | shl eax,ROUND |
384 | shl eax,ROUND |
389 | cdq |
385 | cdq |
390 | idiv ebx |
386 | idiv ebx |
391 | ; mov .dby13,eax |
387 | ; mov .dby13,eax |
392 | push eax |
388 | push eax |
393 | 389 | ||
394 | mov ax,word[.e_x3] |
390 | mov ax,word[.e_x3] |
395 | sub ax,word[.e_x1] |
391 | sub ax,word[.e_x1] |
396 | cwde |
392 | cwde |
397 | shl eax,ROUND |
393 | shl eax,ROUND |
398 | cdq |
394 | cdq |
399 | idiv ebx |
395 | idiv ebx |
400 | ; mov .dex13,eax |
396 | ; mov .dex13,eax |
401 | push eax |
397 | push eax |
402 | 398 | ||
403 | mov ax,word[.e_y3] |
399 | mov ax,word[.e_y3] |
404 | sub ax,word[.e_y1] |
400 | sub ax,word[.e_y1] |
405 | cwde |
401 | cwde |
406 | shl eax,ROUND |
402 | shl eax,ROUND |
407 | cdq |
403 | cdq |
408 | idiv ebx |
404 | idiv ebx |
409 | ; mov .dey13,eax |
405 | ; mov .dey13,eax |
410 | push eax |
406 | push eax |
411 | 407 | ||
412 | end if |
408 | end if |
413 | 409 | ||
414 | mov ax,.z3 |
410 | mov ax,.z3 |
415 | sub ax,.z1 |
411 | sub ax,.z1 |
416 | cwde |
412 | cwde |
417 | shl eax,CATMULL_SHIFT |
413 | shl eax,CATMULL_SHIFT |
418 | cdq |
414 | cdq |
419 | idiv ebx |
415 | idiv ebx |
420 | ; mov .dz13,eax |
416 | ; mov .dz13,eax |
421 | push eax |
417 | push eax |
422 | .bt_dx13_done: |
418 | .bt_dx13_done: |
423 | 419 | ||
424 | mov bx,.y3 ; calc delta23 |
420 | mov bx,.y3 ; calc delta23 |
425 | sub bx,.y2 |
421 | sub bx,.y2 |
426 | jnz .bt_dx23_make |
422 | jnz .bt_dx23_make |
427 | mov ecx,6 |
423 | mov ecx,6 |
428 | xor edx,edx |
424 | xor edx,edx |
429 | @@: |
425 | @@: |
430 | push edx ;dword 0 |
426 | push edx ;dword 0 |
431 | loop @b |
427 | loop @b |
432 | jmp .bt_dx23_done |
428 | jmp .bt_dx23_done |
433 | .bt_dx23_make: |
429 | .bt_dx23_make: |
434 | mov ax,.x3 |
430 | mov ax,.x3 |
435 | sub ax,.x2 |
431 | sub ax,.x2 |
436 | cwde |
432 | cwde |
437 | movsx ebx,bx |
433 | movsx ebx,bx |
438 | shl eax,ROUND |
434 | shl eax,ROUND |
439 | cdq |
435 | cdq |
440 | idiv ebx |
436 | idiv ebx |
441 | ; mov .dx23,eax |
437 | ; mov .dx23,eax |
442 | push eax |
438 | push eax |
443 | 439 | ||
444 | if Ext=SSE |
440 | if Ext=SSE |
445 | 441 | ||
446 | cvtsi2ss xmm3,ebx |
442 | cvtsi2ss xmm3,ebx |
447 | ; mov eax,255 |
443 | ; mov eax,255 |
448 | cvtsi2ss xmm4,[i255d] ;eax |
444 | cvtsi2ss xmm4,[i255d] ;eax |
449 | divss xmm3,xmm4 |
445 | divss xmm3,xmm4 |
450 | shufps xmm3,xmm3,0 |
446 | shufps xmm3,xmm3,0 |
451 | sub esp,16 |
447 | sub esp,16 |
452 | 448 | ||
453 | movd mm0,[.b_x2] |
449 | movd mm0,[.b_x2] |
454 | movd mm1,[.b_x3] |
450 | movd mm1,[.b_x3] |
455 | movd mm2,[.e_x2] |
451 | movd mm2,[.e_x2] |
456 | movd mm3,[.e_x3] |
452 | movd mm3,[.e_x3] |
457 | 453 | ||
458 | pxor mm4,mm4 |
454 | pxor mm4,mm4 |
459 | punpcklwd mm0,mm4 |
455 | punpcklwd mm0,mm4 |
460 | punpcklwd mm1,mm4 |
456 | punpcklwd mm1,mm4 |
461 | punpcklwd mm2,mm4 |
457 | punpcklwd mm2,mm4 |
462 | punpcklwd mm3,mm4 |
458 | punpcklwd mm3,mm4 |
463 | 459 | ||
464 | cvtpi2ps xmm0,mm0 |
460 | cvtpi2ps xmm0,mm0 |
465 | movlhps xmm0,xmm0 |
461 | movlhps xmm0,xmm0 |
466 | cvtpi2ps xmm0,mm2 |
462 | cvtpi2ps xmm0,mm2 |
467 | cvtpi2ps xmm1,mm1 |
463 | cvtpi2ps xmm1,mm1 |
468 | movlhps xmm1,xmm1 |
464 | movlhps xmm1,xmm1 |
469 | cvtpi2ps xmm1,mm3 |
465 | cvtpi2ps xmm1,mm3 |
470 | subps xmm1,xmm0 |
466 | subps xmm1,xmm0 |
471 | 467 | ||
472 | divps xmm1,xmm3 |
468 | divps xmm1,xmm3 |
473 | shufps xmm1,xmm1,10110001b |
469 | shufps xmm1,xmm1,10110001b |
474 | cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords |
470 | cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords |
475 | movhlps xmm1,xmm1 |
471 | movhlps xmm1,xmm1 |
476 | cvtps2pi mm1,xmm1 |
472 | cvtps2pi mm1,xmm1 |
477 | movq .dey23q,mm0 |
473 | movq .dey23q,mm0 |
478 | movq .dby23q,mm1 |
474 | movq .dby23q,mm1 |
479 | 475 | ||
480 | else |
476 | else |
481 | 477 | ||
482 | mov ax,word[.b_x3] |
478 | mov ax,word[.b_x3] |
483 | sub ax,word[.b_x2] |
479 | sub ax,word[.b_x2] |
484 | cwde |
480 | cwde |
485 | shl eax,ROUND |
481 | shl eax,ROUND |
486 | cdq |
482 | cdq |
487 | idiv ebx |
483 | idiv ebx |
488 | ; mov .dbx23,eax |
484 | ; mov .dbx23,eax |
489 | push eax |
485 | push eax |
490 | 486 | ||
491 | mov ax,word[.b_y3] |
487 | mov ax,word[.b_y3] |
492 | sub ax,word[.b_y2] |
488 | sub ax,word[.b_y2] |
493 | cwde |
489 | cwde |
494 | shl eax,ROUND |
490 | shl eax,ROUND |
495 | cdq |
491 | cdq |
496 | idiv ebx |
492 | idiv ebx |
497 | ; mov .dby23,eax |
493 | ; mov .dby23,eax |
498 | push eax |
494 | push eax |
499 | 495 | ||
500 | mov ax,word[.e_x3] |
496 | mov ax,word[.e_x3] |
501 | sub ax,word[.e_x2] |
497 | sub ax,word[.e_x2] |
502 | cwde |
498 | cwde |
503 | shl eax,ROUND |
499 | shl eax,ROUND |
504 | cdq |
500 | cdq |
505 | idiv ebx |
501 | idiv ebx |
506 | ; mov .dex23,eax |
502 | ; mov .dex23,eax |
507 | push eax |
503 | push eax |
508 | 504 | ||
509 | mov ax,word[.e_y3] |
505 | mov ax,word[.e_y3] |
510 | sub ax,word[.e_y2] |
506 | sub ax,word[.e_y2] |
511 | cwde |
507 | cwde |
512 | shl eax,ROUND |
508 | shl eax,ROUND |
513 | cdq |
509 | cdq |
514 | idiv ebx |
510 | idiv ebx |
515 | ; mov .dey23,eax |
511 | ; mov .dey23,eax |
516 | push eax |
512 | push eax |
517 | end if |
513 | end if |
518 | mov ax,.z3 |
514 | mov ax,.z3 |
519 | sub ax,.z2 |
515 | sub ax,.z2 |
520 | cwde |
516 | cwde |
521 | shl eax,CATMULL_SHIFT |
517 | shl eax,CATMULL_SHIFT |
522 | cdq |
518 | cdq |
523 | idiv ebx |
519 | idiv ebx |
524 | ; mov .dz23,eax |
520 | ; mov .dz23,eax |
525 | push eax |
521 | push eax |
526 | ; sub esp,40 |
522 | ; sub esp,40 |
527 | .bt_dx23_done: |
523 | .bt_dx23_done: |
528 | movsx eax,.x1 |
524 | movsx eax,.x1 |
529 | shl eax,ROUND |
525 | shl eax,ROUND |
530 | ; mov .cx1,eax |
526 | ; mov .cx1,eax |
531 | ; mov .cx2,eax |
527 | ; mov .cx2,eax |
532 | push eax eax |
528 | push eax eax |
533 | ; push eax |
529 | ; push eax |
534 | 530 | ||
535 | movsx eax,word[.b_x1] |
531 | movsx eax,word[.b_x1] |
536 | shl eax,ROUND |
532 | shl eax,ROUND |
537 | mov .cbx1,eax |
533 | mov .cbx1,eax |
538 | mov .cbx2,eax |
534 | mov .cbx2,eax |
539 | ; push eax eax |
535 | ; push eax eax |
540 | ; push eax |
536 | ; push eax |
541 | 537 | ||
542 | movsx eax,word[.b_y1] |
538 | movsx eax,word[.b_y1] |
543 | shl eax,ROUND |
539 | shl eax,ROUND |
544 | mov .cby1,eax |
540 | mov .cby1,eax |
545 | mov .cby2,eax |
541 | mov .cby2,eax |
546 | ; push eax eax |
542 | ; push eax eax |
547 | ; push eax |
543 | ; push eax |
548 | 544 | ||
549 | movsx eax,word[.e_x1] |
545 | movsx eax,word[.e_x1] |
550 | shl eax,ROUND |
546 | shl eax,ROUND |
551 | mov .cex1,eax |
547 | mov .cex1,eax |
552 | mov .cex2,eax |
548 | mov .cex2,eax |
553 | ; push eax eax |
549 | ; push eax eax |
554 | ;push eax |
550 | ;push eax |
555 | 551 | ||
556 | movsx eax,word[.e_y1] |
552 | movsx eax,word[.e_y1] |
557 | shl eax,ROUND |
553 | shl eax,ROUND |
558 | mov .cey1,eax |
554 | mov .cey1,eax |
559 | mov .cey2,eax |
555 | mov .cey2,eax |
560 | sub esp,32 |
556 | sub esp,32 |
561 | ; push eax eax |
557 | ; push eax eax |
562 | ;push eax |
558 | ;push eax |
563 | 559 | ||
564 | movsx eax,.z1 |
560 | movsx eax,.z1 |
565 | shl eax,CATMULL_SHIFT |
561 | shl eax,CATMULL_SHIFT |
566 | ; mov .cz1,eax |
562 | ; mov .cz1,eax |
567 | ; mov .cz2,eax |
563 | ; mov .cz2,eax |
568 | push eax eax |
564 | push eax eax |
569 | ;push eax |
565 | ;push eax |
570 | 566 | ||
571 | movsx ecx,.y1 |
567 | movsx ecx,.y1 |
572 | cmp cx,.y2 |
568 | cmp cx,.y2 |
573 | jge .loop12_done |
569 | jge .loop12_done |
574 | .loop12: |
570 | .loop12: |
575 | call .call_line |
571 | call .call_line |
576 | 572 | ||
577 | mov eax,.dx13 |
573 | mov eax,.dx13 |
578 | add .cx1,eax |
574 | add .cx1,eax |
579 | mov ebx,.dx12 |
575 | mov ebx,.dx12 |
580 | add .cx2,ebx |
576 | add .cx2,ebx |
581 | 577 | if Ext>= SSE2 |
|
582 | if Ext >= MMX |
- | |
583 | movq mm0,.cby2 ; with this optimization object |
578 | movups xmm0,.cey1 |
- | 579 | movups xmm1,.cey2 |
|
- | 580 | movups xmm2,.dey12q |
|
- | 581 | movups xmm3,.dey13q |
|
- | 582 | paddd xmm0,xmm3 |
|
- | 583 | paddd xmm1,xmm2 |
|
- | 584 | movups .cey1,xmm0 |
|
- | 585 | movups .cey2,xmm1 |
|
- | 586 | else if (Ext = MMX) | (Ext=SSE) |
|
- | 587 | movq mm0,.cby2 ; with this optimization object |
|
584 | movq mm1,.cby1 ; looks bit annoying |
588 | movq mm1,.cby1 ; looks bit annoying |
585 | movq mm2,.cey2 |
589 | movq mm2,.cey2 |
586 | movq mm3,.cey1 |
590 | movq mm3,.cey1 |
587 | paddd mm0,.dby12q |
591 | paddd mm0,.dby12q |
588 | paddd mm1,.dby13q |
592 | paddd mm1,.dby13q |
589 | paddd mm2,.dey12q |
593 | paddd mm2,.dey12q |
590 | paddd mm3,.dey13q |
594 | paddd mm3,.dey13q |
591 | movq .cby2,mm0 |
595 | movq .cby2,mm0 |
592 | movq .cby1,mm1 |
596 | movq .cby1,mm1 |
593 | movq .cey1,mm3 |
597 | movq .cey1,mm3 |
594 | movq .cey2,mm2 |
598 | movq .cey2,mm2 |
595 | else |
599 | else |
596 | mov edx,.dbx13 |
600 | mov edx,.dbx13 |
597 | add .cbx1,edx |
601 | add .cbx1,edx |
598 | mov eax,.dbx12 |
602 | mov eax,.dbx12 |
599 | add .cbx2,eax |
603 | add .cbx2,eax |
600 | mov ebx,.dby13 |
604 | mov ebx,.dby13 |
601 | add .cby1,ebx |
605 | add .cby1,ebx |
602 | mov edx,.dby12 |
606 | mov edx,.dby12 |
603 | add .cby2,edx |
607 | add .cby2,edx |
604 | 608 | ||
605 | mov eax,.dex13 |
609 | mov eax,.dex13 |
606 | add .cex1,eax |
610 | add .cex1,eax |
607 | mov ebx,.dex12 |
611 | mov ebx,.dex12 |
608 | add .cex2,ebx |
612 | add .cex2,ebx |
609 | mov edx,.dey13 |
613 | mov edx,.dey13 |
610 | add .cey1,edx |
614 | add .cey1,edx |
611 | mov eax,.dey12 |
615 | mov eax,.dey12 |
612 | add .cey2,eax |
616 | add .cey2,eax |
613 | 617 | ||
614 | end if |
618 | end if |
615 | mov ebx,.dz13 |
619 | mov ebx,.dz13 |
616 | add .cz1,ebx |
620 | add .cz1,ebx |
617 | mov edx,.dz12 |
621 | mov edx,.dz12 |
618 | add .cz2,edx |
622 | add .cz2,edx |
619 | 623 | ||
620 | inc ecx |
624 | inc ecx |
621 | cmp cx,.y2 |
625 | cmp cx,.y2 |
622 | jl .loop12 |
626 | jl .loop12 |
623 | .loop12_done: |
627 | .loop12_done: |
624 | 628 | ||
625 | movsx ecx,.y2 |
629 | movsx ecx,.y2 |
626 | cmp cx,.y3 |
630 | cmp cx,.y3 |
627 | jge .loop23_done |
631 | jge .loop23_done |
628 | 632 | ||
629 | movsx eax,.z2 |
633 | movsx eax,.z2 |
630 | shl eax,CATMULL_SHIFT |
634 | shl eax,CATMULL_SHIFT |
631 | mov .cz2,eax |
635 | mov .cz2,eax |
632 | 636 | ||
633 | movsx eax,.x2 |
637 | movsx eax,.x2 |
634 | shl eax,ROUND |
638 | shl eax,ROUND |
635 | mov .cx2,eax |
639 | mov .cx2,eax |
636 | 640 | ||
637 | movzx eax,word[.b_x2] |
641 | movzx eax,word[.b_x2] |
638 | shl eax,ROUND |
642 | shl eax,ROUND |
639 | mov .cbx2,eax |
643 | mov .cbx2,eax |
640 | 644 | ||
641 | movzx eax,word[.b_y2] |
645 | movzx eax,word[.b_y2] |
642 | shl eax,ROUND |
646 | shl eax,ROUND |
643 | mov .cby2,eax |
647 | mov .cby2,eax |
644 | 648 | ||
645 | movzx eax,word[.e_x2] |
649 | movzx eax,word[.e_x2] |
646 | shl eax,ROUND |
650 | shl eax,ROUND |
647 | mov .cex2,eax |
651 | mov .cex2,eax |
648 | 652 | ||
649 | movzx eax,word[.e_y2] |
653 | movzx eax,word[.e_y2] |
650 | shl eax,ROUND |
654 | shl eax,ROUND |
651 | mov .cey2,eax |
655 | mov .cey2,eax |
652 | 656 | ||
653 | .loop23: |
657 | .loop23: |
654 | call .call_line |
658 | call .call_line |
655 | ;if Ext = NON |
659 | ;if Ext = NON |
656 | mov eax,.dx13 |
660 | mov eax,.dx13 |
657 | add .cx1,eax |
661 | add .cx1,eax |
658 | mov ebx,.dx23 |
662 | mov ebx,.dx23 |
659 | add .cx2,ebx |
663 | add .cx2,ebx |
660 | 664 | if Ext>= SSE2 |
|
661 | if Ext >= MMX |
- | |
662 | movq mm0,.cby2 ; with this mmx optimization object looks bit |
665 | movups xmm0,.cey1 |
- | 666 | movups xmm1,.cey2 |
|
- | 667 | movups xmm2,.dey23q |
|
- | 668 | movups xmm3,.dey13q |
|
- | 669 | paddd xmm0,xmm3 |
|
- | 670 | paddd xmm1,xmm2 |
|
- | 671 | movups .cey1,xmm0 |
|
- | 672 | movups .cey2,xmm1 |
|
- | 673 | else if (Ext = MMX) | ( Ext = SSE) |
|
- | 674 | movq mm0,.cby2 ; with this mmx optimization object looks bit |
|
663 | movq mm1,.cby1 ; annoying |
675 | movq mm1,.cby1 ; annoying |
664 | movq mm2,.cey2 |
676 | movq mm2,.cey2 |
665 | movq mm3,.cey1 |
677 | movq mm3,.cey1 |
666 | paddd mm0,.dby23q |
678 | paddd mm0,.dby23q |
667 | paddd mm1,.dby13q |
679 | paddd mm1,.dby13q |
668 | paddd mm2,.dey23q |
680 | paddd mm2,.dey23q |
669 | paddd mm3,.dey13q |
681 | paddd mm3,.dey13q |
670 | movq .cby2,mm0 |
682 | movq .cby2,mm0 |
671 | movq .cby1,mm1 |
683 | movq .cby1,mm1 |
672 | movq .cey2,mm2 |
684 | movq .cey2,mm2 |
673 | movq .cey1,mm3 |
685 | movq .cey1,mm3 |
674 | 686 | ||
675 | else |
687 | else |
676 | mov edx,.dbx13 |
688 | mov edx,.dbx13 |
677 | add .cbx1,edx |
689 | add .cbx1,edx |
678 | mov eax,.dbx23 |
690 | mov eax,.dbx23 |
679 | add .cbx2,eax |
691 | add .cbx2,eax |
680 | mov ebx,.dby13 |
692 | mov ebx,.dby13 |
681 | add .cby1,ebx |
693 | add .cby1,ebx |
682 | mov edx,.dby23 |
694 | mov edx,.dby23 |
683 | add .cby2,edx |
695 | add .cby2,edx |
684 | 696 | ||
685 | mov eax,.dex13 |
697 | mov eax,.dex13 |
686 | add .cex1,eax |
698 | add .cex1,eax |
687 | mov ebx,.dex23 |
699 | mov ebx,.dex23 |
688 | add .cex2,ebx |
700 | add .cex2,ebx |
689 | mov edx,.dey13 |
701 | mov edx,.dey13 |
690 | add .cey1,edx |
702 | add .cey1,edx |
691 | mov eax,.dey23 |
703 | mov eax,.dey23 |
692 | add .cey2,eax |
704 | add .cey2,eax |
693 | end if |
705 | end if |
694 | 706 | ||
695 | mov ebx,.dz13 |
707 | mov ebx,.dz13 |
696 | add .cz1,ebx |
708 | add .cz1,ebx |
697 | mov edx,.dz23 |
709 | mov edx,.dz23 |
698 | add .cz2,edx |
710 | add .cz2,edx |
699 | ;else |
711 | ;else |
700 | ; movq mm0,.db13q |
712 | ; movq mm0,.db13q |
701 | ; movq mm1,.cbx1q |
713 | ; movq mm1,.cbx1q |
702 | 714 | ||
703 | inc ecx |
715 | inc ecx |
704 | cmp cx,.y3 |
716 | cmp cx,.y3 |
705 | jl .loop23 |
717 | jl .loop23 |
706 | .loop23_done: |
718 | .loop23_done: |
707 | 719 | ||
708 | mov esp,ebp |
720 | mov esp,ebp |
709 | ret 34 |
721 | ret 34 |
710 | 722 | ||
;-----------------------------------------------------------------------
; .call_line - helper used by the edge loops: draws the scanline for the
; current row by passing the current edge values to two_tex_line_z.
; in:  ecx - row number (pushed last, so it becomes the callee's .y)
;      edi - not loaded here; two_tex_line_z expects the screen buffer
;            pointer in edi - NOTE(review): presumably set up by the
;            caller of this routine, confirm.
; All general registers are preserved (pushad/popad); this is required
; because two_tex_line_z overwrites ebp and the other registers.
;-----------------------------------------------------------------------
711 | .call_line: |
723 | .call_line: |
712 | 724 | ||
713 | pushad |
725 | pushad |
714 | 726 | ||
; 14 dwords are pushed; the order defines the callee's frame:
; the first push ends up at [ebp+56] (.z1), the last one at [ebp+4] (.y).
715 | push .cz1 |
727 | push .cz1 |
716 | push .cz2 |
728 | push .cz2 |
717 | push .z_buff |
729 | push .z_buff |
718 | push .t_bmap |
730 | push .t_bmap |
719 | push .t_emap |
731 | push .t_emap |
720 | push dword .cey2 |
732 | push dword .cey2 |
721 | push .cex2 |
733 | push .cex2 |
722 | push dword .cey1 |
734 | push dword .cey1 |
723 | push .cex1 |
735 | push .cex1 |
724 | push dword .cby2 |
736 | push dword .cby2 |
725 | push .cbx2 |
737 | push .cbx2 |
726 | push dword .cby1 |
738 | push dword .cby1 |
727 | push .cbx1 |
739 | push .cbx1 |
728 | push ecx |
740 | push ecx |
729 | 741 | ||
; span end points: drop the ROUND fraction bits of both edge x values
730 | mov eax,.cx1 |
742 | mov eax,.cx1 |
731 | sar eax,ROUND |
743 | sar eax,ROUND |
732 | mov ebx,.cx2 |
744 | mov ebx,.cx2 |
733 | sar ebx,ROUND |
745 | sar ebx,ROUND |
734 | 746 | ||
; the callee removes the 14 parameter dwords itself (ret 56)
735 | call two_tex_line_z |
747 | call two_tex_line_z |
736 | 748 | ||
737 | popad |
749 | popad |
738 | ret |
750 | ret |
;-----------------------------------------------------------------------
; two_tex_line_z - draws one horizontal span in which every pixel is the
; product of two texels (b. map * e. map, each colour byte shr 8) and is
; written only when it passes the Z-buffer test.
; Screen pixels are 3 bytes wide, Z-buffer entries are dwords.
; Frame: ebp is loaded from esp at entry and is NOT saved, so [ebp] is
;        the return address and the caller's ebp is lost.
; Exit:  'mov esp,ebp' drops the locals, 'ret 56' removes the 14
;        parameter dwords. eax, ebx, ecx, edx, esi, edi may be
;        overwritten (mm/xmm registers as well when Ext <> NON).
; NOTE(review): no emms is executed in this routine after the MMX code.
;-----------------------------------------------------------------------
739 | two_tex_line_z: |
751 | two_tex_line_z: |
740 | ;--------------in: eax - x1 |
752 | ;--------------in: eax - x1 |
741 | ;-------------- ebx - x2 |
753 | ;-------------- ebx - x2 |
742 | ;-------------- edi - pointer to screen buffer |
754 | ;-------------- edi - pointer to screen buffer |
743 | ;stack - remaining parameters : |
755 | ;stack - remaining parameters : |
744 | .y equ dword [ebp+4] |
756 | .y equ dword [ebp+4] |
745 | .bx1 equ [ebp+8] ; --- |
757 | .bx1 equ [ebp+8] ; --- |
746 | .by1 equ [ebp+12] ; | |
758 | .by1 equ [ebp+12] ; | |
747 | .bx2 equ [ebp+16] ; | |
759 | .bx2 equ [ebp+16] ; | |
748 | .by2 equ [ebp+20] ; |> b. texture and e. texture coords |
760 | .by2 equ [ebp+20] ; |> b. texture and e. texture coords |
749 | .ex1 equ [ebp+24] ; |> shifted shl ROUND |
761 | .ex1 equ [ebp+24] ; |> shifted shl ROUND |
750 | .ey1 equ [ebp+28] ; | |
762 | .ey1 equ [ebp+28] ; | |
751 | .ex2 equ [ebp+32] ; | |
763 | .ex2 equ [ebp+32] ; | |
752 | .ey2 equ [ebp+36] ; --- |
764 | .ey2 equ [ebp+36] ; --- |
753 | .emap equ [ebp+40] ; e. texture offset |
765 | .emap equ [ebp+40] ; e. texture offset |
754 | .bmap equ [ebp+44] ; b. texture offset |
766 | .bmap equ [ebp+44] ; b. texture offset |
755 | .z_buff equ dword [ebp+48] |
767 | .z_buff equ dword [ebp+48] |
756 | .z2 equ dword [ebp+52] ; -- |> z coords shifted |
768 | .z2 equ dword [ebp+52] ; -- |> z coords shifted |
757 | .z1 equ dword [ebp+56] ; -- shl CATMULL_SHIFT |
769 | .z1 equ dword [ebp+56] ; -- shl CATMULL_SHIFT |
758 | 770 | ||
; locals - they are created by the pushes (or 'sub esp') further down,
; so the order of those pushes must match the offsets given here
759 | .x1 equ dword [ebp-4] |
771 | .x1 equ dword [ebp-4] |
760 | .x2 equ dword [ebp-8] |
772 | .x2 equ dword [ebp-8] |
761 | .dbx equ [ebp-12] |
773 | .dbx equ [ebp-12] |
762 | .dex equ [ebp-16] |
774 | .dex equ [ebp-16] |
763 | .dby equ [ebp-20] |
775 | .dby equ [ebp-20] |
764 | .dey equ [ebp-24] |
776 | .dey equ [ebp-24] |
765 | .dz equ dword [ebp-28] |
777 | .dz equ dword [ebp-28] |
766 | .cbx equ [ebp-32] |
778 | .cbx equ [ebp-32] |
767 | .cex equ [ebp-36] |
779 | .cex equ [ebp-36] |
768 | .cby equ [ebp-40] |
780 | .cby equ [ebp-40] |
769 | .cey equ [ebp-44] |
781 | .cey equ [ebp-44] |
770 | .cz equ dword [ebp-48] |
782 | .cz equ dword [ebp-48] |
771 | .czbuff equ dword [ebp-52] |
783 | .czbuff equ dword [ebp-52] |
772 | 784 | ||
773 | mov ebp,esp |
785 | mov ebp,esp |
774 | 786 | ||
; nothing to draw when the row is outside 0..SIZE_Y-1
775 | mov ecx,.y |
787 | mov ecx,.y |
776 | or ecx,ecx |
788 | or ecx,ecx |
777 | jl .bl_end |
789 | jl .bl_end |
778 | cmp ecx,SIZE_Y |
790 | cmp ecx,SIZE_Y |
779 | jge .bl_end |
791 | jge .bl_end |
780 | 792 | ||
; make x1 < x2: an empty span (x1 = x2) is dropped, a reversed one gets
; x, texture coordinates and z of both end points exchanged
781 | cmp eax,ebx |
793 | cmp eax,ebx |
782 | jl @f |
794 | jl @f |
783 | je .bl_end |
795 | je .bl_end |
784 | 796 | ||
785 | xchg eax,ebx |
797 | xchg eax,ebx |
786 | if Ext=NON |
798 | if Ext=NON |
787 | mov edx,.bx1 |
799 | mov edx,.bx1 |
788 | xchg edx,.bx2 |
800 | xchg edx,.bx2 |
789 | mov .bx1,edx |
801 | mov .bx1,edx |
790 | mov edx,.by1 |
802 | mov edx,.by1 |
791 | xchg edx,.by2 |
803 | xchg edx,.by2 |
792 | mov .by1,edx |
804 | mov .by1,edx |
793 | 805 | ||
794 | mov edx,.ex1 |
806 | mov edx,.ex1 |
795 | xchg edx,.ex2 |
807 | xchg edx,.ex2 |
796 | mov .ex1,edx |
808 | mov .ex1,edx |
797 | mov edx,.ey1 |
809 | mov edx,.ey1 |
798 | xchg edx,.ey2 |
810 | xchg edx,.ey2 |
799 | mov .ey1,edx |
811 | mov .ey1,edx |
800 | else |
812 | else |
; each movq moves an x/y pair at once (the y dword directly follows x)
801 | movq mm0,.bx1 |
813 | movq mm0,.bx1 |
802 | movq mm1,.ex1 |
814 | movq mm1,.ex1 |
803 | movq mm2,.bx2 |
815 | movq mm2,.bx2 |
804 | movq mm3,.ex2 |
816 | movq mm3,.ex2 |
805 | movq .bx2,mm0 |
817 | movq .bx2,mm0 |
806 | movq .ex2,mm1 |
818 | movq .ex2,mm1 |
807 | movq .bx1,mm2 |
819 | movq .bx1,mm2 |
808 | movq .ex1,mm3 |
820 | movq .ex1,mm3 |
809 | end if |
821 | end if |
810 | mov edx,.z1 |
822 | mov edx,.z1 |
811 | xchg edx,.z2 |
823 | xchg edx,.z2 |
812 | mov .z1,edx |
824 | mov .z1,edx |
813 | @@: |
825 | @@: |
; create the locals .x1 = [ebp-4] and .x2 = [ebp-8]
814 | push eax ebx |
826 | push eax ebx |
815 | ; push ebx ;store x1, x2 |
827 | ; push ebx ;store x1, x2 |
816 | 828 | ||
; span completely to the right or to the left of the screen - done
817 | cmp .x1,SIZE_X |
829 | cmp .x1,SIZE_X |
818 | jge .bl_end |
830 | jge .bl_end |
819 | cmp .x2,0 |
831 | cmp .x2,0 |
820 | jle .bl_end |
832 | jle .bl_end |
821 | 833 | ||
; ebx = unclipped span length (> 0 here), divisor for all deltas below
822 | mov ebx,.x2 |
834 | mov ebx,.x2 |
823 | sub ebx,.x1 |
835 | sub ebx,.x1 |
824 | 836 | ||
825 | if Ext>=SSE |
837 | if Ext>=SSE |
826 | 838 | ||
; reserve .dbx, .dex, .dby, .dey ([ebp-12]..[ebp-24]) in one step and
; compute the four texture deltas with packed single precision floats
827 | sub esp,16 |
839 | sub esp,16 |
828 | cvtsi2ss xmm3,ebx ;rcps |
840 | cvtsi2ss xmm3,ebx ;rcps |
829 | shufps xmm3,xmm3,0 |
841 | shufps xmm3,xmm3,0 |
830 | 842 | ||
831 | ; movq mm0,.bx1q |
843 | ; movq mm0,.bx1q |
832 | ; movq mm1,.bx2q |
844 | ; movq mm1,.bx2q |
833 | ; movq mm2,.ex1q |
845 | ; movq mm2,.ex1q |
834 | ; movq mm3,.ex2q |
846 | ; movq mm3,.ex2q |
835 | ; psubd mm1,mm0 |
847 | ; psubd mm1,mm0 |
836 | ; psubd mm3,mm2 |
848 | ; psubd mm3,mm2 |
837 | ; cvtpi2ps xmm1,mm1 |
849 | ; cvtpi2ps xmm1,mm1 |
838 | ; movlhps xmm1,xmm1 |
850 | ; movlhps xmm1,xmm1 |
839 | ; cvtpi2ps xmm1,mm3 |
851 | ; cvtpi2ps xmm1,mm3 |
840 | 852 | ||
; xmm0 = by1; bx1; ey1; ex1 (hi..lo), xmm1 = the same for point 2
841 | cvtpi2ps xmm0,.bx1 ;mm0 ; bx1; by1 |
853 | cvtpi2ps xmm0,.bx1 ;mm0 ; bx1; by1 |
842 | movlhps xmm0,xmm0 |
854 | movlhps xmm0,xmm0 |
843 | cvtpi2ps xmm0,.ex1 ;mm2 ; ex1; ey1 |
855 | cvtpi2ps xmm0,.ex1 ;mm2 ; ex1; ey1 |
844 | cvtpi2ps xmm1,.bx2 ;mm1 ; bx2; by2 |
856 | cvtpi2ps xmm1,.bx2 ;mm1 ; bx2; by2 |
845 | movlhps xmm1,xmm1 |
857 | movlhps xmm1,xmm1 |
846 | cvtpi2ps xmm1,.ex2 ;mm3 ; ex2; ey2 |
858 | cvtpi2ps xmm1,.ex2 ;mm3 ; ex2; ey2 |
847 | subps xmm1,xmm0 |
859 | subps xmm1,xmm0 |
848 | ; hi lo |
860 | ; hi lo |
849 | divps xmm1,xmm3 ; xmm1 -> dby; dbx; dey; dex |
861 | divps xmm1,xmm3 ; xmm1 -> dby; dbx; dey; dex |
850 | 862 | ||
; reorder to dby; dey; dbx; dex so that each half matches one qword local
851 | shufps xmm1,xmm1,11011000b |
863 | shufps xmm1,xmm1,11011000b |
852 | cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords |
864 | cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords |
853 | movhlps xmm1,xmm1 |
865 | movhlps xmm1,xmm1 |
854 | cvtps2pi mm1,xmm1 |
866 | cvtps2pi mm1,xmm1 |
855 | movq .dex,mm0 ; hi - lo -> dbx, dex |
867 | movq .dex,mm0 ; hi - lo -> dbx, dex |
856 | movq .dey,mm1 ; hi - lo -> dby, dey |
868 | movq .dey,mm1 ; hi - lo -> dby, dey |
857 | 869 | ||
858 | else |
870 | else |
859 | 871 | ||
; integer version: every push creates the next local, in the order
; .dbx, .dex, .dby, .dey
860 | mov eax,.bx2 ; calc .dbx |
872 | mov eax,.bx2 ; calc .dbx |
861 | sub eax,.bx1 |
873 | sub eax,.bx1 |
862 | cdq |
874 | cdq |
863 | idiv ebx |
875 | idiv ebx |
864 | push eax |
876 | push eax |
865 | 877 | ||
866 | mov eax,.ex2 ; calc .dex |
878 | mov eax,.ex2 ; calc .dex |
867 | sub eax,.ex1 |
879 | sub eax,.ex1 |
868 | cdq |
880 | cdq |
869 | idiv ebx |
881 | idiv ebx |
870 | push eax |
882 | push eax |
871 | 883 | ||
872 | mov eax,.by2 ; calc .dby |
884 | mov eax,.by2 ; calc .dby |
873 | sub eax,.by1 |
885 | sub eax,.by1 |
874 | cdq |
886 | cdq |
875 | idiv ebx |
887 | idiv ebx |
876 | push eax |
888 | push eax |
877 | 889 | ||
878 | mov eax,.ey2 ; calc .dey |
890 | mov eax,.ey2 ; calc .dey |
879 | sub eax,.ey1 |
891 | sub eax,.ey1 |
880 | cdq |
892 | cdq |
881 | idiv ebx |
893 | idiv ebx |
882 | push eax |
894 | push eax |
883 | 895 | ||
884 | end if |
896 | end if |
885 | 897 | ||
; the push creates .dz = [ebp-28]; eax keeps .dz for the clipping below
886 | mov eax,.z2 ; calc .dz |
898 | mov eax,.z2 ; calc .dz |
887 | sub eax,.z1 |
899 | sub eax,.z1 |
888 | cdq |
900 | cdq |
889 | idiv ebx |
901 | idiv ebx |
890 | push eax |
902 | push eax |
891 | 903 | ||
892 | cmp .x1,0 ; left clipping: start values are moved to x = 0 |
904 | cmp .x1,0 ; left clipping: start values are moved to x = 0 |
893 | jge @f ; CLIPPING ON FUNCTION |
905 | jge @f ; CLIPPING ON FUNCTION |
894 | ; cut off the part of the span that exceeds the screen |
906 | ; cut off the part of the span that exceeds the screen |
895 | mov ebx,.x1 |
907 | mov ebx,.x1 |
896 | neg ebx |
908 | neg ebx |
897 | imul ebx ; eax = .dz * abs(.x1) |
909 | imul ebx ; eax = .dz * abs(.x1) |
898 | add .z1,eax |
910 | add .z1,eax |
899 | mov .x1,0 |
911 | mov .x1,0 |
900 | 912 | ||
; same for the four texture coordinates: start += delta * abs(x1)
901 | mov eax,.dbx |
913 | mov eax,.dbx |
902 | imul ebx |
914 | imul ebx |
903 | add .bx1,eax |
915 | add .bx1,eax |
904 | 916 | ||
905 | mov eax,.dby |
917 | mov eax,.dby |
906 | imul ebx |
918 | imul ebx |
907 | add .by1,eax |
919 | add .by1,eax |
908 | 920 | ||
909 | mov eax,.dex |
921 | mov eax,.dex |
910 | imul ebx |
922 | imul ebx |
911 | add .ex1,eax |
923 | add .ex1,eax |
912 | 924 | ||
913 | mov eax,.dey |
925 | mov eax,.dey |
914 | imul ebx |
926 | imul ebx |
915 | add .ey1,eax |
927 | add .ey1,eax |
916 | @@: |
928 | @@: |
; right clipping: the span ends at the screen edge at the latest
917 | cmp .x2,SIZE_X |
929 | cmp .x2,SIZE_X |
918 | jl @f |
930 | jl @f |
919 | mov .x2,SIZE_X |
931 | mov .x2,SIZE_X |
920 | @@: |
932 | @@: |
; pixel index = SIZE_X * y + x1; 3 bytes per screen pixel, 4 per Z entry
921 | mov eax,SIZE_X ;calc memory begin in buffers |
933 | mov eax,SIZE_X ;calc memory begin in buffers |
922 | mov ebx,.y |
934 | mov ebx,.y |
923 | mul ebx |
935 | mul ebx |
924 | mov ebx,.x1 |
936 | mov ebx,.x1 |
925 | add eax,ebx |
937 | add eax,ebx |
926 | mov ebx,eax |
938 | mov ebx,eax |
927 | lea eax,[eax*3] |
939 | lea eax,[eax*3] |
928 | add edi,eax ; edi - screen |
940 | add edi,eax ; edi - screen |
929 | mov esi,.z_buff ; z-buffer filled with dd variables |
941 | mov esi,.z_buff ; z-buffer filled with dd variables |
930 | shl ebx,2 |
942 | shl ebx,2 |
931 | add esi,ebx ; esi - Z buffer |
943 | add esi,ebx ; esi - Z buffer |
932 | 944 | ||
; ecx = number of pixels in the clipped span
933 | mov ecx,.x2 |
945 | mov ecx,.x2 |
934 | sub ecx,.x1 |
946 | sub ecx,.x1 |
935 | ; init current variables |
947 | ; init current variables |
; the pushes create .cbx, .cex, .cby, .cey, .cz, .czbuff in this order
936 | push dword .bx1 ;.by1 .ex1 .ey1 .z1 esi |
948 | push dword .bx1 ;.by1 .ex1 .ey1 .z1 esi |
937 | push dword .ex1 |
949 | push dword .ex1 |
938 | push dword .by1 |
950 | push dword .by1 |
939 | push dword .ey1 |
951 | push dword .ey1 |
940 | 952 | ||
941 | push .z1 ; current z shl CATMULL_SHIFT |
953 | push .z1 ; current z shl CATMULL_SHIFT |
942 | push esi |
954 | push esi |
943 | 955 | ||
944 | if Ext >= MMX |
956 | if Ext >= MMX |
; MMX version keeps the running coordinates in registers:
; mm0 = 0 (for byte unpacking), mm3 = cbx:cex, mm4 = cby:cey,
; edx = current Z-buffer address
945 | pxor mm0,mm0 |
957 | pxor mm0,mm0 |
946 | movq mm3,.cex ; hi - lo -> cbx; cex |
958 | movq mm3,.cex ; hi - lo -> cbx; cex |
947 | movq mm4,.cey ; hi - lo -> cby; cey |
959 | movq mm4,.cey ; hi - lo -> cby; cey |
948 | ; movq mm5,mm3 |
960 | ; movq mm5,mm3 |
949 | ; movq mm6,mm4 |
961 | ; movq mm6,mm4 |
950 | ; psrad mm5,ROUND |
962 | ; psrad mm5,ROUND |
951 | ; psrad mm6,ROUND |
963 | ; psrad mm6,ROUND |
952 | ; movq .ceyq,mm5 |
964 | ; movq .ceyq,mm5 |
953 | ; movq .cbyq,mm6 |
965 | ; movq .cbyq,mm6 |
954 | mov edx,.czbuff |
966 | mov edx,.czbuff |
955 | else |
967 | else |
956 | cld |
968 | cld |
957 | end if |
969 | end if |
; ---- per pixel loop, ecx = pixels left ----
958 | .draw: |
970 | .draw: |
959 | ; if TEX = SHIFTING ;bump drawing only in shifting mode |
971 | ; if TEX = SHIFTING ;bump drawing only in shifting mode |
; Z test (signed): the pixel is drawn only when .cz is below the stored z
960 | if Ext=NON |
972 | if Ext=NON |
961 | mov esi,.czbuff ; .czbuff current address in buffer |
973 | mov esi,.czbuff ; .czbuff current address in buffer |
962 | mov ebx,.cz ; .cz - cur z position |
974 | mov ebx,.cz ; .cz - cur z position |
963 | cmp ebx,dword[esi] |
975 | cmp ebx,dword[esi] |
964 | else |
976 | else |
965 | mov ebx,.cz |
977 | mov ebx,.cz |
966 | cmp ebx,dword[edx] |
978 | cmp ebx,dword[edx] |
967 | end if |
979 | end if |
968 | jge .skip |
980 | jge .skip |
969 | 981 | ||
; texel address = ((y shr ROUND) shl TEX_SHIFT + (x shr ROUND)) * 3 + map
970 | if Ext=NON |
982 | if Ext=NON |
971 | mov eax,.cby |
983 | mov eax,.cby |
972 | mov esi,.cbx |
984 | mov esi,.cbx |
973 | sar eax,ROUND |
985 | sar eax,ROUND |
974 | sar esi,ROUND |
986 | sar esi,ROUND |
975 | shl eax,TEX_SHIFT ;- |
987 | shl eax,TEX_SHIFT ;- |
976 | add esi,eax |
988 | add esi,eax |
977 | lea esi,[esi*3] ;- ; esi - current b. texture address |
989 | lea esi,[esi*3] ;- ; esi - current b. texture address |
978 | add esi,.bmap |
990 | add esi,.bmap |
979 | 991 | ||
980 | mov ebx,.cex ;.cex - current env map X |
992 | mov ebx,.cex ;.cex - current env map X |
981 | mov eax,.cey ;.cey - current env map y |
993 | mov eax,.cey ;.cey - current env map y |
982 | sar ebx,ROUND |
994 | sar ebx,ROUND |
983 | sar eax,ROUND |
995 | sar eax,ROUND |
984 | 996 | ||
985 | shl eax,TEX_SHIFT |
997 | shl eax,TEX_SHIFT |
986 | add ebx,eax |
998 | add ebx,eax |
987 | lea ebx,[ebx*3] |
999 | lea ebx,[ebx*3] |
988 | add ebx,.emap |
1000 | add ebx,.emap |
989 | 1001 | ||
990 | 1002 | ||
991 | else |
1003 | else |
; both addresses are computed at once: low dword = e. map, high = b. map;
; the qword read at .emap supplies .emap (low) and .bmap (high).
; Here esi receives the e. texel address and ebx the b. texel address.
992 | movq mm5,mm4 ;.cey |
1004 | movq mm5,mm4 ;.cey |
993 | psrad mm5,ROUND |
1005 | psrad mm5,ROUND |
994 | pslld mm5,TEX_SHIFT |
1006 | pslld mm5,TEX_SHIFT |
995 | movq mm6,mm3 ;.cex |
1007 | movq mm6,mm3 ;.cex |
996 | psrad mm6,ROUND |
1008 | psrad mm6,ROUND |
997 | paddd mm5,mm6 |
1009 | paddd mm5,mm6 |
998 | movq mm6,mm5 |
1010 | movq mm6,mm5 |
999 | paddd mm5,mm5 |
1011 | paddd mm5,mm5 |
1000 | paddd mm5,mm6 |
1012 | paddd mm5,mm6 |
1001 | paddd mm5,.emap |
1013 | paddd mm5,.emap |
1002 | movd esi,mm5 |
1014 | movd esi,mm5 |
1003 | psrlq mm5,32 |
1015 | psrlq mm5,32 |
1004 | movd ebx,mm5 |
1016 | movd ebx,mm5 |
1005 | end if |
1017 | end if |
1006 | if Ext>=MMX |
1018 | if Ext>=MMX |
; colour = (texel1 * texel2) shr 8 for every byte, then store the new z.
; NOTE(review): movd reads and writes 4 bytes while a pixel has 3 and edi
; advances by 3, so the 4th byte lands in the first byte of the next
; pixel; it stays there when that pixel is skipped by the Z test or lies
; past the end of the span - confirm whether this is acceptable.
1007 | movd mm1,[esi] |
1019 | movd mm1,[esi] |
1008 | movd mm2,[ebx] |
1020 | movd mm2,[ebx] |
1009 | punpcklbw mm1,mm0 |
1021 | punpcklbw mm1,mm0 |
1010 | punpcklbw mm2,mm0 |
1022 | punpcklbw mm2,mm0 |
1011 | pmullw mm1,mm2 |
1023 | pmullw mm1,mm2 |
1012 | psrlw mm1,8 |
1024 | psrlw mm1,8 |
1013 | packuswb mm1,mm0 |
1025 | packuswb mm1,mm0 |
1014 | movd [edi],mm1 |
1026 | movd [edi],mm1 |
1015 | mov ebx,.cz |
1027 | mov ebx,.cz |
1016 | mov dword[edx],ebx |
1028 | mov dword[edx],ebx |
1017 | else |
1029 | else |
; byte by byte version: three times al = ([esi] * [ebx]) shr 8 -> [edi];
; lodsb/stosb advance esi and edi, so .skip (add edi,3) is jumped over
1018 | cld ; esi - tex b. |
1030 | cld ; esi - tex b. |
1019 | lodsb ; ebx - tex e. |
1031 | lodsb ; ebx - tex e. |
1020 | mov dl,[ebx] |
1032 | mov dl,[ebx] |
1021 | mul dl |
1033 | mul dl |
1022 | shr ax,8 |
1034 | shr ax,8 |
1023 | stosb |
1035 | stosb |
1024 | inc ebx |
1036 | inc ebx |
1025 | lodsb |
1037 | lodsb |
1026 | mov dl,[ebx] |
1038 | mov dl,[ebx] |
1027 | mul dl |
1039 | mul dl |
1028 | shr ax,8 |
1040 | shr ax,8 |
1029 | stosb |
1041 | stosb |
1030 | inc ebx |
1042 | inc ebx |
1031 | lodsb |
1043 | lodsb |
1032 | mov dl,[ebx] |
1044 | mov dl,[ebx] |
1033 | mul dl |
1045 | mul dl |
1034 | shr ax,8 |
1046 | shr ax,8 |
1035 | stosb |
1047 | stosb |
1036 | mov ebx,.cz |
1048 | mov ebx,.cz |
1037 | mov esi,.czbuff |
1049 | mov esi,.czbuff |
1038 | mov dword[esi],ebx |
1050 | mov dword[esi],ebx |
1039 | jmp .no_skip |
1051 | jmp .no_skip |
1040 | end if |
1052 | end if |
; next screen pixel; in the MMX build drawn pixels fall through to here too
1041 | .skip: |
1053 | .skip: |
1042 | add edi,3 |
1054 | add edi,3 |
1043 | 1055 | ||
; step the Z-buffer address and the texture coordinates by one pixel
1044 | if Ext = NON |
1056 | if Ext = NON |
1045 | .no_skip: |
1057 | .no_skip: |
1046 | add .czbuff,4 |
1058 | add .czbuff,4 |
1047 | mov eax,.dbx |
1059 | mov eax,.dbx |
1048 | add .cbx,eax |
1060 | add .cbx,eax |
1049 | mov eax,.dby |
1061 | mov eax,.dby |
1050 | add .cby,eax |
1062 | add .cby,eax |
1051 | mov eax,.dex |
1063 | mov eax,.dex |
1052 | add .cex,eax |
1064 | add .cex,eax |
1053 | mov eax,.dey |
1065 | mov eax,.dey |
1054 | add .cey,eax |
1066 | add .cey,eax |
1055 | else |
1067 | else |
; the qword at .dex holds dbx:dex, the one at .dey holds dby:dey
1056 | add edx,4 |
1068 | add edx,4 |
1057 | paddd mm3,.dex |
1069 | paddd mm3,.dex |
1058 | paddd mm4,.dey |
1070 | paddd mm4,.dey |
1059 | ; movq mm5,mm3 |
1071 | ; movq mm5,mm3 |
1060 | ; movq mm6,mm4 |
1072 | ; movq mm6,mm4 |
1061 | ; psrad mm5,ROUND |
1073 | ; psrad mm5,ROUND |
1062 | ; psrad mm6,ROUND |
1074 | ; psrad mm6,ROUND |
1063 | ; movq .cex,mm3 |
1075 | ; movq .cex,mm3 |
1064 | ; movq .cey,mm4 |
1076 | ; movq .cey,mm4 |
1065 | end if |
1077 | end if |
1066 | mov eax,.dz |
1078 | mov eax,.dz |
1067 | add .cz,eax |
1079 | add .cz,eax |
1068 | if Ext = NON |
1080 | if Ext = NON |
1069 | dec ecx |
1081 | dec ecx |
1070 | jnz .draw |
1082 | jnz .draw |
1071 | else |
1083 | else |
1072 | loop .draw |
1084 | loop .draw |
1073 | end if |
1085 | end if |
1074 | 1086 | ||
; common exit: drop all locals and remove the 14 parameter dwords
1075 | .bl_end: |
1087 | .bl_end: |
1076 | mov esp,ebp |
1088 | mov esp,ebp |
1077 | ret 56 |
1089 | ret 56 |