Rev 2881 | Go to most recent revision | Only display areas with differences | Regard whitespace | Details | Blame | Last modification | View Log | RSS feed
Rev 2881 | Rev 2984 | ||
---|---|---|---|
1 | ;CATMULL_SHIFT equ 8 |
1 | ;CATMULL_SHIFT equ 8 |
2 | ;TEXTURE_SIZE EQU (TEX_X * TEX_Y)-1 |
2 | ;TEXTURE_SIZE EQU (TEX_X * TEX_Y)-1 |
3 | ;ROUND equ 8 |
3 | ;ROUND equ 8 |
4 | ;Ext = NON |
4 | ;Ext = NON |
5 | ;MMX = 1 |
5 | ;MMX = 1 |
6 | ;NON = 0 |
6 | ;NON = 0 |
7 | ;------- Big thanks to Majuma (www.majuma.xt.pl) for absolutely great--- |
7 | ;------- Big thanks to Majuma (www.majuma.xt.pl) for absolutely great--- |
8 | ;------- DOS 13h mode demos -------------------------------------------- |
8 | ;------- DOS 13h mode demos -------------------------------------------- |
9 | ;------- Procedure draws bump triangle with texture, using ------------- |
9 | ;------- Procedure draws bump triangle with texture, using ------------- |
10 | ;--------Catmull Z-buffer algorithm- (Z coordinate interpolation)------- |
10 | ;--------Catmull Z-buffer algorithm- (Z coordinate interpolation)------- |
11 | ;--------Texture pixel is calculated as: col1*col2/256 ----------------- |
11 | ;--------Texture pixel is calculated as: col1*col2/256 ----------------- |
12 | bump_tex_triangle_z: |
12 | bump_tex_triangle_z: |
13 | ;------------------in - eax - x1 shl 16 + y1 ----------- |
13 | ;------------------in - eax - x1 shl 16 + y1 ----------- |
14 | ;---------------------- ebx - x2 shl 16 + y2 ----------- |
14 | ;---------------------- ebx - x2 shl 16 + y2 ----------- |
15 | ;---------------------- ecx - x3 shl 16 + y3 ----------- |
15 | ;---------------------- ecx - x3 shl 16 + y3 ----------- |
16 | ;---------------------- edx - pointer to bump map------- |
16 | ;---------------------- edx - pointer to bump map------- |
17 | ;---------------------- esi - pointer to env map-------- |
17 | ;---------------------- esi - pointer to env map-------- |
18 | ;---------------------- edi - pointer to screen buffer-- |
18 | ;---------------------- edi - pointer to screen buffer-- |
19 | ;---------------------- stack : bump coordinates-------- |
19 | ;---------------------- stack : bump coordinates-------- |
20 | ;---------------------- environment coordinates- |
20 | ;---------------------- environment coordinates- |
21 | ;---------------------- Z position coordinates-- |
21 | ;---------------------- Z position coordinates-- |
22 | ;---------------------- pointer to Z buffer----- |
22 | ;---------------------- pointer to Z buffer----- |
23 | ;---------------------- pointer to texture------ |
23 | ;---------------------- pointer to texture------ |
24 | ;---------------------- texture coordinates----- |
24 | ;---------------------- texture coordinates----- |
25 | ;-- Z-buffer - filled with coordinates as dword -------- |
25 | ;-- Z-buffer - filled with coordinates as dword -------- |
26 | ;-- (Z coor. as word) shl CATMULL_SHIFT ---------------- |
26 | ;-- (Z coor. as word) shl CATMULL_SHIFT ---------------- |
27 | .b_x1 equ ebp+4 ; procedure don't save registers !!! |
27 | .b_x1 equ ebp+4 ; procedure don't save registers !!! |
28 | .b_y1 equ ebp+6 ; each coordinate as word |
28 | .b_y1 equ ebp+6 ; each coordinate as word |
29 | .b_x2 equ ebp+8 |
29 | .b_x2 equ ebp+8 |
30 | .b_y2 equ ebp+10 ; b - bump map coords |
30 | .b_y2 equ ebp+10 ; b - bump map coords |
31 | .b_x3 equ ebp+12 ; e - env map coords |
31 | .b_x3 equ ebp+12 ; e - env map coords |
32 | .b_y3 equ ebp+14 |
32 | .b_y3 equ ebp+14 |
33 | .e_x1 equ ebp+16 |
33 | .e_x1 equ ebp+16 |
34 | .e_y1 equ ebp+18 |
34 | .e_y1 equ ebp+18 |
35 | .e_x2 equ ebp+20 |
35 | .e_x2 equ ebp+20 |
36 | .e_y2 equ ebp+22 |
36 | .e_y2 equ ebp+22 |
37 | .e_x3 equ ebp+24 |
37 | .e_x3 equ ebp+24 |
38 | .e_y3 equ ebp+26 |
38 | .e_y3 equ ebp+26 |
39 | .z1 equ word[ebp+28] |
39 | .z1 equ word[ebp+28] |
40 | .z2 equ word[ebp+30] |
40 | .z2 equ word[ebp+30] |
41 | .z3 equ word[ebp+32] |
41 | .z3 equ word[ebp+32] |
42 | .z_buff equ dword[ebp+34] ; pointer to Z-buffer |
42 | .z_buff equ dword[ebp+34] ; pointer to Z-buffer |
43 | .tex_ptr equ dword[ebp+38] ; ptr to texture |
43 | .tex_ptr equ dword[ebp+38] ; ptr to texture |
44 | .t_x1 equ ebp+42 ; texture coords |
44 | .t_x1 equ ebp+42 ; texture coords |
45 | .t_y1 equ ebp+44 |
45 | .t_y1 equ ebp+44 |
46 | .t_x2 equ ebp+46 |
46 | .t_x2 equ ebp+46 |
47 | .t_y2 equ ebp+48 |
47 | .t_y2 equ ebp+48 |
48 | .t_x3 equ ebp+50 |
48 | .t_x3 equ ebp+50 |
49 | .t_y3 equ ebp+52 |
49 | .t_y3 equ ebp+52 |
50 | 50 | ||
51 | 51 | ||
52 | 52 | ||
53 | .t_bmap equ dword[ebp-4] ; pointer to bump map |
53 | .t_bmap equ dword[ebp-4] ; pointer to bump map |
54 | .t_emap equ dword[ebp-8] ; pointer to env map |
54 | .t_emap equ dword[ebp-8] ; pointer to env map |
55 | .x1 equ word[ebp-10] |
55 | .x1 equ word[ebp-10] |
56 | .y1 equ word[ebp-12] |
56 | .y1 equ word[ebp-12] |
57 | .x2 equ word[ebp-14] |
57 | .x2 equ word[ebp-14] |
58 | .y2 equ word[ebp-16] |
58 | .y2 equ word[ebp-16] |
59 | .x3 equ word[ebp-18] |
59 | .x3 equ word[ebp-18] |
60 | .y3 equ word[ebp-20] |
60 | .y3 equ word[ebp-20] |
61 | 61 | ||
62 | if 0 ;Ext <= SSE2 |
62 | if 0 ;Ext <= SSE2 |
63 | 63 | ||
64 | .dx12 equ dword[edi-4] |
64 | .dx12 equ dword[edi-4] |
65 | .dz12 equ [edi-8] |
65 | .dz12 equ [edi-8] |
66 | .dbx12 equ dword[edi-12] |
66 | .dbx12 equ dword[edi-12] |
67 | .dby12 equ [edi-16] |
67 | .dby12 equ [edi-16] |
68 | .dex12 equ dword[edi-20] |
68 | .dex12 equ dword[edi-20] |
69 | .dey12 equ [edi-24] |
69 | .dey12 equ [edi-24] |
70 | .dtx12 equ dword[edi-28] |
70 | .dtx12 equ dword[edi-28] |
71 | .dty12 equ [edi-32] |
71 | .dty12 equ [edi-32] |
72 | 72 | ||
73 | .dx13 equ dword[ebp-52-4*1] |
73 | .dx13 equ dword[ebp-52-4*1] |
74 | .dz13 equ [ebp-52-4*2] |
74 | .dz13 equ [ebp-52-4*2] |
75 | .dbx13 equ dword[ebp-52-4*3] |
75 | .dbx13 equ dword[ebp-52-4*3] |
76 | .dby13 equ [ebp-52-4*4] |
76 | .dby13 equ [ebp-52-4*4] |
77 | .dex13 equ dword[ebp-52-4*5] |
77 | .dex13 equ dword[ebp-52-4*5] |
78 | .dey13 equ [ebp-52-4*6] |
78 | .dey13 equ [ebp-52-4*6] |
79 | .dtx13 equ dword[ebp-52-4*7] |
79 | .dtx13 equ dword[ebp-52-4*7] |
80 | .dty13 equ [ebp-52-4*8] |
80 | .dty13 equ [ebp-52-4*8] |
81 | 81 | ||
82 | 82 | ||
83 | .dx23 equ dword[ebp-(52+4*9)] |
83 | .dx23 equ dword[ebp-(52+4*9)] |
84 | .dz23 equ [ebp-(52+4*10)] |
84 | .dz23 equ [ebp-(52+4*10)] |
85 | .dbx23 equ dword[ebp-(52+4*11)] |
85 | .dbx23 equ dword[ebp-(52+4*11)] |
86 | .dby23 equ [ebp-(52+4*12)] |
86 | .dby23 equ [ebp-(52+4*12)] |
87 | .dex23 equ dword[ebp-(52+4*13)] |
87 | .dex23 equ dword[ebp-(52+4*13)] |
88 | .dey23 equ [ebp-(52+4*14)] |
88 | .dey23 equ [ebp-(52+4*14)] |
89 | .dtx23 equ dword[ebp-(52+4*15)] |
89 | .dtx23 equ dword[ebp-(52+4*15)] |
90 | .dty23 equ [ebp-(52+4*16)] |
90 | .dty23 equ [ebp-(52+4*16)] |
91 | 91 | ||
92 | else |
92 | else |
93 | 93 | ||
94 | .dx12 equ dword[ebp-24] |
94 | .dx12 equ dword[ebp-24] |
95 | .dz12 equ [ebp-28] |
95 | .dz12 equ [ebp-28] |
96 | .dbx12 equ dword[ebp-32] |
96 | .dbx12 equ dword[ebp-32] |
97 | .dby12 equ [ebp-36] |
97 | .dby12 equ [ebp-36] |
98 | .dex12 equ dword[ebp-40] |
98 | .dex12 equ dword[ebp-40] |
99 | .dey12 equ [ebp-44] |
99 | .dey12 equ [ebp-44] |
100 | .dtx12 equ dword[ebp-48] |
100 | .dtx12 equ dword[ebp-48] |
101 | .dty12 equ [ebp-52] |
101 | .dty12 equ [ebp-52] |
102 | 102 | ||
103 | .dx13 equ dword[ebp-52-4*1] |
103 | .dx13 equ dword[ebp-52-4*1] |
104 | .dz13 equ [ebp-52-4*2] |
104 | .dz13 equ [ebp-52-4*2] |
105 | .dbx13 equ dword[ebp-52-4*3] |
105 | .dbx13 equ dword[ebp-52-4*3] |
106 | .dby13 equ [ebp-52-4*4] |
106 | .dby13 equ [ebp-52-4*4] |
107 | .dex13 equ dword[ebp-52-4*5] |
107 | .dex13 equ dword[ebp-52-4*5] |
108 | .dey13 equ [ebp-52-4*6] |
108 | .dey13 equ [ebp-52-4*6] |
109 | .dtx13 equ dword[ebp-52-4*7] |
109 | .dtx13 equ dword[ebp-52-4*7] |
110 | .dty13 equ [ebp-52-4*8] |
110 | .dty13 equ [ebp-52-4*8] |
111 | 111 | ||
112 | 112 | ||
113 | .dx23 equ dword[ebp-(52+4*9)] |
113 | .dx23 equ dword[ebp-(52+4*9)] |
114 | .dz23 equ [ebp-(52+4*10)] |
114 | .dz23 equ [ebp-(52+4*10)] |
115 | .dbx23 equ dword[ebp-(52+4*11)] |
115 | .dbx23 equ dword[ebp-(52+4*11)] |
116 | .dby23 equ [ebp-(52+4*12)] |
116 | .dby23 equ [ebp-(52+4*12)] |
117 | .dex23 equ dword[ebp-(52+4*13)] |
117 | .dex23 equ dword[ebp-(52+4*13)] |
118 | .dey23 equ [ebp-(52+4*14)] |
118 | .dey23 equ [ebp-(52+4*14)] |
119 | .dtx23 equ dword[ebp-(52+4*15)] |
119 | .dtx23 equ dword[ebp-(52+4*15)] |
120 | .dty23 equ [ebp-(52+4*16)] |
120 | .dty23 equ [ebp-(52+4*16)] |
121 | 121 | ||
122 | end if |
122 | end if |
123 | 123 | ||
124 | if Ext < SSE |
124 | if Ext < SSE |
125 | 125 | ||
126 | .cx1 equ dword[ebp-(52+4*17)] ; current variables |
126 | .cx1 equ dword[ebp-(52+4*17)] ; current variables |
127 | .cz1 equ [ebp-(52+4*18)] |
127 | .cz1 equ [ebp-(52+4*18)] |
128 | .cx2 equ dword[ebp-(52+4*19)] |
128 | .cx2 equ dword[ebp-(52+4*19)] |
129 | .cz2 equ [ebp-(52+4*20)] |
129 | .cz2 equ [ebp-(52+4*20)] |
130 | .cbx1 equ dword[ebp-(52+4*21)] |
130 | .cbx1 equ dword[ebp-(52+4*21)] |
131 | .cby1 equ [ebp-(52+4*22)] |
131 | .cby1 equ [ebp-(52+4*22)] |
132 | .cbx2 equ dword[ebp-(52+4*23)] |
132 | .cbx2 equ dword[ebp-(52+4*23)] |
133 | .cby2 equ [ebp-(52+4*24)] |
133 | .cby2 equ [ebp-(52+4*24)] |
134 | .cex1 equ dword[ebp-(52+4*25)] |
134 | .cex1 equ dword[ebp-(52+4*25)] |
135 | .cey1 equ [ebp-(52+4*26)] |
135 | .cey1 equ [ebp-(52+4*26)] |
136 | .cex2 equ dword[ebp-(52+4*27)] |
136 | .cex2 equ dword[ebp-(52+4*27)] |
137 | .cey2 equ [ebp-(52+4*28)] |
137 | .cey2 equ [ebp-(52+4*28)] |
138 | 138 | ||
139 | .ctx1 equ dword[ebp-(52+4*29)] |
139 | .ctx1 equ dword[ebp-(52+4*29)] |
140 | .cty1 equ [ebp-(52+4*30)] |
140 | .cty1 equ [ebp-(52+4*30)] |
141 | .ctx2 equ dword[ebp-(52+4*31)] |
141 | .ctx2 equ dword[ebp-(52+4*31)] |
142 | .cty2 equ [ebp-(52+4*32)] |
142 | .cty2 equ [ebp-(52+4*32)] |
143 | 143 | ||
144 | else |
144 | else |
145 | 145 | ||
146 | .cx1 equ dword[ebp-(52+4*17)] ; current variables |
146 | .cx1 equ dword[ebp-(52+4*17)] ; current variables |
147 | .cz1 equ [ebp-(52+4*18)] |
147 | .cz1 equ [ebp-(52+4*18)] |
148 | .cbx1 equ dword[ebp-(52+4*19)] |
148 | .cbx1 equ dword[ebp-(52+4*19)] |
149 | .cby1 equ [ebp-(52+4*20)] |
149 | .cby1 equ [ebp-(52+4*20)] |
150 | .cex1 equ dword[ebp-(52+4*21)] |
150 | .cex1 equ dword[ebp-(52+4*21)] |
151 | .cey1 equ [ebp-(52+4*22)] |
151 | .cey1 equ [ebp-(52+4*22)] |
152 | .ctx1 equ dword[ebp-(52+4*23)] |
152 | .ctx1 equ dword[ebp-(52+4*23)] |
153 | .cty1 equ [ebp-(52+4*24)] |
153 | .cty1 equ [ebp-(52+4*24)] |
154 | 154 | ||
155 | .cx2 equ dword[ebp-(52+4*25)] |
155 | .cx2 equ dword[ebp-(52+4*25)] |
156 | .cz2 equ [ebp-(52+4*26)] |
156 | .cz2 equ [ebp-(52+4*26)] |
157 | .cbx2 equ dword[ebp-(52+4*27)] |
157 | .cbx2 equ dword[ebp-(52+4*27)] |
158 | .cby2 equ [ebp-(52+4*28)] |
158 | .cby2 equ [ebp-(52+4*28)] |
159 | .cex2 equ dword[ebp-(52+4*29)] |
159 | .cex2 equ dword[ebp-(52+4*29)] |
160 | .cey2 equ [ebp-(52+4*30)] |
160 | .cey2 equ [ebp-(52+4*30)] |
161 | .ctx2 equ dword[ebp-(52+4*31)] |
161 | .ctx2 equ dword[ebp-(52+4*31)] |
162 | .cty2 equ [ebp-(52+4*32)] |
162 | .cty2 equ [ebp-(52+4*32)] |
163 | 163 | ||
164 | end if |
164 | end if |
165 | cld |
165 | cld |
166 | mov ebp,esp |
166 | mov ebp,esp |
167 | push edx ; store bump map |
167 | push edx ; store bump map |
168 | push esi ; store e. map |
168 | push esi ; store e. map |
169 | ; sub esp,120 |
169 | ; sub esp,120 |
170 | .sort3: ; sort triangle coordinates... |
170 | .sort3: ; sort triangle coordinates... |
171 | cmp ax,bx |
171 | cmp ax,bx |
172 | jle .sort1 |
172 | jle .sort1 |
173 | xchg eax,ebx |
173 | xchg eax,ebx |
174 | mov edx,dword[.b_x1] |
174 | mov edx,dword[.b_x1] |
175 | xchg edx,dword[.b_x2] |
175 | xchg edx,dword[.b_x2] |
176 | mov dword[.b_x1],edx |
176 | mov dword[.b_x1],edx |
177 | mov edx,dword[.e_x1] |
177 | mov edx,dword[.e_x1] |
178 | xchg edx,dword[.e_x2] |
178 | xchg edx,dword[.e_x2] |
179 | mov dword[.e_x1],edx |
179 | mov dword[.e_x1],edx |
180 | mov edx,dword[.t_x1] |
180 | mov edx,dword[.t_x1] |
181 | xchg edx,dword[.t_x2] |
181 | xchg edx,dword[.t_x2] |
182 | mov dword[.t_x1],edx |
182 | mov dword[.t_x1],edx |
183 | mov dx,.z1 |
183 | mov dx,.z1 |
184 | xchg dx,.z2 |
184 | xchg dx,.z2 |
185 | mov .z1,dx |
185 | mov .z1,dx |
186 | .sort1: |
186 | .sort1: |
187 | cmp bx,cx |
187 | cmp bx,cx |
188 | jle .sort2 |
188 | jle .sort2 |
189 | xchg ebx,ecx |
189 | xchg ebx,ecx |
190 | mov edx,dword[.b_x2] |
190 | mov edx,dword[.b_x2] |
191 | xchg edx,dword[.b_x3] |
191 | xchg edx,dword[.b_x3] |
192 | mov dword[.b_x2],edx |
192 | mov dword[.b_x2],edx |
193 | mov edx,dword[.e_x2] |
193 | mov edx,dword[.e_x2] |
194 | xchg edx,dword[.e_x3] |
194 | xchg edx,dword[.e_x3] |
195 | mov dword[.e_x2],edx |
195 | mov dword[.e_x2],edx |
196 | mov edx,dword[.t_x2] |
196 | mov edx,dword[.t_x2] |
197 | xchg edx,dword[.t_x3] |
197 | xchg edx,dword[.t_x3] |
198 | mov dword[.t_x2],edx |
198 | mov dword[.t_x2],edx |
199 | mov dx,.z2 |
199 | mov dx,.z2 |
200 | xchg dx,.z3 |
200 | xchg dx,.z3 |
201 | mov .z2,dx |
201 | mov .z2,dx |
202 | jmp .sort3 |
202 | jmp .sort3 |
203 | .sort2: |
203 | .sort2: |
204 | push eax ; store triangle coords in variables |
204 | push eax ; store triangle coords in variables |
205 | push ebx |
205 | push ebx |
206 | push ecx |
206 | push ecx |
207 | mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that |
207 | mov edx,80008000h ; eax,ebx,ecx are ANDd together into edx which means that |
208 | and edx,ebx ; if *all* of them are negative a sign flag is raised |
208 | and edx,ebx ; if *all* of them are negative a sign flag is raised |
209 | and edx,ecx |
209 | and edx,ecx |
210 | and edx,eax |
210 | and edx,eax |
211 | test edx,80008000h ; Check both X&Y at once |
211 | test edx,80008000h ; Check both X&Y at once |
212 | jne .loop23_done |
212 | jne .loop23_done |
213 | ; mov edx,eax ; eax,ebx,ecx are ORd together into edx which means that |
213 | ; mov edx,eax ; eax,ebx,ecx are ORd together into edx which means that |
214 | ; or edx,ebx ; if any *one* of them is negative a sign flag is raised |
214 | ; or edx,ebx ; if any *one* of them is negative a sign flag is raised |
215 | ; or edx,ecx |
215 | ; or edx,ecx |
216 | ; test edx,80000000h ; Check only X |
216 | ; test edx,80000000h ; Check only X |
217 | ; jne .loop23_done |
217 | ; jne .loop23_done |
218 | 218 | ||
219 | ; cmp .x1,SIZE_X ; { |
219 | ; cmp .x1,SIZE_X ; { |
220 | ; jg .loop23_done |
220 | ; jg .loop23_done |
221 | ; cmp .x2,SIZE_X ; This can be optimized with effort |
221 | ; cmp .x2,SIZE_X ; This can be optimized with effort |
222 | ; jg .loop23_done |
222 | ; jg .loop23_done |
223 | ; cmp .x3,SIZE_X |
223 | ; cmp .x3,SIZE_X |
224 | ; jg .loop23_done ; { |
224 | ; jg .loop23_done ; { |
225 | 225 | ||
226 | 226 | ||
227 | mov bx,.y2 ; calc delta 12 |
227 | mov bx,.y2 ; calc delta 12 |
228 | sub bx,.y1 |
228 | sub bx,.y1 |
229 | jnz .bt_dx12_make |
229 | jnz .bt_dx12_make |
230 | if 0 ;Ext >= SSE2 |
230 | if 0 ;Ext >= SSE2 |
231 | pxor xmm0,xmm0 |
231 | pxor xmm0,xmm0 |
232 | movups .dty12,xmm0 |
232 | movups .dty12,xmm0 |
233 | movups .dey12,xmm0 |
233 | movups .dey12,xmm0 |
234 | sub esp,16 |
234 | sub esp,16 |
235 | else |
235 | else |
236 | mov ecx,8 |
236 | mov ecx,8 |
237 | xor edx,edx |
237 | xor edx,edx |
238 | @@: |
238 | @@: |
239 | push edx ;dword 0 |
239 | push edx ;dword 0 |
240 | loop @b |
240 | loop @b |
241 | end if |
241 | end if |
242 | jmp .bt_dx12_done |
242 | jmp .bt_dx12_done |
243 | .bt_dx12_make: |
243 | .bt_dx12_make: |
244 | movsx ebx,bx |
244 | movsx ebx,bx |
245 | 245 | ||
246 | 246 | ||
247 | if Ext>=SSE |
247 | if Ext>=SSE |
248 | sub esp,32 |
248 | sub esp,32 |
249 | ; mov eax,256 |
249 | ; mov eax,256 |
250 | cvtsi2ss xmm4,[i255d] |
250 | cvtsi2ss xmm4,[i255d] |
251 | cvtsi2ss xmm3,ebx ;rcps |
251 | cvtsi2ss xmm3,ebx ;rcps |
252 | if 0 ;Ext >= SSE2 |
252 | if 0 ;Ext >= SSE2 |
253 | mov edi,ebp |
253 | mov edi,ebp |
254 | sub edi,512 |
254 | sub edi,512 |
255 | or edi,0x0000000f |
255 | or edi,0x0000000f |
256 | end if |
256 | end if |
257 | divss xmm3,xmm4 |
257 | divss xmm3,xmm4 |
258 | shufps xmm3,xmm3,0 |
258 | shufps xmm3,xmm3,0 |
259 | 259 | ||
260 | movd mm0,[.b_x1] |
260 | movd mm0,[.b_x1] |
261 | movd mm1,[.b_x2] |
261 | movd mm1,[.b_x2] |
262 | movd mm2,[.e_x1] |
262 | movd mm2,[.e_x1] |
263 | movd mm3,[.e_x2] |
263 | movd mm3,[.e_x2] |
264 | 264 | ||
265 | pxor mm4,mm4 |
265 | pxor mm4,mm4 |
266 | punpcklwd mm0,mm4 |
266 | punpcklwd mm0,mm4 |
267 | punpcklwd mm1,mm4 |
267 | punpcklwd mm1,mm4 |
268 | punpcklwd mm2,mm4 |
268 | punpcklwd mm2,mm4 |
269 | punpcklwd mm3,mm4 |
269 | punpcklwd mm3,mm4 |
270 | 270 | ||
271 | psubd mm1,mm0 |
271 | psubd mm1,mm0 |
272 | psubd mm3,mm2 |
272 | psubd mm3,mm2 |
273 | 273 | ||
274 | cvtpi2ps xmm1,mm1 |
274 | cvtpi2ps xmm1,mm1 |
275 | movlhps xmm1,xmm1 |
275 | movlhps xmm1,xmm1 |
276 | cvtpi2ps xmm1,mm3 |
276 | cvtpi2ps xmm1,mm3 |
277 | 277 | ||
278 | divps xmm1,xmm3 ;xmm1--> | dby | dbx | dey | dex | |
278 | divps xmm1,xmm3 ;xmm1--> | dby | dbx | dey | dex | |
279 | 279 | ||
280 | shufps xmm1,xmm1,10110001b |
280 | shufps xmm1,xmm1,10110001b |
281 | ;xmm1--> | dbx | dby | dex | dey | |
281 | ;xmm1--> | dbx | dby | dex | dey | |
282 | ;1 movups .dey12,xmm1 |
282 | ;1 movups .dey12,xmm1 |
283 | cvtps2pi mm0,xmm1 ;mm0,xmm1 ; mm0 -> 2 delta dwords |
283 | cvtps2pi mm0,xmm1 ;mm0,xmm1 ; mm0 -> 2 delta dwords |
284 | movhlps xmm1,xmm1 |
284 | movhlps xmm1,xmm1 |
285 | cvtps2pi mm1,xmm1 ;mm1,xmm1 |
285 | cvtps2pi mm1,xmm1 ;mm1,xmm1 |
286 | movq .dey12,mm0 |
286 | movq .dey12,mm0 |
287 | movq .dby12,mm1 |
287 | movq .dby12,mm1 |
288 | ;------------- |
288 | ;------------- |
289 | ; pxor mm0,mm0 |
289 | ; pxor mm0,mm0 |
290 | ; pxor mm1,mm1 |
290 | ; pxor mm1,mm1 |
291 | ;/ pinsrw mm0,.z1,1 |
291 | ;/ pinsrw mm0,.z1,1 |
292 | ;/ pinsrw mm0,.x1,0 |
292 | ;/ pinsrw mm0,.x1,0 |
293 | ;/ pinsrw mm1,.z2,1 |
293 | ;/ pinsrw mm1,.z2,1 |
294 | ;/ pinsrw mm1,.x2,0 |
294 | ;/ pinsrw mm1,.x2,0 |
295 | mov ax,.z2 |
295 | mov ax,.z2 |
296 | sub ax,.z1 |
296 | sub ax,.z1 |
297 | cwde |
297 | cwde |
298 | 298 | ||
299 | mov dx,.x2 |
299 | mov dx,.x2 |
300 | sub dx,.x1 |
300 | sub dx,.x1 |
301 | movsx edx,dx |
301 | movsx edx,dx |
302 | 302 | ||
303 | ;/ movd mm1,eax |
303 | ;/ movd mm1,eax |
304 | 304 | ||
305 | ;/ punpcklwd mm0,mm4 |
305 | ;/ punpcklwd mm0,mm4 |
306 | ;/ punpcklwd mm1,mm4 |
306 | ;/ punpcklwd mm1,mm4 |
307 | 307 | ||
308 | ; cvtpi2ps xmm1,mm1 |
308 | ; cvtpi2ps xmm1,mm1 |
309 | ; cvtpi2ps xmm2,mm0 |
309 | ; cvtpi2ps xmm2,mm0 |
310 | ; subps xmm1,xmm2 |
310 | ; subps xmm1,xmm2 |
311 | 311 | ||
312 | ;/ psubd mm1,mm0 |
312 | ;/ psubd mm1,mm0 |
313 | 313 | ||
314 | movd mm2,[.t_x1] |
314 | movd mm2,[.t_x1] |
315 | movd mm3,[.t_x2] |
315 | movd mm3,[.t_x2] |
316 | 316 | ||
317 | punpcklwd mm2,mm4 |
317 | punpcklwd mm2,mm4 |
318 | punpcklwd mm3,mm4 |
318 | punpcklwd mm3,mm4 |
319 | psubd mm3,mm2 |
319 | psubd mm3,mm2 |
320 | 320 | ||
321 | ;/ cvtpi2ps xmm1,mm1 |
321 | ;/ cvtpi2ps xmm1,mm1 |
322 | cvtsi2ss xmm1,eax |
322 | cvtsi2ss xmm1,eax |
323 | movlhps xmm1,xmm1 |
323 | movlhps xmm1,xmm1 |
324 | cvtsi2ss xmm1,edx |
324 | cvtsi2ss xmm1,edx |
325 | ; movss xmm1,xmm4 |
325 | ; movss xmm1,xmm4 |
326 | shufps xmm1,xmm1,00101111b |
326 | shufps xmm1,xmm1,00101111b |
327 | cvtpi2ps xmm1,mm3 |
327 | cvtpi2ps xmm1,mm3 |
328 | 328 | ||
329 | divps xmm1,xmm3 ; xmm1--> | dx | dz | dty | dtx | |
329 | divps xmm1,xmm3 ; xmm1--> | dx | dz | dty | dtx | |
330 | 330 | ||
331 | shufps xmm1,xmm1,11100001b |
331 | shufps xmm1,xmm1,11100001b |
332 | ; xmm1--> | dx | dz | dtx | dty | |
332 | ; xmm1--> | dx | dz | dtx | dty | |
333 | ;1 movlps .dty12,xmm1 |
333 | ;1 movlps .dty12,xmm1 |
334 | ;1 movhps .dz12,xmm1 |
334 | ;1 movhps .dz12,xmm1 |
335 | cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords | dtx | dty | |
335 | cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords | dtx | dty | |
336 | movhlps xmm1,xmm1 |
336 | movhlps xmm1,xmm1 |
337 | cvtps2pi mm1,xmm1 |
337 | cvtps2pi mm1,xmm1 |
338 | movq .dty12,mm0 |
338 | movq .dty12,mm0 |
339 | movq .dz12,mm1 |
339 | movq .dz12,mm1 |
340 | ;---- |
340 | ;---- |
341 | ; mov ax,.z2 |
341 | ; mov ax,.z2 |
342 | ; sub ax,.z1 |
342 | ; sub ax,.z1 |
343 | ; cwde |
343 | ; cwde |
344 | ; mov bx,.x2 |
344 | ; mov bx,.x2 |
345 | ; sub bx,.x1 |
345 | ; sub bx,.x1 |
346 | ; movsx ebx,bx |
346 | ; movsx ebx,bx |
347 | ; movd mm1,eax |
347 | ; movd mm1,eax |
348 | ; psllq mm1,32 |
348 | ; psllq mm1,32 |
349 | ; movd mm1,ebx |
349 | ; movd mm1,ebx |
350 | 350 | ||
351 | ;; push ebx |
351 | ;; push ebx |
352 | ;; push eax |
352 | ;; push eax |
353 | ;; movq mm1,[esp] |
353 | ;; movq mm1,[esp] |
354 | ;; add esp,8 |
354 | ;; add esp,8 |
355 | ;;; mov ax,.z1 |
355 | ;;; mov ax,.z1 |
356 | ;;; mov bx,.z2 |
356 | ;;; mov bx,.z2 |
357 | ;;; shl eax,16 |
357 | ;;; shl eax,16 |
358 | ;;; shl ebx,16 |
358 | ;;; shl ebx,16 |
359 | ;;; mov ax,.x1 |
359 | ;;; mov ax,.x1 |
360 | ;;; mov bx,.x2 |
360 | ;;; mov bx,.x2 |
361 | ; movd mm2,[.t_x1] |
361 | ; movd mm2,[.t_x1] |
362 | ; movd mm3,[.t_x2] |
362 | ; movd mm3,[.t_x2] |
363 | ;; movd mm0,eax |
363 | ;; movd mm0,eax |
364 | ;; movd mm1,ebx |
364 | ;; movd mm1,ebx |
365 | 365 | ||
366 | ; pxor mm4,mm4 |
366 | ; pxor mm4,mm4 |
367 | ;; punpcklwd mm0,mm4 |
367 | ;; punpcklwd mm0,mm4 |
368 | ;; punpcklwd mm1,mm4 |
368 | ;; punpcklwd mm1,mm4 |
369 | ; punpcklwd mm2,mm4 |
369 | ; punpcklwd mm2,mm4 |
370 | ; punpcklwd mm3,mm4 |
370 | ; punpcklwd mm3,mm4 |
371 | 371 | ||
372 | ;; psubd mm1,mm0 |
372 | ;; psubd mm1,mm0 |
373 | ; psubd mm3,mm2 |
373 | ; psubd mm3,mm2 |
374 | 374 | ||
375 | 375 | ||
376 | ; cvtpi2ps xmm1,mm1 |
376 | ; cvtpi2ps xmm1,mm1 |
377 | ; movlhps xmm1,xmm1 |
377 | ; movlhps xmm1,xmm1 |
378 | ; cvtpi2ps xmm1,mm3 |
378 | ; cvtpi2ps xmm1,mm3 |
379 | 379 | ||
380 | ; divps xmm1,xmm3 ; xmm1--> | dz | dx | dty | dtx | |
380 | ; divps xmm1,xmm3 ; xmm1--> | dz | dx | dty | dtx | |
381 | 381 | ||
382 | ; shufps xmm1,xmm1,10110001b |
382 | ; shufps xmm1,xmm1,10110001b |
383 | ; xmm1--> | dx | dz | dtx | dty | |
383 | ; xmm1--> | dx | dz | dtx | dty | |
384 | ; cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords | dtx | dty | |
384 | ; cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords | dtx | dty | |
385 | ; movhlps xmm1,xmm1 |
385 | ; movhlps xmm1,xmm1 |
386 | ; cvtps2pi mm1,xmm1 ; mm1 --> 2 delta dwords | dx | dz | |
386 | ; cvtps2pi mm1,xmm1 ; mm1 --> 2 delta dwords | dx | dz | |
387 | ; movq .dty12,mm0 |
387 | ; movq .dty12,mm0 |
388 | ; movq .dz12,mm1 |
388 | ; movq .dz12,mm1 |
389 | else |
389 | else |
390 | mov ax,.x2 |
390 | mov ax,.x2 |
391 | sub ax,.x1 |
391 | sub ax,.x1 |
392 | cwde |
392 | cwde |
393 | shl eax,ROUND |
393 | shl eax,ROUND |
394 | cdq |
394 | cdq |
395 | idiv ebx |
395 | idiv ebx |
396 | ; mov .dx12,eax |
396 | ; mov .dx12,eax |
397 | push eax |
397 | push eax |
398 | 398 | ||
399 | mov ax,.z2 |
399 | mov ax,.z2 |
400 | sub ax,.z1 |
400 | sub ax,.z1 |
401 | cwde |
401 | cwde |
402 | shl eax,CATMULL_SHIFT |
402 | shl eax,CATMULL_SHIFT |
403 | cdq |
403 | cdq |
404 | idiv ebx |
404 | idiv ebx |
405 | push eax |
405 | push eax |
406 | 406 | ||
407 | mov ax,word[.b_x2] |
407 | mov ax,word[.b_x2] |
408 | sub ax,word[.b_x1] |
408 | sub ax,word[.b_x1] |
409 | cwde |
409 | cwde |
410 | shl eax,ROUND |
410 | shl eax,ROUND |
411 | cdq |
411 | cdq |
412 | idiv ebx |
412 | idiv ebx |
413 | ; mov .dbx12,eax |
413 | ; mov .dbx12,eax |
414 | push eax |
414 | push eax |
415 | 415 | ||
416 | mov ax,word[.b_y2] |
416 | mov ax,word[.b_y2] |
417 | sub ax,word[.b_y1] |
417 | sub ax,word[.b_y1] |
418 | cwde |
418 | cwde |
419 | shl eax,ROUND |
419 | shl eax,ROUND |
420 | cdq |
420 | cdq |
421 | idiv ebx |
421 | idiv ebx |
422 | ; mov .dby12,eax |
422 | ; mov .dby12,eax |
423 | push eax |
423 | push eax |
424 | 424 | ||
425 | mov ax,word[.e_x2] |
425 | mov ax,word[.e_x2] |
426 | sub ax,word[.e_x1] |
426 | sub ax,word[.e_x1] |
427 | cwde |
427 | cwde |
428 | shl eax,ROUND |
428 | shl eax,ROUND |
429 | cdq |
429 | cdq |
430 | idiv ebx |
430 | idiv ebx |
431 | ; mov .dex12,eax |
431 | ; mov .dex12,eax |
432 | push eax |
432 | push eax |
433 | 433 | ||
434 | mov ax,word[.e_y2] |
434 | mov ax,word[.e_y2] |
435 | sub ax,word[.e_y1] |
435 | sub ax,word[.e_y1] |
436 | cwde |
436 | cwde |
437 | shl eax,ROUND |
437 | shl eax,ROUND |
438 | cdq |
438 | cdq |
439 | idiv ebx |
439 | idiv ebx |
440 | ; mov .dey12,eax |
440 | ; mov .dey12,eax |
441 | push eax |
441 | push eax |
442 | 442 | ||
443 | mov ax,word[.t_x2] |
443 | mov ax,word[.t_x2] |
444 | sub ax,word[.t_x1] |
444 | sub ax,word[.t_x1] |
445 | cwde |
445 | cwde |
446 | shl eax,ROUND |
446 | shl eax,ROUND |
447 | cdq |
447 | cdq |
448 | idiv ebx |
448 | idiv ebx |
449 | ; mov .dtx12,eax |
449 | ; mov .dtx12,eax |
450 | push eax |
450 | push eax |
451 | 451 | ||
452 | mov ax,word[.t_y2] |
452 | mov ax,word[.t_y2] |
453 | sub ax,word[.t_y1] |
453 | sub ax,word[.t_y1] |
454 | cwde |
454 | cwde |
455 | shl eax,ROUND |
455 | shl eax,ROUND |
456 | cdq |
456 | cdq |
457 | idiv ebx |
457 | idiv ebx |
458 | ; mov .dty12,eax |
458 | ; mov .dty12,eax |
459 | push eax |
459 | push eax |
460 | end if |
460 | end if |
461 | .bt_dx12_done: |
461 | .bt_dx12_done: |
462 | 462 | ||
463 | mov bx,.y3 ; calc delta13 |
463 | mov bx,.y3 ; calc delta13 |
464 | sub bx,.y1 |
464 | sub bx,.y1 |
465 | jnz .bt_dx13_make |
465 | jnz .bt_dx13_make |
466 | mov ecx,8 |
466 | mov ecx,8 |
467 | xor edx,edx |
467 | xor edx,edx |
468 | @@: |
468 | @@: |
469 | push edx ;dword 0 |
469 | push edx ;dword 0 |
470 | loop @b |
470 | loop @b |
471 | jmp .bt_dx13_done |
471 | jmp .bt_dx13_done |
472 | .bt_dx13_make: |
472 | .bt_dx13_make: |
473 | movsx ebx,bx |
473 | movsx ebx,bx |
474 | 474 | ||
475 | if Ext>=SSE |
475 | if Ext>=SSE |
476 | 476 | ||
477 | sub esp,32 |
477 | sub esp,32 |
478 | ; mov eax,256 |
478 | ; mov eax,256 |
479 | cvtsi2ss xmm4,[i255d] |
479 | cvtsi2ss xmm4,[i255d] |
480 | cvtsi2ss xmm3,ebx ;rcps |
480 | cvtsi2ss xmm3,ebx ;rcps |
481 | divss xmm3,xmm4 |
481 | divss xmm3,xmm4 |
482 | shufps xmm3,xmm3,0 |
482 | shufps xmm3,xmm3,0 |
483 | 483 | ||
484 | movd mm0,[.b_x1] |
484 | movd mm0,[.b_x1] |
485 | movd mm1,[.b_x3] |
485 | movd mm1,[.b_x3] |
486 | movd mm2,[.e_x1] |
486 | movd mm2,[.e_x1] |
487 | movd mm3,[.e_x3] |
487 | movd mm3,[.e_x3] |
488 | 488 | ||
489 | pxor mm4,mm4 |
489 | pxor mm4,mm4 |
490 | punpcklwd mm0,mm4 |
490 | punpcklwd mm0,mm4 |
491 | punpcklwd mm1,mm4 |
491 | punpcklwd mm1,mm4 |
492 | punpcklwd mm2,mm4 |
492 | punpcklwd mm2,mm4 |
493 | punpcklwd mm3,mm4 |
493 | punpcklwd mm3,mm4 |
494 | 494 | ||
495 | psubd mm1,mm0 |
495 | psubd mm1,mm0 |
496 | psubd mm3,mm2 |
496 | psubd mm3,mm2 |
497 | 497 | ||
498 | cvtpi2ps xmm1,mm1 |
498 | cvtpi2ps xmm1,mm1 |
499 | movlhps xmm1,xmm1 |
499 | movlhps xmm1,xmm1 |
500 | cvtpi2ps xmm1,mm3 |
500 | cvtpi2ps xmm1,mm3 |
501 | 501 | ||
502 | divps xmm1,xmm3 ;xmm1--> | dby | dbx | dey | dex | |
502 | divps xmm1,xmm3 ;xmm1--> | dby | dbx | dey | dex | |
503 | 503 | ||
504 | shufps xmm1,xmm1,10110001b |
504 | shufps xmm1,xmm1,10110001b |
505 | ;xmm1--> | dbx | dby | dex | dey | |
505 | ;xmm1--> | dbx | dby | dex | dey | |
506 | ;1 movups .dey13,xmm1 |
506 | ;1 movups .dey13,xmm1 |
507 | 507 | ||
508 | cvtps2pi mm0,xmm1 ;mm0,xmm1 ; mm0 -> 2 delta dwords |
508 | cvtps2pi mm0,xmm1 ;mm0,xmm1 ; mm0 -> 2 delta dwords |
509 | movhlps xmm1,xmm1 |
509 | movhlps xmm1,xmm1 |
510 | cvtps2pi mm1,xmm1 ;mm1,xmm1 |
510 | cvtps2pi mm1,xmm1 ;mm1,xmm1 |
511 | movq .dey13,mm0 |
511 | movq .dey13,mm0 |
512 | movq .dby13,mm1 |
512 | movq .dby13,mm1 |
513 | 513 | ||
514 | mov ax,.z3 |
514 | mov ax,.z3 |
515 | sub ax,.z1 |
515 | sub ax,.z1 |
516 | cwde |
516 | cwde |
517 | 517 | ||
518 | mov dx,.x3 |
518 | mov dx,.x3 |
519 | sub dx,.x1 |
519 | sub dx,.x1 |
520 | movsx edx,dx |
520 | movsx edx,dx |
521 | 521 | ||
522 | movd mm2,[.t_x1] |
522 | movd mm2,[.t_x1] |
523 | movd mm3,[.t_x3] |
523 | movd mm3,[.t_x3] |
524 | 524 | ||
525 | punpcklwd mm2,mm4 |
525 | punpcklwd mm2,mm4 |
526 | punpcklwd mm3,mm4 |
526 | punpcklwd mm3,mm4 |
527 | psubd mm3,mm2 |
527 | psubd mm3,mm2 |
528 | 528 | ||
529 | cvtsi2ss xmm1,eax |
529 | cvtsi2ss xmm1,eax |
530 | movlhps xmm1,xmm1 |
530 | movlhps xmm1,xmm1 |
531 | cvtsi2ss xmm1,edx |
531 | cvtsi2ss xmm1,edx |
532 | shufps xmm1,xmm1,00101111b |
532 | shufps xmm1,xmm1,00101111b |
533 | cvtpi2ps xmm1,mm3 |
533 | cvtpi2ps xmm1,mm3 |
534 | 534 | ||
535 | divps xmm1,xmm3 ; xmm1--> | dx | dz | dty | dtx | |
535 | divps xmm1,xmm3 ; xmm1--> | dx | dz | dty | dtx | |
536 | 536 | ||
537 | shufps xmm1,xmm1,11100001b |
537 | shufps xmm1,xmm1,11100001b |
538 | ; xmm1--> | dx | dz | dtx | dty | |
538 | ; xmm1--> | dx | dz | dtx | dty | |
539 | ;1 movlps .dty13,xmm1 |
539 | ;1 movlps .dty13,xmm1 |
540 | ;1 movhps .dz13,xmm1 |
540 | ;1 movhps .dz13,xmm1 |
541 | 541 | ||
542 | cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords | dtx | dty | |
542 | cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords | dtx | dty | |
543 | movhlps xmm1,xmm1 |
543 | movhlps xmm1,xmm1 |
544 | cvtps2pi mm1,xmm1 |
544 | cvtps2pi mm1,xmm1 |
545 | movq .dty13,mm0 |
545 | movq .dty13,mm0 |
546 | movq .dz13,mm1 |
546 | movq .dz13,mm1 |
547 | 547 | ||
548 | else |
548 | else |
549 | 549 | ||
550 | mov ax,.x3 |
550 | mov ax,.x3 |
551 | sub ax,.x1 |
551 | sub ax,.x1 |
552 | cwde |
552 | cwde |
553 | shl eax,ROUND |
553 | shl eax,ROUND |
554 | cdq |
554 | cdq |
555 | idiv ebx |
555 | idiv ebx |
556 | ; mov .dx13,eax |
556 | ; mov .dx13,eax |
557 | push eax |
557 | push eax |
558 | 558 | ||
559 | mov ax,.z3 |
559 | mov ax,.z3 |
560 | sub ax,.z1 |
560 | sub ax,.z1 |
561 | cwde |
561 | cwde |
562 | shl eax,CATMULL_SHIFT |
562 | shl eax,CATMULL_SHIFT |
563 | cdq |
563 | cdq |
564 | idiv ebx |
564 | idiv ebx |
565 | ; mov .dz13,eax |
565 | ; mov .dz13,eax |
566 | push eax |
566 | push eax |
567 | 567 | ||
568 | 568 | ||
569 | mov ax,word[.b_x3] |
569 | mov ax,word[.b_x3] |
570 | sub ax,word[.b_x1] |
570 | sub ax,word[.b_x1] |
571 | cwde |
571 | cwde |
572 | shl eax,ROUND |
572 | shl eax,ROUND |
573 | cdq |
573 | cdq |
574 | idiv ebx |
574 | idiv ebx |
575 | ; mov .dbx13,eax |
575 | ; mov .dbx13,eax |
576 | push eax |
576 | push eax |
577 | 577 | ||
578 | mov ax,word[.b_y3] |
578 | mov ax,word[.b_y3] |
579 | sub ax,word[.b_y1] |
579 | sub ax,word[.b_y1] |
580 | cwde |
580 | cwde |
581 | shl eax,ROUND |
581 | shl eax,ROUND |
582 | cdq |
582 | cdq |
583 | idiv ebx |
583 | idiv ebx |
584 | ; mov .dby13,eax |
584 | ; mov .dby13,eax |
585 | push eax |
585 | push eax |
586 | 586 | ||
587 | mov ax,word[.e_x3] |
587 | mov ax,word[.e_x3] |
588 | sub ax,word[.e_x1] |
588 | sub ax,word[.e_x1] |
589 | cwde |
589 | cwde |
590 | shl eax,ROUND |
590 | shl eax,ROUND |
591 | cdq |
591 | cdq |
592 | idiv ebx |
592 | idiv ebx |
593 | ; mov .dex13,eax |
593 | ; mov .dex13,eax |
594 | push eax |
594 | push eax |
595 | 595 | ||
596 | mov ax,word[.e_y3] |
596 | mov ax,word[.e_y3] |
597 | sub ax,word[.e_y1] |
597 | sub ax,word[.e_y1] |
598 | cwde |
598 | cwde |
599 | shl eax,ROUND |
599 | shl eax,ROUND |
600 | cdq |
600 | cdq |
601 | idiv ebx |
601 | idiv ebx |
602 | ; mov .dey13,eax |
602 | ; mov .dey13,eax |
603 | push eax |
603 | push eax |
604 | 604 | ||
605 | mov ax,word[.t_x3] |
605 | mov ax,word[.t_x3] |
606 | sub ax,word[.t_x1] |
606 | sub ax,word[.t_x1] |
607 | cwde |
607 | cwde |
608 | shl eax,ROUND |
608 | shl eax,ROUND |
609 | cdq |
609 | cdq |
610 | idiv ebx |
610 | idiv ebx |
611 | ; mov .dtx13,eax |
611 | ; mov .dtx13,eax |
612 | push eax |
612 | push eax |
613 | 613 | ||
614 | mov ax,word[.t_y3] |
614 | mov ax,word[.t_y3] |
615 | sub ax,word[.t_y1] |
615 | sub ax,word[.t_y1] |
616 | cwde |
616 | cwde |
617 | shl eax,ROUND |
617 | shl eax,ROUND |
618 | cdq |
618 | cdq |
619 | idiv ebx |
619 | idiv ebx |
620 | ; mov .dty13,eax |
620 | ; mov .dty13,eax |
621 | push eax |
621 | push eax |
622 | end if |
622 | end if |
623 | .bt_dx13_done: |
623 | .bt_dx13_done: |
624 | 624 | ||
625 | mov bx,.y3 ; calc delta23 |
625 | mov bx,.y3 ; calc delta23 |
626 | sub bx,.y2 |
626 | sub bx,.y2 |
627 | jnz .bt_dx23_make |
627 | jnz .bt_dx23_make |
628 | mov ecx,8 |
628 | mov ecx,8 |
629 | xor edx,edx |
629 | xor edx,edx |
630 | @@: |
630 | @@: |
631 | push edx ;dword 0 |
631 | push edx ;dword 0 |
632 | loop @b |
632 | loop @b |
633 | jmp .bt_dx23_done |
633 | jmp .bt_dx23_done |
634 | .bt_dx23_make: |
634 | .bt_dx23_make: |
635 | movsx ebx,bx |
635 | movsx ebx,bx |
636 | 636 | ||
637 | if Ext>=SSE |
637 | if Ext>=SSE |
638 | 638 | ||
639 | sub esp,32 |
639 | sub esp,32 |
640 | ; mov eax,256 |
640 | ; mov eax,256 |
641 | cvtsi2ss xmm4,[i255d] |
641 | cvtsi2ss xmm4,[i255d] |
642 | cvtsi2ss xmm3,ebx ;rcps |
642 | cvtsi2ss xmm3,ebx ;rcps |
643 | divss xmm3,xmm4 |
643 | divss xmm3,xmm4 |
644 | shufps xmm3,xmm3,0 |
644 | shufps xmm3,xmm3,0 |
645 | 645 | ||
646 | movd mm0,[.b_x2] |
646 | movd mm0,[.b_x2] |
647 | movd mm1,[.b_x3] |
647 | movd mm1,[.b_x3] |
648 | movd mm2,[.e_x2] |
648 | movd mm2,[.e_x2] |
649 | movd mm3,[.e_x3] |
649 | movd mm3,[.e_x3] |
650 | 650 | ||
651 | pxor mm4,mm4 |
651 | pxor mm4,mm4 |
652 | punpcklwd mm0,mm4 |
652 | punpcklwd mm0,mm4 |
653 | punpcklwd mm1,mm4 |
653 | punpcklwd mm1,mm4 |
654 | punpcklwd mm2,mm4 |
654 | punpcklwd mm2,mm4 |
655 | punpcklwd mm3,mm4 |
655 | punpcklwd mm3,mm4 |
656 | 656 | ||
657 | psubd mm1,mm0 |
657 | psubd mm1,mm0 |
658 | psubd mm3,mm2 |
658 | psubd mm3,mm2 |
659 | 659 | ||
660 | cvtpi2ps xmm1,mm1 |
660 | cvtpi2ps xmm1,mm1 |
661 | movlhps xmm1,xmm1 |
661 | movlhps xmm1,xmm1 |
662 | cvtpi2ps xmm1,mm3 |
662 | cvtpi2ps xmm1,mm3 |
663 | 663 | ||
664 | divps xmm1,xmm3 ;xmm1--> | dby | dbx | dey | dex | |
664 | divps xmm1,xmm3 ;xmm1--> | dby | dbx | dey | dex | |
665 | 665 | ||
666 | shufps xmm1,xmm1,10110001b |
666 | shufps xmm1,xmm1,10110001b |
667 | ;xmm1--> | dbx | dby | dex | dey | |
667 | ;xmm1--> | dbx | dby | dex | dey | |
668 | ;1 movups .dey23,xmm1 |
668 | ;1 movups .dey23,xmm1 |
669 | 669 | ||
670 | cvtps2pi mm0,xmm1 ;mm0,xmm1 ; mm0 -> 2 delta dwords |
670 | cvtps2pi mm0,xmm1 ;mm0,xmm1 ; mm0 -> 2 delta dwords |
671 | movhlps xmm1,xmm1 |
671 | movhlps xmm1,xmm1 |
672 | cvtps2pi mm1,xmm1 ;mm1,xmm1 |
672 | cvtps2pi mm1,xmm1 ;mm1,xmm1 |
673 | movq .dey23,mm0 |
673 | movq .dey23,mm0 |
674 | movq .dby23,mm1 |
674 | movq .dby23,mm1 |
675 | 675 | ||
676 | mov ax,.z3 |
676 | mov ax,.z3 |
677 | sub ax,.z2 |
677 | sub ax,.z2 |
678 | cwde |
678 | cwde |
679 | 679 | ||
680 | mov dx,.x3 |
680 | mov dx,.x3 |
681 | sub dx,.x2 |
681 | sub dx,.x2 |
682 | movsx edx,dx |
682 | movsx edx,dx |
683 | 683 | ||
684 | movd mm2,[.t_x2] |
684 | movd mm2,[.t_x2] |
685 | movd mm3,[.t_x3] |
685 | movd mm3,[.t_x3] |
686 | 686 | ||
687 | punpcklwd mm2,mm4 |
687 | punpcklwd mm2,mm4 |
688 | punpcklwd mm3,mm4 |
688 | punpcklwd mm3,mm4 |
689 | psubd mm3,mm2 |
689 | psubd mm3,mm2 |
690 | 690 | ||
691 | cvtsi2ss xmm1,eax |
691 | cvtsi2ss xmm1,eax |
692 | movlhps xmm1,xmm1 |
692 | movlhps xmm1,xmm1 |
693 | cvtsi2ss xmm1,edx |
693 | cvtsi2ss xmm1,edx |
694 | shufps xmm1,xmm1,00101111b |
694 | shufps xmm1,xmm1,00101111b |
695 | cvtpi2ps xmm1,mm3 |
695 | cvtpi2ps xmm1,mm3 |
696 | 696 | ||
697 | divps xmm1,xmm3 ; xmm1--> | dx | dz | dty | dtx | |
697 | divps xmm1,xmm3 ; xmm1--> | dx | dz | dty | dtx | |
698 | 698 | ||
699 | shufps xmm1,xmm1,11100001b |
699 | shufps xmm1,xmm1,11100001b |
700 | ; xmm1--> | dx | dz | dtx | dty | |
700 | ; xmm1--> | dx | dz | dtx | dty | |
701 | ; movlps .dty23,xmm1 |
701 | ; movlps .dty23,xmm1 |
702 | ; movhps .dz23,xmm1 |
702 | ; movhps .dz23,xmm1 |
703 | cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords | dtx | dty | |
703 | cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords | dtx | dty | |
704 | movhlps xmm1,xmm1 |
704 | movhlps xmm1,xmm1 |
705 | cvtps2pi mm1,xmm1 ; mm1 --> 2 delta dwords | dx | dz | |
705 | cvtps2pi mm1,xmm1 ; mm1 --> 2 delta dwords | dx | dz | |
706 | movq .dty23,mm0 |
706 | movq .dty23,mm0 |
707 | movq .dz23,mm1 |
707 | movq .dz23,mm1 |
708 | 708 | ||
709 | 709 | ||
710 | else |
710 | else |
711 | mov ax,.x3 |
711 | mov ax,.x3 |
712 | sub ax,.x2 |
712 | sub ax,.x2 |
713 | cwde |
713 | cwde |
714 | shl eax,ROUND |
714 | shl eax,ROUND |
715 | cdq |
715 | cdq |
716 | idiv ebx |
716 | idiv ebx |
717 | ; mov .dx23,eax |
717 | ; mov .dx23,eax |
718 | push eax |
718 | push eax |
719 | 719 | ||
720 | mov ax,.z3 |
720 | mov ax,.z3 |
721 | sub ax,.z2 |
721 | sub ax,.z2 |
722 | cwde |
722 | cwde |
723 | shl eax,CATMULL_SHIFT |
723 | shl eax,CATMULL_SHIFT |
724 | cdq |
724 | cdq |
725 | idiv ebx |
725 | idiv ebx |
726 | ; mov .dz23,eax |
726 | ; mov .dz23,eax |
727 | push eax |
727 | push eax |
728 | 728 | ||
729 | mov ax,word[.b_x3] |
729 | mov ax,word[.b_x3] |
730 | sub ax,word[.b_x2] |
730 | sub ax,word[.b_x2] |
731 | cwde |
731 | cwde |
732 | shl eax,ROUND |
732 | shl eax,ROUND |
733 | cdq |
733 | cdq |
734 | idiv ebx |
734 | idiv ebx |
735 | ; mov .dbx23,eax |
735 | ; mov .dbx23,eax |
736 | push eax |
736 | push eax |
737 | 737 | ||
738 | mov ax,word[.b_y3] |
738 | mov ax,word[.b_y3] |
739 | sub ax,word[.b_y2] |
739 | sub ax,word[.b_y2] |
740 | cwde |
740 | cwde |
741 | shl eax,ROUND |
741 | shl eax,ROUND |
742 | cdq |
742 | cdq |
743 | idiv ebx |
743 | idiv ebx |
744 | ; mov .dby23,eax |
744 | ; mov .dby23,eax |
745 | push eax |
745 | push eax |
746 | 746 | ||
747 | mov ax,word[.e_x3] |
747 | mov ax,word[.e_x3] |
748 | sub ax,word[.e_x2] |
748 | sub ax,word[.e_x2] |
749 | cwde |
749 | cwde |
750 | shl eax,ROUND |
750 | shl eax,ROUND |
751 | cdq |
751 | cdq |
752 | idiv ebx |
752 | idiv ebx |
753 | ; mov .dex23,eax |
753 | ; mov .dex23,eax |
754 | push eax |
754 | push eax |
755 | 755 | ||
756 | mov ax,word[.e_y3] |
756 | mov ax,word[.e_y3] |
757 | sub ax,word[.e_y2] |
757 | sub ax,word[.e_y2] |
758 | cwde |
758 | cwde |
759 | shl eax,ROUND |
759 | shl eax,ROUND |
760 | cdq |
760 | cdq |
761 | idiv ebx |
761 | idiv ebx |
762 | ; mov .dey23,eax |
762 | ; mov .dey23,eax |
763 | push eax |
763 | push eax |
764 | 764 | ||
765 | 765 | ||
766 | mov ax,word[.t_x3] |
766 | mov ax,word[.t_x3] |
767 | sub ax,word[.t_x2] |
767 | sub ax,word[.t_x2] |
768 | cwde |
768 | cwde |
769 | shl eax,ROUND |
769 | shl eax,ROUND |
770 | cdq |
770 | cdq |
771 | idiv ebx |
771 | idiv ebx |
772 | ; mov .dtx23,eax |
772 | ; mov .dtx23,eax |
773 | push eax |
773 | push eax |
774 | 774 | ||
775 | mov ax,word[.t_y3] |
775 | mov ax,word[.t_y3] |
776 | sub ax,word[.t_y2] |
776 | sub ax,word[.t_y2] |
777 | cwde |
777 | cwde |
778 | shl eax,ROUND |
778 | shl eax,ROUND |
779 | cdq |
779 | cdq |
780 | idiv ebx |
780 | idiv ebx |
781 | ; mov .dty23,eax |
781 | ; mov .dty23,eax |
782 | push eax |
782 | push eax |
783 | end if |
783 | end if |
784 | ; sub esp,40 |
784 | ; sub esp,40 |
785 | .bt_dx23_done: |
785 | .bt_dx23_done: |
786 | sub esp,64 |
786 | sub esp,64 |
787 | 787 | ||
788 | movsx eax,.x1 |
788 | movsx eax,.x1 |
789 | shl eax,ROUND |
789 | shl eax,ROUND |
790 | mov .cx1,eax |
790 | mov .cx1,eax |
791 | mov .cx2,eax |
791 | mov .cx2,eax |
792 | ; push eax |
792 | ; push eax |
793 | ; push eax |
793 | ; push eax |
794 | 794 | ||
795 | movsx ebx,word[.b_x1] |
795 | movsx ebx,word[.b_x1] |
796 | shl ebx,ROUND |
796 | shl ebx,ROUND |
797 | mov .cbx1,ebx |
797 | mov .cbx1,ebx |
798 | mov .cbx2,ebx |
798 | mov .cbx2,ebx |
799 | ; push ebx |
799 | ; push ebx |
800 | ; push ebx |
800 | ; push ebx |
801 | 801 | ||
802 | movsx ecx,word[.b_y1] |
802 | movsx ecx,word[.b_y1] |
803 | shl ecx,ROUND |
803 | shl ecx,ROUND |
804 | mov .cby1,ecx |
804 | mov .cby1,ecx |
805 | mov .cby2,ecx |
805 | mov .cby2,ecx |
806 | ; push ecx |
806 | ; push ecx |
807 | ; push ecx |
807 | ; push ecx |
808 | 808 | ||
809 | movsx edx,word[.e_x1] |
809 | movsx edx,word[.e_x1] |
810 | shl edx,ROUND |
810 | shl edx,ROUND |
811 | mov .cex1,edx |
811 | mov .cex1,edx |
812 | mov .cex2,edx |
812 | mov .cex2,edx |
813 | ; push edx |
813 | ; push edx |
814 | ; push edx |
814 | ; push edx |
815 | 815 | ||
816 | movsx eax,word[.e_y1] |
816 | movsx eax,word[.e_y1] |
817 | shl eax,ROUND |
817 | shl eax,ROUND |
818 | mov .cey1,eax |
818 | mov .cey1,eax |
819 | mov .cey2,eax |
819 | mov .cey2,eax |
820 | ; push eax |
820 | ; push eax |
821 | ; push eax |
821 | ; push eax |
822 | 822 | ||
823 | movsx ebx,.z1 |
823 | movsx ebx,.z1 |
824 | shl ebx,CATMULL_SHIFT |
824 | shl ebx,CATMULL_SHIFT |
825 | mov .cz1,ebx |
825 | mov .cz1,ebx |
826 | mov .cz2,ebx |
826 | mov .cz2,ebx |
827 | ; push ebx |
827 | ; push ebx |
828 | ; push ebx |
828 | ; push ebx |
829 | 829 | ||
830 | ; sub esp,16 |
830 | ; sub esp,16 |
831 | movsx ecx,word[.t_x1] |
831 | movsx ecx,word[.t_x1] |
832 | shl ecx,ROUND |
832 | shl ecx,ROUND |
833 | mov .ctx1,ecx |
833 | mov .ctx1,ecx |
834 | mov .ctx2,ecx |
834 | mov .ctx2,ecx |
835 | ;push ecx |
835 | ;push ecx |
836 | ;push ecx |
836 | ;push ecx |
837 | 837 | ||
838 | movsx edx,word[.t_y1] |
838 | movsx edx,word[.t_y1] |
839 | shl edx,ROUND |
839 | shl edx,ROUND |
840 | mov .cty1,edx |
840 | mov .cty1,edx |
841 | mov .cty2,edx |
841 | mov .cty2,edx |
842 | ; push edx |
842 | ; push edx |
843 | ; push edx |
843 | ; push edx |
844 | 844 | ||
845 | if Ext >= SSE2 |
845 | if Ext >= SSE2 |
846 | movups xmm0,.cby1 |
846 | movups xmm0,.cby1 |
847 | movups xmm1,.cty1 |
847 | movups xmm1,.cty1 |
848 | movups xmm2,.cby2 |
848 | movups xmm2,.cby2 |
849 | movups xmm3,.cty2 |
849 | movups xmm3,.cty2 |
850 | movups xmm4,.dby13 |
850 | movups xmm4,.dby13 |
851 | movups xmm5,.dty13 |
851 | movups xmm5,.dty13 |
852 | movups xmm6,.dby12 |
852 | movups xmm6,.dby12 |
853 | movups xmm7,.dty12 |
853 | movups xmm7,.dty12 |
854 | .scby1 equ [edi] |
854 | .scby1 equ [edi] |
855 | .scty1 equ [edi+16] |
855 | .scty1 equ [edi+16] |
856 | .scby2 equ [edi+32] |
856 | .scby2 equ [edi+32] |
857 | .scty2 equ [edi+48] |
857 | .scty2 equ [edi+48] |
858 | .sdby13 equ [edi+64] |
858 | .sdby13 equ [edi+64] |
859 | .sdty13 equ [edi+80] |
859 | .sdty13 equ [edi+80] |
860 | .sdby12 equ [edi+96] |
860 | .sdby12 equ [edi+96] |
861 | .sdty12 equ [edi+128] |
861 | .sdty12 equ [edi+128] |
862 | push edi |
862 | push edi |
863 | mov edi,sse_repository |
863 | mov edi,sse_repository |
864 | movaps .scby1,xmm0 |
864 | movaps .scby1,xmm0 |
865 | movaps .scty1,xmm1 |
865 | movaps .scty1,xmm1 |
866 | movaps .scby2,xmm2 |
866 | movaps .scby2,xmm2 |
867 | movaps .scty2,xmm3 |
867 | movaps .scty2,xmm3 |
868 | movaps .sdby13,xmm4 |
868 | movaps .sdby13,xmm4 |
869 | movaps .sdty13,xmm5 |
869 | movaps .sdty13,xmm5 |
870 | movaps .sdby12,xmm6 |
870 | movaps .sdby12,xmm6 |
871 | movaps .sdty12,xmm7 |
871 | movaps .sdty12,xmm7 |
872 | pop edi |
872 | pop edi |
873 | 873 | ||
874 | end if |
874 | end if |
875 | movsx ecx,.y1 |
875 | movsx ecx,.y1 |
876 | cmp cx,.y2 |
876 | cmp cx,.y2 |
877 | jge .loop12_done |
877 | jge .loop12_done |
878 | .loop12: |
878 | .loop12: |
879 | ;if Ext >= SSE2 |
879 | ;if Ext >= SSE2 |
880 | ; fxsave [sse_repository] |
880 | ; fxsave [sse_repository] |
881 | ;end if |
881 | ;end if |
882 | call .call_line |
882 | call .call_line |
883 | if Ext >= SSE2 |
883 | if Ext >= SSE2 |
884 | ; fxrstor [sse_repository] |
884 | ; fxrstor [sse_repository] |
885 | movups xmm0,.cby1 |
885 | movups xmm0,.cby1 |
886 | movups xmm1,.cty1 |
886 | movups xmm1,.cty1 |
887 | movups xmm2,.cby2 |
887 | movups xmm2,.cby2 |
888 | movups xmm3,.cty2 |
888 | movups xmm3,.cty2 |
889 | ; movups xmm4,.dby13 |
889 | ; movups xmm4,.dby13 |
890 | ; movups xmm5,.dty13 |
890 | ; movups xmm5,.dty13 |
891 | ; movups xmm6,.dby12 |
891 | ; movups xmm6,.dby12 |
892 | ; movups xmm7,.dty12 |
892 | ; movups xmm7,.dty12 |
893 | ; paddd xmm0,xmm4 |
893 | ; paddd xmm0,xmm4 |
894 | ; paddd xmm1,xmm5 |
894 | ; paddd xmm1,xmm5 |
895 | ; paddd xmm2,xmm6 |
895 | ; paddd xmm2,xmm6 |
896 | ; paddd xmm3,xmm7 |
896 | ; paddd xmm3,xmm7 |
897 | push edi |
897 | push edi |
898 | mov edi,sse_repository |
898 | mov edi,sse_repository |
899 | paddd xmm0,.sdby13 |
899 | paddd xmm0,.sdby13 |
900 | paddd xmm1,.sdty13 |
900 | paddd xmm1,.sdty13 |
901 | paddd xmm2,.sdby12 |
901 | paddd xmm2,.sdby12 |
902 | paddd xmm3,.sdty12 |
902 | paddd xmm3,.sdty12 |
903 | pop edi |
903 | pop edi |
904 | movups .cby1,xmm0 |
904 | movups .cby1,xmm0 |
905 | movups .cty1,xmm1 |
905 | movups .cty1,xmm1 |
906 | movups .cby2,xmm2 |
906 | movups .cby2,xmm2 |
907 | movups .cty2,xmm3 |
907 | movups .cty2,xmm3 |
908 | end if |
908 | end if |
909 | 909 | ||
910 | if (Ext = MMX) | (Ext = SSE) |
910 | if (Ext = MMX) | (Ext = SSE) |
911 | movq mm0,.cby2 |
911 | movq mm0,.cby2 |
912 | movq mm1,.cby1 |
912 | movq mm1,.cby1 |
913 | movq mm2,.cey2 |
913 | movq mm2,.cey2 |
914 | movq mm3,.cey1 |
914 | movq mm3,.cey1 |
915 | movq mm4,.cty1 |
915 | movq mm4,.cty1 |
916 | movq mm5,.cty2 |
916 | movq mm5,.cty2 |
917 | movq mm6,.cz1 |
917 | movq mm6,.cz1 |
918 | movq mm7,.cz2 |
918 | movq mm7,.cz2 |
919 | paddd mm0,.dby12 |
919 | paddd mm0,.dby12 |
920 | paddd mm1,.dby13 |
920 | paddd mm1,.dby13 |
921 | paddd mm2,.dey12 |
921 | paddd mm2,.dey12 |
922 | paddd mm3,.dey13 |
922 | paddd mm3,.dey13 |
923 | paddd mm4,.dty13 |
923 | paddd mm4,.dty13 |
924 | paddd mm5,.dty12 |
924 | paddd mm5,.dty12 |
925 | paddd mm6,.dz13 |
925 | paddd mm6,.dz13 |
926 | paddd mm7,.dz12 |
926 | paddd mm7,.dz12 |
927 | movq .cby2,mm0 |
927 | movq .cby2,mm0 |
928 | movq .cby1,mm1 |
928 | movq .cby1,mm1 |
929 | movq .cey1,mm3 |
929 | movq .cey1,mm3 |
930 | movq .cey2,mm2 |
930 | movq .cey2,mm2 |
931 | movq .cty1,mm4 |
931 | movq .cty1,mm4 |
932 | movq .cty2,mm5 |
932 | movq .cty2,mm5 |
933 | movq .cz1,mm6 |
933 | movq .cz1,mm6 |
934 | movq .cz2,mm7 |
934 | movq .cz2,mm7 |
935 | end if |
935 | end if |
936 | if Ext = NON |
936 | if Ext = NON |
937 | mov edx,.dbx13 |
937 | mov edx,.dbx13 |
938 | add .cbx1,edx |
938 | add .cbx1,edx |
939 | mov eax,.dbx12 |
939 | mov eax,.dbx12 |
940 | add .cbx2,eax |
940 | add .cbx2,eax |
941 | mov ebx,.dby13 |
941 | mov ebx,.dby13 |
942 | add .cby1,ebx |
942 | add .cby1,ebx |
943 | mov edx,.dby12 |
943 | mov edx,.dby12 |
944 | add .cby2,edx |
944 | add .cby2,edx |
945 | 945 | ||
946 | mov eax,.dex13 |
946 | mov eax,.dex13 |
947 | add .cex1,eax |
947 | add .cex1,eax |
948 | mov ebx,.dex12 |
948 | mov ebx,.dex12 |
949 | add .cex2,ebx |
949 | add .cex2,ebx |
950 | mov edx,.dey13 |
950 | mov edx,.dey13 |
951 | add .cey1,edx |
951 | add .cey1,edx |
952 | mov eax,.dey12 |
952 | mov eax,.dey12 |
953 | add .cey2,eax |
953 | add .cey2,eax |
954 | 954 | ||
955 | mov eax,.dtx13 |
955 | mov eax,.dtx13 |
956 | add .ctx1,eax |
956 | add .ctx1,eax |
957 | mov ebx,.dtx12 |
957 | mov ebx,.dtx12 |
958 | add .ctx2,ebx |
958 | add .ctx2,ebx |
959 | mov edx,.dty13 |
959 | mov edx,.dty13 |
960 | add .cty1,edx |
960 | add .cty1,edx |
961 | mov eax,.dty12 |
961 | mov eax,.dty12 |
962 | add .cty2,eax |
962 | add .cty2,eax |
963 | 963 | ||
964 | mov eax,.dx13 |
964 | mov eax,.dx13 |
965 | add .cx1,eax |
965 | add .cx1,eax |
966 | mov ebx,.dx12 |
966 | mov ebx,.dx12 |
967 | add .cx2,ebx |
967 | add .cx2,ebx |
968 | mov ebx,.dz13 |
968 | mov ebx,.dz13 |
969 | add .cz1,ebx |
969 | add .cz1,ebx |
970 | mov edx,.dz12 |
970 | mov edx,.dz12 |
971 | add .cz2,edx |
971 | add .cz2,edx |
972 | end if |
972 | end if |
973 | inc ecx |
973 | inc ecx |
974 | cmp cx,.y2 |
974 | cmp cx,.y2 |
975 | jl .loop12 |
975 | jl .loop12 |
976 | .loop12_done: |
976 | .loop12_done: |
977 | 977 | ||
978 | movsx ecx,.y2 |
978 | movsx ecx,.y2 |
979 | cmp cx,.y3 |
979 | cmp cx,.y3 |
980 | jge .loop23_done |
980 | jge .loop23_done |
981 | 981 | ||
982 | 982 | ||
983 | movsx eax,.z2 |
983 | movsx eax,.z2 |
984 | shl eax,CATMULL_SHIFT |
984 | shl eax,CATMULL_SHIFT |
985 | mov .cz2,eax |
985 | mov .cz2,eax |
986 | 986 | ||
987 | movsx ebx,.x2 |
987 | movsx ebx,.x2 |
988 | shl ebx,ROUND |
988 | shl ebx,ROUND |
989 | mov .cx2,ebx |
989 | mov .cx2,ebx |
990 | 990 | ||
991 | movzx edx,word[.b_x2] |
991 | movzx edx,word[.b_x2] |
992 | shl edx,ROUND |
992 | shl edx,ROUND |
993 | mov .cbx2,edx |
993 | mov .cbx2,edx |
994 | 994 | ||
995 | movzx eax,word[.b_y2] |
995 | movzx eax,word[.b_y2] |
996 | shl eax,ROUND |
996 | shl eax,ROUND |
997 | mov .cby2,eax |
997 | mov .cby2,eax |
998 | 998 | ||
999 | movzx ebx,word[.e_x2] |
999 | movzx ebx,word[.e_x2] |
1000 | shl ebx,ROUND |
1000 | shl ebx,ROUND |
1001 | mov .cex2,ebx |
1001 | mov .cex2,ebx |
1002 | 1002 | ||
1003 | movzx edx,word[.e_y2] |
1003 | movzx edx,word[.e_y2] |
1004 | shl edx,ROUND |
1004 | shl edx,ROUND |
1005 | mov .cey2,edx |
1005 | mov .cey2,edx |
1006 | 1006 | ||
1007 | movzx eax,word[.t_x2] |
1007 | movzx eax,word[.t_x2] |
1008 | shl eax,ROUND |
1008 | shl eax,ROUND |
1009 | mov .ctx2,eax |
1009 | mov .ctx2,eax |
1010 | 1010 | ||
1011 | movzx ebx,word[.t_y2] |
1011 | movzx ebx,word[.t_y2] |
1012 | shl ebx,ROUND |
1012 | shl ebx,ROUND |
1013 | mov .cty2,ebx |
1013 | mov .cty2,ebx |
1014 | if Ext >= SSE2 |
1014 | if Ext >= SSE2 |
1015 | movups xmm2,.cby2 |
1015 | movups xmm2,.cby2 |
1016 | movups xmm3,.cty2 |
1016 | movups xmm3,.cty2 |
1017 | ; movups xmm4,.dby13 |
1017 | ; movups xmm4,.dby13 |
1018 | ; movups xmm5,.dty13 |
1018 | ; movups xmm5,.dty13 |
1019 | movups xmm6,.dby23 |
1019 | movups xmm6,.dby23 |
1020 | movups xmm7,.dty23 |
1020 | movups xmm7,.dty23 |
1021 | ; .scby1 equ [edi] |
1021 | ; .scby1 equ [edi] |
1022 | ; .scty1 equ [edi+16] |
1022 | ; .scty1 equ [edi+16] |
1023 | ; .scby2 equ [edi+32] |
1023 | ; .scby2 equ [edi+32] |
1024 | ; .scty2 equ [edi+48] |
1024 | ; .scty2 equ [edi+48] |
1025 | ; .sdby13 equ [edi+64] |
1025 | ; .sdby13 equ [edi+64] |
1026 | ; .sdty13 equ [edi+80] |
1026 | ; .sdty13 equ [edi+80] |
1027 | .sdby23 equ [edi+160] |
1027 | .sdby23 equ [edi+160] |
1028 | .sdty23 equ [edi+192] |
1028 | .sdty23 equ [edi+192] |
1029 | push edi |
1029 | push edi |
1030 | mov edi,sse_repository |
1030 | mov edi,sse_repository |
1031 | ; movaps .scby1,xmm0 |
1031 | ; movaps .scby1,xmm0 |
1032 | ; movaps .scty1,xmm1 |
1032 | ; movaps .scty1,xmm1 |
1033 | movaps .scby2,xmm2 |
1033 | movaps .scby2,xmm2 |
1034 | movaps .scty2,xmm3 |
1034 | movaps .scty2,xmm3 |
1035 | ; movaps .sdby13,xmm4 |
1035 | ; movaps .sdby13,xmm4 |
1036 | ; movaps .sdty13,xmm5 |
1036 | ; movaps .sdty13,xmm5 |
1037 | movaps .sdby23,xmm6 |
1037 | movaps .sdby23,xmm6 |
1038 | movaps .sdty23,xmm7 |
1038 | movaps .sdty23,xmm7 |
1039 | pop edi |
1039 | pop edi |
1040 | 1040 | ||
1041 | end if |
1041 | end if |
1042 | 1042 | ||
1043 | .loop23: |
1043 | .loop23: |
1044 | ;if Ext >= SSE2 |
1044 | ;if Ext >= SSE2 |
1045 | ; fxsave [sse_repository] |
1045 | ; fxsave [sse_repository] |
1046 | ;end if |
1046 | ;end if |
1047 | call .call_line |
1047 | call .call_line |
1048 | 1048 | ||
1049 | if Ext >= SSE2 |
1049 | if Ext >= SSE2 |
1050 | 1050 | ||
1051 | movups xmm0,.cby1 |
1051 | movups xmm0,.cby1 |
1052 | movups xmm1,.cty1 |
1052 | movups xmm1,.cty1 |
1053 | movups xmm2,.cby2 |
1053 | movups xmm2,.cby2 |
1054 | movups xmm3,.cty2 |
1054 | movups xmm3,.cty2 |
1055 | 1055 | ||
1056 | 1056 | ||
1057 | push edi |
1057 | push edi |
1058 | mov edi,sse_repository |
1058 | mov edi,sse_repository |
1059 | paddd xmm0,.sdby13 |
1059 | paddd xmm0,.sdby13 |
1060 | paddd xmm1,.sdty13 |
1060 | paddd xmm1,.sdty13 |
1061 | paddd xmm2,.sdby23 |
1061 | paddd xmm2,.sdby23 |
1062 | paddd xmm3,.sdty23 |
1062 | paddd xmm3,.sdty23 |
1063 | pop edi |
1063 | pop edi |
1064 | movups .cby1,xmm0 |
1064 | movups .cby1,xmm0 |
1065 | movups .cty1,xmm1 |
1065 | movups .cty1,xmm1 |
1066 | movups .cby2,xmm2 |
1066 | movups .cby2,xmm2 |
1067 | movups .cty2,xmm3 |
1067 | movups .cty2,xmm3 |
1068 | 1068 | ||
1069 | 1069 | ||
1070 | 1070 | ||
1071 | 1071 | ||
1072 | ; fxrstor [sse_repository] |
1072 | ; fxrstor [sse_repository] |
1073 | ; movups xmm0,.cby1 |
1073 | ; movups xmm0,.cby1 |
1074 | ; movups xmm1,.cty1 |
1074 | ; movups xmm1,.cty1 |
1075 | ; movups xmm2,.cby2 |
1075 | ; movups xmm2,.cby2 |
1076 | ; movups xmm3,.cty2 |
1076 | ; movups xmm3,.cty2 |
1077 | ; movups xmm4,.dby13 |
1077 | ; movups xmm4,.dby13 |
1078 | ; movups xmm5,.dty13 |
1078 | ; movups xmm5,.dty13 |
1079 | ; movups xmm6,.dby23 |
1079 | ; movups xmm6,.dby23 |
1080 | ; movups xmm7,.dty23 |
1080 | ; movups xmm7,.dty23 |
1081 | ; paddd xmm0,xmm4 |
1081 | ; paddd xmm0,xmm4 |
1082 | ; paddd xmm1,xmm5 |
1082 | ; paddd xmm1,xmm5 |
1083 | ; paddd xmm2,xmm6 |
1083 | ; paddd xmm2,xmm6 |
1084 | ; paddd xmm3,xmm7 |
1084 | ; paddd xmm3,xmm7 |
1085 | ; movups .cby1,xmm0 |
1085 | ; movups .cby1,xmm0 |
1086 | ; movups .cty1,xmm1 |
1086 | ; movups .cty1,xmm1 |
1087 | ; movups .cby2,xmm2 |
1087 | ; movups .cby2,xmm2 |
1088 | ; movups .cty2,xmm3 |
1088 | ; movups .cty2,xmm3 |
1089 | ; |
1089 | ; |
1090 | end if |
1090 | end if |
1091 | if (Ext = MMX) | (Ext = SSE) |
1091 | if (Ext = MMX) | (Ext = SSE) |
1092 | movq mm0,.cby2 |
1092 | movq mm0,.cby2 |
1093 | movq mm1,.cby1 |
1093 | movq mm1,.cby1 |
1094 | movq mm2,.cey2 |
1094 | movq mm2,.cey2 |
1095 | movq mm3,.cey1 |
1095 | movq mm3,.cey1 |
1096 | movq mm4,.cty1 |
1096 | movq mm4,.cty1 |
1097 | movq mm5,.cty2 |
1097 | movq mm5,.cty2 |
1098 | movq mm6,.cz1 |
1098 | movq mm6,.cz1 |
1099 | movq mm7,.cz2 |
1099 | movq mm7,.cz2 |
1100 | paddd mm0,.dby23 |
1100 | paddd mm0,.dby23 |
1101 | paddd mm1,.dby13 |
1101 | paddd mm1,.dby13 |
1102 | paddd mm2,.dey23 |
1102 | paddd mm2,.dey23 |
1103 | paddd mm3,.dey13 |
1103 | paddd mm3,.dey13 |
1104 | paddd mm4,.dty13 |
1104 | paddd mm4,.dty13 |
1105 | paddd mm5,.dty23 |
1105 | paddd mm5,.dty23 |
1106 | paddd mm6,.dz13 |
1106 | paddd mm6,.dz13 |
1107 | paddd mm7,.dz23 |
1107 | paddd mm7,.dz23 |
1108 | movq .cby2,mm0 |
1108 | movq .cby2,mm0 |
1109 | movq .cby1,mm1 |
1109 | movq .cby1,mm1 |
1110 | movq .cey2,mm2 |
1110 | movq .cey2,mm2 |
1111 | movq .cey1,mm3 |
1111 | movq .cey1,mm3 |
1112 | movq .cty1,mm4 |
1112 | movq .cty1,mm4 |
1113 | movq .cty2,mm5 |
1113 | movq .cty2,mm5 |
1114 | movq .cz1,mm6 |
1114 | movq .cz1,mm6 |
1115 | movq .cz2,mm7 |
1115 | movq .cz2,mm7 |
1116 | end if |
1116 | end if |
1117 | If Ext = NON |
1117 | If Ext = NON |
1118 | mov edx,.dbx13 |
1118 | mov edx,.dbx13 |
1119 | add .cbx1,edx |
1119 | add .cbx1,edx |
1120 | mov eax,.dbx23 |
1120 | mov eax,.dbx23 |
1121 | add .cbx2,eax |
1121 | add .cbx2,eax |
1122 | mov ebx,.dby13 |
1122 | mov ebx,.dby13 |
1123 | add .cby1,ebx |
1123 | add .cby1,ebx |
1124 | mov edx,.dby23 |
1124 | mov edx,.dby23 |
1125 | add .cby2,edx |
1125 | add .cby2,edx |
1126 | 1126 | ||
1127 | mov eax,.dex13 |
1127 | mov eax,.dex13 |
1128 | add .cex1,eax |
1128 | add .cex1,eax |
1129 | mov ebx,.dex23 |
1129 | mov ebx,.dex23 |
1130 | add .cex2,ebx |
1130 | add .cex2,ebx |
1131 | mov edx,.dey13 |
1131 | mov edx,.dey13 |
1132 | add .cey1,edx |
1132 | add .cey1,edx |
1133 | mov eax,.dey23 |
1133 | mov eax,.dey23 |
1134 | add .cey2,eax |
1134 | add .cey2,eax |
1135 | 1135 | ||
1136 | mov eax,.dx13 |
1136 | mov eax,.dx13 |
1137 | add .cx1,eax |
1137 | add .cx1,eax |
1138 | mov ebx,.dx23 |
1138 | mov ebx,.dx23 |
1139 | add .cx2,ebx |
1139 | add .cx2,ebx |
1140 | mov ebx,.dz13 |
1140 | mov ebx,.dz13 |
1141 | add .cz1,ebx |
1141 | add .cz1,ebx |
1142 | mov edx,.dz23 |
1142 | mov edx,.dz23 |
1143 | add .cz2,edx |
1143 | add .cz2,edx |
1144 | 1144 | ||
1145 | mov eax,.dtx13 |
1145 | mov eax,.dtx13 |
1146 | add .ctx1,eax |
1146 | add .ctx1,eax |
1147 | mov ebx,.dtx23 |
1147 | mov ebx,.dtx23 |
1148 | add .ctx2,ebx |
1148 | add .ctx2,ebx |
1149 | mov edx,.dty13 |
1149 | mov edx,.dty13 |
1150 | add .cty1,edx |
1150 | add .cty1,edx |
1151 | mov eax,.dty23 |
1151 | mov eax,.dty23 |
1152 | add .cty2,eax |
1152 | add .cty2,eax |
1153 | end if |
1153 | end if |
1154 | inc ecx |
1154 | inc ecx |
1155 | cmp cx,.y3 |
1155 | cmp cx,.y3 |
1156 | jl .loop23 |
1156 | jl .loop23 |
1157 | .loop23_done: |
1157 | .loop23_done: |
1158 | 1158 | ||
1159 | mov esp,ebp |
1159 | mov esp,ebp |
1160 | ret 50 |
1160 | ret 50 |
1161 | 1161 | ||
;-----------------------------------------------------------------------
; .call_line - draw one scanline by calling bump_tex_line_z.
; Local helper of bump_tex_triangle_z; it is called once per row from
; .loop12 and .loop23.
; in:   ecx = current scanline (pushed last, becomes the callee's .y)
;       .cx1/.cx2, .cz1/.cz2, .tex_ptr, .z_buff, .t_emap, .t_bmap are
;       read from memory in every build.
;       Ext <  SSE2: bump/env/texture interpolants (.cbx1 ... .cty2)
;                    are read from memory as well.
;       Ext >= SSE2: those interpolants are taken from xmm0..xmm3.
;       edi is not touched here and reaches the callee unchanged.
; out:  all general registers preserved (pushad/popad).
;       Ext >= SSE2: xmm0..xmm3 are left with the dwords of each pair
;       swapped; both calling loops reload them from memory afterwards.
; Stack: 19 dwords (76 bytes) are pushed so that they line up with the
;       [ebp+4]..[ebp+76] equates of bump_tex_line_z.
; NOTE(review): popad follows the call directly, so the callee
;       presumably removes those 76 bytes itself (its epilogue is not
;       visible from here) - confirm.
;-----------------------------------------------------------------------
1162 | .call_line: |
1162 | .call_line: |
1163 | 1163 | ||
1164 | pushad |
1164 | pushad |
; NOTE(review): the two register maps below presumably list the dwords
; from lowest to highest - TODO confirm against the .c* equates.
1165 | ; xmm0= cby1,cbx1,cz1,cx1 |
1165 | ; xmm0= cby1,cbx1,cz1,cx1 |
1166 | ; xmm1= cty1,ctx1,cey1,cex1 |
1166 | ; xmm1= cty1,ctx1,cey1,cex1 |
; --- callee slots .tx1/.ty1 ([ebp+72]/[ebp+76]) ---
; SSE2: shufps 10110001b swaps dword 0<->1 and 2<->3 of the register;
; the low qword of the swapped xmm1 is then stored into the two slots.
; xmm3 is swapped here as well, ready for the .tx2/.ty2 store below.
1167 | if Ext >= SSE2 |
1167 | if Ext >= SSE2 |
1168 | sub esp,8 |
1168 | sub esp,8 |
1169 | shufps xmm1,xmm1,10110001b |
1169 | shufps xmm1,xmm1,10110001b |
1170 | shufps xmm3,xmm3,10110001b |
1170 | shufps xmm3,xmm3,10110001b |
1171 | movlps [esp],xmm1 |
1171 | movlps [esp],xmm1 |
1172 | else |
1172 | else |
1173 | push dword .cty1 |
1173 | push dword .cty1 |
1174 | push .ctx1 |
1174 | push .ctx1 |
1175 | end if |
1175 | end if |
; --- callee slot .z1 ([ebp+68]); read from memory in every build ---
1176 | push dword .cz1 |
1176 | push dword .cz1 |
; --- callee slots .tx2/.ty2 ([ebp+60]/[ebp+64]) ---
; SSE2: low qword of the already swapped xmm3.
1177 | if Ext>=SSE2 |
1177 | if Ext>=SSE2 |
1178 | sub esp,8 |
1178 | sub esp,8 |
1179 | movlps [esp],xmm3 |
1179 | movlps [esp],xmm3 |
1180 | else |
1180 | else |
1181 | push dword .cty2 |
1181 | push dword .cty2 |
1182 | push .ctx2 |
1182 | push .ctx2 |
1183 | end if |
1183 | end if |
; --- callee slot .z2 ([ebp+56]) ---
1184 | push dword .cz2 |
1184 | push dword .cz2 |
; --- callee slots .bx1 ... .ey2 ([ebp+24]..[ebp+52]), 8 dwords ---
; SSE2: one 32-byte area is reserved and filled with four qword stores:
;   [esp+24] <- high qword of xmm3            (.ex2/.ey2 slots)
;   [esp+16] <- low  qword of swapped xmm2    (.bx2/.by2 slots)
;   [esp+8]  <- high qword of xmm1            (.ex1/.ey1 slots)
;   [esp]    <- low  qword of swapped xmm0    (.bx1/.by1 slots)
1185 | if Ext>=SSE2 |
1185 | if Ext>=SSE2 |
1186 | sub esp,32 |
1186 | sub esp,32 |
1187 | movhps [esp+24],xmm3 |
1187 | movhps [esp+24],xmm3 |
1188 | shufps xmm2,xmm2,10110001b |
1188 | shufps xmm2,xmm2,10110001b |
1189 | movlps [esp+16],xmm2 |
1189 | movlps [esp+16],xmm2 |
1190 | movhps [esp+8],xmm1 |
1190 | movhps [esp+8],xmm1 |
1191 | shufps xmm0,xmm0,10110001b |
1191 | shufps xmm0,xmm0,10110001b |
1192 | movlps [esp],xmm0 ;================================ |
1192 | movlps [esp],xmm0 ;================================ |
1193 | 1193 | ||
1194 | else |
1194 | else |
1195 | push dword .cey2 |
1195 | push dword .cey2 |
1196 | push .cex2 |
1196 | push .cex2 |
1197 | push dword .cby2 |
1197 | push dword .cby2 |
1198 | push .cbx2 |
1198 | push .cbx2 |
1199 | push dword .cey1 |
1199 | push dword .cey1 |
1200 | push .cex1 |
1200 | push .cex1 |
1201 | push dword .cby1 |
1201 | push dword .cby1 |
1202 | push .cbx1 |
1202 | push .cbx1 |
1203 | end if |
1203 | end if |
1204 | 1204 | ||
; --- pointer arguments: callee .tex_map, .z_buff, .emap, .bmap
;     ([ebp+20], [ebp+16], [ebp+12], [ebp+8]) ---
1205 | push .tex_ptr |
1205 | push .tex_ptr |
1206 | push .z_buff |
1206 | push .z_buff |
1207 | push .t_emap |
1207 | push .t_emap |
1208 | push .t_bmap |
1208 | push .t_bmap |
1209 | 1209 | ||
; --- scanline number: callee .y ([ebp+4]) ---
1210 | push ecx |
1210 | push ecx |
1211 | 1211 | ||
; Register arguments: eax = x1, ebx = x2. The edge x values are stored
; shifted left by ROUND, so an arithmetic shift right drops the
; fraction and keeps the sign.
1212 | mov eax,.cx1 |
1212 | mov eax,.cx1 |
1213 | sar eax,ROUND |
1213 | sar eax,ROUND |
1214 | mov ebx,.cx2 |
1214 | mov ebx,.cx2 |
1215 | sar ebx,ROUND |
1215 | sar ebx,ROUND |
1216 | 1216 | ||
1217 | call bump_tex_line_z |
1217 | call bump_tex_line_z |
1218 | 1218 | ||
; Restore the registers saved by pushad at entry (this relies on the
; argument area having been released already, see the header note).
1219 | popad |
1219 | popad |
1220 | ;end if |
1220 | ;end if |
1221 | ret |
1221 | ret |
1222 | bump_tex_line_z: |
1222 | bump_tex_line_z: |
1223 | ;--------------in: eax - x1 |
1223 | ;--------------in: eax - x1 |
1224 | ;-------------- ebx - x2 |
1224 | ;-------------- ebx - x2 |
1225 | ;-------------- edi - pointer to screen buffer |
1225 | ;-------------- edi - pointer to screen buffer |
1226 | ;stack - other parameters : |
1226 | ;stack - other parameters : |
1227 | .y equ dword [ebp+4] |
1227 | .y equ dword [ebp+4] |
1228 | .bmap equ dword [ebp+8] ; bump map pointer |
1228 | .bmap equ dword [ebp+8] ; bump map pointer |
1229 | .emap equ dword [ebp+12] ; env map pointer |
1229 | .emap equ dword [ebp+12] ; env map pointer |
1230 | .z_buff equ dword [ebp+16] ; z buffer |
1230 | .z_buff equ dword [ebp+16] ; z buffer |
1231 | .tex_map equ dword [ebp+20] ; texture pointer |
1231 | .tex_map equ dword [ebp+20] ; texture pointer |
1232 | 1232 | ||
1233 | .bx1 equ [ebp+24] ; --- |
1233 | .bx1 equ [ebp+24] ; --- |
1234 | .by1 equ [ebp+28] ; | |
1234 | .by1 equ [ebp+28] ; | |
1235 | .ex1 equ [ebp+32] ; | |
1235 | .ex1 equ [ebp+32] ; | |
1236 | .ey1 equ [ebp+36] ; | |
1236 | .ey1 equ [ebp+36] ; | |
1237 | .bx2 equ [ebp+40] ; | |
1237 | .bx2 equ [ebp+40] ; | |
1238 | .by2 equ [ebp+44] ; |> b. map and e. map coords |
1238 | .by2 equ [ebp+44] ; |> b. map and e. map coords |
1239 | .ex2 equ [ebp+48] ; |> shifted shl ROUND |
1239 | .ex2 equ [ebp+48] ; |> shifted shl ROUND |
1240 | .ey2 equ [ebp+52] ; --- |
1240 | .ey2 equ [ebp+52] ; --- |
1241 | .z2 equ [ebp+56] |
1241 | .z2 equ [ebp+56] |
1242 | .tx2 equ [ebp+60] |
1242 | .tx2 equ [ebp+60] |
1243 | .ty2 equ [ebp+64] |
1243 | .ty2 equ [ebp+64] |
1244 | .z1 equ [ebp+68] |
1244 | .z1 equ [ebp+68] |
1245 | .tx1 equ [ebp+72] |
1245 | .tx1 equ [ebp+72] |
1246 | .ty1 equ [ebp+76] |
1246 | .ty1 equ [ebp+76] |
1247 | 1247 | ||
1248 | 1248 | ||
1249 | 1249 | ||
1250 | .x1 equ [ebp-4] |
1250 | .x1 equ [ebp-4] |
1251 | .x2 equ [ebp-8] |
1251 | .x2 equ [ebp-8] |
1252 | .dbx equ [ebp-12] |
1252 | .dbx equ [ebp-12] |
1253 | .dby equ [ebp-16] |
1253 | .dby equ [ebp-16] |
1254 | .dex equ [ebp-20] |
1254 | .dex equ [ebp-20] |
1255 | .dey equ [ebp-24] |
1255 | .dey equ [ebp-24] |
1256 | .dz equ [ebp-28] |
1256 | .dz equ [ebp-28] |
1257 | .dtx equ [ebp-32] |
1257 | .dtx equ [ebp-32] |
1258 | .dty equ [ebp-36] |
1258 | .dty equ [ebp-36] |
1259 | 1259 | ||
1260 | .cbx equ [ebp-40] |
1260 | .cbx equ [ebp-40] |
1261 | .cby equ [ebp-44] |
1261 | .cby equ [ebp-44] |
1262 | .cex equ [ebp-48] |
1262 | .cex equ [ebp-48] |
1263 | .cey equ [ebp-52] |
1263 | .cey equ [ebp-52] |
1264 | .cz equ [ebp-56] |
1264 | .cz equ [ebp-56] |
1265 | .czbuff equ [ebp-60] |
1265 | .czbuff equ [ebp-60] |
1266 | .ctx equ [ebp-64] |
1266 | .ctx equ [ebp-64] |
1267 | .cty equ [ebp-68] |
1267 | .cty equ [ebp-68] |
1268 | .c_scr equ [ebp-72] |
1268 | .c_scr equ [ebp-72] |
1269 | 1269 | ||
1270 | .temp1 equ ebp-80 |
1270 | .temp1 equ ebp-80 |
1271 | .temp2 equ ebp-88 |
1271 | .temp2 equ ebp-88 |
1272 | .temp3 equ ebp-76 |
1272 | .temp3 equ ebp-76 |
1273 | .temp4 equ ebp-84 |
1273 | .temp4 equ ebp-84 |
1274 | .temp5 equ ebp-92 |
1274 | .temp5 equ ebp-92 |
1275 | 1275 | ||
1276 | mov ebp,esp |
1276 | mov ebp,esp |
1277 | 1277 | ||
1278 | mov ecx,.y |
1278 | mov ecx,.y |
1279 | or ecx,ecx |
1279 | or ecx,ecx |
1280 | jl .bl_end |
1280 | jl .bl_end |
1281 | cmp ecx,SIZE_Y |
1281 | cmp ecx,SIZE_Y |
1282 | jge .bl_end |
1282 | jge .bl_end |
1283 | 1283 | ||
1284 | cmp eax,ebx |
1284 | cmp eax,ebx |
1285 | jl .bl_ok |
1285 | jl .bl_ok |
1286 | je .bl_end |
1286 | je .bl_end |
1287 | 1287 | ||
1288 | 1288 | ||
1289 | if Ext=NON |
1289 | if Ext=NON |
1290 | mov edx,.bx1 |
1290 | mov edx,.bx1 |
1291 | xchg edx,.bx2 |
1291 | xchg edx,.bx2 |
1292 | mov .bx1,edx |
1292 | mov .bx1,edx |
1293 | mov edx,.by1 |
1293 | mov edx,.by1 |
1294 | xchg edx,.by2 |
1294 | xchg edx,.by2 |
1295 | mov .by1,edx |
1295 | mov .by1,edx |
1296 | 1296 | ||
1297 | mov edx,.ex1 |
1297 | mov edx,.ex1 |
1298 | xchg edx,.ex2 |
1298 | xchg edx,.ex2 |
1299 | mov .ex1,edx |
1299 | mov .ex1,edx |
1300 | mov edx,.ey1 |
1300 | mov edx,.ey1 |
1301 | xchg edx,.ey2 |
1301 | xchg edx,.ey2 |
1302 | mov .ey1,edx |
1302 | mov .ey1,edx |
1303 | 1303 | ||
1304 | mov edx,.tx1 |
1304 | mov edx,.tx1 |
1305 | xchg edx,.tx2 |
1305 | xchg edx,.tx2 |
1306 | mov .tx1,edx |
1306 | mov .tx1,edx |
1307 | mov edx,.ty1 |
1307 | mov edx,.ty1 |
1308 | xchg edx,.ty2 |
1308 | xchg edx,.ty2 |
1309 | mov .ty1,edx |
1309 | mov .ty1,edx |
1310 | end if |
1310 | end if |
1311 | if Ext = MMX |
1311 | if Ext = MMX |
1312 | movq mm0,.bx1 |
1312 | movq mm0,.bx1 |
1313 | movq mm1,.bx2 |
1313 | movq mm1,.bx2 |
1314 | movq mm2,.ex1 |
1314 | movq mm2,.ex1 |
1315 | movq mm3,.ex2 |
1315 | movq mm3,.ex2 |
1316 | movq mm4,.tx1 |
1316 | movq mm4,.tx1 |
1317 | movq mm5,.tx2 |
1317 | movq mm5,.tx2 |
1318 | movq .bx2,mm0 |
1318 | movq .bx2,mm0 |
1319 | movq .bx1,mm1 |
1319 | movq .bx1,mm1 |
1320 | movq .ex1,mm3 |
1320 | movq .ex1,mm3 |
1321 | movq .ex2,mm2 |
1321 | movq .ex2,mm2 |
1322 | movq .tx1,mm5 |
1322 | movq .tx1,mm5 |
1323 | movq .tx2,mm4 |
1323 | movq .tx2,mm4 |
1324 | end if |
1324 | end if |
1325 | if Ext>=SSE |
1325 | if Ext>=SSE |
1326 | movups xmm0,.bx1 |
1326 | movups xmm0,.bx1 |
1327 | movups xmm1,.bx2 |
1327 | movups xmm1,.bx2 |
1328 | movups .bx1,xmm1 |
1328 | movups .bx1,xmm1 |
1329 | movups .bx2,xmm0 |
1329 | movups .bx2,xmm0 |
1330 | movq mm0,.tx1 |
1330 | movq mm0,.tx1 |
1331 | movq mm1,.tx2 |
1331 | movq mm1,.tx2 |
1332 | movq .tx1,mm1 |
1332 | movq .tx1,mm1 |
1333 | movq .tx2,mm0 |
1333 | movq .tx2,mm0 |
1334 | end if |
1334 | end if |
1335 | ;if Ext>=SSE2 |
1335 | ;if Ext>=SSE2 |
1336 | ; movaps xmm4,xmm0 |
1336 | ; movaps xmm4,xmm0 |
1337 | ; movaps xmm0,xmm2 |
1337 | ; movaps xmm0,xmm2 |
1338 | ; movaps xmm2,xmm4 |
1338 | ; movaps xmm2,xmm4 |
1339 | ; movaps xmm5,xmm1 |
1339 | ; movaps xmm5,xmm1 |
1340 | ; movaps xmm1,xmm3 |
1340 | ; movaps xmm1,xmm3 |
1341 | ; movaps xmm3,xmm5 |
1341 | ; movaps xmm3,xmm5 |
1342 | ;else |
1342 | ;else |
1343 | 1343 | ||
1344 | xchg eax,ebx |
1344 | xchg eax,ebx |
1345 | mov edx,.z1 |
1345 | mov edx,.z1 |
1346 | xchg edx,.z2 |
1346 | xchg edx,.z2 |
1347 | mov .z1,edx |
1347 | mov .z1,edx |
1348 | ;end if |
1348 | ;end if |
1349 | .bl_ok: |
1349 | .bl_ok: |
1350 | ;if Ext >= SSE2 |
1350 | ;if Ext >= SSE2 |
1351 | ; shufps xmm0,xmm0,11100001b |
1351 | ; shufps xmm0,xmm0,11100001b |
1352 | ; shufps xmm2,xmm2,11100001b |
1352 | ; shufps xmm2,xmm2,11100001b |
1353 | ; movlps .bx1,xmm0 |
1353 | ; movlps .bx1,xmm0 |
1354 | ; movlps .bx2,xmm2 |
1354 | ; movlps .bx2,xmm2 |
1355 | 1355 | ||
1356 | 1356 | ||
1357 | ; shufps xmm0,xmm0,00011011b |
1357 | ; shufps xmm0,xmm0,00011011b |
1358 | ; shufps xmm2,xmm2,00011011b |
1358 | ; shufps xmm2,xmm2,00011011b |
1359 | ; movd eax,xmm0 |
1359 | ; movd eax,xmm0 |
1360 | ; movd ebx,xmm2 |
1360 | ; movd ebx,xmm2 |
1361 | ; shufps xmm0,xmm0,11000110b |
1361 | ; shufps xmm0,xmm0,11000110b |
1362 | ; shufps xmm2,xmm2,11000110b |
1362 | ; shufps xmm2,xmm2,11000110b |
1363 | ; movd .z1,xmm0 |
1363 | ; movd .z1,xmm0 |
1364 | ; movd .z2,xmm2 |
1364 | ; movd .z2,xmm2 |
1365 | ; shufps xmm1,xmm1,10110001b |
1365 | ; shufps xmm1,xmm1,10110001b |
1366 | ; shufps xmm3,xmm3,10110001b |
1366 | ; shufps xmm3,xmm3,10110001b |
1367 | ; movlps .ex1,xmm1 |
1367 | ; movlps .ex1,xmm1 |
1368 | ; movlps .ex2,xmm2 |
1368 | ; movlps .ex2,xmm2 |
1369 | ; movhps .tx1,xmm1 |
1369 | ; movhps .tx1,xmm1 |
1370 | ; movhps .tx2,xmm2 |
1370 | ; movhps .tx2,xmm2 |
1371 | 1371 | ||
1372 | ; xchg eax,ebx |
1372 | ; xchg eax,ebx |
1373 | ; mov edx,.z1 |
1373 | ; mov edx,.z1 |
1374 | ; xchg edx,.z2 |
1374 | ; xchg edx,.z2 |
1375 | ; mov .z1,edx |
1375 | ; mov .z1,edx |
1376 | 1376 | ||
1377 | 1377 | ||
1378 | ;end if |
1378 | ;end if |
1379 | 1379 | ||
1380 | push eax |
1380 | push eax |
1381 | push ebx ;store x1, x2 |
1381 | push ebx ;store x1, x2 |
1382 | cmp dword .x1,SIZE_X |
1382 | cmp dword .x1,SIZE_X |
1383 | jge .bl_end |
1383 | jge .bl_end |
1384 | cmp dword .x2,0 |
1384 | cmp dword .x2,0 |
1385 | jle .bl_end |
1385 | jle .bl_end |
1386 | 1386 | ||
1387 | mov ebx,.x2 |
1387 | mov ebx,.x2 |
1388 | sub ebx,.x1 |
1388 | sub ebx,.x1 |
1389 | 1389 | ||
1390 | if Ext>=SSE |
1390 | if Ext>=SSE |
1391 | 1391 | ||
1392 | sub esp,28 |
1392 | sub esp,28 |
1393 | cvtsi2ss xmm3,ebx ;rcps |
1393 | cvtsi2ss xmm3,ebx ;rcps |
1394 | shufps xmm3,xmm3,0 |
1394 | shufps xmm3,xmm3,0 |
1395 | ; float using SSE variant ::--> |
1395 | ; float using SSE variant ::--> |
1396 | ; movups xmm0,.bx1 ; new |
1396 | ; movups xmm0,.bx1 ; new |
1397 | ; movups xmm1,.bx2 ; new |
1397 | ; movups xmm1,.bx2 ; new |
1398 | 1398 | ||
1399 | cvtpi2ps xmm0,.bx1 ;mm0 ; variant fixed point |
1399 | cvtpi2ps xmm0,.bx1 ;mm0 ; variant fixed point |
1400 | movlhps xmm0,xmm0 |
1400 | movlhps xmm0,xmm0 |
1401 | cvtpi2ps xmm0,.ex1 ;mm2 |
1401 | cvtpi2ps xmm0,.ex1 ;mm2 |
1402 | cvtpi2ps xmm1,.bx2 ;mm1 |
1402 | cvtpi2ps xmm1,.bx2 ;mm1 |
1403 | movlhps xmm1,xmm1 |
1403 | movlhps xmm1,xmm1 |
1404 | cvtpi2ps xmm1,.ex2 ;mm3 |
1404 | cvtpi2ps xmm1,.ex2 ;mm3 |
1405 | subps xmm1,xmm0 |
1405 | subps xmm1,xmm0 |
1406 | 1406 | ||
1407 | divps xmm1,xmm3 |
1407 | divps xmm1,xmm3 |
1408 | 1408 | ||
1409 | shufps xmm1,xmm1,10110001b |
1409 | shufps xmm1,xmm1,10110001b |
1410 | ; movups .dey,xmm1 ; new |
1410 | ; movups .dey,xmm1 ; new |
1411 | cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords |
1411 | cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords |
1412 | movhlps xmm1,xmm1 |
1412 | movhlps xmm1,xmm1 |
1413 | cvtps2pi mm1,xmm1 |
1413 | cvtps2pi mm1,xmm1 |
1414 | movq .dey,mm0 |
1414 | movq .dey,mm0 |
1415 | movq .dby,mm1 |
1415 | movq .dby,mm1 |
1416 | 1416 | ||
1417 | movd mm2,.z1 |
1417 | movd mm2,.z1 |
1418 | movd mm3,.z2 |
1418 | movd mm3,.z2 |
1419 | 1419 | ||
1420 | cvtpi2ps xmm0,.tx1 ;mm0 |
1420 | cvtpi2ps xmm0,.tx1 ;mm0 |
1421 | movlhps xmm0,xmm0 |
1421 | movlhps xmm0,xmm0 |
1422 | cvtpi2ps xmm0,mm2 |
1422 | cvtpi2ps xmm0,mm2 |
1423 | cvtpi2ps xmm1,.tx2 ;mm1 |
1423 | cvtpi2ps xmm1,.tx2 ;mm1 |
1424 | movlhps xmm1,xmm1 |
1424 | movlhps xmm1,xmm1 |
1425 | cvtpi2ps xmm1,mm3 |
1425 | cvtpi2ps xmm1,mm3 |
1426 | ; movups xmm0,.z1 ; new |
1426 | ; movups xmm0,.z1 ; new |
1427 | ; movups xmm1,.z2 ; new |
1427 | ; movups xmm1,.z2 ; new |
1428 | subps xmm1,xmm0 |
1428 | subps xmm1,xmm0 |
1429 | 1429 | ||
1430 | divps xmm1,xmm3 |
1430 | divps xmm1,xmm3 |
1431 | 1431 | ||
1432 | ; movups .dz,xmm1 ;new |
1432 | ; movups .dz,xmm1 ;new |
1433 | 1433 | ||
1434 | shufps xmm1,xmm1,10110100b |
1434 | shufps xmm1,xmm1,10110100b |
1435 | cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords |
1435 | cvtps2pi mm0,xmm1 ; mm0 -> 2 delta dwords |
1436 | movhlps xmm1,xmm1 |
1436 | movhlps xmm1,xmm1 |
1437 | cvtps2pi mm1,xmm1 |
1437 | cvtps2pi mm1,xmm1 |
1438 | movd .dz,mm0 |
1438 | movd .dz,mm0 |
1439 | movq .dty,mm1 |
1439 | movq .dty,mm1 |
1440 | 1440 | ||
1441 | else |
1441 | else |
1442 | 1442 | ||
1443 | mov eax,.bx2 ; calc .dbx |
1443 | mov eax,.bx2 ; calc .dbx |
1444 | sub eax,.bx1 |
1444 | sub eax,.bx1 |
1445 | cdq |
1445 | cdq |
1446 | idiv ebx |
1446 | idiv ebx |
1447 | push eax |
1447 | push eax |
1448 | 1448 | ||
1449 | mov eax,.by2 ; calc .dby |
1449 | mov eax,.by2 ; calc .dby |
1450 | sub eax,.by1 |
1450 | sub eax,.by1 |
1451 | cdq |
1451 | cdq |
1452 | idiv ebx |
1452 | idiv ebx |
1453 | push eax |
1453 | push eax |
1454 | 1454 | ||
1455 | mov eax,.ex2 ; calc .dex |
1455 | mov eax,.ex2 ; calc .dex |
1456 | sub eax,.ex1 |
1456 | sub eax,.ex1 |
1457 | cdq |
1457 | cdq |
1458 | idiv ebx |
1458 | idiv ebx |
1459 | push eax |
1459 | push eax |
1460 | 1460 | ||
1461 | mov eax,.ey2 ; calc .dey |
1461 | mov eax,.ey2 ; calc .dey |
1462 | sub eax,.ey1 |
1462 | sub eax,.ey1 |
1463 | cdq |
1463 | cdq |
1464 | idiv ebx |
1464 | idiv ebx |
1465 | push eax |
1465 | push eax |
1466 | 1466 | ||
1467 | 1467 | ||
1468 | mov eax,.z2 ; calc .dz |
1468 | mov eax,.z2 ; calc .dz |
1469 | sub eax,.z1 |
1469 | sub eax,.z1 |
1470 | cdq |
1470 | cdq |
1471 | idiv ebx |
1471 | idiv ebx |
1472 | push eax |
1472 | push eax |
1473 | 1473 | ||
1474 | mov eax,.tx2 ; calc .dtx |
1474 | mov eax,.tx2 ; calc .dtx |
1475 | sub eax,.tx1 |
1475 | sub eax,.tx1 |
1476 | cdq |
1476 | cdq |
1477 | idiv ebx |
1477 | idiv ebx |
1478 | push eax |
1478 | push eax |
1479 | 1479 | ||
1480 | mov eax,.ty2 ; calc .dty |
1480 | mov eax,.ty2 ; calc .dty |
1481 | sub eax,.ty1 |
1481 | sub eax,.ty1 |
1482 | cdq |
1482 | cdq |
1483 | idiv ebx |
1483 | idiv ebx |
1484 | push eax |
1484 | push eax |
1485 | 1485 | ||
1486 | end if |
1486 | end if |
1487 | cmp dword .x1,0 ; set correctly begin variable |
1487 | cmp dword .x1,0 ; set correctly begin variable |
1488 | jge @f ; CLIPPING ON FUNCTION |
1488 | jge @f ; CLIPPING ON FUNCTION |
1489 | ; clip the part of the triangle that exceeds the screen |
1489 | ; clip the part of the triangle that exceeds the screen |
1490 | mov ebx,.x1 |
1490 | mov ebx,.x1 |
1491 | neg ebx |
1491 | neg ebx |
1492 | 1492 | ||
1493 | ;if Ext >= SSE |
1493 | ;if Ext >= SSE |
1494 | 1494 | ||
1495 | ; cvtsi2ss xmm0,ebx |
1495 | ; cvtsi2ss xmm0,ebx |
1496 | ; shufps xmm0,xmm0,0 |
1496 | ; shufps xmm0,xmm0,0 |
1497 | ; movups xmm1,.dey |
1497 | ; movups xmm1,.dey |
1498 | ; mulps xmm1,xmm0 |
1498 | ; mulps xmm1,xmm0 |
1499 | ; shufps xmm1,xmm1,00011011b |
1499 | ; shufps xmm1,xmm1,00011011b |
1500 | ; movups xmm2,.bx1 |
1500 | ; movups xmm2,.bx1 |
1501 | ; addps xmm2,xmm1 |
1501 | ; addps xmm2,xmm1 |
1502 | ; movups .bx1,xmm2 |
1502 | ; movups .bx1,xmm2 |
1503 | 1503 | ||
1504 | mov eax,.dz |
1504 | mov eax,.dz |
1505 | imul ebx ; eax = .dz * abs(.x1) |
1505 | imul ebx ; eax = .dz * abs(.x1) |
1506 | add .z1,eax |
1506 | add .z1,eax |
1507 | mov dword .x1,0 |
1507 | mov dword .x1,0 |
1508 | 1508 | ||
1509 | mov eax,.dbx |
1509 | mov eax,.dbx |
1510 | imul ebx |
1510 | imul ebx |
1511 | add .bx1,eax |
1511 | add .bx1,eax |
1512 | 1512 | ||
1513 | mov eax,.dby |
1513 | mov eax,.dby |
1514 | imul ebx |
1514 | imul ebx |
1515 | add .by1,eax |
1515 | add .by1,eax |
1516 | 1516 | ||
1517 | mov eax,.dex |
1517 | mov eax,.dex |
1518 | imul ebx |
1518 | imul ebx |
1519 | add .ex1,eax |
1519 | add .ex1,eax |
1520 | 1520 | ||
1521 | mov eax,.dey |
1521 | mov eax,.dey |
1522 | imul ebx |
1522 | imul ebx |
1523 | add .ey1,eax |
1523 | add .ey1,eax |
1524 | 1524 | ||
1525 | mov eax,.dtx |
1525 | mov eax,.dtx |
1526 | imul ebx |
1526 | imul ebx |
1527 | add .tx1,eax |
1527 | add .tx1,eax |
1528 | 1528 | ||
1529 | mov eax,.dty |
1529 | mov eax,.dty |
1530 | imul ebx |
1530 | imul ebx |
1531 | add .ty1,eax |
1531 | add .ty1,eax |
1532 | 1532 | ||
1533 | @@: |
1533 | @@: |
1534 | cmp dword .x2,SIZE_X |
1534 | cmp dword .x2,SIZE_X |
1535 | jl @f |
1535 | jl @f |
1536 | mov dword .x2,SIZE_X |
1536 | mov dword .x2,SIZE_X |
1537 | @@: |
1537 | @@: |
1538 | mov eax,SIZE_X ;calc memory begin in buffers |
1538 | mov eax,SIZE_X ;calc memory begin in buffers |
1539 | mul .y |
1539 | mul .y |
1540 | add eax,.x1 |
1540 | add eax,.x1 |
1541 | lea esi,[4*eax] |
1541 | lea esi,[4*eax] |
1542 | add esi,.z_buff ; z-buffer filled with dd variables |
1542 | add esi,.z_buff ; z-buffer filled with dd variables |
1543 | lea eax,[eax*3] |
1543 | lea eax,[eax*3] |
1544 | add edi,eax |
1544 | add edi,eax |
1545 | 1545 | ||
1546 | 1546 | ||
1547 | mov ecx,.x2 |
1547 | mov ecx,.x2 |
1548 | sub ecx,.x1 |
1548 | sub ecx,.x1 |
1549 | ; init current variables |
1549 | ; init current variables |
1550 | push dword .bx1 ; current b, e and t shifted shl ROUND .cbx |
1550 | push dword .bx1 ; current b, e and t shifted shl ROUND .cbx |
1551 | push dword .by1 ; .cby |
1551 | push dword .by1 ; .cby |
1552 | push dword .ex1 ; .cex |
1552 | push dword .ex1 ; .cex |
1553 | push dword .ey1 ; .cey |
1553 | push dword .ey1 ; .cey |
1554 | 1554 | ||
1555 | push dword .z1 ; current z shl CATMULL_SHIFT ; .cz |
1555 | push dword .z1 ; current z shl CATMULL_SHIFT ; .cz |
1556 | push esi ; .czbuff |
1556 | push esi ; .czbuff |
1557 | 1557 | ||
1558 | push dword .tx1 ; .ctx |
1558 | push dword .tx1 ; .ctx |
1559 | push dword .ty1 ; .cty |
1559 | push dword .ty1 ; .cty |
1560 | push edi ; .c_scr |
1560 | push edi ; .c_scr |
1561 | ;if Ext = SSE2 |
1561 | if Ext = SSE2 |
1562 | ; mov eax,TEXTURE_SIZE |
1562 | mov eax,TEXTURE_SIZE |
1563 | ; movd xmm1,eax |
1563 | movd xmm1,eax |
1564 | ; shufps xmm1,xmm1,0 |
1564 | shufps xmm1,xmm1,0 |
1565 | ; push dword TEX_X |
1565 | push dword TEX_X |
1566 | ; push dword -TEX_X |
1566 | push dword -TEX_X |
1567 | ; push dword 1 |
1567 | push dword 1 |
1568 | ; push dword -1 |
1568 | push dword -1 |
1569 | ; movups xmm2,[esp] |
1569 | movups xmm2,[esp] |
1570 | ; movd xmm3,.bmap |
1570 | movd xmm3,.bmap |
1571 | ; shufps xmm3,xmm3,0 |
1571 | shufps xmm3,xmm3,0 |
1572 | ;end if |
1572 | end if |
1573 | 1573 | ||
1574 | if Ext>=MMX |
1574 | if Ext>=MMX |
1575 | movq mm7,.cty |
1575 | movq mm7,.cty |
1576 | movq mm6,.cby |
1576 | movq mm6,.cby |
1577 | movq mm5,.cey |
1577 | movq mm5,.cey |
1578 | ; movq mm4,.dtyq |
1578 | ; movq mm4,.dtyq |
1579 | ; movq mm3,.dbyq |
1579 | ; movq mm3,.dbyq |
1580 | end if |
1580 | end if |
1581 | 1581 | ||
1582 | .draw: |
1582 | .draw: |
1583 | ; if TEX = SHIFTING ;bump drawing only in shifting mode |
1583 | ; if TEX = SHIFTING ;bump drawing only in shifting mode |
1584 | mov esi,.czbuff ; .czbuff current address in buffer |
1584 | mov esi,.czbuff ; .czbuff current address in buffer |
1585 | mov ebx,.cz ; .cz - cur z position |
1585 | mov ebx,.cz ; .cz - cur z position |
1586 | cmp ebx,dword[esi] |
1586 | cmp ebx,dword[esi] |
1587 | jge .skip |
1587 | jge .skip |
1588 | if Ext=NON |
1588 | if Ext=NON |
1589 | mov eax,.cby |
1589 | mov eax,.cby |
1590 | shr eax,ROUND |
1590 | shr eax,ROUND |
1591 | mov esi,.cbx |
1591 | mov esi,.cbx |
1592 | shr esi,ROUND |
1592 | shr esi,ROUND |
1593 | else |
1593 | else |
1594 | movq mm1,mm6 |
1594 | movq mm1,mm6 |
1595 | psrld mm1,ROUND |
1595 | psrld mm1,ROUND |
1596 | movd eax,mm1 |
1596 | movd eax,mm1 |
1597 | psrlq mm1,32 |
1597 | psrlq mm1,32 |
1598 | movd esi,mm1 |
1598 | movd esi,mm1 |
1599 | end if |
1599 | end if |
1600 | 1600 | ||
1601 | shl eax,TEX_SHIFT |
1601 | shl eax,TEX_SHIFT |
1602 | add esi,eax ;- ; esi - current bump map index |
1602 | add esi,eax ;- ; esi - current bump map index |
1603 | 1603 | ||
1604 | ;if Ext = SSE2 |
1604 | if Ext = SSE2 |
1605 | ; |
1605 | |
1606 | ; movd xmm0,esi |
1606 | movd xmm0,esi |
1607 | ; shufps xmm0,xmm0,0 |
1607 | shufps xmm0,xmm0,0 |
1608 | ; paddd xmm0,xmm2 |
1608 | paddd xmm0,xmm2 |
1609 | ; pand xmm0,xmm1 |
1609 | pand xmm0,xmm1 |
1610 | ; paddd xmm0,xmm3 |
1610 | paddd xmm0,xmm3 |
1611 | ; |
1611 | |
1612 | ; movd ebx,xmm0 |
1612 | movd ebx,xmm0 |
1613 | ; movzx eax,byte[ebx] |
1613 | movzx eax,byte[ebx] |
1614 | ; |
1614 | ; |
1615 | ; shufps xmm0,xmm0,11100001b |
1615 | ; shufps xmm0,xmm0,11100001b |
- | 1616 | psrldq xmm0,4 |
|
1616 | ; movd ebx,xmm0 |
1617 | movd ebx,xmm0 |
1617 | ; movzx ebx,byte[ebx] |
1618 | movzx ebx,byte[ebx] |
1618 | ; sub eax,ebx |
1619 | sub eax,ebx |
1619 | ; |
1620 | ; |
1620 | ; shufps xmm0,xmm0,11111110b |
1621 | ; shufps xmm0,xmm0,11111110b |
- | 1622 | psrldq xmm0,4 |
|
1621 | ; movd ebx,xmm0 |
1623 | movd ebx,xmm0 |
1622 | ; movzx edx, byte [ebx] |
1624 | movzx edx, byte [ebx] |
1623 | ; |
1625 | ; |
1624 | ; shufps xmm0,xmm0,11111111b |
1626 | ; shufps xmm0,xmm0,11111111b |
- | 1627 | psrldq xmm0,4 |
|
1625 | ; movd ebx,xmm0 |
1628 | movd ebx,xmm0 |
1626 | ; movzx ebx, byte [ebx] |
1629 | movzx ebx, byte [ebx] |
1627 | ; sub edx,ebx |
1630 | sub edx,ebx |
1628 | ; |
1631 | ; |
1629 | ;else |
1632 | else |
1630 | mov ebx,esi |
1633 | ; mov ebx,esi |
- | 1634 | ; dec ebx |
|
1631 | dec ebx |
1635 | lea ebx,[esi-1] |
1632 | and ebx,TEXTURE_SIZE |
1636 | and ebx,TEXTURE_SIZE |
1633 | add ebx,.bmap |
1637 | add ebx,.bmap |
1634 | movzx eax,byte [ebx] |
1638 | movzx eax,byte [ebx] |
1635 | 1639 | ||
1636 | mov ebx,esi |
1640 | ; mov ebx,esi |
- | 1641 | ; inc ebx |
|
1637 | inc ebx |
1642 | lea ebx,[esi+1] |
1638 | and ebx,TEXTURE_SIZE |
1643 | and ebx,TEXTURE_SIZE |
1639 | add ebx,.bmap |
1644 | add ebx,.bmap |
1640 | movzx ebx,byte [ebx] |
1645 | movzx ebx,byte [ebx] |
1641 | sub eax,ebx |
1646 | sub eax,ebx |
1642 | 1647 | ||
1643 | mov ebx,esi |
1648 | ; mov ebx,esi |
- | 1649 | ; sub ebx,TEX_X |
|
1644 | sub ebx,TEX_X |
1650 | lea ebx,[esi-TEX_X] |
1645 | and ebx,TEXTURE_SIZE |
1651 | and ebx,TEXTURE_SIZE |
1646 | add ebx,.bmap |
1652 | add ebx,.bmap |
1647 | movzx edx,byte [ebx] |
1653 | movzx edx,byte [ebx] |
1648 | 1654 | ||
1649 | mov ebx,esi |
1655 | ; mov ebx,esi |
- | 1656 | ; add ebx,TEX_X |
|
1650 | add ebx,TEX_X |
1657 | lea ebx,[esi+TEX_X] |
1651 | and ebx,TEXTURE_SIZE |
1658 | and ebx,TEXTURE_SIZE |
1652 | add ebx,.bmap |
1659 | add ebx,.bmap |
1653 | movzx ebx,byte [ebx] |
1660 | movzx ebx,byte [ebx] |
1654 | sub edx,ebx |
1661 | sub edx,ebx |
1655 | ;end if |
1662 | end if |
1656 | 1663 | ||
1657 | ; eax - horizontal sub modificated x coord |
1664 | ; eax - horizontal sub modificated x coord |
1658 | ; edx - vertical sub modificated y coord |
1665 | ; edx - vertical sub modificated y coord |
1659 | if Ext=NON |
1666 | if Ext=NON |
1660 | mov ebx,.cex ;.cex - current env map X |
1667 | mov ebx,.cex ;.cex - current env map X |
1661 | shr ebx,ROUND |
1668 | shr ebx,ROUND |
1662 | add eax,ebx |
1669 | add eax,ebx |
1663 | 1670 | ||
1664 | 1671 | ||
1665 | mov ebx,.cey ;.cey - current env map y |
1672 | mov ebx,.cey ;.cey - current env map y |
1666 | shr ebx,ROUND |
1673 | shr ebx,ROUND |
1667 | add edx,ebx |
1674 | add edx,ebx |
1668 | 1675 | ||
1669 | else |
1676 | else |
1670 | movq mm1,mm5 ; mm5 - copy of cur env coords |
1677 | movq mm1,mm5 ; mm5 - copy of cur env coords |
1671 | psrld mm1,ROUND |
1678 | psrld mm1,ROUND |
1672 | movd ebx,mm1 |
1679 | movd ebx,mm1 |
1673 | psrlq mm1,32 |
1680 | psrlq mm1,32 |
1674 | add eax,ebx |
1681 | add eax,ebx |
1675 | movd ebx,mm1 |
1682 | movd ebx,mm1 |
1676 | add edx,ebx |
1683 | add edx,ebx |
1677 | ; movq qword[.temp1],mm3 |
1684 | ; movq qword[.temp1],mm3 |
1678 | ; add eax,dword [.temp1] |
1685 | ; add eax,dword [.temp1] |
1679 | ; add edx,dword [.temp1+4] |
1686 | ; add edx,dword [.temp1+4] |
1680 | end if |
1687 | end if |
1681 | 1688 | ||
1682 | or eax,eax |
1689 | or eax,eax |
1683 | jl .black |
1690 | jl .black |
1684 | cmp eax,TEX_X |
1691 | cmp eax,TEX_X |
1685 | jg .black |
1692 | jg .black |
1686 | or edx,edx |
1693 | or edx,edx |
1687 | jl .black |
1694 | jl .black |
1688 | cmp edx,TEX_Y |
1695 | cmp edx,TEX_Y |
1689 | jg .black |
1696 | jg .black |
1690 | 1697 | ||
1691 | shl edx,TEX_SHIFT ; zaburzenie w emapie = zaburzenie w teksturze |
1698 | shl edx,TEX_SHIFT ; zaburzenie w emapie = zaburzenie w teksturze |
1692 | add edx,eax ; proponuje nie stawiac czarnego pixela tylko |
1699 | add edx,eax ; proponuje nie stawiac czarnego pixela tylko |
1693 | lea esi,[edx*3] ; niezaburzony. |
1700 | lea esi,[edx*3] ; niezaburzony. |
1694 | add esi,.emap ; |
1701 | add esi,.emap ; |
1695 | lodsd |
1702 | lodsd |
1696 | 1703 | ||
1697 | if Ext=NON |
1704 | if Ext=NON |
1698 | mov edx,.cty |
1705 | mov edx,.cty |
1699 | shr edx,ROUND ; sar |
1706 | shr edx,ROUND ; sar |
1700 | 1707 | ||
1701 | mov edi,.ctx |
1708 | mov edi,.ctx |
1702 | shr edi,ROUND ; sar |
1709 | shr edi,ROUND ; sar |
1703 | else |
1710 | else |
1704 | movq mm1,mm7 |
1711 | movq mm1,mm7 |
1705 | psrld mm1,ROUND |
1712 | psrld mm1,ROUND |
1706 | movd edx,mm1 |
1713 | movd edx,mm1 |
1707 | psrlq mm1,32 |
1714 | psrlq mm1,32 |
1708 | movd edi,mm1 |
1715 | movd edi,mm1 |
1709 | 1716 | ||
1710 | end if |
1717 | end if |
1711 | 1718 | ||
1712 | shl edx,TEX_SHIFT |
1719 | shl edx,TEX_SHIFT |
1713 | add edi,edx |
1720 | add edi,edx |
1714 | and edi,TEXTURE_SIZE |
1721 | and edi,TEXTURE_SIZE |
1715 | lea esi,[edi*3] |
1722 | lea esi,[edi*3] |
1716 | add esi,.tex_map |
1723 | add esi,.tex_map |
1717 | 1724 | ||
1718 | if Ext=NON |
1725 | if Ext=NON |
1719 | mov edx,eax |
1726 | mov edx,eax |
1720 | lodsd |
1727 | lodsd |
1721 | push ax |
1728 | push ax |
1722 | mul dl |
1729 | mul dl |
1723 | mov dl,ah |
1730 | mov dl,ah |
1724 | pop ax |
1731 | pop ax |
1725 | shr ax,8 |
1732 | shr ax,8 |
1726 | mul dh |
1733 | mul dh |
1727 | mov al,dl |
1734 | mov al,dl |
1728 | mov edi,.c_scr |
1735 | mov edi,.c_scr |
1729 | stosw |
1736 | stosw |
1730 | shr edx,16 |
1737 | shr edx,16 |
1731 | shr eax,16 |
1738 | shr eax,16 |
1732 | mul dl |
1739 | mul dl |
1733 | shr ax,8 |
1740 | shr ax,8 |
1734 | stosb |
1741 | stosb |
1735 | else |
1742 | else |
1736 | movd mm0,eax |
1743 | movd mm0,eax |
1737 | pxor mm1,mm1 |
1744 | pxor mm1,mm1 |
1738 | punpcklbw mm0,mm1 |
1745 | punpcklbw mm0,mm1 |
1739 | movd mm2,[esi] |
1746 | movd mm2,[esi] |
1740 | punpcklbw mm2,mm1 |
1747 | punpcklbw mm2,mm1 |
1741 | pmullw mm0,mm2 |
1748 | pmullw mm0,mm2 |
1742 | psrlw mm0,8 |
1749 | psrlw mm0,8 |
1743 | packuswb mm0,mm1 |
1750 | packuswb mm0,mm1 |
1744 | mov edi,.c_scr |
1751 | mov edi,.c_scr |
1745 | movd [edi],mm0 |
1752 | movd [edi],mm0 |
1746 | 1753 | ||
1747 | end if |
1754 | end if |
1748 | 1755 | ||
1749 | jmp .actual_zbuff ; actualize z buffer |
1756 | jmp .actual_zbuff ; actualize z buffer |
1750 | @@: |
1757 | @@: |
1751 | .black: |
1758 | .black: |
1752 | xor eax,eax |
1759 | xor eax,eax |
1753 | mov edi,.c_scr |
1760 | mov edi,.c_scr |
1754 | stosd |
1761 | stosd |
1755 | .actual_zbuff: |
1762 | .actual_zbuff: |
1756 | mov eax,.cz |
1763 | mov eax,.cz |
1757 | mov edi,.czbuff |
1764 | mov edi,.czbuff |
1758 | stosd |
1765 | stosd |
1759 | 1766 | ||
1760 | .skip: |
1767 | .skip: |
1761 | add dword .czbuff,4 |
1768 | add dword .czbuff,4 |
1762 | add dword .c_scr,3 |
1769 | add dword .c_scr,3 |
1763 | 1770 | ||
1764 | if Ext=NON |
1771 | if Ext=NON |
1765 | mov eax,.dbx |
1772 | mov eax,.dbx |
1766 | add .cbx,eax |
1773 | add .cbx,eax |
1767 | mov ebx,.dby |
1774 | mov ebx,.dby |
1768 | add .cby,ebx |
1775 | add .cby,ebx |
1769 | 1776 | ||
1770 | mov edx,.dex |
1777 | mov edx,.dex |
1771 | add .cex,edx |
1778 | add .cex,edx |
1772 | mov eax,.dey |
1779 | mov eax,.dey |
1773 | add .cey,eax |
1780 | add .cey,eax |
1774 | 1781 | ||
1775 | mov ebx,.dtx |
1782 | mov ebx,.dtx |
1776 | add .ctx,ebx |
1783 | add .ctx,ebx |
1777 | mov edx,.dty |
1784 | mov edx,.dty |
1778 | add .cty,edx |
1785 | add .cty,edx |
1779 | 1786 | ||
1780 | else |
1787 | else |
1781 | paddd mm7,.dty |
1788 | paddd mm7,.dty |
1782 | paddd mm6,.dby |
1789 | paddd mm6,.dby |
1783 | paddd mm5,.dey |
1790 | paddd mm5,.dey |
1784 | end if |
1791 | end if |
1785 | mov eax,.dz |
1792 | mov eax,.dz |
1786 | add .cz,eax |
1793 | add .cz,eax |
1787 | 1794 | ||
1788 | dec ecx |
1795 | dec ecx |
1789 | jnz .draw |
1796 | jnz .draw |
1790 | 1797 | ||
1791 | .bl_end: |
1798 | .bl_end: |
1792 | mov esp,ebp |
1799 | mov esp,ebp |
1793 | ret 76 |
1800 | ret 76 |
1794 | ;Ext = MMX |
1801 | ;Ext = MMX |
1795 | 1802 | ||
1796 | ; else |
1803 | ; else |
1797 | ; movq mm5, qword[.temp1] ;- |
1804 | ; movq mm5, qword[.temp1] ;- |
1798 | ; paddd mm5, qword[.temp5] ; .temp5 == low dword = TEX_X, high dword = -TEX_X |
1805 | ; paddd mm5, qword[.temp5] ; .temp5 == low dword = TEX_X, high dword = -TEX_X |
1799 | ; pand mm5, qword[.temp3] ; .temp3 == low = high dword = TEX_SIZE |
1806 | ; pand mm5, qword[.temp3] ; .temp3 == low = high dword = TEX_SIZE |
1800 | ; paddd mm5, qword[.temp4] ; .temp4 == low = high dword = offset .bmap |
1807 | ; paddd mm5, qword[.temp4] ; .temp4 == low = high dword = offset .bmap |
1801 | ; movd ebx,mm5 |
1808 | ; movd ebx,mm5 |
1802 | ; psrlq mm5,32 |
1809 | ; psrlq mm5,32 |
1803 | ; end if>=> |
1810 | ; end if>=> |