Rev 8210 | Go to most recent revision | Only display areas with differences | Regard whitespace | Details | Blame | Last modification | View Log | RSS feed
Rev 8210 | Rev 9172 | ||
---|---|---|---|
1 | ; |
1 | ; |
2 | ; x86 format converters for HERMES |
2 | ; x86 format converters for HERMES |
3 | ; Some routines Copyright (c) 1998 Christian Nentwich (brn@eleet.mcb.at) |
3 | ; Some routines Copyright (c) 1998 Christian Nentwich (brn@eleet.mcb.at) |
4 | ; This source code is licensed under the GNU LGPL |
4 | ; This source code is licensed under the GNU LGPL |
5 | ; |
5 | ; |
6 | ; Please refer to the file COPYING.LIB contained in the distribution for |
6 | ; Please refer to the file COPYING.LIB contained in the distribution for |
7 | ; licensing conditions |
7 | ; licensing conditions |
8 | ; |
8 | ; |
9 | ; Most routines are (c) Glenn Fiedler (ptc@gaffer.org), used with permission |
9 | ; Most routines are (c) Glenn Fiedler (ptc@gaffer.org), used with permission |
10 | ; |
10 | ; |
11 | - | ||
12 | 11 | ||
13 | BITS 32 |
- | |
14 | - | ||
15 | GLOBAL _ConvertX86p32_32BGR888 |
- | |
16 | GLOBAL _ConvertX86p32_32RGBA888 |
- | |
17 | GLOBAL _ConvertX86p32_32BGRA888 |
- | |
18 | GLOBAL _ConvertX86p32_24RGB888 |
- | |
19 | GLOBAL _ConvertX86p32_24BGR888 |
- | |
20 | GLOBAL _ConvertX86p32_16RGB565 |
- | |
21 | GLOBAL _ConvertX86p32_16BGR565 |
- | |
22 | GLOBAL _ConvertX86p32_16RGB555 |
12 | BITS 32 |
- | 13 | ||
- | 14 | %include "common.inc" |
|
- | 15 | ||
- | 16 | SDL_FUNC _ConvertX86p32_32BGR888 |
|
- | 17 | SDL_FUNC _ConvertX86p32_32RGBA888 |
|
- | 18 | SDL_FUNC _ConvertX86p32_32BGRA888 |
|
- | 19 | SDL_FUNC _ConvertX86p32_24RGB888 |
|
- | 20 | SDL_FUNC _ConvertX86p32_24BGR888 |
|
- | 21 | SDL_FUNC _ConvertX86p32_16RGB565 |
|
23 | GLOBAL _ConvertX86p32_16BGR555 |
22 | SDL_FUNC _ConvertX86p32_16BGR565 |
24 | GLOBAL _ConvertX86p32_8RGB332 |
23 | SDL_FUNC _ConvertX86p32_16RGB555 |
25 | 24 | SDL_FUNC _ConvertX86p32_16BGR555 |
|
26 | EXTERN _x86return |
25 | SDL_FUNC _ConvertX86p32_8RGB332 |
27 | 26 | ||
28 | SECTION .text |
27 | SECTION .text |
29 | - | ||
30 | 28 | ||
31 | ;; _Convert_* |
29 | ;; _Convert_* |
32 | ;; Parameters: |
30 | ;; Parameters: |
33 | ;; ESI = source |
31 | ;; ESI = source |
34 | ;; EDI = dest |
32 | ;; EDI = dest |
35 | ;; ECX = amount (NOT 0!!! (the _ConvertX86 routine checks for that though)) |
33 | ;; ECX = amount (NOT 0!!! (the _ConvertX86 routine checks for that though)) |
36 | ;; Destroys: |
34 | ;; Destroys: |
37 | ;; EAX, EBX, EDX |
35 | ;; EAX, EBX, EDX |
38 | 36 | ||
39 | 37 | ||
40 | _ConvertX86p32_32BGR888: |
38 | _ConvertX86p32_32BGR888: |
41 | 39 | ||
42 | ; check short |
40 | ; check short |
43 | cmp ecx,BYTE 32 |
41 | cmp ecx,BYTE 32 |
44 | ja .L3 |
42 | ja .L3 |
45 | 43 | ||
46 | .L1 ; short loop |
44 | .L1: ; short loop |
47 | mov edx,[esi] |
45 | mov edx,[esi] |
48 | bswap edx |
46 | bswap edx |
49 | ror edx,8 |
47 | ror edx,8 |
50 | mov [edi],edx |
48 | mov [edi],edx |
51 | add esi,BYTE 4 |
49 | add esi,BYTE 4 |
52 | add edi,BYTE 4 |
50 | add edi,BYTE 4 |
53 | dec ecx |
51 | dec ecx |
54 | jnz .L1 |
52 | jnz .L1 |
55 | .L2 |
53 | .L2: |
56 | jmp _x86return |
54 | retn |
57 | 55 | ||
58 | .L3 ; save ebp |
56 | .L3: ; save ebp |
59 | push ebp |
57 | push ebp |
60 | 58 | ||
61 | ; unroll four times |
59 | ; unroll four times |
62 | mov ebp,ecx |
60 | mov ebp,ecx |
63 | shr ebp,2 |
61 | shr ebp,2 |
64 | 62 | ||
65 | ; save count |
63 | ; save count |
66 | push ecx |
64 | push ecx |
67 | 65 | ||
68 | .L4 mov eax,[esi] |
66 | .L4: mov eax,[esi] |
69 | mov ebx,[esi+4] |
67 | mov ebx,[esi+4] |
70 | 68 | ||
71 | bswap eax |
69 | bswap eax |
72 | 70 | ||
73 | bswap ebx |
71 | bswap ebx |
74 | 72 | ||
75 | ror eax,8 |
73 | ror eax,8 |
76 | mov ecx,[esi+8] |
74 | mov ecx,[esi+8] |
77 | 75 | ||
78 | ror ebx,8 |
76 | ror ebx,8 |
79 | mov edx,[esi+12] |
77 | mov edx,[esi+12] |
80 | 78 | ||
81 | bswap ecx |
79 | bswap ecx |
82 | 80 | ||
83 | bswap edx |
81 | bswap edx |
84 | 82 | ||
85 | ror ecx,8 |
83 | ror ecx,8 |
86 | mov [edi+0],eax |
84 | mov [edi+0],eax |
87 | 85 | ||
88 | ror edx,8 |
86 | ror edx,8 |
89 | mov [edi+4],ebx |
87 | mov [edi+4],ebx |
90 | 88 | ||
91 | mov [edi+8],ecx |
89 | mov [edi+8],ecx |
92 | mov [edi+12],edx |
90 | mov [edi+12],edx |
93 | 91 | ||
94 | add esi,BYTE 16 |
92 | add esi,BYTE 16 |
95 | add edi,BYTE 16 |
93 | add edi,BYTE 16 |
96 | 94 | ||
97 | dec ebp |
95 | dec ebp |
98 | jnz .L4 |
96 | jnz .L4 |
99 | 97 | ||
100 | ; check tail |
98 | ; check tail |
101 | pop ecx |
99 | pop ecx |
102 | and ecx,BYTE 11b |
100 | and ecx,BYTE 11b |
103 | jz .L6 |
101 | jz .L6 |
104 | 102 | ||
105 | .L5 ; tail loop |
103 | .L5: ; tail loop |
106 | mov edx,[esi] |
104 | mov edx,[esi] |
107 | bswap edx |
105 | bswap edx |
108 | ror edx,8 |
106 | ror edx,8 |
109 | mov [edi],edx |
107 | mov [edi],edx |
110 | add esi,BYTE 4 |
108 | add esi,BYTE 4 |
111 | add edi,BYTE 4 |
109 | add edi,BYTE 4 |
112 | dec ecx |
110 | dec ecx |
113 | jnz .L5 |
111 | jnz .L5 |
114 | 112 | ||
115 | .L6 pop ebp |
113 | .L6: pop ebp |
116 | jmp _x86return |
114 | retn |
117 | 115 | ||
118 | 116 | ||
119 | 117 | ||
120 | 118 | ||
121 | _ConvertX86p32_32RGBA888: |
119 | _ConvertX86p32_32RGBA888: |
122 | 120 | ||
123 | ; check short |
121 | ; check short |
124 | cmp ecx,BYTE 32 |
122 | cmp ecx,BYTE 32 |
125 | ja .L3 |
123 | ja .L3 |
126 | 124 | ||
127 | .L1 ; short loop |
125 | .L1: ; short loop |
128 | mov edx,[esi] |
126 | mov edx,[esi] |
129 | rol edx,8 |
127 | rol edx,8 |
130 | mov [edi],edx |
128 | mov [edi],edx |
131 | add esi,BYTE 4 |
129 | add esi,BYTE 4 |
132 | add edi,BYTE 4 |
130 | add edi,BYTE 4 |
133 | dec ecx |
131 | dec ecx |
134 | jnz .L1 |
132 | jnz .L1 |
135 | .L2 |
133 | .L2: |
136 | jmp _x86return |
134 | retn |
137 | 135 | ||
138 | .L3 ; save ebp |
136 | .L3: ; save ebp |
139 | push ebp |
137 | push ebp |
140 | 138 | ||
141 | ; unroll four times |
139 | ; unroll four times |
142 | mov ebp,ecx |
140 | mov ebp,ecx |
143 | shr ebp,2 |
141 | shr ebp,2 |
144 | 142 | ||
145 | ; save count |
143 | ; save count |
146 | push ecx |
144 | push ecx |
147 | 145 | ||
148 | .L4 mov eax,[esi] |
146 | .L4: mov eax,[esi] |
149 | mov ebx,[esi+4] |
147 | mov ebx,[esi+4] |
150 | 148 | ||
151 | rol eax,8 |
149 | rol eax,8 |
152 | mov ecx,[esi+8] |
150 | mov ecx,[esi+8] |
153 | 151 | ||
154 | rol ebx,8 |
152 | rol ebx,8 |
155 | mov edx,[esi+12] |
153 | mov edx,[esi+12] |
156 | 154 | ||
157 | rol ecx,8 |
155 | rol ecx,8 |
158 | mov [edi+0],eax |
156 | mov [edi+0],eax |
159 | 157 | ||
160 | rol edx,8 |
158 | rol edx,8 |
161 | mov [edi+4],ebx |
159 | mov [edi+4],ebx |
162 | 160 | ||
163 | mov [edi+8],ecx |
161 | mov [edi+8],ecx |
164 | mov [edi+12],edx |
162 | mov [edi+12],edx |
165 | 163 | ||
166 | add esi,BYTE 16 |
164 | add esi,BYTE 16 |
167 | add edi,BYTE 16 |
165 | add edi,BYTE 16 |
168 | 166 | ||
169 | dec ebp |
167 | dec ebp |
170 | jnz .L4 |
168 | jnz .L4 |
171 | 169 | ||
172 | ; check tail |
170 | ; check tail |
173 | pop ecx |
171 | pop ecx |
174 | and ecx,BYTE 11b |
172 | and ecx,BYTE 11b |
175 | jz .L6 |
173 | jz .L6 |
176 | 174 | ||
177 | .L5 ; tail loop |
175 | .L5: ; tail loop |
178 | mov edx,[esi] |
176 | mov edx,[esi] |
179 | rol edx,8 |
177 | rol edx,8 |
180 | mov [edi],edx |
178 | mov [edi],edx |
181 | add esi,BYTE 4 |
179 | add esi,BYTE 4 |
182 | add edi,BYTE 4 |
180 | add edi,BYTE 4 |
183 | dec ecx |
181 | dec ecx |
184 | jnz .L5 |
182 | jnz .L5 |
185 | 183 | ||
186 | .L6 pop ebp |
184 | .L6: pop ebp |
187 | jmp _x86return |
185 | retn |
188 | 186 | ||
189 | 187 | ||
190 | 188 | ||
191 | 189 | ||
192 | _ConvertX86p32_32BGRA888: |
190 | _ConvertX86p32_32BGRA888: |
193 | 191 | ||
194 | ; check short |
192 | ; check short |
195 | cmp ecx,BYTE 32 |
193 | cmp ecx,BYTE 32 |
196 | ja .L3 |
194 | ja .L3 |
197 | 195 | ||
198 | .L1 ; short loop |
196 | .L1: ; short loop |
199 | mov edx,[esi] |
197 | mov edx,[esi] |
200 | bswap edx |
198 | bswap edx |
201 | mov [edi],edx |
199 | mov [edi],edx |
202 | add esi,BYTE 4 |
200 | add esi,BYTE 4 |
203 | add edi,BYTE 4 |
201 | add edi,BYTE 4 |
204 | dec ecx |
202 | dec ecx |
205 | jnz .L1 |
203 | jnz .L1 |
206 | .L2 |
204 | .L2: |
207 | jmp _x86return |
205 | retn |
208 | 206 | ||
209 | .L3 ; save ebp |
207 | .L3: ; save ebp |
210 | push ebp |
208 | push ebp |
211 | 209 | ||
212 | ; unroll four times |
210 | ; unroll four times |
213 | mov ebp,ecx |
211 | mov ebp,ecx |
214 | shr ebp,2 |
212 | shr ebp,2 |
215 | 213 | ||
216 | ; save count |
214 | ; save count |
217 | push ecx |
215 | push ecx |
218 | 216 | ||
219 | .L4 mov eax,[esi] |
217 | .L4: mov eax,[esi] |
220 | mov ebx,[esi+4] |
218 | mov ebx,[esi+4] |
221 | 219 | ||
222 | mov ecx,[esi+8] |
220 | mov ecx,[esi+8] |
223 | mov edx,[esi+12] |
221 | mov edx,[esi+12] |
224 | 222 | ||
225 | bswap eax |
223 | bswap eax |
226 | 224 | ||
227 | bswap ebx |
225 | bswap ebx |
228 | 226 | ||
229 | bswap ecx |
227 | bswap ecx |
230 | 228 | ||
231 | bswap edx |
229 | bswap edx |
232 | 230 | ||
233 | mov [edi+0],eax |
231 | mov [edi+0],eax |
234 | mov [edi+4],ebx |
232 | mov [edi+4],ebx |
235 | 233 | ||
236 | mov [edi+8],ecx |
234 | mov [edi+8],ecx |
237 | mov [edi+12],edx |
235 | mov [edi+12],edx |
238 | 236 | ||
239 | add esi,BYTE 16 |
237 | add esi,BYTE 16 |
240 | add edi,BYTE 16 |
238 | add edi,BYTE 16 |
241 | 239 | ||
242 | dec ebp |
240 | dec ebp |
243 | jnz .L4 |
241 | jnz .L4 |
244 | 242 | ||
245 | ; check tail |
243 | ; check tail |
246 | pop ecx |
244 | pop ecx |
247 | and ecx,BYTE 11b |
245 | and ecx,BYTE 11b |
248 | jz .L6 |
246 | jz .L6 |
249 | 247 | ||
250 | .L5 ; tail loop |
248 | .L5: ; tail loop |
251 | mov edx,[esi] |
249 | mov edx,[esi] |
252 | bswap edx |
250 | bswap edx |
253 | mov [edi],edx |
251 | mov [edi],edx |
254 | add esi,BYTE 4 |
252 | add esi,BYTE 4 |
255 | add edi,BYTE 4 |
253 | add edi,BYTE 4 |
256 | dec ecx |
254 | dec ecx |
257 | jnz .L5 |
255 | jnz .L5 |
258 | 256 | ||
259 | .L6 pop ebp |
257 | .L6: pop ebp |
260 | jmp _x86return |
258 | retn |
261 | 259 | ||
262 | 260 | ||
263 | 261 | ||
264 | 262 | ||
265 | ;; 32 bit RGB 888 to 24 BIT RGB 888 |
263 | ;; 32 bit RGB 888 to 24 BIT RGB 888 |
266 | 264 | ||
267 | _ConvertX86p32_24RGB888: |
265 | _ConvertX86p32_24RGB888: |
268 | 266 | ||
269 | ; check short |
267 | ; check short |
270 | cmp ecx,BYTE 32 |
268 | cmp ecx,BYTE 32 |
271 | ja .L3 |
269 | ja .L3 |
272 | 270 | ||
273 | .L1 ; short loop |
271 | .L1: ; short loop |
274 | mov al,[esi] |
272 | mov al,[esi] |
275 | mov bl,[esi+1] |
273 | mov bl,[esi+1] |
276 | mov dl,[esi+2] |
274 | mov dl,[esi+2] |
277 | mov [edi],al |
275 | mov [edi],al |
278 | mov [edi+1],bl |
276 | mov [edi+1],bl |
279 | mov [edi+2],dl |
277 | mov [edi+2],dl |
280 | add esi,BYTE 4 |
278 | add esi,BYTE 4 |
281 | add edi,BYTE 3 |
279 | add edi,BYTE 3 |
282 | dec ecx |
280 | dec ecx |
283 | jnz .L1 |
281 | jnz .L1 |
284 | .L2 |
282 | .L2: |
285 | jmp _x86return |
283 | retn |
286 | 284 | ||
287 | .L3 ; head |
285 | .L3: ; head |
288 | mov edx,edi |
286 | mov edx,edi |
289 | and edx,BYTE 11b |
287 | and edx,BYTE 11b |
290 | jz .L4 |
288 | jz .L4 |
291 | mov al,[esi] |
289 | mov al,[esi] |
292 | mov bl,[esi+1] |
290 | mov bl,[esi+1] |
293 | mov dl,[esi+2] |
291 | mov dl,[esi+2] |
294 | mov [edi],al |
292 | mov [edi],al |
295 | mov [edi+1],bl |
293 | mov [edi+1],bl |
296 | mov [edi+2],dl |
294 | mov [edi+2],dl |
297 | add esi,BYTE 4 |
295 | add esi,BYTE 4 |
298 | add edi,BYTE 3 |
296 | add edi,BYTE 3 |
299 | dec ecx |
297 | dec ecx |
300 | jmp SHORT .L3 |
298 | jmp SHORT .L3 |
301 | 299 | ||
302 | .L4 ; unroll 4 times |
300 | .L4: ; unroll 4 times |
303 | push ebp |
301 | push ebp |
304 | mov ebp,ecx |
302 | mov ebp,ecx |
305 | shr ebp,2 |
303 | shr ebp,2 |
306 | 304 | ||
307 | ; save count |
305 | ; save count |
308 | push ecx |
306 | push ecx |
309 | 307 | ||
310 | .L5 mov eax,[esi] ; first dword eax = [A][R][G][B] |
308 | .L5: mov eax,[esi] ; first dword eax = [A][R][G][B] |
311 | mov ebx,[esi+4] ; second dword ebx = [a][r][g][b] |
309 | mov ebx,[esi+4] ; second dword ebx = [a][r][g][b] |
312 | 310 | ||
313 | shl eax,8 ; eax = [R][G][B][.] |
311 | shl eax,8 ; eax = [R][G][B][.] |
314 | mov ecx,[esi+12] ; fourth dword ecx = [a][r][g][b] |
312 | mov ecx,[esi+12] ; fourth dword ecx = [a][r][g][b] |
315 | 313 | ||
316 | shl ebx,8 ; ebx = [r][g][b][.] |
314 | shl ebx,8 ; ebx = [r][g][b][.] |
317 | mov al,[esi+4] ; eax = [R][G][B][b] |
315 | mov al,[esi+4] ; eax = [R][G][B][b] |
318 | 316 | ||
319 | ror eax,8 ; eax = [b][R][G][B] (done) |
317 | ror eax,8 ; eax = [b][R][G][B] (done) |
320 | mov bh,[esi+8+1] ; ebx = [r][g][G][.] |
318 | mov bh,[esi+8+1] ; ebx = [r][g][G][.] |
321 | 319 | ||
322 | mov [edi],eax |
320 | mov [edi],eax |
323 | add edi,BYTE 3*4 |
321 | add edi,BYTE 3*4 |
324 | 322 | ||
325 | shl ecx,8 ; ecx = [r][g][b][.] |
323 | shl ecx,8 ; ecx = [r][g][b][.] |
326 | mov bl,[esi+8+0] ; ebx = [r][g][G][B] |
324 | mov bl,[esi+8+0] ; ebx = [r][g][G][B] |
327 | 325 | ||
328 | rol ebx,16 ; ebx = [G][B][r][g] (done) |
326 | rol ebx,16 ; ebx = [G][B][r][g] (done) |
329 | mov cl,[esi+8+2] ; ecx = [r][g][b][R] (done) |
327 | mov cl,[esi+8+2] ; ecx = [r][g][b][R] (done) |
330 | 328 | ||
331 | mov [edi+4-3*4],ebx |
329 | mov [edi+4-3*4],ebx |
332 | add esi,BYTE 4*4 |
330 | add esi,BYTE 4*4 |
333 | 331 | ||
334 | mov [edi+8-3*4],ecx |
332 | mov [edi+8-3*4],ecx |
335 | dec ebp |
333 | dec ebp |
336 | 334 | ||
337 | jnz .L5 |
335 | jnz .L5 |
338 | 336 | ||
339 | ; check tail |
337 | ; check tail |
340 | pop ecx |
338 | pop ecx |
341 | and ecx,BYTE 11b |
339 | and ecx,BYTE 11b |
342 | jz .L7 |
340 | jz .L7 |
343 | 341 | ||
344 | .L6 ; tail loop |
342 | .L6: ; tail loop |
345 | mov al,[esi] |
343 | mov al,[esi] |
346 | mov bl,[esi+1] |
344 | mov bl,[esi+1] |
347 | mov dl,[esi+2] |
345 | mov dl,[esi+2] |
348 | mov [edi],al |
346 | mov [edi],al |
349 | mov [edi+1],bl |
347 | mov [edi+1],bl |
350 | mov [edi+2],dl |
348 | mov [edi+2],dl |
351 | add esi,BYTE 4 |
349 | add esi,BYTE 4 |
352 | add edi,BYTE 3 |
350 | add edi,BYTE 3 |
353 | dec ecx |
351 | dec ecx |
354 | jnz .L6 |
352 | jnz .L6 |
355 | 353 | ||
356 | .L7 pop ebp |
354 | .L7: pop ebp |
357 | jmp _x86return |
355 | retn |
358 | 356 | ||
359 | 357 | ||
360 | 358 | ||
361 | 359 | ||
362 | ;; 32 bit RGB 888 to 24 bit BGR 888 |
360 | ;; 32 bit RGB 888 to 24 bit BGR 888 |
363 | 361 | ||
364 | _ConvertX86p32_24BGR888: |
362 | _ConvertX86p32_24BGR888: |
365 | 363 | ||
366 | ; check short |
364 | ; check short |
367 | cmp ecx,BYTE 32 |
365 | cmp ecx,BYTE 32 |
368 | ja .L3 |
366 | ja .L3 |
369 | - | ||
370 | 367 | ||
371 | .L1 ; short loop |
368 | .L1: ; short loop |
372 | mov dl,[esi] |
369 | mov dl,[esi] |
373 | mov bl,[esi+1] |
370 | mov bl,[esi+1] |
374 | mov al,[esi+2] |
371 | mov al,[esi+2] |
375 | mov [edi],al |
372 | mov [edi],al |
376 | mov [edi+1],bl |
373 | mov [edi+1],bl |
377 | mov [edi+2],dl |
374 | mov [edi+2],dl |
378 | add esi,BYTE 4 |
375 | add esi,BYTE 4 |
379 | add edi,BYTE 3 |
376 | add edi,BYTE 3 |
380 | dec ecx |
377 | dec ecx |
381 | jnz .L1 |
378 | jnz .L1 |
382 | .L2 |
379 | .L2: |
383 | jmp _x86return |
380 | retn |
384 | 381 | ||
385 | .L3 ; head |
382 | .L3: ; head |
386 | mov edx,edi |
383 | mov edx,edi |
387 | and edx,BYTE 11b |
384 | and edx,BYTE 11b |
388 | jz .L4 |
385 | jz .L4 |
389 | mov dl,[esi] |
386 | mov dl,[esi] |
390 | mov bl,[esi+1] |
387 | mov bl,[esi+1] |
391 | mov al,[esi+2] |
388 | mov al,[esi+2] |
392 | mov [edi],al |
389 | mov [edi],al |
393 | mov [edi+1],bl |
390 | mov [edi+1],bl |
394 | mov [edi+2],dl |
391 | mov [edi+2],dl |
395 | add esi,BYTE 4 |
392 | add esi,BYTE 4 |
396 | add edi,BYTE 3 |
393 | add edi,BYTE 3 |
397 | dec ecx |
394 | dec ecx |
398 | jmp SHORT .L3 |
395 | jmp SHORT .L3 |
399 | 396 | ||
400 | .L4 ; unroll 4 times |
397 | .L4: ; unroll 4 times |
401 | push ebp |
398 | push ebp |
402 | mov ebp,ecx |
399 | mov ebp,ecx |
403 | shr ebp,2 |
400 | shr ebp,2 |
404 | 401 | ||
405 | ; save count |
402 | ; save count |
406 | push ecx |
403 | push ecx |
407 | 404 | ||
408 | .L5 |
405 | .L5: |
409 | mov eax,[esi] ; first dword eax = [A][R][G][B] |
406 | mov eax,[esi] ; first dword eax = [A][R][G][B] |
410 | mov ebx,[esi+4] ; second dword ebx = [a][r][g][b] |
407 | mov ebx,[esi+4] ; second dword ebx = [a][r][g][b] |
411 | 408 | ||
412 | bswap eax ; eax = [B][G][R][A] |
409 | bswap eax ; eax = [B][G][R][A] |
413 | 410 | ||
414 | bswap ebx ; ebx = [b][g][r][a] |
411 | bswap ebx ; ebx = [b][g][r][a] |
415 | 412 | ||
416 | mov al,[esi+4+2] ; eax = [B][G][R][r] |
413 | mov al,[esi+4+2] ; eax = [B][G][R][r] |
417 | mov bh,[esi+4+4+1] ; ebx = [b][g][G][a] |
414 | mov bh,[esi+4+4+1] ; ebx = [b][g][G][a] |
418 | 415 | ||
419 | ror eax,8 ; eax = [r][B][G][R] (done) |
416 | ror eax,8 ; eax = [r][B][G][R] (done) |
420 | mov bl,[esi+4+4+2] ; ebx = [b][g][G][R] |
417 | mov bl,[esi+4+4+2] ; ebx = [b][g][G][R] |
421 | 418 | ||
422 | ror ebx,16 ; ebx = [G][R][b][g] (done) |
419 | ror ebx,16 ; ebx = [G][R][b][g] (done) |
423 | mov [edi],eax |
420 | mov [edi],eax |
424 | 421 | ||
425 | mov [edi+4],ebx |
422 | mov [edi+4],ebx |
426 | mov ecx,[esi+12] ; fourth dword ecx = [a][r][g][b] |
423 | mov ecx,[esi+12] ; fourth dword ecx = [a][r][g][b] |
427 | 424 | ||
428 | bswap ecx ; ecx = [b][g][r][a] |
425 | bswap ecx ; ecx = [b][g][r][a] |
429 | 426 | ||
430 | mov cl,[esi+8] ; ecx = [b][g][r][B] (done) |
427 | mov cl,[esi+8] ; ecx = [b][g][r][B] (done) |
431 | add esi,BYTE 4*4 |
428 | add esi,BYTE 4*4 |
432 | 429 | ||
433 | mov [edi+8],ecx |
430 | mov [edi+8],ecx |
434 | add edi,BYTE 3*4 |
431 | add edi,BYTE 3*4 |
435 | 432 | ||
436 | dec ebp |
433 | dec ebp |
437 | jnz .L5 |
434 | jnz .L5 |
438 | 435 | ||
439 | ; check tail |
436 | ; check tail |
440 | pop ecx |
437 | pop ecx |
441 | and ecx,BYTE 11b |
438 | and ecx,BYTE 11b |
442 | jz .L7 |
439 | jz .L7 |
443 | 440 | ||
444 | .L6 ; tail loop |
441 | .L6: ; tail loop |
445 | mov dl,[esi] |
442 | mov dl,[esi] |
446 | mov bl,[esi+1] |
443 | mov bl,[esi+1] |
447 | mov al,[esi+2] |
444 | mov al,[esi+2] |
448 | mov [edi],al |
445 | mov [edi],al |
449 | mov [edi+1],bl |
446 | mov [edi+1],bl |
450 | mov [edi+2],dl |
447 | mov [edi+2],dl |
451 | add esi,BYTE 4 |
448 | add esi,BYTE 4 |
452 | add edi,BYTE 3 |
449 | add edi,BYTE 3 |
453 | dec ecx |
450 | dec ecx |
454 | jnz .L6 |
451 | jnz .L6 |
455 | 452 | ||
456 | .L7 |
453 | .L7: |
457 | pop ebp |
454 | pop ebp |
458 | jmp _x86return |
455 | retn |
459 | 456 | ||
460 | 457 | ||
461 | 458 | ||
462 | 459 | ||
463 | ;; 32 bit RGB 888 to 16 BIT RGB 565 |
460 | ;; 32 bit RGB 888 to 16 BIT RGB 565 |
464 | 461 | ||
465 | _ConvertX86p32_16RGB565: |
462 | _ConvertX86p32_16RGB565: |
466 | ; check short |
463 | ; check short |
467 | cmp ecx,BYTE 16 |
464 | cmp ecx,BYTE 16 |
468 | ja .L3 |
465 | ja .L3 |
469 | 466 | ||
470 | .L1 ; short loop |
467 | .L1: ; short loop |
471 | mov bl,[esi+0] ; blue |
468 | mov bl,[esi+0] ; blue |
472 | mov al,[esi+1] ; green |
469 | mov al,[esi+1] ; green |
473 | mov ah,[esi+2] ; red |
470 | mov ah,[esi+2] ; red |
474 | shr ah,3 |
471 | shr ah,3 |
475 | and al,11111100b |
472 | and al,11111100b |
476 | shl eax,3 |
473 | shl eax,3 |
477 | shr bl,3 |
474 | shr bl,3 |
478 | add al,bl |
475 | add al,bl |
479 | mov [edi+0],al |
476 | mov [edi+0],al |
480 | mov [edi+1],ah |
477 | mov [edi+1],ah |
481 | add esi,BYTE 4 |
478 | add esi,BYTE 4 |
482 | add edi,BYTE 2 |
479 | add edi,BYTE 2 |
483 | dec ecx |
480 | dec ecx |
484 | jnz .L1 |
481 | jnz .L1 |
485 | 482 | ||
486 | .L2: ; End of short loop |
483 | .L2: ; End of short loop |
487 | jmp _x86return |
484 | retn |
488 | 485 | ||
489 | 486 | ||
490 | .L3 ; head |
487 | .L3: ; head |
491 | mov ebx,edi |
488 | mov ebx,edi |
492 | and ebx,BYTE 11b |
489 | and ebx,BYTE 11b |
493 | jz .L4 |
490 | jz .L4 |
494 | 491 | ||
495 | mov bl,[esi+0] ; blue |
492 | mov bl,[esi+0] ; blue |
496 | mov al,[esi+1] ; green |
493 | mov al,[esi+1] ; green |
497 | mov ah,[esi+2] ; red |
494 | mov ah,[esi+2] ; red |
498 | shr ah,3 |
495 | shr ah,3 |
499 | and al,11111100b |
496 | and al,11111100b |
500 | shl eax,3 |
497 | shl eax,3 |
501 | shr bl,3 |
498 | shr bl,3 |
502 | add al,bl |
499 | add al,bl |
503 | mov [edi+0],al |
500 | mov [edi+0],al |
504 | mov [edi+1],ah |
501 | mov [edi+1],ah |
505 | add esi,BYTE 4 |
502 | add esi,BYTE 4 |
506 | add edi,BYTE 2 |
503 | add edi,BYTE 2 |
507 | dec ecx |
504 | dec ecx |
508 | 505 | ||
509 | .L4: |
506 | .L4: |
510 | ; save count |
507 | ; save count |
511 | push ecx |
508 | push ecx |
512 | 509 | ||
513 | ; unroll twice |
510 | ; unroll twice |
514 | shr ecx,1 |
511 | shr ecx,1 |
515 | 512 | ||
516 | ; point arrays to end |
513 | ; point arrays to end |
517 | lea esi,[esi+ecx*8] |
514 | lea esi,[esi+ecx*8] |
518 | lea edi,[edi+ecx*4] |
515 | lea edi,[edi+ecx*4] |
519 | 516 | ||
520 | ; negative counter |
517 | ; negative counter |
521 | neg ecx |
518 | neg ecx |
522 | jmp SHORT .L6 |
519 | jmp SHORT .L6 |
523 | 520 | ||
524 | .L5: |
521 | .L5: |
525 | mov [edi+ecx*4-4],eax |
522 | mov [edi+ecx*4-4],eax |
526 | .L6: |
523 | .L6: |
527 | mov eax,[esi+ecx*8] |
524 | mov eax,[esi+ecx*8] |
528 | 525 | ||
529 | shr ah,2 |
526 | shr ah,2 |
530 | mov ebx,[esi+ecx*8+4] |
527 | mov ebx,[esi+ecx*8+4] |
531 | 528 | ||
532 | shr eax,3 |
529 | shr eax,3 |
533 | mov edx,[esi+ecx*8+4] |
530 | mov edx,[esi+ecx*8+4] |
534 | 531 | ||
535 | shr bh,2 |
532 | shr bh,2 |
536 | mov dl,[esi+ecx*8+2] |
533 | mov dl,[esi+ecx*8+2] |
537 | 534 | ||
538 | shl ebx,13 |
535 | shl ebx,13 |
539 | and eax,000007FFh |
536 | and eax,000007FFh |
540 | 537 | ||
541 | shl edx,8 |
538 | shl edx,8 |
542 | and ebx,07FF0000h |
539 | and ebx,07FF0000h |
543 | 540 | ||
544 | and edx,0F800F800h |
541 | and edx,0F800F800h |
545 | add eax,ebx |
542 | add eax,ebx |
546 | 543 | ||
547 | add eax,edx |
544 | add eax,edx |
548 | inc ecx |
545 | inc ecx |
549 | 546 | ||
550 | jnz .L5 |
547 | jnz .L5 |
551 | 548 | ||
552 | mov [edi+ecx*4-4],eax |
549 | mov [edi+ecx*4-4],eax |
553 | 550 | ||
554 | ; tail |
551 | ; tail |
555 | pop ecx |
552 | pop ecx |
556 | test cl,1 |
553 | test cl,1 |
557 | jz .L7 |
554 | jz .L7 |
558 | 555 | ||
559 | mov bl,[esi+0] ; blue |
556 | mov bl,[esi+0] ; blue |
560 | mov al,[esi+1] ; green |
557 | mov al,[esi+1] ; green |
561 | mov ah,[esi+2] ; red |
558 | mov ah,[esi+2] ; red |
562 | shr ah,3 |
559 | shr ah,3 |
563 | and al,11111100b |
560 | and al,11111100b |
564 | shl eax,3 |
561 | shl eax,3 |
565 | shr bl,3 |
562 | shr bl,3 |
566 | add al,bl |
563 | add al,bl |
567 | mov [edi+0],al |
564 | mov [edi+0],al |
568 | mov [edi+1],ah |
565 | mov [edi+1],ah |
569 | add esi,BYTE 4 |
566 | add esi,BYTE 4 |
570 | add edi,BYTE 2 |
567 | add edi,BYTE 2 |
571 | 568 | ||
572 | .L7: |
569 | .L7: |
573 | jmp _x86return |
570 | retn |
574 | 571 | ||
575 | 572 | ||
576 | 573 | ||
577 | 574 | ||
578 | ;; 32 bit RGB 888 to 16 BIT BGR 565 |
575 | ;; 32 bit RGB 888 to 16 BIT BGR 565 |
579 | 576 | ||
580 | _ConvertX86p32_16BGR565: |
577 | _ConvertX86p32_16BGR565: |
581 | 578 | ||
582 | ; check short |
579 | ; check short |
583 | cmp ecx,BYTE 16 |
580 | cmp ecx,BYTE 16 |
584 | ja .L3 |
581 | ja .L3 |
585 | 582 | ||
586 | .L1 ; short loop |
583 | .L1: ; short loop |
587 | mov ah,[esi+0] ; blue |
584 | mov ah,[esi+0] ; blue |
588 | mov al,[esi+1] ; green |
585 | mov al,[esi+1] ; green |
589 | mov bl,[esi+2] ; red |
586 | mov bl,[esi+2] ; red |
590 | shr ah,3 |
587 | shr ah,3 |
591 | and al,11111100b |
588 | and al,11111100b |
592 | shl eax,3 |
589 | shl eax,3 |
593 | shr bl,3 |
590 | shr bl,3 |
594 | add al,bl |
591 | add al,bl |
595 | mov [edi+0],al |
592 | mov [edi+0],al |
596 | mov [edi+1],ah |
593 | mov [edi+1],ah |
597 | add esi,BYTE 4 |
594 | add esi,BYTE 4 |
598 | add edi,BYTE 2 |
595 | add edi,BYTE 2 |
599 | dec ecx |
596 | dec ecx |
600 | jnz .L1 |
597 | jnz .L1 |
601 | .L2 |
598 | .L2: |
602 | jmp _x86return |
599 | retn |
603 | 600 | ||
604 | .L3 ; head |
601 | .L3: ; head |
605 | mov ebx,edi |
602 | mov ebx,edi |
606 | and ebx,BYTE 11b |
603 | and ebx,BYTE 11b |
607 | jz .L4 |
604 | jz .L4 |
608 | mov ah,[esi+0] ; blue |
605 | mov ah,[esi+0] ; blue |
609 | mov al,[esi+1] ; green |
606 | mov al,[esi+1] ; green |
610 | mov bl,[esi+2] ; red |
607 | mov bl,[esi+2] ; red |
611 | shr ah,3 |
608 | shr ah,3 |
612 | and al,11111100b |
609 | and al,11111100b |
613 | shl eax,3 |
610 | shl eax,3 |
614 | shr bl,3 |
611 | shr bl,3 |
615 | add al,bl |
612 | add al,bl |
616 | mov [edi+0],al |
613 | mov [edi+0],al |
617 | mov [edi+1],ah |
614 | mov [edi+1],ah |
618 | add esi,BYTE 4 |
615 | add esi,BYTE 4 |
619 | add edi,BYTE 2 |
616 | add edi,BYTE 2 |
620 | dec ecx |
617 | dec ecx |
621 | 618 | ||
622 | .L4 ; save count |
619 | .L4: ; save count |
623 | push ecx |
620 | push ecx |
624 | 621 | ||
625 | ; unroll twice |
622 | ; unroll twice |
626 | shr ecx,1 |
623 | shr ecx,1 |
627 | 624 | ||
628 | ; point arrays to end |
625 | ; point arrays to end |
629 | lea esi,[esi+ecx*8] |
626 | lea esi,[esi+ecx*8] |
630 | lea edi,[edi+ecx*4] |
627 | lea edi,[edi+ecx*4] |
631 | 628 | ||
632 | ; negative count |
629 | ; negative count |
633 | neg ecx |
630 | neg ecx |
634 | jmp SHORT .L6 |
631 | jmp SHORT .L6 |
635 | 632 | ||
636 | .L5 |
633 | .L5: |
637 | mov [edi+ecx*4-4],eax |
634 | mov [edi+ecx*4-4],eax |
638 | .L6 |
635 | .L6: |
639 | mov edx,[esi+ecx*8+4] |
636 | mov edx,[esi+ecx*8+4] |
640 | 637 | ||
641 | mov bh,[esi+ecx*8+4] |
638 | mov bh,[esi+ecx*8+4] |
642 | mov ah,[esi+ecx*8] |
639 | mov ah,[esi+ecx*8] |
643 | 640 | ||
644 | shr bh,3 |
641 | shr bh,3 |
645 | mov al,[esi+ecx*8+1] |
642 | mov al,[esi+ecx*8+1] |
646 | 643 | ||
647 | shr ah,3 |
644 | shr ah,3 |
648 | mov bl,[esi+ecx*8+5] |
645 | mov bl,[esi+ecx*8+5] |
649 | 646 | ||
650 | shl eax,3 |
647 | shl eax,3 |
651 | mov dl,[esi+ecx*8+2] |
648 | mov dl,[esi+ecx*8+2] |
652 | 649 | ||
653 | shl ebx,19 |
650 | shl ebx,19 |
654 | and eax,0000FFE0h |
651 | and eax,0000FFE0h |
655 | 652 | ||
656 | shr edx,3 |
653 | shr edx,3 |
657 | and ebx,0FFE00000h |
654 | and ebx,0FFE00000h |
658 | 655 | ||
659 | and edx,001F001Fh |
656 | and edx,001F001Fh |
660 | add eax,ebx |
657 | add eax,ebx |
661 | 658 | ||
662 | add eax,edx |
659 | add eax,edx |
663 | inc ecx |
660 | inc ecx |
664 | 661 | ||
665 | jnz .L5 |
662 | jnz .L5 |
666 | 663 | ||
667 | mov [edi+ecx*4-4],eax |
664 | mov [edi+ecx*4-4],eax |
668 | 665 | ||
669 | ; tail |
666 | ; tail |
670 | pop ecx |
667 | pop ecx |
671 | and ecx,BYTE 1 |
668 | and ecx,BYTE 1 |
672 | jz .L7 |
669 | jz .L7 |
673 | mov ah,[esi+0] ; blue |
670 | mov ah,[esi+0] ; blue |
674 | mov al,[esi+1] ; green |
671 | mov al,[esi+1] ; green |
675 | mov bl,[esi+2] ; red |
672 | mov bl,[esi+2] ; red |
676 | shr ah,3 |
673 | shr ah,3 |
677 | and al,11111100b |
674 | and al,11111100b |
678 | shl eax,3 |
675 | shl eax,3 |
679 | shr bl,3 |
676 | shr bl,3 |
680 | add al,bl |
677 | add al,bl |
681 | mov [edi+0],al |
678 | mov [edi+0],al |
682 | mov [edi+1],ah |
679 | mov [edi+1],ah |
683 | add esi,BYTE 4 |
680 | add esi,BYTE 4 |
684 | add edi,BYTE 2 |
681 | add edi,BYTE 2 |
685 | 682 | ||
686 | .L7 |
683 | .L7: |
687 | jmp _x86return |
684 | retn |
688 | 685 | ||
689 | 686 | ||
690 | 687 | ||
691 | 688 | ||
692 | ;; 32 BIT RGB TO 16 BIT RGB 555 |
689 | ;; 32 BIT RGB TO 16 BIT RGB 555 |
693 | 690 | ||
694 | _ConvertX86p32_16RGB555: |
691 | _ConvertX86p32_16RGB555: |
695 | 692 | ||
696 | ; check short |
693 | ; check short |
697 | cmp ecx,BYTE 16 |
694 | cmp ecx,BYTE 16 |
698 | ja .L3 |
695 | ja .L3 |
699 | 696 | ||
700 | .L1 ; short loop |
697 | .L1: ; short loop |
701 | mov bl,[esi+0] ; blue |
698 | mov bl,[esi+0] ; blue |
702 | mov al,[esi+1] ; green |
699 | mov al,[esi+1] ; green |
703 | mov ah,[esi+2] ; red |
700 | mov ah,[esi+2] ; red |
704 | shr ah,3 |
701 | shr ah,3 |
705 | and al,11111000b |
702 | and al,11111000b |
706 | shl eax,2 |
703 | shl eax,2 |
707 | shr bl,3 |
704 | shr bl,3 |
708 | add al,bl |
705 | add al,bl |
709 | mov [edi+0],al |
706 | mov [edi+0],al |
710 | mov [edi+1],ah |
707 | mov [edi+1],ah |
711 | add esi,BYTE 4 |
708 | add esi,BYTE 4 |
712 | add edi,BYTE 2 |
709 | add edi,BYTE 2 |
713 | dec ecx |
710 | dec ecx |
714 | jnz .L1 |
711 | jnz .L1 |
715 | .L2 |
712 | .L2: |
716 | jmp _x86return |
713 | retn |
717 | 714 | ||
718 | .L3 ; head |
715 | .L3: ; head |
719 | mov ebx,edi |
716 | mov ebx,edi |
720 | and ebx,BYTE 11b |
717 | and ebx,BYTE 11b |
721 | jz .L4 |
718 | jz .L4 |
722 | mov bl,[esi+0] ; blue |
719 | mov bl,[esi+0] ; blue |
723 | mov al,[esi+1] ; green |
720 | mov al,[esi+1] ; green |
724 | mov ah,[esi+2] ; red |
721 | mov ah,[esi+2] ; red |
725 | shr ah,3 |
722 | shr ah,3 |
726 | and al,11111000b |
723 | and al,11111000b |
727 | shl eax,2 |
724 | shl eax,2 |
728 | shr bl,3 |
725 | shr bl,3 |
729 | add al,bl |
726 | add al,bl |
730 | mov [edi+0],al |
727 | mov [edi+0],al |
731 | mov [edi+1],ah |
728 | mov [edi+1],ah |
732 | add esi,BYTE 4 |
729 | add esi,BYTE 4 |
733 | add edi,BYTE 2 |
730 | add edi,BYTE 2 |
734 | dec ecx |
731 | dec ecx |
735 | 732 | ||
736 | .L4 ; save count |
733 | .L4: ; save count |
737 | push ecx |
734 | push ecx |
738 | 735 | ||
739 | ; unroll twice |
736 | ; unroll twice |
740 | shr ecx,1 |
737 | shr ecx,1 |
741 | 738 | ||
742 | ; point arrays to end |
739 | ; point arrays to end |
743 | lea esi,[esi+ecx*8] |
740 | lea esi,[esi+ecx*8] |
744 | lea edi,[edi+ecx*4] |
741 | lea edi,[edi+ecx*4] |
745 | 742 | ||
746 | ; negative counter |
743 | ; negative counter |
747 | neg ecx |
744 | neg ecx |
748 | jmp SHORT .L6 |
745 | jmp SHORT .L6 |
749 | 746 | ||
750 | .L5 |
747 | .L5: |
751 | mov [edi+ecx*4-4],eax |
748 | mov [edi+ecx*4-4],eax |
752 | .L6 |
749 | .L6: |
753 | mov eax,[esi+ecx*8] |
750 | mov eax,[esi+ecx*8] |
754 | 751 | ||
755 | shr ah,3 |
752 | shr ah,3 |
756 | mov ebx,[esi+ecx*8+4] |
753 | mov ebx,[esi+ecx*8+4] |
757 | 754 | ||
758 | shr eax,3 |
755 | shr eax,3 |
759 | mov edx,[esi+ecx*8+4] |
756 | mov edx,[esi+ecx*8+4] |
760 | 757 | ||
761 | shr bh,3 |
758 | shr bh,3 |
762 | mov dl,[esi+ecx*8+2] |
759 | mov dl,[esi+ecx*8+2] |
763 | 760 | ||
764 | shl ebx,13 |
761 | shl ebx,13 |
765 | and eax,000007FFh |
762 | and eax,000007FFh |
766 | 763 | ||
767 | shl edx,7 |
764 | shl edx,7 |
768 | and ebx,07FF0000h |
765 | and ebx,07FF0000h |
769 | 766 | ||
770 | and edx,07C007C00h |
767 | and edx,07C007C00h |
771 | add eax,ebx |
768 | add eax,ebx |
772 | 769 | ||
773 | add eax,edx |
770 | add eax,edx |
774 | inc ecx |
771 | inc ecx |
775 | 772 | ||
776 | jnz .L5 |
773 | jnz .L5 |
777 | 774 | ||
778 | mov [edi+ecx*4-4],eax |
775 | mov [edi+ecx*4-4],eax |
779 | 776 | ||
780 | ; tail |
777 | ; tail |
781 | pop ecx |
778 | pop ecx |
782 | and ecx,BYTE 1 |
779 | and ecx,BYTE 1 |
783 | jz .L7 |
780 | jz .L7 |
784 | mov bl,[esi+0] ; blue |
781 | mov bl,[esi+0] ; blue |
785 | mov al,[esi+1] ; green |
782 | mov al,[esi+1] ; green |
786 | mov ah,[esi+2] ; red |
783 | mov ah,[esi+2] ; red |
787 | shr ah,3 |
784 | shr ah,3 |
788 | and al,11111000b |
785 | and al,11111000b |
789 | shl eax,2 |
786 | shl eax,2 |
790 | shr bl,3 |
787 | shr bl,3 |
791 | add al,bl |
788 | add al,bl |
792 | mov [edi+0],al |
789 | mov [edi+0],al |
793 | mov [edi+1],ah |
790 | mov [edi+1],ah |
794 | add esi,BYTE 4 |
791 | add esi,BYTE 4 |
795 | add edi,BYTE 2 |
792 | add edi,BYTE 2 |
796 | 793 | ||
797 | .L7 |
794 | .L7: |
798 | jmp _x86return |
795 | retn |
799 | 796 | ||
800 | 797 | ||
801 | 798 | ||
802 | 799 | ||
803 | ;; 32 BIT RGB TO 16 BIT BGR 555 |
800 | ;; 32 BIT RGB TO 16 BIT BGR 555 |
804 | 801 | ||
805 | _ConvertX86p32_16BGR555: |
802 | _ConvertX86p32_16BGR555: |
806 | 803 | ||
807 | ; check short |
804 | ; check short |
808 | cmp ecx,BYTE 16 |
805 | cmp ecx,BYTE 16 |
809 | ja .L3 |
806 | ja .L3 |
810 | 807 | ||
811 | 808 | ||
812 | .L1 ; short loop |
809 | .L1: ; short loop |
813 | mov ah,[esi+0] ; blue |
810 | mov ah,[esi+0] ; blue |
814 | mov al,[esi+1] ; green |
811 | mov al,[esi+1] ; green |
815 | mov bl,[esi+2] ; red |
812 | mov bl,[esi+2] ; red |
816 | shr ah,3 |
813 | shr ah,3 |
817 | and al,11111000b |
814 | and al,11111000b |
818 | shl eax,2 |
815 | shl eax,2 |
819 | shr bl,3 |
816 | shr bl,3 |
820 | add al,bl |
817 | add al,bl |
821 | mov [edi+0],al |
818 | mov [edi+0],al |
822 | mov [edi+1],ah |
819 | mov [edi+1],ah |
823 | add esi,BYTE 4 |
820 | add esi,BYTE 4 |
824 | add edi,BYTE 2 |
821 | add edi,BYTE 2 |
825 | dec ecx |
822 | dec ecx |
826 | jnz .L1 |
823 | jnz .L1 |
827 | .L2 |
824 | .L2: |
828 | jmp _x86return |
825 | retn |
829 | 826 | ||
830 | .L3 ; head |
827 | .L3: ; head |
831 | mov ebx,edi |
828 | mov ebx,edi |
832 | and ebx,BYTE 11b |
829 | and ebx,BYTE 11b |
833 | jz .L4 |
830 | jz .L4 |
834 | mov ah,[esi+0] ; blue |
831 | mov ah,[esi+0] ; blue |
835 | mov al,[esi+1] ; green |
832 | mov al,[esi+1] ; green |
836 | mov bl,[esi+2] ; red |
833 | mov bl,[esi+2] ; red |
837 | shr ah,3 |
834 | shr ah,3 |
838 | and al,11111000b |
835 | and al,11111000b |
839 | shl eax,2 |
836 | shl eax,2 |
840 | shr bl,3 |
837 | shr bl,3 |
841 | add al,bl |
838 | add al,bl |
842 | mov [edi+0],al |
839 | mov [edi+0],al |
843 | mov [edi+1],ah |
840 | mov [edi+1],ah |
844 | add esi,BYTE 4 |
841 | add esi,BYTE 4 |
845 | add edi,BYTE 2 |
842 | add edi,BYTE 2 |
846 | dec ecx |
843 | dec ecx |
847 | 844 | ||
848 | .L4 ; save count |
845 | .L4: ; save count |
849 | push ecx |
846 | push ecx |
850 | 847 | ||
851 | ; unroll twice |
848 | ; unroll twice |
852 | shr ecx,1 |
849 | shr ecx,1 |
853 | 850 | ||
854 | ; point arrays to end |
851 | ; point arrays to end |
855 | lea esi,[esi+ecx*8] |
852 | lea esi,[esi+ecx*8] |
856 | lea edi,[edi+ecx*4] |
853 | lea edi,[edi+ecx*4] |
857 | 854 | ||
858 | ; negative counter |
855 | ; negative counter |
859 | neg ecx |
856 | neg ecx |
860 | jmp SHORT .L6 |
857 | jmp SHORT .L6 |
861 | 858 | ||
862 | .L5 |
859 | .L5: |
863 | mov [edi+ecx*4-4],eax |
860 | mov [edi+ecx*4-4],eax |
864 | .L6 |
861 | .L6: |
865 | mov edx,[esi+ecx*8+4] |
862 | mov edx,[esi+ecx*8+4] |
866 | 863 | ||
867 | mov bh,[esi+ecx*8+4] |
864 | mov bh,[esi+ecx*8+4] |
868 | mov ah,[esi+ecx*8] |
865 | mov ah,[esi+ecx*8] |
869 | 866 | ||
870 | shr bh,3 |
867 | shr bh,3 |
871 | mov al,[esi+ecx*8+1] |
868 | mov al,[esi+ecx*8+1] |
872 | 869 | ||
873 | shr ah,3 |
870 | shr ah,3 |
874 | mov bl,[esi+ecx*8+5] |
871 | mov bl,[esi+ecx*8+5] |
875 | 872 | ||
876 | shl eax,2 |
873 | shl eax,2 |
877 | mov dl,[esi+ecx*8+2] |
874 | mov dl,[esi+ecx*8+2] |
878 | 875 | ||
879 | shl ebx,18 |
876 | shl ebx,18 |
880 | and eax,00007FE0h |
877 | and eax,00007FE0h |
881 | 878 | ||
882 | shr edx,3 |
879 | shr edx,3 |
883 | and ebx,07FE00000h |
880 | and ebx,07FE00000h |
884 | 881 | ||
885 | and edx,001F001Fh |
882 | and edx,001F001Fh |
886 | add eax,ebx |
883 | add eax,ebx |
887 | 884 | ||
888 | add eax,edx |
885 | add eax,edx |
889 | inc ecx |
886 | inc ecx |
890 | 887 | ||
891 | jnz .L5 |
888 | jnz .L5 |
892 | 889 | ||
893 | mov [edi+ecx*4-4],eax |
890 | mov [edi+ecx*4-4],eax |
894 | 891 | ||
895 | ; tail |
892 | ; tail |
896 | pop ecx |
893 | pop ecx |
897 | and ecx,BYTE 1 |
894 | and ecx,BYTE 1 |
898 | jz .L7 |
895 | jz .L7 |
899 | mov ah,[esi+0] ; blue |
896 | mov ah,[esi+0] ; blue |
900 | mov al,[esi+1] ; green |
897 | mov al,[esi+1] ; green |
901 | mov bl,[esi+2] ; red |
898 | mov bl,[esi+2] ; red |
902 | shr ah,3 |
899 | shr ah,3 |
903 | and al,11111000b |
900 | and al,11111000b |
904 | shl eax,2 |
901 | shl eax,2 |
905 | shr bl,3 |
902 | shr bl,3 |
906 | add al,bl |
903 | add al,bl |
907 | mov [edi+0],al |
904 | mov [edi+0],al |
908 | mov [edi+1],ah |
905 | mov [edi+1],ah |
909 | add esi,BYTE 4 |
906 | add esi,BYTE 4 |
910 | add edi,BYTE 2 |
907 | add edi,BYTE 2 |
911 | 908 | ||
912 | .L7 |
909 | .L7: |
913 | jmp _x86return |
910 | retn |
914 | 911 | ||
915 | 912 | ||
916 | 913 | ||
917 | 914 | ||
918 | 915 | ||
919 | ;; FROM 32 BIT RGB to 8 BIT RGB (rrrgggbbb) |
916 | ;; FROM 32 BIT RGB to 8 BIT RGB (rrrgggbbb) |
920 | ;; This routine writes FOUR pixels at once (dword) and then, if they exist |
917 | ;; This routine writes FOUR pixels at once (dword) and then, if they exist |
921 | ;; the trailing three pixels |
918 | ;; the trailing three pixels |
922 | _ConvertX86p32_8RGB332: |
919 | _ConvertX86p32_8RGB332: |
923 | 920 | ||
924 | 921 | ||
925 | .L_ALIGNED |
922 | .L_ALIGNED: |
926 | push ecx |
923 | push ecx |
927 | 924 | ||
928 | shr ecx,2 ; We will draw 4 pixels at once |
925 | shr ecx,2 ; We will draw 4 pixels at once |
929 | jnz .L1 |
926 | jnz .L1 |
930 | 927 | ||
931 | jmp .L2 ; short jump out of range :( |
928 | jmp .L2 ; short jump out of range :( |
932 | 929 | ||
933 | .L1: |
930 | .L1: |
934 | mov eax,[esi] ; first pair of pixels |
931 | mov eax,[esi] ; first pair of pixels |
935 | mov edx,[esi+4] |
932 | mov edx,[esi+4] |
936 | 933 | ||
937 | shr dl,6 |
934 | shr dl,6 |
938 | mov ebx,eax |
935 | mov ebx,eax |
939 | 936 | ||
940 | shr al,6 |
937 | shr al,6 |
941 | and ah,0e0h |
938 | and ah,0e0h |
942 | 939 | ||
943 | shr ebx,16 |
940 | shr ebx,16 |
944 | and dh,0e0h |
941 | and dh,0e0h |
945 | 942 | ||
946 | shr ah,3 |
943 | shr ah,3 |
947 | and bl,0e0h |
944 | and bl,0e0h |
948 | 945 | ||
949 | shr dh,3 |
946 | shr dh,3 |
950 | 947 | ||
951 | or al,bl |
948 | or al,bl |
952 | 949 | ||
953 | mov ebx,edx |
950 | mov ebx,edx |
954 | or al,ah |
951 | or al,ah |
955 | 952 | ||
956 | shr ebx,16 |
953 | shr ebx,16 |
957 | or dl,dh |
954 | or dl,dh |
958 | 955 | ||
959 | and bl,0e0h |
956 | and bl,0e0h |
960 | 957 | ||
961 | or dl,bl |
958 | or dl,bl |
962 | 959 | ||
963 | mov ah,dl |
960 | mov ah,dl |
964 | 961 | ||
965 | 962 | ||
966 | 963 | ||
967 | mov ebx,[esi+8] ; second pair of pixels |
964 | mov ebx,[esi+8] ; second pair of pixels |
968 | 965 | ||
969 | mov edx,ebx |
966 | mov edx,ebx |
970 | and bh,0e0h |
967 | and bh,0e0h |
971 | 968 | ||
972 | shr bl,6 |
969 | shr bl,6 |
973 | and edx,0e00000h |
970 | and edx,0e00000h |
974 | 971 | ||
975 | shr edx,16 |
972 | shr edx,16 |
976 | 973 | ||
977 | shr bh,3 |
974 | shr bh,3 |
978 | 975 | ||
979 | ror eax,16 |
976 | ror eax,16 |
980 | or bl,dl |
977 | or bl,dl |
981 | 978 | ||
982 | mov edx,[esi+12] |
979 | mov edx,[esi+12] |
983 | or bl,bh |
980 | or bl,bh |
984 | 981 | ||
985 | mov al,bl |
982 | mov al,bl |
986 | 983 | ||
987 | mov ebx,edx |
984 | mov ebx,edx |
988 | and dh,0e0h |
985 | and dh,0e0h |
989 | 986 | ||
990 | shr dl,6 |
987 | shr dl,6 |
991 | and ebx,0e00000h |
988 | and ebx,0e00000h |
992 | 989 | ||
993 | shr dh,3 |
990 | shr dh,3 |
994 | mov ah,dl |
991 | mov ah,dl |
995 | 992 | ||
996 | shr ebx,16 |
993 | shr ebx,16 |
997 | or ah,dh |
994 | or ah,dh |
998 | 995 | ||
999 | or ah,bl |
996 | or ah,bl |
1000 | 997 | ||
1001 | rol eax,16 |
998 | rol eax,16 |
1002 | add esi,BYTE 16 |
999 | add esi,BYTE 16 |
1003 | 1000 | ||
1004 | mov [edi],eax |
1001 | mov [edi],eax |
1005 | add edi,BYTE 4 |
1002 | add edi,BYTE 4 |
1006 | 1003 | ||
1007 | dec ecx |
1004 | dec ecx |
1008 | jz .L2 ; L1 out of range for short jump :( |
1005 | jz .L2 ; L1 out of range for short jump :( |
1009 | 1006 | ||
1010 | jmp .L1 |
1007 | jmp .L1 |
1011 | .L2: |
1008 | .L2: |
1012 | 1009 | ||
1013 | pop ecx |
1010 | pop ecx |
1014 | and ecx,BYTE 3 ; mask out number of pixels to draw |
1011 | and ecx,BYTE 3 ; mask out number of pixels to draw |
1015 | 1012 | ||
1016 | jz .L4 ; Nothing to do anymore |
1013 | jz .L4 ; Nothing to do anymore |
1017 | 1014 | ||
1018 | .L3: |
1015 | .L3: |
1019 | mov eax,[esi] ; single pixel conversion for trailing pixels |
1016 | mov eax,[esi] ; single pixel conversion for trailing pixels |
1020 | 1017 | ||
1021 | mov ebx,eax |
1018 | mov ebx,eax |
1022 | 1019 | ||
1023 | shr al,6 |
1020 | shr al,6 |
1024 | and ah,0e0h |
1021 | and ah,0e0h |
1025 | 1022 | ||
1026 | shr ebx,16 |
1023 | shr ebx,16 |
1027 | 1024 | ||
1028 | shr ah,3 |
1025 | shr ah,3 |
1029 | and bl,0e0h |
1026 | and bl,0e0h |
1030 | 1027 | ||
1031 | or al,ah |
1028 | or al,ah |
1032 | or al,bl |
1029 | or al,bl |
1033 | 1030 | ||
1034 | mov [edi],al |
1031 | mov [edi],al |
1035 | 1032 | ||
1036 | inc edi |
1033 | inc edi |
1037 | add esi,BYTE 4 |
1034 | add esi,BYTE 4 |
1038 | 1035 | ||
1039 | dec ecx |
1036 | dec ecx |
1040 | jnz .L3 |
1037 | jnz .L3 |
1041 | 1038 | ||
1042 | .L4: |
1039 | .L4: |
1043 | jmp _x86return |
1040 | retn |
- | 1041 | ||
- | 1042 | %ifidn __OUTPUT_FORMAT__,elf32 |
|
- | 1043 | section .note.GNU-stack noalloc noexec nowrite progbits |
|
- | 1044 | %endif |