Subversion Repositories Kolibri OS

Rev

Rev 8210 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed

Rev Author Line No. Line
8210 maxcodehac 1
;
2
; x86 format converters for HERMES
3
; Some routines Copyright (c) 1998 Christian Nentwich (brn@eleet.mcb.at)
4
; This source code is licensed under the GNU LGPL
5
;
6
; Please refer to the file COPYING.LIB contained in the distribution for
7
; licensing conditions
8
;
9
; Most routines are (c) Glenn Fiedler (ptc@gaffer.org), used with permission
10
;
11
 
12
BITS 32

%include "common.inc"

; Entry points made visible through the SDL_FUNC macro (provided by common.inc)
SDL_FUNC _ConvertX86p32_32BGR888
SDL_FUNC _ConvertX86p32_32RGBA888
SDL_FUNC _ConvertX86p32_32BGRA888
SDL_FUNC _ConvertX86p32_24RGB888
SDL_FUNC _ConvertX86p32_24BGR888
SDL_FUNC _ConvertX86p32_16RGB565
SDL_FUNC _ConvertX86p32_16BGR565
SDL_FUNC _ConvertX86p32_16RGB555
SDL_FUNC _ConvertX86p32_16BGR555
SDL_FUNC _ConvertX86p32_8RGB332

SECTION .text
28
 
29
;; _Convert_*
30
;; Parameters:
31
;;   ESI = source
32
;;   EDI = dest
33
;;   ECX = amount (NOT 0!!! (the _ConvertX86 routine checks for that though))
34
;; Destroys:
35
;;   EAX, EBX, ECX, EDX (ESI and EDI are advanced past the converted pixels)
36
 
37
 
38
_ConvertX86p32_32BGR888:
    ; In:       ESI = source, EDI = dest, ECX = pixel count (must not be 0)
    ; Modifies: EAX, EBX, ECX, EDX, ESI, EDI (EBP is saved and restored)
    ;
    ; Each dword gets bswap + ror 8, which exchanges byte 0 and byte 2 and
    ; leaves bytes 1 and 3 in place (e.g. 11223344h -> 11443322h).

    cmp     ecx, BYTE 32            ; more than 32 pixels -> unrolled path
    ja      .unrolled

.short_loop:                        ; one pixel per iteration
    mov     edx, [esi]
    bswap   edx
    ror     edx, 8
    mov     [edi], edx
    add     esi, BYTE 4
    add     edi, BYTE 4
    dec     ecx
    jnz     .short_loop
    retn

.unrolled:
    push    ebp                     ; EBP is used as the block counter

    mov     ebp, ecx
    shr     ebp, 2                  ; EBP = count / 4

    push    ecx                     ; full count is needed again for the tail

.quad_loop:                         ; four pixels per iteration
    mov     eax, [esi]
    mov     ebx, [esi+4]

    bswap   eax

    bswap   ebx

    ror     eax, 8
    mov     ecx, [esi+8]

    ror     ebx, 8
    mov     edx, [esi+12]

    bswap   ecx

    bswap   edx

    ror     ecx, 8
    mov     [edi+0], eax

    ror     edx, 8
    mov     [edi+4], ebx

    mov     [edi+8], ecx
    mov     [edi+12], edx

    add     esi, BYTE 16
    add     edi, BYTE 16

    dec     ebp
    jnz     .quad_loop

    pop     ecx
    and     ecx, BYTE 11b           ; ECX = count mod 4 = leftover pixels
    jz      .done

.tail_loop:                         ; leftover pixels, one at a time
    mov     edx, [esi]
    bswap   edx
    ror     edx, 8
    mov     [edi], edx
    add     esi, BYTE 4
    add     edi, BYTE 4
    dec     ecx
    jnz     .tail_loop

.done:
    pop     ebp
    retn
8210 maxcodehac 115
 
116
 
117
 
118
 
119
_ConvertX86p32_32RGBA888:
    ; In:       ESI = source, EDI = dest, ECX = pixel count (must not be 0)
    ; Modifies: EAX, EBX, ECX, EDX, ESI, EDI (EBP is saved and restored)
    ;
    ; Each dword is rotated left by 8 bits, so the top byte wraps around to
    ; the bottom (e.g. 11223344h -> 22334411h).

    cmp     ecx, BYTE 32            ; more than 32 pixels -> unrolled path
    ja      .unrolled

.short_loop:                        ; one pixel per iteration
    mov     edx, [esi]
    rol     edx, 8
    mov     [edi], edx
    add     esi, BYTE 4
    add     edi, BYTE 4
    dec     ecx
    jnz     .short_loop
    retn

.unrolled:
    push    ebp                     ; EBP is used as the block counter

    mov     ebp, ecx
    shr     ebp, 2                  ; EBP = count / 4

    push    ecx                     ; full count is needed again for the tail

.quad_loop:                         ; four pixels per iteration
    mov     eax, [esi]
    mov     ebx, [esi+4]

    rol     eax, 8
    mov     ecx, [esi+8]

    rol     ebx, 8
    mov     edx, [esi+12]

    rol     ecx, 8
    mov     [edi+0], eax

    rol     edx, 8
    mov     [edi+4], ebx

    mov     [edi+8], ecx
    mov     [edi+12], edx

    add     esi, BYTE 16
    add     edi, BYTE 16

    dec     ebp
    jnz     .quad_loop

    pop     ecx
    and     ecx, BYTE 11b           ; ECX = count mod 4 = leftover pixels
    jz      .done

.tail_loop:                         ; leftover pixels, one at a time
    mov     edx, [esi]
    rol     edx, 8
    mov     [edi], edx
    add     esi, BYTE 4
    add     edi, BYTE 4
    dec     ecx
    jnz     .tail_loop

.done:
    pop     ebp
    retn
8210 maxcodehac 186
 
187
 
188
 
189
 
190
_ConvertX86p32_32BGRA888:
    ; In:       ESI = source, EDI = dest, ECX = pixel count (must not be 0)
    ; Modifies: EAX, EBX, ECX, EDX, ESI, EDI (EBP is saved and restored)
    ;
    ; Each dword has its four bytes reversed with bswap
    ; (e.g. 11223344h -> 44332211h).

    cmp     ecx, BYTE 32            ; more than 32 pixels -> unrolled path
    ja      .unrolled

.short_loop:                        ; one pixel per iteration
    mov     edx, [esi]
    bswap   edx
    mov     [edi], edx
    add     esi, BYTE 4
    add     edi, BYTE 4
    dec     ecx
    jnz     .short_loop
    retn

.unrolled:
    push    ebp                     ; EBP is used as the block counter

    mov     ebp, ecx
    shr     ebp, 2                  ; EBP = count / 4

    push    ecx                     ; full count is needed again for the tail

.quad_loop:                         ; four pixels per iteration
    mov     eax, [esi]
    mov     ebx, [esi+4]

    mov     ecx, [esi+8]
    mov     edx, [esi+12]

    bswap   eax

    bswap   ebx

    bswap   ecx

    bswap   edx

    mov     [edi+0], eax
    mov     [edi+4], ebx

    mov     [edi+8], ecx
    mov     [edi+12], edx

    add     esi, BYTE 16
    add     edi, BYTE 16

    dec     ebp
    jnz     .quad_loop

    pop     ecx
    and     ecx, BYTE 11b           ; ECX = count mod 4 = leftover pixels
    jz      .done

.tail_loop:                         ; leftover pixels, one at a time
    mov     edx, [esi]
    bswap   edx
    mov     [edi], edx
    add     esi, BYTE 4
    add     edi, BYTE 4
    dec     ecx
    jnz     .tail_loop

.done:
    pop     ebp
    retn
8210 maxcodehac 259
 
260
 
261
 
262
 
263
;; 32 bit RGB 888 to 24 BIT RGB 888

_ConvertX86p32_24RGB888:
    ; In:       ESI = source, EDI = dest, ECX = pixel count (must not be 0)
    ; Modifies: EAX, EBX, ECX, EDX, ESI, EDI (EBP is saved and restored)
    ;
    ; The low three bytes of every source dword are copied in order; the
    ; fourth byte is dropped. Source advances 4 bytes per pixel, dest 3.

    cmp     ecx, BYTE 32            ; more than 32 pixels -> unrolled path
    ja      .head

.short_loop:                        ; one pixel per iteration
    mov     al, [esi]
    mov     bl, [esi+1]
    mov     dl, [esi+2]
    mov     [edi], al
    mov     [edi+1], bl
    mov     [edi+2], dl
    add     esi, BYTE 4
    add     edi, BYTE 3
    dec     ecx
    jnz     .short_loop
    retn

.head:                              ; single pixels until EDI is dword aligned
    mov     edx, edi
    and     edx, BYTE 11b
    jz      .aligned
    mov     al, [esi]
    mov     bl, [esi+1]
    mov     dl, [esi+2]
    mov     [edi], al
    mov     [edi+1], bl
    mov     [edi+2], dl
    add     esi, BYTE 4
    add     edi, BYTE 3
    dec     ecx
    jmp     SHORT .head

.aligned:                           ; 4 source pixels -> 3 dest dwords
    push    ebp
    mov     ebp, ecx
    shr     ebp, 2                  ; EBP = remaining count / 4

    push    ecx                     ; ECX is reused as scratch in the loop

    ; Diagrams list register bytes from most to least significant.
    ; Digits 0..3 name the pixel within the current group of four.
.quad_loop:
    mov     eax, [esi]              ; pixel 0                eax = [A0][R0][G0][B0]
    mov     ebx, [esi+4]            ; pixel 1                ebx = [A1][R1][G1][B1]

    shl     eax, 8                  ;                        eax = [R0][G0][B0][..]
    mov     ecx, [esi+12]           ; pixel 3                ecx = [A3][R3][G3][B3]

    shl     ebx, 8                  ;                        ebx = [R1][G1][B1][..]
    mov     al, [esi+4]             ;                        eax = [R0][G0][B0][B1]

    ror     eax, 8                  ;                        eax = [B1][R0][G0][B0] (done)
    mov     bh, [esi+8+1]           ;                        ebx = [R1][G1][G2][..]

    mov     [edi], eax
    add     edi, BYTE 3*4

    shl     ecx, 8                  ;                        ecx = [R3][G3][B3][..]
    mov     bl, [esi+8+0]           ;                        ebx = [R1][G1][G2][B2]

    rol     ebx, 16                 ;                        ebx = [G2][B2][R1][G1] (done)
    mov     cl, [esi+8+2]           ;                        ecx = [R3][G3][B3][R2] (done)

    mov     [edi+4-3*4], ebx        ; EDI was already advanced, hence -3*4
    add     esi, BYTE 4*4

    mov     [edi+8-3*4], ecx
    dec     ebp

    jnz     .quad_loop

    pop     ecx
    and     ecx, BYTE 11b           ; ECX = leftover pixels (0..3)
    jz      .done

.tail_loop:                         ; leftover pixels, one at a time
    mov     al, [esi]
    mov     bl, [esi+1]
    mov     dl, [esi+2]
    mov     [edi], al
    mov     [edi+1], bl
    mov     [edi+2], dl
    add     esi, BYTE 4
    add     edi, BYTE 3
    dec     ecx
    jnz     .tail_loop

.done:
    pop     ebp
    retn
8210 maxcodehac 356
 
357
 
358
 
359
 
360
;; 32 bit RGB 888 to 24 bit BGR 888

_ConvertX86p32_24BGR888:
    ; In:       ESI = source, EDI = dest, ECX = pixel count (must not be 0)
    ; Modifies: EAX, EBX, ECX, EDX, ESI, EDI (EBP is saved and restored)
    ;
    ; The low three bytes of every source dword are written in reversed
    ; order; the fourth byte is dropped. Source advances 4 bytes per pixel,
    ; dest 3.

    cmp     ecx, BYTE 32            ; more than 32 pixels -> unrolled path
    ja      .head

.short_loop:                        ; one pixel per iteration
    mov     dl, [esi]
    mov     bl, [esi+1]
    mov     al, [esi+2]
    mov     [edi], al
    mov     [edi+1], bl
    mov     [edi+2], dl
    add     esi, BYTE 4
    add     edi, BYTE 3
    dec     ecx
    jnz     .short_loop
    retn

.head:                              ; single pixels until EDI is dword aligned
    mov     edx, edi
    and     edx, BYTE 11b
    jz      .aligned
    mov     dl, [esi]
    mov     bl, [esi+1]
    mov     al, [esi+2]
    mov     [edi], al
    mov     [edi+1], bl
    mov     [edi+2], dl
    add     esi, BYTE 4
    add     edi, BYTE 3
    dec     ecx
    jmp     SHORT .head

.aligned:                           ; 4 source pixels -> 3 dest dwords
    push    ebp
    mov     ebp, ecx
    shr     ebp, 2                  ; EBP = remaining count / 4

    push    ecx                     ; ECX is reused as scratch in the loop

    ; Diagrams list register bytes from most to least significant.
    ; Digits 0..3 name the pixel within the current group of four.
.quad_loop:
    mov     eax, [esi]              ; pixel 0                eax = [A0][R0][G0][B0]
    mov     ebx, [esi+4]            ; pixel 1                ebx = [A1][R1][G1][B1]

    bswap   eax                     ;                        eax = [B0][G0][R0][A0]

    bswap   ebx                     ;                        ebx = [B1][G1][R1][A1]

    mov     al, [esi+4+2]           ;                        eax = [B0][G0][R0][R1]
    mov     bh, [esi+4+4+1]         ;                        ebx = [B1][G1][G2][A1]

    ror     eax, 8                  ;                        eax = [R1][B0][G0][R0] (done)
    mov     bl, [esi+4+4+2]         ;                        ebx = [B1][G1][G2][R2]

    ror     ebx, 16                 ;                        ebx = [G2][R2][B1][G1] (done)
    mov     [edi], eax

    mov     [edi+4], ebx
    mov     ecx, [esi+12]           ; pixel 3                ecx = [A3][R3][G3][B3]

    bswap   ecx                     ;                        ecx = [B3][G3][R3][A3]

    mov     cl, [esi+8]             ;                        ecx = [B3][G3][R3][B2] (done)
    add     esi, BYTE 4*4

    mov     [edi+8], ecx
    add     edi, BYTE 3*4

    dec     ebp
    jnz     .quad_loop

    pop     ecx
    and     ecx, BYTE 11b           ; ECX = leftover pixels (0..3)
    jz      .done

.tail_loop:                         ; leftover pixels, one at a time
    mov     dl, [esi]
    mov     bl, [esi+1]
    mov     al, [esi+2]
    mov     [edi], al
    mov     [edi+1], bl
    mov     [edi+2], dl
    add     esi, BYTE 4
    add     edi, BYTE 3
    dec     ecx
    jnz     .tail_loop

.done:
    pop     ebp
    retn
8210 maxcodehac 456
 
457
 
458
 
459
 
460
;; 32 bit RGB 888 to 16 BIT RGB 565

_ConvertX86p32_16RGB565:
    ; In:       ESI = source, EDI = dest, ECX = pixel count (must not be 0)
    ; Modifies: EAX, EBX, ECX, EDX, ESI, EDI
    ;
    ; Output word: bits 15..11 = top 5 bits of source byte 2 (red),
    ;              bits 10..5  = top 6 bits of source byte 1 (green),
    ;              bits  4..0  = top 5 bits of source byte 0 (blue).

    cmp     ecx, BYTE 16            ; more than 16 pixels -> paired path
    ja      .head

.short_loop:                        ; one pixel per iteration
    mov     bl, [esi+0]             ; blue
    mov     al, [esi+1]             ; green
    mov     ah, [esi+2]             ; red
    shr     ah, 3
    and     al, 11111100b
    shl     eax, 3                  ; AX = red5:green6:00000
    shr     bl, 3
    add     al, bl                  ; fill in blue5
    mov     [edi+0], al
    mov     [edi+1], ah
    add     esi, BYTE 4
    add     edi, BYTE 2
    dec     ecx
    jnz     .short_loop
    retn

.head:                              ; EDI not dword aligned -> one pixel first
    mov     ebx, edi
    and     ebx, BYTE 11b
    jz      .pairs

    mov     bl, [esi+0]             ; blue
    mov     al, [esi+1]             ; green
    mov     ah, [esi+2]             ; red
    shr     ah, 3
    and     al, 11111100b
    shl     eax, 3
    shr     bl, 3
    add     al, bl
    mov     [edi+0], al
    mov     [edi+1], ah
    add     esi, BYTE 4
    add     edi, BYTE 2
    dec     ecx

.pairs:
    push    ecx                     ; remaining count, needed for the tail

    shr     ecx, 1                  ; ECX = number of pixel pairs

    lea     esi, [esi+ecx*8]        ; point both arrays at the end of the
    lea     edi, [edi+ecx*4]        ; paired region ...

    neg     ecx                     ; ... and count up from -pairs to 0
    jmp     SHORT .pair_load

.pair_store:                        ; store the dword built by the previous pass
    mov     [edi+ecx*4-4], eax
.pair_load:
    mov     eax, [esi+ecx*8]        ; first pixel of the pair

    shr     ah, 2
    mov     ebx, [esi+ecx*8+4]      ; second pixel of the pair

    shr     eax, 3
    mov     edx, [esi+ecx*8+4]

    shr     bh, 2
    mov     dl, [esi+ecx*8+2]       ; DL = red of the first pixel

    shl     ebx, 13
    and     eax, 000007FFh          ; green/blue of the first pixel

    shl     edx, 8
    and     ebx, 07FF0000h          ; green/blue of the second pixel

    and     edx, 0F800F800h         ; red of both pixels
    add     eax, ebx

    add     eax, edx
    inc     ecx

    jnz     .pair_store

    mov     [edi+ecx*4-4], eax      ; last pair (ECX is 0 here)

    pop     ecx
    test    cl, 1                   ; odd remaining count -> one more pixel
    jz      .done

    mov     bl, [esi+0]             ; blue
    mov     al, [esi+1]             ; green
    mov     ah, [esi+2]             ; red
    shr     ah, 3
    and     al, 11111100b
    shl     eax, 3
    shr     bl, 3
    add     al, bl
    mov     [edi+0], al
    mov     [edi+1], ah
    add     esi, BYTE 4
    add     edi, BYTE 2

.done:
    retn
8210 maxcodehac 571
 
572
 
573
 
574
 
575
;; 32 bit RGB 888 to 16 BIT BGR 565

_ConvertX86p32_16BGR565:
    ; In:       ESI = source, EDI = dest, ECX = pixel count (must not be 0)
    ; Modifies: EAX, EBX, ECX, EDX, ESI, EDI
    ;
    ; Output word: bits 15..11 = top 5 bits of source byte 0 (blue),
    ;              bits 10..5  = top 6 bits of source byte 1 (green),
    ;              bits  4..0  = top 5 bits of source byte 2 (red).

    cmp     ecx, BYTE 16            ; more than 16 pixels -> paired path
    ja      .head

.short_loop:                        ; one pixel per iteration
    mov     ah, [esi+0]             ; blue
    mov     al, [esi+1]             ; green
    mov     bl, [esi+2]             ; red
    shr     ah, 3
    and     al, 11111100b
    shl     eax, 3                  ; AX = blue5:green6:00000
    shr     bl, 3
    add     al, bl                  ; fill in red5
    mov     [edi+0], al
    mov     [edi+1], ah
    add     esi, BYTE 4
    add     edi, BYTE 2
    dec     ecx
    jnz     .short_loop
    retn

.head:                              ; EDI not dword aligned -> one pixel first
    mov     ebx, edi
    and     ebx, BYTE 11b
    jz      .pairs
    mov     ah, [esi+0]             ; blue
    mov     al, [esi+1]             ; green
    mov     bl, [esi+2]             ; red
    shr     ah, 3
    and     al, 11111100b
    shl     eax, 3
    shr     bl, 3
    add     al, bl
    mov     [edi+0], al
    mov     [edi+1], ah
    add     esi, BYTE 4
    add     edi, BYTE 2
    dec     ecx

.pairs:
    push    ecx                     ; remaining count, needed for the tail

    shr     ecx, 1                  ; ECX = number of pixel pairs

    lea     esi, [esi+ecx*8]        ; point both arrays at the end of the
    lea     edi, [edi+ecx*4]        ; paired region ...

    neg     ecx                     ; ... and count up from -pairs to 0
    jmp     SHORT .pair_load

.pair_store:                        ; store the dword built by the previous pass
    mov     [edi+ecx*4-4], eax
.pair_load:
    mov     edx, [esi+ecx*8+4]      ; second pixel of the pair

    mov     bh, [esi+ecx*8+4]       ; blue of the second pixel
    mov     ah, [esi+ecx*8]         ; blue of the first pixel

    shr     bh, 3
    mov     al, [esi+ecx*8+1]       ; green of the first pixel

    shr     ah, 3
    mov     bl, [esi+ecx*8+5]       ; green of the second pixel

    shl     eax, 3
    mov     dl, [esi+ecx*8+2]       ; DL = red of the first pixel

    shl     ebx, 19
    and     eax, 0000FFE0h          ; blue/green of the first pixel

    shr     edx, 3
    and     ebx, 0FFE00000h         ; blue/green of the second pixel

    and     edx, 001F001Fh          ; red of both pixels
    add     eax, ebx

    add     eax, edx
    inc     ecx

    jnz     .pair_store

    mov     [edi+ecx*4-4], eax      ; last pair (ECX is 0 here)

    pop     ecx
    and     ecx, BYTE 1             ; odd remaining count -> one more pixel
    jz      .done
    mov     ah, [esi+0]             ; blue
    mov     al, [esi+1]             ; green
    mov     bl, [esi+2]             ; red
    shr     ah, 3
    and     al, 11111100b
    shl     eax, 3
    shr     bl, 3
    add     al, bl
    mov     [edi+0], al
    mov     [edi+1], ah
    add     esi, BYTE 4
    add     edi, BYTE 2

.done:
    retn
8210 maxcodehac 685
 
686
 
687
 
688
 
689
;; 32 BIT RGB TO 16 BIT RGB 555

_ConvertX86p32_16RGB555:
    ; In:       ESI = source, EDI = dest, ECX = pixel count (must not be 0)
    ; Modifies: EAX, EBX, ECX, EDX, ESI, EDI
    ;
    ; Output word: bit 15 = 0,
    ;              bits 14..10 = top 5 bits of source byte 2 (red),
    ;              bits  9..5  = top 5 bits of source byte 1 (green),
    ;              bits  4..0  = top 5 bits of source byte 0 (blue).

    cmp     ecx, BYTE 16            ; more than 16 pixels -> paired path
    ja      .head

.short_loop:                        ; one pixel per iteration
    mov     bl, [esi+0]             ; blue
    mov     al, [esi+1]             ; green
    mov     ah, [esi+2]             ; red
    shr     ah, 3
    and     al, 11111000b
    shl     eax, 2                  ; AX = 0:red5:green5:00000
    shr     bl, 3
    add     al, bl                  ; fill in blue5
    mov     [edi+0], al
    mov     [edi+1], ah
    add     esi, BYTE 4
    add     edi, BYTE 2
    dec     ecx
    jnz     .short_loop
    retn

.head:                              ; EDI not dword aligned -> one pixel first
    mov     ebx, edi
    and     ebx, BYTE 11b
    jz      .pairs
    mov     bl, [esi+0]             ; blue
    mov     al, [esi+1]             ; green
    mov     ah, [esi+2]             ; red
    shr     ah, 3
    and     al, 11111000b
    shl     eax, 2
    shr     bl, 3
    add     al, bl
    mov     [edi+0], al
    mov     [edi+1], ah
    add     esi, BYTE 4
    add     edi, BYTE 2
    dec     ecx

.pairs:
    push    ecx                     ; remaining count, needed for the tail

    shr     ecx, 1                  ; ECX = number of pixel pairs

    lea     esi, [esi+ecx*8]        ; point both arrays at the end of the
    lea     edi, [edi+ecx*4]        ; paired region ...

    neg     ecx                     ; ... and count up from -pairs to 0
    jmp     SHORT .pair_load

.pair_store:                        ; store the dword built by the previous pass
    mov     [edi+ecx*4-4], eax
.pair_load:
    mov     eax, [esi+ecx*8]        ; first pixel of the pair

    shr     ah, 3
    mov     ebx, [esi+ecx*8+4]      ; second pixel of the pair

    shr     eax, 3
    mov     edx, [esi+ecx*8+4]

    shr     bh, 3
    mov     dl, [esi+ecx*8+2]       ; DL = red of the first pixel

    shl     ebx, 13
    and     eax, 000007FFh          ; green/blue of the first pixel

    shl     edx, 7
    and     ebx, 07FF0000h          ; green/blue of the second pixel

    and     edx, 07C007C00h         ; red of both pixels
    add     eax, ebx

    add     eax, edx
    inc     ecx

    jnz     .pair_store

    mov     [edi+ecx*4-4], eax      ; last pair (ECX is 0 here)

    pop     ecx
    and     ecx, BYTE 1             ; odd remaining count -> one more pixel
    jz      .done
    mov     bl, [esi+0]             ; blue
    mov     al, [esi+1]             ; green
    mov     ah, [esi+2]             ; red
    shr     ah, 3
    and     al, 11111000b
    shl     eax, 2
    shr     bl, 3
    add     al, bl
    mov     [edi+0], al
    mov     [edi+1], ah
    add     esi, BYTE 4
    add     edi, BYTE 2

.done:
    retn
8210 maxcodehac 796
 
797
 
798
 
799
 
800
;; 32 BIT RGB TO 16 BIT BGR 555

_ConvertX86p32_16BGR555:
    ; In:       ESI = source, EDI = dest, ECX = pixel count (must not be 0)
    ; Modifies: EAX, EBX, ECX, EDX, ESI, EDI
    ;
    ; Output word: bit 15 = 0,
    ;              bits 14..10 = top 5 bits of source byte 0 (blue),
    ;              bits  9..5  = top 5 bits of source byte 1 (green),
    ;              bits  4..0  = top 5 bits of source byte 2 (red).

    cmp     ecx, BYTE 16            ; more than 16 pixels -> paired path
    ja      .head

.short_loop:                        ; one pixel per iteration
    mov     ah, [esi+0]             ; blue
    mov     al, [esi+1]             ; green
    mov     bl, [esi+2]             ; red
    shr     ah, 3
    and     al, 11111000b
    shl     eax, 2                  ; AX = 0:blue5:green5:00000
    shr     bl, 3
    add     al, bl                  ; fill in red5
    mov     [edi+0], al
    mov     [edi+1], ah
    add     esi, BYTE 4
    add     edi, BYTE 2
    dec     ecx
    jnz     .short_loop
    retn

.head:                              ; EDI not dword aligned -> one pixel first
    mov     ebx, edi
    and     ebx, BYTE 11b
    jz      .pairs
    mov     ah, [esi+0]             ; blue
    mov     al, [esi+1]             ; green
    mov     bl, [esi+2]             ; red
    shr     ah, 3
    and     al, 11111000b
    shl     eax, 2
    shr     bl, 3
    add     al, bl
    mov     [edi+0], al
    mov     [edi+1], ah
    add     esi, BYTE 4
    add     edi, BYTE 2
    dec     ecx

.pairs:
    push    ecx                     ; remaining count, needed for the tail

    shr     ecx, 1                  ; ECX = number of pixel pairs

    lea     esi, [esi+ecx*8]        ; point both arrays at the end of the
    lea     edi, [edi+ecx*4]        ; paired region ...

    neg     ecx                     ; ... and count up from -pairs to 0
    jmp     SHORT .pair_load

.pair_store:                        ; store the dword built by the previous pass
    mov     [edi+ecx*4-4], eax
.pair_load:
    mov     edx, [esi+ecx*8+4]      ; second pixel of the pair

    mov     bh, [esi+ecx*8+4]       ; blue of the second pixel
    mov     ah, [esi+ecx*8]         ; blue of the first pixel

    shr     bh, 3
    mov     al, [esi+ecx*8+1]       ; green of the first pixel

    shr     ah, 3
    mov     bl, [esi+ecx*8+5]       ; green of the second pixel

    shl     eax, 2
    mov     dl, [esi+ecx*8+2]       ; DL = red of the first pixel

    shl     ebx, 18
    and     eax, 00007FE0h          ; blue/green of the first pixel

    shr     edx, 3
    and     ebx, 07FE00000h         ; blue/green of the second pixel

    and     edx, 001F001Fh          ; red of both pixels
    add     eax, ebx

    add     eax, edx
    inc     ecx

    jnz     .pair_store

    mov     [edi+ecx*4-4], eax      ; last pair (ECX is 0 here)

    pop     ecx
    and     ecx, BYTE 1             ; odd remaining count -> one more pixel
    jz      .done
    mov     ah, [esi+0]             ; blue
    mov     al, [esi+1]             ; green
    mov     bl, [esi+2]             ; red
    shr     ah, 3
    and     al, 11111000b
    shl     eax, 2
    shr     bl, 3
    add     al, bl
    mov     [edi+0], al
    mov     [edi+1], ah
    add     esi, BYTE 4
    add     edi, BYTE 2

.done:
    retn
8210 maxcodehac 911
 
912
 
913
 
914
 
915
 
916
;; FROM 32 BIT RGB to 8 BIT RGB (rrrgggbb)
;; This routine writes FOUR pixels at once (one dword) and then converts
;; the up to three trailing pixels one byte at a time.
_ConvertX86p32_8RGB332:
    ; In:       ESI = source, EDI = dest, ECX = pixel count
    ; Modifies: EAX, EBX, ECX, EDX, ESI, EDI
    ;
    ; Output byte: bits 7..5 = top 3 bits of source byte 2 (red),
    ;              bits 4..2 = top 3 bits of source byte 1 (green),
    ;              bits 1..0 = top 2 bits of source byte 0 (blue).

    push    ecx                     ; full count, needed for the tail

    shr     ecx, 2                  ; ECX = number of 4-pixel groups
    jz      NEAR .tail              ; fewer than four pixels

.quad_loop:
    mov     eax, [esi]              ; first pair of pixels
    mov     edx, [esi+4]

    shr     dl, 6
    mov     ebx, eax

    shr     al, 6
    and     ah, 0e0h

    shr     ebx, 16
    and     dh, 0e0h

    shr     ah, 3
    and     bl, 0e0h

    shr     dh, 3

    or      al, bl

    mov     ebx, edx
    or      al, ah                  ; AL = first output byte

    shr     ebx, 16
    or      dl, dh

    and     bl, 0e0h

    or      dl, bl

    mov     ah, dl                  ; AH = second output byte

    mov     ebx, [esi+8]            ; second pair of pixels

    mov     edx, ebx
    and     bh, 0e0h

    shr     bl, 6
    and     edx, 0e00000h

    shr     edx, 16

    shr     bh, 3

    ror     eax, 16                 ; park bytes 1-2 in the upper half of EAX
    or      bl, dl

    mov     edx, [esi+12]
    or      bl, bh

    mov     al, bl                  ; AL = third output byte

    mov     ebx, edx
    and     dh, 0e0h

    shr     dl, 6
    and     ebx, 0e00000h

    shr     dh, 3
    mov     ah, dl

    shr     ebx, 16
    or      ah, dh

    or      ah, bl                  ; AH = fourth output byte

    rol     eax, 16                 ; restore the byte order 1,2,3,4
    add     esi, BYTE 16

    mov     [edi], eax
    add     edi, BYTE 4

    dec     ecx
    jnz     NEAR .quad_loop

.tail:
    pop     ecx
    and     ecx, BYTE 3             ; mask out number of pixels left to draw

    jz      .done                   ; nothing to do anymore

.tail_loop:
    mov     eax, [esi]              ; single pixel conversion for trailing pixels

    mov     ebx, eax

    shr     al, 6
    and     ah, 0e0h

    shr     ebx, 16

    shr     ah, 3
    and     bl, 0e0h

    or      al, ah
    or      al, bl

    mov     [edi], al

    inc     edi
    add     esi, BYTE 4

    dec     ecx
    jnz     .tail_loop

.done:
    retn
1041
 
1042
; For ELF32 output, emit an empty .note.GNU-stack section
%ifidn __OUTPUT_FORMAT__,elf32
section .note.GNU-stack noalloc noexec nowrite progbits
%endif