; Provenance: KolibriOS Subversion repository, Rev 9172 (taken from the web blame view)
;
; x86 format converters for HERMES
; Some routines Copyright (c) 1998 Christian Nentwich (brn@eleet.mcb.at)
; This source code is licensed under the GNU LGPL
;
; Please refer to the file COPYING.LIB contained in the distribution for
; licensing conditions
;
; Most routines are (c) Glenn Fiedler (ptc@gaffer.org), used with permission
;

BITS 32

%include "common.inc"

; Each converter is announced under two names: with and without a leading
; underscore.  SDL_FUNC is defined in common.inc (not visible here);
; presumably it declares the symbol global -- confirm against common.inc.
SDL_FUNC _ConvertX86p32_32BGR888
SDL_FUNC _ConvertX86p32_32RGBA888
SDL_FUNC _ConvertX86p32_32BGRA888
SDL_FUNC _ConvertX86p32_24RGB888
SDL_FUNC _ConvertX86p32_24BGR888
SDL_FUNC _ConvertX86p32_16RGB565
SDL_FUNC _ConvertX86p32_16BGR565
SDL_FUNC _ConvertX86p32_16RGB555
SDL_FUNC _ConvertX86p32_16BGR555
SDL_FUNC _ConvertX86p32_8RGB332

SDL_FUNC ConvertX86p32_32BGR888
SDL_FUNC ConvertX86p32_32RGBA888
SDL_FUNC ConvertX86p32_32BGRA888
SDL_FUNC ConvertX86p32_24RGB888
SDL_FUNC ConvertX86p32_24BGR888
SDL_FUNC ConvertX86p32_16RGB565
SDL_FUNC ConvertX86p32_16BGR565
SDL_FUNC ConvertX86p32_16RGB555
SDL_FUNC ConvertX86p32_16BGR555
SDL_FUNC ConvertX86p32_8RGB332


SECTION .text

;; _Convert_*
;; Parameters:
;;   ESI = source
;;   EDI = dest
;;   ECX = amount (NOT 0!!! (the _ConvertX86 routine checks for that though))
;; Destroys:
;;   EAX, EBX, EDX


;; 32 bit -> 32 bit, bytes 0 and 2 of every pixel exchanged
;; (bytes 1 and 3 keep their position).
;; In:       ESI = source, EDI = dest, ECX = pixel count (must not be 0)
;; Out:      ESI and EDI point just past the converted pixels
;; Destroys: EAX, EBX, ECX, EDX, flags (EBP is saved and restored)
ConvertX86p32_32BGR888:
_ConvertX86p32_32BGR888:

        ; 32 pixels or fewer: plain one-pixel-per-pass loop
        cmp     ecx,BYTE 32
        ja      .unrolled

.short_loop:
        mov     edx,[esi]
        bswap   edx                     ; b3 b2 b1 b0 -> b0 b1 b2 b3
        ror     edx,8                   ;             -> b3 b0 b1 b2
        mov     [edi],edx
        add     esi,BYTE 4
        add     edi,BYTE 4
        dec     ecx
        jnz     .short_loop
        retn

.unrolled:
        push    ebp                     ; EBP is used as the block counter

        ; four pixels per pass
        mov     ebp,ecx
        shr     ebp,2

        ; keep the full count for the tail; ECX is a data register below
        push    ecx

.quad_loop:
        mov     eax,[esi]
        mov     ebx,[esi+4]

        bswap   eax

        bswap   ebx

        ror     eax,8
        mov     ecx,[esi+8]

        ror     ebx,8
        mov     edx,[esi+12]

        bswap   ecx

        bswap   edx

        ror     ecx,8
        mov     [edi+0],eax

        ror     edx,8
        mov     [edi+4],ebx

        mov     [edi+8],ecx
        mov     [edi+12],edx

        add     esi,BYTE 16
        add     edi,BYTE 16

        dec     ebp
        jnz     .quad_loop

        ; 0..3 pixels may be left over
        pop     ecx
        and     ecx,BYTE 11b
        jz      .done

.tail_loop:
        mov     edx,[esi]
        bswap   edx
        ror     edx,8
        mov     [edi],edx
        add     esi,BYTE 4
        add     edi,BYTE 4
        dec     ecx
        jnz     .tail_loop

.done:
        pop     ebp
        retn



;; 32 bit -> 32 bit, every pixel rotated left by 8 bits
;; (byte 3 moves to byte 0, bytes 0..2 move up one position).
;; In:       ESI = source, EDI = dest, ECX = pixel count (must not be 0)
;; Out:      ESI and EDI point just past the converted pixels
;; Destroys: EAX, EBX, ECX, EDX, flags (EBP is saved and restored)
ConvertX86p32_32RGBA888:
_ConvertX86p32_32RGBA888:

        ; 32 pixels or fewer: plain one-pixel-per-pass loop
        cmp     ecx,BYTE 32
        ja      .unrolled

.short_loop:
        mov     edx,[esi]
        rol     edx,8                   ; b3 b2 b1 b0 -> b2 b1 b0 b3
        mov     [edi],edx
        add     esi,BYTE 4
        add     edi,BYTE 4
        dec     ecx
        jnz     .short_loop
        retn

.unrolled:
        push    ebp                     ; EBP is used as the block counter

        ; four pixels per pass
        mov     ebp,ecx
        shr     ebp,2

        ; keep the full count for the tail; ECX is a data register below
        push    ecx

.quad_loop:
        mov     eax,[esi]
        mov     ebx,[esi+4]

        rol     eax,8
        mov     ecx,[esi+8]

        rol     ebx,8
        mov     edx,[esi+12]

        rol     ecx,8
        mov     [edi+0],eax

        rol     edx,8
        mov     [edi+4],ebx

        mov     [edi+8],ecx
        mov     [edi+12],edx

        add     esi,BYTE 16
        add     edi,BYTE 16

        dec     ebp
        jnz     .quad_loop

        ; 0..3 pixels may be left over
        pop     ecx
        and     ecx,BYTE 11b
        jz      .done

.tail_loop:
        mov     edx,[esi]
        rol     edx,8
        mov     [edi],edx
        add     esi,BYTE 4
        add     edi,BYTE 4
        dec     ecx
        jnz     .tail_loop

.done:
        pop     ebp
        retn



;; 32 bit -> 32 bit, byte order of every pixel reversed (BSWAP).
;; In:       ESI = source, EDI = dest, ECX = pixel count (must not be 0)
;; Out:      ESI and EDI point just past the converted pixels
;; Destroys: EAX, EBX, ECX, EDX, flags (EBP is saved and restored)
ConvertX86p32_32BGRA888:
_ConvertX86p32_32BGRA888:

        ; 32 pixels or fewer: plain one-pixel-per-pass loop
        cmp     ecx,BYTE 32
        ja      .unrolled

.short_loop:
        mov     edx,[esi]
        bswap   edx                     ; b3 b2 b1 b0 -> b0 b1 b2 b3
        mov     [edi],edx
        add     esi,BYTE 4
        add     edi,BYTE 4
        dec     ecx
        jnz     .short_loop
        retn

.unrolled:
        push    ebp                     ; EBP is used as the block counter

        ; four pixels per pass
        mov     ebp,ecx
        shr     ebp,2

        ; keep the full count for the tail; ECX is a data register below
        push    ecx

.quad_loop:
        mov     eax,[esi]
        mov     ebx,[esi+4]

        mov     ecx,[esi+8]
        mov     edx,[esi+12]

        bswap   eax

        bswap   ebx

        bswap   ecx

        bswap   edx

        mov     [edi+0],eax
        mov     [edi+4],ebx

        mov     [edi+8],ecx
        mov     [edi+12],edx

        add     esi,BYTE 16
        add     edi,BYTE 16

        dec     ebp
        jnz     .quad_loop

        ; 0..3 pixels may be left over
        pop     ecx
        and     ecx,BYTE 11b
        jz      .done

.tail_loop:
        mov     edx,[esi]
        bswap   edx
        mov     [edi],edx
        add     esi,BYTE 4
        add     edi,BYTE 4
        dec     ecx
        jnz     .tail_loop

.done:
        pop     ebp
        retn




;; 32 bit RGB 888 to 24 bit RGB 888
;; Copies bytes 0..2 of every source dword to three consecutive destination
;; bytes (same order) and drops byte 3.
;; In:       ESI = source, EDI = dest, ECX = pixel count (must not be 0)
;; Out:      ESI and EDI point just past the converted pixels
;; Destroys: EAX, EBX, ECX, EDX, flags (EBP is saved and restored)

ConvertX86p32_24RGB888:
_ConvertX86p32_24RGB888:

        ; 32 pixels or fewer: plain one-pixel-per-pass loop
        cmp     ecx,BYTE 32
        ja      .align_head

.short_loop:
        mov     al,[esi]
        mov     bl,[esi+1]
        mov     dl,[esi+2]
        mov     [edi],al
        mov     [edi+1],bl
        mov     [edi+2],dl
        add     esi,BYTE 4
        add     edi,BYTE 3
        dec     ecx
        jnz     .short_loop
        retn

.align_head:
        ; Convert single pixels until EDI is dword aligned.  Each pass emits
        ; three bytes, so at most three passes are needed.
        mov     edx,edi
        and     edx,BYTE 11b
        jz      .unrolled
        mov     al,[esi]
        mov     bl,[esi+1]
        mov     dl,[esi+2]
        mov     [edi],al
        mov     [edi+1],bl
        mov     [edi+2],dl
        add     esi,BYTE 4
        add     edi,BYTE 3
        dec     ecx
        jmp     SHORT .align_head

.unrolled:
        ; four pixels in (16 bytes), three dwords out (12 bytes) per pass
        push    ebp
        mov     ebp,ecx
        shr     ebp,2

        ; keep the remaining count for the tail; ECX is a data register below
        push    ecx

        ; Register diagrams list bytes from most to least significant.
        ; Upper case = pixel 0 / pixel 2, lower case = pixel 1 / pixel 3.
.quad_loop:
        mov     eax,[esi]               ; pixel 0                eax = [A0][R0][G0][B0]
        mov     ebx,[esi+4]             ; pixel 1                ebx = [a1][r1][g1][b1]

        shl     eax,8                   ;                        eax = [R0][G0][B0][..]
        mov     ecx,[esi+12]            ; pixel 3                ecx = [a3][r3][g3][b3]

        shl     ebx,8                   ;                        ebx = [r1][g1][b1][..]
        mov     al,[esi+4]              ;                        eax = [R0][G0][B0][b1]

        ror     eax,8                   ;                        eax = [b1][R0][G0][B0] (done)
        mov     bh,[esi+8+1]            ;                        ebx = [r1][g1][G2][..]

        mov     [edi],eax
        add     edi,BYTE 3*4            ; EDI advanced early; later stores compensate with -3*4

        shl     ecx,8                   ;                        ecx = [r3][g3][b3][..]
        mov     bl,[esi+8+0]            ;                        ebx = [r1][g1][G2][B2]

        rol     ebx,16                  ;                        ebx = [G2][B2][r1][g1] (done)
        mov     cl,[esi+8+2]            ;                        ecx = [r3][g3][b3][R2] (done)

        mov     [edi+4-3*4],ebx
        add     esi,BYTE 4*4

        mov     [edi+8-3*4],ecx
        dec     ebp

        jnz     .quad_loop

        ; 0..3 pixels may be left over
        pop     ecx
        and     ecx,BYTE 11b
        jz      .done

.tail_loop:
        mov     al,[esi]
        mov     bl,[esi+1]
        mov     dl,[esi+2]
        mov     [edi],al
        mov     [edi+1],bl
        mov     [edi+2],dl
        add     esi,BYTE 4
        add     edi,BYTE 3
        dec     ecx
        jnz     .tail_loop

.done:
        pop     ebp
        retn




;; 32 bit RGB 888 to 24 bit BGR 888
;; Copies bytes 2, 1, 0 of every source dword (in that order) to three
;; consecutive destination bytes and drops byte 3.
;; In:       ESI = source, EDI = dest, ECX = pixel count (must not be 0)
;; Out:      ESI and EDI point just past the converted pixels
;; Destroys: EAX, EBX, ECX, EDX, flags (EBP is saved and restored)

ConvertX86p32_24BGR888:
_ConvertX86p32_24BGR888:

        ; 32 pixels or fewer: plain one-pixel-per-pass loop
        cmp     ecx,BYTE 32
        ja      .align_head

.short_loop:
        mov     dl,[esi]
        mov     bl,[esi+1]
        mov     al,[esi+2]
        mov     [edi],al
        mov     [edi+1],bl
        mov     [edi+2],dl
        add     esi,BYTE 4
        add     edi,BYTE 3
        dec     ecx
        jnz     .short_loop
        retn

.align_head:
        ; Convert single pixels until EDI is dword aligned.  Each pass emits
        ; three bytes, so at most three passes are needed.
        mov     edx,edi
        and     edx,BYTE 11b
        jz      .unrolled
        mov     dl,[esi]
        mov     bl,[esi+1]
        mov     al,[esi+2]
        mov     [edi],al
        mov     [edi+1],bl
        mov     [edi+2],dl
        add     esi,BYTE 4
        add     edi,BYTE 3
        dec     ecx
        jmp     SHORT .align_head

.unrolled:
        ; four pixels in (16 bytes), three dwords out (12 bytes) per pass
        push    ebp
        mov     ebp,ecx
        shr     ebp,2

        ; keep the remaining count for the tail; ECX is a data register below
        push    ecx

        ; Register diagrams list bytes from most to least significant.
        ; Upper case = pixel 0 / pixel 2, lower case = pixel 1 / pixel 3.
.quad_loop:
        mov     eax,[esi]               ; pixel 0                eax = [A0][R0][G0][B0]
        mov     ebx,[esi+4]             ; pixel 1                ebx = [a1][r1][g1][b1]

        bswap   eax                     ;                        eax = [B0][G0][R0][A0]

        bswap   ebx                     ;                        ebx = [b1][g1][r1][a1]

        mov     al,[esi+4+2]            ;                        eax = [B0][G0][R0][r1]
        mov     bh,[esi+4+4+1]          ;                        ebx = [b1][g1][G2][a1]

        ror     eax,8                   ;                        eax = [r1][B0][G0][R0] (done)
        mov     bl,[esi+4+4+2]          ;                        ebx = [b1][g1][G2][R2]

        ror     ebx,16                  ;                        ebx = [G2][R2][b1][g1] (done)
        mov     [edi],eax

        mov     [edi+4],ebx
        mov     ecx,[esi+12]            ; pixel 3                ecx = [a3][r3][g3][b3]

        bswap   ecx                     ;                        ecx = [b3][g3][r3][a3]

        mov     cl,[esi+8]              ;                        ecx = [b3][g3][r3][B2] (done)
        add     esi,BYTE 4*4

        mov     [edi+8],ecx
        add     edi,BYTE 3*4

        dec     ebp
        jnz     .quad_loop

        ; 0..3 pixels may be left over
        pop     ecx
        and     ecx,BYTE 11b
        jz      .done

.tail_loop:
        mov     dl,[esi]
        mov     bl,[esi+1]
        mov     al,[esi+2]
        mov     [edi],al
        mov     [edi+1],bl
        mov     [edi+2],dl
        add     esi,BYTE 4
        add     edi,BYTE 3
        dec     ecx
        jnz     .tail_loop

.done:
        pop     ebp
        retn




;; 32 bit RGB 888 to 16 bit RGB 565
;; Source bytes: [0] = blue, [1] = green, [2] = red ([3] is ignored).
;; Output word:  red[7:3] -> bits 15..11, green[7:2] -> bits 10..5,
;;               blue[7:3] -> bits 4..0.
;; In:       ESI = source, EDI = dest, ECX = pixel count (must not be 0)
;; Out:      ESI and EDI point just past the converted pixels
;; Destroys: EAX, EBX, ECX, EDX, flags
ConvertX86p32_16RGB565:
_ConvertX86p32_16RGB565:
        ; 16 pixels or fewer: plain one-pixel-per-pass loop
        cmp     ecx,BYTE 16
        ja      .align_head

.short_loop:
        mov     bl,[esi+0]              ; blue
        mov     al,[esi+1]              ; green
        mov     ah,[esi+2]              ; red
        shr     ah,3                    ; ah = 000RRRRR
        and     al,11111100b            ; al = GGGGGG00
        shl     eax,3                   ; ax = RRRRRGGG GGG00000
        shr     bl,3                    ; bl = 000BBBBB
        add     al,bl                   ; ax = RRRRRGGG GGGBBBBB
        mov     [edi+0],al
        mov     [edi+1],ah
        add     esi,BYTE 4
        add     edi,BYTE 2
        dec     ecx
        jnz     .short_loop
        retn


.align_head:
        ; If EDI is not dword aligned, convert one pixel first.  That moves
        ; EDI by two bytes, which aligns it when it was word aligned.
        mov     ebx,edi
        and     ebx,BYTE 11b
        jz      .pairs

        mov     bl,[esi+0]              ; blue
        mov     al,[esi+1]              ; green
        mov     ah,[esi+2]              ; red
        shr     ah,3
        and     al,11111100b
        shl     eax,3
        shr     bl,3
        add     al,bl
        mov     [edi+0],al
        mov     [edi+1],ah
        add     esi,BYTE 4
        add     edi,BYTE 2
        dec     ecx

.pairs:
        ; keep the remaining count; its low bit selects the tail pixel
        push    ecx

        ; two pixels per pass
        shr     ecx,1

        ; point both arrays at the end of the paired region ...
        lea     esi,[esi+ecx*8]
        lea     edi,[edi+ecx*4]

        ; ... and index them with a negative counter running up to zero
        neg     ecx
        jmp     SHORT .pair_body

.pair_store:
        ; store the pair computed by the previous pass (ECX already bumped)
        mov     [edi+ecx*4-4],eax
.pair_body:
        mov     eax,[esi+ecx*8]         ; first pixel

        shr     ah,2                    ; ah = 00GGGGGG (first pixel)
        mov     ebx,[esi+ecx*8+4]       ; second pixel

        shr     eax,3                   ; green -> bits 10..5, blue -> bits 4..0
        mov     edx,[esi+ecx*8+4]       ; second pixel again, for its red byte

        shr     bh,2                    ; bh = 00GGGGGG (second pixel)
        mov     dl,[esi+ecx*8+2]        ; dl = red of first pixel

        shl     ebx,13                  ; second pixel green/blue -> bits 26..16
        and     eax,000007FFh

        shl     edx,8                   ; both reds -> bits 31..27 and 15..11
        and     ebx,07FF0000h

        and     edx,0F800F800h
        add     eax,ebx

        add     eax,edx                 ; eax = [second pixel word][first pixel word]
        inc     ecx

        jnz     .pair_store

        ; store the final pair (ECX = 0 here)
        mov     [edi+ecx*4-4],eax

        ; odd remaining count: one more pixel
        pop     ecx
        test    cl,1
        jz      .done

        mov     bl,[esi+0]              ; blue
        mov     al,[esi+1]              ; green
        mov     ah,[esi+2]              ; red
        shr     ah,3
        and     al,11111100b
        shl     eax,3
        shr     bl,3
        add     al,bl
        mov     [edi+0],al
        mov     [edi+1],ah
        add     esi,BYTE 4
        add     edi,BYTE 2

.done:
        retn




;; 32 bit RGB 888 to 16 bit BGR 565
;; Source bytes: [0] = blue, [1] = green, [2] = red ([3] is ignored).
;; Output word:  blue[7:3] -> bits 15..11, green[7:2] -> bits 10..5,
;;               red[7:3] -> bits 4..0.
;; In:       ESI = source, EDI = dest, ECX = pixel count (must not be 0)
;; Out:      ESI and EDI point just past the converted pixels
;; Destroys: EAX, EBX, ECX, EDX, flags

ConvertX86p32_16BGR565:
_ConvertX86p32_16BGR565:

        ; 16 pixels or fewer: plain one-pixel-per-pass loop
        cmp     ecx,BYTE 16
        ja      .align_head

.short_loop:
        mov     ah,[esi+0]              ; blue
        mov     al,[esi+1]              ; green
        mov     bl,[esi+2]              ; red
        shr     ah,3                    ; ah = 000BBBBB
        and     al,11111100b            ; al = GGGGGG00
        shl     eax,3                   ; ax = BBBBBGGG GGG00000
        shr     bl,3                    ; bl = 000RRRRR
        add     al,bl                   ; ax = BBBBBGGG GGGRRRRR
        mov     [edi+0],al
        mov     [edi+1],ah
        add     esi,BYTE 4
        add     edi,BYTE 2
        dec     ecx
        jnz     .short_loop
        retn

.align_head:
        ; If EDI is not dword aligned, convert one pixel first.  That moves
        ; EDI by two bytes, which aligns it when it was word aligned.
        mov     ebx,edi
        and     ebx,BYTE 11b
        jz      .pairs
        mov     ah,[esi+0]              ; blue
        mov     al,[esi+1]              ; green
        mov     bl,[esi+2]              ; red
        shr     ah,3
        and     al,11111100b
        shl     eax,3
        shr     bl,3
        add     al,bl
        mov     [edi+0],al
        mov     [edi+1],ah
        add     esi,BYTE 4
        add     edi,BYTE 2
        dec     ecx

.pairs:
        ; keep the remaining count; its low bit selects the tail pixel
        push    ecx

        ; two pixels per pass
        shr     ecx,1

        ; point both arrays at the end of the paired region ...
        lea     esi,[esi+ecx*8]
        lea     edi,[edi+ecx*4]

        ; ... and index them with a negative counter running up to zero
        neg     ecx
        jmp     SHORT .pair_body

.pair_store:
        ; store the pair computed by the previous pass (ECX already bumped)
        mov     [edi+ecx*4-4],eax
.pair_body:
        mov     edx,[esi+ecx*8+4]       ; second pixel, for its red byte

        mov     bh,[esi+ecx*8+4]        ; bh = blue of second pixel
        mov     ah,[esi+ecx*8]          ; ah = blue of first pixel

        shr     bh,3
        mov     al,[esi+ecx*8+1]        ; al = green of first pixel

        shr     ah,3
        mov     bl,[esi+ecx*8+5]        ; bl = green of second pixel

        shl     eax,3                   ; first pixel blue/green -> bits 15..5
        mov     dl,[esi+ecx*8+2]        ; dl = red of first pixel

        shl     ebx,19                  ; second pixel blue/green -> bits 31..21
        and     eax,0000FFE0h

        shr     edx,3                   ; both reds -> bits 20..16 and 4..0
        and     ebx,0FFE00000h

        and     edx,001F001Fh
        add     eax,ebx

        add     eax,edx                 ; eax = [second pixel word][first pixel word]
        inc     ecx

        jnz     .pair_store

        ; store the final pair (ECX = 0 here)
        mov     [edi+ecx*4-4],eax

        ; odd remaining count: one more pixel
        pop     ecx
        and     ecx,BYTE 1
        jz      .done
        mov     ah,[esi+0]              ; blue
        mov     al,[esi+1]              ; green
        mov     bl,[esi+2]              ; red
        shr     ah,3
        and     al,11111100b
        shl     eax,3
        shr     bl,3
        add     al,bl
        mov     [edi+0],al
        mov     [edi+1],ah
        add     esi,BYTE 4
        add     edi,BYTE 2

.done:
        retn




;; 32 bit RGB 888 to 16 bit RGB 555
;; Source bytes: [0] = blue, [1] = green, [2] = red ([3] is ignored).
;; Output word:  bit 15 = 0, red[7:3] -> bits 14..10,
;;               green[7:3] -> bits 9..5, blue[7:3] -> bits 4..0.
;; In:       ESI = source, EDI = dest, ECX = pixel count (must not be 0)
;; Out:      ESI and EDI point just past the converted pixels
;; Destroys: EAX, EBX, ECX, EDX, flags
ConvertX86p32_16RGB555:
_ConvertX86p32_16RGB555:

        ; 16 pixels or fewer: plain one-pixel-per-pass loop
        cmp     ecx,BYTE 16
        ja      .align_head

.short_loop:
        mov     bl,[esi+0]              ; blue
        mov     al,[esi+1]              ; green
        mov     ah,[esi+2]              ; red
        shr     ah,3                    ; ah = 000RRRRR
        and     al,11111000b            ; al = GGGGG000
        shl     eax,2                   ; ax = 0RRRRRGG GGG00000
        shr     bl,3                    ; bl = 000BBBBB
        add     al,bl                   ; ax = 0RRRRRGG GGGBBBBB
        mov     [edi+0],al
        mov     [edi+1],ah
        add     esi,BYTE 4
        add     edi,BYTE 2
        dec     ecx
        jnz     .short_loop
        retn

.align_head:
        ; If EDI is not dword aligned, convert one pixel first.  That moves
        ; EDI by two bytes, which aligns it when it was word aligned.
        mov     ebx,edi
        and     ebx,BYTE 11b
        jz      .pairs
        mov     bl,[esi+0]              ; blue
        mov     al,[esi+1]              ; green
        mov     ah,[esi+2]              ; red
        shr     ah,3
        and     al,11111000b
        shl     eax,2
        shr     bl,3
        add     al,bl
        mov     [edi+0],al
        mov     [edi+1],ah
        add     esi,BYTE 4
        add     edi,BYTE 2
        dec     ecx

.pairs:
        ; keep the remaining count; its low bit selects the tail pixel
        push    ecx

        ; two pixels per pass
        shr     ecx,1

        ; point both arrays at the end of the paired region ...
        lea     esi,[esi+ecx*8]
        lea     edi,[edi+ecx*4]

        ; ... and index them with a negative counter running up to zero
        neg     ecx
        jmp     SHORT .pair_body

.pair_store:
        ; store the pair computed by the previous pass (ECX already bumped)
        mov     [edi+ecx*4-4],eax
.pair_body:
        mov     eax,[esi+ecx*8]         ; first pixel

        shr     ah,3                    ; ah = 000GGGGG (first pixel)
        mov     ebx,[esi+ecx*8+4]       ; second pixel

        shr     eax,3                   ; green -> bits 9..5, blue -> bits 4..0
        mov     edx,[esi+ecx*8+4]       ; second pixel again, for its red byte

        shr     bh,3                    ; bh = 000GGGGG (second pixel)
        mov     dl,[esi+ecx*8+2]        ; dl = red of first pixel

        shl     ebx,13                  ; second pixel green/blue -> bits 25..16
        and     eax,000007FFh

        shl     edx,7                   ; both reds -> bits 30..26 and 14..10
        and     ebx,07FF0000h

        and     edx,07C007C00h
        add     eax,ebx

        add     eax,edx                 ; eax = [second pixel word][first pixel word]
        inc     ecx

        jnz     .pair_store

        ; store the final pair (ECX = 0 here)
        mov     [edi+ecx*4-4],eax

        ; odd remaining count: one more pixel
        pop     ecx
        and     ecx,BYTE 1
        jz      .done
        mov     bl,[esi+0]              ; blue
        mov     al,[esi+1]              ; green
        mov     ah,[esi+2]              ; red
        shr     ah,3
        and     al,11111000b
        shl     eax,2
        shr     bl,3
        add     al,bl
        mov     [edi+0],al
        mov     [edi+1],ah
        add     esi,BYTE 4
        add     edi,BYTE 2

.done:
        retn




;; 32 bit RGB 888 to 16 bit BGR 555
;; Source bytes: [0] = blue, [1] = green, [2] = red ([3] is ignored).
;; Output word:  bit 15 = 0, blue[7:3] -> bits 14..10,
;;               green[7:3] -> bits 9..5, red[7:3] -> bits 4..0.
;; In:       ESI = source, EDI = dest, ECX = pixel count (must not be 0)
;; Out:      ESI and EDI point just past the converted pixels
;; Destroys: EAX, EBX, ECX, EDX, flags
ConvertX86p32_16BGR555:
_ConvertX86p32_16BGR555:

        ; 16 pixels or fewer: plain one-pixel-per-pass loop
        cmp     ecx,BYTE 16
        ja      .align_head


.short_loop:
        mov     ah,[esi+0]              ; blue
        mov     al,[esi+1]              ; green
        mov     bl,[esi+2]              ; red
        shr     ah,3                    ; ah = 000BBBBB
        and     al,11111000b            ; al = GGGGG000
        shl     eax,2                   ; ax = 0BBBBBGG GGG00000
        shr     bl,3                    ; bl = 000RRRRR
        add     al,bl                   ; ax = 0BBBBBGG GGGRRRRR
        mov     [edi+0],al
        mov     [edi+1],ah
        add     esi,BYTE 4
        add     edi,BYTE 2
        dec     ecx
        jnz     .short_loop
        retn

.align_head:
        ; If EDI is not dword aligned, convert one pixel first.  That moves
        ; EDI by two bytes, which aligns it when it was word aligned.
        mov     ebx,edi
        and     ebx,BYTE 11b
        jz      .pairs
        mov     ah,[esi+0]              ; blue
        mov     al,[esi+1]              ; green
        mov     bl,[esi+2]              ; red
        shr     ah,3
        and     al,11111000b
        shl     eax,2
        shr     bl,3
        add     al,bl
        mov     [edi+0],al
        mov     [edi+1],ah
        add     esi,BYTE 4
        add     edi,BYTE 2
        dec     ecx

.pairs:
        ; keep the remaining count; its low bit selects the tail pixel
        push    ecx

        ; two pixels per pass
        shr     ecx,1

        ; point both arrays at the end of the paired region ...
        lea     esi,[esi+ecx*8]
        lea     edi,[edi+ecx*4]

        ; ... and index them with a negative counter running up to zero
        neg     ecx
        jmp     SHORT .pair_body

.pair_store:
        ; store the pair computed by the previous pass (ECX already bumped)
        mov     [edi+ecx*4-4],eax
.pair_body:
        mov     edx,[esi+ecx*8+4]       ; second pixel, for its red byte

        mov     bh,[esi+ecx*8+4]        ; bh = blue of second pixel
        mov     ah,[esi+ecx*8]          ; ah = blue of first pixel

        shr     bh,3
        mov     al,[esi+ecx*8+1]        ; al = green of first pixel

        shr     ah,3
        mov     bl,[esi+ecx*8+5]        ; bl = green of second pixel

        shl     eax,2                   ; first pixel blue/green -> bits 14..5
        mov     dl,[esi+ecx*8+2]        ; dl = red of first pixel

        shl     ebx,18                  ; second pixel blue/green -> bits 30..21
        and     eax,00007FE0h

        shr     edx,3                   ; both reds -> bits 20..16 and 4..0
        and     ebx,07FE00000h

        and     edx,001F001Fh
        add     eax,ebx

        add     eax,edx                 ; eax = [second pixel word][first pixel word]
        inc     ecx

        jnz     .pair_store

        ; store the final pair (ECX = 0 here)
        mov     [edi+ecx*4-4],eax

        ; odd remaining count: one more pixel
        pop     ecx
        and     ecx,BYTE 1
        jz      .done
        mov     ah,[esi+0]              ; blue
        mov     al,[esi+1]              ; green
        mov     bl,[esi+2]              ; red
        shr     ah,3
        and     al,11111000b
        shl     eax,2
        shr     bl,3
        add     al,bl
        mov     [edi+0],al
        mov     [edi+1],ah
        add     esi,BYTE 4
        add     edi,BYTE 2

.done:
        retn





;; FROM 32 BIT RGB to 8 BIT RGB (rrrgggbb)
;; This routine writes FOUR pixels at once (one dword) and then, if any are
;; left, the trailing one to three pixels byte by byte.
;; Source bytes: [0] = blue, [1] = green, [2] = red ([3] is ignored).
;; Output byte:  red[7:5] -> bits 7..5, green[7:5] -> bits 4..2,
;;               blue[7:6] -> bits 1..0.
;; In:       ESI = source, EDI = dest, ECX = pixel count
;; Out:      ESI and EDI point just past the converted pixels
;; Destroys: EAX, EBX, ECX, EDX, flags

ConvertX86p32_8RGB332:
_ConvertX86p32_8RGB332:

        push    ecx                     ; full count, needed again for the tail

        shr     ecx,2                   ; We will draw 4 pixels at once
        jz      near .tail              ; fewer than four pixels: tail only
                                        ; (explicit NEAR: target is beyond short-jump range)

.quad_loop:
        mov     eax,[esi]               ; first pair of pixels
        mov     edx,[esi+4]

        shr     dl,6                    ; dl = blue bits of pixel 1
        mov     ebx,eax

        shr     al,6                    ; al = blue bits of pixel 0
        and     ah,0e0h                 ; ah = green bits of pixel 0 (still in bits 7..5)

        shr     ebx,16                  ; bl = red byte of pixel 0
        and     dh,0e0h                 ; dh = green bits of pixel 1

        shr     ah,3                    ; green of pixel 0 -> bits 4..2
        and     bl,0e0h                 ; red of pixel 0 stays in bits 7..5

        shr     dh,3                    ; green of pixel 1 -> bits 4..2

        or      al,bl

        mov     ebx,edx
        or      al,ah                   ; al = pixel 0 converted

        shr     ebx,16                  ; bl = red byte of pixel 1
        or      dl,dh

        and     bl,0e0h

        or      dl,bl                   ; dl = pixel 1 converted

        mov     ah,dl                   ; ax = [pixel 1][pixel 0]



        mov     ebx,[esi+8]             ; second pair of pixels

        mov     edx,ebx
        and     bh,0e0h                 ; bh = green bits of pixel 2

        shr     bl,6                    ; bl = blue bits of pixel 2
        and     edx,0e00000h            ; red bits of pixel 2

        shr     edx,16                  ; dl = red of pixel 2 in bits 7..5

        shr     bh,3                    ; green of pixel 2 -> bits 4..2

        ror     eax,16                  ; park pixels 0/1 in the upper half of EAX
        or      bl,dl

        mov     edx,[esi+12]
        or      bl,bh                   ; bl = pixel 2 converted

        mov     al,bl

        mov     ebx,edx
        and     dh,0e0h                 ; dh = green bits of pixel 3

        shr     dl,6                    ; dl = blue bits of pixel 3
        and     ebx,0e00000h            ; red bits of pixel 3

        shr     dh,3                    ; green of pixel 3 -> bits 4..2
        mov     ah,dl

        shr     ebx,16                  ; bl = red of pixel 3 in bits 7..5
        or      ah,dh

        or      ah,bl                   ; ah = pixel 3 converted

        rol     eax,16                  ; eax = [pixel 3][pixel 2][pixel 1][pixel 0]
        add     esi,BYTE 16

        mov     [edi],eax
        add     edi,BYTE 4

        dec     ecx
        jnz     near .quad_loop         ; explicit NEAR: loop body exceeds short-jump range

.tail:
        pop     ecx
        and     ecx,BYTE 3              ; mask out number of pixels to draw

        jz      .done                   ; Nothing to do anymore

.tail_loop:
        mov     eax,[esi]               ; single pixel conversion for trailing pixels

        mov     ebx,eax

        shr     al,6                    ; blue -> bits 1..0
        and     ah,0e0h

        shr     ebx,16                  ; bl = red byte

        shr     ah,3                    ; green -> bits 4..2
        and     bl,0e0h                 ; red stays in bits 7..5

        or      al,ah
        or      al,bl

        mov     [edi],al

        inc     edi
        add     esi,BYTE 4

        dec     ecx
        jnz     .tail_loop

.done:
        retn

; For elf32 output, emit an empty .note.GNU-stack section flagged noexec.
; GNU toolchains read this as "this object does not need an executable stack".
%ifidn __OUTPUT_FORMAT__,elf32
section .note.GNU-stack noalloc noexec nowrite progbits
%endif