Subversion Repositories Kolibri OS

Rev

Rev 9172 | Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
8210 maxcodehac 1
;
2
; pII-optimised MMX format converters for HERMES
3
; Copyright (c) 1998 Christian Nentwich (c.nentwich@cs.ucl.ac.uk)
4
;   and (c) 1999 Jonathan Matthew (jmatthew@uq.net.au)
5
; This source code is licensed under the GNU LGPL
6
;
7
; Please refer to the file COPYING.LIB contained in the distribution for
8
; licensing conditions
9
;
10
; COPYRIGHT NOTICE
11
;
12
; This file partly contains code that is (c) Intel Corporation, specifically
13
; the mode detection routine, and the converter to 15 bit (8 pixel
14
; conversion routine from the mmx programming tutorial pages).
15
;
16
;
17
; These routines aren't exactly pII optimised - it's just that as they
18
; are, they're terrible on p5 MMXs, but less so on pIIs.  Someone needs to
19
; optimise them for p5 MMXs..
20
 
21
BITS 32
22
 
23
 
24
GLOBAL _ConvertMMXpII32_24RGB888
25
GLOBAL _ConvertMMXpII32_16RGB565
26
GLOBAL _ConvertMMXpII32_16BGR565
27
GLOBAL _ConvertMMXpII32_16RGB555
28
GLOBAL _ConvertMMXpII32_16BGR555
29
 
30
EXTERN _mmxreturn
31
 
32
SECTION .data

ALIGN 8

;; 64-bit constants used by the MMX conversion loops in this file.
;; Each one is a single 32-bit pattern stored twice, so that one movq
;; load applies the same pattern to two 32-bit source pixels at once.

; Keeps the low three bytes of every pixel and clears the top byte.
mmx32_rgb888_mask       dd 0x00ffffff, 0x00ffffff

; Per-channel masks for the 565 converters:
;   byte 0 -> upper 5 bits, byte 1 -> upper 6 bits, byte 2 -> upper 5 bits.
mmx32_rgb565_b          dd 0x000000f8, 0x000000f8
mmx32_rgb565_g          dd 0x0000fc00, 0x0000fc00
mmx32_rgb565_r          dd 0x00f80000, 0x00f80000

; Masks for the 555 converters: upper 5 bits of bytes 0 and 2 together,
; and upper 5 bits of byte 1 on its own.
mmx32_rgb555_rb         dd 0x00f800f8, 0x00f800f8
mmx32_rgb555_g          dd 0x0000f800, 0x0000f800

; pmaddwd multipliers for the 555 converters.  The low word scales the
; byte-0 channel and the high word scales the byte-2 channel; the two
; constants differ only in which channel gets the large factor.
mmx32_rgb555_mul        dd 0x20000008, 0x20000008
mmx32_bgr555_mul        dd 0x00082000, 0x00082000
48
 
49
 
50
 
51
SECTION .text
52
 
53
;-----------------------------------------------------------------------
; _ConvertMMXpII32_24RGB888
; Packs 4-byte source pixels into 3-byte destination pixels by dropping
; the highest byte of each source pixel.
; In:    esi = source, edi = destination, ecx = pixel count
; Out:   leaves through _mmxreturn (defined outside this file)
; Clobb: al, bl, ecx, edx, esi, edi, mm0-mm4, mm6, mm7, flags
;-----------------------------------------------------------------------
_ConvertMMXpII32_24RGB888:

        movq    mm6, qword [mmx32_rgb888_mask]  ; mm6 = mask clearing byte 3 of each pixel
        pxor    mm7, mm7                        ; mm7 = 0, filler for the unpacks

        mov     edx, ecx                        ; edx keeps the full pixel count
        and     ecx, -4                         ; ecx = count rounded down to a multiple of 4
        jz      .tail                           ; fewer than four pixels: scalar path only

        ; Four pixels (16 bytes) in, 12 bytes out per pass.
        ; Byte diagrams read from most to least significant byte.
.quad_loop:
        movq    mm0, [esi]                      ; A R G B a r g b   (pixels 1,0)
        movq    mm1, [esi+8]                    ; A R G B a r g b   (pixels 3,2)
        pand    mm0, mm6                        ; 0 R G B 0 r g b
        pand    mm1, mm6                        ; 0 R G B 0 r g b

        movq    mm2, mm0                        ; copy to isolate pixel 1
        movq    mm3, mm1                        ; copy to take the two low bytes of pixel 2
        movq    mm4, mm1                        ; copy to isolate pixel 3

        ; first eight output bytes: pixel 0, pixel 1, two low bytes of pixel 2
        punpckhdq mm2, mm7                      ; 0 0 0 0 0 R G B   (pixel 1)
        punpckldq mm0, mm7                      ; 0 0 0 0 0 r g b   (pixel 0)
        psllq   mm3, 48                         ; g b 0 0 0 0 0 0   (pixel 2, low two bytes)
        psllq   mm2, 24                         ; 0 0 R G B 0 0 0
        por     mm0, mm2                        ; 0 0 R G B r g b
        por     mm0, mm3                        ; g b R G B r g b

        ; last four output bytes: third byte of pixel 2, then pixel 3
        punpckhdq mm4, mm7                      ; 0 0 0 0 0 R G B   (pixel 3)
        punpckldq mm1, mm7                      ; 0 0 0 0 0 r g b   (pixel 2)
        psllq   mm4, 8                          ; 0 0 0 0 R G B 0
        psrlq   mm1, 16                         ; 0 0 0 0 0 0 0 r
        por     mm1, mm4                        ; 0 0 0 0 R G B r

        movq    [edi], mm0
        movd    [edi+8], mm1
        add     esi, byte 16
        add     edi, byte 12
        sub     ecx, byte 4
        jnz     .quad_loop

        ; Scalar path for the remaining count mod 4 pixels.
.tail:
        mov     ecx, edx
        and     ecx, byte 3
        jz      .done
.tail_loop:
        mov     al, [esi]                       ; read the three low bytes ...
        mov     bl, [esi+1]
        mov     dl, [esi+2]
        add     esi, byte 4                     ; ... and step over the fourth
        mov     [edi], al
        mov     [edi+1], bl
        mov     [edi+2], dl
        add     edi, byte 3
        dec     ecx
        jnz     .tail_loop
.done:
        jmp     _mmxreturn
112
 
113
 
114
 
115
;-----------------------------------------------------------------------
; _ConvertMMXpII32_16RGB565
; Converts 4-byte source pixels into 16-bit pixels laid out as
;   bits 15-11 = top 5 bits of source byte 2
;   bits 10-5  = top 6 bits of source byte 1
;   bits  4-0  = top 5 bits of source byte 0
; In:    esi = source, edi = destination, ecx = pixel count
; Out:   leaves through _mmxreturn (defined outside this file)
; Clobb: eax, ebx, ecx, edx, esi, edi, mm0-mm7, flags
;-----------------------------------------------------------------------
_ConvertMMXpII32_16RGB565:

        movq    mm5, [mmx32_rgb565_b]   ; mm5 = byte-0 mask
        movq    mm6, [mmx32_rgb565_g]   ; mm6 = byte-1 mask
        movq    mm7, [mmx32_rgb565_r]   ; mm7 = byte-2 mask

        mov     edx, ecx                ; edx keeps the full pixel count
        shr     ecx, 2                  ; ecx = number of 4-pixel groups
        jz      .tail

        ; Four pixels in, four 16-bit pixels out per pass.
.quad_loop:
        movq    mm0, [esi]              ; pixels 0,1
        movq    mm4, [esi+8]            ; pixels 2,3
        movq    mm1, mm0
        movq    mm3, mm0
        movq    mm2, mm4

        ; pixels 0,1
        pand    mm1, mm5                ; byte-0 channel
        pand    mm0, mm6                ; byte-1 channel
        pand    mm3, mm7                ; byte-2 channel, left in place for the pack
        pslld   mm1, 2                  ; 000000bb bbb00000
        por     mm0, mm1                ; ggggggbb bbb00000

        ; pixels 2,3 (mm1 is free again)
        movq    mm1, mm4
        pand    mm2, mm5                ; byte-0 channel
        pand    mm4, mm6                ; byte-1 channel
        pand    mm1, mm7                ; byte-2 channel
        pslld   mm2, 2
        por     mm4, mm2                ; ggggggbb bbb00000

        psrld   mm0, 5                  ; 00000ggg gggbbbbb
        psrld   mm4, 5                  ; 00000ggg gggbbbbb

        ; Each masked dword of mm3/mm1 is 00 f8 00 00, i.e. the words
        ; 0000h and 00f8h; packing words to bytes yields rrrrr000 in the
        ; high byte of every resulting 16-bit lane.
        packuswb mm3, mm1
        packssdw mm0, mm4               ; four dwords -> four words
        por     mm0, mm3                ; rrrrrggg gggbbbbb  x4
        movq    [edi], mm0

        add     esi, 16
        add     edi, 8
        dec     ecx
        jnz     .quad_loop

        ; Scalar path for the remaining count mod 4 pixels.
.tail:
        mov     ecx, edx
        and     ecx, byte 3
        jz      .done
.tail_loop:
        mov     al, [esi]               ; byte 0
        mov     bh, [esi+1]             ; byte 1
        mov     ah, [esi+2]             ; byte 2
        ; Only bits 15-10 of ebx survive the shift and mask, and those
        ; all come from bh, so the rest of ebx does not matter.
        shr     ebx, 5
        and     ebx, 07E0h              ; 00000ggg ggg00000
        shr     al, 3
        and     eax, 0F81Fh             ; rrrrr000 000bbbbb
        or      eax, ebx                ; fields are disjoint
        mov     [edi], ax
        add     esi, byte 4
        add     edi, byte 2
        dec     ecx
        jnz     .tail_loop

.done:
        jmp     _mmxreturn
180
 
181
 
182
;-----------------------------------------------------------------------
; _ConvertMMXpII32_16BGR565
; Converts 4-byte source pixels into 16-bit pixels laid out as
;   bits 15-11 = top 5 bits of source byte 0
;   bits 10-5  = top 6 bits of source byte 1
;   bits  4-0  = top 5 bits of source byte 2
; In:    esi = source, edi = destination, ecx = pixel count
; Out:   leaves through _mmxreturn (defined outside this file)
; Clobb: eax, ebx, ecx, edx, esi, edi, mm0-mm7, flags
;-----------------------------------------------------------------------
_ConvertMMXpII32_16BGR565:

        movq    mm5, [mmx32_rgb565_r]   ; mm5 = byte-2 mask
        movq    mm6, [mmx32_rgb565_g]   ; mm6 = byte-1 mask
        movq    mm7, [mmx32_rgb565_b]   ; mm7 = byte-0 mask

        mov     edx, ecx                ; edx keeps the full pixel count
        shr     ecx, 2                  ; ecx = number of 4-pixel groups
        jz      .tail

        ; Four pixels in, four 16-bit pixels out per pass.
.quad_loop:
        movq    mm0, [esi]              ; pixels 0,1
        movq    mm4, [esi+8]            ; pixels 2,3
        movq    mm1, mm0
        movq    mm3, mm0
        movq    mm2, mm4

        ; pixels 0,1
        pand    mm1, mm5                ; byte-2 channel
        pand    mm0, mm6                ; byte-1 channel
        pand    mm3, mm7                ; byte-0 channel
        psrld   mm1, 14                 ; 000000rr rrr00000
        psllq   mm3, 16                 ; move byte 0 up to byte 2 for the pack
        por     mm0, mm1                ; ggggggrr rrr00000

        ; pixels 2,3 (mm1 is free again)
        movq    mm1, mm4
        pand    mm2, mm5                ; byte-2 channel
        pand    mm4, mm6                ; byte-1 channel
        pand    mm1, mm7                ; byte-0 channel
        psrld   mm2, 14
        psllq   mm1, 16
        por     mm4, mm2                ; ggggggrr rrr00000

        psrld   mm0, 5                  ; 00000ggg gggrrrrr
        psrld   mm4, 5                  ; 00000ggg gggrrrrr

        packuswb mm3, mm1               ; bbbbb000 00000000  x4
        packssdw mm0, mm4               ; 00000ggg gggrrrrr  x4
        por     mm0, mm3                ; bbbbbggg gggrrrrr  x4
        movq    [edi], mm0

        add     esi, byte 16
        add     edi, byte 8
        dec     ecx
        jnz     .quad_loop

        ; Scalar path for the remaining count mod 4 pixels; edx counts down.
.tail:
        and     edx, byte 3
        jz      .done
.tail_loop:
        mov     al, [esi+2]             ; byte 2 -> low field
        mov     bh, [esi+1]             ; byte 1 -> middle field
        mov     ah, [esi]               ; byte 0 -> high field
        ; Only bits 15-10 of ebx survive the shift and mask, and those
        ; all come from bh, so the rest of ebx does not matter.
        shr     ebx, 5
        and     ebx, 07E0h              ; 00000ggg ggg00000
        shr     al, 3
        and     eax, 0F81Fh             ; bbbbb000 000rrrrr
        or      eax, ebx                ; fields are disjoint
        mov     [edi], ax
        add     esi, byte 4
        add     edi, byte 2
        dec     edx
        jnz     .tail_loop

.done:
        jmp     _mmxreturn
249
 
250
;-----------------------------------------------------------------------
; _ConvertMMXpII32_16BGR555 / _ConvertMMXpII32_16RGB555
; Convert 4-byte source pixels into 15-bit pixels stored in 16 bits.
;
;   RGB555: bits 14-10 = top 5 bits of source byte 2
;           bits  9-5  = top 5 bits of source byte 1
;           bits  4-0  = top 5 bits of source byte 0
;   BGR555: same, with the byte-0 and byte-2 channels exchanged.
;
; Both entry points share one body.  The only difference is the pmaddwd
; multiplier kept in mm7, which decides where the two outer channels
; land.  Every pixel, including the leftover ones at the end of the
; run, is converted with that multiplier, so the BGR entry point
; produces BGR output for the whole run.
;
; The loop loads only the 16 bytes it is about to convert, so nothing
; beyond the last source pixel is ever read.
;
; In:    esi = source, edi = destination, ecx = pixel count
; Out:   leaves through _mmxreturn (defined outside this file)
; Clobb: eax, ecx, edx, esi, edi, mm0-mm3, mm5-mm7, flags
;-----------------------------------------------------------------------
_ConvertMMXpII32_16BGR555:

        movq    mm7, qword [mmx32_bgr555_mul]   ; byte 0 -> high field, byte 2 -> low field
        jmp     _convert_bgr555_cheat

_ConvertMMXpII32_16RGB555:

        movq    mm7, qword [mmx32_rgb555_mul]   ; byte 2 -> high field, byte 0 -> low field

_convert_bgr555_cheat:
        movq    mm6, qword [mmx32_rgb555_g]     ; mm6 = byte-1 mask
        movq    mm5, qword [mmx32_rgb555_rb]    ; mm5 = byte-0 and byte-2 mask

        mov     edx, ecx                        ; edx keeps the full pixel count
        shr     ecx, 2                          ; ecx = number of 4-pixel groups
        jz      .tail

        ; Four pixels in, four 16-bit pixels out per pass.
.quad_loop:
        movq    mm0, [esi]                      ; pixels 0,1
        movq    mm2, [esi+8]                    ; pixels 2,3
        movq    mm1, mm0
        movq    mm3, mm2

        pand    mm1, mm5                        ; words: (byte 0 & f8), (byte 2 & f8)
        pand    mm3, mm5
        ; pmaddwd multiplies each word by its factor (8 or 2000h) and
        ; sums the pair.  The products occupy bits 10-6 and 20-16, so
        ; the sum is simply both channels shifted into place.
        pmaddwd mm1, mm7
        pmaddwd mm3, mm7

        pand    mm0, mm6                        ; middle channel, bits 15-11
        pand    mm2, mm6
        por     mm1, mm0                        ; bits 20-6 now hold the pixel
        por     mm3, mm2
        psrld   mm1, 6                          ; bits 14-0
        psrld   mm3, 6

        packssdw mm1, mm3                       ; values fit in 15 bits: no saturation
        movq    [edi], mm1

        add     esi, byte 16
        add     edi, byte 8
        dec     ecx
        jnz     .quad_loop

        ; Remaining count mod 4 pixels, one at a time, with the same
        ; formula and the same multiplier as the loop above.
.tail:
        and     edx, byte 3
        jz      .done
.tail_loop:
        movd    mm0, [esi]                      ; one pixel, upper half of mm0 zeroed
        movq    mm1, mm0
        pand    mm1, mm5
        pmaddwd mm1, mm7
        pand    mm0, mm6
        por     mm1, mm0
        psrld   mm1, 6
        movd    eax, mm1
        mov     [edi], ax

        add     esi, byte 4
        add     edi, byte 2
        dec     edx
        jnz     .tail_loop

.done:
        jmp     _mmxreturn
384