Subversion Repositories Kolibri OS

Rev

Rev 8210 | Go to most recent revision | Show entire file | Regard whitespace | Details | Blame | Last modification | View Log | RSS feed

Rev 8210 Rev 9172
Line 18... Line 18...
18
; are, they're terrible on p5 MMXs, but less so on pIIs.  Someone needs to
18
; are, they're terrible on p5 MMXs, but less so on pIIs.  Someone needs to
19
; optimise them for p5 MMXs..
19
; optimise them for p5 MMXs..
Line 20... Line 20...
20
 
20
 
Line -... Line 21...
-
 
21
BITS 32
Line 21... Line 22...
21
BITS 32
22
 
22
 
23
%include "common.inc"
23
	
24
	
24
GLOBAL _ConvertMMXpII32_24RGB888
25
SDL_FUNC _ConvertMMXpII32_24RGB888
25
GLOBAL _ConvertMMXpII32_16RGB565
26
SDL_FUNC _ConvertMMXpII32_16RGB565
26
GLOBAL _ConvertMMXpII32_16BGR565
27
SDL_FUNC _ConvertMMXpII32_16BGR565
27
GLOBAL _ConvertMMXpII32_16RGB555
28
SDL_FUNC _ConvertMMXpII32_16RGB555
28
GLOBAL _ConvertMMXpII32_16BGR555
29
SDL_FUNC _ConvertMMXpII32_16BGR555
-
 
30
 
-
 
31
;; Macros for conversion routines
29
 
32
 
-
 
33
%macro _push_immq_mask 1
30
EXTERN _mmxreturn
34
	push dword %1
-
 
35
	push dword %1
-
 
36
%endmacro
-
 
37
 
31
 
38
%macro load_immq 2
32
SECTION .data
39
	_push_immq_mask %2
33
	
40
	movq %1, [esp]
-
 
41
%endmacro
-
 
42
 
-
 
43
%macro pand_immq 2
34
ALIGN 8
44
	_push_immq_mask %2
35
 
45
	pand %1, [esp]
-
 
46
%endmacro
36
;; Constants for conversion routines
47
 
-
 
48
%define CLEANUP_IMMQ_LOADS(num) \
37
 
49
	add esp, byte 8 * num
38
mmx32_rgb888_mask dd 00ffffffh,00ffffffh
50
 
39
 
51
%define mmx32_rgb888_mask 00ffffffh
40
mmx32_rgb565_b dd 000000f8h, 000000f8h
52
%define mmx32_rgb565_b 000000f8h
41
mmx32_rgb565_g dd 0000fc00h, 0000fc00h
53
%define mmx32_rgb565_g 0000fc00h
42
mmx32_rgb565_r dd 00f80000h, 00f80000h
54
%define mmx32_rgb565_r 00f80000h
43
 
55
 
44
mmx32_rgb555_rb dd 00f800f8h,00f800f8h
56
%define mmx32_rgb555_rb 00f800f8h
45
mmx32_rgb555_g dd 0000f800h,0000f800h
-
 
46
mmx32_rgb555_mul dd 20000008h,20000008h
-
 
Line 47... Line 57...
47
mmx32_bgr555_mul dd 00082000h,00082000h
57
%define mmx32_rgb555_g 0000f800h
Line 48... Line 58...
48
 
58
%define mmx32_rgb555_mul 20000008h
Line 49... Line 59...
49
 
59
%define mmx32_bgr555_mul 00082000h
50
			
60
 
-
 
61
SECTION .text
51
SECTION .text
62
 
Line 52... Line 63...
52
 
63
_ConvertMMXpII32_24RGB888:
53
_ConvertMMXpII32_24RGB888:
64
 
54
 
65
        ; set up mm6 as the mask, mm7 as zero
Line 106... Line 117...
106
        add esi, BYTE 4
117
        add esi, BYTE 4
107
        add edi, BYTE 3
118
        add edi, BYTE 3
108
        dec ecx
119
        dec ecx
109
        jnz .L3
120
        jnz .L3
110
.L4:
121
.L4:
111
        jmp _mmxreturn
122
        retn
Line 112... Line 123...
112
 
123
 
Line 113... Line 124...
113
 
124
 
114
 
125
 
115
_ConvertMMXpII32_16RGB565:
126
_ConvertMMXpII32_16RGB565:
116
 
127
 
-
 
128
        ; set up masks
Line 117... Line 129...
117
        ; set up masks
129
        load_immq mm5, mmx32_rgb565_b
118
        movq mm5, [mmx32_rgb565_b]
130
        load_immq mm6, mmx32_rgb565_g
119
        movq mm6, [mmx32_rgb565_g]
131
        load_immq mm7, mmx32_rgb565_r
120
        movq mm7, [mmx32_rgb565_r]
132
        CLEANUP_IMMQ_LOADS(3)
Line 174... Line 186...
174
        add edi, BYTE 2
186
        add edi, BYTE 2
175
        dec ecx
187
        dec ecx
176
        jnz .L3
188
        jnz .L3
Line 177... Line 189...
177
 
189
 
178
.L4:
190
.L4:
Line 179... Line 191...
179
	jmp _mmxreturn
191
	retn
Line 180... Line 192...
180
 
192
 
181
	
193
	
182
_ConvertMMXpII32_16BGR565:
194
_ConvertMMXpII32_16BGR565:
-
 
195
 
Line 183... Line 196...
183
 
196
        load_immq mm5, mmx32_rgb565_r
184
        movq mm5, [mmx32_rgb565_r]
197
        load_immq mm6, mmx32_rgb565_g
185
        movq mm6, [mmx32_rgb565_g]
198
        load_immq mm7, mmx32_rgb565_b
186
        movq mm7, [mmx32_rgb565_b]
199
        CLEANUP_IMMQ_LOADS(3)
Line 243... Line 256...
243
        add edi, BYTE 2
256
        add edi, BYTE 2
244
        dec edx
257
        dec edx
245
        jnz .L3
258
        jnz .L3
Line 246... Line 259...
246
 
259
 
247
.L4:
260
.L4:
Line 248... Line 261...
248
        jmp _mmxreturn
261
        retn
Line 249... Line 262...
249
 
262
 
250
_ConvertMMXpII32_16BGR555:
263
_ConvertMMXpII32_16BGR555:
251
 
264
 
Line 252... Line 265...
252
        ; the 16BGR555 converter is identical to the RGB555 one,
265
        ; the 16BGR555 converter is identical to the RGB555 one,
253
        ; except it uses a different multiplier for the pmaddwd
266
        ; except it uses a different multiplier for the pmaddwd
Line 254... Line 267...
254
        ; instruction.  cool huh.
267
        ; instruction.  cool huh.
255
 
268
 
256
        movq mm7, qword [mmx32_bgr555_mul]
269
        load_immq mm7, mmx32_bgr555_mul
257
        jmp _convert_bgr555_cheat
270
        jmp _convert_bgr555_cheat
258
 
271
 
259
; This is the same as the Intel version.. they obviously went to
272
; This is the same as the Intel version.. they obviously went to
Line 260... Line 273...
260
; much more trouble to expand/coil the loop than I did, so theirs
273
; much more trouble to expand/coil the loop than I did, so theirs
261
; would almost certainly be faster, even if only a little.
274
; would almost certainly be faster, even if only a little.
262
; I did rename 'mmx32_rgb555_add' to 'mmx32_rgb555_mul', which is
275
; I did rename 'mmx32_rgb555_add' to 'mmx32_rgb555_mul', which is
-
 
276
; (I think) a more accurate name..
Line 263... Line 277...
263
; (I think) a more accurate name..
277
_ConvertMMXpII32_16RGB555:
Line 264... Line 278...
264
_ConvertMMXpII32_16RGB555:
278
 
265
 
279
	load_immq mm7, mmx32_rgb555_mul
266
        movq mm7,qword [mmx32_rgb555_mul]
280
_convert_bgr555_cheat:
Line 267... Line 281...
267
_convert_bgr555_cheat:
281
	load_immq mm6, mmx32_rgb555_g
Line 268... Line 282...
268
        movq mm6,qword [mmx32_rgb555_g]
282
	CLEANUP_IMMQ_LOADS(2)
Line 269... Line 283...
269
        
283
        
270
	mov edx,ecx		           ; Save ecx 
284
	mov edx,ecx		           ; Save ecx 
Line 271... Line 285...
271
 
285
 
272
        and ecx,BYTE 0fffffff8h            ; clear lower three bits
286
        and ecx,DWORD 0fffffff8h            ; clear lower three bits
Line 273... Line 287...
273
	jnz .L_OK
287
	jnz .L_OK
274
        jmp .L2 
288
        jmp near .L2 
Line -... Line 289...
-
 
289
 
-
 
290
.L_OK:
275
 
291
	
276
.L_OK:
292
	movq mm2,[esi+8]
Line 277... Line 293...
277
	
293
 
278
	movq mm2,[esi+8]
294
	movq mm0,[esi]
Line 300... Line 316...
300
	por mm1,mm0
316
	por mm1,mm0
Line 301... Line 317...
301
 
317
 
302
	movq mm0,mm4
318
	movq mm0,mm4
Line 303... Line 319...
303
	psrld mm1,6
319
	psrld mm1,6
304
 
320
 
Line 305... Line 321...
305
	pand mm0,qword [mmx32_rgb555_rb]
321
	pand_immq mm0, mmx32_rgb555_rb
306
	packssdw mm1,mm3
322
	packssdw mm1,mm3
Line 307... Line 323...
307
 
323
 
308
	movq mm3,mm5
324
	movq mm3,mm5
Line 309... Line 325...
309
	pmaddwd mm0,mm7
325
	pmaddwd mm0,mm7
310
 
326
 
Line 327... Line 343...
327
	psrld mm5,6
343
	psrld mm5,6
Line 328... Line 344...
328
 
344
 
329
	movq mm3,mm2
345
	movq mm3,mm2
Line 330... Line 346...
330
	movq mm1,mm0
346
	movq mm1,mm0
331
 
347
 
Line 332... Line 348...
332
	pand mm3,qword [mmx32_rgb555_rb]
348
	pand_immq mm3, mmx32_rgb555_rb
333
	packssdw mm5,mm4
349
	packssdw mm5,mm4
Line -... Line 350...
-
 
350
 
-
 
351
	pand_immq mm1, mmx32_rgb555_rb
334
 
352
	pand mm2,mm6
335
	pand mm1,qword [mmx32_rgb555_rb]
353
 
Line 336... Line 354...
336
	pand mm2,mm6
354
	CLEANUP_IMMQ_LOADS(4)
337
 
355
 
Line 378... Line 396...
378
 
396
 
379
	dec ecx
397
	dec ecx
Line 380... Line 398...
380
	jnz .L3	
398
	jnz .L3	
381
 
399
 
382
.L4:		
-
 
383
	jmp _mmxreturn
-
 
Line -... Line 400...
-
 
400
.L4:		
-
 
401
	retn
-
 
402