Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
6147 serge 1
;******************************************************************************
2
;* optimized bswap buffer functions
3
;* Copyright (c) 2008 Loren Merritt
4
;* Copyright (c) 2003-2013 Michael Niedermayer
5
;* Copyright (c) 2013 Daniel Kang
6
;*
7
;* This file is part of FFmpeg.
8
;*
9
;* FFmpeg is free software; you can redistribute it and/or
10
;* modify it under the terms of the GNU Lesser General Public
11
;* License as published by the Free Software Foundation; either
12
;* version 2.1 of the License, or (at your option) any later version.
13
;*
14
;* FFmpeg is distributed in the hope that it will be useful,
15
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
16
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17
;* Lesser General Public License for more details.
18
;*
19
;* You should have received a copy of the GNU Lesser General Public
20
;* License along with FFmpeg; if not, write to the Free Software
21
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
22
;******************************************************************************
23
 
24
%include "libavutil/x86/x86util.asm"
25
 
26
SECTION_RODATA
27
; pshufb control mask: within each group of four bytes the source byte
; indices are listed in reverse order (3,2,1,0 / 7,6,5,4 / ...), so a pshufb
; with this mask reverses the byte order of every 32-bit element of a
; 16-byte vector. It is loaded into m2 by the ssse3 build of bswap32_buf.
pb_bswap32: db 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12
28
 
29
; NOTE(review): pb_80 is declared as an external symbol here but is not
; referenced anywhere in the code visible in this file - confirm whether
; the declaration is still needed.
cextern pb_80
30
 
31
SECTION .text
32
 
33
; BSWAP_LOOPS %1 - vector part of the 32-bit byte-swap copy.
;
; %1 = aligned/unaligned: the letter passed here (the callers below use
;      "a" and "u") is appended to "mov" to pick the vector load/store
;      (mov%1) and is also appended to the local labels so the macro can be
;      expanded twice inside one function.
;
; On entry:  r0 = destination pointer, r1 = source pointer,
;            r2d = number of 32-bit elements.
;            In the ssse3 build m2 must already hold the pb_bswap32 mask.
; On exit:   r0/r1 advanced past every element handled here,
;            r2d = original element count (restored from r3d) so the caller
;            can test its low bits for the remaining tail.
; Clobbers:  r3d, m0, m1, and (non-ssse3 build only) m2, m3.
; Control:   jumps to the label .left, which is NOT defined in this macro;
;            the enclosing function has to provide it. When the "4 elements"
;            step is executed the macro simply falls through at its end.
; NOTE(review): sar is an arithmetic shift, so a negative count is not
; handled specially - presumably callers always pass a count >= 0.
34
%macro BSWAP_LOOPS  1
35
    mov      r3d, r2d               ; keep the full element count for the tail tests
36
    sar      r2d, 3                 ; r2d = count / 8 = number of 32-byte iterations
37
    jz       .left4_%1              ; fewer than 8 elements: skip the main loop
38
; Main loop: 8 elements (two 16-byte vectors) per iteration.
.loop8_%1:
39
    mov%1    m0, [r1 +  0]
40
    mov%1    m1, [r1 + 16]
41
%if cpuflag(ssse3)
42
    pshufb   m0, m2                 ; one byte shuffle reverses each 32-bit element
43
    pshufb   m1, m2
44
    mov%1    [r0 +  0], m0
45
    mov%1    [r0 + 16], m1
46
%else
47
; Without pshufb the swap is done in two steps:
;   1) pshuflw/pshufhw with 10110001b exchange the two 16-bit halves of
;      every 32-bit element (word selectors 1,0,3,2);
;   2) (x << 8) | (x >> 8) on 16-bit lanes exchanges the two bytes of
;      every 16-bit word.
    pshuflw  m0, m0, 10110001b
48
    pshuflw  m1, m1, 10110001b
49
    pshufhw  m0, m0, 10110001b
50
    pshufhw  m1, m1, 10110001b
51
    mova     m2, m0                 ; copies for the right-shifted half
52
    mova     m3, m1
53
    psllw    m0, 8
54
    psllw    m1, 8
55
    psrlw    m2, 8
56
    psrlw    m3, 8
57
    por      m2, m0                 ; results end up in m2 / m3
58
    por      m3, m1
59
    mov%1    [r0 +  0], m2
60
    mov%1    [r0 + 16], m3
61
%endif
62
    add      r0, 32
63
    add      r1, 32
64
    dec      r2d
65
    jnz      .loop8_%1
66
; After the main loop: handle one extra 16-byte vector if bit 2 of the
; count is set (i.e. 4 more elements remain).
.left4_%1:
67
    mov      r2d, r3d               ; restore the full count for the caller's tail code
68
    test     r3d, 4
69
    jz       .left                  ; no group of 4: go straight to the caller's tail
70
    mov%1    m0, [r1]
71
%if cpuflag(ssse3)
72
    pshufb   m0, m2
73
    mov%1    [r0], m0
74
%else
75
    pshuflw  m0, m0, 10110001b      ; same two-step swap as in the main loop
76
    pshufhw  m0, m0, 10110001b
77
    mova     m2, m0
78
    psllw    m0, 8
79
    psrlw    m2, 8
80
    por      m2, m0
81
    mov%1    [r0], m2
82
%endif
83
    add      r1, 16
84
    add      r0, 16
85
%endmacro
86
 
87
; void ff_bswap_buf(uint32_t *dst, const uint32_t *src, int w);
;
; BSWAP32_BUF - emits one bswap32_buf function for the instruction set
; currently selected (the body is chosen with cpuflag(ssse3)).
;
; The function reads 32-bit elements from r1, reverses the byte order of
; each one and stores the result to r0; r2d is the element count.
; Mapping r0/r1/r2 to dst/src/w follows the argument order of the prototype
; above - presumably via the usual cglobal argument numbering; confirm
; against x86inc.
;
; Structure:
;   1. If both pointers are 16-byte aligned, run BSWAP_LOOPS with aligned
;      vector moves, otherwise with unaligned ones. That handles the count
;      in groups of 8 and then one optional group of 4.
;   2. .left handles the remaining 0-3 elements.
;
; NOTE(review): the prototype comment names ff_bswap_buf while cglobal
; declares bswap32_buf - confirm the exported symbol name against the C
; caller.
88
%macro BSWAP32_BUF 0
89
%if cpuflag(ssse3)
90
cglobal bswap32_buf, 3,4,3
91
    mov      r3, r1
92
    mova     m2, [pb_bswap32]       ; byte-reversal mask used by every pshufb below
93
%else
94
; NOTE(review): 5 vector registers are requested here, but the visible
; non-ssse3 code only touches m0-m3 - confirm whether 4 would suffice.
cglobal bswap32_buf, 3,4,5
95
    mov      r3, r1
96
%endif
97
    or       r3, r0                 ; r3 = src | dst: low bits set if either is misaligned
98
    test     r3, 15
99
    jz       .start_align           ; both pointers are multiples of 16
100
    BSWAP_LOOPS  u
101
    jmp      .left                  ; skip over the aligned copy of the loops
102
.start_align:
103
    BSWAP_LOOPS  a
104
; Tail: r2d holds the original element count again (restored by
; BSWAP_LOOPS); only its two low bits still describe unprocessed elements.
.left:
105
%if cpuflag(ssse3)
106
    test     r2d, 2
107
    jz       .left1
108
    movq     m0, [r1]               ; two elements (8 bytes) in the low half of m0
109
    pshufb   m0, m2
110
    movq     [r0], m0
111
    add      r1, 8
112
    add      r0, 8
113
.left1:
114
    test     r2d, 1
115
    jz       .end
116
    mov      r2d, [r1]              ; last element; the count is no longer needed
117
    bswap    r2d
118
    mov      [r0], r2d
119
%else
120
    and      r2d, 3                 ; r2d = number of leftover elements (0-3)
121
    jz       .end
122
; Scalar loop: one element per iteration, r3d is reused as the data temp.
.loop2:
123
    mov      r3d, [r1]
124
    bswap    r3d
125
    mov      [r0], r3d
126
    add      r1, 4
127
    add      r0, 4
128
    dec      r2d
129
    jnz      .loop2
130
%endif
131
.end:
132
    RET
133
%endmacro
134
 
135
; Instantiate bswap32_buf twice. The INIT_XMM argument determines what
; cpuflag(ssse3) evaluates to inside BSWAP32_BUF / BSWAP_LOOPS:
;   sse2  -> the pshuflw/pshufhw + shift/or implementation,
;   ssse3 -> the pshufb implementation using the pb_bswap32 mask.
; (INIT_XMM itself comes from the included x86util/x86inc macros;
; presumably it also gives each instance a distinct symbol suffix -
; confirm against x86inc.)
INIT_XMM sse2
136
BSWAP32_BUF
137
 
138
INIT_XMM ssse3
139
BSWAP32_BUF