; Subversion repository: Kolibri OS -- revision 6148, author serge
;******************************************************************************
;* Copyright (c) 2012 Michael Niedermayer
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;******************************************************************************
; cglobal, INIT_*, SECTION_RODATA, VBROADCASTSS, SPLATW, REP_RET, RET and the
; mN / mmsize / SUFFIX symbols used below are not defined in this file; they
; are presumably supplied by this include (directly or indirectly).
%include "libavutil/x86/x86util.asm"

; NOTE(review): the "32" argument is presumably the section alignment in
; bytes; both tables below are exactly 32 bytes long.
SECTION_RODATA 32
; Vectors of ones. The int16 kernels shift them to build the rounding term;
; MIX1_INT16 additionally interleaves w1 with the samples as a "* 1" factor.
dw1: times 8  dd 1              ; 8 dwords, each 1
w1 : times 16 dw 1              ; 16 words, each 1

SECTION .text
;------------------------------------------------------------------------------
; MIX2_FLT %1        (%1 = a: aligned variant, u: unaligned variant)
;
; Emits mix_2_1_%1_float(out, in1, in2, coeffp, index1, index2, len):
;     out[i] = in1[i] * coeffp[index1] + in2[i] * coeffp[index2]
; on 32-bit floats, with len counted in elements.
;
; * The "a" variant tests in1/in2/out against mmsize-1 and, if any of them is
;   misaligned, jumps to mix_2_1_float_u_int<SUFFIX>, a label the "u" variant
;   places right after its own cglobal line. The "u" variant therefore has to
;   be instantiated for the same instruction set, and whatever cglobal emits
;   has to be identical for both variants.
; * Every pass handles 2*mmsize bytes per buffer and the exit test sits after
;   the body, so one pass always runs and there is no tail handling.
;   NOTE(review): callers presumably guarantee a suitable len and/or buffer
;   padding -- confirm.
;------------------------------------------------------------------------------
%macro MIX2_FLT 1
cglobal mix_2_1_%1_float, 7, 7, 6, out, in1, in2, coeffp, index1, index2, len
%ifidn %1, a
    ; Any misaligned pointer: continue in the unaligned variant instead.
    test in1q, mmsize-1
        jne mix_2_1_float_u_int %+ SUFFIX
    test in2q, mmsize-1
        jne mix_2_1_float_u_int %+ SUFFIX
    test outq, mmsize-1
        jne mix_2_1_float_u_int %+ SUFFIX
%else
; Landing point for the aligned variant's fallback jump.
mix_2_1_float_u_int %+ SUFFIX:
%endif
    VBROADCASTSS m4, [coeffpq + 4*index1q]     ; m4 = coeffp[index1] in every lane
    VBROADCASTSS m5, [coeffpq + 4*index2q]     ; m5 = coeffp[index2] in every lane
    ; len *= 4: element count -> byte count.
    ; NOTE(review): 32-bit operand form here, while MIX1_FLT uses lenq. On
    ; x86-64 a 32-bit register write clears the upper half of the register;
    ; confirm which width the callers' len type actually needs.
    shl lend    , 2
    ; Point all three buffers at their end and walk a negative byte offset up
    ; towards zero, so the loop needs only a single counter.
    add in1q    , lenq
    add in2q    , lenq
    add outq    , lenq
    neg lenq
.next:
%ifidn %1, a
    ; Inputs passed the mmsize alignment test at entry, so they are used as
    ; direct memory operands.
    mulps        m0, m4, [in1q + lenq         ]
    mulps        m1, m5, [in2q + lenq         ]
    mulps        m2, m4, [in1q + lenq + mmsize]
    mulps        m3, m5, [in2q + lenq + mmsize]
%else
    movu         m0, [in1q + lenq         ]
    movu         m1, [in2q + lenq         ]
    movu         m2, [in1q + lenq + mmsize]
    movu         m3, [in2q + lenq + mmsize]
    mulps        m0, m0, m4
    mulps        m1, m1, m5
    mulps        m2, m2, m4
    mulps        m3, m3, m5
%endif
    addps        m0, m0, m1                    ; first vector:  in1*c1 + in2*c2
    addps        m2, m2, m3                    ; second vector: in1*c1 + in2*c2
    mov%1  [outq + lenq         ], m0
    mov%1  [outq + lenq + mmsize], m2
    add        lenq, mmsize*2
        jl .next                               ; loop while the offset is still negative
    REP_RET
%endmacro
;------------------------------------------------------------------------------
; MIX1_FLT %1        (%1 = a: aligned variant, u: unaligned variant)
;
; Emits mix_1_1_%1_float(out, in, coeffp, index, len):
;     out[i] = in[i] * coeffp[index]
; on 32-bit floats, with len counted in elements.
;
; * The "a" variant tests in/out against mmsize-1 and, if either is
;   misaligned, jumps to mix_1_1_float_u_int<SUFFIX>, a label the "u" variant
;   places right after its own cglobal line. The "u" variant therefore has to
;   be instantiated for the same instruction set, and whatever cglobal emits
;   has to be identical for both variants.
; * Every pass handles 2*mmsize bytes and the exit test sits after the body,
;   so one pass always runs and there is no tail handling.
;   NOTE(review): callers presumably guarantee a suitable len and/or buffer
;   padding -- confirm.
;------------------------------------------------------------------------------
%macro MIX1_FLT 1
cglobal mix_1_1_%1_float, 5, 5, 3, out, in, coeffp, index, len
%ifidn %1, a
    ; Any misaligned pointer: continue in the unaligned variant instead.
    test inq, mmsize-1
        jne mix_1_1_float_u_int %+ SUFFIX
    test outq, mmsize-1
        jne mix_1_1_float_u_int %+ SUFFIX
%else
; Landing point for the aligned variant's fallback jump.
mix_1_1_float_u_int %+ SUFFIX:
%endif
    VBROADCASTSS m2, [coeffpq + 4*indexq]      ; m2 = coeffp[index] in every lane
    ; len *= 4: element count -> byte count.
    ; NOTE(review): full-width lenq here, while MIX2_FLT uses lend; confirm
    ; which width the callers' len type actually needs.
    shl lenq    , 2
    ; Point both buffers at their end and walk a negative byte offset up
    ; towards zero.
    add inq     , lenq
    add outq    , lenq
    neg lenq
.next:
%ifidn %1, a
    ; Input passed the mmsize alignment test at entry, so it is used as a
    ; direct memory operand.
    mulps        m0, m2, [inq + lenq         ]
    mulps        m1, m2, [inq + lenq + mmsize]
%else
    movu         m0, [inq + lenq         ]
    movu         m1, [inq + lenq + mmsize]
    mulps        m0, m0, m2
    mulps        m1, m1, m2
%endif
    mov%1  [outq + lenq         ], m0
    mov%1  [outq + lenq + mmsize], m1
    add        lenq, mmsize*2
        jl .next                               ; loop while the offset is still negative
    REP_RET
%endmacro
;------------------------------------------------------------------------------
; MIX1_INT16 %1      (%1 = a: aligned variant, u: unaligned variant)
;
; Emits mix_1_1_%1_int16(out, in, coeffp, index, len):
;     out[i] = saturate_s16((in[i] * c + round) >> s)
; on signed 16-bit samples, with len counted in elements. The 32-bit entry
; coeffp[index] supplies c from its low 16 bits and s from its high 16 bits;
; round = (1 << s) >> 1, evaluated in 16-bit lanes.
;
; * The "a" variant tests in/out against mmsize-1 and, if either is
;   misaligned, jumps to mix_1_1_int16_u_int<SUFFIX>, a label the "u" variant
;   places right after its own cglobal line. The "u" variant therefore has to
;   be instantiated for the same instruction set, and whatever cglobal emits
;   has to be identical for both variants.
; * Every pass handles 2*mmsize bytes and the exit test sits after the body,
;   so one pass always runs and there is no tail handling.
;   NOTE(review): callers presumably guarantee a suitable len and/or buffer
;   padding -- confirm.
;------------------------------------------------------------------------------
%macro MIX1_INT16 1
cglobal mix_1_1_%1_int16, 5, 5, 6, out, in, coeffp, index, len
%ifidn %1, a
    ; Any misaligned pointer: continue in the unaligned variant instead.
    test inq, mmsize-1
        jne mix_1_1_int16_u_int %+ SUFFIX
    test outq, mmsize-1
        jne mix_1_1_int16_u_int %+ SUFFIX
%else
; Landing point for the aligned variant's fallback jump.
mix_1_1_int16_u_int %+ SUFFIX:
%endif
    movd   m4, [coeffpq + 4*indexq]            ; 32-bit entry: shift (high word) | coeff (low word)
    SPLATW m5, m4                              ; presumably replicates the low word (c) into every word of m5
    psllq  m4, 32                              ; these two shifts leave only bits 16..31 of
    psrlq  m4, 48                              ; the entry in m4: the shift count s
    mova   m0, [w1]
    psllw  m0, m4                              ; 1 << s in every word
    psrlw  m0, 1                               ; round = (1 << s) >> 1
    punpcklwd m5, m0                           ; m5 words: c, round, c, round, ...
    add lenq    , lenq                         ; len *= 2: element count -> byte count
    ; Point both buffers at their end and walk a negative byte offset up
    ; towards zero.
    add inq     , lenq
    add outq    , lenq
    neg lenq
.next:
    mov%1        m0, [inq + lenq         ]
    mov%1        m2, [inq + lenq + mmsize]
    mova         m1, m0
    mova         m3, m2
    ; Pair every sample with a constant 1 so that a single pmaddwd yields
    ; sample*c + 1*round in each dword.
    punpcklwd    m0, [w1]
    punpckhwd    m1, [w1]
    punpcklwd    m2, [w1]
    punpckhwd    m3, [w1]
    pmaddwd      m0, m5
    pmaddwd      m1, m5
    pmaddwd      m2, m5
    pmaddwd      m3, m5
    psrad        m0, m4                        ; arithmetic >> s
    psrad        m1, m4
    psrad        m2, m4
    psrad        m3, m4
    packssdw     m0, m1                        ; dwords -> words with signed saturation
    packssdw     m2, m3
    mov%1  [outq + lenq         ], m0
    mov%1  [outq + lenq + mmsize], m2
    add        lenq, mmsize*2
        jl .next                               ; loop while the offset is still negative
%if mmsize == 8
    emms                                       ; 8-byte (MMX) registers were used: reset x87 state
    RET
%else
    REP_RET
%endif
%endmacro
;------------------------------------------------------------------------------
; MIX2_INT16 %1      (%1 = a: aligned variant, u: unaligned variant)
;
; Emits mix_2_1_%1_int16(out, in1, in2, coeffp, index1, index2, len):
;     out[i] = saturate_s16((in1[i] * c1 + in2[i] * c2 + round) >> s)
; on signed 16-bit samples, with len counted in elements. c1 and c2 are the
; low 16 bits of the 32-bit entries coeffp[index1] and coeffp[index2]; s is
; taken from the high 16 bits of coeffp[index1] only (the high half of
; coeffp[index2] is not used); round = (1 << s) >> 1, evaluated in 32-bit
; lanes.
;
; * The "a" variant tests in1/in2/out against mmsize-1 and, if any of them is
;   misaligned, jumps to mix_2_1_int16_u_int<SUFFIX>, a label the "u" variant
;   places right after its own cglobal line. The "u" variant therefore has to
;   be instantiated for the same instruction set, and whatever cglobal emits
;   has to be identical for both variants.
; * Every pass handles 2*mmsize bytes per buffer and the exit test sits after
;   the body, so one pass always runs and there is no tail handling.
;   NOTE(review): callers presumably guarantee a suitable len and/or buffer
;   padding -- confirm.
;------------------------------------------------------------------------------
%macro MIX2_INT16 1
cglobal mix_2_1_%1_int16, 7, 7, 8, out, in1, in2, coeffp, index1, index2, len
%ifidn %1, a
    ; Any misaligned pointer: continue in the unaligned variant instead.
    test in1q, mmsize-1
        jne mix_2_1_int16_u_int %+ SUFFIX
    test in2q, mmsize-1
        jne mix_2_1_int16_u_int %+ SUFFIX
    test outq, mmsize-1
        jne mix_2_1_int16_u_int %+ SUFFIX
%else
; Landing point for the aligned variant's fallback jump.
mix_2_1_int16_u_int %+ SUFFIX:
%endif
    movd   m4, [coeffpq + 4*index1q]           ; entry 1: shift (high word) | c1 (low word)
    movd   m6, [coeffpq + 4*index2q]           ; entry 2: only its low word (c2) is used
    SPLATW m5, m4                              ; presumably replicates c1 into every word of m5
    SPLATW m6, m6                              ; presumably replicates c2 into every word of m6
    psllq  m4, 32                              ; these two shifts leave only bits 16..31 of
    psrlq  m4, 48                              ; entry 1 in m4: the shift count s
    mova   m7, [dw1]
    pslld  m7, m4                              ; 1 << s in every dword
    psrld  m7, 1                               ; round = (1 << s) >> 1
    punpcklwd m5, m6                           ; m5 words: c1, c2, c1, c2, ...
    ; len *= 2: element count -> byte count.
    ; NOTE(review): 32-bit operand form here, while MIX1_INT16 uses lenq. On
    ; x86-64 a 32-bit register write clears the upper half of the register;
    ; confirm which width the callers' len type actually needs.
    add lend    , lend
    ; Point all three buffers at their end and walk a negative byte offset up
    ; towards zero.
    add in1q    , lenq
    add in2q    , lenq
    add outq    , lenq
    neg lenq
.next:
    ; First vector: interleave in1/in2 samples into (a, b) word pairs.
    mov%1        m0, [in1q + lenq         ]
    mov%1        m2, [in2q + lenq         ]
    mova         m1, m0
    punpcklwd    m0, m2
    punpckhwd    m1, m2

    ; Second vector, same interleave; m6 is free again once m5 has been built.
    mov%1        m2, [in1q + lenq + mmsize]
    mov%1        m6, [in2q + lenq + mmsize]
    mova         m3, m2
    punpcklwd    m2, m6
    punpckhwd    m3, m6

    pmaddwd      m0, m5                        ; a*c1 + b*c2 per dword
    pmaddwd      m1, m5
    pmaddwd      m2, m5
    pmaddwd      m3, m5
    paddd        m0, m7                        ; + round
    paddd        m1, m7
    paddd        m2, m7
    paddd        m3, m7
    psrad        m0, m4                        ; arithmetic >> s
    psrad        m1, m4
    psrad        m2, m4
    psrad        m3, m4
    packssdw     m0, m1                        ; dwords -> words with signed saturation
    packssdw     m2, m3
    mov%1  [outq + lenq         ], m0
    mov%1  [outq + lenq + mmsize], m2
    add        lenq, mmsize*2
        jl .next                               ; loop while the offset is still negative
%if mmsize == 8
    emms                                       ; 8-byte (MMX) registers were used: reset x87 state
    RET
%else
    REP_RET
%endif
%endmacro
;------------------------------------------------------------------------------
; Kernel instantiations, one group per instruction set.
; Each "a" expansion jumps to a *_u_int<SUFFIX> label defined by the matching
; "u" expansion, so every group has to contain both variants of a macro.
;------------------------------------------------------------------------------

INIT_MMX mmx
MIX1_INT16 u
MIX1_INT16 a
MIX2_INT16 u
MIX2_INT16 a

INIT_XMM sse
MIX2_FLT u
MIX2_FLT a
MIX1_FLT u
MIX1_FLT a

INIT_XMM sse2
MIX1_INT16 u
MIX1_INT16 a
MIX2_INT16 u
MIX2_INT16 a

; The AVX float kernels are only assembled when HAVE_AVX_EXTERNAL is non-zero
; (the symbol is defined outside this file).
%if HAVE_AVX_EXTERNAL
INIT_YMM avx
MIX2_FLT u
MIX2_FLT a
MIX1_FLT u
MIX1_FLT a
%endif