Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
4349 Serge 1
/*
 * VP8 DSP functions x86-optimized
 * Copyright (c) 2010 Ronald S. Bultje
 * Copyright (c) 2010 Jason Garrett-Glaser
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
22
 
23
#include "libavutil/cpu.h"
24
#include "libavutil/mem.h"
25
#include "libavutil/x86/asm.h"
26
#include "libavutil/x86/cpu.h"
27
#include "libavcodec/vp8dsp.h"
28
 
29
#if HAVE_YASM
30
 
31
/*
32
 * MC functions
33
 */
34
/* All motion-compensation routines declared below share one signature, so
 * it is written once here and each routine is then declared by name. */
#define DECLARE_VP8_MC_FUNC(NAME)                \
void NAME(uint8_t *dst, ptrdiff_t dststride,     \
          uint8_t *src, ptrdiff_t srcstride,     \
          int height, int mx, int my)

/* epel, 4 pixels wide, mmxext */
DECLARE_VP8_MC_FUNC(ff_put_vp8_epel4_h4_mmxext);
DECLARE_VP8_MC_FUNC(ff_put_vp8_epel4_h6_mmxext);
DECLARE_VP8_MC_FUNC(ff_put_vp8_epel4_v4_mmxext);
DECLARE_VP8_MC_FUNC(ff_put_vp8_epel4_v6_mmxext);

/* epel, 8 pixels wide, sse2 */
DECLARE_VP8_MC_FUNC(ff_put_vp8_epel8_h4_sse2);
DECLARE_VP8_MC_FUNC(ff_put_vp8_epel8_h6_sse2);
DECLARE_VP8_MC_FUNC(ff_put_vp8_epel8_v4_sse2);
DECLARE_VP8_MC_FUNC(ff_put_vp8_epel8_v6_sse2);

/* epel, 4 and 8 pixels wide, ssse3 */
DECLARE_VP8_MC_FUNC(ff_put_vp8_epel4_h4_ssse3);
DECLARE_VP8_MC_FUNC(ff_put_vp8_epel4_h6_ssse3);
DECLARE_VP8_MC_FUNC(ff_put_vp8_epel4_v4_ssse3);
DECLARE_VP8_MC_FUNC(ff_put_vp8_epel4_v6_ssse3);
DECLARE_VP8_MC_FUNC(ff_put_vp8_epel8_h4_ssse3);
DECLARE_VP8_MC_FUNC(ff_put_vp8_epel8_h6_ssse3);
DECLARE_VP8_MC_FUNC(ff_put_vp8_epel8_v4_ssse3);
DECLARE_VP8_MC_FUNC(ff_put_vp8_epel8_v6_ssse3);

/* bilinear, horizontal */
DECLARE_VP8_MC_FUNC(ff_put_vp8_bilinear4_h_mmxext);
DECLARE_VP8_MC_FUNC(ff_put_vp8_bilinear8_h_sse2);
DECLARE_VP8_MC_FUNC(ff_put_vp8_bilinear4_h_ssse3);
DECLARE_VP8_MC_FUNC(ff_put_vp8_bilinear8_h_ssse3);

/* bilinear, vertical */
DECLARE_VP8_MC_FUNC(ff_put_vp8_bilinear4_v_mmxext);
DECLARE_VP8_MC_FUNC(ff_put_vp8_bilinear8_v_sse2);
DECLARE_VP8_MC_FUNC(ff_put_vp8_bilinear4_v_ssse3);
DECLARE_VP8_MC_FUNC(ff_put_vp8_bilinear8_v_ssse3);

/* "pixels" routines; the init code installs them in the [0][0] table slots */
DECLARE_VP8_MC_FUNC(ff_put_vp8_pixels8_mmx);
DECLARE_VP8_MC_FUNC(ff_put_vp8_pixels16_mmx);
DECLARE_VP8_MC_FUNC(ff_put_vp8_pixels16_sse);
121
 
122
/*
 * Defines a static ff_put_vp8_<FILTERTYPE>16_<TAPTYPE>_<OPT>() that handles
 * a 16-pixel-wide block by calling the 8-wide routine of the same name
 * twice: first at column offset 0, then at column offset 8. All other
 * arguments are forwarded unchanged.
 */
#define TAP_W16(OPT, FILTERTYPE, TAPTYPE) \
static void ff_put_vp8_ ## FILTERTYPE ## 16_ ## TAPTYPE ## _ ## OPT( \
    uint8_t *dst,  ptrdiff_t dststride, uint8_t *src, \
    ptrdiff_t srcstride, int height, int mx, int my) \
{ \
    int col; \
    for (col = 0; col < 16; col += 8) \
        ff_put_vp8_ ## FILTERTYPE ## 8_ ## TAPTYPE ## _ ## OPT( \
            dst + col, dststride, src + col, srcstride, height, mx, my); \
}
132
/*
 * Defines a static ff_put_vp8_<FILTERTYPE>8_<TAPTYPE>_<OPT>() that handles
 * an 8-pixel-wide block by calling the 4-wide routine of the same name
 * twice: first at column offset 0, then at column offset 4. All other
 * arguments are forwarded unchanged.
 */
#define TAP_W8(OPT, FILTERTYPE, TAPTYPE) \
static void ff_put_vp8_ ## FILTERTYPE ## 8_ ## TAPTYPE ## _ ## OPT( \
    uint8_t *dst,  ptrdiff_t dststride, uint8_t *src, \
    ptrdiff_t srcstride, int height, int mx, int my) \
{ \
    int col; \
    for (col = 0; col < 8; col += 4) \
        ff_put_vp8_ ## FILTERTYPE ## 4_ ## TAPTYPE ## _ ## OPT( \
            dst + col, dststride, src + col, srcstride, height, mx, my); \
}
142
 
143
#if ARCH_X86_32
144
TAP_W8 (mmxext, epel, h4)
145
TAP_W8 (mmxext, epel, h6)
146
TAP_W16(mmxext, epel, h6)
147
TAP_W8 (mmxext, epel, v4)
148
TAP_W8 (mmxext, epel, v6)
149
TAP_W16(mmxext, epel, v6)
150
TAP_W8 (mmxext, bilinear, h)
151
TAP_W16(mmxext, bilinear, h)
152
TAP_W8 (mmxext, bilinear, v)
153
TAP_W16(mmxext, bilinear, v)
154
#endif
155
 
156
TAP_W16(sse2,  epel, h6)
157
TAP_W16(sse2,  epel, v6)
158
TAP_W16(sse2,  bilinear, h)
159
TAP_W16(sse2,  bilinear, v)
160
 
161
TAP_W16(ssse3, epel, h6)
162
TAP_W16(ssse3, epel, v6)
163
TAP_W16(ssse3, bilinear, h)
164
TAP_W16(ssse3, bilinear, v)
165
 
166
/*
 * Defines a static ff_put_vp8_epel<SIZE>_h<TAPNUMX>v<TAPNUMY>_<OPT>() that
 * filters in two passes through an on-stack buffer declared with
 * DECLARE_ALIGNED(ALIGN, ...):
 *   1. the h<TAPNUMX> routine writes height + TAPNUMY - 1 rows into the
 *      buffer (row stride SIZE), reading from TAPNUMY / 2 - 1 rows above
 *      src;
 *   2. the v<TAPNUMY> routine reads the buffer starting TAPNUMY / 2 - 1
 *      rows in and writes height rows to dst.
 * The buffer has room for MAXHEIGHT + TAPNUMY - 1 rows; height is not
 * checked against MAXHEIGHT here.
 */
#define HVTAP(OPT, ALIGN, TAPNUMX, TAPNUMY, SIZE, MAXHEIGHT) \
static void ff_put_vp8_epel ## SIZE ## _h ## TAPNUMX ## v ## TAPNUMY ## _ ## OPT( \
    uint8_t *dst, ptrdiff_t dststride, uint8_t *src, \
    ptrdiff_t srcstride, int height, int mx, int my) \
{ \
    DECLARE_ALIGNED(ALIGN, uint8_t, rows)[SIZE * (MAXHEIGHT + TAPNUMY - 1)]; \
    const int lead = TAPNUMY / 2 - 1; \
    ff_put_vp8_epel ## SIZE ## _h ## TAPNUMX ## _ ## OPT( \
        rows, SIZE, src - srcstride * lead, srcstride, \
        height + TAPNUMY - 1, mx, my); \
    ff_put_vp8_epel ## SIZE ## _v ## TAPNUMY ## _ ## OPT( \
        dst, dststride, rows + SIZE * lead, SIZE, height, mx, my); \
}
179
 
180
#if ARCH_X86_32
181
#define HVTAPMMX(x, y) \
182
HVTAP(mmxext, 8, x, y,  4,  8) \
183
HVTAP(mmxext, 8, x, y,  8, 16)
184
 
185
HVTAP(mmxext, 8, 6, 6, 16, 16)
186
#else
187
#define HVTAPMMX(x, y) \
188
HVTAP(mmxext, 8, x, y,  4,  8)
189
#endif
190
 
191
HVTAPMMX(4, 4)
192
HVTAPMMX(4, 6)
193
HVTAPMMX(6, 4)
194
HVTAPMMX(6, 6)
195
 
196
#define HVTAPSSE2(x, y, w) \
197
HVTAP(sse2,  16, x, y, w, 16) \
198
HVTAP(ssse3, 16, x, y, w, 16)
199
 
200
HVTAPSSE2(4, 4, 8)
201
HVTAPSSE2(4, 6, 8)
202
HVTAPSSE2(6, 4, 8)
203
HVTAPSSE2(6, 6, 8)
204
HVTAPSSE2(6, 6, 16)
205
 
206
HVTAP(ssse3, 16, 4, 4, 4, 8)
207
HVTAP(ssse3, 16, 4, 6, 4, 8)
208
HVTAP(ssse3, 16, 6, 4, 4, 8)
209
HVTAP(ssse3, 16, 6, 6, 4, 8)
210
 
211
/*
 * Defines a static ff_put_vp8_bilinear<SIZE>_hv_<OPT>() that filters in two
 * passes through an on-stack buffer declared with
 * DECLARE_ALIGNED(ALIGN, ...):
 *   1. the _h_ routine writes height + 1 rows into the buffer (row stride
 *      SIZE), reading from src;
 *   2. the _v_ routine reads the buffer from its start and writes height
 *      rows to dst.
 * The buffer has room for MAXHEIGHT + 2 rows; height is not checked
 * against MAXHEIGHT here.
 */
#define HVBILIN(OPT, ALIGN, SIZE, MAXHEIGHT) \
static void ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT( \
    uint8_t *dst, ptrdiff_t dststride, uint8_t *src, \
    ptrdiff_t srcstride, int height, int mx, int my) \
{ \
    DECLARE_ALIGNED(ALIGN, uint8_t, rows)[SIZE * (MAXHEIGHT + 2)]; \
    ff_put_vp8_bilinear ## SIZE ## _h_ ## OPT( \
        rows, SIZE, src, srcstride, height + 1, mx, my); \
    ff_put_vp8_bilinear ## SIZE ## _v_ ## OPT( \
        dst, dststride, rows, SIZE, height, mx, my); \
}
222
 
223
/* 4-wide */
HVBILIN(mmxext,  8,  4,  8)
HVBILIN(ssse3,   8,  4,  8)

/* 8- and 16-wide */
HVBILIN(sse2,    8,  8, 16)
HVBILIN(sse2,    8, 16, 16)
HVBILIN(ssse3,   8,  8, 16)
HVBILIN(ssse3,   8, 16, 16)

/* 8- and 16-wide mmxext variants are only generated when ARCH_X86_32 */
#if ARCH_X86_32
HVBILIN(mmxext,  8,  8, 16)
HVBILIN(mmxext,  8, 16, 16)
#endif
233
 
234
/*
 * IDCT / WHT functions
 */

/* DC-only add, single block */
void ff_vp8_idct_dc_add_mmx (uint8_t *dst, int16_t block[16], ptrdiff_t stride);
void ff_vp8_idct_dc_add_sse4(uint8_t *dst, int16_t block[16], ptrdiff_t stride);

/* DC-only add, several blocks per call (4 for "4y", 2 for "4uv") */
void ff_vp8_idct_dc_add4y_mmx (uint8_t *dst, int16_t block[4][16],
                               ptrdiff_t stride);
void ff_vp8_idct_dc_add4y_sse2(uint8_t *dst, int16_t block[4][16],
                               ptrdiff_t stride);
void ff_vp8_idct_dc_add4uv_mmx(uint8_t *dst, int16_t block[2][16],
                               ptrdiff_t stride);

/* luma DC WHT */
void ff_vp8_luma_dc_wht_mmx(int16_t block[4][4][16], int16_t dc[16]);
void ff_vp8_luma_dc_wht_sse(int16_t block[4][4][16], int16_t dc[16]);

/* full IDCT + add */
void ff_vp8_idct_add_mmx(uint8_t *dst, int16_t block[16], ptrdiff_t stride);
void ff_vp8_idct_add_sse(uint8_t *dst, int16_t block[16], ptrdiff_t stride);
248
 
249
/*
 * Declares the loop-filter entry points for one suffix NAME:
 * "simple", "inner" and "mbedge" filters, each in a v_ and an h_ flavour,
 * with the inner/mbedge filters in a 16y (one pointer) and an 8uv
 * (two pointers) form. The expansion ends with its own semicolon, so the
 * macro is used without one.
 */
#define DECLARE_LOOP_FILTER(NAME) \
/* simple */ \
void ff_vp8_v_loop_filter_simple_ ## NAME(uint8_t *dst, ptrdiff_t stride, \
                                          int flim); \
void ff_vp8_h_loop_filter_simple_ ## NAME(uint8_t *dst, ptrdiff_t stride, \
                                          int flim); \
/* inner, 16y */ \
void ff_vp8_v_loop_filter16y_inner_ ## NAME (uint8_t *dst, ptrdiff_t stride, \
                                             int e, int i, int hvt); \
void ff_vp8_h_loop_filter16y_inner_ ## NAME (uint8_t *dst, ptrdiff_t stride, \
                                             int e, int i, int hvt); \
/* inner, 8uv */ \
void ff_vp8_v_loop_filter8uv_inner_ ## NAME (uint8_t *dstU, uint8_t *dstV, \
                                             ptrdiff_t s, \
                                             int e, int i, int hvt); \
void ff_vp8_h_loop_filter8uv_inner_ ## NAME (uint8_t *dstU, uint8_t *dstV, \
                                             ptrdiff_t s, \
                                             int e, int i, int hvt); \
/* mbedge, 16y */ \
void ff_vp8_v_loop_filter16y_mbedge_ ## NAME(uint8_t *dst, ptrdiff_t stride, \
                                             int e, int i, int hvt); \
void ff_vp8_h_loop_filter16y_mbedge_ ## NAME(uint8_t *dst, ptrdiff_t stride, \
                                             int e, int i, int hvt); \
/* mbedge, 8uv */ \
void ff_vp8_v_loop_filter8uv_mbedge_ ## NAME(uint8_t *dstU, uint8_t *dstV, \
                                             ptrdiff_t s, \
                                             int e, int i, int hvt); \
void ff_vp8_h_loop_filter8uv_mbedge_ ## NAME(uint8_t *dstU, uint8_t *dstV, \
                                             ptrdiff_t s, \
                                             int e, int i, int hvt);

DECLARE_LOOP_FILTER(mmx)
DECLARE_LOOP_FILTER(mmxext)
DECLARE_LOOP_FILTER(sse2)
DECLARE_LOOP_FILTER(ssse3)
DECLARE_LOOP_FILTER(sse4)
290
 
291
#endif /* HAVE_YASM */
292
 
293
/*
 * Installs the three 6-tap epel routines (h6, v6, h6v6) of width SIZE and
 * suffix OPT into c->put_vp8_epel_pixels_tab[IDX].
 *
 * Expects a pointer named `c` in scope. Wrapped in do { } while (0) so the
 * whole expansion is a single statement and stays fully guarded when used
 * as the body of an unbraced if/else or loop; invoke it with a trailing
 * semicolon.
 */
#define VP8_LUMA_MC_FUNC(IDX, SIZE, OPT) \
    do { \
        c->put_vp8_epel_pixels_tab[IDX][0][2] = ff_put_vp8_epel ## SIZE ## _h6_ ## OPT; \
        c->put_vp8_epel_pixels_tab[IDX][2][0] = ff_put_vp8_epel ## SIZE ## _v6_ ## OPT; \
        c->put_vp8_epel_pixels_tab[IDX][2][2] = ff_put_vp8_epel ## SIZE ## _h6v6_ ## OPT; \
    } while (0)

/*
 * Installs every epel routine of width SIZE and suffix OPT into
 * c->put_vp8_epel_pixels_tab[IDX]: the five entries that involve a 4-tap
 * filter here, plus the three 6-tap entries via VP8_LUMA_MC_FUNC. Slot
 * [0][0] is not touched.
 *
 * Same usage rules as VP8_LUMA_MC_FUNC: needs `c` in scope, expands to one
 * statement, invoke with a trailing semicolon.
 */
#define VP8_MC_FUNC(IDX, SIZE, OPT) \
    do { \
        c->put_vp8_epel_pixels_tab[IDX][0][1] = ff_put_vp8_epel ## SIZE ## _h4_ ## OPT; \
        c->put_vp8_epel_pixels_tab[IDX][1][0] = ff_put_vp8_epel ## SIZE ## _v4_ ## OPT; \
        c->put_vp8_epel_pixels_tab[IDX][1][1] = ff_put_vp8_epel ## SIZE ## _h4v4_ ## OPT; \
        c->put_vp8_epel_pixels_tab[IDX][1][2] = ff_put_vp8_epel ## SIZE ## _h6v4_ ## OPT; \
        c->put_vp8_epel_pixels_tab[IDX][2][1] = ff_put_vp8_epel ## SIZE ## _h4v6_ ## OPT; \
        VP8_LUMA_MC_FUNC(IDX, SIZE, OPT); \
    } while (0)
305
 
306
/*
 * Installs the bilinear routines of width SIZE and suffix OPT into
 * c->put_vp8_bilinear_pixels_tab[IDX]. Indices 1 and 2 of each of the last
 * two dimensions receive the same routine: [0][1..2] get _h_, [1..2][0]
 * get _v_, and [1..2][1..2] get _hv_. Slot [0][0] is not touched.
 *
 * Expects a pointer named `c` in scope. Wrapped in do { } while (0) so the
 * whole expansion is a single statement and stays fully guarded when used
 * as the body of an unbraced if/else or loop; invoke it with a trailing
 * semicolon.
 */
#define VP8_BILINEAR_MC_FUNC(IDX, SIZE, OPT) \
    do { \
        c->put_vp8_bilinear_pixels_tab[IDX][0][1] = ff_put_vp8_bilinear ## SIZE ## _h_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][0][2] = ff_put_vp8_bilinear ## SIZE ## _h_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][1][0] = ff_put_vp8_bilinear ## SIZE ## _v_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][1][1] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][1][2] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][2][0] = ff_put_vp8_bilinear ## SIZE ## _v_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][2][1] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT; \
        c->put_vp8_bilinear_pixels_tab[IDX][2][2] = ff_put_vp8_bilinear ## SIZE ## _hv_ ## OPT; \
    } while (0)
315
 
316
 
317
/**
 * Install the x86 SIMD routines into a VP8 DSP context.
 *
 * The CPU flags from av_get_cpu_flags() are tested one instruction set at
 * a time, in the order MMX, MMXEXT, SSE, SSE2, SSSE3, SSE4. Each block
 * overwrites any pointer an earlier block set, so the order of the blocks
 * matters. Pointers not assigned by any matching block are left as the
 * caller initialised them. When HAVE_YASM is 0 the function does nothing.
 *
 * @param c context whose function pointers are updated in place
 */
av_cold void ff_vp8dsp_init_x86(VP8DSPContext *c)
{
#if HAVE_YASM
    int cpu_flags = av_get_cpu_flags();

    if (EXTERNAL_MMX(cpu_flags)) {
        c->vp8_idct_dc_add    = ff_vp8_idct_dc_add_mmx;
        c->vp8_idct_dc_add4uv = ff_vp8_idct_dc_add4uv_mmx;

        c->put_vp8_epel_pixels_tab[1][0][0]     = ff_put_vp8_pixels8_mmx;
        c->put_vp8_bilinear_pixels_tab[1][0][0] = ff_put_vp8_pixels8_mmx;

#if ARCH_X86_32
        c->vp8_idct_dc_add4y  = ff_vp8_idct_dc_add4y_mmx;
        c->vp8_idct_add       = ff_vp8_idct_add_mmx;
        c->vp8_luma_dc_wht    = ff_vp8_luma_dc_wht_mmx;

        c->put_vp8_epel_pixels_tab[0][0][0]     = ff_put_vp8_pixels16_mmx;
        c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_mmx;

        c->vp8_v_loop_filter_simple   = ff_vp8_v_loop_filter_simple_mmx;
        c->vp8_h_loop_filter_simple   = ff_vp8_h_loop_filter_simple_mmx;

        c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmx;
        c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmx;
        c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmx;
        c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmx;

        c->vp8_v_loop_filter16y       = ff_vp8_v_loop_filter16y_mbedge_mmx;
        c->vp8_h_loop_filter16y       = ff_vp8_h_loop_filter16y_mbedge_mmx;
        c->vp8_v_loop_filter8uv       = ff_vp8_v_loop_filter8uv_mbedge_mmx;
        c->vp8_h_loop_filter8uv       = ff_vp8_h_loop_filter8uv_mbedge_mmx;
#endif
    }

    /* note that 4-tap width=16 functions are missing because w=16
     * is only used for luma, and luma is always a copy or sixtap. */
    if (EXTERNAL_MMXEXT(cpu_flags)) {
        VP8_MC_FUNC(2, 4, mmxext);
        VP8_BILINEAR_MC_FUNC(2, 4, mmxext);

#if ARCH_X86_32
        VP8_LUMA_MC_FUNC(0, 16, mmxext);
        VP8_MC_FUNC(1, 8, mmxext);
        VP8_BILINEAR_MC_FUNC(0, 16, mmxext);
        VP8_BILINEAR_MC_FUNC(1,  8, mmxext);

        c->vp8_v_loop_filter_simple   = ff_vp8_v_loop_filter_simple_mmxext;
        c->vp8_h_loop_filter_simple   = ff_vp8_h_loop_filter_simple_mmxext;

        c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_mmxext;
        c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_mmxext;
        c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_mmxext;
        c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_mmxext;

        c->vp8_v_loop_filter16y       = ff_vp8_v_loop_filter16y_mbedge_mmxext;
        c->vp8_h_loop_filter16y       = ff_vp8_h_loop_filter16y_mbedge_mmxext;
        c->vp8_v_loop_filter8uv       = ff_vp8_v_loop_filter8uv_mbedge_mmxext;
        c->vp8_h_loop_filter8uv       = ff_vp8_h_loop_filter8uv_mbedge_mmxext;
#endif
    }

    if (EXTERNAL_SSE(cpu_flags)) {
        c->vp8_idct_add    = ff_vp8_idct_add_sse;
        c->vp8_luma_dc_wht = ff_vp8_luma_dc_wht_sse;

        c->put_vp8_epel_pixels_tab[0][0][0]     = ff_put_vp8_pixels16_sse;
        c->put_vp8_bilinear_pixels_tab[0][0][0] = ff_put_vp8_pixels16_sse;
    }

    /* Taken when either the SSE2 or the SSE2SLOW flag is reported:
     * MC routines and the v_ loop filters. */
    if (HAVE_SSE2_EXTERNAL &&
        (cpu_flags & (AV_CPU_FLAG_SSE2 | AV_CPU_FLAG_SSE2SLOW))) {
        VP8_LUMA_MC_FUNC(0, 16, sse2);
        VP8_MC_FUNC(1, 8, sse2);
        VP8_BILINEAR_MC_FUNC(0, 16, sse2);
        VP8_BILINEAR_MC_FUNC(1, 8, sse2);

        c->vp8_v_loop_filter_simple   = ff_vp8_v_loop_filter_simple_sse2;

        c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_sse2;
        c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_sse2;

        c->vp8_v_loop_filter16y       = ff_vp8_v_loop_filter16y_mbedge_sse2;
        c->vp8_v_loop_filter8uv       = ff_vp8_v_loop_filter8uv_mbedge_sse2;
    }

    /* Taken only when EXTERNAL_SSE2() accepts the flags:
     * idct_dc_add4y and the h_ loop filters. */
    if (EXTERNAL_SSE2(cpu_flags)) {
        c->vp8_idct_dc_add4y          = ff_vp8_idct_dc_add4y_sse2;

        c->vp8_h_loop_filter_simple   = ff_vp8_h_loop_filter_simple_sse2;

        c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_sse2;
        c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_sse2;

        c->vp8_h_loop_filter16y       = ff_vp8_h_loop_filter16y_mbedge_sse2;
        c->vp8_h_loop_filter8uv       = ff_vp8_h_loop_filter8uv_mbedge_sse2;
    }

    if (EXTERNAL_SSSE3(cpu_flags)) {
        VP8_LUMA_MC_FUNC(0, 16, ssse3);
        VP8_MC_FUNC(1, 8, ssse3);
        VP8_MC_FUNC(2, 4, ssse3);
        VP8_BILINEAR_MC_FUNC(0, 16, ssse3);
        VP8_BILINEAR_MC_FUNC(1, 8, ssse3);
        VP8_BILINEAR_MC_FUNC(2, 4, ssse3);

        c->vp8_v_loop_filter_simple   = ff_vp8_v_loop_filter_simple_ssse3;
        c->vp8_h_loop_filter_simple   = ff_vp8_h_loop_filter_simple_ssse3;

        c->vp8_v_loop_filter16y_inner = ff_vp8_v_loop_filter16y_inner_ssse3;
        c->vp8_h_loop_filter16y_inner = ff_vp8_h_loop_filter16y_inner_ssse3;
        c->vp8_v_loop_filter8uv_inner = ff_vp8_v_loop_filter8uv_inner_ssse3;
        c->vp8_h_loop_filter8uv_inner = ff_vp8_h_loop_filter8uv_inner_ssse3;

        c->vp8_v_loop_filter16y       = ff_vp8_v_loop_filter16y_mbedge_ssse3;
        c->vp8_h_loop_filter16y       = ff_vp8_h_loop_filter16y_mbedge_ssse3;
        c->vp8_v_loop_filter8uv       = ff_vp8_v_loop_filter8uv_mbedge_ssse3;
        c->vp8_h_loop_filter8uv       = ff_vp8_h_loop_filter8uv_mbedge_ssse3;
    }

    if (EXTERNAL_SSE4(cpu_flags)) {
        c->vp8_idct_dc_add          = ff_vp8_idct_dc_add_sse4;

        c->vp8_h_loop_filter_simple = ff_vp8_h_loop_filter_simple_sse4;
        c->vp8_h_loop_filter16y     = ff_vp8_h_loop_filter16y_mbedge_sse4;
        c->vp8_h_loop_filter8uv     = ff_vp8_h_loop_filter8uv_mbedge_sse4;
    }
#endif /* HAVE_YASM */
}