Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
6148 | serge | 1 | /* |
2 | * Copyright (C) 2010 David Conrad |
||
3 | * Copyright (C) 2010 Ronald S. Bultje |
||
4 | * |
||
5 | * This file is part of FFmpeg. |
||
6 | * |
||
7 | * FFmpeg is free software; you can redistribute it and/or |
||
8 | * modify it under the terms of the GNU Lesser General Public |
||
9 | * License as published by the Free Software Foundation; either |
||
10 | * version 2.1 of the License, or (at your option) any later version. |
||
11 | * |
||
12 | * FFmpeg is distributed in the hope that it will be useful, |
||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||
15 | * Lesser General Public License for more details. |
||
16 | * |
||
17 | * You should have received a copy of the GNU Lesser General Public |
||
18 | * License along with FFmpeg; if not, write to the Free Software |
||
19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||
20 | */ |
||
21 | |||
22 | /** |
||
23 | * @file |
||
24 | * VP8 compatible video decoder |
||
25 | */ |
||
26 | |||
27 | #include "dsputil.h" |
||
28 | #include "vp8dsp.h" |
||
29 | #include "libavutil/common.h" |
||
30 | |||
31 | // TODO: Maybe add dequant |
||
/**
 * Two-pass 4x4 butterfly transform of the 16 luma DC coefficients (the
 * "wht" of the function name).
 *
 * The matrix in dc is first transformed in place down every column, then
 * along every row.  Each second-pass result is rounded (+3, >> 3) and stored
 * as coefficient 0 of the matching sub-block: matrix row r, column c ends up
 * in block[r][c][0].  All 16 entries of dc are zero on return.
 *
 * @param block per-sub-block coefficient arrays; only element [r][c][0] of
 *              each is written
 * @param dc    the 16 DC coefficients, row-major; consumed and cleared
 */
static void vp8_luma_dc_wht_c(int16_t block[4][4][16], int16_t dc[16])
{
    int col, row;

    /* Pass 1: butterfly down each column, results written back into dc. */
    for (col = 0; col < 4; col++) {
        int16_t *c = dc + col;
        const int outer_sum  = c[0] + c[12];
        const int inner_sum  = c[4] + c[8];
        const int inner_diff = c[4] - c[8];
        const int outer_diff = c[0] - c[12];

        c[0]  = outer_sum  + inner_sum;
        c[4]  = outer_diff + inner_diff;
        c[8]  = outer_sum  - inner_sum;
        c[12] = outer_diff - inner_diff;
    }

    /* Pass 2: butterfly along each row, scatter into the sub-blocks. */
    for (row = 0; row < 4; row++) {
        int16_t *r = dc + 4 * row;
        const int outer_sum  = r[0] + r[3] + 3; /* +3: rounding bias */
        const int inner_sum  = r[1] + r[2];
        const int inner_diff = r[1] - r[2];
        const int outer_diff = r[0] - r[3] + 3; /* +3: rounding bias */

        /* The row has been read in full; clear it before writing outputs. */
        r[0] = 0;
        r[1] = 0;
        r[2] = 0;
        r[3] = 0;

        block[row][0][0] = (outer_sum  + inner_sum)  >> 3;
        block[row][1][0] = (outer_diff + inner_diff) >> 3;
        block[row][2][0] = (outer_sum  - inner_sum)  >> 3;
        block[row][3][0] = (outer_diff - inner_diff) >> 3;
    }
}
||
64 | |||
/**
 * DC-only shortcut for the luma DC transform: only dc[0] is read.
 *
 * The single value (dc[0] + 3) >> 3 is written to coefficient 0 of all
 * sixteen sub-blocks, and dc[0] is cleared.
 *
 * @param block per-sub-block coefficient arrays; element [r][c][0] of each
 *              is overwritten
 * @param dc    DC coefficients; only dc[0] is read and reset to zero
 */
static void vp8_luma_dc_wht_dc_c(int16_t block[4][4][16], int16_t dc[16])
{
    const int val = (dc[0] + 3) >> 3;
    int r, c;

    dc[0] = 0;

    for (r = 0; r < 4; r++)
        for (c = 0; c < 4; c++)
            block[r][c][0] = val;
}
||
77 | |||
/*
 * 16.16 fixed-point multipliers used by the 4x4 inverse transform:
 *   MUL_20091(a) = a + (a * 20091 >> 16)   i.e. roughly a * (1 + 20091/65536)
 *   MUL_35468(a) =      a * 35468 >> 16    i.e. roughly a * 35468/65536
 * The argument is evaluated more than once in MUL_20091, so pass only
 * side-effect-free expressions.
 */
#define MUL_20091(a) ((((a) * 20091) >> 16) + (a))
#define MUL_35468(a)  (((a) * 35468) >> 16)

/**
 * Inverse-transform a 4x4 coefficient block and add it to the picture.
 *
 * The first pass transforms each column of block and stores the result
 * transposed in a temporary, clearing block as it goes.  The second pass
 * transforms the temporary, rounds with (+4, >> 3), adds the result to the
 * 4x4 pixel area at dst and clips each sample with av_clip_uint8().
 *
 * @param dst    top-left sample of the destination 4x4 area
 * @param block  16 coefficients, row-major; all zero on return
 * @param stride distance between successive destination rows
 */
static void vp8_idct_add_c(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
{
    int16_t tmp[16];
    int col, row;

    /* Pass 1: column col of block becomes row col of tmp. */
    for (col = 0; col < 4; col++) {
        const int c0 = block[col];
        const int c1 = block[4 + col];
        const int c2 = block[8 + col];
        const int c3 = block[12 + col];
        const int even_sum  = c0 + c2;
        const int even_diff = c0 - c2;
        const int odd_lo    = MUL_35468(c1) - MUL_20091(c3);
        const int odd_hi    = MUL_20091(c1) + MUL_35468(c3);

        block[col]      = 0;
        block[4 + col]  = 0;
        block[8 + col]  = 0;
        block[12 + col] = 0;

        tmp[4 * col + 0] = even_sum  + odd_hi;
        tmp[4 * col + 1] = even_diff + odd_lo;
        tmp[4 * col + 2] = even_diff - odd_lo;
        tmp[4 * col + 3] = even_sum  - odd_hi;
    }

    /* Pass 2: column row of tmp yields destination row number row. */
    for (row = 0; row < 4; row++) {
        const int v0 = tmp[row];
        const int v1 = tmp[4 + row];
        const int v2 = tmp[8 + row];
        const int v3 = tmp[12 + row];
        const int even_sum  = v0 + v2;
        const int even_diff = v0 - v2;
        const int odd_lo    = MUL_35468(v1) - MUL_20091(v3);
        const int odd_hi    = MUL_20091(v1) + MUL_35468(v3);

        dst[0] = av_clip_uint8(dst[0] + ((even_sum  + odd_hi + 4) >> 3));
        dst[1] = av_clip_uint8(dst[1] + ((even_diff + odd_lo + 4) >> 3));
        dst[2] = av_clip_uint8(dst[2] + ((even_diff - odd_lo + 4) >> 3));
        dst[3] = av_clip_uint8(dst[3] + ((even_sum  - odd_hi + 4) >> 3));
        dst += stride;
    }
}
||
115 | |||
/**
 * DC-only variant of the 4x4 inverse transform.
 *
 * Adds the rounded DC value (block[0] + 4) >> 3 to every sample of the 4x4
 * area at dst, clipping each result with av_clip_uint8(), and clears
 * block[0].
 *
 * @param dst    top-left sample of the destination 4x4 area
 * @param block  coefficient block; only block[0] is read and reset
 * @param stride distance between successive destination rows
 */
static void vp8_idct_dc_add_c(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
{
    const int dc = (block[0] + 4) >> 3;
    int row, col;

    block[0] = 0;

    for (row = 0; row < 4; row++) {
        for (col = 0; col < 4; col++)
            dst[col] = av_clip_uint8(dst[col] + dc);
        dst += stride;
    }
}
||
129 | |||
/**
 * Apply vp8_idct_dc_add_c() to four 4x4 blocks laid out as a 2x2 grid
 * (an 8x8 area): block[0] top-left, block[1] top-right, block[2]
 * bottom-left, block[3] bottom-right.
 *
 * @param dst    top-left sample of the 8x8 area
 * @param block  four coefficient blocks, one per 4x4 quadrant
 * @param stride distance between successive destination rows
 */
static void vp8_idct_dc_add4uv_c(uint8_t *dst, int16_t block[4][16], ptrdiff_t stride)
{
    int n;

    for (n = 0; n < 4; n++) {
        /* Bit 1 of n selects the lower half, bit 0 the right half. */
        uint8_t *quad = dst + stride * (4 * (n >> 1)) + 4 * (n & 1);

        vp8_idct_dc_add_c(quad, block[n], stride);
    }
}
||
137 | |||
/**
 * Apply vp8_idct_dc_add_c() to four horizontally adjacent 4x4 blocks
 * (a 16x4 strip), block[n] covering columns 4*n .. 4*n+3.
 *
 * @param dst    top-left sample of the 16x4 strip
 * @param block  four coefficient blocks, left to right
 * @param stride distance between successive destination rows
 */
static void vp8_idct_dc_add4y_c(uint8_t *dst, int16_t block[4][16], ptrdiff_t stride)
{
    int n;

    for (n = 0; n < 4; n++)
        vp8_idct_dc_add_c(dst + 4 * n, block[n], stride);
}
||
145 | |||
/*
 * Declares the locals p3..p0 and q0..q3 and loads them with the eight
 * samples straddling position p: p3..p0 come from p[-4*stride]..p[-1*stride],
 * q0..q3 from p[0]..p[3*stride].  Expects `p` and `stride` to be in scope at
 * the expansion site, which lets the filter helpers keep a two-argument
 * (pointer, stride) interface.  Every local is tagged av_unused because most
 * users read only a subset of them.
 */
#define LOAD_PIXELS                          \
    int av_unused p3 = p[-4 * stride];       \
    int av_unused p2 = p[-3 * stride];       \
    int av_unused p1 = p[-2 * stride];       \
    int av_unused p0 = p[-1 * stride];       \
    int av_unused q0 = p[ 0 * stride];       \
    int av_unused q1 = p[ 1 * stride];       \
    int av_unused q2 = p[ 2 * stride];       \
    int av_unused q3 = p[ 3 * stride];
||
156 | |||
/*
 * Clamp n to the signed 8-bit range by biasing it with 0x80, looking it up
 * in the crop table and removing the bias again.  Requires a local `cm`
 * (set to ff_cropTbl + MAX_NEG_CROP at every use in this file) to be in
 * scope; presumably cm[i] saturates i to 0..255 -- confirm against the
 * table's definition.  The argument is parenthesized so that expressions
 * using operators of lower precedence than '+' index the table correctly.
 */
#define clip_int8(n) (cm[(n) + 0x80] - 0x80)
||
158 | |||
159 | static av_always_inline void filter_common(uint8_t *p, ptrdiff_t stride, int is4tap) |
||
160 | { |
||
161 | LOAD_PIXELS |
||
162 | int a, f1, f2; |
||
163 | const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
||
164 | |||
165 | a = 3*(q0 - p0); |
||
166 | |||
167 | if (is4tap) |
||
168 | a += clip_int8(p1 - q1); |
||
169 | |||
170 | a = clip_int8(a); |
||
171 | |||
172 | // We deviate from the spec here with c(a+3) >> 3 |
||
173 | // since that's what libvpx does. |
||
174 | f1 = FFMIN(a+4, 127) >> 3; |
||
175 | f2 = FFMIN(a+3, 127) >> 3; |
||
176 | |||
177 | // Despite what the spec says, we do need to clamp here to |
||
178 | // be bitexact with libvpx. |
||
179 | p[-1*stride] = cm[p0 + f2]; |
||
180 | p[ 0*stride] = cm[q0 - f1]; |
||
181 | |||
182 | // only used for _inner on blocks without high edge variance |
||
183 | if (!is4tap) { |
||
184 | a = (f1+1)>>1; |
||
185 | p[-2*stride] = cm[p1 + a]; |
||
186 | p[ 1*stride] = cm[q1 - a]; |
||
187 | } |
||
188 | } |
||
189 | |||
190 | static av_always_inline int simple_limit(uint8_t *p, ptrdiff_t stride, int flim) |
||
191 | { |
||
192 | LOAD_PIXELS |
||
193 | return 2*FFABS(p0-q0) + (FFABS(p1-q1) >> 1) <= flim; |
||
194 | } |
||
195 | |||
196 | /** |
||
197 | * E - limit at the macroblock edge |
||
198 | * I - limit for interior difference |
||
199 | */ |
||
200 | static av_always_inline int normal_limit(uint8_t *p, ptrdiff_t stride, int E, int I) |
||
201 | { |
||
202 | LOAD_PIXELS |
||
203 | return simple_limit(p, stride, E) |
||
204 | && FFABS(p3-p2) <= I && FFABS(p2-p1) <= I && FFABS(p1-p0) <= I |
||
205 | && FFABS(q3-q2) <= I && FFABS(q2-q1) <= I && FFABS(q1-q0) <= I; |
||
206 | } |
||
207 | |||
208 | // high edge variance |
||
209 | static av_always_inline int hev(uint8_t *p, ptrdiff_t stride, int thresh) |
||
210 | { |
||
211 | LOAD_PIXELS |
||
212 | return FFABS(p1-p0) > thresh || FFABS(q1-q0) > thresh; |
||
213 | } |
||
214 | |||
215 | static av_always_inline void filter_mbedge(uint8_t *p, ptrdiff_t stride) |
||
216 | { |
||
217 | int a0, a1, a2, w; |
||
218 | const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; |
||
219 | |||
220 | LOAD_PIXELS |
||
221 | |||
222 | w = clip_int8(p1-q1); |
||
223 | w = clip_int8(w + 3*(q0-p0)); |
||
224 | |||
225 | a0 = (27*w + 63) >> 7; |
||
226 | a1 = (18*w + 63) >> 7; |
||
227 | a2 = ( 9*w + 63) >> 7; |
||
228 | |||
229 | p[-3*stride] = cm[p2 + a2]; |
||
230 | p[-2*stride] = cm[p1 + a1]; |
||
231 | p[-1*stride] = cm[p0 + a0]; |
||
232 | p[ 0*stride] = cm[q0 - a0]; |
||
233 | p[ 1*stride] = cm[q1 - a1]; |
||
234 | p[ 2*stride] = cm[q2 - a2]; |
||
235 | } |
||
236 | |||
/*
 * Generates vp8_<dir>_loop_filter<size>_c and
 * vp8_<dir>_loop_filter<size>_inner_c.
 *
 * Both walk `size` positions along an edge; stridea is the step from one
 * position to the next and strideb is the step across the edge handed to
 * the per-position helpers.  A position is filtered only if normal_limit()
 * accepts it.  The plain variant then uses filter_common(.., 1) on
 * high-variance positions and filter_mbedge() elsewhere; the _inner variant
 * uses filter_common() with is4tap set to 1 on high-variance positions and
 * 0 elsewhere.  maybe_inline is pasted in front of the return type and may
 * be left empty.
 */
#define LOOP_FILTER(dir, size, stridea, strideb, maybe_inline)                     \
static maybe_inline void vp8_ ## dir ## _loop_filter ## size ## _c(uint8_t *dst,   \
                                    ptrdiff_t stride,                              \
                                    int flim_E, int flim_I, int hev_thresh)        \
{                                                                                  \
    int i;                                                                         \
                                                                                   \
    for (i = 0; i < size; i++) {                                                   \
        uint8_t *edge = dst + i * stridea;                                         \
                                                                                   \
        if (!normal_limit(edge, strideb, flim_E, flim_I))                          \
            continue;                                                              \
        if (hev(edge, strideb, hev_thresh))                                        \
            filter_common(edge, strideb, 1);                                       \
        else                                                                       \
            filter_mbedge(edge, strideb);                                          \
    }                                                                              \
}                                                                                  \
                                                                                   \
static maybe_inline void vp8_ ## dir ## _loop_filter ## size ## _inner_c(uint8_t *dst, \
                                    ptrdiff_t stride,                              \
                                    int flim_E, int flim_I, int hev_thresh)        \
{                                                                                  \
    int i;                                                                         \
                                                                                   \
    for (i = 0; i < size; i++) {                                                   \
        uint8_t *edge = dst + i * stridea;                                         \
                                                                                   \
        if (!normal_limit(edge, strideb, flim_E, flim_I))                          \
            continue;                                                              \
        if (hev(edge, strideb, hev_thresh))                                        \
            filter_common(edge, strideb, 1);                                       \
        else                                                                       \
            filter_common(edge, strideb, 0);                                       \
    }                                                                              \
}

/* 16-position luma filters: "v" steps by 1 along the edge, "h" by stride. */
LOOP_FILTER(v, 16, 1, stride,)
LOOP_FILTER(h, 16, stride, 1,)
||
269 | |||
270 | #define UV_LOOP_FILTER(dir, stridea, strideb) \ |
||
271 | LOOP_FILTER(dir, 8, stridea, strideb, av_always_inline) \ |
||
272 | static void vp8_ ## dir ## _loop_filter8uv_c(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride,\ |
||
273 | int fE, int fI, int hev_thresh)\ |
||
274 | {\ |
||
275 | vp8_ ## dir ## _loop_filter8_c(dstU, stride, fE, fI, hev_thresh);\ |
||
276 | vp8_ ## dir ## _loop_filter8_c(dstV, stride, fE, fI, hev_thresh);\ |
||
277 | }\ |
||
278 | static void vp8_ ## dir ## _loop_filter8uv_inner_c(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride,\ |
||
279 | int fE, int fI, int hev_thresh)\ |
||
280 | {\ |
||
281 | vp8_ ## dir ## _loop_filter8_inner_c(dstU, stride, fE, fI, hev_thresh);\ |
||
282 | vp8_ ## dir ## _loop_filter8_inner_c(dstV, stride, fE, fI, hev_thresh);\ |
||
283 | } |
||
284 | |||
285 | UV_LOOP_FILTER(v, 1, stride) |
||
286 | UV_LOOP_FILTER(h, stride, 1) |
||
287 | |||
/**
 * Simple loop filter, "v" direction: visits the 16 consecutive samples
 * starting at dst and, wherever simple_limit() accepts the edge, runs
 * filter_common() in 4-tap mode with stride as the step across the edge.
 *
 * @param dst    first sample on the q side of the edge
 * @param stride step between samples across the edge
 * @param flim   filter limit passed to simple_limit()
 */
static void vp8_v_loop_filter_simple_c(uint8_t *dst, ptrdiff_t stride, int flim)
{
    int col;

    for (col = 0; col < 16; col++) {
        uint8_t *edge = dst + col;

        if (simple_limit(edge, stride, flim)) {
            filter_common(edge, stride, 1);
        }
    }
}
||
296 | |||
/**
 * Simple loop filter, "h" direction: visits 16 positions spaced stride
 * apart starting at dst and, wherever simple_limit() accepts the edge, runs
 * filter_common() in 4-tap mode with a step of 1 across the edge.
 *
 * @param dst    first sample on the q side of the edge, first position
 * @param stride step between successive positions along the edge
 * @param flim   filter limit passed to simple_limit()
 */
static void vp8_h_loop_filter_simple_c(uint8_t *dst, ptrdiff_t stride, int flim)
{
    int row;

    for (row = 0; row < 16; row++) {
        uint8_t *edge = dst + row * stride;

        if (simple_limit(edge, 1, flim)) {
            filter_common(edge, 1, 1);
        }
    }
}
||
305 | |||
/*
 * Sub-pixel interpolation filter taps, indexed by (mx - 1) or (my - 1).
 * All taps are stored as magnitudes; the FILTER_4TAP / FILTER_6TAP macros
 * subtract taps 1 and 4 and add the others, so each row nets to 128.
 */
static const uint8_t subpel_filters[7][6] = {
    /* [0] */ { 0,  6, 123,  12,  1, 0 },
    /* [1] */ { 2, 11, 108,  36,  8, 1 },
    /* [2] */ { 0,  9,  93,  50,  6, 0 },
    /* [3] */ { 3, 16,  77,  77, 16, 3 },
    /* [4] */ { 0,  6,  50,  93,  9, 0 },
    /* [5] */ { 1,  8,  36, 108, 11, 2 },
    /* [6] */ { 0,  1,  12, 123,  6, 0 },
};
||
315 | |||
/*
 * Generates put_vp8_pixels<WIDTH>_c: a plain block copy of h rows of WIDTH
 * bytes from src to dst, advancing each pointer by its own stride per row.
 * The x and y parameters are accepted but not used.
 */
#define PUT_PIXELS(WIDTH)                                                      \
static void put_vp8_pixels ## WIDTH ##_c(uint8_t *dst, ptrdiff_t dststride,    \
                                         uint8_t *src, ptrdiff_t srcstride,    \
                                         int h, int x, int y)                  \
{                                                                              \
    int row;                                                                   \
                                                                               \
    for (row = 0; row < h; row++) {                                            \
        memcpy(dst, src, WIDTH);                                               \
        dst += dststride;                                                      \
        src += srcstride;                                                      \
    }                                                                          \
}

PUT_PIXELS(16)
PUT_PIXELS(8)
PUT_PIXELS(4)
||
327 | |||
/*
 * Sub-pixel filter kernels.  Both expand to a crop-table lookup and rely on
 * `cm` and the sample index `x` being in scope at the expansion site.  F is
 * a six-entry tap array; taps 1 and 4 are subtracted, the others added, and
 * the sum is rounded with +64 before the >> 7.  `stride` is the distance
 * between successive taps.  FILTER_4TAP ignores F[0] and F[5].
 */
#define FILTER_6TAP(src, F, stride)                                            \
    cm[(F[0] * src[x - 2 * stride] - F[1] * src[x - 1 * stride] +              \
        F[2] * src[x + 0 * stride] + F[3] * src[x + 1 * stride] -              \
        F[4] * src[x + 2 * stride] + F[5] * src[x + 3 * stride] + 64) >> 7]

#define FILTER_4TAP(src, F, stride)                                            \
    cm[(F[2] * src[x + 0 * stride] + F[3] * src[x + 1 * stride] -              \
        F[1] * src[x - 1 * stride] - F[4] * src[x + 2 * stride] + 64) >> 7]
||
335 | |||
/*
 * Generates put_vp8_epel<SIZE>_h<TAPS>_c: horizontal-only sub-pixel
 * interpolation of an h-row, SIZE-wide block.  The tap set is
 * subpel_filters[mx - 1]; my is not used.  The loop index must be named
 * `x` because the FILTER_* macros refer to it.
 */
#define VP8_EPEL_H(SIZE, TAPS)                                                 \
static void put_vp8_epel ## SIZE ## _h ## TAPS ## _c(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my) \
{                                                                              \
    const uint8_t *cm     = ff_cropTbl + MAX_NEG_CROP;                         \
    const uint8_t *filter = subpel_filters[mx - 1];                            \
    int x, y;                                                                  \
                                                                               \
    for (y = 0; y < h; y++, dst += dststride, src += srcstride) {              \
        for (x = 0; x < SIZE; x++) {                                           \
            dst[x] = FILTER_ ## TAPS ## TAP(src, filter, 1);                   \
        }                                                                      \
    }                                                                          \
}
||
/*
 * Generates put_vp8_epel<SIZE>_v<TAPS>_c: vertical-only sub-pixel
 * interpolation of an h-row, SIZE-wide block.  The tap set is
 * subpel_filters[my - 1] and successive taps are srcstride apart; mx is not
 * used.  The loop index must be named `x` because the FILTER_* macros refer
 * to it.
 */
#define VP8_EPEL_V(SIZE, TAPS)                                                 \
static void put_vp8_epel ## SIZE ## _v ## TAPS ## _c(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my) \
{                                                                              \
    const uint8_t *cm     = ff_cropTbl + MAX_NEG_CROP;                         \
    const uint8_t *filter = subpel_filters[my - 1];                            \
    int x, y;                                                                  \
                                                                               \
    for (y = 0; y < h; y++, dst += dststride, src += srcstride) {              \
        for (x = 0; x < SIZE; x++) {                                           \
            dst[x] = FILTER_ ## TAPS ## TAP(src, filter, srcstride);           \
        }                                                                      \
    }                                                                          \
}
||
/*
 * Generates put_vp8_epel<SIZE>_h<HTAPS>v<VTAPS>_c: separable sub-pixel
 * interpolation.
 *
 * Pass 1 filters h + VTAPS - 1 source rows horizontally with
 * subpel_filters[mx - 1] into a SIZE-wide scratch buffer, starting
 * (2 - (VTAPS == 4)) rows above src, i.e. one row for the 4-tap and two for
 * the 6-tap vertical filter.  Pass 2 filters the scratch buffer vertically
 * with subpel_filters[my - 1] into dst.  The scratch buffer holds
 * 2*SIZE + VTAPS - 1 rows, so h must not exceed 2*SIZE.  The loop index
 * must be named `x` because the FILTER_* macros refer to it.
 */
#define VP8_EPEL_HV(SIZE, HTAPS, VTAPS)                                        \
static void put_vp8_epel ## SIZE ## _h ## HTAPS ## v ## VTAPS ## _c(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my) \
{                                                                              \
    const uint8_t *cm      = ff_cropTbl + MAX_NEG_CROP;                        \
    const uint8_t *hfilter = subpel_filters[mx - 1];                           \
    const uint8_t *vfilter = subpel_filters[my - 1];                           \
    uint8_t tmp_array[(2 * SIZE + VTAPS - 1) * SIZE];                          \
    uint8_t *line = tmp_array;                                                 \
    int x, y;                                                                  \
                                                                               \
    src -= (2 - (VTAPS == 4)) * srcstride;                                     \
                                                                               \
    for (y = 0; y < h + VTAPS - 1; y++, line += SIZE, src += srcstride) {      \
        for (x = 0; x < SIZE; x++) {                                           \
            line[x] = FILTER_ ## HTAPS ## TAP(src, hfilter, 1);                \
        }                                                                      \
    }                                                                          \
                                                                               \
    line = tmp_array + (2 - (VTAPS == 4)) * SIZE;                              \
                                                                               \
    for (y = 0; y < h; y++, dst += dststride, line += SIZE) {                  \
        for (x = 0; x < SIZE; x++) {                                           \
            dst[x] = FILTER_ ## VTAPS ## TAP(line, vfilter, SIZE);             \
        }                                                                      \
    }                                                                          \
}
||
391 | |||
392 | VP8_EPEL_H(16, 4) |
||
393 | VP8_EPEL_H(8, 4) |
||
394 | VP8_EPEL_H(4, 4) |
||
395 | VP8_EPEL_H(16, 6) |
||
396 | VP8_EPEL_H(8, 6) |
||
397 | VP8_EPEL_H(4, 6) |
||
398 | VP8_EPEL_V(16, 4) |
||
399 | VP8_EPEL_V(8, 4) |
||
400 | VP8_EPEL_V(4, 4) |
||
401 | VP8_EPEL_V(16, 6) |
||
402 | VP8_EPEL_V(8, 6) |
||
403 | VP8_EPEL_V(4, 6) |
||
404 | VP8_EPEL_HV(16, 4, 4) |
||
405 | VP8_EPEL_HV(8, 4, 4) |
||
406 | VP8_EPEL_HV(4, 4, 4) |
||
407 | VP8_EPEL_HV(16, 4, 6) |
||
408 | VP8_EPEL_HV(8, 4, 6) |
||
409 | VP8_EPEL_HV(4, 4, 6) |
||
410 | VP8_EPEL_HV(16, 6, 4) |
||
411 | VP8_EPEL_HV(8, 6, 4) |
||
412 | VP8_EPEL_HV(4, 6, 4) |
||
413 | VP8_EPEL_HV(16, 6, 6) |
||
414 | VP8_EPEL_HV(8, 6, 6) |
||
415 | VP8_EPEL_HV(4, 6, 6) |
||
416 | |||
/*
 * Generates the bilinear interpolators put_vp8_bilinear<SIZE>_h_c, _v_c and
 * _hv_c for h-row, SIZE-wide blocks.
 *
 * Each output is a weighted mean of two neighbouring samples with weights
 * (8 - m) and m, rounded with +4 before the >> 3; m is mx for horizontal
 * and my for vertical neighbours.  The _hv variant first filters h + 1 rows
 * horizontally into a SIZE-wide scratch buffer and then filters that buffer
 * vertically.  The scratch buffer holds 2*SIZE + 1 rows, so h must not
 * exceed 2*SIZE.
 */
#define VP8_BILINEAR(SIZE)                                                     \
static void put_vp8_bilinear ## SIZE ## _h_c(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my) \
{                                                                              \
    const int w_left = 8 - mx, w_right = mx;                                   \
    int row, col;                                                              \
                                                                               \
    for (row = 0; row < h; row++, dst += dstride, src += sstride) {            \
        for (col = 0; col < SIZE; col++) {                                     \
            dst[col] = (w_left * src[col] + w_right * src[col + 1] + 4) >> 3;  \
        }                                                                      \
    }                                                                          \
}                                                                              \
                                                                               \
static void put_vp8_bilinear ## SIZE ## _v_c(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my) \
{                                                                              \
    const int w_top = 8 - my, w_bottom = my;                                   \
    int row, col;                                                              \
                                                                               \
    for (row = 0; row < h; row++, dst += dstride, src += sstride) {            \
        for (col = 0; col < SIZE; col++) {                                     \
            dst[col] = (w_top * src[col] + w_bottom * src[col + sstride] + 4) >> 3; \
        }                                                                      \
    }                                                                          \
}                                                                              \
                                                                               \
static void put_vp8_bilinear ## SIZE ## _hv_c(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my) \
{                                                                              \
    const int w_left = 8 - mx, w_right  = mx;                                  \
    const int w_top  = 8 - my, w_bottom = my;                                  \
    uint8_t tmp_array[(2 * SIZE + 1) * SIZE];                                  \
    uint8_t *line = tmp_array;                                                 \
    int row, col;                                                              \
                                                                               \
    for (row = 0; row <= h; row++, line += SIZE, src += sstride) {             \
        for (col = 0; col < SIZE; col++) {                                     \
            line[col] = (w_left * src[col] + w_right * src[col + 1] + 4) >> 3; \
        }                                                                      \
    }                                                                          \
                                                                               \
    line = tmp_array;                                                          \
                                                                               \
    for (row = 0; row < h; row++, dst += dstride, line += SIZE) {              \
        for (col = 0; col < SIZE; col++) {                                     \
            dst[col] = (w_top * line[col] + w_bottom * line[col + SIZE] + 4) >> 3; \
        }                                                                      \
    }                                                                          \
}

VP8_BILINEAR(16)
VP8_BILINEAR(8)
VP8_BILINEAR(4)
||
471 | |||
/*
 * Fills dsp->put_vp8_epel_pixels_tab[IDX] with the C functions for block
 * width SIZE.  The second index selects the vertical filter and the third
 * the horizontal one: 0 = none (plain copy when both are 0), 1 = 4-tap,
 * 2 = 6-tap.  Expects a `dsp` pointer in scope; use as a statement followed
 * by a semicolon.
 */
#define VP8_MC_FUNC(IDX, SIZE)                                                    \
    do {                                                                          \
        dsp->put_vp8_epel_pixels_tab[IDX][0][0] = put_vp8_pixels ## SIZE ## _c;   \
        dsp->put_vp8_epel_pixels_tab[IDX][0][1] = put_vp8_epel ## SIZE ## _h4_c;  \
        dsp->put_vp8_epel_pixels_tab[IDX][0][2] = put_vp8_epel ## SIZE ## _h6_c;  \
        dsp->put_vp8_epel_pixels_tab[IDX][1][0] = put_vp8_epel ## SIZE ## _v4_c;  \
        dsp->put_vp8_epel_pixels_tab[IDX][1][1] = put_vp8_epel ## SIZE ## _h4v4_c; \
        dsp->put_vp8_epel_pixels_tab[IDX][1][2] = put_vp8_epel ## SIZE ## _h6v4_c; \
        dsp->put_vp8_epel_pixels_tab[IDX][2][0] = put_vp8_epel ## SIZE ## _v6_c;  \
        dsp->put_vp8_epel_pixels_tab[IDX][2][1] = put_vp8_epel ## SIZE ## _h4v6_c; \
        dsp->put_vp8_epel_pixels_tab[IDX][2][2] = put_vp8_epel ## SIZE ## _h6v6_c; \
    } while (0)
||
482 | |||
/*
 * Fills dsp->put_vp8_bilinear_pixels_tab[IDX] with the C functions for
 * block width SIZE.  The table has the same 3x3 shape as the epel table,
 * but slots 1 and 2 of each index share one function: a zero second index
 * means no vertical filtering, a zero third index no horizontal filtering.
 * Expects a `dsp` pointer in scope; use as a statement followed by a
 * semicolon.
 */
#define VP8_BILINEAR_MC_FUNC(IDX, SIZE)                                                 \
    do {                                                                                \
        dsp->put_vp8_bilinear_pixels_tab[IDX][0][0] = put_vp8_pixels ## SIZE ## _c;     \
        dsp->put_vp8_bilinear_pixels_tab[IDX][0][1] = put_vp8_bilinear ## SIZE ## _h_c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][0][2] = put_vp8_bilinear ## SIZE ## _h_c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][1][0] = put_vp8_bilinear ## SIZE ## _v_c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][1][1] = put_vp8_bilinear ## SIZE ## _hv_c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][1][2] = put_vp8_bilinear ## SIZE ## _hv_c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][2][0] = put_vp8_bilinear ## SIZE ## _v_c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][2][1] = put_vp8_bilinear ## SIZE ## _hv_c; \
        dsp->put_vp8_bilinear_pixels_tab[IDX][2][2] = put_vp8_bilinear ## SIZE ## _hv_c; \
    } while (0)
||
493 | |||
494 | av_cold void ff_vp8dsp_init(VP8DSPContext *dsp) |
||
495 | { |
||
496 | dsp->vp8_luma_dc_wht = vp8_luma_dc_wht_c; |
||
497 | dsp->vp8_luma_dc_wht_dc = vp8_luma_dc_wht_dc_c; |
||
498 | dsp->vp8_idct_add = vp8_idct_add_c; |
||
499 | dsp->vp8_idct_dc_add = vp8_idct_dc_add_c; |
||
500 | dsp->vp8_idct_dc_add4y = vp8_idct_dc_add4y_c; |
||
501 | dsp->vp8_idct_dc_add4uv = vp8_idct_dc_add4uv_c; |
||
502 | |||
503 | dsp->vp8_v_loop_filter16y = vp8_v_loop_filter16_c; |
||
504 | dsp->vp8_h_loop_filter16y = vp8_h_loop_filter16_c; |
||
505 | dsp->vp8_v_loop_filter8uv = vp8_v_loop_filter8uv_c; |
||
506 | dsp->vp8_h_loop_filter8uv = vp8_h_loop_filter8uv_c; |
||
507 | |||
508 | dsp->vp8_v_loop_filter16y_inner = vp8_v_loop_filter16_inner_c; |
||
509 | dsp->vp8_h_loop_filter16y_inner = vp8_h_loop_filter16_inner_c; |
||
510 | dsp->vp8_v_loop_filter8uv_inner = vp8_v_loop_filter8uv_inner_c; |
||
511 | dsp->vp8_h_loop_filter8uv_inner = vp8_h_loop_filter8uv_inner_c; |
||
512 | |||
513 | dsp->vp8_v_loop_filter_simple = vp8_v_loop_filter_simple_c; |
||
514 | dsp->vp8_h_loop_filter_simple = vp8_h_loop_filter_simple_c; |
||
515 | |||
516 | VP8_MC_FUNC(0, 16); |
||
517 | VP8_MC_FUNC(1, 8); |
||
518 | VP8_MC_FUNC(2, 4); |
||
519 | |||
520 | VP8_BILINEAR_MC_FUNC(0, 16); |
||
521 | VP8_BILINEAR_MC_FUNC(1, 8); |
||
522 | VP8_BILINEAR_MC_FUNC(2, 4); |
||
523 | |||
524 | if (ARCH_ARM) |
||
525 | ff_vp8dsp_init_arm(dsp); |
||
526 | if (ARCH_PPC) |
||
527 | ff_vp8dsp_init_ppc(dsp); |
||
528 | if (ARCH_X86) |
||
529 | ff_vp8dsp_init_x86(dsp); |
||
530 | }>>>>>>>>>>>>>>>>>>>>>=>=>=>=>=>=>=>>>>>>> |