/*
 * Copyright (C) 2010 David Conrad
 * Copyright (C) 2010 Ronald S. Bultje
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * VP8 compatible video decoder
 */

#include "dsputil.h"
#include "vp8dsp.h"
#include "libavutil/common.h"

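/*
 * Inverse Walsh-Hadamard transform for the luma DC block: VP8 codes the
 * DC coefficient of each of the 16 luma 4x4 subblocks in a separate 4x4
 * block (dc[]). This transform recovers the 16 DC values and scatters
 * them back into coefficient [0] of each subblock; the +3 bias before
 * the final >> 3 implements rounding.
 */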
// TODO: Maybe add dequant
static void vp8_luma_dc_wht_c(int16_t block[4][4][16], int16_t dc[16])
{
    int i, t0, t1, t2, t3;

    for (i = 0; i < 4; i++) {
        t0 = dc[0*4+i] + dc[3*4+i];
        t1 = dc[1*4+i] + dc[2*4+i];
        t2 = dc[1*4+i] - dc[2*4+i];
        t3 = dc[0*4+i] - dc[3*4+i];

        dc[0*4+i] = t0 + t1;
        dc[1*4+i] = t3 + t2;
        dc[2*4+i] = t0 - t1;
        dc[3*4+i] = t3 - t2;
    }

    for (i = 0; i < 4; i++) {
        t0 = dc[i*4+0] + dc[i*4+3] + 3; // rounding
        t1 = dc[i*4+1] + dc[i*4+2];
        t2 = dc[i*4+1] - dc[i*4+2];
        t3 = dc[i*4+0] - dc[i*4+3] + 3; // rounding
        dc[i*4+0] = 0;
        dc[i*4+1] = 0;
        dc[i*4+2] = 0;
        dc[i*4+3] = 0;

        block[i][0][0] = (t0 + t1) >> 3;
        block[i][1][0] = (t3 + t2) >> 3;
        block[i][2][0] = (t0 - t1) >> 3;
        block[i][3][0] = (t3 - t2) >> 3;
    }
}

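/*
 * Fast path for the common case where only dc[0] is nonzero: every
 * output DC then takes the same value, (dc[0] + 3) >> 3.
 */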
static void vp8_luma_dc_wht_dc_c(int16_t block[4][4][16], int16_t dc[16])
{
    int i, val = (dc[0] + 3) >> 3;
    dc[0] = 0;

    for (i = 0; i < 4; i++) {
        block[i][0][0] = val;
        block[i][1][0] = val;
        block[i][2][0] = val;
        block[i][3][0] = val;
    }
}

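/*
 * Fixed-point multipliers for the 4x4 iDCT, as in the VP8 reference
 * decoder: 20091/65536 ~= sqrt(2)*cos(pi/8) - 1 and
 * 35468/65536 ~= sqrt(2)*sin(pi/8). MUL_20091 adds the input back in,
 * so it computes approximately a * sqrt(2)*cos(pi/8).
 */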
#define MUL_20091(a) ((((a)*20091) >> 16) + (a))
#define MUL_35468(a)  (((a)*35468) >> 16)

static void vp8_idct_add_c(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
{
    int i, t0, t1, t2, t3;
    int16_t tmp[16];

    for (i = 0; i < 4; i++) {
        t0 = block[0*4+i] + block[2*4+i];
        t1 = block[0*4+i] - block[2*4+i];
        t2 = MUL_35468(block[1*4+i]) - MUL_20091(block[3*4+i]);
        t3 = MUL_20091(block[1*4+i]) + MUL_35468(block[3*4+i]);
        block[0*4+i] = 0;
        block[1*4+i] = 0;
        block[2*4+i] = 0;
        block[3*4+i] = 0;

        tmp[i*4+0] = t0 + t3;
        tmp[i*4+1] = t1 + t2;
        tmp[i*4+2] = t1 - t2;
        tmp[i*4+3] = t0 - t3;
    }

    for (i = 0; i < 4; i++) {
        t0 = tmp[0*4+i] + tmp[2*4+i];
        t1 = tmp[0*4+i] - tmp[2*4+i];
        t2 = MUL_35468(tmp[1*4+i]) - MUL_20091(tmp[3*4+i]);
        t3 = MUL_20091(tmp[1*4+i]) + MUL_35468(tmp[3*4+i]);

        dst[0] = av_clip_uint8(dst[0] + ((t0 + t3 + 4) >> 3));
        dst[1] = av_clip_uint8(dst[1] + ((t1 + t2 + 4) >> 3));
        dst[2] = av_clip_uint8(dst[2] + ((t1 - t2 + 4) >> 3));
        dst[3] = av_clip_uint8(dst[3] + ((t0 - t3 + 4) >> 3));
        dst += stride;
    }
}

static void vp8_idct_dc_add_c(uint8_t *dst, int16_t block[16], ptrdiff_t stride)
{
    int i, dc = (block[0] + 4) >> 3;
    block[0] = 0;

    for (i = 0; i < 4; i++) {
        dst[0] = av_clip_uint8(dst[0] + dc);
        dst[1] = av_clip_uint8(dst[1] + dc);
        dst[2] = av_clip_uint8(dst[2] + dc);
        dst[3] = av_clip_uint8(dst[3] + dc);
        dst += stride;
    }
}

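/*
 * DC-only iDCT applied to four adjacent 4x4 blocks at once: a 2x2 group
 * covering an 8x8 chroma area (4uv), or four blocks side by side
 * covering a 16x4 luma strip (4y).
 */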
static void vp8_idct_dc_add4uv_c(uint8_t *dst, int16_t block[4][16], ptrdiff_t stride)
{
    vp8_idct_dc_add_c(dst+stride*0+0, block[0], stride);
    vp8_idct_dc_add_c(dst+stride*0+4, block[1], stride);
    vp8_idct_dc_add_c(dst+stride*4+0, block[2], stride);
    vp8_idct_dc_add_c(dst+stride*4+4, block[3], stride);
}

static void vp8_idct_dc_add4y_c(uint8_t *dst, int16_t block[4][16], ptrdiff_t stride)
{
    vp8_idct_dc_add_c(dst+ 0, block[0], stride);
    vp8_idct_dc_add_c(dst+ 4, block[1], stride);
    vp8_idct_dc_add_c(dst+ 8, block[2], stride);
    vp8_idct_dc_add_c(dst+12, block[3], stride);
}

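/*
 * Loop filter. p points at the edge being filtered; p3..p0 are the
 * pixels before the edge and q0..q3 the pixels after it, stepping by
 * stride. Passing stride == 1 filters a vertical edge and stride == row
 * pitch a horizontal one, so one implementation serves both directions.
 */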
// because I like only having two parameters to pass to functions...
#define LOAD_PIXELS\
    int av_unused p3 = p[-4*stride];\
    int av_unused p2 = p[-3*stride];\
    int av_unused p1 = p[-2*stride];\
    int av_unused p0 = p[-1*stride];\
    int av_unused q0 = p[ 0*stride];\
    int av_unused q1 = p[ 1*stride];\
    int av_unused q2 = p[ 2*stride];\
    int av_unused q3 = p[ 3*stride];

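// clip to [-128, 127] via the crop table (cm points at its zero offset)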
#define clip_int8(n) (cm[n+0x80]-0x80)

static av_always_inline void filter_common(uint8_t *p, ptrdiff_t stride, int is4tap)
{
    LOAD_PIXELS
    int a, f1, f2;
    const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

    a = 3*(q0 - p0);

    if (is4tap)
        a += clip_int8(p1 - q1);

    a = clip_int8(a);

    // We deviate from the spec here with c(a+3) >> 3
    // since that's what libvpx does.
    f1 = FFMIN(a+4, 127) >> 3;
    f2 = FFMIN(a+3, 127) >> 3;

    // Despite what the spec says, we do need to clamp here to
    // be bitexact with libvpx.
    p[-1*stride] = cm[p0 + f2];
    p[ 0*stride] = cm[q0 - f1];

    // only used for _inner on blocks without high edge variance
    if (!is4tap) {
        a = (f1+1)>>1;
        p[-2*stride] = cm[p1 + a];
        p[ 1*stride] = cm[q1 - a];
    }
}

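// edge-strength test used directly by the simple filter and as the
// first condition of normal_limit()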
static av_always_inline int simple_limit(uint8_t *p, ptrdiff_t stride, int flim)
{
    LOAD_PIXELS
    return 2*FFABS(p0-q0) + (FFABS(p1-q1) >> 1) <= flim;
}

/**
 * E - limit at the macroblock edge
 * I - limit for interior difference
 */
static av_always_inline int normal_limit(uint8_t *p, ptrdiff_t stride, int E, int I)
{
    LOAD_PIXELS
    return simple_limit(p, stride, E)
        && FFABS(p3-p2) <= I && FFABS(p2-p1) <= I && FFABS(p1-p0) <= I
        && FFABS(q3-q2) <= I && FFABS(q2-q1) <= I && FFABS(q1-q0) <= I;
}

// high edge variance
static av_always_inline int hev(uint8_t *p, ptrdiff_t stride, int thresh)
{
    LOAD_PIXELS
    return FFABS(p1-p0) > thresh || FFABS(q1-q0) > thresh;
}

static av_always_inline void filter_mbedge(uint8_t *p, ptrdiff_t stride)
{
    int a0, a1, a2, w;
    const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;

    LOAD_PIXELS

    w = clip_int8(p1-q1);
    w = clip_int8(w + 3*(q0-p0));

    a0 = (27*w + 63) >> 7;
    a1 = (18*w + 63) >> 7;
    a2 = ( 9*w + 63) >> 7;

    p[-3*stride] = cm[p2 + a2];
    p[-2*stride] = cm[p1 + a1];
    p[-1*stride] = cm[p0 + a0];
    p[ 0*stride] = cm[q0 - a0];
    p[ 1*stride] = cm[q1 - a1];
    p[ 2*stride] = cm[q2 - a2];
}

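/*
 * LOOP_FILTER instantiates the macroblock-edge filter and the _inner
 * (subblock-edge) variant for one direction: stridea steps along the
 * edge, one iteration per filtered pixel, while strideb steps across it.
 * On macroblock edges, blocks without high edge variance get the wider
 * filter_mbedge() (three pixels adjusted per side); inner edges always
 * use filter_common(), which also adjusts p1/q1 when variance is low.
 */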
#define LOOP_FILTER(dir, size, stridea, strideb, maybe_inline) \
static maybe_inline void vp8_ ## dir ## _loop_filter ## size ## _c(uint8_t *dst, ptrdiff_t stride,\
                                     int flim_E, int flim_I, int hev_thresh)\
{\
    int i;\
\
    for (i = 0; i < size; i++)\
        if (normal_limit(dst+i*stridea, strideb, flim_E, flim_I)) {\
            if (hev(dst+i*stridea, strideb, hev_thresh))\
                filter_common(dst+i*stridea, strideb, 1);\
            else\
                filter_mbedge(dst+i*stridea, strideb);\
        }\
}\
\
static maybe_inline void vp8_ ## dir ## _loop_filter ## size ## _inner_c(uint8_t *dst, ptrdiff_t stride,\
                                      int flim_E, int flim_I, int hev_thresh)\
{\
    int i;\
\
    for (i = 0; i < size; i++)\
        if (normal_limit(dst+i*stridea, strideb, flim_E, flim_I)) {\
            int hv = hev(dst+i*stridea, strideb, hev_thresh);\
            if (hv) \
                filter_common(dst+i*stridea, strideb, 1);\
            else \
                filter_common(dst+i*stridea, strideb, 0);\
        }\
}

LOOP_FILTER(v, 16, 1, stride,)
LOOP_FILTER(h, 16, stride, 1,)

#define UV_LOOP_FILTER(dir, stridea, strideb) \
LOOP_FILTER(dir, 8, stridea, strideb, av_always_inline) \
static void vp8_ ## dir ## _loop_filter8uv_c(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride,\
                                      int fE, int fI, int hev_thresh)\
{\
  vp8_ ## dir ## _loop_filter8_c(dstU, stride, fE, fI, hev_thresh);\
  vp8_ ## dir ## _loop_filter8_c(dstV, stride, fE, fI, hev_thresh);\
}\
static void vp8_ ## dir ## _loop_filter8uv_inner_c(uint8_t *dstU, uint8_t *dstV, ptrdiff_t stride,\
                                      int fE, int fI, int hev_thresh)\
{\
  vp8_ ## dir ## _loop_filter8_inner_c(dstU, stride, fE, fI, hev_thresh);\
  vp8_ ## dir ## _loop_filter8_inner_c(dstV, stride, fE, fI, hev_thresh);\
}

UV_LOOP_FILTER(v, 1, stride)
UV_LOOP_FILTER(h, stride, 1)

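/*
 * "Simple" loop filter, selected by the frame header: luma only, with
 * simple_limit() gating a 4-tap filter_common() that adjusts just p0
 * and q0.
 */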
static void vp8_v_loop_filter_simple_c(uint8_t *dst, ptrdiff_t stride, int flim)
{
    int i;

    for (i = 0; i < 16; i++)
        if (simple_limit(dst+i, stride, flim))
            filter_common(dst+i, stride, 1);
}

static void vp8_h_loop_filter_simple_c(uint8_t *dst, ptrdiff_t stride, int flim)
{
    int i;

    for (i = 0; i < 16; i++)
        if (simple_limit(dst+i*stride, 1, flim))
            filter_common(dst+i*stride, 1, 1);
}

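/*
 * Subpel filter coefficients for the seven fractional positions
 * (1/8 .. 7/8); full-pel is a plain copy. FILTER_6TAP applies the taps
 * with alternating signs, so after sign application each row sums to
 * 128 and the result is normalized by >> 7. Rows whose outer taps are
 * zero are dispatched to the cheaper 4-tap variants.
 */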
static const uint8_t subpel_filters[7][6] = {
    { 0,   6, 123,  12,   1,   0 },
    { 2,  11, 108,  36,   8,   1 },
    { 0,   9,  93,  50,   6,   0 },
    { 3,  16,  77,  77,  16,   3 },
    { 0,   6,  50,  93,   9,   0 },
    { 1,   8,  36, 108,  11,   2 },
    { 0,   1,  12, 123,   6,   0 },
};

#define PUT_PIXELS(WIDTH) \
static void put_vp8_pixels ## WIDTH ##_c(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int x, int y) { \
    int i; \
    for (i = 0; i < h; i++, dst+= dststride, src+= srcstride) { \
        memcpy(dst, src, WIDTH); \
    } \
}

PUT_PIXELS(16)
PUT_PIXELS(8)
PUT_PIXELS(4)

#define FILTER_6TAP(src, F, stride) \
    cm[(F[2]*src[x+0*stride] - F[1]*src[x-1*stride] + F[0]*src[x-2*stride] + \
        F[3]*src[x+1*stride] - F[4]*src[x+2*stride] + F[5]*src[x+3*stride] + 64) >> 7]

#define FILTER_4TAP(src, F, stride) \
    cm[(F[2]*src[x+0*stride] - F[1]*src[x-1*stride] + \
        F[3]*src[x+1*stride] - F[4]*src[x+2*stride] + 64) >> 7]

#define VP8_EPEL_H(SIZE, TAPS) \
static void put_vp8_epel ## SIZE ## _h ## TAPS ## _c(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my) \
{ \
    const uint8_t *filter = subpel_filters[mx-1]; \
    const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; \
    int x, y; \
\
    for (y = 0; y < h; y++) { \
        for (x = 0; x < SIZE; x++) \
            dst[x] = FILTER_ ## TAPS ## TAP(src, filter, 1); \
        dst += dststride; \
        src += srcstride; \
    } \
}
#define VP8_EPEL_V(SIZE, TAPS) \
static void put_vp8_epel ## SIZE ## _v ## TAPS ## _c(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my) \
{ \
    const uint8_t *filter = subpel_filters[my-1]; \
    const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; \
    int x, y; \
\
    for (y = 0; y < h; y++) { \
        for (x = 0; x < SIZE; x++) \
            dst[x] = FILTER_ ## TAPS ## TAP(src, filter, srcstride); \
        dst += dststride; \
        src += srcstride; \
    } \
}
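
/*
 * Two-pass filtering for positions that are fractional in both
 * dimensions: first filter horizontally into tmp_array, starting far
 * enough above the block to cover the vertical filter's taps (two rows
 * for 6-tap, one for 4-tap), then filter tmp_array vertically into dst.
 */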
#define VP8_EPEL_HV(SIZE, HTAPS, VTAPS) \
static void put_vp8_epel ## SIZE ## _h ## HTAPS ## v ## VTAPS ## _c(uint8_t *dst, ptrdiff_t dststride, uint8_t *src, ptrdiff_t srcstride, int h, int mx, int my) \
{ \
    const uint8_t *filter = subpel_filters[mx-1]; \
    const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; \
    int x, y; \
    uint8_t tmp_array[(2*SIZE+VTAPS-1)*SIZE]; \
    uint8_t *tmp = tmp_array; \
    src -= (2-(VTAPS==4))*srcstride; \
\
    for (y = 0; y < h+VTAPS-1; y++) { \
        for (x = 0; x < SIZE; x++) \
            tmp[x] = FILTER_ ## HTAPS ## TAP(src, filter, 1); \
        tmp += SIZE; \
        src += srcstride; \
    } \
\
    tmp = tmp_array + (2-(VTAPS==4))*SIZE; \
    filter = subpel_filters[my-1]; \
\
    for (y = 0; y < h; y++) { \
        for (x = 0; x < SIZE; x++) \
            dst[x] = FILTER_ ## VTAPS ## TAP(tmp, filter, SIZE); \
        dst += dststride; \
        tmp += SIZE; \
    } \
}

VP8_EPEL_H(16, 4)
VP8_EPEL_H(8,  4)
VP8_EPEL_H(4,  4)
VP8_EPEL_H(16, 6)
VP8_EPEL_H(8,  6)
VP8_EPEL_H(4,  6)
VP8_EPEL_V(16, 4)
VP8_EPEL_V(8,  4)
VP8_EPEL_V(4,  4)
VP8_EPEL_V(16, 6)
VP8_EPEL_V(8,  6)
VP8_EPEL_V(4,  6)
VP8_EPEL_HV(16, 4, 4)
VP8_EPEL_HV(8,  4, 4)
VP8_EPEL_HV(4,  4, 4)
VP8_EPEL_HV(16, 4, 6)
VP8_EPEL_HV(8,  4, 6)
VP8_EPEL_HV(4,  4, 6)
VP8_EPEL_HV(16, 6, 4)
VP8_EPEL_HV(8,  6, 4)
VP8_EPEL_HV(4,  6, 4)
VP8_EPEL_HV(16, 6, 6)
VP8_EPEL_HV(8,  6, 6)
VP8_EPEL_HV(4,  6, 6)

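/*
 * Bilinear motion compensation, used in place of the subpel filters when
 * the bitstream's version field selects it. mx/my are in eighths of a
 * pel; the two taps are weighted (8 - frac) and frac, with +4 rounding
 * before the >> 3.
 */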
#define VP8_BILINEAR(SIZE) \
static void put_vp8_bilinear ## SIZE ## _h_c(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my) \
{ \
    int a = 8-mx, b = mx; \
    int x, y; \
\
    for (y = 0; y < h; y++) { \
        for (x = 0; x < SIZE; x++) \
            dst[x] = (a*src[x] + b*src[x+1] + 4) >> 3; \
        dst += dstride; \
        src += sstride; \
    } \
} \
static void put_vp8_bilinear ## SIZE ## _v_c(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my) \
{ \
    int c = 8-my, d = my; \
    int x, y; \
\
    for (y = 0; y < h; y++) { \
        for (x = 0; x < SIZE; x++) \
            dst[x] = (c*src[x] + d*src[x+sstride] + 4) >> 3; \
        dst += dstride; \
        src += sstride; \
    } \
} \
\
static void put_vp8_bilinear ## SIZE ## _hv_c(uint8_t *dst, ptrdiff_t dstride, uint8_t *src, ptrdiff_t sstride, int h, int mx, int my) \
{ \
    int a = 8-mx, b = mx; \
    int c = 8-my, d = my; \
    int x, y; \
    uint8_t tmp_array[(2*SIZE+1)*SIZE]; \
    uint8_t *tmp = tmp_array; \
\
    for (y = 0; y < h+1; y++) { \
        for (x = 0; x < SIZE; x++) \
            tmp[x] = (a*src[x] + b*src[x+1] + 4) >> 3; \
        tmp += SIZE; \
        src += sstride; \
    } \
\
    tmp = tmp_array; \
\
    for (y = 0; y < h; y++) { \
        for (x = 0; x < SIZE; x++) \
            dst[x] = (c*tmp[x] + d*tmp[x+SIZE] + 4) >> 3; \
        dst += dstride; \
        tmp += SIZE; \
    } \
}

VP8_BILINEAR(16)
VP8_BILINEAR(8)
VP8_BILINEAR(4)

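/*
 * The MC function tables are indexed [idx][vtaps][htaps]: idx 0/1/2
 * selects block width 16/8/4, and each tap index is 0 for full-pel,
 * 1 for the 4-tap and 2 for the 6-tap filter. The bilinear table keeps
 * the same shape but has no tap distinction, so indices 1 and 2 map to
 * the same function.
 */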
#define VP8_MC_FUNC(IDX, SIZE) \
    dsp->put_vp8_epel_pixels_tab[IDX][0][0] = put_vp8_pixels ## SIZE ## _c; \
    dsp->put_vp8_epel_pixels_tab[IDX][0][1] = put_vp8_epel ## SIZE ## _h4_c; \
    dsp->put_vp8_epel_pixels_tab[IDX][0][2] = put_vp8_epel ## SIZE ## _h6_c; \
    dsp->put_vp8_epel_pixels_tab[IDX][1][0] = put_vp8_epel ## SIZE ## _v4_c; \
    dsp->put_vp8_epel_pixels_tab[IDX][1][1] = put_vp8_epel ## SIZE ## _h4v4_c; \
    dsp->put_vp8_epel_pixels_tab[IDX][1][2] = put_vp8_epel ## SIZE ## _h6v4_c; \
    dsp->put_vp8_epel_pixels_tab[IDX][2][0] = put_vp8_epel ## SIZE ## _v6_c; \
    dsp->put_vp8_epel_pixels_tab[IDX][2][1] = put_vp8_epel ## SIZE ## _h4v6_c; \
    dsp->put_vp8_epel_pixels_tab[IDX][2][2] = put_vp8_epel ## SIZE ## _h6v6_c

#define VP8_BILINEAR_MC_FUNC(IDX, SIZE) \
    dsp->put_vp8_bilinear_pixels_tab[IDX][0][0] = put_vp8_pixels ## SIZE ## _c; \
    dsp->put_vp8_bilinear_pixels_tab[IDX][0][1] = put_vp8_bilinear ## SIZE ## _h_c; \
    dsp->put_vp8_bilinear_pixels_tab[IDX][0][2] = put_vp8_bilinear ## SIZE ## _h_c; \
    dsp->put_vp8_bilinear_pixels_tab[IDX][1][0] = put_vp8_bilinear ## SIZE ## _v_c; \
    dsp->put_vp8_bilinear_pixels_tab[IDX][1][1] = put_vp8_bilinear ## SIZE ## _hv_c; \
    dsp->put_vp8_bilinear_pixels_tab[IDX][1][2] = put_vp8_bilinear ## SIZE ## _hv_c; \
    dsp->put_vp8_bilinear_pixels_tab[IDX][2][0] = put_vp8_bilinear ## SIZE ## _v_c; \
    dsp->put_vp8_bilinear_pixels_tab[IDX][2][1] = put_vp8_bilinear ## SIZE ## _hv_c; \
    dsp->put_vp8_bilinear_pixels_tab[IDX][2][2] = put_vp8_bilinear ## SIZE ## _hv_c

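/*
 * Install the C implementations, then let the per-architecture init
 * functions replace individual pointers with optimized versions where
 * available. A minimal caller sketch (illustrative only; the source
 * block must carry the filter margins, 2 pixels above/left and 3
 * below/right for the 6-tap case):
 *
 *     VP8DSPContext dsp;
 *     ff_vp8dsp_init(&dsp);
 *     // 16-wide block, halfpel in both directions (mx = my = 4),
 *     // which is 6-tap in both: tab[0][2][2]
 *     dsp.put_vp8_epel_pixels_tab[0][2][2](dst, dst_stride,
 *                                          src, src_stride, 16, 4, 4);
 */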
av_cold void ff_vp8dsp_init(VP8DSPContext *dsp)
{
    dsp->vp8_luma_dc_wht    = vp8_luma_dc_wht_c;
    dsp->vp8_luma_dc_wht_dc = vp8_luma_dc_wht_dc_c;
    dsp->vp8_idct_add       = vp8_idct_add_c;
    dsp->vp8_idct_dc_add    = vp8_idct_dc_add_c;
    dsp->vp8_idct_dc_add4y  = vp8_idct_dc_add4y_c;
    dsp->vp8_idct_dc_add4uv = vp8_idct_dc_add4uv_c;

    dsp->vp8_v_loop_filter16y = vp8_v_loop_filter16_c;
    dsp->vp8_h_loop_filter16y = vp8_h_loop_filter16_c;
    dsp->vp8_v_loop_filter8uv = vp8_v_loop_filter8uv_c;
    dsp->vp8_h_loop_filter8uv = vp8_h_loop_filter8uv_c;

    dsp->vp8_v_loop_filter16y_inner = vp8_v_loop_filter16_inner_c;
    dsp->vp8_h_loop_filter16y_inner = vp8_h_loop_filter16_inner_c;
    dsp->vp8_v_loop_filter8uv_inner = vp8_v_loop_filter8uv_inner_c;
    dsp->vp8_h_loop_filter8uv_inner = vp8_h_loop_filter8uv_inner_c;

    dsp->vp8_v_loop_filter_simple = vp8_v_loop_filter_simple_c;
    dsp->vp8_h_loop_filter_simple = vp8_h_loop_filter_simple_c;

    VP8_MC_FUNC(0, 16);
    VP8_MC_FUNC(1, 8);
    VP8_MC_FUNC(2, 4);

    VP8_BILINEAR_MC_FUNC(0, 16);
    VP8_BILINEAR_MC_FUNC(1, 8);
    VP8_BILINEAR_MC_FUNC(2, 4);

    if (ARCH_ARM)
        ff_vp8dsp_init_arm(dsp);
    if (ARCH_PPC)
        ff_vp8dsp_init_ppc(dsp);
    if (ARCH_X86)
        ff_vp8dsp_init_x86(dsp);
}