Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright (c) 2004 The FFmpeg Project
  3.  *
  4.  * This file is part of FFmpeg.
  5.  *
  6.  * FFmpeg is free software; you can redistribute it and/or
  7.  * modify it under the terms of the GNU Lesser General Public
  8.  * License as published by the Free Software Foundation; either
  9.  * version 2.1 of the License, or (at your option) any later version.
  10.  *
  11.  * FFmpeg is distributed in the hope that it will be useful,
  12.  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13.  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14.  * Lesser General Public License for more details.
  15.  *
  16.  * You should have received a copy of the GNU Lesser General Public
  17.  * License along with FFmpeg; if not, write to the Free Software
  18.  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19.  */
  20.  
  21. /**
  22.  * @file
  23.  * Standard C DSP-oriented functions cribbed from the original VP3
  24.  * source code.
  25.  */
  26.  
  27. #include "libavutil/attributes.h"
  28. #include "libavutil/common.h"
  29. #include "libavutil/intreadwrite.h"
  30.  
  31. #include "avcodec.h"
  32. #include "rnd_avg.h"
  33. #include "vp3dsp.h"
  34.  
  35. #define IdctAdjustBeforeShift 8
  36. #define xC1S7 64277
  37. #define xC2S6 60547
  38. #define xC3S5 54491
  39. #define xC4S4 46341
  40. #define xC5S3 36410
  41. #define xC6S2 25080
  42. #define xC7S1 12785
  43.  
  44. #define M(a, b) (((a) * (b)) >> 16)
  45.  
/**
 * Two-pass 8x8 inverse DCT in fixed-point arithmetic (see the M() macro and
 * the xCnSm constants) that emits its result directly into a pixel buffer.
 *
 * Pass 1 transforms, in place, each of the eight groups of coefficients that
 * lie 8 elements apart: input[i], input[i + 8], ..., input[i + 56].
 * Pass 2 transforms each group of eight consecutive coefficients
 * (input[8 * i] ... input[8 * i + 7]) and writes the eight results to
 * dst[0], dst[stride], ..., dst[7 * stride], advancing dst by one byte per
 * group.
 *
 * @param dst    first destination byte
 * @param stride offset between the eight bytes written for one pass-2 group
 * @param input  64 coefficients; modified by pass 1
 * @param type   1: store the clipped result, biased by +128, into dst;
 *               any other value: add the result to the bytes already in dst
 *               (the callers in this file pass 1 for "put" and 2 for "add")
 */
static av_always_inline void idct(uint8_t *dst, int stride,
                                  int16_t *input, int type)
{
    int16_t *ip = input;

    int A, B, C, D, Ad, Bd, Cd, Dd, E, F, G, H;
    int Ed, Gd, Add, Bdd, Fd, Hd;

    int i;

    /* Pass 1: 1-D inverse DCT over the elements spaced 8 apart
     * (ip[0 * 8] ... ip[7 * 8]); results replace the inputs. */
    for (i = 0; i < 8; i++) {
        /* Skip the arithmetic when all eight inputs are zero: the outputs
         * would be zero too, so the data can be left untouched. */
        if (ip[0 * 8] | ip[1 * 8] | ip[2 * 8] | ip[3 * 8] |
            ip[4 * 8] | ip[5 * 8] | ip[6 * 8] | ip[7 * 8]) {
            /* Odd-indexed inputs (1, 7, 3, 5). */
            A = M(xC1S7, ip[1 * 8]) + M(xC7S1, ip[7 * 8]);
            B = M(xC7S1, ip[1 * 8]) - M(xC1S7, ip[7 * 8]);
            C = M(xC3S5, ip[3 * 8]) + M(xC5S3, ip[5 * 8]);
            D = M(xC3S5, ip[5 * 8]) - M(xC5S3, ip[3 * 8]);

            Ad = M(xC4S4, (A - C));
            Bd = M(xC4S4, (B - D));

            Cd = A + C;
            Dd = B + D;

            /* Even-indexed inputs (0, 4, 2, 6). */
            E = M(xC4S4, (ip[0 * 8] + ip[4 * 8]));
            F = M(xC4S4, (ip[0 * 8] - ip[4 * 8]));

            G = M(xC2S6, ip[2 * 8]) + M(xC6S2, ip[6 * 8]);
            H = M(xC6S2, ip[2 * 8]) - M(xC2S6, ip[6 * 8]);

            Ed = E - G;
            Gd = E + G;

            Add = F + Ad;
            Bdd = Bd - H;

            Fd = F - Ad;
            Hd = Bd + H;

            /* Final butterflies; every input has been consumed above, so
             * the outputs can safely overwrite the original inputs. */
            ip[0 * 8] = Gd + Cd;
            ip[7 * 8] = Gd - Cd;

            ip[1 * 8] = Add + Hd;
            ip[2 * 8] = Add - Hd;

            ip[3 * 8] = Ed + Dd;
            ip[4 * 8] = Ed - Dd;

            ip[5 * 8] = Fd + Bdd;
            ip[6 * 8] = Fd - Bdd;
        }

        ip += 1;            /* next group of stride-8 elements */
    }

    ip = input;

    /* Pass 2: 1-D inverse DCT over eight consecutive elements, followed by
     * rounding, a ">> 4" scale-down, clipping and the store into dst. */
    for (i = 0; i < 8; i++) {
        /* Check the seven non-DC terms only (bitwise or faster than ||);
         * ip[0] is handled by the DC-only shortcut in the else branch. */
        if (ip[1] | ip[2] | ip[3] |
            ip[4] | ip[5] | ip[6] | ip[7]) {
            A = M(xC1S7, ip[1]) + M(xC7S1, ip[7]);
            B = M(xC7S1, ip[1]) - M(xC1S7, ip[7]);
            C = M(xC3S5, ip[3]) + M(xC5S3, ip[5]);
            D = M(xC3S5, ip[5]) - M(xC5S3, ip[3]);

            Ad = M(xC4S4, (A - C));
            Bd = M(xC4S4, (B - D));

            Cd = A + C;
            Dd = B + D;

            /* The +8 is the rounding term for the ">> 4" below. E and F feed
             * every output, so adding it here rounds all eight results. */
            E = M(xC4S4, (ip[0] + ip[4])) + 8;
            F = M(xC4S4, (ip[0] - ip[4])) + 8;

            /* In "put" mode fold the +128 output bias into E and F as well:
             * 16 * 128 becomes 128 after the ">> 4". */
            if (type == 1) { // HACK
                E += 16 * 128;
                F += 16 * 128;
            }

            G = M(xC2S6, ip[2]) + M(xC6S2, ip[6]);
            H = M(xC6S2, ip[2]) - M(xC2S6, ip[6]);

            Ed = E - G;
            Gd = E + G;

            Add = F + Ad;
            Bdd = Bd - H;

            Fd = F - Ad;
            Hd = Bd + H;

            /* Final butterflies go to dst instead of back into ip. */
            if (type == 1) {
                /* put: overwrite dst with the clipped result */
                dst[0 * stride] = av_clip_uint8((Gd + Cd) >> 4);
                dst[7 * stride] = av_clip_uint8((Gd - Cd) >> 4);

                dst[1 * stride] = av_clip_uint8((Add + Hd) >> 4);
                dst[2 * stride] = av_clip_uint8((Add - Hd) >> 4);

                dst[3 * stride] = av_clip_uint8((Ed + Dd) >> 4);
                dst[4 * stride] = av_clip_uint8((Ed - Dd) >> 4);

                dst[5 * stride] = av_clip_uint8((Fd + Bdd) >> 4);
                dst[6 * stride] = av_clip_uint8((Fd - Bdd) >> 4);
            } else {
                /* add: accumulate onto the existing dst bytes, then clip */
                dst[0 * stride] = av_clip_uint8(dst[0 * stride] + ((Gd + Cd) >> 4));
                dst[7 * stride] = av_clip_uint8(dst[7 * stride] + ((Gd - Cd) >> 4));

                dst[1 * stride] = av_clip_uint8(dst[1 * stride] + ((Add + Hd) >> 4));
                dst[2 * stride] = av_clip_uint8(dst[2 * stride] + ((Add - Hd) >> 4));

                dst[3 * stride] = av_clip_uint8(dst[3 * stride] + ((Ed + Dd) >> 4));
                dst[4 * stride] = av_clip_uint8(dst[4 * stride] + ((Ed - Dd) >> 4));

                dst[5 * stride] = av_clip_uint8(dst[5 * stride] + ((Fd + Bdd) >> 4));
                dst[6 * stride] = av_clip_uint8(dst[6 * stride] + ((Fd - Bdd) >> 4));
            }
        } else {
            /* DC-only shortcut: with ip[1..7] all zero, all eight outputs
             * share one value derived from ip[0] alone. */
            if (type == 1) {
                dst[0*stride] =
                dst[1*stride] =
                dst[2*stride] =
                dst[3*stride] =
                dst[4*stride] =
                dst[5*stride] =
                dst[6*stride] =
                dst[7*stride] = av_clip_uint8(128 + ((xC4S4 * ip[0] + (IdctAdjustBeforeShift << 16)) >> 20));
            } else {
                /* Nothing to add when the DC term is zero as well. */
                if (ip[0]) {
                    int v = (xC4S4 * ip[0] + (IdctAdjustBeforeShift << 16)) >> 20;
                    dst[0 * stride] = av_clip_uint8(dst[0 * stride] + v);
                    dst[1 * stride] = av_clip_uint8(dst[1 * stride] + v);
                    dst[2 * stride] = av_clip_uint8(dst[2 * stride] + v);
                    dst[3 * stride] = av_clip_uint8(dst[3 * stride] + v);
                    dst[4 * stride] = av_clip_uint8(dst[4 * stride] + v);
                    dst[5 * stride] = av_clip_uint8(dst[5 * stride] + v);
                    dst[6 * stride] = av_clip_uint8(dst[6 * stride] + v);
                    dst[7 * stride] = av_clip_uint8(dst[7 * stride] + v);
                }
            }
        }

        ip += 8;            /* next group of eight consecutive elements */
        dst++;              /* next destination byte position */
    }
}
  196.  
  197. static void vp3_idct_put_c(uint8_t *dest /* align 8 */, int line_size,
  198.                            int16_t *block /* align 16 */)
  199. {
  200.     idct(dest, line_size, block, 1);
  201.     memset(block, 0, sizeof(*block) * 64);
  202. }
  203.  
  204. static void vp3_idct_add_c(uint8_t *dest /* align 8 */, int line_size,
  205.                            int16_t *block /* align 16 */)
  206. {
  207.     idct(dest, line_size, block, 2);
  208.     memset(block, 0, sizeof(*block) * 64);
  209. }
  210.  
  211. static void vp3_idct_dc_add_c(uint8_t *dest /* align 8 */, int line_size,
  212.                               int16_t *block /* align 16 */)
  213. {
  214.     int i, dc = (block[0] + 15) >> 5;
  215.  
  216.     for (i = 0; i < 8; i++) {
  217.         dest[0] = av_clip_uint8(dest[0] + dc);
  218.         dest[1] = av_clip_uint8(dest[1] + dc);
  219.         dest[2] = av_clip_uint8(dest[2] + dc);
  220.         dest[3] = av_clip_uint8(dest[3] + dc);
  221.         dest[4] = av_clip_uint8(dest[4] + dc);
  222.         dest[5] = av_clip_uint8(dest[5] + dc);
  223.         dest[6] = av_clip_uint8(dest[6] + dc);
  224.         dest[7] = av_clip_uint8(dest[7] + dc);
  225.         dest   += line_size;
  226.     }
  227.     block[0] = 0;
  228. }
  229.  
  230. static void vp3_v_loop_filter_c(uint8_t *first_pixel, int stride,
  231.                                 int *bounding_values)
  232. {
  233.     unsigned char *end;
  234.     int filter_value;
  235.     const int nstride = -stride;
  236.  
  237.     for (end = first_pixel + 8; first_pixel < end; first_pixel++) {
  238.         filter_value = (first_pixel[2 * nstride] - first_pixel[stride]) +
  239.                        (first_pixel[0] - first_pixel[nstride]) * 3;
  240.         filter_value = bounding_values[(filter_value + 4) >> 3];
  241.  
  242.         first_pixel[nstride] = av_clip_uint8(first_pixel[nstride] + filter_value);
  243.         first_pixel[0]       = av_clip_uint8(first_pixel[0] - filter_value);
  244.     }
  245. }
  246.  
  247. static void vp3_h_loop_filter_c(uint8_t *first_pixel, int stride,
  248.                                 int *bounding_values)
  249. {
  250.     unsigned char *end;
  251.     int filter_value;
  252.  
  253.     for (end = first_pixel + 8 * stride; first_pixel != end; first_pixel += stride) {
  254.         filter_value = (first_pixel[-2] - first_pixel[1]) +
  255.                        (first_pixel[ 0] - first_pixel[-1]) * 3;
  256.         filter_value = bounding_values[(filter_value + 4) >> 3];
  257.  
  258.         first_pixel[-1] = av_clip_uint8(first_pixel[-1] + filter_value);
  259.         first_pixel[ 0] = av_clip_uint8(first_pixel[ 0] - filter_value);
  260.     }
  261. }
  262.  
  263. static void put_no_rnd_pixels_l2(uint8_t *dst, const uint8_t *src1,
  264.                                  const uint8_t *src2, ptrdiff_t stride, int h)
  265. {
  266.     int i;
  267.  
  268.     for (i = 0; i < h; i++) {
  269.         uint32_t a, b;
  270.  
  271.         a = AV_RN32(&src1[i * stride]);
  272.         b = AV_RN32(&src2[i * stride]);
  273.         AV_WN32A(&dst[i * stride], no_rnd_avg32(a, b));
  274.         a = AV_RN32(&src1[i * stride + 4]);
  275.         b = AV_RN32(&src2[i * stride + 4]);
  276.         AV_WN32A(&dst[i * stride + 4], no_rnd_avg32(a, b));
  277.     }
  278. }
  279.  
  280. av_cold void ff_vp3dsp_init(VP3DSPContext *c, int flags)
  281. {
  282.     c->put_no_rnd_pixels_l2 = put_no_rnd_pixels_l2;
  283.  
  284.     c->idct_put      = vp3_idct_put_c;
  285.     c->idct_add      = vp3_idct_add_c;
  286.     c->idct_dc_add   = vp3_idct_dc_add_c;
  287.     c->v_loop_filter = vp3_v_loop_filter_c;
  288.     c->h_loop_filter = vp3_h_loop_filter_c;
  289.  
  290.     if (ARCH_ARM)
  291.         ff_vp3dsp_init_arm(c, flags);
  292.     if (ARCH_PPC)
  293.         ff_vp3dsp_init_ppc(c, flags);
  294.     if (ARCH_X86)
  295.         ff_vp3dsp_init_x86(c, flags);
  296. }
  297.