Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * This file is part of FFmpeg.
  3.  *
  4.  * FFmpeg is free software; you can redistribute it and/or
  5.  * modify it under the terms of the GNU Lesser General Public
  6.  * License as published by the Free Software Foundation; either
  7.  * version 2.1 of the License, or (at your option) any later version.
  8.  *
  9.  * FFmpeg is distributed in the hope that it will be useful,
  10.  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  11.  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  12.  * Lesser General Public License for more details.
  13.  *
  14.  * You should have received a copy of the GNU Lesser General Public
  15.  * License along with FFmpeg; if not, write to the Free Software
  16.  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  17.  */
  18.  
  19. #include "config.h"
  20. #include <stdint.h>
  21. #if HAVE_ALTIVEC_H
  22. #include <altivec.h>
  23. #endif
  24.  
  25. #include "libavutil/attributes.h"
  26. #include "libavutil/cpu.h"
  27. #include "libavutil/ppc/cpu.h"
  28. #include "libavutil/ppc/types_altivec.h"
  29. #include "libavutil/ppc/util_altivec.h"
  30. #include "libavcodec/mpegvideoencdsp.h"
  31.  
  32. #if HAVE_ALTIVEC
  33.  
  34. #if HAVE_VSX
  /**
   * Sum of squared pixel values over 16 rows of 16 pixels (VSX build).
   *
   * @param pix        address of the first pixel of the first row; each row is
   *                   fetched with vec_vsx_ld, so it may be unaligned
   * @param line_size  byte offset added to pix to reach the next row
   * @return the sum of pix[x] * pix[x] over all pixels read
   */
  static int pix_norm1_altivec(uint8_t *pix, int line_size)
  {
      int result = 0;
      int row;
      const vector signed int no_bias = (vector signed int) vec_splat_u32(0);
      vector unsigned int acc = (vector unsigned int) vec_splat_u32(0);
      vector signed int total;

      for (row = 0; row < 16; row++, pix += line_size) {
          /* One row of 16 pixels, loaded without any alignment fix-up. */
          vector unsigned char px = vec_vsx_ld(0, pix);

          /* Multiply each pixel by itself and accumulate into four lanes. */
          acc = vec_msum(px, px, acc);
      }

      /* Collapse the four lane sums (result lands in element 3), broadcast
       * that element and store one copy into the scalar result. */
      total = vec_splat(vec_sums((vector signed int) acc, no_bias), 3);
      vec_ste(total, 0, &result);
      return result;
  }
  61. #else
  /**
   * Sum of squared pixel values over 16 rows of 16 pixels
   * (AltiVec build without VSX).
   *
   * @param pix        address of the first pixel of the first row; may be
   *                   unaligned, rows are realigned with vec_perm
   * @param line_size  byte offset added to pix to reach the next row
   * @return the sum of pix[x] * pix[x] over all pixels read
   *
   * NOTE(review): the realignment vector is derived once from the initial
   * pix and reused for every row, so this presumably relies on line_size
   * keeping the same 16-byte misalignment on each row -- confirm with callers.
   */
  static int pix_norm1_altivec(uint8_t *pix, int line_size)
  {
      int result = 0;
      int rows_left;
      const vector signed int no_bias = (vector signed int) vec_splat_u32(0);
      const vector unsigned char realign = vec_lvsl(0, pix);
      vector unsigned int acc = (vector unsigned int) vec_splat_u32(0);
      vector signed int total;

      for (rows_left = 16; rows_left > 0; rows_left--) {
          /* Fetch the two aligned vectors that straddle this row and
           * shuffle the 16 wanted bytes out of them. */
          vector unsigned char lo = vec_ld(0,  pix);
          vector unsigned char hi = vec_ld(15, pix);
          vector unsigned char px = vec_perm(lo, hi, realign);

          /* Multiply each pixel by itself and accumulate into four lanes. */
          acc = vec_msum(px, px, acc);

          pix += line_size;
      }

      /* Collapse the four lane sums (result lands in element 3), broadcast
       * that element and store one copy into the scalar result. */
      total = vec_splat(vec_sums((vector signed int) acc, no_bias), 3);
      vec_ste(total, 0, &result);

      return result;
  }
  89. #endif /* HAVE_VSX */
  90.  
  91. #if HAVE_VSX
  /**
   * Sum of all pixel values over 16 rows of 16 pixels (VSX build).
   *
   * @param pix        address of the first pixel of the first row; each row is
   *                   fetched with vec_vsx_ld, so it may be unaligned
   * @param line_size  byte offset added to pix to reach the next row
   * @return the sum of every pixel read
   */
  static int pix_sum_altivec(uint8_t *pix, int line_size)
  {
      /* s is only ever written through vec_ste() below; start it at 0 like
       * pix_norm1_altivec() does so it is never formally uninitialised. */
      int i, s = 0;
      const vector unsigned int zero =
          (const vector unsigned int) vec_splat_u32(0);
      vector unsigned int sad = (vector unsigned int) vec_splat_u32(0);
      vector signed int sumdiffs;

      for (i = 0; i < 16; i++) {
          /* Read the potentially unaligned 16 pixels into t1. */
          vector unsigned char t1 = vec_vsx_ld(0, pix);

          /* Add each 4 pixel group together and put 4 results into sad. */
          sad = vec_sum4s(t1, sad);

          pix += line_size;
      }

      /* Sum up the four partial sums, and put the result into s. */
      sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
      sumdiffs = vec_splat(sumdiffs, 3);
      vec_ste(sumdiffs, 0, &s);
      return s;
  }
  119. #else
  /**
   * Sum of all pixel values over 16 rows of 16 pixels
   * (AltiVec build without VSX).
   *
   * @param pix        address of the first pixel of the first row; may be
   *                   unaligned, rows are realigned with vec_perm
   * @param line_size  byte offset added to pix to reach the next row
   * @return the sum of every pixel read
   *
   * NOTE(review): perm is derived once from the initial pix and reused for
   * every row, so this presumably relies on line_size keeping the same
   * 16-byte misalignment on each row -- confirm with callers.
   */
  static int pix_sum_altivec(uint8_t *pix, int line_size)
  {
      /* s is only ever written through vec_ste() below; start it at 0 like
       * pix_norm1_altivec() does so it is never formally uninitialised. */
      int i, s = 0;
      const vector unsigned int zero =
          (const vector unsigned int) vec_splat_u32(0);
      vector unsigned char perm = vec_lvsl(0, pix);
      vector unsigned int sad = (vector unsigned int) vec_splat_u32(0);
      vector signed int sumdiffs;

      for (i = 0; i < 16; i++) {
          /* Read the potentially unaligned 16 pixels into t1. */
          vector unsigned char pixl = vec_ld(0,  pix);
          vector unsigned char pixr = vec_ld(15, pix);
          vector unsigned char t1   = vec_perm(pixl, pixr, perm);

          /* Add each 4 pixel group together and put 4 results into sad. */
          sad = vec_sum4s(t1, sad);

          pix += line_size;
      }

      /* Sum up the four partial sums, and put the result into s. */
      sumdiffs = vec_sums((vector signed int) sad, (vector signed int) zero);
      sumdiffs = vec_splat(sumdiffs, 3);
      vec_ste(sumdiffs, 0, &s);

      return s;
  }
  148.  
  149. #endif /* HAVE_VSX */
  150.  
  151. #endif /* HAVE_ALTIVEC */
  152.  
  153. av_cold void ff_mpegvideoencdsp_init_ppc(MpegvideoEncDSPContext *c,
  154.                                          AVCodecContext *avctx)
  155. {
  156. #if HAVE_ALTIVEC
  157.     if (!PPC_ALTIVEC(av_get_cpu_flags()))
  158.         return;
  159.  
  160.     c->pix_norm1 = pix_norm1_altivec;
  161.     c->pix_sum   = pix_sum_altivec;
  162. #endif /* HAVE_ALTIVEC */
  163. }
  164.