Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * AltiVec-enhanced yuv-to-yuv conversion routines.
  3.  *
  4.  * Copyright (C) 2004 Romain Dolbeau <romain@dolbeau.org>
  5.  * based on the equivalent C code in swscale.c
  6.  *
  7.  * This file is part of FFmpeg.
  8.  *
  9.  * FFmpeg is free software; you can redistribute it and/or
  10.  * modify it under the terms of the GNU Lesser General Public
  11.  * License as published by the Free Software Foundation; either
  12.  * version 2.1 of the License, or (at your option) any later version.
  13.  *
  14.  * FFmpeg is distributed in the hope that it will be useful,
  15.  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16.  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17.  * Lesser General Public License for more details.
  18.  *
  19.  * You should have received a copy of the GNU Lesser General Public
  20.  * License along with FFmpeg; if not, write to the Free Software
  21.  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  22.  */
  23.  
  24. #include <inttypes.h>
  25.  
  26. #include "config.h"
  27. #include "libavutil/attributes.h"
  28. #include "libavutil/cpu.h"
  29. #include "libswscale/swscale.h"
  30. #include "libswscale/swscale_internal.h"
  31.  
  32. #if HAVE_ALTIVEC
  33.  
  34. static int yv12toyuy2_unscaled_altivec(SwsContext *c, const uint8_t *src[],
  35.                                        int srcStride[], int srcSliceY,
  36.                                        int srcSliceH, uint8_t *dstParam[],
  37.                                        int dstStride_a[])
  38. {
  39.     uint8_t *dst = dstParam[0] + dstStride_a[0] * srcSliceY;
  40.     // yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH,
  41.     //            srcStride[0], srcStride[1], dstStride[0]);
  42.     const uint8_t *ysrc   = src[0];
  43.     const uint8_t *usrc   = src[1];
  44.     const uint8_t *vsrc   = src[2];
  45.     const int width       = c->srcW;
  46.     const int height      = srcSliceH;
  47.     const int lumStride   = srcStride[0];
  48.     const int chromStride = srcStride[1];
  49.     const int dstStride   = dstStride_a[0];
  50.     const vector unsigned char yperm = vec_lvsl(0, ysrc);
  51.     const int vertLumPerChroma       = 2;
  52.     register unsigned int y;
  53.  
  54.     /* This code assumes:
  55.      *
  56.      * 1) dst is 16 bytes-aligned
  57.      * 2) dstStride is a multiple of 16
  58.      * 3) width is a multiple of 16
  59.      * 4) lum & chrom stride are multiples of 8
  60.      */
  61.  
  62.     for (y = 0; y < height; y++) {
  63.         int i;
  64.         for (i = 0; i < width - 31; i += 32) {
  65.             const unsigned int j          = i >> 1;
  66.             vector unsigned char v_yA     = vec_ld(i, ysrc);
  67.             vector unsigned char v_yB     = vec_ld(i + 16, ysrc);
  68.             vector unsigned char v_yC     = vec_ld(i + 32, ysrc);
  69.             vector unsigned char v_y1     = vec_perm(v_yA, v_yB, yperm);
  70.             vector unsigned char v_y2     = vec_perm(v_yB, v_yC, yperm);
  71.             vector unsigned char v_uA     = vec_ld(j, usrc);
  72.             vector unsigned char v_uB     = vec_ld(j + 16, usrc);
  73.             vector unsigned char v_u      = vec_perm(v_uA, v_uB, vec_lvsl(j, usrc));
  74.             vector unsigned char v_vA     = vec_ld(j, vsrc);
  75.             vector unsigned char v_vB     = vec_ld(j + 16, vsrc);
  76.             vector unsigned char v_v      = vec_perm(v_vA, v_vB, vec_lvsl(j, vsrc));
  77.             vector unsigned char v_uv_a   = vec_mergeh(v_u, v_v);
  78.             vector unsigned char v_uv_b   = vec_mergel(v_u, v_v);
  79.             vector unsigned char v_yuy2_0 = vec_mergeh(v_y1, v_uv_a);
  80.             vector unsigned char v_yuy2_1 = vec_mergel(v_y1, v_uv_a);
  81.             vector unsigned char v_yuy2_2 = vec_mergeh(v_y2, v_uv_b);
  82.             vector unsigned char v_yuy2_3 = vec_mergel(v_y2, v_uv_b);
  83.             vec_st(v_yuy2_0, (i << 1), dst);
  84.             vec_st(v_yuy2_1, (i << 1) + 16, dst);
  85.             vec_st(v_yuy2_2, (i << 1) + 32, dst);
  86.             vec_st(v_yuy2_3, (i << 1) + 48, dst);
  87.         }
  88.         if (i < width) {
  89.             const unsigned int j          = i >> 1;
  90.             vector unsigned char v_y1     = vec_ld(i, ysrc);
  91.             vector unsigned char v_u      = vec_ld(j, usrc);
  92.             vector unsigned char v_v      = vec_ld(j, vsrc);
  93.             vector unsigned char v_uv_a   = vec_mergeh(v_u, v_v);
  94.             vector unsigned char v_yuy2_0 = vec_mergeh(v_y1, v_uv_a);
  95.             vector unsigned char v_yuy2_1 = vec_mergel(v_y1, v_uv_a);
  96.             vec_st(v_yuy2_0, (i << 1), dst);
  97.             vec_st(v_yuy2_1, (i << 1) + 16, dst);
  98.         }
  99.         if ((y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1) {
  100.             usrc += chromStride;
  101.             vsrc += chromStride;
  102.         }
  103.         ysrc += lumStride;
  104.         dst  += dstStride;
  105.     }
  106.  
  107.     return srcSliceH;
  108. }
  109.  
  110. static int yv12touyvy_unscaled_altivec(SwsContext *c, const uint8_t *src[],
  111.                                        int srcStride[], int srcSliceY,
  112.                                        int srcSliceH, uint8_t *dstParam[],
  113.                                        int dstStride_a[])
  114. {
  115.     uint8_t *dst = dstParam[0] + dstStride_a[0] * srcSliceY;
  116.     // yv12toyuy2(src[0], src[1], src[2], dst, c->srcW, srcSliceH,
  117.     //            srcStride[0], srcStride[1], dstStride[0]);
  118.     const uint8_t *ysrc              = src[0];
  119.     const uint8_t *usrc              = src[1];
  120.     const uint8_t *vsrc              = src[2];
  121.     const int width                  = c->srcW;
  122.     const int height                 = srcSliceH;
  123.     const int lumStride              = srcStride[0];
  124.     const int chromStride            = srcStride[1];
  125.     const int dstStride              = dstStride_a[0];
  126.     const int vertLumPerChroma       = 2;
  127.     const vector unsigned char yperm = vec_lvsl(0, ysrc);
  128.     register unsigned int y;
  129.  
  130.     /* This code assumes:
  131.      *
  132.      * 1) dst is 16 bytes-aligned
  133.      * 2) dstStride is a multiple of 16
  134.      * 3) width is a multiple of 16
  135.      * 4) lum & chrom stride are multiples of 8
  136.      */
  137.  
  138.     for (y = 0; y < height; y++) {
  139.         int i;
  140.         for (i = 0; i < width - 31; i += 32) {
  141.             const unsigned int j          = i >> 1;
  142.             vector unsigned char v_yA     = vec_ld(i, ysrc);
  143.             vector unsigned char v_yB     = vec_ld(i + 16, ysrc);
  144.             vector unsigned char v_yC     = vec_ld(i + 32, ysrc);
  145.             vector unsigned char v_y1     = vec_perm(v_yA, v_yB, yperm);
  146.             vector unsigned char v_y2     = vec_perm(v_yB, v_yC, yperm);
  147.             vector unsigned char v_uA     = vec_ld(j, usrc);
  148.             vector unsigned char v_uB     = vec_ld(j + 16, usrc);
  149.             vector unsigned char v_u      = vec_perm(v_uA, v_uB, vec_lvsl(j, usrc));
  150.             vector unsigned char v_vA     = vec_ld(j, vsrc);
  151.             vector unsigned char v_vB     = vec_ld(j + 16, vsrc);
  152.             vector unsigned char v_v      = vec_perm(v_vA, v_vB, vec_lvsl(j, vsrc));
  153.             vector unsigned char v_uv_a   = vec_mergeh(v_u, v_v);
  154.             vector unsigned char v_uv_b   = vec_mergel(v_u, v_v);
  155.             vector unsigned char v_uyvy_0 = vec_mergeh(v_uv_a, v_y1);
  156.             vector unsigned char v_uyvy_1 = vec_mergel(v_uv_a, v_y1);
  157.             vector unsigned char v_uyvy_2 = vec_mergeh(v_uv_b, v_y2);
  158.             vector unsigned char v_uyvy_3 = vec_mergel(v_uv_b, v_y2);
  159.             vec_st(v_uyvy_0, (i << 1), dst);
  160.             vec_st(v_uyvy_1, (i << 1) + 16, dst);
  161.             vec_st(v_uyvy_2, (i << 1) + 32, dst);
  162.             vec_st(v_uyvy_3, (i << 1) + 48, dst);
  163.         }
  164.         if (i < width) {
  165.             const unsigned int j          = i >> 1;
  166.             vector unsigned char v_y1     = vec_ld(i, ysrc);
  167.             vector unsigned char v_u      = vec_ld(j, usrc);
  168.             vector unsigned char v_v      = vec_ld(j, vsrc);
  169.             vector unsigned char v_uv_a   = vec_mergeh(v_u, v_v);
  170.             vector unsigned char v_uyvy_0 = vec_mergeh(v_uv_a, v_y1);
  171.             vector unsigned char v_uyvy_1 = vec_mergel(v_uv_a, v_y1);
  172.             vec_st(v_uyvy_0, (i << 1), dst);
  173.             vec_st(v_uyvy_1, (i << 1) + 16, dst);
  174.         }
  175.         if ((y & (vertLumPerChroma - 1)) == vertLumPerChroma - 1) {
  176.             usrc += chromStride;
  177.             vsrc += chromStride;
  178.         }
  179.         ysrc += lumStride;
  180.         dst  += dstStride;
  181.     }
  182.     return srcSliceH;
  183. }
  184.  
  185. #endif /* HAVE_ALTIVEC */
  186.  
  187. av_cold void ff_get_unscaled_swscale_ppc(SwsContext *c)
  188. {
  189. #if HAVE_ALTIVEC
  190.     if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC))
  191.         return;
  192.  
  193.     if (!(c->srcW & 15) && !(c->flags & SWS_BITEXACT) &&
  194.         c->srcFormat == AV_PIX_FMT_YUV420P) {
  195.         enum AVPixelFormat dstFormat = c->dstFormat;
  196.  
  197.         // unscaled YV12 -> packed YUV, we want speed
  198.         if (dstFormat == AV_PIX_FMT_YUYV422)
  199.             c->swscale = yv12toyuy2_unscaled_altivec;
  200.         else if (dstFormat == AV_PIX_FMT_UYVY422)
  201.             c->swscale = yv12touyvy_unscaled_altivec;
  202.     }
  203. #endif /* HAVE_ALTIVEC */
  204. }
  205.