Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright (c) 2002 Dieter Shirley
  3.  *
  4.  * dct_unquantize_h263_altivec:
  5.  * Copyright (c) 2003 Romain Dolbeau <romain@dolbeau.org>
  6.  *
  7.  * This file is part of FFmpeg.
  8.  *
  9.  * FFmpeg is free software; you can redistribute it and/or
  10.  * modify it under the terms of the GNU Lesser General Public
  11.  * License as published by the Free Software Foundation; either
  12.  * version 2.1 of the License, or (at your option) any later version.
  13.  *
  14.  * FFmpeg is distributed in the hope that it will be useful,
  15.  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16.  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17.  * Lesser General Public License for more details.
  18.  *
  19.  * You should have received a copy of the GNU Lesser General Public
  20.  * License along with FFmpeg; if not, write to the Free Software
  21.  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  22.  */
  23.  
  24. #include <stdlib.h>
  25. #include <stdio.h>
  26.  
  27. #include "config.h"
  28. #include "libavutil/attributes.h"
  29. #include "libavutil/cpu.h"
  30. #include "libavutil/ppc/types_altivec.h"
  31. #include "libavutil/ppc/util_altivec.h"
  32. #include "libavcodec/mpegvideo.h"
  33. #include "dsputil_altivec.h"
  34.  
  35. #if HAVE_ALTIVEC
  36.  
  37. /* AltiVec version of dct_unquantize_h263
  38.    this code assumes `block' is 16 bytes-aligned */
  39. static void dct_unquantize_h263_altivec(MpegEncContext *s,
  40.                                  int16_t *block, int n, int qscale)
  41. {
  42.     int i, level, qmul, qadd;
  43.     int nCoeffs;
  44.  
  45.     assert(s->block_last_index[n]>=0);
  46.  
  47.     qadd = (qscale - 1) | 1;
  48.     qmul = qscale << 1;
  49.  
  50.     if (s->mb_intra) {
  51.         if (!s->h263_aic) {
  52.             if (n < 4)
  53.                 block[0] = block[0] * s->y_dc_scale;
  54.             else
  55.                 block[0] = block[0] * s->c_dc_scale;
  56.         }else
  57.             qadd = 0;
  58.         i = 1;
  59.         nCoeffs= 63; //does not always use zigzag table
  60.     } else {
  61.         i = 0;
  62.         nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ];
  63.     }
  64.  
  65.     {
  66.         register const vector signed short vczero = (const vector signed short)vec_splat_s16(0);
  67.         DECLARE_ALIGNED(16, short, qmul8) = qmul;
  68.         DECLARE_ALIGNED(16, short, qadd8) = qadd;
  69.         register vector signed short blockv, qmulv, qaddv, nqaddv, temp1;
  70.         register vector bool short blockv_null, blockv_neg;
  71.         register short backup_0 = block[0];
  72.         register int j = 0;
  73.  
  74.         qmulv = vec_splat((vec_s16)vec_lde(0, &qmul8), 0);
  75.         qaddv = vec_splat((vec_s16)vec_lde(0, &qadd8), 0);
  76.         nqaddv = vec_sub(vczero, qaddv);
  77.  
  78.         // vectorize all the 16 bytes-aligned blocks
  79.         // of 8 elements
  80.         for(; (j + 7) <= nCoeffs ; j+=8) {
  81.             blockv = vec_ld(j << 1, block);
  82.             blockv_neg = vec_cmplt(blockv, vczero);
  83.             blockv_null = vec_cmpeq(blockv, vczero);
  84.             // choose between +qadd or -qadd as the third operand
  85.             temp1 = vec_sel(qaddv, nqaddv, blockv_neg);
  86.             // multiply & add (block{i,i+7} * qmul [+-] qadd)
  87.             temp1 = vec_mladd(blockv, qmulv, temp1);
  88.             // put 0 where block[{i,i+7} used to have 0
  89.             blockv = vec_sel(temp1, blockv, blockv_null);
  90.             vec_st(blockv, j << 1, block);
  91.         }
  92.  
  93.         // if nCoeffs isn't a multiple of 8, finish the job
  94.         // using good old scalar units.
  95.         // (we could do it using a truncated vector,
  96.         // but I'm not sure it's worth the hassle)
  97.         for(; j <= nCoeffs ; j++) {
  98.             level = block[j];
  99.             if (level) {
  100.                 if (level < 0) {
  101.                     level = level * qmul - qadd;
  102.                 } else {
  103.                     level = level * qmul + qadd;
  104.                 }
  105.                 block[j] = level;
  106.             }
  107.         }
  108.  
  109.         if (i == 1) {
  110.             // cheat. this avoid special-casing the first iteration
  111.             block[0] = backup_0;
  112.         }
  113.     }
  114. }
  115.  
  116. #endif /* HAVE_ALTIVEC */
  117.  
  118. av_cold void ff_MPV_common_init_ppc(MpegEncContext *s)
  119. {
  120. #if HAVE_ALTIVEC
  121.     if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC))
  122.         return;
  123.  
  124.     if ((s->avctx->dct_algo == FF_DCT_AUTO) ||
  125.         (s->avctx->dct_algo == FF_DCT_ALTIVEC)) {
  126.         s->dct_unquantize_h263_intra = dct_unquantize_h263_altivec;
  127.         s->dct_unquantize_h263_inter = dct_unquantize_h263_altivec;
  128.     }
  129. #endif /* HAVE_ALTIVEC */
  130. }
  131.