Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Alpha optimized DSP utils
  3.  * Copyright (c) 2002 Falk Hueffner <falk@debian.org>
  4.  *
  5.  * This file is part of FFmpeg.
  6.  *
  7.  * FFmpeg is free software; you can redistribute it and/or
  8.  * modify it under the terms of the GNU Lesser General Public
  9.  * License as published by the Free Software Foundation; either
  10.  * version 2.1 of the License, or (at your option) any later version.
  11.  *
  12.  * FFmpeg is distributed in the hope that it will be useful,
  13.  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  14.  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15.  * Lesser General Public License for more details.
  16.  *
  17.  * You should have received a copy of the GNU Lesser General Public
  18.  * License along with FFmpeg; if not, write to the Free Software
  19.  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  20.  */
  21.  
  22. #include "libavutil/attributes.h"
  23. #include "libavcodec/dsputil.h"
  24. #include "dsputil_alpha.h"
  25. #include "asm.h"
  26.  
/* File-scope pointers to the clamped put/add pixel routines.
   ff_dsputil_init_alpha() fills them in with whatever ends up in
   c->put_pixels_clamped and c->add_pixels_clamped.  */
void (*put_pixels_clamped_axp_p)(const int16_t *block, uint8_t *pixels,
                                 int line_size);
void (*add_pixels_clamped_axp_p)(const int16_t *block, uint8_t *pixels,
                                 int line_size);
  31.  
  32. #if 0
  33. /* These functions were the base for the optimized assembler routines,
  34.    and remain here for documentation purposes.  */
  35. static void put_pixels_clamped_mvi(const int16_t *block, uint8_t *pixels,
  36.                                    ptrdiff_t line_size)
  37. {
  38.     int i = 8;
  39.     uint64_t clampmask = zap(-1, 0xaa); /* 0x00ff00ff00ff00ff */
  40.  
  41.     do {
  42.         uint64_t shorts0, shorts1;
  43.  
  44.         shorts0 = ldq(block);
  45.         shorts0 = maxsw4(shorts0, 0);
  46.         shorts0 = minsw4(shorts0, clampmask);
  47.         stl(pkwb(shorts0), pixels);
  48.  
  49.         shorts1 = ldq(block + 4);
  50.         shorts1 = maxsw4(shorts1, 0);
  51.         shorts1 = minsw4(shorts1, clampmask);
  52.         stl(pkwb(shorts1), pixels + 4);
  53.  
  54.         pixels += line_size;
  55.         block += 8;
  56.     } while (--i);
  57. }
  58.  
/* Reference version of the clamped add: for each of 8 rows, combines the
   8 int16_t values at block with the 8 bytes at pixels, clamps the sums
   and writes them back to pixels, then advances pixels by line_size and
   block by 8 values.  Each row is processed as two groups of four.  */
void add_pixels_clamped_mvi(const int16_t *block, uint8_t *pixels,
                            ptrdiff_t line_size)
{
    int h = 8;  /* number of rows left to process */
    /* Keep this function a leaf function by generating the constants
       manually (mainly for the hack value ;-).  */
    uint64_t clampmask = zap(-1, 0xaa); /* 0x00ff00ff00ff00ff */
    uint64_t signmask  = zap(-1, 0x33);
    signmask ^= signmask >> 1;  /* 0x8000800080008000 */

    do {
        uint64_t shorts0, pix0, signs0;
        uint64_t shorts1, pix1, signs1;

        /* Both coefficient groups are loaded before any pixel store.  */
        shorts0 = ldq(block);
        shorts1 = ldq(block + 4);

        /* presumably widens four pixel bytes to four 16-bit lanes */
        pix0    = unpkbw(ldl(pixels));
        /* Signed subword add (MMX paddw).  */
        /* Bit 15 of every lane is set aside and cleared before the 64-bit
           add, then XORed back in afterwards; with the top bit of each
           lane clear, a small per-lane addend cannot carry into the
           neighbouring lane.  */
        signs0  = shorts0 & signmask;
        shorts0 &= ~signmask;
        shorts0 += pix0;
        shorts0 ^= signs0;
        /* Clamp. */
        shorts0 = maxsw4(shorts0, 0);
        shorts0 = minsw4(shorts0, clampmask);

        /* Next 4.  */
        pix1    = unpkbw(ldl(pixels + 4));
        signs1  = shorts1 & signmask;
        shorts1 &= ~signmask;
        shorts1 += pix1;
        shorts1 ^= signs1;
        shorts1 = maxsw4(shorts1, 0);
        shorts1 = minsw4(shorts1, clampmask);

        /* Write both results back only after both pixel groups were read.  */
        stl(pkwb(shorts0), pixels);
        stl(pkwb(shorts1), pixels + 4);

        pixels += line_size;
        block += 8;
    } while (--h);
}
  102. #endif
  103.  
  104. static void clear_blocks_axp(int16_t *blocks) {
  105.     uint64_t *p = (uint64_t *) blocks;
  106.     int n = sizeof(int16_t) * 6 * 64;
  107.  
  108.     do {
  109.         p[0] = 0;
  110.         p[1] = 0;
  111.         p[2] = 0;
  112.         p[3] = 0;
  113.         p[4] = 0;
  114.         p[5] = 0;
  115.         p[6] = 0;
  116.         p[7] = 0;
  117.         p += 8;
  118.         n -= 8 * 8;
  119.     } while (n);
  120. }
  121.  
  122. av_cold void ff_dsputil_init_alpha(DSPContext *c, AVCodecContext *avctx)
  123. {
  124.     const int high_bit_depth = avctx->bits_per_raw_sample > 8;
  125.  
  126.     if (!high_bit_depth) {
  127.         c->clear_blocks = clear_blocks_axp;
  128.     }
  129.  
  130.     /* amask clears all bits that correspond to present features.  */
  131.     if (amask(AMASK_MVI) == 0) {
  132.         c->put_pixels_clamped = put_pixels_clamped_mvi_asm;
  133.         c->add_pixels_clamped = add_pixels_clamped_mvi_asm;
  134.  
  135.         if (!high_bit_depth)
  136.             c->get_pixels   = get_pixels_mvi;
  137.         c->diff_pixels      = diff_pixels_mvi;
  138.         c->sad[0]           = pix_abs16x16_mvi_asm;
  139.         c->sad[1]           = pix_abs8x8_mvi;
  140.         c->pix_abs[0][0]    = pix_abs16x16_mvi_asm;
  141.         c->pix_abs[1][0]    = pix_abs8x8_mvi;
  142.         c->pix_abs[0][1]    = pix_abs16x16_x2_mvi;
  143.         c->pix_abs[0][2]    = pix_abs16x16_y2_mvi;
  144.         c->pix_abs[0][3]    = pix_abs16x16_xy2_mvi;
  145.     }
  146.  
  147.     put_pixels_clamped_axp_p = c->put_pixels_clamped;
  148.     add_pixels_clamped_axp_p = c->add_pixels_clamped;
  149.  
  150.     if (!avctx->lowres && avctx->bits_per_raw_sample <= 8 &&
  151.         (avctx->idct_algo == FF_IDCT_AUTO ||
  152.          avctx->idct_algo == FF_IDCT_SIMPLEALPHA)) {
  153.         c->idct_put = ff_simple_idct_put_axp;
  154.         c->idct_add = ff_simple_idct_add_axp;
  155.         c->idct =     ff_simple_idct_axp;
  156.     }
  157. }
  158.