/*
 * Copyright (c) 2015 Manojkumar Bhosale (Manojkumar.Bhosale@imgtec.com)
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/mips/generic_macros_msa.h"
#include "idctdsp_mips.h"

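/* Store an 8x8 block of 16-bit coefficients as 8-bit pixels, clamping
 * each value to the unsigned [0, 255] range. */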
static void put_pixels_clamped_msa(const int16_t *block, uint8_t *pixels,
                                   int32_t stride)
{
    uint64_t in0_d, in1_d, in2_d, in3_d, in4_d, in5_d, in6_d, in7_d;
    v8i16 in0, in1, in2, in3, in4, in5, in6, in7;

    /* load 8 rows of 8 coefficients each */
    LD_SH8(block, 8, in0, in1, in2, in3, in4, in5, in6, in7);
    CLIP_SH4_0_255(in0, in1, in2, in3);
    CLIP_SH4_0_255(in4, in5, in6, in7);
    /* pack each row's clamped halfwords down to bytes */
    PCKEV_B4_SH(in0, in0, in1, in1, in2, in2, in3, in3, in0, in1, in2, in3);
    PCKEV_B4_SH(in4, in4, in5, in5, in6, in6, in7, in7, in4, in5, in6, in7);

    /* extract the 8 packed pixels of each row from the low doubleword */
    in0_d = __msa_copy_u_d((v2i64) in0, 0);
    in1_d = __msa_copy_u_d((v2i64) in1, 0);
    in2_d = __msa_copy_u_d((v2i64) in2, 0);
    in3_d = __msa_copy_u_d((v2i64) in3, 0);
    in4_d = __msa_copy_u_d((v2i64) in4, 0);
    in5_d = __msa_copy_u_d((v2i64) in5, 0);
    in6_d = __msa_copy_u_d((v2i64) in6, 0);
    in7_d = __msa_copy_u_d((v2i64) in7, 0);
    /* store four 8-pixel rows at a time */
    SD4(in0_d, in1_d, in2_d, in3_d, pixels, stride);
    pixels += 4 * stride;
    SD4(in4_d, in5_d, in6_d, in7_d, pixels, stride);
}

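/* Same as put_pixels_clamped_msa(), but the coefficients are biased by
 * 128 first, mapping the signed [-128, 127] range onto [0, 255]. */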
static void put_signed_pixels_clamped_msa(const int16_t *block, uint8_t *pixels,
                                          int32_t stride)
{
    uint64_t in0_d, in1_d, in2_d, in3_d, in4_d, in5_d, in6_d, in7_d;
    v8i16 in0, in1, in2, in3, in4, in5, in6, in7;

    LD_SH8(block, 8, in0, in1, in2, in3, in4, in5, in6, in7);

    /* bias the signed coefficients into the unsigned pixel range */
    in0 += 128;
    in1 += 128;
    in2 += 128;
    in3 += 128;
    in4 += 128;
    in5 += 128;
    in6 += 128;
    in7 += 128;

    CLIP_SH4_0_255(in0, in1, in2, in3);
    CLIP_SH4_0_255(in4, in5, in6, in7);
    PCKEV_B4_SH(in0, in0, in1, in1, in2, in2, in3, in3, in0, in1, in2, in3);
    PCKEV_B4_SH(in4, in4, in5, in5, in6, in6, in7, in7, in4, in5, in6, in7);

    in0_d = __msa_copy_u_d((v2i64) in0, 0);
    in1_d = __msa_copy_u_d((v2i64) in1, 0);
    in2_d = __msa_copy_u_d((v2i64) in2, 0);
    in3_d = __msa_copy_u_d((v2i64) in3, 0);
    in4_d = __msa_copy_u_d((v2i64) in4, 0);
    in5_d = __msa_copy_u_d((v2i64) in5, 0);
    in6_d = __msa_copy_u_d((v2i64) in6, 0);
    in7_d = __msa_copy_u_d((v2i64) in7, 0);
    SD4(in0_d, in1_d, in2_d, in3_d, pixels, stride);
    pixels += 4 * stride;
    SD4(in4_d, in5_d, in6_d, in7_d, pixels, stride);
}

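/* Add an 8x8 block of 16-bit coefficients to the pixels already in the
 * destination, clamping the sums to [0, 255]. */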
static void add_pixels_clamped_msa(const int16_t *block, uint8_t *pixels,
                                   int32_t stride)
{
    uint64_t in0_d, in1_d, in2_d, in3_d, in4_d, in5_d, in6_d, in7_d;
    v8i16 in0, in1, in2, in3, in4, in5, in6, in7;
    v16u8 pix_in0, pix_in1, pix_in2, pix_in3;
    v16u8 pix_in4, pix_in5, pix_in6, pix_in7;
    v8u16 pix0, pix1, pix2, pix3, pix4, pix5, pix6, pix7;
    v8i16 zero = { 0 };

    LD_SH8(block, 8, in0, in1, in2, in3, in4, in5, in6, in7);
    LD_UB8(pixels, stride, pix_in0, pix_in1, pix_in2,
           pix_in3, pix_in4, pix_in5, pix_in6, pix_in7);

    /* zero-extend the 8 destination pixels of each row to halfwords */
    ILVR_B4_UH(zero, pix_in0, zero, pix_in1, zero, pix_in2, zero, pix_in3,
               pix0, pix1, pix2, pix3);
    ILVR_B4_UH(zero, pix_in4, zero, pix_in5, zero, pix_in6, zero, pix_in7,
               pix4, pix5, pix6, pix7);

    /* add the residual coefficients to the destination pixels */
    in0 += (v8i16) pix0;
    in1 += (v8i16) pix1;
    in2 += (v8i16) pix2;
    in3 += (v8i16) pix3;
    in4 += (v8i16) pix4;
    in5 += (v8i16) pix5;
    in6 += (v8i16) pix6;
    in7 += (v8i16) pix7;

    CLIP_SH4_0_255(in0, in1, in2, in3);
    CLIP_SH4_0_255(in4, in5, in6, in7);
    PCKEV_B4_SH(in0, in0, in1, in1, in2, in2, in3, in3, in0, in1, in2, in3);
    PCKEV_B4_SH(in4, in4, in5, in5, in6, in6, in7, in7, in4, in5, in6, in7);

    in0_d = __msa_copy_u_d((v2i64) in0, 0);
    in1_d = __msa_copy_u_d((v2i64) in1, 0);
    in2_d = __msa_copy_u_d((v2i64) in2, 0);
    in3_d = __msa_copy_u_d((v2i64) in3, 0);
    in4_d = __msa_copy_u_d((v2i64) in4, 0);
    in5_d = __msa_copy_u_d((v2i64) in5, 0);
    in6_d = __msa_copy_u_d((v2i64) in6, 0);
    in7_d = __msa_copy_u_d((v2i64) in7, 0);
    SD4(in0_d, in1_d, in2_d, in3_d, pixels, stride);
    pixels += 4 * stride;
    SD4(in4_d, in5_d, in6_d, in7_d, pixels, stride);
}

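/* Exported entry points matching the idctdsp function-pointer
 * signatures; the ptrdiff_t line size is narrowed to the int32_t
 * stride used internally. */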
void ff_put_pixels_clamped_msa(const int16_t *block,
                               uint8_t *av_restrict pixels,
                               ptrdiff_t line_size)
{
    put_pixels_clamped_msa(block, pixels, line_size);
}

void ff_put_signed_pixels_clamped_msa(const int16_t *block,
                                      uint8_t *av_restrict pixels,
                                      ptrdiff_t line_size)
{
    put_signed_pixels_clamped_msa(block, pixels, line_size);
}

void ff_add_pixels_clamped_msa(const int16_t *block,
                               uint8_t *av_restrict pixels,
                               ptrdiff_t line_size)
{
    add_pixels_clamped_msa(block, pixels, line_size);
}

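/* Usage sketch, not part of this file: in FFmpeg these symbols are
 * typically wired in by the MIPS idctdsp initialization code, roughly
 * along these lines (exact init function and conditions may differ):
 *
 *     c->put_pixels_clamped        = ff_put_pixels_clamped_msa;
 *     c->put_signed_pixels_clamped = ff_put_signed_pixels_clamped_msa;
 *     c->add_pixels_clamped        = ff_add_pixels_clamped_msa;
 *
 * where c is the IDCTDSPContext being initialized. */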