Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright (c) 2004 Romain Dolbeau <romain@dolbeau.org>
  3.  *
  4.  * This file is part of FFmpeg.
  5.  *
  6.  * FFmpeg is free software; you can redistribute it and/or
  7.  * modify it under the terms of the GNU Lesser General Public
  8.  * License as published by the Free Software Foundation; either
  9.  * version 2.1 of the License, or (at your option) any later version.
  10.  *
  11.  * FFmpeg is distributed in the hope that it will be useful,
  12.  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13.  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14.  * Lesser General Public License for more details.
  15.  *
  16.  * You should have received a copy of the GNU Lesser General Public
  17.  * License along with FFmpeg; if not, write to the Free Software
  18.  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19.  */
  20.  
  21. #include "config.h"
  22. #include "libavutil/attributes.h"
  23. #include "libavutil/cpu.h"
  24. #include "libavutil/intreadwrite.h"
  25. #include "libavutil/ppc/types_altivec.h"
  26. #include "libavutil/ppc/util_altivec.h"
  27. #include "libavcodec/h264qpel.h"
  28. #include "dsputil_altivec.h"
  29.  
  30. #if HAVE_ALTIVEC
  31.  
  /*
   * Final-store operators selected through OP_U8_ALTIVEC.
   *   d   - lvalue receiving the value to be stored
   *   s   - freshly computed value
   *   dst - value currently held at the destination (ignored by PUT)
   * PUT takes s unchanged; AVG combines it with dst through vec_avg().
   */
  #define PUT_OP_U8_ALTIVEC(d, s, dst) d = (s)
  #define AVG_OP_U8_ALTIVEC(d, s, dst) d = vec_avg((dst), (s))
  34.  
  35. #define OP_U8_ALTIVEC                          PUT_OP_U8_ALTIVEC
  36. #define PREFIX_h264_qpel16_h_lowpass_altivec   put_h264_qpel16_h_lowpass_altivec
  37. #define PREFIX_h264_qpel16_h_lowpass_num       altivec_put_h264_qpel16_h_lowpass_num
  38. #define PREFIX_h264_qpel16_v_lowpass_altivec   put_h264_qpel16_v_lowpass_altivec
  39. #define PREFIX_h264_qpel16_v_lowpass_num       altivec_put_h264_qpel16_v_lowpass_num
  40. #define PREFIX_h264_qpel16_hv_lowpass_altivec  put_h264_qpel16_hv_lowpass_altivec
  41. #define PREFIX_h264_qpel16_hv_lowpass_num      altivec_put_h264_qpel16_hv_lowpass_num
  42. #include "h264qpel_template.c"
  43. #undef OP_U8_ALTIVEC
  44. #undef PREFIX_h264_qpel16_h_lowpass_altivec
  45. #undef PREFIX_h264_qpel16_h_lowpass_num
  46. #undef PREFIX_h264_qpel16_v_lowpass_altivec
  47. #undef PREFIX_h264_qpel16_v_lowpass_num
  48. #undef PREFIX_h264_qpel16_hv_lowpass_altivec
  49. #undef PREFIX_h264_qpel16_hv_lowpass_num
  50.  
  51. #define OP_U8_ALTIVEC                          AVG_OP_U8_ALTIVEC
  52. #define PREFIX_h264_qpel16_h_lowpass_altivec   avg_h264_qpel16_h_lowpass_altivec
  53. #define PREFIX_h264_qpel16_h_lowpass_num       altivec_avg_h264_qpel16_h_lowpass_num
  54. #define PREFIX_h264_qpel16_v_lowpass_altivec   avg_h264_qpel16_v_lowpass_altivec
  55. #define PREFIX_h264_qpel16_v_lowpass_num       altivec_avg_h264_qpel16_v_lowpass_num
  56. #define PREFIX_h264_qpel16_hv_lowpass_altivec  avg_h264_qpel16_hv_lowpass_altivec
  57. #define PREFIX_h264_qpel16_hv_lowpass_num      altivec_avg_h264_qpel16_hv_lowpass_num
  58. #include "h264qpel_template.c"
  59. #undef OP_U8_ALTIVEC
  60. #undef PREFIX_h264_qpel16_h_lowpass_altivec
  61. #undef PREFIX_h264_qpel16_h_lowpass_num
  62. #undef PREFIX_h264_qpel16_v_lowpass_altivec
  63. #undef PREFIX_h264_qpel16_v_lowpass_num
  64. #undef PREFIX_h264_qpel16_hv_lowpass_altivec
  65. #undef PREFIX_h264_qpel16_hv_lowpass_num
  66.  
  /*
   * H264_MC(OPNAME, SIZE, CODETYPE)
   *
   * Expands to sixteen static functions named
   *     <OPNAME>h264_qpel<SIZE>_mcXY_<CODETYPE>      (X, Y in 0..3)
   * all taking (uint8_t *dst, uint8_t *src, ptrdiff_t stride).
   *
   * Building blocks referenced by the generated bodies:
   *   - <put_ or OPNAME>h264_qpel<SIZE>_{h,v,hv}_lowpass_<CODETYPE>
   *   - <OPNAME>pixels<SIZE>_l2_<CODETYPE>  (combines two inputs into dst)
   *   - ff_<OPNAME>pixels<SIZE>_<CODETYPE>  (mc00 only)
   *
   * Intermediate planes are always produced by the put_ filters into
   * DECLARE_ALIGNED(16, ...) scratch buffers of SIZE*SIZE bytes written with
   * a row stride of SIZE; OPNAME only governs the final write to dst.
   *
   * NOTE(review): X and Y look like the horizontal and vertical quarter-sample
   * offsets of the motion vector -- confirm against the decoder's table use.
   */
  #define H264_MC(OPNAME, SIZE, CODETYPE) \
  /* mc00: hand the block straight to the ff_*pixels* routine */ \
  static void OPNAME##h264_qpel##SIZE##_mc00_##CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride) \
  { \
      ff_##OPNAME##pixels##SIZE##_##CODETYPE(dst, src, stride, SIZE); \
  } \
  \
  /* mc10: src combined with its horizontal lowpass */ \
  static void OPNAME##h264_qpel##SIZE##_mc10_##CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride) \
  { \
      DECLARE_ALIGNED(16, uint8_t, filtH)[SIZE * SIZE]; \
      put_h264_qpel##SIZE##_h_lowpass_##CODETYPE(filtH, src, SIZE, stride); \
      OPNAME##pixels##SIZE##_l2_##CODETYPE(dst, src, filtH, stride, stride, SIZE); \
  } \
  \
  /* mc20: horizontal lowpass written directly to dst */ \
  static void OPNAME##h264_qpel##SIZE##_mc20_##CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride) \
  { \
      OPNAME##h264_qpel##SIZE##_h_lowpass_##CODETYPE(dst, src, stride, stride); \
  } \
  \
  /* mc30: src + 1 combined with the horizontal lowpass of src */ \
  static void OPNAME##h264_qpel##SIZE##_mc30_##CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride) \
  { \
      DECLARE_ALIGNED(16, uint8_t, filtH)[SIZE * SIZE]; \
      put_h264_qpel##SIZE##_h_lowpass_##CODETYPE(filtH, src, SIZE, stride); \
      OPNAME##pixels##SIZE##_l2_##CODETYPE(dst, src + 1, filtH, stride, stride, SIZE); \
  } \
  \
  /* mc01: src combined with its vertical lowpass */ \
  static void OPNAME##h264_qpel##SIZE##_mc01_##CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride) \
  { \
      DECLARE_ALIGNED(16, uint8_t, filtV)[SIZE * SIZE]; \
      put_h264_qpel##SIZE##_v_lowpass_##CODETYPE(filtV, src, SIZE, stride); \
      OPNAME##pixels##SIZE##_l2_##CODETYPE(dst, src, filtV, stride, stride, SIZE); \
  } \
  \
  /* mc02: vertical lowpass written directly to dst */ \
  static void OPNAME##h264_qpel##SIZE##_mc02_##CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride) \
  { \
      OPNAME##h264_qpel##SIZE##_v_lowpass_##CODETYPE(dst, src, stride, stride); \
  } \
  \
  /* mc03: src + stride combined with the vertical lowpass of src */ \
  static void OPNAME##h264_qpel##SIZE##_mc03_##CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride) \
  { \
      DECLARE_ALIGNED(16, uint8_t, filtV)[SIZE * SIZE]; \
      put_h264_qpel##SIZE##_v_lowpass_##CODETYPE(filtV, src, SIZE, stride); \
      OPNAME##pixels##SIZE##_l2_##CODETYPE(dst, src + stride, filtV, stride, stride, SIZE); \
  } \
  \
  /* mc11: h-lowpass(src) combined with v-lowpass(src) */ \
  static void OPNAME##h264_qpel##SIZE##_mc11_##CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride) \
  { \
      DECLARE_ALIGNED(16, uint8_t, filtH)[SIZE * SIZE]; \
      DECLARE_ALIGNED(16, uint8_t, filtV)[SIZE * SIZE]; \
      put_h264_qpel##SIZE##_h_lowpass_##CODETYPE(filtH, src, SIZE, stride); \
      put_h264_qpel##SIZE##_v_lowpass_##CODETYPE(filtV, src, SIZE, stride); \
      OPNAME##pixels##SIZE##_l2_##CODETYPE(dst, filtH, filtV, stride, SIZE, SIZE); \
  } \
  \
  /* mc31: h-lowpass(src) combined with v-lowpass(src + 1) */ \
  static void OPNAME##h264_qpel##SIZE##_mc31_##CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride) \
  { \
      DECLARE_ALIGNED(16, uint8_t, filtH)[SIZE * SIZE]; \
      DECLARE_ALIGNED(16, uint8_t, filtV)[SIZE * SIZE]; \
      put_h264_qpel##SIZE##_h_lowpass_##CODETYPE(filtH, src, SIZE, stride); \
      put_h264_qpel##SIZE##_v_lowpass_##CODETYPE(filtV, src + 1, SIZE, stride); \
      OPNAME##pixels##SIZE##_l2_##CODETYPE(dst, filtH, filtV, stride, SIZE, SIZE); \
  } \
  \
  /* mc13: h-lowpass(src + stride) combined with v-lowpass(src) */ \
  static void OPNAME##h264_qpel##SIZE##_mc13_##CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride) \
  { \
      DECLARE_ALIGNED(16, uint8_t, filtH)[SIZE * SIZE]; \
      DECLARE_ALIGNED(16, uint8_t, filtV)[SIZE * SIZE]; \
      put_h264_qpel##SIZE##_h_lowpass_##CODETYPE(filtH, src + stride, SIZE, stride); \
      put_h264_qpel##SIZE##_v_lowpass_##CODETYPE(filtV, src, SIZE, stride); \
      OPNAME##pixels##SIZE##_l2_##CODETYPE(dst, filtH, filtV, stride, SIZE, SIZE); \
  } \
  \
  /* mc33: h-lowpass(src + stride) combined with v-lowpass(src + 1) */ \
  static void OPNAME##h264_qpel##SIZE##_mc33_##CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride) \
  { \
      DECLARE_ALIGNED(16, uint8_t, filtH)[SIZE * SIZE]; \
      DECLARE_ALIGNED(16, uint8_t, filtV)[SIZE * SIZE]; \
      put_h264_qpel##SIZE##_h_lowpass_##CODETYPE(filtH, src + stride, SIZE, stride); \
      put_h264_qpel##SIZE##_v_lowpass_##CODETYPE(filtV, src + 1, SIZE, stride); \
      OPNAME##pixels##SIZE##_l2_##CODETYPE(dst, filtH, filtV, stride, SIZE, SIZE); \
  } \
  \
  /* mc22: hv lowpass written directly to dst; scratch is its int16_t work area */ \
  static void OPNAME##h264_qpel##SIZE##_mc22_##CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride) \
  { \
      DECLARE_ALIGNED(16, int16_t, scratch)[SIZE * (SIZE + 8)]; \
      OPNAME##h264_qpel##SIZE##_hv_lowpass_##CODETYPE(dst, scratch, src, stride, SIZE, stride); \
  } \
  \
  /* mc21: h-lowpass(src) combined with hv-lowpass(src) */ \
  static void OPNAME##h264_qpel##SIZE##_mc21_##CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride) \
  { \
      DECLARE_ALIGNED(16, uint8_t, filtH)[SIZE * SIZE]; \
      DECLARE_ALIGNED(16, uint8_t, filtHV)[SIZE * SIZE]; \
      DECLARE_ALIGNED(16, int16_t, scratch)[SIZE * (SIZE + 8)]; \
      put_h264_qpel##SIZE##_h_lowpass_##CODETYPE(filtH, src, SIZE, stride); \
      put_h264_qpel##SIZE##_hv_lowpass_##CODETYPE(filtHV, scratch, src, SIZE, SIZE, stride); \
      OPNAME##pixels##SIZE##_l2_##CODETYPE(dst, filtH, filtHV, stride, SIZE, SIZE); \
  } \
  \
  /* mc23: h-lowpass(src + stride) combined with hv-lowpass(src) */ \
  static void OPNAME##h264_qpel##SIZE##_mc23_##CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride) \
  { \
      DECLARE_ALIGNED(16, uint8_t, filtH)[SIZE * SIZE]; \
      DECLARE_ALIGNED(16, uint8_t, filtHV)[SIZE * SIZE]; \
      DECLARE_ALIGNED(16, int16_t, scratch)[SIZE * (SIZE + 8)]; \
      put_h264_qpel##SIZE##_h_lowpass_##CODETYPE(filtH, src + stride, SIZE, stride); \
      put_h264_qpel##SIZE##_hv_lowpass_##CODETYPE(filtHV, scratch, src, SIZE, SIZE, stride); \
      OPNAME##pixels##SIZE##_l2_##CODETYPE(dst, filtH, filtHV, stride, SIZE, SIZE); \
  } \
  \
  /* mc12: v-lowpass(src) combined with hv-lowpass(src) */ \
  static void OPNAME##h264_qpel##SIZE##_mc12_##CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride) \
  { \
      DECLARE_ALIGNED(16, uint8_t, filtV)[SIZE * SIZE]; \
      DECLARE_ALIGNED(16, uint8_t, filtHV)[SIZE * SIZE]; \
      DECLARE_ALIGNED(16, int16_t, scratch)[SIZE * (SIZE + 8)]; \
      put_h264_qpel##SIZE##_v_lowpass_##CODETYPE(filtV, src, SIZE, stride); \
      put_h264_qpel##SIZE##_hv_lowpass_##CODETYPE(filtHV, scratch, src, SIZE, SIZE, stride); \
      OPNAME##pixels##SIZE##_l2_##CODETYPE(dst, filtV, filtHV, stride, SIZE, SIZE); \
  } \
  \
  /* mc32: v-lowpass(src + 1) combined with hv-lowpass(src) */ \
  static void OPNAME##h264_qpel##SIZE##_mc32_##CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride) \
  { \
      DECLARE_ALIGNED(16, uint8_t, filtV)[SIZE * SIZE]; \
      DECLARE_ALIGNED(16, uint8_t, filtHV)[SIZE * SIZE]; \
      DECLARE_ALIGNED(16, int16_t, scratch)[SIZE * (SIZE + 8)]; \
      put_h264_qpel##SIZE##_v_lowpass_##CODETYPE(filtV, src + 1, SIZE, stride); \
      put_h264_qpel##SIZE##_hv_lowpass_##CODETYPE(filtHV, scratch, src, SIZE, SIZE, stride); \
      OPNAME##pixels##SIZE##_l2_##CODETYPE(dst, filtV, filtHV, stride, SIZE, SIZE); \
  }
  192.  
  193. static inline void put_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1,
  194.                                     const uint8_t * src2, int dst_stride,
  195.                                     int src_stride1, int h)
  196. {
  197.     int i;
  198.     vec_u8 a, b, d, tmp1, tmp2, mask, mask_, edges, align;
  199.  
  200.     mask_ = vec_lvsl(0, src2);
  201.  
  202.     for (i = 0; i < h; i++) {
  203.  
  204.         tmp1 = vec_ld(i * src_stride1, src1);
  205.         mask = vec_lvsl(i * src_stride1, src1);
  206.         tmp2 = vec_ld(i * src_stride1 + 15, src1);
  207.  
  208.         a = vec_perm(tmp1, tmp2, mask);
  209.  
  210.         tmp1 = vec_ld(i * 16, src2);
  211.         tmp2 = vec_ld(i * 16 + 15, src2);
  212.  
  213.         b = vec_perm(tmp1, tmp2, mask_);
  214.  
  215.         tmp1 = vec_ld(0, dst);
  216.         mask = vec_lvsl(0, dst);
  217.         tmp2 = vec_ld(15, dst);
  218.  
  219.         d = vec_avg(a, b);
  220.  
  221.         edges = vec_perm(tmp2, tmp1, mask);
  222.  
  223.         align = vec_lvsr(0, dst);
  224.  
  225.         tmp2 = vec_perm(d, edges, align);
  226.         tmp1 = vec_perm(edges, d, align);
  227.  
  228.         vec_st(tmp2, 15, dst);
  229.         vec_st(tmp1, 0 , dst);
  230.  
  231.         dst += dst_stride;
  232.     }
  233. }
  234.  
  235. static inline void avg_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1,
  236.                                     const uint8_t * src2, int dst_stride,
  237.                                     int src_stride1, int h)
  238. {
  239.     int i;
  240.     vec_u8 a, b, d, tmp1, tmp2, mask, mask_, edges, align;
  241.  
  242.     mask_ = vec_lvsl(0, src2);
  243.  
  244.     for (i = 0; i < h; i++) {
  245.  
  246.         tmp1 = vec_ld(i * src_stride1, src1);
  247.         mask = vec_lvsl(i * src_stride1, src1);
  248.         tmp2 = vec_ld(i * src_stride1 + 15, src1);
  249.  
  250.         a = vec_perm(tmp1, tmp2, mask);
  251.  
  252.         tmp1 = vec_ld(i * 16, src2);
  253.         tmp2 = vec_ld(i * 16 + 15, src2);
  254.  
  255.         b = vec_perm(tmp1, tmp2, mask_);
  256.  
  257.         tmp1 = vec_ld(0, dst);
  258.         mask = vec_lvsl(0, dst);
  259.         tmp2 = vec_ld(15, dst);
  260.  
  261.         d = vec_avg(vec_perm(tmp1, tmp2, mask), vec_avg(a, b));
  262.  
  263.         edges = vec_perm(tmp2, tmp1, mask);
  264.  
  265.         align = vec_lvsr(0, dst);
  266.  
  267.         tmp2 = vec_perm(d, edges, align);
  268.         tmp1 = vec_perm(edges, d, align);
  269.  
  270.         vec_st(tmp2, 15, dst);
  271.         vec_st(tmp1, 0 , dst);
  272.  
  273.         dst += dst_stride;
  274.     }
  275. }
  276.  
  277. /* Implemented but could be faster
  278. #define put_pixels16_l2_altivec(d,s1,s2,ds,s1s,h) put_pixels16_l2(d,s1,s2,ds,s1s,16,h)
  279. #define avg_pixels16_l2_altivec(d,s1,s2,ds,s1s,h) avg_pixels16_l2(d,s1,s2,ds,s1s,16,h)
  280.  */
  281.  
  282. H264_MC(put_, 16, altivec)
  283. H264_MC(avg_, 16, altivec)
  284. #endif /* HAVE_ALTIVEC */
  285.  
  286. av_cold void ff_h264qpel_init_ppc(H264QpelContext *c, int bit_depth)
  287. {
  288. #if HAVE_ALTIVEC
  289.     const int high_bit_depth = bit_depth > 8;
  290.  
  291.     if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC))
  292.         return;
  293.  
  294.     if (!high_bit_depth) {
  295. #define dspfunc(PFX, IDX, NUM) \
  296.         c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_altivec; \
  297.         c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_altivec; \
  298.         c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_altivec; \
  299.         c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_altivec; \
  300.         c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_altivec; \
  301.         c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_altivec; \
  302.         c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_altivec; \
  303.         c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_altivec; \
  304.         c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_altivec; \
  305.         c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_altivec; \
  306.         c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_altivec; \
  307.         c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_altivec; \
  308.         c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_altivec; \
  309.         c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_altivec; \
  310.         c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_altivec; \
  311.         c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_altivec
  312.  
  313.         dspfunc(put_h264_qpel, 0, 16);
  314.         dspfunc(avg_h264_qpel, 0, 16);
  315. #undef dspfunc
  316.     }
  317. #endif /* HAVE_ALTIVEC */
  318. }
  319.