Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * FFT  transform, optimized with VSX built-in functions
  3.  * Copyright (c) 2014 Rong Yan
  4.  *
  5.  * This algorithm (though not any of the implementation details) is
  6.  * based on libdjbfft by D. J. Bernstein.
  7.  *
  8.  * This file is part of FFmpeg.
  9.  *
  10.  * FFmpeg is free software; you can redistribute it and/or
  11.  * modify it under the terms of the GNU Lesser General Public
  12.  * License as published by the Free Software Foundation; either
  13.  * version 2.1 of the License, or (at your option) any later version.
  14.  *
  15.  * FFmpeg is distributed in the hope that it will be useful,
  16.  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17.  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18.  * Lesser General Public License for more details.
  19.  *
  20.  * You should have received a copy of the GNU Lesser General Public
  21.  * License along with FFmpeg; if not, write to the Free Software
  22.  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  23.  */
  24.  
  25.  
  26. #include "config.h"
  27. #include "libavutil/cpu.h"
  28. #include "libavutil/ppc/types_altivec.h"
  29. #include "libavutil/ppc/util_altivec.h"
  30. #include "libavcodec/fft.h"
  31. #include "libavcodec/fft-internal.h"
  32. #include "fft_vsx.h"
  33.  
  34. #if HAVE_VSX
  35.  
  36. static void fft32_vsx_interleave(FFTComplex *z)
  37. {
  38.     fft16_vsx_interleave(z);
  39.     fft8_vsx_interleave(z+16);
  40.     fft8_vsx_interleave(z+24);
  41.     pass_vsx_interleave(z,ff_cos_32,4);
  42. }
  43.  
  44. static void fft64_vsx_interleave(FFTComplex *z)
  45. {
  46.     fft32_vsx_interleave(z);
  47.     fft16_vsx_interleave(z+32);
  48.     fft16_vsx_interleave(z+48);
  49.     pass_vsx_interleave(z,ff_cos_64, 8);
  50. }
  51. static void fft128_vsx_interleave(FFTComplex *z)
  52. {
  53.     fft64_vsx_interleave(z);
  54.     fft32_vsx_interleave(z+64);
  55.     fft32_vsx_interleave(z+96);
  56.     pass_vsx_interleave(z,ff_cos_128,16);
  57. }
  58. static void fft256_vsx_interleave(FFTComplex *z)
  59. {
  60.     fft128_vsx_interleave(z);
  61.     fft64_vsx_interleave(z+128);
  62.     fft64_vsx_interleave(z+192);
  63.     pass_vsx_interleave(z,ff_cos_256,32);
  64. }
  65. static void fft512_vsx_interleave(FFTComplex *z)
  66. {
  67.     fft256_vsx_interleave(z);
  68.     fft128_vsx_interleave(z+256);
  69.     fft128_vsx_interleave(z+384);
  70.     pass_vsx_interleave(z,ff_cos_512,64);
  71. }
  72. static void fft1024_vsx_interleave(FFTComplex *z)
  73. {
  74.     fft512_vsx_interleave(z);
  75.     fft256_vsx_interleave(z+512);
  76.     fft256_vsx_interleave(z+768);
  77.     pass_vsx_interleave(z,ff_cos_1024,128);
  78.  
  79. }
  80. static void fft2048_vsx_interleave(FFTComplex *z)
  81. {
  82.     fft1024_vsx_interleave(z);
  83.     fft512_vsx_interleave(z+1024);
  84.     fft512_vsx_interleave(z+1536);
  85.     pass_vsx_interleave(z,ff_cos_2048,256);
  86. }
  87. static void fft4096_vsx_interleave(FFTComplex *z)
  88. {
  89.     fft2048_vsx_interleave(z);
  90.     fft1024_vsx_interleave(z+2048);
  91.     fft1024_vsx_interleave(z+3072);
  92.     pass_vsx_interleave(z,ff_cos_4096, 512);
  93. }
  94. static void fft8192_vsx_interleave(FFTComplex *z)
  95. {
  96.     fft4096_vsx_interleave(z);
  97.     fft2048_vsx_interleave(z+4096);
  98.     fft2048_vsx_interleave(z+6144);
  99.     pass_vsx_interleave(z,ff_cos_8192,1024);
  100. }
  101. static void fft16384_vsx_interleave(FFTComplex *z)
  102. {
  103.     fft8192_vsx_interleave(z);
  104.     fft4096_vsx_interleave(z+8192);
  105.     fft4096_vsx_interleave(z+12288);
  106.     pass_vsx_interleave(z,ff_cos_16384,2048);
  107. }
  108. static void fft32768_vsx_interleave(FFTComplex *z)
  109. {
  110.     fft16384_vsx_interleave(z);
  111.     fft8192_vsx_interleave(z+16384);
  112.     fft8192_vsx_interleave(z+24576);
  113.     pass_vsx_interleave(z,ff_cos_32768,4096);
  114. }
  115. static void fft65536_vsx_interleave(FFTComplex *z)
  116. {
  117.     fft32768_vsx_interleave(z);
  118.     fft16384_vsx_interleave(z+32768);
  119.     fft16384_vsx_interleave(z+49152);
  120.     pass_vsx_interleave(z,ff_cos_65536,8192);
  121. }
  122.  
  123. static void fft32_vsx(FFTComplex *z)
  124. {
  125.     fft16_vsx(z);
  126.     fft8_vsx(z+16);
  127.     fft8_vsx(z+24);
  128.     pass_vsx(z,ff_cos_32,4);
  129. }
  130.  
  131. static void fft64_vsx(FFTComplex *z)
  132. {
  133.     fft32_vsx(z);
  134.     fft16_vsx(z+32);
  135.     fft16_vsx(z+48);
  136.     pass_vsx(z,ff_cos_64, 8);
  137. }
  138. static void fft128_vsx(FFTComplex *z)
  139. {
  140.     fft64_vsx(z);
  141.     fft32_vsx(z+64);
  142.     fft32_vsx(z+96);
  143.     pass_vsx(z,ff_cos_128,16);
  144. }
  145. static void fft256_vsx(FFTComplex *z)
  146. {
  147.     fft128_vsx(z);
  148.     fft64_vsx(z+128);
  149.     fft64_vsx(z+192);
  150.     pass_vsx(z,ff_cos_256,32);
  151. }
  152. static void fft512_vsx(FFTComplex *z)
  153. {
  154.     fft256_vsx(z);
  155.     fft128_vsx(z+256);
  156.     fft128_vsx(z+384);
  157.     pass_vsx(z,ff_cos_512,64);
  158. }
  159. static void fft1024_vsx(FFTComplex *z)
  160. {
  161.     fft512_vsx(z);
  162.     fft256_vsx(z+512);
  163.     fft256_vsx(z+768);
  164.     pass_vsx(z,ff_cos_1024,128);
  165.  
  166. }
  167. static void fft2048_vsx(FFTComplex *z)
  168. {
  169.     fft1024_vsx(z);
  170.     fft512_vsx(z+1024);
  171.     fft512_vsx(z+1536);
  172.     pass_vsx(z,ff_cos_2048,256);
  173. }
  174. static void fft4096_vsx(FFTComplex *z)
  175. {
  176.     fft2048_vsx(z);
  177.     fft1024_vsx(z+2048);
  178.     fft1024_vsx(z+3072);
  179.     pass_vsx(z,ff_cos_4096, 512);
  180. }
  181. static void fft8192_vsx(FFTComplex *z)
  182. {
  183.     fft4096_vsx(z);
  184.     fft2048_vsx(z+4096);
  185.     fft2048_vsx(z+6144);
  186.     pass_vsx(z,ff_cos_8192,1024);
  187. }
  188. static void fft16384_vsx(FFTComplex *z)
  189. {
  190.     fft8192_vsx(z);
  191.     fft4096_vsx(z+8192);
  192.     fft4096_vsx(z+12288);
  193.     pass_vsx(z,ff_cos_16384,2048);
  194. }
  195. static void fft32768_vsx(FFTComplex *z)
  196. {
  197.     fft16384_vsx(z);
  198.     fft8192_vsx(z+16384);
  199.     fft8192_vsx(z+24576);
  200.     pass_vsx(z,ff_cos_32768,4096);
  201. }
  202. static void fft65536_vsx(FFTComplex *z)
  203. {
  204.     fft32768_vsx(z);
  205.     fft16384_vsx(z+32768);
  206.     fft16384_vsx(z+49152);
  207.     pass_vsx(z,ff_cos_65536,8192);
  208. }
  209.  
  210. static void (* const fft_dispatch_vsx[])(FFTComplex*) = {
  211.     fft4_vsx, fft8_vsx, fft16_vsx, fft32_vsx, fft64_vsx, fft128_vsx, fft256_vsx, fft512_vsx, fft1024_vsx,
  212.     fft2048_vsx, fft4096_vsx, fft8192_vsx, fft16384_vsx, fft32768_vsx, fft65536_vsx,
  213. };
  214. static void (* const fft_dispatch_vsx_interleave[])(FFTComplex*) = {
  215.     fft4_vsx_interleave, fft8_vsx_interleave, fft16_vsx_interleave, fft32_vsx_interleave, fft64_vsx_interleave,
  216.     fft128_vsx_interleave, fft256_vsx_interleave, fft512_vsx_interleave, fft1024_vsx_interleave,
  217.     fft2048_vsx_interleave, fft4096_vsx_interleave, fft8192_vsx_interleave, fft16384_vsx_interleave, fft32768_vsx_interleave, fft65536_vsx_interleave,
  218. };
  219. void ff_fft_calc_interleave_vsx(FFTContext *s, FFTComplex *z)
  220. {
  221.      fft_dispatch_vsx_interleave[s->nbits-2](z);
  222. }
  223. void ff_fft_calc_vsx(FFTContext *s, FFTComplex *z)
  224. {
  225.      fft_dispatch_vsx[s->nbits-2](z);
  226. }
  227. #endif /* HAVE_VSX */
  228.