Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Format Conversion Utils
  3.  * Copyright (c) 2000, 2001 Fabrice Bellard
  4.  * Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
  5.  *
  6.  * MMX optimization by Nick Kurshev <nickols_k@mail.ru>
  7.  *
  8.  * This file is part of FFmpeg.
  9.  *
  10.  * FFmpeg is free software; you can redistribute it and/or
  11.  * modify it under the terms of the GNU Lesser General Public
  12.  * License as published by the Free Software Foundation; either
  13.  * version 2.1 of the License, or (at your option) any later version.
  14.  *
  15.  * FFmpeg is distributed in the hope that it will be useful,
  16.  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  17.  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18.  * Lesser General Public License for more details.
  19.  *
  20.  * You should have received a copy of the GNU Lesser General Public
  21.  * License along with FFmpeg; if not, write to the Free Software
  22.  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  23.  */
  24.  
  25. #include "libavutil/attributes.h"
  26. #include "libavutil/cpu.h"
  27. #include "libavutil/x86/asm.h"
  28. #include "libavutil/x86/cpu.h"
  29. #include "libavcodec/fmtconvert.h"
  30.  
  31. #if HAVE_YASM
  32.  
  33. void ff_int32_to_float_fmul_scalar_sse (float *dst, const int32_t *src, float mul, int len);
  34. void ff_int32_to_float_fmul_scalar_sse2(float *dst, const int32_t *src, float mul, int len);
  35.  
  36. void ff_float_to_int16_3dnow(int16_t *dst, const float *src, long len);
  37. void ff_float_to_int16_sse  (int16_t *dst, const float *src, long len);
  38. void ff_float_to_int16_sse2 (int16_t *dst, const float *src, long len);
  39.  
  40. void ff_float_to_int16_step_3dnow(int16_t *dst, const float *src, long len, long step);
  41. void ff_float_to_int16_step_sse  (int16_t *dst, const float *src, long len, long step);
  42. void ff_float_to_int16_step_sse2 (int16_t *dst, const float *src, long len, long step);
  43.  
  44. void ff_float_to_int16_interleave2_3dnow(int16_t *dst, const float **src, long len);
  45. void ff_float_to_int16_interleave2_sse  (int16_t *dst, const float **src, long len);
  46. void ff_float_to_int16_interleave2_sse2 (int16_t *dst, const float **src, long len);
  47.  
  48. void ff_float_to_int16_interleave6_sse(int16_t *dst, const float **src, int len);
  49. void ff_float_to_int16_interleave6_3dnow(int16_t *dst, const float **src, int len);
  50. void ff_float_to_int16_interleave6_3dnowext(int16_t *dst, const float **src, int len);
  51.  
  52. #define ff_float_to_int16_interleave6_sse2 ff_float_to_int16_interleave6_sse
  53.  
  54. #define FLOAT_TO_INT16_INTERLEAVE(cpu) \
  55. /* gcc pessimizes register allocation if this is in the same function as float_to_int16_interleave_sse2*/\
  56. static av_noinline void float_to_int16_interleave_misc_##cpu(int16_t *dst, const float **src, long len, int channels){\
  57.     int c;\
  58.     for(c=0; c<channels; c++){\
  59.         ff_float_to_int16_step_##cpu(dst+c, src[c], len, channels);\
  60.     }\
  61. }\
  62. \
  63. static void float_to_int16_interleave_##cpu(int16_t *dst, const float **src, long len, int channels){\
  64.     if(channels==1)\
  65.         ff_float_to_int16_##cpu(dst, src[0], len);\
  66.     else if(channels==2){\
  67.         ff_float_to_int16_interleave2_##cpu(dst, src, len);\
  68.     }else if(channels==6){\
  69.         ff_float_to_int16_interleave6_##cpu(dst, src, len);\
  70.     }else\
  71.         float_to_int16_interleave_misc_##cpu(dst, src, len, channels);\
  72. }
  73.  
  74. FLOAT_TO_INT16_INTERLEAVE(3dnow)
  75. FLOAT_TO_INT16_INTERLEAVE(sse)
  76. FLOAT_TO_INT16_INTERLEAVE(sse2)
  77.  
  78. static void float_to_int16_interleave_3dnowext(int16_t *dst, const float **src,
  79.                                                long len, int channels)
  80. {
  81.     if(channels==6)
  82.         ff_float_to_int16_interleave6_3dnowext(dst, src, len);
  83.     else
  84.         float_to_int16_interleave_3dnow(dst, src, len, channels);
  85. }
  86.  
  87. void ff_float_interleave2_mmx(float *dst, const float **src, unsigned int len);
  88. void ff_float_interleave2_sse(float *dst, const float **src, unsigned int len);
  89.  
  90. void ff_float_interleave6_mmx(float *dst, const float **src, unsigned int len);
  91. void ff_float_interleave6_sse(float *dst, const float **src, unsigned int len);
  92.  
  93. static void float_interleave_mmx(float *dst, const float **src,
  94.                                  unsigned int len, int channels)
  95. {
  96.     if (channels == 2) {
  97.         ff_float_interleave2_mmx(dst, src, len);
  98.     } else if (channels == 6)
  99.         ff_float_interleave6_mmx(dst, src, len);
  100.     else
  101.         ff_float_interleave_c(dst, src, len, channels);
  102. }
  103.  
  104. static void float_interleave_sse(float *dst, const float **src,
  105.                                  unsigned int len, int channels)
  106. {
  107.     if (channels == 2) {
  108.         ff_float_interleave2_sse(dst, src, len);
  109.     } else if (channels == 6)
  110.         ff_float_interleave6_sse(dst, src, len);
  111.     else
  112.         ff_float_interleave_c(dst, src, len, channels);
  113. }
  114. #endif /* HAVE_YASM */
  115.  
  116. av_cold void ff_fmt_convert_init_x86(FmtConvertContext *c, AVCodecContext *avctx)
  117. {
  118. #if HAVE_YASM
  119.     int cpu_flags = av_get_cpu_flags();
  120.  
  121.     if (EXTERNAL_MMX(cpu_flags)) {
  122.         c->float_interleave = float_interleave_mmx;
  123.     }
  124.     if (EXTERNAL_AMD3DNOW(cpu_flags)) {
  125.         if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
  126.             c->float_to_int16            = ff_float_to_int16_3dnow;
  127.             c->float_to_int16_interleave = float_to_int16_interleave_3dnow;
  128.         }
  129.     }
  130.     if (EXTERNAL_AMD3DNOWEXT(cpu_flags)) {
  131.         if (!(avctx->flags & CODEC_FLAG_BITEXACT)) {
  132.             c->float_to_int16_interleave = float_to_int16_interleave_3dnowext;
  133.         }
  134.     }
  135.     if (EXTERNAL_SSE(cpu_flags)) {
  136.         c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_sse;
  137.         c->float_to_int16             = ff_float_to_int16_sse;
  138.         c->float_to_int16_interleave  = float_to_int16_interleave_sse;
  139.         c->float_interleave           = float_interleave_sse;
  140.     }
  141.     if (EXTERNAL_SSE2(cpu_flags)) {
  142.         c->int32_to_float_fmul_scalar = ff_int32_to_float_fmul_scalar_sse2;
  143.         c->float_to_int16             = ff_float_to_int16_sse2;
  144.         c->float_to_int16_interleave  = float_to_int16_interleave_sse2;
  145.     }
  146. #endif /* HAVE_YASM */
  147. }
  148.