Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright (C) 2012 Michael Niedermayer (michaelni@gmx.at)
  3.  *
  4.  * This file is part of libswresample
  5.  *
  6.  * libswresample is free software; you can redistribute it and/or
  7.  * modify it under the terms of the GNU Lesser General Public
  8.  * License as published by the Free Software Foundation; either
  9.  * version 2.1 of the License, or (at your option) any later version.
  10.  *
  11.  * libswresample is distributed in the hope that it will be useful,
  12.  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  13.  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14.  * Lesser General Public License for more details.
  15.  *
  16.  * You should have received a copy of the GNU Lesser General Public
  17.  * License along with libswresample; if not, write to the Free Software
  18.  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
  19.  */
  20.  
  21. #include "libswresample/swresample_internal.h"
  22. #include "libswresample/audioconvert.h"
  23.  
  /*
   * Prototypes for the externally defined sample-format conversion routines.
   *
   * PROTO  declares a single function named
   *            ff<prefix>_<ifmt>_to_<ofmt>_a_<isa>
   * PROTO2 repeats PROTO for the int16, int32 and float input formats,
   * PROTO3 repeats PROTO2 for the int16, int32 and float output formats,
   * PROTO4 repeats PROTO3 for the mmx, sse, sse2, ssse3, sse4 and avx suffixes.
   */
  #define PROTO(prefix, ifmt, ofmt, isa) \
      void ff ## prefix ## _ ## ifmt ## _to_ ## ofmt ## _a_ ## isa(uint8_t **dst, const uint8_t **src, int len);
  #define PROTO2(prefix, ofmt, isa)   \
      PROTO(prefix, int16, ofmt, isa) \
      PROTO(prefix, int32, ofmt, isa) \
      PROTO(prefix, float, ofmt, isa)
  #define PROTO3(prefix, isa)    \
      PROTO2(prefix, int16, isa) \
      PROTO2(prefix, int32, isa) \
      PROTO2(prefix, float, isa)
  #define PROTO4(prefix)    \
      PROTO3(prefix, mmx)   \
      PROTO3(prefix, sse)   \
      PROTO3(prefix, sse2)  \
      PROTO3(prefix, ssse3) \
      PROTO3(prefix, sse4)  \
      PROTO3(prefix, avx)

  /* An empty prefix yields the plain ff_<ifmt>_to_<ofmt>_a_<isa> names. */
  PROTO4()
  PROTO4(_pack_2ch)
  PROTO4(_pack_6ch)
  PROTO4(_unpack_2ch)
  32.  
  33. av_cold void swri_audio_convert_init_x86(struct AudioConvert *ac,
  34.                                  enum AVSampleFormat out_fmt,
  35.                                  enum AVSampleFormat in_fmt,
  36.                                  int channels){
  37.     int mm_flags = av_get_cpu_flags();
  38.  
  39.     ac->simd_f= NULL;
  40.  
  41. //FIXME add memcpy case
  42.  
  43. #define MULTI_CAPS_FUNC(flag, cap) \
  44.     if (mm_flags & flag) {\
  45.         if(   out_fmt == AV_SAMPLE_FMT_S32  && in_fmt == AV_SAMPLE_FMT_S16 || out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_S16P)\
  46.             ac->simd_f =  ff_int16_to_int32_a_ ## cap;\
  47.         if(   out_fmt == AV_SAMPLE_FMT_S16  && in_fmt == AV_SAMPLE_FMT_S32 || out_fmt == AV_SAMPLE_FMT_S16P && in_fmt == AV_SAMPLE_FMT_S32P)\
  48.             ac->simd_f =  ff_int32_to_int16_a_ ## cap;\
  49.     }
  50.  
  51. MULTI_CAPS_FUNC(AV_CPU_FLAG_MMX, mmx)
  52. MULTI_CAPS_FUNC(AV_CPU_FLAG_SSE2, sse2)
  53.  
  54.     if(mm_flags & AV_CPU_FLAG_MMX) {
  55.         if(channels == 6) {
  56.             if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P)
  57.                 ac->simd_f =  ff_pack_6ch_float_to_float_a_mmx;
  58.         }
  59.     }
  60.  
  61.     if(mm_flags & AV_CPU_FLAG_SSE2) {
  62.         if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_S32 || out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_S32P)
  63.             ac->simd_f =  ff_int32_to_float_a_sse2;
  64.         if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_S16 || out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_S16P)
  65.             ac->simd_f =  ff_int16_to_float_a_sse2;
  66.         if(   out_fmt == AV_SAMPLE_FMT_S32  && in_fmt == AV_SAMPLE_FMT_FLT || out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_FLTP)
  67.             ac->simd_f =  ff_float_to_int32_a_sse2;
  68.         if(   out_fmt == AV_SAMPLE_FMT_S16  && in_fmt == AV_SAMPLE_FMT_FLT || out_fmt == AV_SAMPLE_FMT_S16P && in_fmt == AV_SAMPLE_FMT_FLTP)
  69.             ac->simd_f =  ff_float_to_int16_a_sse2;
  70.  
  71.         if(channels == 2) {
  72.             if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P)
  73.                 ac->simd_f =  ff_pack_2ch_int32_to_int32_a_sse2;
  74.             if(   out_fmt == AV_SAMPLE_FMT_S16  && in_fmt == AV_SAMPLE_FMT_S16P)
  75.                 ac->simd_f =  ff_pack_2ch_int16_to_int16_a_sse2;
  76.             if(   out_fmt == AV_SAMPLE_FMT_S32  && in_fmt == AV_SAMPLE_FMT_S16P)
  77.                 ac->simd_f =  ff_pack_2ch_int16_to_int32_a_sse2;
  78.             if(   out_fmt == AV_SAMPLE_FMT_S16  && in_fmt == AV_SAMPLE_FMT_S32P)
  79.                 ac->simd_f =  ff_pack_2ch_int32_to_int16_a_sse2;
  80.  
  81.             if(   out_fmt == AV_SAMPLE_FMT_FLTP  && in_fmt == AV_SAMPLE_FMT_FLT || out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_S32)
  82.                 ac->simd_f =  ff_unpack_2ch_int32_to_int32_a_sse2;
  83.             if(   out_fmt == AV_SAMPLE_FMT_S16P  && in_fmt == AV_SAMPLE_FMT_S16)
  84.                 ac->simd_f =  ff_unpack_2ch_int16_to_int16_a_sse2;
  85.             if(   out_fmt == AV_SAMPLE_FMT_S32P  && in_fmt == AV_SAMPLE_FMT_S16)
  86.                 ac->simd_f =  ff_unpack_2ch_int16_to_int32_a_sse2;
  87.             if(   out_fmt == AV_SAMPLE_FMT_S16P  && in_fmt == AV_SAMPLE_FMT_S32)
  88.                 ac->simd_f =  ff_unpack_2ch_int32_to_int16_a_sse2;
  89.  
  90.             if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_S32P)
  91.                 ac->simd_f =  ff_pack_2ch_int32_to_float_a_sse2;
  92.             if(   out_fmt == AV_SAMPLE_FMT_S32  && in_fmt == AV_SAMPLE_FMT_FLTP)
  93.                 ac->simd_f =  ff_pack_2ch_float_to_int32_a_sse2;
  94.             if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_S16P)
  95.                 ac->simd_f =  ff_pack_2ch_int16_to_float_a_sse2;
  96.             if(   out_fmt == AV_SAMPLE_FMT_S16  && in_fmt == AV_SAMPLE_FMT_FLTP)
  97.                 ac->simd_f =  ff_pack_2ch_float_to_int16_a_sse2;
  98.             if(   out_fmt == AV_SAMPLE_FMT_FLTP  && in_fmt == AV_SAMPLE_FMT_S32)
  99.                 ac->simd_f =  ff_unpack_2ch_int32_to_float_a_sse2;
  100.             if(   out_fmt == AV_SAMPLE_FMT_S32P  && in_fmt == AV_SAMPLE_FMT_FLT)
  101.                 ac->simd_f =  ff_unpack_2ch_float_to_int32_a_sse2;
  102.             if(   out_fmt == AV_SAMPLE_FMT_FLTP  && in_fmt == AV_SAMPLE_FMT_S16)
  103.                 ac->simd_f =  ff_unpack_2ch_int16_to_float_a_sse2;
  104.             if(   out_fmt == AV_SAMPLE_FMT_S16P  && in_fmt == AV_SAMPLE_FMT_FLT)
  105.                 ac->simd_f =  ff_unpack_2ch_float_to_int16_a_sse2;
  106.         }
  107.     }
  108.     if(mm_flags & AV_CPU_FLAG_SSSE3) {
  109.         if(channels == 2) {
  110.             if(   out_fmt == AV_SAMPLE_FMT_S16P  && in_fmt == AV_SAMPLE_FMT_S16)
  111.                 ac->simd_f =  ff_unpack_2ch_int16_to_int16_a_ssse3;
  112.             if(   out_fmt == AV_SAMPLE_FMT_S32P  && in_fmt == AV_SAMPLE_FMT_S16)
  113.                 ac->simd_f =  ff_unpack_2ch_int16_to_int32_a_ssse3;
  114.             if(   out_fmt == AV_SAMPLE_FMT_FLTP  && in_fmt == AV_SAMPLE_FMT_S16)
  115.                 ac->simd_f =  ff_unpack_2ch_int16_to_float_a_ssse3;
  116.         }
  117.     }
  118.     if(mm_flags & AV_CPU_FLAG_SSE4) {
  119.         if(channels == 6) {
  120.             if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P)
  121.                 ac->simd_f =  ff_pack_6ch_float_to_float_a_sse4;
  122.             if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_S32P)
  123.                 ac->simd_f =  ff_pack_6ch_int32_to_float_a_sse4;
  124.             if(   out_fmt == AV_SAMPLE_FMT_S32  && in_fmt == AV_SAMPLE_FMT_FLTP)
  125.                 ac->simd_f =  ff_pack_6ch_float_to_int32_a_sse4;
  126.         }
  127.     }
  128.     if(HAVE_AVX_EXTERNAL && mm_flags & AV_CPU_FLAG_AVX) {
  129.         if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_S32 || out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_S32P)
  130.             ac->simd_f =  ff_int32_to_float_a_avx;
  131.         if(channels == 6) {
  132.             if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P)
  133.                 ac->simd_f =  ff_pack_6ch_float_to_float_a_avx;
  134.             if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_S32P)
  135.                 ac->simd_f =  ff_pack_6ch_int32_to_float_a_avx;
  136.             if(   out_fmt == AV_SAMPLE_FMT_S32  && in_fmt == AV_SAMPLE_FMT_FLTP)
  137.                 ac->simd_f =  ff_pack_6ch_float_to_int32_a_avx;
  138.         }
  139.     }
  140. }
  141.  
  142. #define D(type, simd) \
  143. mix_1_1_func_type ff_mix_1_1_a_## type ## _ ## simd;\
  144. mix_2_1_func_type ff_mix_2_1_a_## type ## _ ## simd;
  145.  
  146. D(float, sse)
  147. D(float, avx)
  148. D(int16, mmx)
  149. D(int16, sse2)
  150.  
  151.  
  152. av_cold void swri_rematrix_init_x86(struct SwrContext *s){
  153.     int mm_flags = av_get_cpu_flags();
  154.     int nb_in  = av_get_channel_layout_nb_channels(s->in_ch_layout);
  155.     int nb_out = av_get_channel_layout_nb_channels(s->out_ch_layout);
  156.     int num    = nb_in * nb_out;
  157.     int i,j;
  158.  
  159.     s->mix_1_1_simd = NULL;
  160.     s->mix_2_1_simd = NULL;
  161.  
  162.     if (s->midbuf.fmt == AV_SAMPLE_FMT_S16P){
  163.         if(mm_flags & AV_CPU_FLAG_MMX) {
  164.             s->mix_1_1_simd = ff_mix_1_1_a_int16_mmx;
  165.             s->mix_2_1_simd = ff_mix_2_1_a_int16_mmx;
  166.         }
  167.         if(mm_flags & AV_CPU_FLAG_SSE2) {
  168.             s->mix_1_1_simd = ff_mix_1_1_a_int16_sse2;
  169.             s->mix_2_1_simd = ff_mix_2_1_a_int16_sse2;
  170.         }
  171.         s->native_simd_matrix = av_mallocz(2 * num * sizeof(int16_t));
  172.         s->native_simd_one    = av_mallocz(2 * sizeof(int16_t));
  173.         for(i=0; i<nb_out; i++){
  174.             int sh = 0;
  175.             for(j=0; j<nb_in; j++)
  176.                 sh = FFMAX(sh, FFABS(((int*)s->native_matrix)[i * nb_in + j]));
  177.             sh = FFMAX(av_log2(sh) - 14, 0);
  178.             for(j=0; j<nb_in; j++) {
  179.                 ((int16_t*)s->native_simd_matrix)[2*(i * nb_in + j)+1] = 15 - sh;
  180.                 ((int16_t*)s->native_simd_matrix)[2*(i * nb_in + j)] =
  181.                     ((((int*)s->native_matrix)[i * nb_in + j]) + (1<<sh>>1)) >> sh;
  182.             }
  183.         }
  184.         ((int16_t*)s->native_simd_one)[1] = 14;
  185.         ((int16_t*)s->native_simd_one)[0] = 16384;
  186.     } else if(s->midbuf.fmt == AV_SAMPLE_FMT_FLTP){
  187.         if(mm_flags & AV_CPU_FLAG_SSE) {
  188.             s->mix_1_1_simd = ff_mix_1_1_a_float_sse;
  189.             s->mix_2_1_simd = ff_mix_2_1_a_float_sse;
  190.         }
  191.         if(HAVE_AVX_EXTERNAL && mm_flags & AV_CPU_FLAG_AVX) {
  192.             s->mix_1_1_simd = ff_mix_1_1_a_float_avx;
  193.             s->mix_2_1_simd = ff_mix_2_1_a_float_avx;
  194.         }
  195.         s->native_simd_matrix = av_mallocz(num * sizeof(float));
  196.         memcpy(s->native_simd_matrix, s->native_matrix, num * sizeof(float));
  197.         s->native_simd_one = av_mallocz(sizeof(float));
  198.         memcpy(s->native_simd_one, s->native_one, sizeof(float));
  199.     }
  200. }
  201.