Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
6148 serge 1
/*
 * Copyright (C) 2012 Michael Niedermayer (michaelni@gmx.at)
 *
 * This file is part of libswresample
 *
 * libswresample is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * libswresample is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with libswresample; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
20
 
21
#include "libswresample/swresample_internal.h"
22
#include "libswresample/audioconvert.h"
23
 
24
/*
 * Prototypes for the externally defined x86 sample-format converters.
 *
 * PROTO  declares a single converter named ff<pre>_<in>_to_<out>_a_<cap>.
 * PROTO2 fixes the output format and expands PROTO for every input format
 *        (int16, int32, float).
 * PROTO3 expands PROTO2 for every output format (int16, int32, float).
 * PROTO4 expands PROTO3 for every capability suffix
 *        (mmx, sse, sse2, ssse3, sse4, avx).
 *
 * Every declared function has the signature
 *     void f(uint8_t **dst, const uint8_t **src, int len);
 *
 * PROTO4() is deliberately invoked with an empty prefix to declare the
 * plain ff_<in>_to_<out>_a_<cap> family. The macros only declare names;
 * nothing in this block shows which of them are actually defined.
 */
#define PROTO(pre, in, out, cap) void ff ## pre ## _ ##in## _to_ ##out## _a_ ##cap(uint8_t **dst, const uint8_t **src, int len);
#define PROTO2(pre, out, cap) PROTO(pre, int16, out, cap) PROTO(pre, int32, out, cap) PROTO(pre, float, out, cap)
#define PROTO3(pre, cap) PROTO2(pre, int16, cap) PROTO2(pre, int32, cap) PROTO2(pre, float, cap)
#define PROTO4(pre) PROTO3(pre, mmx) PROTO3(pre, sse) PROTO3(pre, sse2) PROTO3(pre, ssse3) PROTO3(pre, sse4) PROTO3(pre, avx)
PROTO4()
PROTO4(_pack_2ch)
PROTO4(_pack_6ch)
PROTO4(_unpack_2ch)
32
 
33
av_cold void swri_audio_convert_init_x86(struct AudioConvert *ac,
34
                                 enum AVSampleFormat out_fmt,
35
                                 enum AVSampleFormat in_fmt,
36
                                 int channels){
37
    int mm_flags = av_get_cpu_flags();
38
 
39
    ac->simd_f= NULL;
40
 
41
//FIXME add memcpy case
42
 
43
#define MULTI_CAPS_FUNC(flag, cap) \
44
    if (mm_flags & flag) {\
45
        if(   out_fmt == AV_SAMPLE_FMT_S32  && in_fmt == AV_SAMPLE_FMT_S16 || out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_S16P)\
46
            ac->simd_f =  ff_int16_to_int32_a_ ## cap;\
47
        if(   out_fmt == AV_SAMPLE_FMT_S16  && in_fmt == AV_SAMPLE_FMT_S32 || out_fmt == AV_SAMPLE_FMT_S16P && in_fmt == AV_SAMPLE_FMT_S32P)\
48
            ac->simd_f =  ff_int32_to_int16_a_ ## cap;\
49
    }
50
 
51
MULTI_CAPS_FUNC(AV_CPU_FLAG_MMX, mmx)
52
MULTI_CAPS_FUNC(AV_CPU_FLAG_SSE2, sse2)
53
 
54
    if(mm_flags & AV_CPU_FLAG_MMX) {
55
        if(channels == 6) {
56
            if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P)
57
                ac->simd_f =  ff_pack_6ch_float_to_float_a_mmx;
58
        }
59
    }
60
 
61
    if(mm_flags & AV_CPU_FLAG_SSE2) {
62
        if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_S32 || out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_S32P)
63
            ac->simd_f =  ff_int32_to_float_a_sse2;
64
        if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_S16 || out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_S16P)
65
            ac->simd_f =  ff_int16_to_float_a_sse2;
66
        if(   out_fmt == AV_SAMPLE_FMT_S32  && in_fmt == AV_SAMPLE_FMT_FLT || out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_FLTP)
67
            ac->simd_f =  ff_float_to_int32_a_sse2;
68
        if(   out_fmt == AV_SAMPLE_FMT_S16  && in_fmt == AV_SAMPLE_FMT_FLT || out_fmt == AV_SAMPLE_FMT_S16P && in_fmt == AV_SAMPLE_FMT_FLTP)
69
            ac->simd_f =  ff_float_to_int16_a_sse2;
70
 
71
        if(channels == 2) {
72
            if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P)
73
                ac->simd_f =  ff_pack_2ch_int32_to_int32_a_sse2;
74
            if(   out_fmt == AV_SAMPLE_FMT_S16  && in_fmt == AV_SAMPLE_FMT_S16P)
75
                ac->simd_f =  ff_pack_2ch_int16_to_int16_a_sse2;
76
            if(   out_fmt == AV_SAMPLE_FMT_S32  && in_fmt == AV_SAMPLE_FMT_S16P)
77
                ac->simd_f =  ff_pack_2ch_int16_to_int32_a_sse2;
78
            if(   out_fmt == AV_SAMPLE_FMT_S16  && in_fmt == AV_SAMPLE_FMT_S32P)
79
                ac->simd_f =  ff_pack_2ch_int32_to_int16_a_sse2;
80
 
81
            if(   out_fmt == AV_SAMPLE_FMT_FLTP  && in_fmt == AV_SAMPLE_FMT_FLT || out_fmt == AV_SAMPLE_FMT_S32P && in_fmt == AV_SAMPLE_FMT_S32)
82
                ac->simd_f =  ff_unpack_2ch_int32_to_int32_a_sse2;
83
            if(   out_fmt == AV_SAMPLE_FMT_S16P  && in_fmt == AV_SAMPLE_FMT_S16)
84
                ac->simd_f =  ff_unpack_2ch_int16_to_int16_a_sse2;
85
            if(   out_fmt == AV_SAMPLE_FMT_S32P  && in_fmt == AV_SAMPLE_FMT_S16)
86
                ac->simd_f =  ff_unpack_2ch_int16_to_int32_a_sse2;
87
            if(   out_fmt == AV_SAMPLE_FMT_S16P  && in_fmt == AV_SAMPLE_FMT_S32)
88
                ac->simd_f =  ff_unpack_2ch_int32_to_int16_a_sse2;
89
 
90
            if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_S32P)
91
                ac->simd_f =  ff_pack_2ch_int32_to_float_a_sse2;
92
            if(   out_fmt == AV_SAMPLE_FMT_S32  && in_fmt == AV_SAMPLE_FMT_FLTP)
93
                ac->simd_f =  ff_pack_2ch_float_to_int32_a_sse2;
94
            if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_S16P)
95
                ac->simd_f =  ff_pack_2ch_int16_to_float_a_sse2;
96
            if(   out_fmt == AV_SAMPLE_FMT_S16  && in_fmt == AV_SAMPLE_FMT_FLTP)
97
                ac->simd_f =  ff_pack_2ch_float_to_int16_a_sse2;
98
            if(   out_fmt == AV_SAMPLE_FMT_FLTP  && in_fmt == AV_SAMPLE_FMT_S32)
99
                ac->simd_f =  ff_unpack_2ch_int32_to_float_a_sse2;
100
            if(   out_fmt == AV_SAMPLE_FMT_S32P  && in_fmt == AV_SAMPLE_FMT_FLT)
101
                ac->simd_f =  ff_unpack_2ch_float_to_int32_a_sse2;
102
            if(   out_fmt == AV_SAMPLE_FMT_FLTP  && in_fmt == AV_SAMPLE_FMT_S16)
103
                ac->simd_f =  ff_unpack_2ch_int16_to_float_a_sse2;
104
            if(   out_fmt == AV_SAMPLE_FMT_S16P  && in_fmt == AV_SAMPLE_FMT_FLT)
105
                ac->simd_f =  ff_unpack_2ch_float_to_int16_a_sse2;
106
        }
107
    }
108
    if(mm_flags & AV_CPU_FLAG_SSSE3) {
109
        if(channels == 2) {
110
            if(   out_fmt == AV_SAMPLE_FMT_S16P  && in_fmt == AV_SAMPLE_FMT_S16)
111
                ac->simd_f =  ff_unpack_2ch_int16_to_int16_a_ssse3;
112
            if(   out_fmt == AV_SAMPLE_FMT_S32P  && in_fmt == AV_SAMPLE_FMT_S16)
113
                ac->simd_f =  ff_unpack_2ch_int16_to_int32_a_ssse3;
114
            if(   out_fmt == AV_SAMPLE_FMT_FLTP  && in_fmt == AV_SAMPLE_FMT_S16)
115
                ac->simd_f =  ff_unpack_2ch_int16_to_float_a_ssse3;
116
        }
117
    }
118
    if(mm_flags & AV_CPU_FLAG_SSE4) {
119
        if(channels == 6) {
120
            if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P)
121
                ac->simd_f =  ff_pack_6ch_float_to_float_a_sse4;
122
            if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_S32P)
123
                ac->simd_f =  ff_pack_6ch_int32_to_float_a_sse4;
124
            if(   out_fmt == AV_SAMPLE_FMT_S32  && in_fmt == AV_SAMPLE_FMT_FLTP)
125
                ac->simd_f =  ff_pack_6ch_float_to_int32_a_sse4;
126
        }
127
    }
128
    if(HAVE_AVX_EXTERNAL && mm_flags & AV_CPU_FLAG_AVX) {
129
        if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_S32 || out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_S32P)
130
            ac->simd_f =  ff_int32_to_float_a_avx;
131
        if(channels == 6) {
132
            if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P)
133
                ac->simd_f =  ff_pack_6ch_float_to_float_a_avx;
134
            if(   out_fmt == AV_SAMPLE_FMT_FLT  && in_fmt == AV_SAMPLE_FMT_S32P)
135
                ac->simd_f =  ff_pack_6ch_int32_to_float_a_avx;
136
            if(   out_fmt == AV_SAMPLE_FMT_S32  && in_fmt == AV_SAMPLE_FMT_FLTP)
137
                ac->simd_f =  ff_pack_6ch_float_to_int32_a_avx;
138
        }
139
    }
140
}
141
 
142
#define D(type, simd) \
143
mix_1_1_func_type ff_mix_1_1_a_## type ## _ ## simd;\
144
mix_2_1_func_type ff_mix_2_1_a_## type ## _ ## simd;
145
 
146
D(float, sse)
147
D(float, avx)
148
D(int16, mmx)
149
D(int16, sse2)
150
 
151
 
152
av_cold void swri_rematrix_init_x86(struct SwrContext *s){
153
    int mm_flags = av_get_cpu_flags();
154
    int nb_in  = av_get_channel_layout_nb_channels(s->in_ch_layout);
155
    int nb_out = av_get_channel_layout_nb_channels(s->out_ch_layout);
156
    int num    = nb_in * nb_out;
157
    int i,j;
158
 
159
    s->mix_1_1_simd = NULL;
160
    s->mix_2_1_simd = NULL;
161
 
162
    if (s->midbuf.fmt == AV_SAMPLE_FMT_S16P){
163
        if(mm_flags & AV_CPU_FLAG_MMX) {
164
            s->mix_1_1_simd = ff_mix_1_1_a_int16_mmx;
165
            s->mix_2_1_simd = ff_mix_2_1_a_int16_mmx;
166
        }
167
        if(mm_flags & AV_CPU_FLAG_SSE2) {
168
            s->mix_1_1_simd = ff_mix_1_1_a_int16_sse2;
169
            s->mix_2_1_simd = ff_mix_2_1_a_int16_sse2;
170
        }
171
        s->native_simd_matrix = av_mallocz(2 * num * sizeof(int16_t));
172
        s->native_simd_one    = av_mallocz(2 * sizeof(int16_t));
173
        for(i=0; i
174
            int sh = 0;
175
            for(j=0; j
176
                sh = FFMAX(sh, FFABS(((int*)s->native_matrix)[i * nb_in + j]));
177
            sh = FFMAX(av_log2(sh) - 14, 0);
178
            for(j=0; j
179
                ((int16_t*)s->native_simd_matrix)[2*(i * nb_in + j)+1] = 15 - sh;
180
                ((int16_t*)s->native_simd_matrix)[2*(i * nb_in + j)] =
181
                    ((((int*)s->native_matrix)[i * nb_in + j]) + (1<>1)) >> sh;
182
            }
183
        }
184
        ((int16_t*)s->native_simd_one)[1] = 14;
185
        ((int16_t*)s->native_simd_one)[0] = 16384;
186
    } else if(s->midbuf.fmt == AV_SAMPLE_FMT_FLTP){
187
        if(mm_flags & AV_CPU_FLAG_SSE) {
188
            s->mix_1_1_simd = ff_mix_1_1_a_float_sse;
189
            s->mix_2_1_simd = ff_mix_2_1_a_float_sse;
190
        }
191
        if(HAVE_AVX_EXTERNAL && mm_flags & AV_CPU_FLAG_AVX) {
192
            s->mix_1_1_simd = ff_mix_1_1_a_float_avx;
193
            s->mix_2_1_simd = ff_mix_2_1_a_float_avx;
194
        }
195
        s->native_simd_matrix = av_mallocz(num * sizeof(float));
196
        memcpy(s->native_simd_matrix, s->native_matrix, num * sizeof(float));
197
        s->native_simd_one = av_mallocz(sizeof(float));
198
        memcpy(s->native_simd_one, s->native_one, sizeof(float));
199
    }
200
}