Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
6148 | serge | 1 | /* |
2 | * audio encoder psychoacoustic model |
||
3 | * Copyright (C) 2008 Konstantin Shishkov |
||
4 | * |
||
5 | * This file is part of FFmpeg. |
||
6 | * |
||
7 | * FFmpeg is free software; you can redistribute it and/or |
||
8 | * modify it under the terms of the GNU Lesser General Public |
||
9 | * License as published by the Free Software Foundation; either |
||
10 | * version 2.1 of the License, or (at your option) any later version. |
||
11 | * |
||
12 | * FFmpeg is distributed in the hope that it will be useful, |
||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||
15 | * Lesser General Public License for more details. |
||
16 | * |
||
17 | * You should have received a copy of the GNU Lesser General Public |
||
18 | * License along with FFmpeg; if not, write to the Free Software |
||
19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||
20 | */ |
||
21 | |||
22 | #ifndef AVCODEC_PSYMODEL_H |
||
23 | #define AVCODEC_PSYMODEL_H |
||
24 | |||
25 | #include "avcodec.h" |
||
26 | |||
27 | /** maximum possible number of bands */ |
||
28 | #define PSY_MAX_BANDS 128 |
||
29 | /** maximum number of channels */ |
||
30 | #define PSY_MAX_CHANS 20 |
||
31 | |||
/**
 * Evaluate the AAC cutoff value for a context.
 *
 * When bit_rate is zero the result is sample_rate / 2; otherwise it is the
 * smallest of 4000 + bit_rate / 8, 12000 + bit_rate / 32 and sample_rate / 2.
 *
 * @param s pointer expression to a context with bit_rate and sample_rate
 *          members; it is expanded several times, so it must not have side
 *          effects. Every use is parenthesized so that arguments such as
 *          `p + 1` or `&arr[i]` bind correctly to the `->` operator.
 */
#define AAC_CUTOFF(s) ((s)->bit_rate ? FFMIN3(4000 + (s)->bit_rate/8, 12000 + (s)->bit_rate/32, (s)->sample_rate / 2) : ((s)->sample_rate / 2))
||
33 | |||
34 | /** |
||
35 | * single band psychoacoustic information |
||
36 | */ |
||
37 | typedef struct FFPsyBand { |
||
38 | int bits; |
||
39 | float energy; |
||
40 | float threshold; |
||
41 | float distortion; |
||
42 | float perceptual_weight; |
||
43 | } FFPsyBand; |
||
44 | |||
45 | /** |
||
46 | * single channel psychoacoustic information |
||
47 | */ |
||
48 | typedef struct FFPsyChannel { |
||
49 | FFPsyBand psy_bands[PSY_MAX_BANDS]; ///< channel bands information |
||
50 | float entropy; ///< total PE for this channel |
||
51 | } FFPsyChannel; |
||
52 | |||
53 | /** |
||
54 | * psychoacoustic information for an arbitrary group of channels |
||
55 | */ |
||
56 | typedef struct FFPsyChannelGroup { |
||
57 | FFPsyChannel *ch[PSY_MAX_CHANS]; ///< pointers to the individual channels in the group |
||
58 | uint8_t num_ch; ///< number of channels in this group |
||
59 | uint8_t coupling[PSY_MAX_BANDS]; ///< allow coupling for this band in the group |
||
60 | } FFPsyChannelGroup; |
||
61 | |||
62 | /** |
||
63 | * windowing related information |
||
64 | */ |
||
65 | typedef struct FFPsyWindowInfo { |
||
66 | int window_type[3]; ///< window type (short/long/transitional, etc.) - current, previous and next |
||
67 | int window_shape; ///< window shape (sine/KBD/whatever) |
||
68 | int num_windows; ///< number of windows in a frame |
||
69 | int grouping[8]; ///< window grouping (e.g. for AAC) |
||
70 | int *window_sizes; ///< sequence of window sizes inside one frame (e.g. for WMA) |
||
71 | } FFPsyWindowInfo; |
||
72 | |||
73 | /** |
||
74 | * context used by psychoacoustic model |
||
75 | */ |
||
76 | typedef struct FFPsyContext { |
||
77 | AVCodecContext *avctx; ///< encoder context |
||
78 | const struct FFPsyModel *model; ///< encoder-specific model functions |
||
79 | |||
80 | FFPsyChannel *ch; ///< single channel information |
||
81 | FFPsyChannelGroup *group; ///< channel group information |
||
82 | int num_groups; ///< number of channel groups |
||
83 | |||
84 | uint8_t **bands; ///< scalefactor band sizes for possible frame sizes |
||
85 | int *num_bands; ///< number of scalefactor bands for possible frame sizes |
||
86 | int num_lens; ///< number of scalefactor band sets |
||
87 | |||
88 | struct { |
||
89 | int size; ///< size of the bit reservoir in bits |
||
90 | int bits; ///< number of bits used in the bit reservoir |
||
91 | } bitres; |
||
92 | |||
93 | void* model_priv_data; ///< psychoacoustic model implementation private data |
||
94 | } FFPsyContext; |
||
95 | |||
96 | /** |
||
97 | * codec-specific psychoacoustic model implementation |
||
98 | */ |
||
99 | typedef struct FFPsyModel { |
||
100 | const char *name; |
||
101 | int (*init) (FFPsyContext *apc); |
||
102 | |||
103 | /** |
||
104 | * Suggest window sequence for channel. |
||
105 | * |
||
106 | * @param ctx model context |
||
107 | * @param audio samples for the current frame |
||
108 | * @param la lookahead samples (NULL when unavailable) |
||
109 | * @param channel number of channel element to analyze |
||
110 | * @param prev_type previous window type |
||
111 | * |
||
112 | * @return suggested window information in a structure |
||
113 | */ |
||
114 | FFPsyWindowInfo (*window)(FFPsyContext *ctx, const float *audio, const float *la, int channel, int prev_type); |
||
115 | |||
116 | /** |
||
117 | * Perform psychoacoustic analysis and set band info (threshold, energy) for a group of channels. |
||
118 | * |
||
119 | * @param ctx model context |
||
120 | * @param channel channel number of the first channel in the group to perform analysis on |
||
121 | * @param coeffs array of pointers to the transformed coefficients |
||
122 | * @param wi window information for the channels in the group |
||
123 | */ |
||
124 | void (*analyze)(FFPsyContext *ctx, int channel, const float **coeffs, const FFPsyWindowInfo *wi); |
||
125 | |||
126 | void (*end) (FFPsyContext *apc); |
||
127 | } FFPsyModel; |
||
128 | |||
129 | /** |
||
130 | * Initialize psychoacoustic model. |
||
131 | * |
||
132 | * @param ctx model context |
||
133 | * @param avctx codec context |
||
134 | * @param num_lens number of possible frame lengths |
||
135 | * @param bands scalefactor band lengths for all frame lengths |
||
136 | * @param num_bands number of scalefactor bands for all frame lengths |
||
137 | * @param num_groups number of channel groups |
||
138 | * @param group_map array holding, for each group, the number of channels in that group minus one |
||
139 | * |
||
140 | * @return zero if successful, a negative value if not |
||
141 | */ |
||
142 | int ff_psy_init(FFPsyContext *ctx, AVCodecContext *avctx, int num_lens, |
||
143 | const uint8_t **bands, const int *num_bands, |
||
144 | int num_groups, const uint8_t *group_map); |
||
145 | |||
146 | /** |
||
147 | * Determine what group a channel belongs to. |
||
148 | * |
||
149 | * @param ctx psymodel context |
||
150 | * @param channel channel to locate the group for |
||
151 | * |
||
152 | * @return pointer to the FFPsyChannelGroup this channel belongs to |
||
153 | */ |
||
154 | FFPsyChannelGroup *ff_psy_find_group(FFPsyContext *ctx, int channel); |
||
155 | |||
156 | /** |
||
157 | * Cleanup model context at the end. |
||
158 | * |
||
159 | * @param ctx model context |
||
160 | */ |
||
161 | void ff_psy_end(FFPsyContext *ctx); |
||
162 | |||
163 | |||
164 | /************************************************************************** |
||
165 | * Audio preprocessing stuff. * |
||
166 | * This should be moved into some audio filter eventually. * |
||
167 | **************************************************************************/ |
||
168 | struct FFPsyPreprocessContext; |
||
169 | |||
170 | /** |
||
171 | * psychoacoustic model audio preprocessing initialization |
||
172 | */ |
||
173 | struct FFPsyPreprocessContext *ff_psy_preprocess_init(AVCodecContext *avctx); |
||
174 | |||
175 | /** |
||
176 | * Preprocess several channels of an audio frame so that it compresses better. |
||
177 | * |
||
178 | * @param ctx preprocessing context |
||
179 | * @param audio samples to be filtered (in place) |
||
180 | * @param channels number of channels to preprocess |
||
181 | */ |
||
182 | void ff_psy_preprocess(struct FFPsyPreprocessContext *ctx, float **audio, int channels); |
||
183 | |||
184 | /** |
||
185 | * Cleanup audio preprocessing module. |
||
186 | */ |
||
187 | void ff_psy_preprocess_end(struct FFPsyPreprocessContext *ctx); |
||
188 | |||
189 | #endif /* AVCODEC_PSYMODEL_H */ |