Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
4349 Serge 1
/*
2
 * Copyright (c) 2012
3
 *      MIPS Technologies, Inc., California.
4
 *
5
 * Redistribution and use in source and binary forms, with or without
6
 * modification, are permitted provided that the following conditions
7
 * are met:
8
 * 1. Redistributions of source code must retain the above copyright
9
 *    notice, this list of conditions and the following disclaimer.
10
 * 2. Redistributions in binary form must reproduce the above copyright
11
 *    notice, this list of conditions and the following disclaimer in the
12
 *    documentation and/or other materials provided with the distribution.
13
 * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
14
 *    contributors may be used to endorse or promote products derived from
15
 *    this software without specific prior written permission.
16
 *
17
 * THIS SOFTWARE IS PROVIDED BY THE MIPS TECHNOLOGIES, INC. ``AS IS'' AND
18
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE MIPS TECHNOLOGIES, INC. BE LIABLE
21
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27
 * SUCH DAMAGE.
28
 *
29
 * Authors:  Darko Laus      (darko@mips.com)
30
 *           Djordje Pesut   (djordje@mips.com)
31
 *           Mirjana Vulin   (mvulin@mips.com)
32
 *
33
 * This file is part of FFmpeg.
34
 *
35
 * FFmpeg is free software; you can redistribute it and/or
36
 * modify it under the terms of the GNU Lesser General Public
37
 * License as published by the Free Software Foundation; either
38
 * version 2.1 of the License, or (at your option) any later version.
39
 *
40
 * FFmpeg is distributed in the hope that it will be useful,
41
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
42
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
43
 * Lesser General Public License for more details.
44
 *
45
 * You should have received a copy of the GNU Lesser General Public
46
 * License along with FFmpeg; if not, write to the Free Software
47
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
48
 */
49
 
50
/**
51
 * @file
52
 * Reference: libavcodec/aacdec.c
53
 */
54
 
55
#include "libavcodec/aac.h"
56
#include "aacdec_mips.h"
57
#include "libavcodec/aactab.h"
58
#include "libavcodec/sinewin.h"
59
 
60
#if HAVE_INLINE_ASM
61
/**
 * Advance a linear congruential generator by one step.
 *
 * The next state is computed in unsigned arithmetic as
 * previous_val * 1664525 + 1013904223 and then read back through a union
 * as a signed int.
 *
 * @param previous_val current generator state
 * @return next generator state, reinterpreted as int
 */
static av_always_inline int lcg_random(unsigned previous_val)
{
    union {
        unsigned bits;
        int      value;
    } next;

    next.bits = previous_val * 1664525u + 1013904223;
    return next.value;
}
66
 
67
static void imdct_and_windowing_mips(AACContext *ac, SingleChannelElement *sce)
68
{
69
    IndividualChannelStream *ics = &sce->ics;
70
    float *in    = sce->coeffs;
71
    float *out   = sce->ret;
72
    float *saved = sce->saved;
73
    const float *swindow      = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
74
    const float *lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
75
    const float *swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
76
    float *buf  = ac->buf_mdct;
77
    int i;
78
 
79
    if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
80
        for (i = 0; i < 1024; i += 128)
81
            ac->mdct_small.imdct_half(&ac->mdct_small, buf + i, in + i);
82
    } else
83
        ac->mdct.imdct_half(&ac->mdct, buf, in);
84
 
85
    /* window overlapping
86
     * NOTE: To simplify the overlapping code, all 'meaningless' short to long
87
     * and long to short transitions are considered to be short to short
88
     * transitions. This leaves just two cases (long to long and short to short)
89
     * with a little special sauce for EIGHT_SHORT_SEQUENCE.
90
     */
91
    if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) &&
92
            (ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) {
93
        ac->fdsp.vector_fmul_window(    out,               saved,            buf,         lwindow_prev, 512);
94
    } else {
95
        {
96
            float *buf1 = saved;
97
            float *buf2 = out;
98
            int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
99
            int loop_end;
100
 
101
            /* loop unrolled 8 times */
102
            __asm__ volatile (
103
                ".set push                               \n\t"
104
                ".set noreorder                          \n\t"
105
                "addiu   %[loop_end], %[src],      1792  \n\t"
106
            "1:                                          \n\t"
107
                "lw      %[temp0],    0(%[src])          \n\t"
108
                "lw      %[temp1],    4(%[src])          \n\t"
109
                "lw      %[temp2],    8(%[src])          \n\t"
110
                "lw      %[temp3],    12(%[src])         \n\t"
111
                "lw      %[temp4],    16(%[src])         \n\t"
112
                "lw      %[temp5],    20(%[src])         \n\t"
113
                "lw      %[temp6],    24(%[src])         \n\t"
114
                "lw      %[temp7],    28(%[src])         \n\t"
115
                "addiu   %[src],      %[src],      32    \n\t"
116
                "sw      %[temp0],    0(%[dst])          \n\t"
117
                "sw      %[temp1],    4(%[dst])          \n\t"
118
                "sw      %[temp2],    8(%[dst])          \n\t"
119
                "sw      %[temp3],    12(%[dst])         \n\t"
120
                "sw      %[temp4],    16(%[dst])         \n\t"
121
                "sw      %[temp5],    20(%[dst])         \n\t"
122
                "sw      %[temp6],    24(%[dst])         \n\t"
123
                "sw      %[temp7],    28(%[dst])         \n\t"
124
                "bne     %[src],      %[loop_end], 1b    \n\t"
125
                " addiu  %[dst],      %[dst],      32    \n\t"
126
                ".set pop                                \n\t"
127
 
128
                : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
129
                  [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
130
                  [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
131
                  [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
132
                  [loop_end]"=&r"(loop_end), [src]"+r"(buf1),
133
                  [dst]"+r"(buf2)
134
                :
135
                : "memory"
136
            );
137
        }
138
 
139
        if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
140
            {
141
                float wi;
142
                float wj;
143
                int i;
144
                float temp0, temp1, temp2, temp3;
145
                float *dst0 = out + 448 + 0*128;
146
                float *dst1 = dst0 + 64 + 63;
147
                float *dst2 = saved + 63;
148
                float *win0 = (float*)swindow;
149
                float *win1 = win0 + 64 + 63;
150
                float *win0_prev = (float*)swindow_prev;
151
                float *win1_prev = win0_prev + 64 + 63;
152
                float *src0_prev = saved + 448;
153
                float *src1_prev = buf + 0*128 + 63;
154
                float *src0 = buf + 0*128 + 64;
155
                float *src1 = buf + 1*128 + 63;
156
 
157
                for(i = 0; i < 64; i++)
158
                {
159
                    temp0 = src0_prev[0];
160
                    temp1 = src1_prev[0];
161
                    wi = *win0_prev;
162
                    wj = *win1_prev;
163
                    temp2 = src0[0];
164
                    temp3 = src1[0];
165
                    dst0[0] = temp0 * wj - temp1 * wi;
166
                    dst1[0] = temp0 * wi + temp1 * wj;
167
 
168
                    wi = *win0;
169
                    wj = *win1;
170
 
171
                    temp0 = src0[128];
172
                    temp1 = src1[128];
173
                    dst0[128] = temp2 * wj - temp3 * wi;
174
                    dst1[128] = temp2 * wi + temp3 * wj;
175
 
176
                    temp2 = src0[256];
177
                    temp3 = src1[256];
178
                    dst0[256] = temp0 * wj - temp1 * wi;
179
                    dst1[256] = temp0 * wi + temp1 * wj;
180
                    dst0[384] = temp2 * wj - temp3 * wi;
181
                    dst1[384] = temp2 * wi + temp3 * wj;
182
 
183
                    temp0 = src0[384];
184
                    temp1 = src1[384];
185
                    dst0[512] = temp0 * wj - temp1 * wi;
186
                    dst2[0] = temp0 * wi + temp1 * wj;
187
 
188
                    src0++;
189
                    src1--;
190
                    src0_prev++;
191
                    src1_prev--;
192
                    win0++;
193
                    win1--;
194
                    win0_prev++;
195
                    win1_prev--;
196
                    dst0++;
197
                    dst1--;
198
                    dst2--;
199
                }
200
            }
201
        } else {
202
            ac->fdsp.vector_fmul_window(out + 448,         saved + 448,      buf,         swindow_prev, 64);
203
            {
204
                float *buf1 = buf + 64;
205
                float *buf2 = out + 576;
206
                int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
207
                int loop_end;
208
 
209
                /* loop unrolled 8 times */
210
                __asm__ volatile (
211
                    ".set push                               \n\t"
212
                    ".set noreorder                          \n\t"
213
                    "addiu   %[loop_end], %[src],      1792  \n\t"
214
                "1:                                          \n\t"
215
                    "lw      %[temp0],    0(%[src])          \n\t"
216
                    "lw      %[temp1],    4(%[src])          \n\t"
217
                    "lw      %[temp2],    8(%[src])          \n\t"
218
                    "lw      %[temp3],    12(%[src])         \n\t"
219
                    "lw      %[temp4],    16(%[src])         \n\t"
220
                    "lw      %[temp5],    20(%[src])         \n\t"
221
                    "lw      %[temp6],    24(%[src])         \n\t"
222
                    "lw      %[temp7],    28(%[src])         \n\t"
223
                    "addiu   %[src],      %[src],      32    \n\t"
224
                    "sw      %[temp0],    0(%[dst])          \n\t"
225
                    "sw      %[temp1],    4(%[dst])          \n\t"
226
                    "sw      %[temp2],    8(%[dst])          \n\t"
227
                    "sw      %[temp3],    12(%[dst])         \n\t"
228
                    "sw      %[temp4],    16(%[dst])         \n\t"
229
                    "sw      %[temp5],    20(%[dst])         \n\t"
230
                    "sw      %[temp6],    24(%[dst])         \n\t"
231
                    "sw      %[temp7],    28(%[dst])         \n\t"
232
                    "bne     %[src],      %[loop_end], 1b    \n\t"
233
                    " addiu  %[dst],      %[dst],      32    \n\t"
234
                    ".set pop                                \n\t"
235
 
236
                    : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
237
                      [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
238
                      [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
239
                      [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
240
                      [loop_end]"=&r"(loop_end), [src]"+r"(buf1),
241
                      [dst]"+r"(buf2)
242
                    :
243
                    : "memory"
244
                );
245
            }
246
        }
247
    }
248
 
249
    // buffer update
250
    if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
251
        ac->fdsp.vector_fmul_window(saved + 64,  buf + 4*128 + 64, buf + 5*128, swindow, 64);
252
        ac->fdsp.vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 64);
253
        ac->fdsp.vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 64);
254
        {
255
            float *buf1 = buf + 7*128 + 64;
256
            float *buf2 = saved + 448;
257
            int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
258
            int loop_end;
259
 
260
            /* loop unrolled 8 times */
261
            __asm__ volatile (
262
                ".set push                                \n\t"
263
                ".set noreorder                           \n\t"
264
                "addiu   %[loop_end], %[src],       256   \n\t"
265
            "1:                                           \n\t"
266
                "lw      %[temp0],    0(%[src])           \n\t"
267
                "lw      %[temp1],    4(%[src])           \n\t"
268
                "lw      %[temp2],    8(%[src])           \n\t"
269
                "lw      %[temp3],    12(%[src])          \n\t"
270
                "lw      %[temp4],    16(%[src])          \n\t"
271
                "lw      %[temp5],    20(%[src])          \n\t"
272
                "lw      %[temp6],    24(%[src])          \n\t"
273
                "lw      %[temp7],    28(%[src])          \n\t"
274
                "addiu   %[src],      %[src],       32    \n\t"
275
                "sw      %[temp0],    0(%[dst])           \n\t"
276
                "sw      %[temp1],    4(%[dst])           \n\t"
277
                "sw      %[temp2],    8(%[dst])           \n\t"
278
                "sw      %[temp3],    12(%[dst])          \n\t"
279
                "sw      %[temp4],    16(%[dst])          \n\t"
280
                "sw      %[temp5],    20(%[dst])          \n\t"
281
                "sw      %[temp6],    24(%[dst])          \n\t"
282
                "sw      %[temp7],    28(%[dst])          \n\t"
283
                "bne     %[src],      %[loop_end],  1b    \n\t"
284
                " addiu  %[dst],      %[dst],       32    \n\t"
285
                ".set pop                                 \n\t"
286
 
287
                : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
288
                  [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
289
                  [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
290
                  [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
291
                  [loop_end]"=&r"(loop_end), [src]"+r"(buf1),
292
                  [dst]"+r"(buf2)
293
                :
294
                : "memory"
295
            );
296
        }
297
    } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
298
        float *buf1 = buf + 512;
299
        float *buf2 = saved;
300
        int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
301
        int loop_end;
302
 
303
        /* loop unrolled 8 times */
304
        __asm__ volatile (
305
            ".set push                                \n\t"
306
            ".set noreorder                           \n\t"
307
            "addiu   %[loop_end], %[src],       1792  \n\t"
308
        "1:                                           \n\t"
309
            "lw      %[temp0],    0(%[src])           \n\t"
310
            "lw      %[temp1],    4(%[src])           \n\t"
311
            "lw      %[temp2],    8(%[src])           \n\t"
312
            "lw      %[temp3],    12(%[src])          \n\t"
313
            "lw      %[temp4],    16(%[src])          \n\t"
314
            "lw      %[temp5],    20(%[src])          \n\t"
315
            "lw      %[temp6],    24(%[src])          \n\t"
316
            "lw      %[temp7],    28(%[src])          \n\t"
317
            "addiu   %[src],      %[src],       32    \n\t"
318
            "sw      %[temp0],    0(%[dst])           \n\t"
319
            "sw      %[temp1],    4(%[dst])           \n\t"
320
            "sw      %[temp2],    8(%[dst])           \n\t"
321
            "sw      %[temp3],    12(%[dst])          \n\t"
322
            "sw      %[temp4],    16(%[dst])          \n\t"
323
            "sw      %[temp5],    20(%[dst])          \n\t"
324
            "sw      %[temp6],    24(%[dst])          \n\t"
325
            "sw      %[temp7],    28(%[dst])          \n\t"
326
            "bne     %[src],      %[loop_end],  1b    \n\t"
327
            " addiu  %[dst],      %[dst],       32    \n\t"
328
            ".set pop                                 \n\t"
329
 
330
            : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
331
              [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
332
              [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
333
              [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
334
              [loop_end]"=&r"(loop_end), [src]"+r"(buf1),
335
              [dst]"+r"(buf2)
336
            :
337
            : "memory"
338
        );
339
        {
340
            float *buf1 = buf + 7*128 + 64;
341
            float *buf2 = saved + 448;
342
            int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
343
            int loop_end;
344
 
345
            /* loop unrolled 8 times */
346
            __asm__ volatile (
347
                ".set push                                 \n\t"
348
                ".set noreorder                            \n\t"
349
                "addiu   %[loop_end], %[src],        256   \n\t"
350
            "1:                                            \n\t"
351
                "lw      %[temp0],    0(%[src])            \n\t"
352
                "lw      %[temp1],    4(%[src])            \n\t"
353
                "lw      %[temp2],    8(%[src])            \n\t"
354
                "lw      %[temp3],    12(%[src])           \n\t"
355
                "lw      %[temp4],    16(%[src])           \n\t"
356
                "lw      %[temp5],    20(%[src])           \n\t"
357
                "lw      %[temp6],    24(%[src])           \n\t"
358
                "lw      %[temp7],    28(%[src])           \n\t"
359
                "addiu   %[src],      %[src],        32    \n\t"
360
                "sw      %[temp0],    0(%[dst])            \n\t"
361
                "sw      %[temp1],    4(%[dst])            \n\t"
362
                "sw      %[temp2],    8(%[dst])            \n\t"
363
                "sw      %[temp3],    12(%[dst])           \n\t"
364
                "sw      %[temp4],    16(%[dst])           \n\t"
365
                "sw      %[temp5],    20(%[dst])           \n\t"
366
                "sw      %[temp6],    24(%[dst])           \n\t"
367
                "sw      %[temp7],    28(%[dst])           \n\t"
368
                "bne     %[src],      %[loop_end],   1b    \n\t"
369
                " addiu  %[dst],      %[dst],        32    \n\t"
370
                ".set pop                                  \n\t"
371
 
372
                : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
373
                  [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
374
                  [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
375
                  [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
376
                  [loop_end]"=&r"(loop_end), [src]"+r"(buf1),
377
                  [dst]"+r"(buf2)
378
                :
379
                : "memory"
380
            );
381
        }
382
    } else { // LONG_STOP or ONLY_LONG
383
        float *buf1 = buf + 512;
384
        float *buf2 = saved;
385
        int temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7;
386
        int loop_end;
387
 
388
        /* loop unrolled 8 times */
389
        __asm__ volatile (
390
            ".set push                                 \n\t"
391
            ".set noreorder                            \n\t"
392
            "addiu   %[loop_end], %[src],        2048  \n\t"
393
        "1:                                            \n\t"
394
            "lw      %[temp0],    0(%[src])            \n\t"
395
            "lw      %[temp1],    4(%[src])            \n\t"
396
            "lw      %[temp2],    8(%[src])            \n\t"
397
            "lw      %[temp3],    12(%[src])           \n\t"
398
            "lw      %[temp4],    16(%[src])           \n\t"
399
            "lw      %[temp5],    20(%[src])           \n\t"
400
            "lw      %[temp6],    24(%[src])           \n\t"
401
            "lw      %[temp7],    28(%[src])           \n\t"
402
            "addiu   %[src],      %[src],        32    \n\t"
403
            "sw      %[temp0],    0(%[dst])            \n\t"
404
            "sw      %[temp1],    4(%[dst])            \n\t"
405
            "sw      %[temp2],    8(%[dst])            \n\t"
406
            "sw      %[temp3],    12(%[dst])           \n\t"
407
            "sw      %[temp4],    16(%[dst])           \n\t"
408
            "sw      %[temp5],    20(%[dst])           \n\t"
409
            "sw      %[temp6],    24(%[dst])           \n\t"
410
            "sw      %[temp7],    28(%[dst])           \n\t"
411
            "bne     %[src],      %[loop_end],   1b    \n\t"
412
            " addiu  %[dst],      %[dst],        32    \n\t"
413
            ".set pop                                  \n\t"
414
 
415
            : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
416
              [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
417
              [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
418
              [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
419
              [loop_end]"=&r"(loop_end), [src]"+r"(buf1),
420
              [dst]"+r"(buf2)
421
            :
422
            : "memory"
423
        );
424
    }
425
}
426
 
427
/**
 * Apply long-term prediction to one channel (MIPS version).
 *
 * Does nothing for EIGHT_SHORT_SEQUENCE. Otherwise a 2048-sample time-domain
 * prediction is built in sce->ret from sce->ltp_state scaled by ltp->coef
 * (zero padded when ltp->lag < 1024), transformed into ac->buf_mdct by
 * ac->windowing_and_mdct_ltp, optionally passed through ac->apply_tns, and
 * added to sce->coeffs for every scalefactor band flagged in ltp->used.
 *
 * @param ac  decoder context; supplies buf_mdct and the function pointers
 * @param sce channel element whose coeffs are updated in place
 */
static void apply_ltp_mips(AACContext *ac, SingleChannelElement *sce)
{
    const LongTermPrediction *ltp = &sce->ics.ltp;
    const uint16_t *offsets = sce->ics.swb_offset;
    int i, sfb;
    int j, k;

    if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
        float *predTime = sce->ret;
        float *predFreq = ac->buf_mdct;
        float *p_predTime;
        int16_t num_samples = 2048;

        if (ltp->lag < 1024) {
            num_samples = ltp->lag + 1024;
        }

        /* These are computed unconditionally: the zero-fill loops below
         * read j, k and p_predTime whether or not the lag was short. */
        j = (2048 - num_samples) >> 2;      /* groups of four floats to clear */
        k = (2048 - num_samples) & 3;       /* leftover floats to clear       */
        p_predTime = &predTime[num_samples];

        for (i = 0; i < num_samples; i++)
            predTime[i] = sce->ltp_state[i + 2048 - ltp->lag] * ltp->coef;

        /* zero predTime[num_samples..2047], four floats at a time */
        for (i = 0; i < j; i++) {

            /* loop unrolled 4 times */
            __asm__ volatile (
                "sw      $0,              0(%[p_predTime])        \n\t"
                "sw      $0,              4(%[p_predTime])        \n\t"
                "sw      $0,              8(%[p_predTime])        \n\t"
                "sw      $0,              12(%[p_predTime])       \n\t"
                "addiu   %[p_predTime],   %[p_predTime],     16   \n\t"

                : [p_predTime]"+r"(p_predTime)
                :
                : "memory"
            );
        }
        /* ... then the remaining 0-3 floats one at a time */
        for (i = 0; i < k; i++) {

            __asm__ volatile (
                "sw      $0,              0(%[p_predTime])        \n\t"
                "addiu   %[p_predTime],   %[p_predTime],     4    \n\t"

                : [p_predTime]"+r"(p_predTime)
                :
                : "memory"
            );
        }

        ac->windowing_and_mdct_ltp(ac, predFreq, predTime, &sce->ics);

        if (sce->tns.present)
            ac->apply_tns(predFreq, &sce->tns, &sce->ics, 0);

        for (sfb = 0; sfb < FFMIN(sce->ics.max_sfb, MAX_LTP_LONG_SFB); sfb++)
            if (ltp->used[sfb])
                for (i = offsets[sfb]; i < offsets[sfb + 1]; i++)
                    sce->coeffs[i] += predFreq[i];
    }
}
486
 
487
#if HAVE_MIPSFPU
488
static void update_ltp_mips(AACContext *ac, SingleChannelElement *sce)
489
{
490
    IndividualChannelStream *ics = &sce->ics;
491
    float *saved     = sce->saved;
492
    float *saved_ltp = sce->coeffs;
493
    const float *lwindow = ics->use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
494
    const float *swindow = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
495
    int i;
496
    int loop_end, loop_end1, loop_end2;
497
    float temp0, temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8, temp9, temp10, temp11;
498
 
499
    if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
500
        float *buf = saved;
501
        float *buf0 = saved_ltp;
502
        float *p_saved_ltp = saved_ltp + 576;
503
        float *ptr1 = &saved_ltp[512];
504
        float *ptr2 = &ac->buf_mdct[1023];
505
        float *ptr3 = (float*)&swindow[63];
506
        loop_end1 = (int)(p_saved_ltp + 448);
507
 
508
        /* loop unrolled 8 times */
509
        __asm__ volatile (
510
            ".set push                                     \n\t"
511
            ".set noreorder                                \n\t"
512
            "addiu   %[loop_end],   %[src],         2048   \n\t"
513
        "1:                                                \n\t"
514
            "lw      %[temp0],      0(%[src])              \n\t"
515
            "lw      %[temp1],      4(%[src])              \n\t"
516
            "lw      %[temp2],      8(%[src])              \n\t"
517
            "lw      %[temp3],      12(%[src])             \n\t"
518
            "lw      %[temp4],      16(%[src])             \n\t"
519
            "lw      %[temp5],      20(%[src])             \n\t"
520
            "lw      %[temp6],      24(%[src])             \n\t"
521
            "lw      %[temp7],      28(%[src])             \n\t"
522
            "addiu   %[src],        %[src],         32     \n\t"
523
            "sw      %[temp0],      0(%[dst])              \n\t"
524
            "sw      %[temp1],      4(%[dst])              \n\t"
525
            "sw      %[temp2],      8(%[dst])              \n\t"
526
            "sw      %[temp3],      12(%[dst])             \n\t"
527
            "sw      %[temp4],      16(%[dst])             \n\t"
528
            "sw      %[temp5],      20(%[dst])             \n\t"
529
            "sw      %[temp6],      24(%[dst])             \n\t"
530
            "sw      %[temp7],      28(%[dst])             \n\t"
531
            "bne     %[src],        %[loop_end],    1b     \n\t"
532
            " addiu  %[dst],        %[dst],         32     \n\t"
533
            ".set pop                                      \n\t"
534
 
535
            : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
536
              [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
537
              [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
538
              [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
539
              [loop_end]"=&r"(loop_end), [src]"+r"(buf),
540
              [dst]"+r"(buf0)
541
            :
542
            : "memory"
543
        );
544
 
545
        /* loop unrolled 8 times */
546
        __asm__ volatile (
547
        "1:                                                   \n\t"
548
            "sw     $0,              0(%[p_saved_ltp])        \n\t"
549
            "sw     $0,              4(%[p_saved_ltp])        \n\t"
550
            "sw     $0,              8(%[p_saved_ltp])        \n\t"
551
            "sw     $0,              12(%[p_saved_ltp])       \n\t"
552
            "sw     $0,              16(%[p_saved_ltp])       \n\t"
553
            "sw     $0,              20(%[p_saved_ltp])       \n\t"
554
            "sw     $0,              24(%[p_saved_ltp])       \n\t"
555
            "sw     $0,              28(%[p_saved_ltp])       \n\t"
556
            "addiu  %[p_saved_ltp],  %[p_saved_ltp],     32   \n\t"
557
            "bne    %[p_saved_ltp],  %[loop_end1],       1b   \n\t"
558
 
559
            : [p_saved_ltp]"+r"(p_saved_ltp)
560
            : [loop_end1]"r"(loop_end1)
561
            : "memory"
562
        );
563
 
564
        ac->fdsp.vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960,     &swindow[64],      64);
565
        for (i = 0; i < 16; i++){
566
            /* loop unrolled 4 times */
567
            __asm__ volatile (
568
                "lwc1    %[temp0],    0(%[ptr2])                \n\t"
569
                "lwc1    %[temp1],    -4(%[ptr2])               \n\t"
570
                "lwc1    %[temp2],    -8(%[ptr2])               \n\t"
571
                "lwc1    %[temp3],    -12(%[ptr2])              \n\t"
572
                "lwc1    %[temp4],    0(%[ptr3])                \n\t"
573
                "lwc1    %[temp5],    -4(%[ptr3])               \n\t"
574
                "lwc1    %[temp6],    -8(%[ptr3])               \n\t"
575
                "lwc1    %[temp7],    -12(%[ptr3])              \n\t"
576
                "mul.s   %[temp8],    %[temp0],     %[temp4]    \n\t"
577
                "mul.s   %[temp9],    %[temp1],     %[temp5]    \n\t"
578
                "mul.s   %[temp10],   %[temp2],     %[temp6]    \n\t"
579
                "mul.s   %[temp11],   %[temp3],     %[temp7]    \n\t"
580
                "swc1    %[temp8],    0(%[ptr1])                \n\t"
581
                "swc1    %[temp9],    4(%[ptr1])                \n\t"
582
                "swc1    %[temp10],   8(%[ptr1])                \n\t"
583
                "swc1    %[temp11],   12(%[ptr1])               \n\t"
584
                "addiu   %[ptr1],     %[ptr1],      16          \n\t"
585
                "addiu   %[ptr2],     %[ptr2],      -16         \n\t"
586
                "addiu   %[ptr3],     %[ptr3],      -16         \n\t"
587
 
588
                : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1),
589
                  [temp2]"=&f"(temp2), [temp3]"=&f"(temp3),
590
                  [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
591
                  [temp6]"=&f"(temp6), [temp7]"=&f"(temp7),
592
                  [temp8]"=&f"(temp8), [temp9]"=&f"(temp9),
593
                  [temp10]"=&f"(temp10), [temp11]"=&f"(temp11),
594
                  [ptr1]"+r"(ptr1), [ptr2]"+r"(ptr2), [ptr3]"+r"(ptr3)
595
                :
596
                : "memory"
597
            );
598
        }
599
    } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
600
        float *buff0 = saved;
601
        float *buff1 = saved_ltp;
602
        float *ptr1 = &saved_ltp[512];
603
        float *ptr2 = &ac->buf_mdct[1023];
604
        float *ptr3 = (float*)&swindow[63];
605
        loop_end = (int)(saved + 448);
606
 
607
        /* loop unrolled 8 times */
608
        __asm__ volatile (
609
            ".set push                                  \n\t"
610
            ".set noreorder                             \n\t"
611
        "1:                                             \n\t"
612
            "lw      %[temp0],    0(%[src])             \n\t"
613
            "lw      %[temp1],    4(%[src])             \n\t"
614
            "lw      %[temp2],    8(%[src])             \n\t"
615
            "lw      %[temp3],    12(%[src])            \n\t"
616
            "lw      %[temp4],    16(%[src])            \n\t"
617
            "lw      %[temp5],    20(%[src])            \n\t"
618
            "lw      %[temp6],    24(%[src])            \n\t"
619
            "lw      %[temp7],    28(%[src])            \n\t"
620
            "addiu   %[src],      %[src],         32    \n\t"
621
            "sw      %[temp0],    0(%[dst])             \n\t"
622
            "sw      %[temp1],    4(%[dst])             \n\t"
623
            "sw      %[temp2],    8(%[dst])             \n\t"
624
            "sw      %[temp3],    12(%[dst])            \n\t"
625
            "sw      %[temp4],    16(%[dst])            \n\t"
626
            "sw      %[temp5],    20(%[dst])            \n\t"
627
            "sw      %[temp6],    24(%[dst])            \n\t"
628
            "sw      %[temp7],    28(%[dst])            \n\t"
629
            "sw      $0,          2304(%[dst])          \n\t"
630
            "sw      $0,          2308(%[dst])          \n\t"
631
            "sw      $0,          2312(%[dst])          \n\t"
632
            "sw      $0,          2316(%[dst])          \n\t"
633
            "sw      $0,          2320(%[dst])          \n\t"
634
            "sw      $0,          2324(%[dst])          \n\t"
635
            "sw      $0,          2328(%[dst])          \n\t"
636
            "sw      $0,          2332(%[dst])          \n\t"
637
            "bne     %[src],      %[loop_end],    1b    \n\t"
638
            " addiu  %[dst],      %[dst],         32    \n\t"
639
            ".set pop                                   \n\t"
640
 
641
            : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
642
              [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
643
              [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
644
              [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
645
              [src]"+r"(buff0), [dst]"+r"(buff1)
646
            : [loop_end]"r"(loop_end)
647
            : "memory"
648
        );
649
        ac->fdsp.vector_fmul_reverse(saved_ltp + 448, ac->buf_mdct + 960,     &swindow[64],      64);
650
        for (i = 0; i < 16; i++){
651
            /* loop unrolled 4 times */
652
            __asm__ volatile (
653
                "lwc1    %[temp0],    0(%[ptr2])                \n\t"
654
                "lwc1    %[temp1],    -4(%[ptr2])               \n\t"
655
                "lwc1    %[temp2],    -8(%[ptr2])               \n\t"
656
                "lwc1    %[temp3],    -12(%[ptr2])              \n\t"
657
                "lwc1    %[temp4],    0(%[ptr3])                \n\t"
658
                "lwc1    %[temp5],    -4(%[ptr3])               \n\t"
659
                "lwc1    %[temp6],    -8(%[ptr3])               \n\t"
660
                "lwc1    %[temp7],    -12(%[ptr3])              \n\t"
661
                "mul.s   %[temp8],    %[temp0],     %[temp4]    \n\t"
662
                "mul.s   %[temp9],    %[temp1],     %[temp5]    \n\t"
663
                "mul.s   %[temp10],   %[temp2],     %[temp6]    \n\t"
664
                "mul.s   %[temp11],   %[temp3],     %[temp7]    \n\t"
665
                "swc1    %[temp8],    0(%[ptr1])                \n\t"
666
                "swc1    %[temp9],    4(%[ptr1])                \n\t"
667
                "swc1    %[temp10],   8(%[ptr1])                \n\t"
668
                "swc1    %[temp11],   12(%[ptr1])               \n\t"
669
                "addiu   %[ptr1],     %[ptr1],      16          \n\t"
670
                "addiu   %[ptr2],     %[ptr2],      -16         \n\t"
671
                "addiu   %[ptr3],     %[ptr3],      -16         \n\t"
672
 
673
                : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1),
674
                  [temp2]"=&f"(temp2), [temp3]"=&f"(temp3),
675
                  [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
676
                  [temp6]"=&f"(temp6), [temp7]"=&f"(temp7),
677
                  [temp8]"=&f"(temp8), [temp9]"=&f"(temp9),
678
                  [temp10]"=&f"(temp10), [temp11]"=&f"(temp11),
679
                  [ptr1]"+r"(ptr1), [ptr2]"+r"(ptr2), [ptr3]"+r"(ptr3)
680
                :
681
                : "memory"
682
            );
683
        }
684
    } else { // LONG_STOP or ONLY_LONG
685
        float *ptr1, *ptr2, *ptr3;
686
        ac->fdsp.vector_fmul_reverse(saved_ltp,       ac->buf_mdct + 512,     &lwindow[512],     512);
687
 
688
        ptr1 = &saved_ltp[512];
689
        ptr2 = &ac->buf_mdct[1023];
690
        ptr3 = (float*)&lwindow[511];
691
 
692
        for (i = 0; i < 512; i+=4){
693
            /* loop unrolled 4 times */
694
            __asm__ volatile (
695
                "lwc1    %[temp0],    0(%[ptr2])                \n\t"
696
                "lwc1    %[temp1],    -4(%[ptr2])               \n\t"
697
                "lwc1    %[temp2],    -8(%[ptr2])               \n\t"
698
                "lwc1    %[temp3],    -12(%[ptr2])              \n\t"
699
                "lwc1    %[temp4],    0(%[ptr3])                \n\t"
700
                "lwc1    %[temp5],    -4(%[ptr3])               \n\t"
701
                "lwc1    %[temp6],    -8(%[ptr3])               \n\t"
702
                "lwc1    %[temp7],    -12(%[ptr3])              \n\t"
703
                "mul.s   %[temp8],    %[temp0],     %[temp4]    \n\t"
704
                "mul.s   %[temp9],    %[temp1],     %[temp5]    \n\t"
705
                "mul.s   %[temp10],   %[temp2],     %[temp6]    \n\t"
706
                "mul.s   %[temp11],   %[temp3],     %[temp7]    \n\t"
707
                "swc1    %[temp8],    0(%[ptr1])                \n\t"
708
                "swc1    %[temp9],    4(%[ptr1])                \n\t"
709
                "swc1    %[temp10],   8(%[ptr1])                \n\t"
710
                "swc1    %[temp11],   12(%[ptr1])               \n\t"
711
                "addiu   %[ptr1],     %[ptr1],      16          \n\t"
712
                "addiu   %[ptr2],     %[ptr2],      -16         \n\t"
713
                "addiu   %[ptr3],     %[ptr3],      -16         \n\t"
714
 
715
                : [temp0]"=&f"(temp0), [temp1]"=&f"(temp1),
716
                  [temp2]"=&f"(temp2), [temp3]"=&f"(temp3),
717
                  [temp4]"=&f"(temp4), [temp5]"=&f"(temp5),
718
                  [temp6]"=&f"(temp6), [temp7]"=&f"(temp7),
719
                  [temp8]"=&f"(temp8), [temp9]"=&f"(temp9),
720
                  [temp10]"=&f"(temp10), [temp11]"=&f"(temp11),
721
                  [ptr1]"+r"(ptr1), [ptr2]"+r"(ptr2),
722
                  [ptr3]"+r"(ptr3)
723
                :
724
                : "memory"
725
            );
726
        }
727
    }
728
 
729
    {
730
        float *buf1 = sce->ltp_state+1024;
731
        float *buf2 = sce->ltp_state;
732
        float *buf3 = sce->ret;
733
        float *buf4 = sce->ltp_state+1024;
734
        float *buf5 = saved_ltp;
735
        float *buf6 = sce->ltp_state+2048;
736
 
737
        /* loops unrolled 8 times */
738
        __asm__ volatile (
739
            ".set push                                    \n\t"
740
            ".set noreorder                               \n\t"
741
            "addiu   %[loop_end],   %[src],         4096  \n\t"
742
            "addiu   %[loop_end1],  %[src1],        4096  \n\t"
743
            "addiu   %[loop_end2],  %[src2],        4096  \n\t"
744
        "1:                                               \n\t"
745
            "lw      %[temp0],      0(%[src])             \n\t"
746
            "lw      %[temp1],      4(%[src])             \n\t"
747
            "lw      %[temp2],      8(%[src])             \n\t"
748
            "lw      %[temp3],      12(%[src])            \n\t"
749
            "lw      %[temp4],      16(%[src])            \n\t"
750
            "lw      %[temp5],      20(%[src])            \n\t"
751
            "lw      %[temp6],      24(%[src])            \n\t"
752
            "lw      %[temp7],      28(%[src])            \n\t"
753
            "addiu   %[src],        %[src],         32    \n\t"
754
            "sw      %[temp0],      0(%[dst])             \n\t"
755
            "sw      %[temp1],      4(%[dst])             \n\t"
756
            "sw      %[temp2],      8(%[dst])             \n\t"
757
            "sw      %[temp3],      12(%[dst])            \n\t"
758
            "sw      %[temp4],      16(%[dst])            \n\t"
759
            "sw      %[temp5],      20(%[dst])            \n\t"
760
            "sw      %[temp6],      24(%[dst])            \n\t"
761
            "sw      %[temp7],      28(%[dst])            \n\t"
762
            "bne     %[src],        %[loop_end],    1b    \n\t"
763
            " addiu  %[dst],        %[dst],         32    \n\t"
764
        "2:                                               \n\t"
765
            "lw      %[temp0],      0(%[src1])            \n\t"
766
            "lw      %[temp1],      4(%[src1])            \n\t"
767
            "lw      %[temp2],      8(%[src1])            \n\t"
768
            "lw      %[temp3],      12(%[src1])           \n\t"
769
            "lw      %[temp4],      16(%[src1])           \n\t"
770
            "lw      %[temp5],      20(%[src1])           \n\t"
771
            "lw      %[temp6],      24(%[src1])           \n\t"
772
            "lw      %[temp7],      28(%[src1])           \n\t"
773
            "addiu   %[src1],       %[src1],        32    \n\t"
774
            "sw      %[temp0],      0(%[dst1])            \n\t"
775
            "sw      %[temp1],      4(%[dst1])            \n\t"
776
            "sw      %[temp2],      8(%[dst1])            \n\t"
777
            "sw      %[temp3],      12(%[dst1])           \n\t"
778
            "sw      %[temp4],      16(%[dst1])           \n\t"
779
            "sw      %[temp5],      20(%[dst1])           \n\t"
780
            "sw      %[temp6],      24(%[dst1])           \n\t"
781
            "sw      %[temp7],      28(%[dst1])           \n\t"
782
            "bne     %[src1],       %[loop_end1],   2b    \n\t"
783
            " addiu  %[dst1],       %[dst1],        32    \n\t"
784
        "3:                                               \n\t"
785
            "lw      %[temp0],      0(%[src2])            \n\t"
786
            "lw      %[temp1],      4(%[src2])            \n\t"
787
            "lw      %[temp2],      8(%[src2])            \n\t"
788
            "lw      %[temp3],      12(%[src2])           \n\t"
789
            "lw      %[temp4],      16(%[src2])           \n\t"
790
            "lw      %[temp5],      20(%[src2])           \n\t"
791
            "lw      %[temp6],      24(%[src2])           \n\t"
792
            "lw      %[temp7],      28(%[src2])           \n\t"
793
            "addiu   %[src2],       %[src2],        32    \n\t"
794
            "sw      %[temp0],      0(%[dst2])            \n\t"
795
            "sw      %[temp1],      4(%[dst2])            \n\t"
796
            "sw      %[temp2],      8(%[dst2])            \n\t"
797
            "sw      %[temp3],      12(%[dst2])           \n\t"
798
            "sw      %[temp4],      16(%[dst2])           \n\t"
799
            "sw      %[temp5],      20(%[dst2])           \n\t"
800
            "sw      %[temp6],      24(%[dst2])           \n\t"
801
            "sw      %[temp7],      28(%[dst2])           \n\t"
802
            "bne     %[src2],       %[loop_end2],   3b    \n\t"
803
            " addiu  %[dst2],       %[dst2],        32    \n\t"
804
            ".set pop                                     \n\t"
805
 
806
            : [temp0]"=&r"(temp0), [temp1]"=&r"(temp1),
807
              [temp2]"=&r"(temp2), [temp3]"=&r"(temp3),
808
              [temp4]"=&r"(temp4), [temp5]"=&r"(temp5),
809
              [temp6]"=&r"(temp6), [temp7]"=&r"(temp7),
810
              [loop_end]"=&r"(loop_end), [loop_end1]"=&r"(loop_end1),
811
              [loop_end2]"=&r"(loop_end2), [src]"+r"(buf1),
812
              [dst]"+r"(buf2), [src1]"+r"(buf3), [dst1]"+r"(buf4),
813
              [src2]"+r"(buf5), [dst2]"+r"(buf6)
814
            :
815
            : "memory"
816
        );
817
    }
818
}
819
#endif /* HAVE_MIPSFPU */
820
#endif /* HAVE_INLINE_ASM */
821
 
822
/**
 * Install the MIPS-specific routines into an AAC decoder context.
 *
 * When HAVE_INLINE_ASM is set, the imdct_and_windowing and apply_ltp
 * members are pointed at imdct_and_windowing_mips and apply_ltp_mips.
 * update_ltp is replaced by update_ltp_mips only when HAVE_MIPSFPU is
 * also set, since that routine is compiled under the same guard.
 * When HAVE_INLINE_ASM is not set, the context is left untouched.
 *
 * @param c AAC decoder context whose members are overridden
 */
void ff_aacdec_init_mips(AACContext *c)
{
#if HAVE_INLINE_ASM
    c->imdct_and_windowing         = imdct_and_windowing_mips;
    c->apply_ltp                   = apply_ltp_mips;
#if HAVE_MIPSFPU
    c->update_ltp                  = update_ltp_mips;
#endif /* HAVE_MIPSFPU */
#endif /* HAVE_INLINE_ASM */
}