Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
4349 Serge 1
/*
 * Copyright (c) 2011 Mans Rullgard
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
20
 
21
#include "libavutil/arm/asm.S"
22
 
23
/*
 * ff_ac3_max_msb_abs_int16_neon
 *
 * ORs together the absolute values of a run of signed 16-bit elements,
 * folds the OR down to four 16-bit lanes and returns the largest of those
 * lanes in r0.
 *
 * In:   r0 = source pointer; the loads carry a :128 alignment hint, so the
 *            address must be 16-byte aligned
 *       r1 = element count; 16 elements are consumed per iteration
 * Out:  r0 = result, zero-extended from 16 bits
 * Clobbers: r1, q0-q3, flags
 *
 * The count is tested after the loop body, so at least 16 elements are
 * always read.
 * NOTE(review): presumably called with a count that is a positive multiple
 * of 16 -- confirm against the C caller.
 */
function ff_ac3_max_msb_abs_int16_neon, export=1
        vmov.i16        q0,  #0                 @ OR accumulator, even loads
        vmov.i16        q2,  #0                 @ OR accumulator, odd loads
1:      vld1.16         {q1},     [r0,:128]!
        vabs.s16        q1,  q1
        vld1.16         {q3},     [r0,:128]!
        vabs.s16        q3,  q3
        vorr            q0,  q0,  q1
        vorr            q2,  q2,  q3
        subs            r1,  r1,  #16
        bgt             1b
        vorr            q0,  q0,  q2            @ merge the two accumulators
        vorr            d0,  d0,  d1            @ 8 lanes -> 4 lanes
        vpmax.u16       d0,  d0,  d0            @ 4 lanes -> pairwise maxima
        vpmax.u16       d0,  d0,  d0            @ lane 0 = max of all 4 lanes
        vmov.u16        r0,  d0[0]
        bx              lr
endfunc
41
 
42
/*
 * ff_ac3_exponent_min_neon
 *
 * For each 16-byte chunk starting at r0, computes the unsigned byte-wise
 * minimum of that chunk and the chunks at the same offset in the r1
 * following rows, where consecutive rows are 256 bytes apart, and writes
 * the result back over the chunk in the first row.
 *
 * In:   r0 = pointer to the first row (16-byte aligned, :128 hint)
 *       r1 = number of additional rows to fold in; returns immediately,
 *            touching nothing, when this is 0
 *       r2 = number of bytes to process per row; 16 bytes per outer pass
 * Out:  none (first row updated in place)
 * Clobbers: r0, r2, r3, r12, q0, q1, flags
 *
 * lr is used as the inner loop counter, so it is saved on entry and the
 * function returns by popping the saved value into pc.
 * NOTE(review): the 256-byte row stride presumably corresponds to one
 * exponent row per audio block -- confirm against the C caller.
 */
function ff_ac3_exponent_min_neon, export=1
        cmp             r1,  #0
        it              eq
        bxeq            lr
        push            {lr}
        mov             r12, #256               @ row stride in bytes
1:
        vld1.8          {q0},     [r0,:128]     @ running minimum = row 0 chunk
        mov             lr,  r1                 @ inner counter = extra rows
        add             r3,  r0,  #256          @ r3 -> same chunk in row 1
2:      vld1.8          {q1},     [r3,:128], r12
        subs            lr,  lr,  #1
        vmin.u8         q0,  q0,  q1
        bgt             2b
        subs            r2,  r2,  #16
        vst1.8          {q0},     [r0,:128]!    @ store result, advance 16 bytes
        bgt             1b
        pop             {pc}
endfunc
61
 
62
/*
 * ff_ac3_lshift_int16_neon
 *
 * Shifts a run of signed 16-bit elements in place by a per-call shift
 * count, using the register form of VSHL.
 *
 * In:   r0 = pointer to the data (16-byte aligned, :128 hint)
 *       r1 = element count; 8 elements are processed per iteration
 *       r2 = shift count, replicated into every 16-bit lane of q0
 * Out:  none (data updated in place)
 * Clobbers: r0, r1, q0, q1, flags
 *
 * The count is tested after the loop body, so at least 8 elements are
 * always processed.
 * NOTE(review): presumably r2 is a small non-negative left-shift amount --
 * confirm against the C caller.
 */
function ff_ac3_lshift_int16_neon, export=1
        vdup.16         q0,  r2
1:      vld1.16         {q1},     [r0,:128]
        vshl.s16        q1,  q1,  q0
        vst1.16         {q1},     [r0,:128]!    @ write back, advance 16 bytes
        subs            r1,  r1,  #8
        bgt             1b
        bx              lr
endfunc
71
 
72
/*
 * ff_ac3_rshift_int32_neon
 *
 * Shifts a run of signed 32-bit elements in place. The shift count in r2
 * is negated first and then applied with the register form of VSHL.S32,
 * so a positive r2 produces a right shift.
 *
 * In:   r0 = pointer to the data (16-byte aligned, :128 hint)
 *       r1 = element count; 4 elements are processed per iteration
 *       r2 = shift count
 * Out:  none (data updated in place)
 * Clobbers: r0, r1, r2, q0, q1, flags
 *
 * The count is tested after the loop body, so at least 4 elements are
 * always processed.
 */
function ff_ac3_rshift_int32_neon, export=1
        rsb             r2,  r2,  #0            @ r2 = -shift
        vdup.32         q0,  r2
1:      vld1.32         {q1},     [r0,:128]
        vshl.s32        q1,  q1,  q0            @ negative count => shift right
        vst1.32         {q1},     [r0,:128]!    @ write back, advance 16 bytes
        subs            r1,  r1,  #4
        bgt             1b
        bx              lr
endfunc
82
 
83
/*
 * ff_float_to_fixed24_neon
 *
 * Converts a run of single-precision floats to signed 32-bit fixed-point
 * values with 24 fractional bits.
 *
 * In:   r0 = destination pointer (16-byte aligned, :128 hint)
 *       r1 = source pointer (16-byte aligned, :128 hint)
 *       r2 = element count; 16 elements are converted per iteration
 * Out:  none (results written through r0)
 * Clobbers: r0, r1, r2, q0-q3, flags
 *
 * Loads, conversions and stores are interleaved; the instruction order is
 * kept exactly as in the original. The count is tested after the loop
 * body, so at least 16 elements are always converted.
 */
function ff_float_to_fixed24_neon, export=1
1:      vld1.32         {q0-q1},  [r1,:128]!
        vcvt.s32.f32    q0,  q0,  #24
        vld1.32         {q2-q3},  [r1,:128]!
        vcvt.s32.f32    q1,  q1,  #24
        vcvt.s32.f32    q2,  q2,  #24
        vst1.32         {q0-q1},  [r0,:128]!
        vcvt.s32.f32    q3,  q3,  #24
        vst1.32         {q2-q3},  [r0,:128]!
        subs            r2,  r2,  #16
        bgt             1b
        bx              lr
endfunc
96
 
97
/*
 * ff_ac3_extract_exponents_neon
 *
 * For each signed 32-bit input word x, writes one byte holding
 * clz(abs(x)) - 8 to the output. An input of 0 therefore yields 24.
 *
 * In:   r0 = destination byte pointer (4-byte aligned, :32 hint)
 *       r1 = source pointer to 32-bit words (16-byte aligned, :128 hint)
 *       r2 = element count; 4 elements are processed per iteration
 * Out:  none (results written through r0)
 * Clobbers: r0, r1, r2, q0, q1, q3, q15, flags
 *
 * The count is tested after the loop body, so at least 4 elements are
 * always processed.
 */
function ff_ac3_extract_exponents_neon, export=1
        vmov.i32        q15, #8
1:
        vld1.32         {q0},     [r1,:128]!
        vabs.s32        q1,  q0
        vclz.i32        q3,  q1
        vsub.i32        q3,  q3,  q15
        vmovn.i32       d6,  q3                 @ 4 x 32-bit -> 4 x 16-bit in d6
        vmovn.i16       d6,  q3                 @ low 4 bytes of d6 = results
        vst1.32         {d6[0]},  [r0,:32]!     @ store just those 4 bytes
        subs            r2,  r2,  #4
        bgt             1b
        bx              lr
endfunc
111
 
112
/*
 * ff_ac3_sum_square_butterfly_int32_neon
 *
 * Walks two arrays of signed 32-bit values a[] and b[] in step and
 * accumulates four 64-bit sums of squares, stored to the output in this
 * order:
 *   [0] sum of a*a
 *   [1] sum of b*b
 *   [2] sum of (a+b)*(a+b)
 *   [3] sum of (a-b)*(a-b)
 * The a+b and a-b terms are formed with 32-bit vector add/sub before the
 * widening multiply-accumulate.
 *
 * In:   r0 = pointer to four 64-bit outputs (no alignment hint)
 *       r1 = pointer to a[] (no alignment hint)
 *       r2 = pointer to b[] (no alignment hint)
 *       r3 = element count; 2 elements per array are consumed per iteration
 * Out:  none (results written through r0)
 * Clobbers: r1, r2, r3, q0-q3, q8, q9, flags
 *
 * The count is tested after the loop body, so at least 2 elements of each
 * array are always read.
 */
function ff_ac3_sum_square_butterfly_int32_neon, export=1
        vmov.i64        q0,  #0                 @ sum a*a       (2 partials)
        vmov.i64        q1,  #0                 @ sum b*b       (2 partials)
        vmov.i64        q2,  #0                 @ sum (a+b)^2   (2 partials)
        vmov.i64        q3,  #0                 @ sum (a-b)^2   (2 partials)
1:
        vld1.32         {d16},    [r1]!
        vld1.32         {d17},    [r2]!
        vadd.s32        d18, d16, d17
        vsub.s32        d19, d16, d17
        vmlal.s32       q0,  d16, d16
        vmlal.s32       q1,  d17, d17
        vmlal.s32       q2,  d18, d18
        vmlal.s32       q3,  d19, d19
        subs            r3,  r3,  #2
        bgt             1b
        vadd.s64        d0,  d0,  d1            @ fold each pair of partials
        vadd.s64        d1,  d2,  d3
        vadd.s64        d2,  d4,  d5
        vadd.s64        d3,  d6,  d7
        vst1.64         {q0-q1},  [r0]
        bx              lr
endfunc
135
 
136
/*
 * ff_ac3_sum_square_butterfly_float_neon
 *
 * Walks two arrays of single-precision floats a[] and b[] in step and
 * accumulates four single-precision sums of squares, stored to the output
 * in this order:
 *   [0] sum of a*a
 *   [1] sum of b*b
 *   [2] sum of (a+b)*(a+b)
 *   [3] sum of (a-b)*(a-b)
 *
 * In:   r0 = pointer to four float outputs (no alignment hint)
 *       r1 = pointer to a[] (no alignment hint)
 *       r2 = pointer to b[] (no alignment hint)
 *       r3 = element count; 2 elements per array are consumed per iteration
 * Out:  none (results written through r0)
 * Clobbers: r1, r2, r3, q0, q1, q8, q9, flags
 *
 * Each sum is kept as two per-lane partials that are added pairwise after
 * the loop. The count is tested after the loop body, so at least 2
 * elements of each array are always read.
 */
function ff_ac3_sum_square_butterfly_float_neon, export=1
        vmov.f32        q0,  #0.0               @ d0: sum a*a, d1: sum b*b
        vmov.f32        q1,  #0.0               @ d2: (a+b)^2, d3: (a-b)^2
1:
        vld1.32         {d16},    [r1]!
        vld1.32         {d17},    [r2]!
        vadd.f32        d18, d16, d17
        vsub.f32        d19, d16, d17
        vmla.f32        d0,  d16, d16
        vmla.f32        d1,  d17, d17
        vmla.f32        d2,  d18, d18
        vmla.f32        d3,  d19, d19
        subs            r3,  r3,  #2
        bgt             1b
        vpadd.f32       d0,  d0,  d1            @ d0 = { sum a*a, sum b*b }
        vpadd.f32       d1,  d2,  d3            @ d1 = { sum (a+b)^2, sum (a-b)^2 }
        vst1.32         {q0},     [r0]
        bx              lr
endfunc