Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
4349 | Serge | 1 | /* |
2 | * Copyright (c) 2013 RISC OS Open Ltd |
||
3 | * Author: Ben Avison |
||
4 | * |
||
5 | * This file is part of FFmpeg. |
||
6 | * |
||
7 | * FFmpeg is free software; you can redistribute it and/or |
||
8 | * modify it under the terms of the GNU Lesser General Public |
||
9 | * License as published by the Free Software Foundation; either |
||
10 | * version 2.1 of the License, or (at your option) any later version. |
||
11 | * |
||
12 | * FFmpeg is distributed in the hope that it will be useful, |
||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||
15 | * Lesser General Public License for more details. |
||
16 | * |
||
17 | * You should have received a copy of the GNU Lesser General Public |
||
18 | * License along with FFmpeg; if not, write to the Free Software |
||
19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||
20 | */ |
||
21 | |||
22 | #include "libavutil/arm/asm.S" |
||
23 | |||
@ Symbolic names for the core registers used in this file.
@ a1-a3 hold the three arguments on entry to ff_imdct_half_vfp.
CONTEXT  .req   a1              @ base register for the context field loads
ORIGOUT  .req   a2              @ output pointer as passed in; copied to OUT
IN       .req   a3              @ input samples
OUT      .req   v1              @ working copy of the output pointer
REVTAB   .req   v2              @ table of halfword output indices
TCOS     .req   v3              @ first trig table
TSIN     .req   v4              @ second trig table
OLDFPSCR .req   v5              @ FPSCR value read on entry, written back later
@ Scratch store addresses for the pre-rotation stage.
@ J0 shares a2 with ORIGOUT, which has been copied to OUT by then.
J0       .req   a2
J1       .req   a4
J2       .req   ip
J3       .req   lr
||
36 | |||
/*
 * prerotation_innerloop - one unrolled step of the stage that runs before
 * the FFT call.
 *
 * Expects the assembler symbols k and n4 plus the IN, OUT, REVTAB, TCOS and
 * TSIN aliases to be set up at the expansion site, and the FPSCR to select
 * short vectors of length 4: each instruction tagged "vector operation"
 * then acts on four consecutive single-precision registers.
 *
 * Per expansion:
 *   s8-s11  = s0-s3 * s16-s19 - s4-s7 * s20-s23
 *   s12-s15 = s0-s3 * s20-s23 + s4-s7 * s16-s19
 * where s16-s19 come from TCOS, s20-s23 from TSIN and s0-s7 from IN.
 * Each pair (s8+i, s12+i) is stored as two adjacent words at
 * OUT + 8 * index, with the four indices read from REVTAB.
 *
 * Overwrites J0-J3 (a2, a4, ip, lr), s0-s15 and d8-d11; advances k by 2.
 * The instruction order below is the original hand-scheduled order.
 */
.macro prerotation_innerloop
 .set trig_lo, k                                @ element index, low end
 .set trig_hi, n4 - k - 2                       @ element index, mirrored high end
 .set in_lo, trig_lo * 2                        @ IN is indexed at twice the trig index
 .set in_hi, trig_hi * 2
        vldr    d8, [TCOS, #trig_lo*4]          @ s16,s17
        vldr    d9, [TCOS, #trig_hi*4]          @ s18,s19
        vldr    s0, [IN, #in_hi*4 + 12]
        vldr    s1, [IN, #in_hi*4 + 4]
        vldr    s2, [IN, #in_lo*4 + 12]
        vldr    s3, [IN, #in_lo*4 + 4]
        vmul.f  s8, s0, s16                     @ vector operation
        vldr    d10, [TSIN, #trig_lo*4]         @ s20,s21
        vldr    d11, [TSIN, #trig_hi*4]         @ s22,s23
        vldr    s4, [IN, #in_lo*4]
        vldr    s5, [IN, #in_lo*4 + 8]
        vldr    s6, [IN, #in_hi*4]
        vldr    s7, [IN, #in_hi*4 + 8]
        ldr     J0, [REVTAB, #trig_lo*2]        @ one word = two halfword indices
        vmul.f  s12, s0, s20                    @ vector operation
        ldr     J2, [REVTAB, #trig_hi*2]
        mov     J1, J0, lsr #16                 @ upper halfword
        and     J0, J0, #255                    @ halfword value will be < n4
        vmls.f  s8, s4, s20                     @ vector operation
        mov     J3, J2, lsr #16                 @ upper halfword
        and     J2, J2, #255                    @ halfword value will be < n4
        add     J0, OUT, J0, lsl #3             @ index -> address, 8 bytes per entry
        vmla.f  s12, s4, s16                    @ vector operation
        add     J1, OUT, J1, lsl #3
        add     J2, OUT, J2, lsl #3
        add     J3, OUT, J3, lsl #3
        vstr    s8, [J0]                        @ first word of each output pair
        vstr    s9, [J1]
        vstr    s10, [J2]
        vstr    s11, [J3]
        vstr    s12, [J0, #4]                   @ second word of each output pair
        vstr    s13, [J1, #4]
        vstr    s14, [J2, #4]
        vstr    s15, [J3, #4]
 .set k, k + 2
.endm
||
78 | |||
/*
 * postrotation_innerloop tail, head - one step of the stage that runs after
 * the FFT call, working in place on OUT.
 *
 * Either argument may be left empty. A non-empty head emits the front half
 * of a step for index k: it loads TSIN, TCOS and OUT data and issues the two
 * initial vector multiplies into s8-s11 and s12-s15. A non-empty tail emits
 * the back half of the step begun by the previous expansion (index k - 2):
 * it applies the multiply-subtract / multiply-accumulate against the TCOS
 * values and stores s8-s15 back to OUT. The two halves are interleaved
 * instruction by instruction in the original hand-scheduled order.
 *
 * The TCOS values alternate between d10/d11 and d12/d13 according to bit 1
 * of k, so a tail can still read the bank loaded by the previous head while
 * the current head fills the other one.
 *
 * Expects the symbols k and n8, the OUT, TCOS and TSIN aliases, and the
 * short-vector FPSCR setting (length 4) from the expansion site.
 * Overwrites s0-s15, d8-d9 and the selected TCOS bank; k is advanced by 2
 * only when head is non-empty.
 */
.macro postrotation_innerloop tail, head
 .set trig_lo_head, n8 - k - 2
 .set trig_hi_head, n8 + k
 .set out_lo_head, trig_lo_head * 2
 .set out_hi_head, trig_hi_head * 2
 .set trig_lo_tail, n8 - (k - 2) - 2            @ same formulas, previous value of k
 .set trig_hi_tail, n8 + (k - 2)
 .set out_lo_tail, trig_lo_tail * 2
 .set out_hi_tail, trig_hi_tail * 2
 .if (k & 2) == 0
  TCOS_D0_HEAD .req d10                         @ s20,s21
  TCOS_D1_HEAD .req d11                         @ s22,s23
  TCOS_S0_TAIL .req s24
 .else
  TCOS_D0_HEAD .req d12                         @ s24,s25
  TCOS_D1_HEAD .req d13                         @ s26,s27
  TCOS_S0_TAIL .req s20
 .endif
 .ifnc "\tail",""
        vmls.f  s8, s0, TCOS_S0_TAIL            @ vector operation
 .endif
 .ifnc "\head",""
        vldr    d8, [TSIN, #trig_lo_head*4]     @ s16,s17
        vldr    d9, [TSIN, #trig_hi_head*4]     @ s18,s19
        vldr    TCOS_D0_HEAD, [TCOS, #trig_lo_head*4]
 .endif
 .ifnc "\tail",""
        vmla.f  s12, s4, TCOS_S0_TAIL           @ vector operation
 .endif
 .ifnc "\head",""
        vldr    s0, [OUT, #out_lo_head*4]
        vldr    s1, [OUT, #out_lo_head*4 + 8]
        vldr    s2, [OUT, #out_hi_head*4]
        vldr    s3, [OUT, #out_hi_head*4 + 8]
        vldr    s4, [OUT, #out_lo_head*4 + 4]
        vldr    s5, [OUT, #out_lo_head*4 + 12]
        vldr    s6, [OUT, #out_hi_head*4 + 4]
        vldr    s7, [OUT, #out_hi_head*4 + 12]
 .endif
 .ifnc "\tail",""
        vstr    s8, [OUT, #out_lo_tail*4]
        vstr    s9, [OUT, #out_lo_tail*4 + 8]
        vstr    s10, [OUT, #out_hi_tail*4]
        vstr    s11, [OUT, #out_hi_tail*4 + 8]
 .endif
 .ifnc "\head",""
        vmul.f  s8, s4, s16                     @ vector operation
 .endif
 .ifnc "\tail",""
        vstr    s12, [OUT, #out_hi_tail*4 + 12]
        vstr    s13, [OUT, #out_hi_tail*4 + 4]
        vstr    s14, [OUT, #out_lo_tail*4 + 12]
        vstr    s15, [OUT, #out_lo_tail*4 + 4]
 .endif
 .ifnc "\head",""
        vmul.f  s12, s0, s16                    @ vector operation
        vldr    TCOS_D1_HEAD, [TCOS, #trig_hi_head*4]
 .endif
 .unreq TCOS_D0_HEAD                            @ aliases are re-bound on every expansion
 .unreq TCOS_D1_HEAD
 .unreq TCOS_S0_TAIL
 .ifnc "\head",""
  .set k, k + 2
 .endif
.endm
||
144 | |||
145 | |||
/*
 * void ff_imdct_half_vfp(FFTContext *s,
 *                        FFTSample *output,
 *                        const FFTSample *input)
 *
 * In:  a1 = s, a2 = output, a3 = input.
 *
 * Only handles mdct_bits == 6. For any other value it branches straight to
 * ff_imdct_half_c with a1-a3 untouched and nothing pushed, so that routine
 * returns directly to the caller.
 *
 * Accelerated path: fully unrolled pre-rotation from input into output,
 * a call to ff_fft16_vfp on output, then fully unrolled post-rotation of
 * output in place. The FPSCR is switched to the short-vector setting for
 * the two rotation stages only; the value read on entry is written back
 * before the FFT call and before returning.
 *
 * Saves and restores v1-v5, lr and s16-s27.
 * The context offsets used below (words 2, 5, 6 and 7) are presumably the
 * revtab, mdct_bits, tcos and tsin fields of FFTContext; the structure
 * layout is not visible in this file, so confirm against its definition.
 */
function ff_imdct_half_vfp, export=1
        ldr     ip, [CONTEXT, #5*4]             @ mdct_bits
        teq     ip, #6
        it      ne
        bne     X(ff_imdct_half_c)              @ only case currently accelerated is the one used by DCA

 .set n, 1<<6                                   @ sizes are fixed for mdct_bits == 6
 .set n2, n/2
 .set n4, n/4
 .set n8, n/8

        push    {v1-v5,lr}
        vpush   {s16-s27}
        fmrx    OLDFPSCR, FPSCR                 @ keep the FPSCR value seen on entry
        ldr     lr, =0x03030000                 @ RunFast mode, short vectors of length 4, stride 1
        fmxr    FPSCR, lr
        mov     OUT, ORIGOUT                    @ a2 is reused as J0 below
        ldr     REVTAB, [CONTEXT, #2*4]
        ldr     TCOS, [CONTEXT, #6*4]
        ldr     TSIN, [CONTEXT, #7*4]

 .set k, 0
 .rept n8/2
        prerotation_innerloop
 .endr

        fmxr    FPSCR, OLDFPSCR                 @ entry FPSCR back in place for the call
        mov     a1, OUT                         @ argument for the FFT: the output buffer
        bl      X(ff_fft16_vfp)
        ldr     lr, =0x03030000                 @ RunFast mode, short vectors of length 4, stride 1
        fmxr    FPSCR, lr

 .set k, 0
        postrotation_innerloop , head           @ first step: front half only
 .rept n8/2 - 1
        postrotation_innerloop tail, head       @ finish previous step, begin next
 .endr
        postrotation_innerloop tail             @ last step: back half only

        fmxr    FPSCR, OLDFPSCR
        vpop    {s16-s27}
        pop     {v1-v5,pc}
endfunc
||
193 | |||
@ Release the file-level register aliases, one .unreq for each .req
@ made near the top of the file.
        .unreq  CONTEXT
        .unreq  ORIGOUT
        .unreq  IN
        .unreq  OUT
        .unreq  REVTAB
        .unreq  TCOS
        .unreq  TSIN
        .unreq  OLDFPSCR
        .unreq  J0
        .unreq  J1
        .unreq  J2
        .unreq  J3