Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
6148 | serge | 1 | /* |
2 | * Copyright (c) 2008 Mans Rullgard |
||
3 | * |
||
4 | * This file is part of libswresample. |
||
5 | * |
||
6 | * libswresample is free software; you can redistribute it and/or |
||
7 | * modify it under the terms of the GNU Lesser General Public |
||
8 | * License as published by the Free Software Foundation; either |
||
9 | * version 2.1 of the License, or (at your option) any later version. |
||
10 | * |
||
11 | * libswresample is distributed in the hope that it will be useful, |
||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||
14 | * Lesser General Public License for more details. |
||
15 | * |
||
16 | * You should have received a copy of the GNU Lesser General Public |
||
17 | * License along with libswresample; if not, write to the Free Software |
||
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||
19 | */ |
||
20 | |||
21 | #include "config.h" |
||
22 | #include "libavutil/arm/asm.S" |
||
23 | |||
/*
 * swri_oldapi_conv_flt_to_s16_neon
 *
 * Converts packed 32-bit floats to signed 16-bit integers.
 *
 * In:    r0 = destination, 16-bit samples (stored with a 128-bit
 *             alignment hint)
 *        r1 = source, 32-bit floats (loaded with a 128-bit alignment hint)
 *        r2 = number of samples.  The code works strictly in units of 8
 *             and has no path for smaller counts, so this must be a
 *             non-zero multiple of 8.
 * Clobb: r0-r2, r12, q0-q3, q8-q9, flags
 *
 * Each float is converted to a fixed-point value with 31 fractional bits
 * (vcvt.s32.f32 ..., #31) and then narrowed to 16 bits with rounding and
 * saturation (vqrshrn.s32 ..., #16).
 *
 * The code is software pipelined: q8/q9 always hold 8 converted samples
 * that have not been stored yet, and the loads/conversions of the next
 * samples are interleaved with the narrowing and stores of the current ones.
 *
 * NOTE(review): the C prototype is not visible in this file; presumably
 * (int16_t *dst, const float *src, int len) -- confirm against the caller.
 */
function swri_oldapi_conv_flt_to_s16_neon, export=1
        subs            r2,  r2,  #8            @ count left after the 8 samples preloaded below
        vld1.32         {q0},     [r1,:128]!
        vcvt.s32.f32    q8,  q0,  #31
        vld1.32         {q1},     [r1,:128]!
        vcvt.s32.f32    q9,  q1,  #31
        beq             3f                      @ flags still from subs: only 8 samples in total
        bics            r12, r2,  #15           @ r12 = remaining count rounded down to 16
        beq             2f                      @ fewer than 16 left: go store 16 and return

        @ Main loop: stores 16 samples per pass and leaves the next 8
        @ converted samples in q8/q9.
1:
        subs            r12, r12, #16
        vqrshrn.s32     d4,  q8,  #16
        vld1.32         {q0},     [r1,:128]!
        vcvt.s32.f32    q0,  q0,  #31
        vqrshrn.s32     d5,  q9,  #16
        vld1.32         {q1},     [r1,:128]!
        vcvt.s32.f32    q1,  q1,  #31
        vqrshrn.s32     d6,  q0,  #16
        vst1.16         {q2},     [r0,:128]!
        vqrshrn.s32     d7,  q1,  #16
        vld1.32         {q8},     [r1,:128]!
        vcvt.s32.f32    q8,  q8,  #31
        vld1.32         {q9},     [r1,:128]!
        vcvt.s32.f32    q9,  q9,  #31
        vst1.16         {q3},     [r0,:128]!
        bne             1b                      @ flags still from the subs at the loop head
        ands            r2,  r2,  #15           @ anything beyond the 8 pending in q8/q9?
        beq             3f

        @ Final 16 samples: the 8 pending in q8/q9 plus 8 loaded here.
2:
        vld1.32         {q0},     [r1,:128]!
        vqrshrn.s32     d4,  q8,  #16
        vcvt.s32.f32    q0,  q0,  #31
        vld1.32         {q1},     [r1,:128]!
        vqrshrn.s32     d5,  q9,  #16
        vcvt.s32.f32    q1,  q1,  #31
        vqrshrn.s32     d6,  q0,  #16
        vst1.16         {q2},     [r0,:128]!
        vqrshrn.s32     d7,  q1,  #16
        vst1.16         {q3},     [r0,:128]!
        bx              lr

        @ Final 8 samples: only the ones pending in q8/q9.
3:
        vqrshrn.s32     d4,  q8,  #16
        vqrshrn.s32     d5,  q9,  #16
        vst1.16         {q2},     [r0,:128]!
        bx              lr
endfunc
||
67 | |||
/*
 * swri_oldapi_conv_fltp_to_s16_2ch_neon
 *
 * Converts two planar float channels to one interleaved signed 16-bit
 * stream.
 *
 * In:    r0 = destination, interleaved 16-bit samples (stored with a
 *             128-bit alignment hint)
 *        r1 = address of two consecutive pointers, one per channel; each
 *             channel is loaded with a 128-bit alignment hint
 *        r2 = samples per channel.  The code works strictly in units of 8
 *             and has no path for smaller counts, so this must be a
 *             non-zero multiple of 8.
 * Clobb: r0-r3, r12, q0-q1, q8-q13, flags
 *
 * Each float is converted to a fixed-point value with 31 fractional bits.
 * vsri.32 #16 then shifts the first channel right by 16 and inserts it
 * below the untouched top halfword of the second channel, so every 32-bit
 * lane ends up as one output frame: first channel in the low halfword,
 * second channel in the high halfword.  Only the top 16 bits of each
 * converted value are kept, i.e. there is no rounding step here.
 *
 * The code is software pipelined: q8/q9 (first channel) and q10/q11
 * (second channel) always hold 8 converted frames that are not stored yet.
 *
 * NOTE(review): the C prototype is not visible in this file; presumably
 * (int16_t *dst, float *const *src, int len) -- confirm against the caller.
 */
function swri_oldapi_conv_fltp_to_s16_2ch_neon, export=1
        ldm             r1,  {r1, r3}           @ r1 = first channel, r3 = second channel
        subs            r2,  r2,  #8            @ count left after the 8 frames preloaded below
        vld1.32         {q0},     [r1,:128]!
        vcvt.s32.f32    q8,  q0,  #31
        vld1.32         {q1},     [r1,:128]!
        vcvt.s32.f32    q9,  q1,  #31
        vld1.32         {q10},    [r3,:128]!
        vcvt.s32.f32    q10, q10, #31
        vld1.32         {q11},    [r3,:128]!
        vcvt.s32.f32    q11, q11, #31
        beq             3f                      @ flags still from subs: only 8 frames in total
        bics            r12, r2,  #15           @ r12 = remaining count rounded down to 16
        beq             2f                      @ fewer than 16 left: go store 16 and return

        @ Main loop: stores 16 frames per pass and leaves the next 8
        @ converted frames in q8-q11.
1:
        subs            r12, r12, #16
        vld1.32         {q0},     [r1,:128]!
        vcvt.s32.f32    q0,  q0,  #31
        vsri.32         q10, q8,  #16
        vld1.32         {q1},     [r1,:128]!
        vcvt.s32.f32    q1,  q1,  #31
        vld1.32         {q12},    [r3,:128]!
        vcvt.s32.f32    q12, q12, #31
        vld1.32         {q13},    [r3,:128]!
        vsri.32         q11, q9,  #16
        vst1.16         {q10},    [r0,:128]!
        vcvt.s32.f32    q13, q13, #31
        vst1.16         {q11},    [r0,:128]!
        vsri.32         q12, q0,  #16
        vld1.32         {q8},     [r1,:128]!
        vsri.32         q13, q1,  #16
        vst1.16         {q12},    [r0,:128]!
        vcvt.s32.f32    q8,  q8,  #31
        vld1.32         {q9},     [r1,:128]!
        vcvt.s32.f32    q9,  q9,  #31
        vld1.32         {q10},    [r3,:128]!
        vcvt.s32.f32    q10, q10, #31
        vld1.32         {q11},    [r3,:128]!
        vcvt.s32.f32    q11, q11, #31
        vst1.16         {q13},    [r0,:128]!
        bne             1b                      @ flags still from the subs at the loop head
        ands            r2,  r2,  #15           @ anything beyond the 8 pending frames?
        beq             3f

        @ Final 16 frames: the 8 pending in q8-q11 plus 8 loaded here.
2:
        vsri.32         q10, q8,  #16
        vld1.32         {q0},     [r1,:128]!
        vcvt.s32.f32    q0,  q0,  #31
        vld1.32         {q1},     [r1,:128]!
        vcvt.s32.f32    q1,  q1,  #31
        vld1.32         {q12},    [r3,:128]!
        vcvt.s32.f32    q12, q12, #31
        vsri.32         q11, q9,  #16
        vld1.32         {q13},    [r3,:128]!
        vcvt.s32.f32    q13, q13, #31
        vst1.16         {q10},    [r0,:128]!
        vsri.32         q12, q0,  #16
        vst1.16         {q11},    [r0,:128]!
        vsri.32         q13, q1,  #16
        vst1.16         {q12-q13},[r0,:128]!
        bx              lr

        @ Final 8 frames: only the ones pending in q8-q11.
3:
        vsri.32         q10, q8,  #16
        vsri.32         q11, q9,  #16
        vst1.16         {q10-q11},[r0,:128]!
        bx              lr
endfunc
||
131 | |||
/*
 * swri_oldapi_conv_fltp_to_s16_nch_neon
 *
 * Converts an arbitrary number of planar float channels to one interleaved
 * signed 16-bit stream.
 *
 * In:    r0 = destination, interleaved 16-bit samples
 *        r1 = address of an array of per-channel source pointers; every
 *             channel is loaded with a 128-bit alignment hint
 *        r2 = frames (samples per channel).  Every path works in units of
 *             8 frames and none handles a smaller count, so this must be
 *             a non-zero multiple of 8.
 *        r3 = number of channels
 * Clobb: r0-r3, r12, q0-q3, q8-q11, flags, plus whatever the two
 *        tail-called functions clobber.  r4-r8 and lr are used but saved
 *        and restored on the stack.
 *
 * Fewer than 2 channels and exactly 2 channels are handed off with a tail
 * call to the dedicated functions.  Otherwise the channels are consumed in
 * groups: 4 at a time while at least 4 remain, then 2, then 1.  For each
 * group the whole length is converted and written with a stride of
 * r12 = 2 * channels bytes (one output frame), after which r0 is advanced
 * past the columns just written.
 *
 * Each float is converted to a fixed-point value with 31 fractional bits
 * and only the top 16 bits are kept (vsri.32 #16 packs two channels into
 * one 32-bit lane, the 1-channel path stores the odd 16-bit lanes).
 *
 * The tail-call targets are exported symbols, so they are referenced
 * through X(), which applies the same symbol prefix that
 * "function ..., export=1" uses when defining them.  With an empty prefix
 * this is identical to the bare name.  X() is expected to come from the
 * included libavutil/arm/asm.S -- NOTE(review): confirm it is available in
 * this tree.
 *
 * Local labels are numbered by section: 4x = 4-channel group,
 * 2x = 2-channel group, 1x = 1-channel group.
 *
 * NOTE(review): the C prototype is not visible in this file; presumably
 * (int16_t *dst, float *const *src, int len, int channels) -- confirm
 * against the caller.
 */
function swri_oldapi_conv_fltp_to_s16_nch_neon, export=1
        cmp             r3,  #2
        itt             lt
        ldrlt           r1,  [r1]               @ fewer than 2 channels: pass the first plane itself
        blt             X(swri_oldapi_conv_flt_to_s16_neon)
        beq             X(swri_oldapi_conv_fltp_to_s16_2ch_neon)

        push            {r4-r8, lr}
        cmp             r3,  #4
        lsl             r12, r3,  #1            @ r12 = bytes per output frame, flags untouched
        blt             20f                     @ only 3 channels: start with the 2-channel group

        @ 4 channels
        @ r4-r7 = the four planes, lr = frames left, r8 = output cursor.
40:
        ldm             r1!, {r4-r7}
        mov             lr,  r2
        mov             r8,  r0
        vld1.32         {q8},     [r4,:128]!
        vcvt.s32.f32    q8,  q8,  #31
        vld1.32         {q9},     [r5,:128]!
        vcvt.s32.f32    q9,  q9,  #31
        vld1.32         {q10},    [r6,:128]!
        vcvt.s32.f32    q10, q10, #31
        vld1.32         {q11},    [r7,:128]!
        vcvt.s32.f32    q11, q11, #31

        @ Each pass writes 8 frames: 4 pending in q8-q11 and 4 loaded into
        @ q0-q3. vsri pairs up two channels per 32-bit lane, vzip.32 then
        @ gathers both pairs of one frame into a single d register.
41:
        subs            lr,  lr,  #8
        vld1.32         {q0},     [r4,:128]!
        vcvt.s32.f32    q0,  q0,  #31
        vsri.32         q9,  q8,  #16
        vld1.32         {q1},     [r5,:128]!
        vcvt.s32.f32    q1,  q1,  #31
        vsri.32         q11, q10, #16
        vld1.32         {q2},     [r6,:128]!
        vcvt.s32.f32    q2,  q2,  #31
        vzip.32         d18, d22
        vld1.32         {q3},     [r7,:128]!
        vcvt.s32.f32    q3,  q3,  #31
        vzip.32         d19, d23
        vst1.16         {d18},    [r8], r12
        vsri.32         q1,  q0,  #16
        vst1.16         {d22},    [r8], r12
        vsri.32         q3,  q2,  #16
        vst1.16         {d19},    [r8], r12
        vzip.32         d2,  d6
        vst1.16         {d23},    [r8], r12
        vzip.32         d3,  d7
        beq             42f                     @ flags still from subs: no more input to preload
        vld1.32         {q8},     [r4,:128]!
        vcvt.s32.f32    q8,  q8,  #31
        vst1.16         {d2},     [r8], r12
        vld1.32         {q9},     [r5,:128]!
        vcvt.s32.f32    q9,  q9,  #31
        vst1.16         {d6},     [r8], r12
        vld1.32         {q10},    [r6,:128]!
        vcvt.s32.f32    q10, q10, #31
        vst1.16         {d3},     [r8], r12
        vld1.32         {q11},    [r7,:128]!
        vcvt.s32.f32    q11, q11, #31
        vst1.16         {d7},     [r8], r12
        b               41b

        @ Last pass of this group: store the second 4 frames without
        @ preloading, then move on to the remaining channels.
42:
        vst1.16         {d2},     [r8], r12
        vst1.16         {d6},     [r8], r12
        vst1.16         {d3},     [r8], r12
        vst1.16         {d7},     [r8], r12
        subs            r3,  r3,  #4
        it              eq
        popeq           {r4-r8, pc}             @ all channels done
        cmp             r3,  #4
        add             r0,  r0,  #8            @ skip the 4 columns just written, flags untouched
        bge             40b

        @ 2 channels
        @ r4/r5 = the two planes, lr = frames left, r8 = output cursor.
20:
        cmp             r3,  #2
        blt             10f                     @ one channel left
        ldm             r1!, {r4-r5}
        mov             lr,  r2
        mov             r8,  r0
        tst             lr,  #8                 @ odd multiple of 8? tested by the beq below
        vld1.32         {q8},     [r4,:128]!
        vcvt.s32.f32    q8,  q8,  #31
        vld1.32         {q9},     [r5,:128]!
        vcvt.s32.f32    q9,  q9,  #31
        vld1.32         {q10},    [r4,:128]!
        vcvt.s32.f32    q10, q10, #31
        vld1.32         {q11},    [r5,:128]!
        vcvt.s32.f32    q11, q11, #31
        beq             21f                     @ multiple of 16: straight to the main loop
        subs            lr,  lr,  #8
        beq             23f                     @ exactly 8 frames in total

        @ Odd multiple of 8: write the first 8 frames on their own and
        @ preload the next 8 so the main loop can run 16 at a time.
        vsri.32         d18, d16, #16
        vsri.32         d19, d17, #16
        vld1.32         {q8},     [r4,:128]!
        vcvt.s32.f32    q8,  q8,  #31
        vst1.32         {d18[0]}, [r8], r12
        vsri.32         d22, d20, #16
        vst1.32         {d18[1]}, [r8], r12
        vsri.32         d23, d21, #16
        vst1.32         {d19[0]}, [r8], r12
        vst1.32         {d19[1]}, [r8], r12
        vld1.32         {q9},     [r5,:128]!
        vcvt.s32.f32    q9,  q9,  #31
        vst1.32         {d22[0]}, [r8], r12
        vst1.32         {d22[1]}, [r8], r12
        vld1.32         {q10},    [r4,:128]!
        vcvt.s32.f32    q10, q10, #31
        vst1.32         {d23[0]}, [r8], r12
        vst1.32         {d23[1]}, [r8], r12
        vld1.32         {q11},    [r5,:128]!
        vcvt.s32.f32    q11, q11, #31

        @ Main loop: 16 frames per pass, 8 pending in q8-q11 and 8 loaded
        @ into q0-q3. Each 32-bit lane is one 2-channel frame.
21:
        subs            lr,  lr,  #16
        vld1.32         {q0},     [r4,:128]!
        vcvt.s32.f32    q0,  q0,  #31
        vsri.32         d18, d16, #16
        vld1.32         {q1},     [r5,:128]!
        vcvt.s32.f32    q1,  q1,  #31
        vsri.32         d19, d17, #16
        vld1.32         {q2},     [r4,:128]!
        vcvt.s32.f32    q2,  q2,  #31
        vld1.32         {q3},     [r5,:128]!
        vcvt.s32.f32    q3,  q3,  #31
        vst1.32         {d18[0]}, [r8], r12
        vsri.32         d22, d20, #16
        vst1.32         {d18[1]}, [r8], r12
        vsri.32         d23, d21, #16
        vst1.32         {d19[0]}, [r8], r12
        vsri.32         d2,  d0,  #16
        vst1.32         {d19[1]}, [r8], r12
        vsri.32         d3,  d1,  #16
        vst1.32         {d22[0]}, [r8], r12
        vsri.32         d6,  d4,  #16
        vst1.32         {d22[1]}, [r8], r12
        vsri.32         d7,  d5,  #16
        vst1.32         {d23[0]}, [r8], r12
        vst1.32         {d23[1]}, [r8], r12
        beq             22f                     @ flags still from subs: no more input to preload
        vld1.32         {q8},     [r4,:128]!
        vcvt.s32.f32    q8,  q8,  #31
        vst1.32         {d2[0]},  [r8], r12
        vst1.32         {d2[1]},  [r8], r12
        vld1.32         {q9},     [r5,:128]!
        vcvt.s32.f32    q9,  q9,  #31
        vst1.32         {d3[0]},  [r8], r12
        vst1.32         {d3[1]},  [r8], r12
        vld1.32         {q10},    [r4,:128]!
        vcvt.s32.f32    q10, q10, #31
        vst1.32         {d6[0]},  [r8], r12
        vst1.32         {d6[1]},  [r8], r12
        vld1.32         {q11},    [r5,:128]!
        vcvt.s32.f32    q11, q11, #31
        vst1.32         {d7[0]},  [r8], r12
        vst1.32         {d7[1]},  [r8], r12
        bgt             21b                     @ flags still from the subs at the loop head

        @ Last pass of the main loop: store the second 8 frames.
22:
        vst1.32         {d2[0]},  [r8], r12
        vst1.32         {d2[1]},  [r8], r12
        vst1.32         {d3[0]},  [r8], r12
        vst1.32         {d3[1]},  [r8], r12
        vst1.32         {d6[0]},  [r8], r12
        vst1.32         {d6[1]},  [r8], r12
        vst1.32         {d7[0]},  [r8], r12
        vst1.32         {d7[1]},  [r8], r12
        b               24f

        @ Exactly 8 frames: store the preloaded ones and finish the group.
23:
        vsri.32         d18, d16, #16
        vsri.32         d19, d17, #16
        vst1.32         {d18[0]}, [r8], r12
        vsri.32         d22, d20, #16
        vst1.32         {d18[1]}, [r8], r12
        vsri.32         d23, d21, #16
        vst1.32         {d19[0]}, [r8], r12
        vst1.32         {d19[1]}, [r8], r12
        vst1.32         {d22[0]}, [r8], r12
        vst1.32         {d22[1]}, [r8], r12
        vst1.32         {d23[0]}, [r8], r12
        vst1.32         {d23[1]}, [r8], r12

24:
        subs            r3,  r3,  #2
        add             r0,  r0,  #4            @ skip the 2 columns just written, flags untouched
        it              eq
        popeq           {r4-r8, pc}             @ all channels done

        @ 1 channel
        @ r4 = the plane, lr = frames left, r5 = output cursor. The sample is
        @ the top halfword of each converted lane, hence the odd 16-bit lanes.
10:
        ldr             r4,  [r1]
        tst             r2,  #8                 @ odd multiple of 8? tested by the bne below
        mov             lr,  r2
        mov             r5,  r0
        vld1.32         {q0},     [r4,:128]!
        vcvt.s32.f32    q0,  q0,  #31
        vld1.32         {q1},     [r4,:128]!
        vcvt.s32.f32    q1,  q1,  #31
        bne             13f

        @ Main loop: 16 frames per pass, 8 pending in q0/q1 and 8 loaded
        @ into q2/q3.
11:
        subs            lr,  lr,  #16
        vld1.32         {q2},     [r4,:128]!
        vcvt.s32.f32    q2,  q2,  #31
        vld1.32         {q3},     [r4,:128]!
        vcvt.s32.f32    q3,  q3,  #31
        vst1.16         {d0[1]},  [r5,:16], r12
        vst1.16         {d0[3]},  [r5,:16], r12
        vst1.16         {d1[1]},  [r5,:16], r12
        vst1.16         {d1[3]},  [r5,:16], r12
        vst1.16         {d2[1]},  [r5,:16], r12
        vst1.16         {d2[3]},  [r5,:16], r12
        vst1.16         {d3[1]},  [r5,:16], r12
        vst1.16         {d3[3]},  [r5,:16], r12
        beq             12f                     @ flags still from subs: skip the preload on the last pass
        vld1.32         {q0},     [r4,:128]!
        vcvt.s32.f32    q0,  q0,  #31
        vld1.32         {q1},     [r4,:128]!
        vcvt.s32.f32    q1,  q1,  #31
12:
        vst1.16         {d4[1]},  [r5,:16], r12
        vst1.16         {d4[3]},  [r5,:16], r12
        vst1.16         {d5[1]},  [r5,:16], r12
        vst1.16         {d5[3]},  [r5,:16], r12
        vst1.16         {d6[1]},  [r5,:16], r12
        vst1.16         {d6[3]},  [r5,:16], r12
        vst1.16         {d7[1]},  [r5,:16], r12
        vst1.16         {d7[3]},  [r5,:16], r12
        bgt             11b                     @ flags still from the subs at the loop head
        pop             {r4-r8, pc}

        @ Odd multiple of 8: write the first 8 frames on their own, then
        @ preload 8 more and join the main loop, unless that was all.
13:
        subs            lr,  lr,  #8
        vst1.16         {d0[1]},  [r5,:16], r12
        vst1.16         {d0[3]},  [r5,:16], r12
        vst1.16         {d1[1]},  [r5,:16], r12
        vst1.16         {d1[3]},  [r5,:16], r12
        vst1.16         {d2[1]},  [r5,:16], r12
        vst1.16         {d2[3]},  [r5,:16], r12
        vst1.16         {d3[1]},  [r5,:16], r12
        vst1.16         {d3[3]},  [r5,:16], r12
        it              eq
        popeq           {r4-r8, pc}
        vld1.32         {q0},     [r4,:128]!
        vcvt.s32.f32    q0,  q0,  #31
        vld1.32         {q1},     [r4,:128]!
        vcvt.s32.f32    q1,  q1,  #31
        b               11b
endfunc