Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
6148 | serge | 1 | /* |
2 | * Copyright (c) 2008 Siarhei Siamashka |
||
3 | * |
||
4 | * This file is part of FFmpeg |
||
5 | * |
||
6 | * FFmpeg is free software; you can redistribute it and/or |
||
7 | * modify it under the terms of the GNU Lesser General Public |
||
8 | * License as published by the Free Software Foundation; either |
||
9 | * version 2.1 of the License, or (at your option) any later version. |
||
10 | * |
||
11 | * FFmpeg is distributed in the hope that it will be useful, |
||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||
14 | * Lesser General Public License for more details. |
||
15 | * |
||
16 | * You should have received a copy of the GNU Lesser General Public |
||
17 | * License along with FFmpeg; if not, write to the Free Software |
||
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||
19 | */ |
||
20 | |||
21 | #include "config.h" |
||
22 | #include "asm.S" |
||
23 | |||
24 | /** |
||
25 | * Element-wise product of two float arrays: dst[i] = src0[i] * src1[i] for i in [0, len). Assume that len is a positive number and is multiple of 8 |
||
26 | */ |
||
27 | @ void ff_vector_fmul_vfp(float *dst, const float *src0, const float *src1, int len) |
||
28 | function ff_vector_fmul_vfp, export=1 |
||
29 | vpush {d8-d15} /* preserve d8-d15 (aliases of s16-s31), which the loop overwrites */ |
||
30 | fmrx r12, fpscr /* r12 = current FPSCR */ |
||
31 | orr r12, r12, #(3 << 16) /* set vector size to 4 */ |
||
32 | fmxr fpscr, r12 /* write the modified FPSCR back */ |
||
33 | |||
34 | vldmia r1!, {s0-s3} /* s0-s3 = src0[0..3], r1 advances */ |
||
35 | vldmia r2!, {s8-s11} /* s8-s11 = src1[0..3], r2 advances */ |
||
36 | vldmia r1!, {s4-s7} /* s4-s7 = src0[4..7] */ |
||
37 | vldmia r2!, {s12-s15} /* s12-s15 = src1[4..7] */ |
||
38 | vmul.f32 s8, s0, s8 /* with the vector size of 4 set above, one vmul covers four consecutive registers: s8-s11 = s0-s3 * s8-s11 */ |
||
39 | 1: |
||
40 | subs r3, r3, #16 /* len -= 16; no later instruction in the loop sets the flags, so every ge/gt below refers to this result */ |
||
41 | vmul.f32 s12, s4, s12 /* s12-s15 = s4-s7 * s12-s15 */ |
||
42 | itttt ge /* ge: at least 16 elements were left, so a second group of 8 exists */ |
||
43 | vldmiage r1!, {s16-s19} /* second group, src0, first half */ |
||
44 | vldmiage r2!, {s24-s27} /* second group, src1, first half */ |
||
45 | vldmiage r1!, {s20-s23} /* second group, src0, second half */ |
||
46 | vldmiage r2!, {s28-s31} /* second group, src1, second half */ |
||
47 | it ge |
||
48 | vmulge.f32 s24, s16, s24 /* s24-s27 = s16-s19 * s24-s27 */ |
||
49 | vstmia r0!, {s8-s11} /* store the first group's products (always present) */ |
||
50 | vstmia r0!, {s12-s15} |
||
51 | it ge |
||
52 | vmulge.f32 s28, s20, s28 /* s28-s31 = s20-s23 * s28-s31 */ |
||
53 | itttt gt /* gt: elements remain beyond these 16, so preload the next group of 8 into the first register set */ |
||
54 | vldmiagt r1!, {s0-s3} |
||
55 | vldmiagt r2!, {s8-s11} |
||
56 | vldmiagt r1!, {s4-s7} |
||
57 | vldmiagt r2!, {s12-s15} |
||
58 | ittt ge |
||
59 | vmulge.f32 s8, s0, s8 /* first multiply of the next pass; on eq nothing was preloaded and this result is never stored */ |
||
60 | vstmiage r0!, {s24-s27} /* store the second group's products */ |
||
61 | vstmiage r0!, {s28-s31} |
||
62 | bgt 1b /* repeat while elements remain */ |
||
63 | |||
64 | bic r12, r12, #(7 << 16) /* set vector size back to 1 */ |
||
65 | fmxr fpscr, r12 /* write FPSCR with the three size bits cleared */ |
||
66 | vpop {d8-d15} /* restore the registers saved on entry */ |
||
67 | bx lr /* return to caller */ |
||
68 | endfunc |
||
69 | |||
70 | /** |
||
71 | * ARM VFP optimized implementation of 'vector_fmul_reverse_c' function. |
||
72 | * Computes dst[i] = src0[i] * src1[len - 1 - i] for i in [0, len): src0 is read forwards, src1 backwards from its end. Assume that len is a positive number and is multiple of 8 |
||
73 | */ |
||
74 | @ void ff_vector_fmul_reverse_vfp(float *dst, const float *src0, |
||
75 | @ const float *src1, int len) |
||
76 | function ff_vector_fmul_reverse_vfp, export=1 |
||
77 | vpush {d8-d15} /* preserve d8-d15 (aliases of s16-s31), which the loop overwrites */ |
||
78 | add r2, r2, r3, lsl #2 /* r2 = src1 + len * 4 bytes, i.e. one past the last float */ |
||
79 | vldmdb r2!, {s0-s3} /* s0-s3 = src1[len-4..len-1], r2 moves backwards */ |
||
80 | vldmia r1!, {s8-s11} /* s8-s11 = src0[0..3], r1 advances */ |
||
81 | vldmdb r2!, {s4-s7} /* s4-s7 = src1[len-8..len-5] */ |
||
82 | vldmia r1!, {s12-s15} /* s12-s15 = src0[4..7] */ |
||
83 | vmul.f32 s8, s3, s8 /* FPSCR is not touched in this function, so each vmul handles one element (assumes vector size 1 on entry); registers are paired in reverse order: src0[0] * src1[len-1] */ |
||
84 | vmul.f32 s9, s2, s9 |
||
85 | vmul.f32 s10, s1, s10 |
||
86 | vmul.f32 s11, s0, s11 |
||
87 | 1: |
||
88 | subs r3, r3, #16 /* len -= 16; no later instruction in the loop sets the flags, so every ge/gt below refers to this result */ |
||
89 | it ge |
||
90 | vldmdbge r2!, {s16-s19} /* ge: a second group of 8 exists; its loads are interleaved with the first group's remaining multiplies */ |
||
91 | vmul.f32 s12, s7, s12 |
||
92 | it ge |
||
93 | vldmiage r1!, {s24-s27} |
||
94 | vmul.f32 s13, s6, s13 |
||
95 | it ge |
||
96 | vldmdbge r2!, {s20-s23} |
||
97 | vmul.f32 s14, s5, s14 |
||
98 | it ge |
||
99 | vldmiage r1!, {s28-s31} |
||
100 | vmul.f32 s15, s4, s15 |
||
101 | it ge |
||
102 | vmulge.f32 s24, s19, s24 /* second group, again pairing registers in reverse order */ |
||
103 | it gt |
||
104 | vldmdbgt r2!, {s0-s3} /* gt: elements remain beyond these 16, so start preloading the next group */ |
||
105 | it ge |
||
106 | vmulge.f32 s25, s18, s25 |
||
107 | vstmia r0!, {s8-s13} /* store six products of the first group */ |
||
108 | it ge |
||
109 | vmulge.f32 s26, s17, s26 |
||
110 | it gt |
||
111 | vldmiagt r1!, {s8-s11} /* s8-s11 were stored above and can be reloaded */ |
||
112 | itt ge |
||
113 | vmulge.f32 s27, s16, s27 |
||
114 | vmulge.f32 s28, s23, s28 |
||
115 | it gt |
||
116 | vldmdbgt r2!, {s4-s7} |
||
117 | it ge |
||
118 | vmulge.f32 s29, s22, s29 |
||
119 | vstmia r0!, {s14-s15} /* store the remaining two products of the first group */ |
||
120 | ittt ge |
||
121 | vmulge.f32 s30, s21, s30 |
||
122 | vmulge.f32 s31, s20, s31 |
||
123 | vmulge.f32 s8, s3, s8 /* first multiplies of the next pass; on eq nothing was preloaded and s8-s11 are never stored */ |
||
124 | it gt |
||
125 | vldmiagt r1!, {s12-s15} /* s12-s15 were stored above and can be reloaded */ |
||
126 | itttt ge |
||
127 | vmulge.f32 s9, s2, s9 |
||
128 | vmulge.f32 s10, s1, s10 |
||
129 | vstmiage r0!, {s24-s27} /* store the second group's first four products */ |
||
130 | vmulge.f32 s11, s0, s11 |
||
131 | it ge |
||
132 | vstmiage r0!, {s28-s31} /* store the second group's last four products */ |
||
133 | bgt 1b /* repeat while elements remain */ |
||
134 | |||
135 | vpop {d8-d15} /* restore the registers saved on entry */ |
||
136 | bx lr /* return to caller */ |
||
137 | endfunc |