Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
4349 Serge 1
/*
2
 * ARM NEON optimised DSP functions
3
 * Copyright (c) 2008 Mans Rullgard 
4
 *
5
 * This file is part of FFmpeg.
6
 *
7
 * FFmpeg is free software; you can redistribute it and/or
8
 * modify it under the terms of the GNU Lesser General Public
9
 * License as published by the Free Software Foundation; either
10
 * version 2.1 of the License, or (at your option) any later version.
11
 *
12
 * FFmpeg is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
 * Lesser General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU Lesser General Public
18
 * License along with FFmpeg; if not, write to the Free Software
19
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
 */
21
 
22
#include "libavutil/arm/asm.S"
23
 
24
/*
 * ff_clear_block_neon
 *
 * Writes 128 bytes of zero (64 halfwords) starting at the address in r0.
 * NOTE(review): presumably called from C with a single pointer argument
 * (a block of 64 16-bit coefficients) - confirm against the C prototype.
 *
 * In:     r0 = destination; every store carries a :128 qualifier, so the
 *              address must be 16-byte aligned
 * Clobb:  q0; r0 is left pointing just past the cleared region
 */
function ff_clear_block_neon, export=1
        vmov.i16        q0, #0                  @ q0 = eight zero halfwords
        .rept           8                       @ 8 stores of 16 bytes each
        vst1.16         {d0-d1}, [r0,:128]!
        .endr
        bx              lr
endfunc
31
 
32
/*
 * ff_clear_blocks_neon
 *
 * Writes 6 * 128 = 768 bytes of zero starting at the address in r0,
 * i.e. the same fill as six consecutive 128-byte blocks.
 * NOTE(review): presumably called from C with a single pointer argument
 * covering six contiguous coefficient blocks - confirm against the caller.
 *
 * In:     r0 = destination; every store carries a :128 qualifier, so the
 *              address must be 16-byte aligned
 * Clobb:  q0; r0 is left pointing just past the cleared region
 */
function ff_clear_blocks_neon, export=1
        vmov.i16        q0, #0                  @ q0 = eight zero halfwords
        .rept           8*6                     @ 8 stores per block, 6 blocks
        vst1.16         {d0-d1}, [r0,:128]!
        .endr
        bx              lr
endfunc
39
 
40
/*
 * ff_put_pixels_clamped_neon
 *
 * Reads 64 signed 16-bit values (128 bytes) from r0, saturates each one to
 * the unsigned 8-bit range and stores the result as 8 rows of 8 bytes,
 * consecutive rows being r2 bytes apart.
 *
 * In:     r0 = source halfwords, 16-byte aligned (:128 on the loads)
 *         r1 = destination; each row store uses :64, so r1 and r2 must
 *              keep every row address 8-byte aligned
 *         r2 = distance in bytes between destination rows
 * Clobb:  q0-q3, q8-q15; r0 and r1 are advanced by the post-increments
 *
 * Loads, narrowing and stores are interleaved; the instruction order is
 * kept exactly as scheduled.
 */
function ff_put_pixels_clamped_neon, export=1
        vld1.16         {q8-q9},   [r0,:128]!   @ source rows 0-1
        vqmovun.s16     d0,  q8                 @ row 0 -> u8 with saturation
        vld1.16         {q10-q11}, [r0,:128]!   @ source rows 2-3
        vqmovun.s16     d1,  q9                 @ row 1
        vld1.16         {q12-q13}, [r0,:128]!   @ source rows 4-5
        vqmovun.s16     d2,  q10                @ row 2
        vld1.16         {q14-q15}, [r0,:128]!   @ source rows 6-7
        vqmovun.s16     d3,  q11                @ row 3
        vst1.8          {d0}, [r1,:64], r2      @ write row 0
        vqmovun.s16     d4,  q12                @ row 4
        vst1.8          {d1}, [r1,:64], r2      @ write row 1
        vqmovun.s16     d5,  q13                @ row 5
        vst1.8          {d2}, [r1,:64], r2      @ write row 2
        vqmovun.s16     d6,  q14                @ row 6
        vst1.8          {d3}, [r1,:64], r2      @ write row 3
        vqmovun.s16     d7,  q15                @ row 7
        vst1.8          {d4}, [r1,:64], r2      @ write row 4
        vst1.8          {d5}, [r1,:64], r2      @ write row 5
        vst1.8          {d6}, [r1,:64], r2      @ write row 6
        vst1.8          {d7}, [r1,:64], r2      @ write row 7
        bx              lr
endfunc
63
 
64
/*
 * ff_put_signed_pixels_clamped_neon
 *
 * Reads 64 signed 16-bit values (128 bytes) from r0, saturates each one to
 * the signed 8-bit range, adds 128 to every byte (modulo 256, which maps
 * -128..127 onto 0..255) and stores the result as 8 rows of 8 bytes,
 * consecutive rows being r2 bytes apart.
 *
 * In:     r0 = source halfwords, 16-byte aligned (:128 on the loads)
 *         r1 = destination; each row store uses :64, so r1 and r2 must
 *              keep every row address 8-byte aligned
 *         r2 = distance in bytes between destination rows
 * Clobb:  q0-q3, q8-q13, d31; r0 and r1 are advanced by the post-increments
 *
 * q8 and q9 are reused for rows 2 and 3 once rows 0 and 1 have been
 * narrowed, so the instruction order must be kept as is.
 */
function ff_put_signed_pixels_clamped_neon, export=1
        vmov.u8         d31, #128               @ bias added to every byte
        vld1.16         {q8},  [r0,:128]!       @ source row 0
        vqmovn.s16      d0,  q8                 @ row 0 -> s8 with saturation
        vld1.16         {q9},  [r0,:128]!       @ source row 1
        vqmovn.s16      d1,  q9                 @ row 1
        vld1.16         {q8},  [r0,:128]!       @ source row 2 (q8 reused)
        vqmovn.s16      d2,  q8                 @ row 2
        vld1.16         {q9},  [r0,:128]!       @ source row 3 (q9 reused)
        vadd.u8         d0,  d0,  d31           @ bias row 0
        vld1.16         {q10}, [r0,:128]!       @ source row 4
        vadd.u8         d1,  d1,  d31           @ bias row 1
        vld1.16         {q11}, [r0,:128]!       @ source row 5
        vadd.u8         d2,  d2,  d31           @ bias row 2
        vst1.8          {d0}, [r1,:64], r2      @ write row 0
        vqmovn.s16      d3,  q9                 @ row 3
        vst1.8          {d1}, [r1,:64], r2      @ write row 1
        vqmovn.s16      d4,  q10                @ row 4
        vst1.8          {d2}, [r1,:64], r2      @ write row 2
        vqmovn.s16      d5,  q11                @ row 5
        vld1.16         {q12}, [r0,:128]!       @ source row 6
        vadd.u8         d3,  d3,  d31           @ bias row 3
        vld1.16         {q13}, [r0,:128]!       @ source row 7
        vadd.u8         d4,  d4,  d31           @ bias row 4
        vadd.u8         d5,  d5,  d31           @ bias row 5
        vst1.8          {d3}, [r1,:64], r2      @ write row 3
        vqmovn.s16      d6,  q12                @ row 6
        vst1.8          {d4}, [r1,:64], r2      @ write row 4
        vqmovn.s16      d7,  q13                @ row 7
        vst1.8          {d5}, [r1,:64], r2      @ write row 5
        vadd.u8         d6,  d6,  d31           @ bias row 6
        vadd.u8         d7,  d7,  d31           @ bias row 7
        vst1.8          {d6}, [r1,:64], r2      @ write row 6
        vst1.8          {d7}, [r1,:64], r2      @ write row 7
        bx              lr
endfunc
100
 
101
/*
 * ff_add_pixels_clamped_neon
 *
 * For each of 8 rows: loads 8 bytes from the pixel buffer, adds them
 * (zero-extended) to 8 signed 16-bit values read from r0, saturates the
 * sums to the unsigned 8-bit range and writes the 8 bytes back to the
 * same row of the pixel buffer.
 *
 * In:     r0 = 64 halfwords (128 bytes), 16-byte aligned (:128)
 *         r1 = pixel buffer, updated in place; every row access uses :64,
 *              so r1 and r2 must keep every row address 8-byte aligned
 *         r2 = distance in bytes between pixel rows
 * Clobb:  r3, q0-q3, q8-q9; r0 and r1 are advanced by the post-increments
 *
 * r1 walks the rows for reading while r3, a copy of the original r1,
 * walks the same rows for writing. Registers are recycled for rows 4-7,
 * so the instruction order must be kept as is.
 */
function ff_add_pixels_clamped_neon, export=1
        mov             r3,  r1                 @ r3 = write pointer
        vld1.8          {d16}, [r1,:64], r2     @ pixels row 0
        vld1.16         {q0},  [r0,:128]!       @ halfwords row 0
        vaddw.u8        q0,  q0,  d16           @ row 0 sum
        vld1.8          {d17}, [r1,:64], r2     @ pixels row 1
        vld1.16         {q1},  [r0,:128]!       @ halfwords row 1
        vqmovun.s16     d0,  q0                 @ row 0 -> u8 with saturation
        vld1.8          {d18}, [r1,:64], r2     @ pixels row 2
        vaddw.u8        q1,  q1,  d17           @ row 1 sum
        vld1.16         {q2},  [r0,:128]!       @ halfwords row 2
        vaddw.u8        q2,  q2,  d18           @ row 2 sum
        vst1.8          {d0},  [r3,:64], r2     @ write row 0
        vqmovun.s16     d2,  q1                 @ row 1 -> u8
        vld1.8          {d19}, [r1,:64], r2     @ pixels row 3
        vld1.16         {q3},  [r0,:128]!       @ halfwords row 3
        vaddw.u8        q3,  q3,  d19           @ row 3 sum
        vqmovun.s16     d4,  q2                 @ row 2 -> u8
        vst1.8          {d2},  [r3,:64], r2     @ write row 1
        vld1.8          {d16}, [r1,:64], r2     @ pixels row 4
        vqmovun.s16     d6,  q3                 @ row 3 -> u8
        vld1.16         {q0},  [r0,:128]!       @ halfwords row 4
        vaddw.u8        q0,  q0,  d16           @ row 4 sum
        vst1.8          {d4},  [r3,:64], r2     @ write row 2
        vld1.8          {d17}, [r1,:64], r2     @ pixels row 5
        vld1.16         {q1},  [r0,:128]!       @ halfwords row 5
        vaddw.u8        q1,  q1,  d17           @ row 5 sum
        vst1.8          {d6},  [r3,:64], r2     @ write row 3
        vqmovun.s16     d0,  q0                 @ row 4 -> u8
        vld1.8          {d18}, [r1,:64], r2     @ pixels row 6
        vld1.16         {q2},  [r0,:128]!       @ halfwords row 6
        vaddw.u8        q2,  q2,  d18           @ row 6 sum
        vst1.8          {d0},  [r3,:64], r2     @ write row 4
        vqmovun.s16     d2,  q1                 @ row 5 -> u8
        vld1.8          {d19}, [r1,:64], r2     @ pixels row 7
        vqmovun.s16     d4,  q2                 @ row 6 -> u8
        vld1.16         {q3},  [r0,:128]!       @ halfwords row 7
        vaddw.u8        q3,  q3,  d19           @ row 7 sum
        vst1.8          {d2},  [r3,:64], r2     @ write row 5
        vqmovun.s16     d6,  q3                 @ row 7 -> u8
        vst1.8          {d4},  [r3,:64], r2     @ write row 6
        vst1.8          {d6},  [r3,:64], r2     @ write row 7
        bx              lr
endfunc
145
 
146
/*
 * ff_vector_clipf_neon
 *
 * Clamps 32-bit floats read from r1 to the range [lower, upper] and stores
 * them to r0, eight elements per iteration.
 *
 * The VFP and NOVFP line prefixes are defined outside this chunk
 * (NOTE(review): presumably in libavutil/arm/asm.S, selecting the
 * hard-float or soft-float argument variant - confirm). As written:
 *   VFP   lines: lower = d0[0], upper = d0[1], count already in r2
 *   NOVFP lines: lower = r2, upper = r3, count loaded from [sp]
 *
 * In:     r0 = destination, 16-byte aligned (:128)
 *         r1 = source, 16-byte aligned (:128)
 *         count = number of floats; expected to be a positive multiple
 *                 of 8
 * Clobb:  r2, flags, q0-q3, q8-q11; r0 and r1 are advanced
 *
 * The loop is software pipelined: the first eight floats are loaded and
 * upper-clamped before the loop, and each pass stores the previous batch
 * while the next one is already loaded. At least eight floats are
 * therefore always read and written.
 *
 * The exit test is "ble" rather than "beq": for positive multiples of 8
 * the two behave identically, but with a zero, negative or non-multiple
 * count "beq" would step past zero and keep running far beyond the
 * buffers. With "ble" the work is rounded up to the next multiple of 8
 * and then stops.
 */
function ff_vector_clipf_neon, export=1
        /* d0 is the low half of q0, so the upper bound must be copied
         * out of d0[1] before q0 is overwritten. */
VFP     vdup.32         q1,  d0[1]
VFP     vdup.32         q0,  d0[0]
NOVFP   vdup.32         q0,  r2
NOVFP   vdup.32         q1,  r3
NOVFP   ldr             r2,  [sp]
        vld1.f32        {q2}, [r1,:128]!        @ prologue: first 8 floats
        vmin.f32        q10, q2,  q1            @ apply upper bound
        vld1.f32        {q3}, [r1,:128]!
        vmin.f32        q11, q3,  q1
1:
        vmax.f32        q8,  q10, q0            @ apply lower bound
        vmax.f32        q9,  q11, q0
        subs            r2,  r2,  #8            @ 8 elements finished
        ble             2f                      @ nothing left: flush and return
        vld1.f32        {q2}, [r1,:128]!        @ fetch the next batch early
        vmin.f32        q10, q2,  q1
        vld1.f32        {q3}, [r1,:128]!
        vmin.f32        q11, q3,  q1
        vst1.f32        {q8}, [r0,:128]!        @ store the finished batch
        vst1.f32        {q9}, [r0,:128]!
        b               1b
2:
        vst1.f32        {q8}, [r0,:128]!        @ store the final batch
        vst1.f32        {q9}, [r0,:128]!
        bx              lr
endfunc
171
 
172
/*
 * ff_apply_window_int16_neon
 *
 * Multiplies a buffer of 16-bit samples by a window using vqrdmulh.s16
 * (saturating, rounding, doubling multiply returning the high half).
 * Each iteration handles 8 samples from the front of the buffer and 8
 * from the back with the same 8 window coefficients, the back group
 * using them in reversed order. Only the first half of the window is
 * therefore read, and it is applied mirrored to the second half of the
 * data.
 *
 * In:     r0 = output, 16-byte aligned (:128)
 *         r1 = input, 16-byte aligned (:128)
 *         r2 = window coefficients, 16-byte aligned (:128)
 *         r3 = number of samples; 16 are consumed per iteration and the
 *              loop body always runs at least once
 * Clobb:  r12, flags, q0-q3; r4 and lr are saved and restored on the
 *         stack; r0-r3 are advanced or decremented
 */
function ff_apply_window_int16_neon, export=1
        push            {r4,lr}
        add             r4,  r1,  r3,  lsl #1   @ r4 = input  + 2 * r3 (end)
        add             lr,  r0,  r3,  lsl #1   @ lr = output + 2 * r3 (end)
        sub             r4,  r4,  #16           @ back up to the last 8 samples
        sub             lr,  lr,  #16
        mov             r12, #-16               @ backward step for r4 and lr
1:
        vld1.16         {d0-d1}, [r1,:128]!     @ q0 = 8 samples from the front
        vld1.16         {d4-d5}, [r2,:128]!     @ q2 = 8 window coefficients
        vld1.16         {d2-d3}, [r4,:128], r12 @ q1 = 8 samples from the back
        vrev64.16       q3,  q2                 @ reverse within each 64-bit half
        vqrdmulh.s16    q0,  q0,  q2            @ front group * window
        vqrdmulh.s16    d2,  d2,  d7            @ halves crossed (d7, d6) to
        vqrdmulh.s16    d3,  d3,  d6            @ complete the 8-element reversal
        vst1.16         {d0-d1}, [r0,:128]!
        vst1.16         {d2-d3}, [lr,:128], r12
        subs            r3,  r3,  #16           @ 16 samples done per pass
        bgt             1b

        pop             {r4,pc}                 @ restore r4 and return
endfunc
194
 
195
/*
 * ff_vector_clip_int32_neon
 *
 * Clamps signed 32-bit integers read from r1 to the range [r2, r3] and
 * stores them to r0, eight elements per iteration.
 *
 * In:     r0   = destination, 16-byte aligned (:128)
 *         r1   = source, 16-byte aligned (:128)
 *         r2   = lower bound (broadcast into q0)
 *         r3   = upper bound (broadcast into q1)
 *         [sp] = element count, loaded into r2 once the lower bound has
 *                been broadcast
 * Clobb:  r2, flags, q0-q3; r0 and r1 are advanced
 *
 * The loop tests the counter after the body, so eight elements are always
 * processed and the count is effectively rounded up to a multiple of 8.
 */
function ff_vector_clip_int32_neon, export=1
        vdup.32         q0,  r2                 @ q0 = lower bound in all lanes
        vdup.32         q1,  r3                 @ q1 = upper bound in all lanes
        ldr             r2,  [sp]               @ r2 = element count
1:
        vld1.32         {d4-d7}, [r1,:128]!     @ q2, q3 = next 8 inputs
        vmin.s32        q2,  q2,  q1            @ apply upper bound
        vmin.s32        q3,  q3,  q1
        vmax.s32        q2,  q2,  q0            @ apply lower bound
        vmax.s32        q3,  q3,  q0
        vst1.32         {d4-d7}, [r0,:128]!
        subs            r2,  r2,  #8
        bgt             1b
        bx              lr
endfunc