Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
4349 | Serge | 1 | /* |
2 | * Copyright (c) 2004 Romain Dolbeau |
||
3 | * |
||
4 | * This file is part of FFmpeg. |
||
5 | * |
||
6 | * FFmpeg is free software; you can redistribute it and/or |
||
7 | * modify it under the terms of the GNU Lesser General Public |
||
8 | * License as published by the Free Software Foundation; either |
||
9 | * version 2.1 of the License, or (at your option) any later version. |
||
10 | * |
||
11 | * FFmpeg is distributed in the hope that it will be useful, |
||
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||
14 | * Lesser General Public License for more details. |
||
15 | * |
||
16 | * You should have received a copy of the GNU Lesser General Public |
||
17 | * License along with FFmpeg; if not, write to the Free Software |
||
18 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||
19 | */ |
||
20 | |||
21 | #include "config.h" |
||
22 | #include "libavutil/attributes.h" |
||
23 | #include "libavutil/cpu.h" |
||
24 | #include "libavutil/intreadwrite.h" |
||
25 | #include "libavutil/ppc/types_altivec.h" |
||
26 | #include "libavutil/ppc/util_altivec.h" |
||
27 | #include "libavcodec/h264qpel.h" |
||
28 | #include "dsputil_altivec.h" |
||
29 | |||
#if HAVE_ALTIVEC

/* Elementary store operations used by the lowpass filter templates:
 * PUT writes the filtered result directly, AVG rounds-averages it with
 * the pixels already present in the destination vector. */
#define PUT_OP_U8_ALTIVEC(d, s, dst) d = s
#define AVG_OP_U8_ALTIVEC(d, s, dst) d = vec_avg(dst, s)

/* First template instantiation: the "put" flavour of the 16x16
 * horizontal, vertical and horizontal+vertical lowpass filters.
 * h264qpel_template.c emits one function per PREFIX_* name, using
 * OP_U8_ALTIVEC as the final store operation. */
#define OP_U8_ALTIVEC                         PUT_OP_U8_ALTIVEC
#define PREFIX_h264_qpel16_h_lowpass_altivec  put_h264_qpel16_h_lowpass_altivec
#define PREFIX_h264_qpel16_h_lowpass_num      altivec_put_h264_qpel16_h_lowpass_num
#define PREFIX_h264_qpel16_v_lowpass_altivec  put_h264_qpel16_v_lowpass_altivec
#define PREFIX_h264_qpel16_v_lowpass_num      altivec_put_h264_qpel16_v_lowpass_num
#define PREFIX_h264_qpel16_hv_lowpass_altivec put_h264_qpel16_hv_lowpass_altivec
#define PREFIX_h264_qpel16_hv_lowpass_num     altivec_put_h264_qpel16_hv_lowpass_num
#include "h264qpel_template.c"
#undef OP_U8_ALTIVEC
#undef PREFIX_h264_qpel16_h_lowpass_altivec
#undef PREFIX_h264_qpel16_h_lowpass_num
#undef PREFIX_h264_qpel16_v_lowpass_altivec
#undef PREFIX_h264_qpel16_v_lowpass_num
#undef PREFIX_h264_qpel16_hv_lowpass_altivec
#undef PREFIX_h264_qpel16_hv_lowpass_num

/* Second instantiation: the "avg" flavour of the same filters, where the
 * filtered result is averaged with the existing destination pixels. */
#define OP_U8_ALTIVEC                         AVG_OP_U8_ALTIVEC
#define PREFIX_h264_qpel16_h_lowpass_altivec  avg_h264_qpel16_h_lowpass_altivec
#define PREFIX_h264_qpel16_h_lowpass_num      altivec_avg_h264_qpel16_h_lowpass_num
#define PREFIX_h264_qpel16_v_lowpass_altivec  avg_h264_qpel16_v_lowpass_altivec
#define PREFIX_h264_qpel16_v_lowpass_num      altivec_avg_h264_qpel16_v_lowpass_num
#define PREFIX_h264_qpel16_hv_lowpass_altivec avg_h264_qpel16_hv_lowpass_altivec
#define PREFIX_h264_qpel16_hv_lowpass_num     altivec_avg_h264_qpel16_hv_lowpass_num
#include "h264qpel_template.c"
#undef OP_U8_ALTIVEC
#undef PREFIX_h264_qpel16_h_lowpass_altivec
#undef PREFIX_h264_qpel16_h_lowpass_num
#undef PREFIX_h264_qpel16_v_lowpass_altivec
#undef PREFIX_h264_qpel16_v_lowpass_num
#undef PREFIX_h264_qpel16_hv_lowpass_altivec
#undef PREFIX_h264_qpel16_hv_lowpass_num
/*
 * H264_MC(OPNAME, SIZE, CODETYPE) expands to the 16 quarter-pel motion
 * compensation functions OPNAMEh264_qpelSIZE_mcXY_CODETYPE, where X is
 * the horizontal and Y the vertical quarter-sample offset (0..3):
 *
 * - mc00: plain SIZExSIZE pixel copy (or average, for OPNAME == avg_).
 * - mc20 / mc02: 6-tap lowpass filter applied horizontally / vertically.
 * - mc10, mc30 / mc01, mc03: half-pel filtered plane averaged with the
 *   nearest integer-pel source column / row (src, src+1, src+stride).
 * - mc11, mc31, mc13, mc33: average of a horizontally filtered plane and
 *   a vertically filtered plane, taken at the appropriate offsets.
 * - mc22: combined horizontal+vertical (hv) filter.
 * - mc21, mc23 / mc12, mc32: hv-filtered plane averaged with an h- / v-
 *   filtered plane.
 *
 * Intermediate planes (half, halfH, halfV, halfHV) are 16-byte-aligned
 * stack scratch buffers with a row stride of SIZE; tmp is the int16_t
 * intermediate buffer required by the hv lowpass filter.
 * No comments are placed inside the macro body: a // comment would
 * swallow the continuation backslash during line splicing.
 */
#define H264_MC(OPNAME, SIZE, CODETYPE) \
static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## CODETYPE (uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    ff_ ## OPNAME ## pixels ## SIZE ## _ ## CODETYPE(dst, src, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{ \
    DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    OPNAME ## h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(dst, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+1, half, stride, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, half, stride, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    OPNAME ## h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(dst, src, stride, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    DECLARE_ALIGNED(16, uint8_t, half)[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(half, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+stride, half, stride, stride, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
    DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
    DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
    DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
    DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\
    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(dst, tmp, src, stride, SIZE, stride);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
    DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\
    DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    DECLARE_ALIGNED(16, uint8_t, halfH)[SIZE*SIZE];\
    DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\
    DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(halfH, src + stride, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfH, halfHV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
    DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\
    DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\
}\
\
static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## CODETYPE(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
{\
    DECLARE_ALIGNED(16, uint8_t, halfV)[SIZE*SIZE];\
    DECLARE_ALIGNED(16, uint8_t, halfHV)[SIZE*SIZE];\
    DECLARE_ALIGNED(16, int16_t, tmp)[SIZE*(SIZE+8)];\
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\
}\
192 | |||
193 | static inline void put_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1, |
||
194 | const uint8_t * src2, int dst_stride, |
||
195 | int src_stride1, int h) |
||
196 | { |
||
197 | int i; |
||
198 | vec_u8 a, b, d, tmp1, tmp2, mask, mask_, edges, align; |
||
199 | |||
200 | mask_ = vec_lvsl(0, src2); |
||
201 | |||
202 | for (i = 0; i < h; i++) { |
||
203 | |||
204 | tmp1 = vec_ld(i * src_stride1, src1); |
||
205 | mask = vec_lvsl(i * src_stride1, src1); |
||
206 | tmp2 = vec_ld(i * src_stride1 + 15, src1); |
||
207 | |||
208 | a = vec_perm(tmp1, tmp2, mask); |
||
209 | |||
210 | tmp1 = vec_ld(i * 16, src2); |
||
211 | tmp2 = vec_ld(i * 16 + 15, src2); |
||
212 | |||
213 | b = vec_perm(tmp1, tmp2, mask_); |
||
214 | |||
215 | tmp1 = vec_ld(0, dst); |
||
216 | mask = vec_lvsl(0, dst); |
||
217 | tmp2 = vec_ld(15, dst); |
||
218 | |||
219 | d = vec_avg(a, b); |
||
220 | |||
221 | edges = vec_perm(tmp2, tmp1, mask); |
||
222 | |||
223 | align = vec_lvsr(0, dst); |
||
224 | |||
225 | tmp2 = vec_perm(d, edges, align); |
||
226 | tmp1 = vec_perm(edges, d, align); |
||
227 | |||
228 | vec_st(tmp2, 15, dst); |
||
229 | vec_st(tmp1, 0 , dst); |
||
230 | |||
231 | dst += dst_stride; |
||
232 | } |
||
233 | } |
||
234 | |||
235 | static inline void avg_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1, |
||
236 | const uint8_t * src2, int dst_stride, |
||
237 | int src_stride1, int h) |
||
238 | { |
||
239 | int i; |
||
240 | vec_u8 a, b, d, tmp1, tmp2, mask, mask_, edges, align; |
||
241 | |||
242 | mask_ = vec_lvsl(0, src2); |
||
243 | |||
244 | for (i = 0; i < h; i++) { |
||
245 | |||
246 | tmp1 = vec_ld(i * src_stride1, src1); |
||
247 | mask = vec_lvsl(i * src_stride1, src1); |
||
248 | tmp2 = vec_ld(i * src_stride1 + 15, src1); |
||
249 | |||
250 | a = vec_perm(tmp1, tmp2, mask); |
||
251 | |||
252 | tmp1 = vec_ld(i * 16, src2); |
||
253 | tmp2 = vec_ld(i * 16 + 15, src2); |
||
254 | |||
255 | b = vec_perm(tmp1, tmp2, mask_); |
||
256 | |||
257 | tmp1 = vec_ld(0, dst); |
||
258 | mask = vec_lvsl(0, dst); |
||
259 | tmp2 = vec_ld(15, dst); |
||
260 | |||
261 | d = vec_avg(vec_perm(tmp1, tmp2, mask), vec_avg(a, b)); |
||
262 | |||
263 | edges = vec_perm(tmp2, tmp1, mask); |
||
264 | |||
265 | align = vec_lvsr(0, dst); |
||
266 | |||
267 | tmp2 = vec_perm(d, edges, align); |
||
268 | tmp1 = vec_perm(edges, d, align); |
||
269 | |||
270 | vec_st(tmp2, 15, dst); |
||
271 | vec_st(tmp1, 0 , dst); |
||
272 | |||
273 | dst += dst_stride; |
||
274 | } |
||
275 | } |
||
276 | |||
277 | /* Implemented but could be faster |
||
278 | #define put_pixels16_l2_altivec(d,s1,s2,ds,s1s,h) put_pixels16_l2(d,s1,s2,ds,s1s,16,h) |
||
279 | #define avg_pixels16_l2_altivec(d,s1,s2,ds,s1s,h) avg_pixels16_l2(d,s1,s2,ds,s1s,16,h) |
||
280 | */ |
||
281 | |||
/* Emit the 16 quarter-pel MC functions for both flavours at size 16. */
H264_MC(put_, 16, altivec)
H264_MC(avg_, 16, altivec)
#endif /* HAVE_ALTIVEC */
||
285 | |||
286 | av_cold void ff_h264qpel_init_ppc(H264QpelContext *c, int bit_depth) |
||
287 | { |
||
288 | #if HAVE_ALTIVEC |
||
289 | const int high_bit_depth = bit_depth > 8; |
||
290 | |||
291 | if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC)) |
||
292 | return; |
||
293 | |||
294 | if (!high_bit_depth) { |
||
295 | #define dspfunc(PFX, IDX, NUM) \ |
||
296 | c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_altivec; \ |
||
297 | c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_altivec; \ |
||
298 | c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_altivec; \ |
||
299 | c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_altivec; \ |
||
300 | c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_altivec; \ |
||
301 | c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_altivec; \ |
||
302 | c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_altivec; \ |
||
303 | c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_altivec; \ |
||
304 | c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_altivec; \ |
||
305 | c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_altivec; \ |
||
306 | c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_altivec; \ |
||
307 | c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_altivec; \ |
||
308 | c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_altivec; \ |
||
309 | c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_altivec; \ |
||
310 | c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_altivec; \ |
||
311 | c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_altivec |
||
312 | |||
313 | dspfunc(put_h264_qpel, 0, 16); |
||
314 | dspfunc(avg_h264_qpel, 0, 16); |
||
315 | #undef dspfunc |
||
316 | } |
||
317 | #endif /* HAVE_ALTIVEC */ |
||
318 | }>> |