Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
4349 | Serge | 1 | /* |
2 | * Copyright (c) 2002 Brian Foley |
||
3 | * Copyright (c) 2002 Dieter Shirley |
||
4 | * Copyright (c) 2003-2004 Romain Dolbeau |
||
5 | * |
||
6 | * This file is part of FFmpeg. |
||
7 | * |
||
8 | * FFmpeg is free software; you can redistribute it and/or |
||
9 | * modify it under the terms of the GNU Lesser General Public |
||
10 | * License as published by the Free Software Foundation; either |
||
11 | * version 2.1 of the License, or (at your option) any later version. |
||
12 | * |
||
13 | * FFmpeg is distributed in the hope that it will be useful, |
||
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||
16 | * Lesser General Public License for more details. |
||
17 | * |
||
18 | * You should have received a copy of the GNU Lesser General Public |
||
19 | * License along with FFmpeg; if not, write to the Free Software |
||
20 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||
21 | */ |
||
22 | |||
#include "config.h"

#if HAVE_ALTIVEC_H
#include <altivec.h>
#endif

#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/ppc/types_altivec.h"
#include "libavutil/ppc/util_altivec.h"
#include "libavcodec/hpeldsp.h"
#include "dsputil_altivec.h"
35 | |||
36 | #if HAVE_ALTIVEC |
||
/* next one assumes that ((line_size % 16) == 0) */
/**
 * Copy an h-row, 16-byte-wide pixel area from 'pixels' to 'block'.
 * 'pixels' may be unaligned; 'block' is stored with plain vec_st, so it is
 * assumed 16-byte aligned (line_size % 16 == 0 per the comment above).
 */
void ff_put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
{
    register vector unsigned char pixelsv1, pixelsv2;
    register vector unsigned char pixelsv1B, pixelsv2B;
    register vector unsigned char pixelsv1C, pixelsv2C;
    register vector unsigned char pixelsv1D, pixelsv2D;

    /* vec_lvsl builds the shift-left permute vector for the misalignment of
     * 'pixels'; vec_perm(lo, hi, perm) below then reassembles the unaligned
     * 16 bytes from two aligned loads. */
    register vector unsigned char perm = vec_lvsl(0, pixels);
    int i;
    register ptrdiff_t line_size_2 = line_size << 1;
    register ptrdiff_t line_size_3 = line_size + line_size_2;
    register ptrdiff_t line_size_4 = line_size << 2;

    // hand-unrolling the loop by 4 gains about 15%
    // minimum execution time goes from 74 to 60 cycles
    // it's faster than -funroll-loops, but using
    // -funroll-loops w/ this is bad - 74 cycles again.
    // all this is on a 7450, tuning for the 7450
    for (i = 0; i < h; i += 4) {
        /* vec_ld ignores the low 4 address bits, so offset 15 fetches the
         * aligned quadword holding the last byte of the unaligned span. */
        pixelsv1  = vec_ld( 0, pixels);
        pixelsv2  = vec_ld(15, pixels);
        pixelsv1B = vec_ld(line_size, pixels);
        pixelsv2B = vec_ld(15 + line_size, pixels);
        pixelsv1C = vec_ld(line_size_2, pixels);
        pixelsv2C = vec_ld(15 + line_size_2, pixels);
        pixelsv1D = vec_ld(line_size_3, pixels);
        pixelsv2D = vec_ld(15 + line_size_3, pixels);
        vec_st(vec_perm(pixelsv1, pixelsv2, perm),
               0, (unsigned char*)block);
        vec_st(vec_perm(pixelsv1B, pixelsv2B, perm),
               line_size, (unsigned char*)block);
        vec_st(vec_perm(pixelsv1C, pixelsv2C, perm),
               line_size_2, (unsigned char*)block);
        vec_st(vec_perm(pixelsv1D, pixelsv2D, perm),
               line_size_3, (unsigned char*)block);
        pixels+=line_size_4;
        block +=line_size_4;
    }
}
||
77 | |||
/* next one assumes that ((line_size % 16) == 0) */
/* NOTE(review): op_avg is not referenced anywhere in this file as visible
 * here -- it looks like a leftover scalar fallback; confirm before removing. */
#define op_avg(a,b) a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEUL)>>1) )
/**
 * In-place average: block[] = avg(block[], pixels[]) over an h-row,
 * 16-byte-wide area, using vec_avg (rounds the average up).
 * 'pixels' may be unaligned; 'block' is assumed 16-byte aligned.
 */
void ff_avg_pixels16_altivec(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
{
    register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv;
    /* Alignment permute for the (possibly unaligned) source pointer. */
    register vector unsigned char perm = vec_lvsl(0, pixels);
    int i;

    for (i = 0; i < h; i++) {
        pixelsv1 = vec_ld( 0, pixels);    /* aligned quadword at/below pixels */
        pixelsv2 = vec_ld(16,pixels);     /* following quadword (unaligned tail) */
        blockv = vec_ld(0, block);
        pixelsv = vec_perm(pixelsv1, pixelsv2, perm);
        blockv = vec_avg(blockv,pixelsv);
        vec_st(blockv, 0, (unsigned char*)block);
        pixels+=line_size;
        block +=line_size;
    }
}
||
97 | |||
/* next one assumes that ((line_size % 8) == 0) */
/**
 * In-place average over an h-row, 8-byte-wide area:
 * block[] = avg(block[], pixels[]).
 * AltiVec stores are always 16 bytes wide, so the 8 target bytes are merged
 * into the containing quadword before storing; the untouched half is
 * preserved because avg(x, x) == x.
 */
static void avg_pixels8_altivec(uint8_t * block, const uint8_t * pixels, ptrdiff_t line_size, int h)
{
    register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv;
    int i;

    for (i = 0; i < h; i++) {
        /* block is 8 bytes-aligned, so we're either in the
           left block (16 bytes-aligned) or in the right block (not) */
        int rightside = ((unsigned long)block & 0x0000000F);

        blockv = vec_ld(0, block);
        pixelsv1 = vec_ld( 0, pixels);
        pixelsv2 = vec_ld(16, pixels);
        /* Realign the two aligned loads to the unaligned source address. */
        pixelsv = vec_perm(pixelsv1, pixelsv2, vec_lvsl(0, pixels));

        /* Splice the 8 source bytes into the half of the quadword that
         * 'block' occupies; keep the block's own bytes in the other half
         * (vcprm indices 0..3 = first operand words, s0..s1 = second). */
        if (rightside) {
            pixelsv = vec_perm(blockv, pixelsv, vcprm(0,1,s0,s1));
        } else {
            pixelsv = vec_perm(blockv, pixelsv, vcprm(s0,s1,2,3));
        }

        blockv = vec_avg(blockv, pixelsv);

        vec_st(blockv, 0, block);

        pixels += line_size;
        block += line_size;
    }
}
||
128 | |||
/* next one assumes that ((line_size % 8) == 0) */
/**
 * 8-wide half-pel interpolation in both x and y (rounding variant):
 * dst[x] = (a + b + c + d + 2) >> 2, where a,b are horizontally adjacent
 * pixels of the current row and c,d of the next row.
 * The horizontal pair-sum of each row is kept in a running unsigned-short
 * accumulator (pixelssum1) so every source row is loaded and widened once.
 */
static void put_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
{
    register int i;
    register vector unsigned char pixelsv1, pixelsv2, pixelsavg;
    register vector unsigned char blockv, temp1, temp2;
    register vector unsigned short pixelssum1, pixelssum2, temp3;
    register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0);
    register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2);

    /* Prime the pipeline with row 0: pixelsv1 = pixels[0..], pixelsv2 = pixels[1..]. */
    temp1 = vec_ld(0, pixels);
    temp2 = vec_ld(16, pixels);
    pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
    /* When pixels is at offset 15 mod 16, the vector shifted by one more byte
     * is exactly the next aligned quadword (temp2); special-casing avoids a
     * permute that would need bytes beyond temp2. */
    if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F) {
        pixelsv2 = temp2;
    } else {
        pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));
    }
    /* Zero-extend the first 8 bytes of each vector to 16-bit lanes. */
    pixelsv1 = vec_mergeh(vczero, pixelsv1);
    pixelsv2 = vec_mergeh(vczero, pixelsv2);
    pixelssum1 = vec_add((vector unsigned short)pixelsv1,
                         (vector unsigned short)pixelsv2);
    /* Fold the +2 rounding bias into the running sum. */
    pixelssum1 = vec_add(pixelssum1, vctwo);

    for (i = 0; i < h ; i++) {
        int rightside = ((unsigned long)block & 0x0000000F);
        blockv = vec_ld(0, block);

        /* Load the next row, same unaligned handling as above. */
        temp1 = vec_ld(line_size, pixels);
        temp2 = vec_ld(line_size + 16, pixels);
        pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));
        if (((((unsigned long)pixels) + line_size) & 0x0000000F) == 0x0000000F) {
            pixelsv2 = temp2;
        } else {
            pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));
        }

        pixelsv1 = vec_mergeh(vczero, pixelsv1);
        pixelsv2 = vec_mergeh(vczero, pixelsv2);
        pixelssum2 = vec_add((vector unsigned short)pixelsv1,
                             (vector unsigned short)pixelsv2);
        /* (row_i pairsum + 2) + row_{i+1} pairsum, then >> 2. */
        temp3 = vec_add(pixelssum1, pixelssum2);
        temp3 = vec_sra(temp3, vctwo);
        /* Carry this row's pair-sum (re-biased) into the next iteration. */
        pixelssum1 = vec_add(pixelssum2, vctwo);
        pixelsavg = vec_packsu(temp3, (vector unsigned short) vczero);

        /* Merge the 8 result bytes into the correct half of the destination
         * quadword (stores are 16 bytes wide). */
        if (rightside) {
            blockv = vec_perm(blockv, pixelsavg, vcprm(0, 1, s0, s1));
        } else {
            blockv = vec_perm(blockv, pixelsavg, vcprm(s0, s1, 2, 3));
        }

        vec_st(blockv, 0, block);

        block += line_size;
        pixels += line_size;
    }
}
||
187 | |||
/* next one assumes that ((line_size % 8) == 0) */
/**
 * 8-wide half-pel interpolation in both x and y, no-rounding variant:
 * identical structure to put_pixels8_xy2_altivec, but the bias folded into
 * the running sum is +1 (vcone) instead of +2, giving
 * dst[x] = (a + b + c + d + 1) >> 2.
 */
static void put_no_rnd_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
{
    register int i;
    register vector unsigned char pixelsv1, pixelsv2, pixelsavg;
    register vector unsigned char blockv, temp1, temp2;
    register vector unsigned short pixelssum1, pixelssum2, temp3;
    register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0);
    register const vector unsigned short vcone = (const vector unsigned short)vec_splat_u16(1);
    register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2);

    /* Prime with row 0: pixelsv1 = pixels[0..], pixelsv2 = pixels[1..]. */
    temp1 = vec_ld(0, pixels);
    temp2 = vec_ld(16, pixels);
    pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
    /* Offset 15 mod 16: the 1-byte-shifted vector is exactly temp2. */
    if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F) {
        pixelsv2 = temp2;
    } else {
        pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));
    }
    /* Zero-extend first 8 bytes to 16-bit lanes and form the pair-sum. */
    pixelsv1 = vec_mergeh(vczero, pixelsv1);
    pixelsv2 = vec_mergeh(vczero, pixelsv2);
    pixelssum1 = vec_add((vector unsigned short)pixelsv1,
                         (vector unsigned short)pixelsv2);
    pixelssum1 = vec_add(pixelssum1, vcone);   /* +1 bias: no-rounding mode */

    for (i = 0; i < h ; i++) {
        int rightside = ((unsigned long)block & 0x0000000F);
        blockv = vec_ld(0, block);

        temp1 = vec_ld(line_size, pixels);
        temp2 = vec_ld(line_size + 16, pixels);
        pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));
        if (((((unsigned long)pixels) + line_size) & 0x0000000F) == 0x0000000F) {
            pixelsv2 = temp2;
        } else {
            pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));
        }

        pixelsv1 = vec_mergeh(vczero, pixelsv1);
        pixelsv2 = vec_mergeh(vczero, pixelsv2);
        pixelssum2 = vec_add((vector unsigned short)pixelsv1,
                             (vector unsigned short)pixelsv2);
        temp3 = vec_add(pixelssum1, pixelssum2);
        temp3 = vec_sra(temp3, vctwo);           /* >> 2 */
        pixelssum1 = vec_add(pixelssum2, vcone); /* carry re-biased sum forward */
        pixelsavg = vec_packsu(temp3, (vector unsigned short) vczero);

        /* Merge result into the half of the quadword 'block' occupies. */
        if (rightside) {
            blockv = vec_perm(blockv, pixelsavg, vcprm(0, 1, s0, s1));
        } else {
            blockv = vec_perm(blockv, pixelsavg, vcprm(s0, s1, 2, 3));
        }

        vec_st(blockv, 0, block);

        block += line_size;
        pixels += line_size;
    }
}
||
247 | |||
/* next one assumes that ((line_size % 16) == 0) */
/**
 * 16-wide half-pel interpolation in both x and y (rounding variant):
 * dst[x] = (a + b + c + d + 2) >> 2.
 * Same scheme as the 8-wide version but processes a full quadword per row:
 * the 16 source bytes are zero-extended into two unsigned-short vectors
 * (mergeh = low half, mergel = high half) and two running pair-sum
 * accumulators (pixelssum1/pixelssum3) are carried across rows.
 */
static void put_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, ptrdiff_t line_size, int h)
{
    register int i;
    register vector unsigned char pixelsv1, pixelsv2, pixelsv3, pixelsv4;
    register vector unsigned char blockv, temp1, temp2;
    register vector unsigned short temp3, temp4,
        pixelssum1, pixelssum2, pixelssum3, pixelssum4;
    register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0);
    register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2);

    /* Prime with row 0: pixelsv1 = pixels[0..], pixelsv2 = pixels[1..]. */
    temp1 = vec_ld(0, pixels);
    temp2 = vec_ld(16, pixels);
    pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
    /* Offset 15 mod 16: the 1-byte-shifted vector is exactly temp2. */
    if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F) {
        pixelsv2 = temp2;
    } else {
        pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));
    }
    /* Widen bytes to shorts: v3/v4 = high 8 bytes, v1/v2 = low 8 bytes. */
    pixelsv3 = vec_mergel(vczero, pixelsv1);
    pixelsv4 = vec_mergel(vczero, pixelsv2);
    pixelsv1 = vec_mergeh(vczero, pixelsv1);
    pixelsv2 = vec_mergeh(vczero, pixelsv2);
    pixelssum3 = vec_add((vector unsigned short)pixelsv3,
                         (vector unsigned short)pixelsv4);
    pixelssum3 = vec_add(pixelssum3, vctwo);   /* +2 rounding bias */
    pixelssum1 = vec_add((vector unsigned short)pixelsv1,
                         (vector unsigned short)pixelsv2);
    pixelssum1 = vec_add(pixelssum1, vctwo);

    for (i = 0; i < h ; i++) {
        blockv = vec_ld(0, block);

        /* Next row, same unaligned-load handling. */
        temp1 = vec_ld(line_size, pixels);
        temp2 = vec_ld(line_size + 16, pixels);
        pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));
        if (((((unsigned long)pixels) + line_size) & 0x0000000F) == 0x0000000F) {
            pixelsv2 = temp2;
        } else {
            pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));
        }

        pixelsv3 = vec_mergel(vczero, pixelsv1);
        pixelsv4 = vec_mergel(vczero, pixelsv2);
        pixelsv1 = vec_mergeh(vczero, pixelsv1);
        pixelsv2 = vec_mergeh(vczero, pixelsv2);

        pixelssum4 = vec_add((vector unsigned short)pixelsv3,
                             (vector unsigned short)pixelsv4);
        pixelssum2 = vec_add((vector unsigned short)pixelsv1,
                             (vector unsigned short)pixelsv2);
        /* (prev pairsum + 2) + current pairsum, then >> 2, per half. */
        temp4 = vec_add(pixelssum3, pixelssum4);
        temp4 = vec_sra(temp4, vctwo);
        temp3 = vec_add(pixelssum1, pixelssum2);
        temp3 = vec_sra(temp3, vctwo);

        /* Carry re-biased pair-sums into the next iteration. */
        pixelssum3 = vec_add(pixelssum4, vctwo);
        pixelssum1 = vec_add(pixelssum2, vctwo);

        /* Pack both short halves back to 16 unsigned bytes. */
        blockv = vec_packsu(temp3, temp4);

        vec_st(blockv, 0, block);

        block += line_size;
        pixels += line_size;
    }
}
||
315 | |||
/* next one assumes that ((line_size % 16) == 0) */
/**
 * 16-wide half-pel interpolation in both x and y, no-rounding variant:
 * identical structure to put_pixels16_xy2_altivec, but the bias folded into
 * the running sums is +1 (vcone) instead of +2, giving
 * dst[x] = (a + b + c + d + 1) >> 2.
 */
static void put_no_rnd_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, ptrdiff_t line_size, int h)
{
    register int i;
    register vector unsigned char pixelsv1, pixelsv2, pixelsv3, pixelsv4;
    register vector unsigned char blockv, temp1, temp2;
    register vector unsigned short temp3, temp4,
        pixelssum1, pixelssum2, pixelssum3, pixelssum4;
    register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0);
    register const vector unsigned short vcone = (const vector unsigned short)vec_splat_u16(1);
    register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2);

    /* Prime with row 0: pixelsv1 = pixels[0..], pixelsv2 = pixels[1..]. */
    temp1 = vec_ld(0, pixels);
    temp2 = vec_ld(16, pixels);
    pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
    /* Offset 15 mod 16: the 1-byte-shifted vector is exactly temp2. */
    if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F) {
        pixelsv2 = temp2;
    } else {
        pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));
    }
    /* Widen bytes to shorts: v3/v4 = high 8 bytes, v1/v2 = low 8 bytes. */
    pixelsv3 = vec_mergel(vczero, pixelsv1);
    pixelsv4 = vec_mergel(vczero, pixelsv2);
    pixelsv1 = vec_mergeh(vczero, pixelsv1);
    pixelsv2 = vec_mergeh(vczero, pixelsv2);
    pixelssum3 = vec_add((vector unsigned short)pixelsv3,
                         (vector unsigned short)pixelsv4);
    pixelssum3 = vec_add(pixelssum3, vcone);   /* +1 bias: no-rounding mode */
    pixelssum1 = vec_add((vector unsigned short)pixelsv1,
                         (vector unsigned short)pixelsv2);
    pixelssum1 = vec_add(pixelssum1, vcone);

    for (i = 0; i < h ; i++) {
        blockv = vec_ld(0, block);

        temp1 = vec_ld(line_size, pixels);
        temp2 = vec_ld(line_size + 16, pixels);
        pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));
        if (((((unsigned long)pixels) + line_size) & 0x0000000F) == 0x0000000F) {
            pixelsv2 = temp2;
        } else {
            pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));
        }

        pixelsv3 = vec_mergel(vczero, pixelsv1);
        pixelsv4 = vec_mergel(vczero, pixelsv2);
        pixelsv1 = vec_mergeh(vczero, pixelsv1);
        pixelsv2 = vec_mergeh(vczero, pixelsv2);

        pixelssum4 = vec_add((vector unsigned short)pixelsv3,
                             (vector unsigned short)pixelsv4);
        pixelssum2 = vec_add((vector unsigned short)pixelsv1,
                             (vector unsigned short)pixelsv2);
        /* (prev pairsum + 1) + current pairsum, then >> 2, per half. */
        temp4 = vec_add(pixelssum3, pixelssum4);
        temp4 = vec_sra(temp4, vctwo);
        temp3 = vec_add(pixelssum1, pixelssum2);
        temp3 = vec_sra(temp3, vctwo);

        /* Carry re-biased pair-sums into the next iteration. */
        pixelssum3 = vec_add(pixelssum4, vcone);
        pixelssum1 = vec_add(pixelssum2, vcone);

        blockv = vec_packsu(temp3, temp4);

        vec_st(blockv, 0, block);

        block += line_size;
        pixels += line_size;
    }
}
||
384 | |||
/* next one assumes that ((line_size % 8) == 0) */
/**
 * 8-wide half-pel x+y interpolation, averaged into the destination:
 * block[x] = avg(block[x], (a + b + c + d + 2) >> 2), with the final
 * average done by vec_avg (rounds up).
 * Interpolation pipeline is identical to put_pixels8_xy2_altivec; only the
 * tail differs: the result is vec_avg'd with the existing block contents.
 */
static void avg_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
{
    register int i;
    register vector unsigned char pixelsv1, pixelsv2, pixelsavg;
    register vector unsigned char blockv, temp1, temp2, blocktemp;
    register vector unsigned short pixelssum1, pixelssum2, temp3;

    register const vector unsigned char vczero = (const vector unsigned char)
                                        vec_splat_u8(0);
    register const vector unsigned short vctwo = (const vector unsigned short)
                                        vec_splat_u16(2);

    /* Prime with row 0: pixelsv1 = pixels[0..], pixelsv2 = pixels[1..]. */
    temp1 = vec_ld(0, pixels);
    temp2 = vec_ld(16, pixels);
    pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
    /* Offset 15 mod 16: the 1-byte-shifted vector is exactly temp2. */
    if ((((unsigned long)pixels) & 0x0000000F) == 0x0000000F) {
        pixelsv2 = temp2;
    } else {
        pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));
    }
    /* Zero-extend first 8 bytes to shorts and form the biased pair-sum. */
    pixelsv1 = vec_mergeh(vczero, pixelsv1);
    pixelsv2 = vec_mergeh(vczero, pixelsv2);
    pixelssum1 = vec_add((vector unsigned short)pixelsv1,
                         (vector unsigned short)pixelsv2);
    pixelssum1 = vec_add(pixelssum1, vctwo);

    for (i = 0; i < h ; i++) {
        int rightside = ((unsigned long)block & 0x0000000F);
        blockv = vec_ld(0, block);

        temp1 = vec_ld(line_size, pixels);
        temp2 = vec_ld(line_size + 16, pixels);
        pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));
        if (((((unsigned long)pixels) + line_size) & 0x0000000F) == 0x0000000F) {
            pixelsv2 = temp2;
        } else {
            pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));
        }

        pixelsv1 = vec_mergeh(vczero, pixelsv1);
        pixelsv2 = vec_mergeh(vczero, pixelsv2);
        pixelssum2 = vec_add((vector unsigned short)pixelsv1,
                             (vector unsigned short)pixelsv2);
        temp3 = vec_add(pixelssum1, pixelssum2);
        temp3 = vec_sra(temp3, vctwo);           /* >> 2 */
        pixelssum1 = vec_add(pixelssum2, vctwo); /* carry re-biased sum forward */
        pixelsavg = vec_packsu(temp3, (vector unsigned short) vczero);

        /* Splice the interpolated 8 bytes into the half of the quadword
         * that 'block' occupies; the other half keeps blockv's own bytes,
         * so vec_avg leaves it unchanged (avg(x, x) == x). */
        if (rightside) {
            blocktemp = vec_perm(blockv, pixelsavg, vcprm(0, 1, s0, s1));
        } else {
            blocktemp = vec_perm(blockv, pixelsavg, vcprm(s0, s1, 2, 3));
        }

        blockv = vec_avg(blocktemp, blockv);
        vec_st(blockv, 0, block);

        block += line_size;
        pixels += line_size;
    }
}
||
447 | #endif /* HAVE_ALTIVEC */ |
||
448 | |||
449 | av_cold void ff_hpeldsp_init_ppc(HpelDSPContext *c, int flags) |
||
450 | { |
||
451 | #if HAVE_ALTIVEC |
||
452 | if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC)) |
||
453 | return; |
||
454 | |||
455 | c->avg_pixels_tab[0][0] = ff_avg_pixels16_altivec; |
||
456 | c->avg_pixels_tab[1][0] = avg_pixels8_altivec; |
||
457 | c->avg_pixels_tab[1][3] = avg_pixels8_xy2_altivec; |
||
458 | |||
459 | c->put_pixels_tab[0][0] = ff_put_pixels16_altivec; |
||
460 | c->put_pixels_tab[1][3] = put_pixels8_xy2_altivec; |
||
461 | c->put_pixels_tab[0][3] = put_pixels16_xy2_altivec; |
||
462 | |||
463 | c->put_no_rnd_pixels_tab[0][0] = ff_put_pixels16_altivec; |
||
464 | c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_altivec; |
||
465 | c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_altivec; |
||
466 | #endif /* HAVE_ALTIVEC */ |
||
467 | }>>>>>>>>><>><> |