Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
4349 Serge 1
/*
2
 * Copyright (c) 2002 Brian Foley
3
 * Copyright (c) 2002 Dieter Shirley
4
 * Copyright (c) 2003-2004 Romain Dolbeau <romain@dolbeau.org>
5
 *
6
 * This file is part of FFmpeg.
7
 *
8
 * FFmpeg is free software; you can redistribute it and/or
9
 * modify it under the terms of the GNU Lesser General Public
10
 * License as published by the Free Software Foundation; either
11
 * version 2.1 of the License, or (at your option) any later version.
12
 *
13
 * FFmpeg is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16
 * Lesser General Public License for more details.
17
 *
18
 * You should have received a copy of the GNU Lesser General Public
19
 * License along with FFmpeg; if not, write to the Free Software
20
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21
 */
22
 
23
#include "config.h"

#if HAVE_ALTIVEC_H
#include <altivec.h>
#endif

#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/ppc/types_altivec.h"
#include "libavutil/ppc/util_altivec.h"
#include "libavcodec/hpeldsp.h"
#include "dsputil_altivec.h"
35
 
36
#if HAVE_ALTIVEC
37
/* next one assumes that ((line_size % 16) == 0) */
/**
 * Copy a 16-pixel-wide block of h rows from pixels to block.
 * 'pixels' may be unaligned (realigned via the vec_lvsl permute mask);
 * 'block' must be 16-byte aligned (implied by line_size % 16 == 0).
 */
void ff_put_pixels16_altivec(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
{
    register vector unsigned char pixelsv1, pixelsv2;
    register vector unsigned char pixelsv1B, pixelsv2B;
    register vector unsigned char pixelsv1C, pixelsv2C;
    register vector unsigned char pixelsv1D, pixelsv2D;

    /* permute mask that shifts the two loaded blocks into alignment */
    register vector unsigned char perm = vec_lvsl(0, pixels);
    int i;
    register ptrdiff_t line_size_2 = line_size << 1;
    register ptrdiff_t line_size_3 = line_size + line_size_2;
    register ptrdiff_t line_size_4 = line_size << 2;

// hand-unrolling the loop by 4 gains about 15%
// minimum execution time goes from 74 to 60 cycles
// it's faster than -funroll-loops, but using
// -funroll-loops w/ this is bad - 74 cycles again.
// all this is on a 7450, tuning for the 7450
    /* NOTE(review): the 4x unroll assumes h is a multiple of 4 —
     * confirm all callers guarantee this. */
    for (i = 0; i < h; i += 4) {
        /* Each row needs the two aligned blocks covering pixels..pixels+15.
         * The second load uses offset 15 (not 16) so an already-aligned
         * source does not read one vector past the last needed byte. */
        pixelsv1  = vec_ld( 0, pixels);
        pixelsv2  = vec_ld(15, pixels);
        pixelsv1B = vec_ld(line_size, pixels);
        pixelsv2B = vec_ld(15 + line_size, pixels);
        pixelsv1C = vec_ld(line_size_2, pixels);
        pixelsv2C = vec_ld(15 + line_size_2, pixels);
        pixelsv1D = vec_ld(line_size_3, pixels);
        pixelsv2D = vec_ld(15 + line_size_3, pixels);
        vec_st(vec_perm(pixelsv1, pixelsv2, perm),
               0, (unsigned char*)block);
        vec_st(vec_perm(pixelsv1B, pixelsv2B, perm),
               line_size, (unsigned char*)block);
        vec_st(vec_perm(pixelsv1C, pixelsv2C, perm),
               line_size_2, (unsigned char*)block);
        vec_st(vec_perm(pixelsv1D, pixelsv2D, perm),
               line_size_3, (unsigned char*)block);
        pixels+=line_size_4;
        block +=line_size_4;
    }
}
77
 
78
/* next one assumes that ((line_size % 16) == 0) */
79
#define op_avg(a,b)  a = ( ((a)|(b)) - ((((a)^(b))&0xFEFEFEFEUL)>>1) )
80
/**
 * Average a 16-pixel-wide block of h rows from pixels into block
 * (block[x] = rounded average of block[x] and pixels[x]).
 * 'pixels' may be unaligned; 'block' must be 16-byte aligned
 * (line_size % 16 == 0).
 */
void ff_avg_pixels16_altivec(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
{
    register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv;
    register vector unsigned char perm = vec_lvsl(0, pixels);
    int i;

    for (i = 0; i < h; i++) {
        /* Second load uses offset 15, not 16: when 'pixels' is already
         * 16-byte aligned, vec_ld(16, pixels) would touch the *next*
         * aligned block even though vec_perm only consumes bytes 0..15,
         * risking an out-of-bounds read on the last row.  Offset 15
         * stays inside the same block in the aligned case and still
         * reaches the second block for any misaligned source — this
         * matches ff_put_pixels16_altivec above. */
        pixelsv1 = vec_ld( 0, pixels);
        pixelsv2 = vec_ld(15, pixels);
        blockv   = vec_ld(0, block);
        pixelsv  = vec_perm(pixelsv1, pixelsv2, perm);
        /* vec_avg rounds: (a + b + 1) >> 1 per byte */
        blockv   = vec_avg(blockv, pixelsv);
        vec_st(blockv, 0, (unsigned char*)block);
        pixels += line_size;
        block  += line_size;
    }
}
97
 
98
/* next one assumes that ((line_size % 8) == 0) */
/**
 * Average an 8-pixel-wide block of h rows from pixels into block.
 * AltiVec works on 16-byte vectors, so each iteration loads the full
 * 16-byte destination vector and merges the new 8 bytes into the
 * correct half before averaging and storing the whole vector back.
 */
static void avg_pixels8_altivec(uint8_t * block, const uint8_t * pixels, ptrdiff_t line_size, int h)
{
    register vector unsigned char pixelsv1, pixelsv2, pixelsv, blockv;
    int i;

   for (i = 0; i < h; i++) {
       /* block is 8 bytes-aligned, so we're either in the
          left block (16 bytes-aligned) or in the right block (not) */
       int rightside = ((unsigned long)block & 0x0000000F);

       blockv = vec_ld(0, block);
       pixelsv1 = vec_ld( 0, pixels);
       pixelsv2 = vec_ld(16, pixels);
       /* realign the possibly-unaligned source into pixelsv */
       pixelsv = vec_perm(pixelsv1, pixelsv2, vec_lvsl(0, pixels));

       /* splice the 8 source bytes into the half of blockv that will be
        * averaged, leaving the other half equal to blockv so vec_avg is
        * a no-op there (avg(x, x) == x) */
       if (rightside) {
           pixelsv = vec_perm(blockv, pixelsv, vcprm(0,1,s0,s1));
       } else {
           pixelsv = vec_perm(blockv, pixelsv, vcprm(s0,s1,2,3));
       }

       blockv = vec_avg(blockv, pixelsv);

       vec_st(blockv, 0, block);

       pixels += line_size;
       block += line_size;
   }
}
128
 
129
/* next one assumes that ((line_size % 8) == 0) */
/**
 * 8-wide half-pel (x+1/2, y+1/2) interpolation with rounding:
 * dst[x] = (p[x] + p[x+1] + p[x+line_size] + p[x+line_size+1] + 2) >> 2.
 * The horizontal pair-sum of each row is kept in a rolling accumulator
 * (pixelssum1) so every source row is loaded and summed only once.
 */
static void put_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
{
    register int i;
    register vector unsigned char pixelsv1, pixelsv2, pixelsavg;
    register vector unsigned char blockv, temp1, temp2;
    register vector unsigned short pixelssum1, pixelssum2, temp3;
    register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0);
    /* vctwo doubles as the rounding constant (+2) and the shift count (>>2) */
    register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2);

    /* prime the accumulator with row 0: pixelsv1 = p[x], pixelsv2 = p[x+1] */
    temp1 = vec_ld(0, pixels);
    temp2 = vec_ld(16, pixels);
    pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
    /* at offset 15, pixels+1 starts exactly at the second aligned block */
    if ((((unsigned long)pixels) & 0x0000000F) ==  0x0000000F) {
        pixelsv2 = temp2;
    } else {
        pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));
    }
    /* zero-extend the low 8 bytes to 16-bit lanes so sums can't overflow */
    pixelsv1 = vec_mergeh(vczero, pixelsv1);
    pixelsv2 = vec_mergeh(vczero, pixelsv2);
    pixelssum1 = vec_add((vector unsigned short)pixelsv1,
                         (vector unsigned short)pixelsv2);
    pixelssum1 = vec_add(pixelssum1, vctwo);

    for (i = 0; i < h ; i++) {
        /* dst is 8-byte aligned: pick which half of the 16-byte vector */
        int rightside = ((unsigned long)block & 0x0000000F);
        blockv = vec_ld(0, block);

        /* load row i+1 and form its horizontal pair-sum */
        temp1 = vec_ld(line_size, pixels);
        temp2 = vec_ld(line_size + 16, pixels);
        pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));
        if (((((unsigned long)pixels) + line_size) & 0x0000000F) ==  0x0000000F) {
            pixelsv2 = temp2;
        } else {
            pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));
        }

        pixelsv1 = vec_mergeh(vczero, pixelsv1);
        pixelsv2 = vec_mergeh(vczero, pixelsv2);
        pixelssum2 = vec_add((vector unsigned short)pixelsv1,
                             (vector unsigned short)pixelsv2);
        /* (row i sum + 2) + row i+1 sum, then >> 2 */
        temp3 = vec_add(pixelssum1, pixelssum2);
        temp3 = vec_sra(temp3, vctwo);
        /* roll the accumulator forward for the next iteration */
        pixelssum1 = vec_add(pixelssum2, vctwo);
        pixelsavg = vec_packsu(temp3, (vector unsigned short) vczero);

        /* merge the 8 result bytes into the proper half of blockv */
        if (rightside) {
            blockv = vec_perm(blockv, pixelsavg, vcprm(0, 1, s0, s1));
        } else {
            blockv = vec_perm(blockv, pixelsavg, vcprm(s0, s1, 2, 3));
        }

        vec_st(blockv, 0, block);

        block += line_size;
        pixels += line_size;
    }
}
187
 
188
/* next one assumes that ((line_size % 8) == 0) */
/**
 * 8-wide half-pel (x+1/2, y+1/2) interpolation, no-rounding variant:
 * identical to put_pixels8_xy2_altivec except the bias added before
 * the >>2 is 1 (vcone) instead of 2, so ties truncate downward.
 */
static void put_no_rnd_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
{
    register int i;
    register vector unsigned char pixelsv1, pixelsv2, pixelsavg;
    register vector unsigned char blockv, temp1, temp2;
    register vector unsigned short pixelssum1, pixelssum2, temp3;
    register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0);
    register const vector unsigned short vcone = (const vector unsigned short)vec_splat_u16(1);
    /* vctwo is only the shift count here; the bias is vcone */
    register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2);

    /* prime the accumulator with the pair-sum of row 0 */
    temp1 = vec_ld(0, pixels);
    temp2 = vec_ld(16, pixels);
    pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
    if ((((unsigned long)pixels) & 0x0000000F) ==  0x0000000F) {
        pixelsv2 = temp2;
    } else {
        pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));
    }
    /* zero-extend to 16-bit lanes to avoid overflow in the sums */
    pixelsv1 = vec_mergeh(vczero, pixelsv1);
    pixelsv2 = vec_mergeh(vczero, pixelsv2);
    pixelssum1 = vec_add((vector unsigned short)pixelsv1,
                         (vector unsigned short)pixelsv2);
    pixelssum1 = vec_add(pixelssum1, vcone);

    for (i = 0; i < h ; i++) {
        /* dst is 8-byte aligned: select which vector half to update */
        int rightside = ((unsigned long)block & 0x0000000F);
        blockv = vec_ld(0, block);

        /* pair-sum of row i+1 */
        temp1 = vec_ld(line_size, pixels);
        temp2 = vec_ld(line_size + 16, pixels);
        pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));
        if (((((unsigned long)pixels) + line_size) & 0x0000000F) ==  0x0000000F) {
            pixelsv2 = temp2;
        } else {
            pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));
        }

        pixelsv1 = vec_mergeh(vczero, pixelsv1);
        pixelsv2 = vec_mergeh(vczero, pixelsv2);
        pixelssum2 = vec_add((vector unsigned short)pixelsv1,
                             (vector unsigned short)pixelsv2);
        /* (row i sum + 1) + row i+1 sum, then >> 2 */
        temp3 = vec_add(pixelssum1, pixelssum2);
        temp3 = vec_sra(temp3, vctwo);
        pixelssum1 = vec_add(pixelssum2, vcone);
        pixelsavg = vec_packsu(temp3, (vector unsigned short) vczero);

        if (rightside) {
            blockv = vec_perm(blockv, pixelsavg, vcprm(0, 1, s0, s1));
        } else {
            blockv = vec_perm(blockv, pixelsavg, vcprm(s0, s1, 2, 3));
        }

        vec_st(blockv, 0, block);

        block += line_size;
        pixels += line_size;
    }
}
247
 
248
/* next one assumes that ((line_size % 16) == 0) */
/**
 * 16-wide half-pel (x+1/2, y+1/2) interpolation with rounding:
 * dst[x] = (p[x] + p[x+1] + p[x+line_size] + p[x+line_size+1] + 2) >> 2.
 * The 16 pixels are split into low (mergeh) and high (mergel) halves of
 * 16-bit lanes; two rolling accumulators (pixelssum1/pixelssum3) carry
 * the previous row's pair-sums so each row is loaded only once.
 */
static void put_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, ptrdiff_t line_size, int h)
{
    register int i;
    register vector unsigned char pixelsv1, pixelsv2, pixelsv3, pixelsv4;
    register vector unsigned char blockv, temp1, temp2;
    register vector unsigned short temp3, temp4,
        pixelssum1, pixelssum2, pixelssum3, pixelssum4;
    register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0);
    /* vctwo is both the rounding bias (+2) and the shift count (>>2) */
    register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2);

    /* prime the accumulators with row 0: pixelsv1 = p[x], pixelsv2 = p[x+1] */
    temp1 = vec_ld(0, pixels);
    temp2 = vec_ld(16, pixels);
    pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
    /* at offset 15, pixels+1 starts exactly at the second aligned block */
    if ((((unsigned long)pixels) & 0x0000000F) ==  0x0000000F) {
        pixelsv2 = temp2;
    } else {
        pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));
    }
    /* zero-extend: pixelsv1/2 = low 8 bytes, pixelsv3/4 = high 8 bytes */
    pixelsv3 = vec_mergel(vczero, pixelsv1);
    pixelsv4 = vec_mergel(vczero, pixelsv2);
    pixelsv1 = vec_mergeh(vczero, pixelsv1);
    pixelsv2 = vec_mergeh(vczero, pixelsv2);
    pixelssum3 = vec_add((vector unsigned short)pixelsv3,
                         (vector unsigned short)pixelsv4);
    pixelssum3 = vec_add(pixelssum3, vctwo);
    pixelssum1 = vec_add((vector unsigned short)pixelsv1,
                         (vector unsigned short)pixelsv2);
    pixelssum1 = vec_add(pixelssum1, vctwo);

    for (i = 0; i < h ; i++) {
        blockv = vec_ld(0, block);

        /* load row i+1 and form its pair-sums for both halves */
        temp1 = vec_ld(line_size, pixels);
        temp2 = vec_ld(line_size + 16, pixels);
        pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));
        if (((((unsigned long)pixels) + line_size) & 0x0000000F) ==  0x0000000F) {
            pixelsv2 = temp2;
        } else {
            pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));
        }

        pixelsv3 = vec_mergel(vczero, pixelsv1);
        pixelsv4 = vec_mergel(vczero, pixelsv2);
        pixelsv1 = vec_mergeh(vczero, pixelsv1);
        pixelsv2 = vec_mergeh(vczero, pixelsv2);

        pixelssum4 = vec_add((vector unsigned short)pixelsv3,
                             (vector unsigned short)pixelsv4);
        pixelssum2 = vec_add((vector unsigned short)pixelsv1,
                             (vector unsigned short)pixelsv2);
        /* (previous row sum + 2) + current row sum, then >> 2, per half */
        temp4 = vec_add(pixelssum3, pixelssum4);
        temp4 = vec_sra(temp4, vctwo);
        temp3 = vec_add(pixelssum1, pixelssum2);
        temp3 = vec_sra(temp3, vctwo);

        /* roll both accumulators forward for the next iteration */
        pixelssum3 = vec_add(pixelssum4, vctwo);
        pixelssum1 = vec_add(pixelssum2, vctwo);

        /* pack the two 16-bit halves back into 16 result bytes */
        blockv = vec_packsu(temp3, temp4);

        vec_st(blockv, 0, block);

        block += line_size;
        pixels += line_size;
    }
}
315
 
316
/* next one assumes that ((line_size % 16) == 0) */
/**
 * 16-wide half-pel (x+1/2, y+1/2) interpolation, no-rounding variant:
 * identical to put_pixels16_xy2_altivec except the bias added before
 * the >>2 is 1 (vcone) instead of 2, so ties truncate downward.
 */
static void put_no_rnd_pixels16_xy2_altivec(uint8_t * block, const uint8_t * pixels, ptrdiff_t line_size, int h)
{
    register int i;
    register vector unsigned char pixelsv1, pixelsv2, pixelsv3, pixelsv4;
    register vector unsigned char blockv, temp1, temp2;
    register vector unsigned short temp3, temp4,
        pixelssum1, pixelssum2, pixelssum3, pixelssum4;
    register const vector unsigned char vczero = (const vector unsigned char)vec_splat_u8(0);
    register const vector unsigned short vcone = (const vector unsigned short)vec_splat_u16(1);
    /* vctwo is only the shift count here; the bias is vcone */
    register const vector unsigned short vctwo = (const vector unsigned short)vec_splat_u16(2);

    /* prime the accumulators with the pair-sums of row 0 */
    temp1 = vec_ld(0, pixels);
    temp2 = vec_ld(16, pixels);
    pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
    if ((((unsigned long)pixels) & 0x0000000F) ==  0x0000000F) {
        pixelsv2 = temp2;
    } else {
        pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));
    }
    /* zero-extend: pixelsv1/2 = low 8 bytes, pixelsv3/4 = high 8 bytes */
    pixelsv3 = vec_mergel(vczero, pixelsv1);
    pixelsv4 = vec_mergel(vczero, pixelsv2);
    pixelsv1 = vec_mergeh(vczero, pixelsv1);
    pixelsv2 = vec_mergeh(vczero, pixelsv2);
    pixelssum3 = vec_add((vector unsigned short)pixelsv3,
                         (vector unsigned short)pixelsv4);
    pixelssum3 = vec_add(pixelssum3, vcone);
    pixelssum1 = vec_add((vector unsigned short)pixelsv1,
                         (vector unsigned short)pixelsv2);
    pixelssum1 = vec_add(pixelssum1, vcone);

    for (i = 0; i < h ; i++) {
        blockv = vec_ld(0, block);

        /* load row i+1 and form its pair-sums for both halves */
        temp1 = vec_ld(line_size, pixels);
        temp2 = vec_ld(line_size + 16, pixels);
        pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));
        if (((((unsigned long)pixels) + line_size) & 0x0000000F) ==  0x0000000F) {
            pixelsv2 = temp2;
        } else {
            pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));
        }

        pixelsv3 = vec_mergel(vczero, pixelsv1);
        pixelsv4 = vec_mergel(vczero, pixelsv2);
        pixelsv1 = vec_mergeh(vczero, pixelsv1);
        pixelsv2 = vec_mergeh(vczero, pixelsv2);

        pixelssum4 = vec_add((vector unsigned short)pixelsv3,
                             (vector unsigned short)pixelsv4);
        pixelssum2 = vec_add((vector unsigned short)pixelsv1,
                             (vector unsigned short)pixelsv2);
        /* (previous row sum + 1) + current row sum, then >> 2, per half */
        temp4 = vec_add(pixelssum3, pixelssum4);
        temp4 = vec_sra(temp4, vctwo);
        temp3 = vec_add(pixelssum1, pixelssum2);
        temp3 = vec_sra(temp3, vctwo);

        /* roll both accumulators forward for the next iteration */
        pixelssum3 = vec_add(pixelssum4, vcone);
        pixelssum1 = vec_add(pixelssum2, vcone);

        /* pack the two 16-bit halves back into 16 result bytes */
        blockv = vec_packsu(temp3, temp4);

        vec_st(blockv, 0, block);

        block += line_size;
        pixels += line_size;
    }
}
384
 
385
/* next one assumes that ((line_size % 8) == 0) */
/**
 * 8-wide half-pel (x+1/2, y+1/2) interpolation, averaged into dst:
 * first computes the rounded 4-tap value as in put_pixels8_xy2_altivec,
 * then vec_avg's it with the existing destination bytes.
 */
static void avg_pixels8_xy2_altivec(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int h)
{
    register int i;
    register vector unsigned char pixelsv1, pixelsv2, pixelsavg;
    register vector unsigned char blockv, temp1, temp2, blocktemp;
    register vector unsigned short pixelssum1, pixelssum2, temp3;

    register const vector unsigned char vczero = (const vector unsigned char)
                                        vec_splat_u8(0);
    /* vctwo doubles as the rounding bias (+2) and the shift count (>>2) */
    register const vector unsigned short vctwo = (const vector unsigned short)
                                        vec_splat_u16(2);

    /* prime the accumulator with the pair-sum of row 0 */
    temp1 = vec_ld(0, pixels);
    temp2 = vec_ld(16, pixels);
    pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(0, pixels));
    /* at offset 15, pixels+1 starts exactly at the second aligned block */
    if ((((unsigned long)pixels) & 0x0000000F) ==  0x0000000F) {
        pixelsv2 = temp2;
    } else {
        pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(1, pixels));
    }
    /* zero-extend the low 8 bytes to 16-bit lanes to avoid overflow */
    pixelsv1 = vec_mergeh(vczero, pixelsv1);
    pixelsv2 = vec_mergeh(vczero, pixelsv2);
    pixelssum1 = vec_add((vector unsigned short)pixelsv1,
                         (vector unsigned short)pixelsv2);
    pixelssum1 = vec_add(pixelssum1, vctwo);

    for (i = 0; i < h ; i++) {
        /* dst is 8-byte aligned: select which vector half to update */
        int rightside = ((unsigned long)block & 0x0000000F);
        blockv = vec_ld(0, block);

        /* pair-sum of row i+1 */
        temp1 = vec_ld(line_size, pixels);
        temp2 = vec_ld(line_size + 16, pixels);
        pixelsv1 = vec_perm(temp1, temp2, vec_lvsl(line_size, pixels));
        if (((((unsigned long)pixels) + line_size) & 0x0000000F) ==  0x0000000F) {
            pixelsv2 = temp2;
        } else {
            pixelsv2 = vec_perm(temp1, temp2, vec_lvsl(line_size + 1, pixels));
        }

        pixelsv1 = vec_mergeh(vczero, pixelsv1);
        pixelsv2 = vec_mergeh(vczero, pixelsv2);
        pixelssum2 = vec_add((vector unsigned short)pixelsv1,
                             (vector unsigned short)pixelsv2);
        /* (row i sum + 2) + row i+1 sum, then >> 2 */
        temp3 = vec_add(pixelssum1, pixelssum2);
        temp3 = vec_sra(temp3, vctwo);
        pixelssum1 = vec_add(pixelssum2, vctwo);
        pixelsavg = vec_packsu(temp3, (vector unsigned short) vczero);

        /* splice the interpolated bytes into the active half; the other
         * half stays equal to blockv so the final vec_avg leaves it
         * unchanged (avg(x, x) == x) */
        if (rightside) {
            blocktemp = vec_perm(blockv, pixelsavg, vcprm(0, 1, s0, s1));
        } else {
            blocktemp = vec_perm(blockv, pixelsavg, vcprm(s0, s1, 2, 3));
        }

        blockv = vec_avg(blocktemp, blockv);
        vec_st(blockv, 0, block);

        block += line_size;
        pixels += line_size;
    }
}
447
#endif /* HAVE_ALTIVEC */
448
 
449
/**
 * Install the AltiVec half-pel functions into the HpelDSP dispatch
 * table, provided the build has AltiVec support and the running CPU
 * reports the AltiVec capability flag.
 */
av_cold void ff_hpeldsp_init_ppc(HpelDSPContext *c, int flags)
{
#if HAVE_ALTIVEC
    int cpu_flags = av_get_cpu_flags();

    if (cpu_flags & AV_CPU_FLAG_ALTIVEC) {
        /* put */
        c->put_pixels_tab[0][0]        = ff_put_pixels16_altivec;
        c->put_pixels_tab[0][3]        = put_pixels16_xy2_altivec;
        c->put_pixels_tab[1][3]        = put_pixels8_xy2_altivec;

        /* put, no rounding */
        c->put_no_rnd_pixels_tab[0][0] = ff_put_pixels16_altivec;
        c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_altivec;
        c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_altivec;

        /* avg */
        c->avg_pixels_tab[0][0]        = ff_avg_pixels16_altivec;
        c->avg_pixels_tab[1][0]        = avg_pixels8_altivec;
        c->avg_pixels_tab[1][3]        = avg_pixels8_xy2_altivec;
    }
#endif /* HAVE_ALTIVEC */
}