Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
6147 serge 1
/*
2
 * Copyright (c) 2002 Brian Foley
3
 * Copyright (c) 2002 Dieter Shirley
4
 * Copyright (c) 2003-2004 Romain Dolbeau 
5
 *
6
 * This file is part of FFmpeg.
7
 *
8
 * FFmpeg is free software; you can redistribute it and/or
9
 * modify it under the terms of the GNU Lesser General Public
10
 * License as published by the Free Software Foundation; either
11
 * version 2.1 of the License, or (at your option) any later version.
12
 *
13
 * FFmpeg is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16
 * Lesser General Public License for more details.
17
 *
18
 * You should have received a copy of the GNU Lesser General Public
19
 * License along with FFmpeg; if not, write to the Free Software
20
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21
 */
22
 
23
#include "config.h"
24
#if HAVE_ALTIVEC_H
25
#include <altivec.h>
26
#endif
27
 
28
#include "libavutil/attributes.h"
29
#include "libavutil/cpu.h"
30
#include "libavutil/ppc/cpu.h"
31
#include "libavutil/ppc/types_altivec.h"
32
#include "libavutil/ppc/util_altivec.h"
33
#include "libavcodec/avcodec.h"
34
#include "libavcodec/pixblockdsp.h"
35
 
36
#if HAVE_ALTIVEC
37
 
38
#if HAVE_VSX
39
static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
40
                               ptrdiff_t line_size)
41
{
42
    int i;
43
    vector unsigned char perm =
44
        (vector unsigned char) {0x00,0x10, 0x01,0x11,0x02,0x12,0x03,0x13,\
45
            0x04,0x14,0x05,0x15,0x06,0x16,0x07,0x17};
46
    const vector unsigned char zero =
47
        (const vector unsigned char) vec_splat_u8(0);
48
 
49
    for (i = 0; i < 8; i++) {
50
        /* Read potentially unaligned pixels.
51
         * We're reading 16 pixels, and actually only want 8,
52
         * but we simply ignore the extras. */
53
        vector unsigned char bytes = vec_vsx_ld(0, pixels);
54
 
55
        // Convert the bytes into shorts.
56
        //vector signed short shorts = (vector signed short) vec_perm(zero, bytes, perm);
57
        vector signed short shorts = (vector signed short) vec_perm(bytes, zero, perm);
58
 
59
        // Save the data to the block, we assume the block is 16-byte aligned.
60
        vec_vsx_st(shorts, i * 16, (vector signed short *) block);
61
 
62
        pixels += line_size;
63
    }
64
}
65
#else
66
static void get_pixels_altivec(int16_t *restrict block, const uint8_t *pixels,
67
                               ptrdiff_t line_size)
68
{
69
    int i;
70
    vec_u8 perm = vec_lvsl(0, pixels);
71
    const vec_u8 zero = (const vec_u8)vec_splat_u8(0);
72
 
73
    for (i = 0; i < 8; i++) {
74
        /* Read potentially unaligned pixels.
75
         * We're reading 16 pixels, and actually only want 8,
76
         * but we simply ignore the extras. */
77
        vec_u8 pixl = vec_ld(0, pixels);
78
        vec_u8 pixr = vec_ld(7, pixels);
79
        vec_u8 bytes = vec_perm(pixl, pixr, perm);
80
 
81
        // Convert the bytes into shorts.
82
        vec_s16 shorts = (vec_s16)vec_mergeh(zero, bytes);
83
 
84
        // Save the data to the block, we assume the block is 16-byte aligned.
85
        vec_st(shorts, i * 16, (vec_s16 *)block);
86
 
87
        pixels += line_size;
88
    }
89
}
90
 
91
#endif /* HAVE_VSX */
92
 
93
#if HAVE_VSX
94
static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
95
                                const uint8_t *s2, int stride)
96
{
97
  int i;
98
  const vector unsigned char zero =
99
    (const vector unsigned char) vec_splat_u8(0);
100
  vector signed short shorts1, shorts2;
101
 
102
  for (i = 0; i < 4; i++) {
103
    /* Read potentially unaligned pixels.
104
     * We're reading 16 pixels, and actually only want 8,
105
     * but we simply ignore the extras. */
106
    vector unsigned char bytes = vec_vsx_ld(0,  s1);
107
 
108
    // Convert the bytes into shorts.
109
    shorts1 = (vector signed short) vec_mergeh(bytes, zero);
110
 
111
    // Do the same for the second block of pixels.
112
    bytes =vec_vsx_ld(0,  s2);
113
 
114
    // Convert the bytes into shorts.
115
    shorts2 = (vector signed short) vec_mergeh(bytes, zero);
116
 
117
    // Do the subtraction.
118
    shorts1 = vec_sub(shorts1, shorts2);
119
 
120
    // Save the data to the block, we assume the block is 16-byte aligned.
121
    vec_vsx_st(shorts1, 0, (vector signed short *) block);
122
 
123
    s1    += stride;
124
    s2    += stride;
125
    block += 8;
126
 
127
    /* The code below is a copy of the code above...
128
     * This is a manual unroll. */
129
 
130
    /* Read potentially unaligned pixels.
131
     * We're reading 16 pixels, and actually only want 8,
132
     * but we simply ignore the extras. */
133
    bytes = vec_vsx_ld(0,  s1);
134
 
135
    // Convert the bytes into shorts.
136
    shorts1 = (vector signed short) vec_mergeh(bytes, zero);
137
 
138
    // Do the same for the second block of pixels.
139
    bytes = vec_vsx_ld(0,  s2);
140
 
141
    // Convert the bytes into shorts.
142
    shorts2 = (vector signed short) vec_mergeh(bytes, zero);
143
 
144
    // Do the subtraction.
145
    shorts1 = vec_sub(shorts1, shorts2);
146
 
147
    // Save the data to the block, we assume the block is 16-byte aligned.
148
    vec_vsx_st(shorts1, 0, (vector signed short *) block);
149
 
150
    s1    += stride;
151
    s2    += stride;
152
    block += 8;
153
  }
154
}
155
#else
156
static void diff_pixels_altivec(int16_t *restrict block, const uint8_t *s1,
157
                                const uint8_t *s2, int stride)
158
{
159
    int i;
160
    vec_u8 perm1 = vec_lvsl(0, s1);
161
    vec_u8 perm2 = vec_lvsl(0, s2);
162
    const vec_u8 zero = (const vec_u8)vec_splat_u8(0);
163
    vec_s16 shorts1, shorts2;
164
 
165
    for (i = 0; i < 4; i++) {
166
        /* Read potentially unaligned pixels.
167
         * We're reading 16 pixels, and actually only want 8,
168
         * but we simply ignore the extras. */
169
        vec_u8 pixl  = vec_ld(0,  s1);
170
        vec_u8 pixr  = vec_ld(15, s1);
171
        vec_u8 bytes = vec_perm(pixl, pixr, perm1);
172
 
173
        // Convert the bytes into shorts.
174
        shorts1 = (vec_s16)vec_mergeh(zero, bytes);
175
 
176
        // Do the same for the second block of pixels.
177
        pixl  = vec_ld(0,  s2);
178
        pixr  = vec_ld(15, s2);
179
        bytes = vec_perm(pixl, pixr, perm2);
180
 
181
        // Convert the bytes into shorts.
182
        shorts2 = (vec_s16)vec_mergeh(zero, bytes);
183
 
184
        // Do the subtraction.
185
        shorts1 = vec_sub(shorts1, shorts2);
186
 
187
        // Save the data to the block, we assume the block is 16-byte aligned.
188
        vec_st(shorts1, 0, (vec_s16 *)block);
189
 
190
        s1    += stride;
191
        s2    += stride;
192
        block += 8;
193
 
194
        /* The code below is a copy of the code above...
195
         * This is a manual unroll. */
196
 
197
        /* Read potentially unaligned pixels.
198
         * We're reading 16 pixels, and actually only want 8,
199
         * but we simply ignore the extras. */
200
        pixl  = vec_ld(0,  s1);
201
        pixr  = vec_ld(15, s1);
202
        bytes = vec_perm(pixl, pixr, perm1);
203
 
204
        // Convert the bytes into shorts.
205
        shorts1 = (vec_s16)vec_mergeh(zero, bytes);
206
 
207
        // Do the same for the second block of pixels.
208
        pixl  = vec_ld(0,  s2);
209
        pixr  = vec_ld(15, s2);
210
        bytes = vec_perm(pixl, pixr, perm2);
211
 
212
        // Convert the bytes into shorts.
213
        shorts2 = (vec_s16)vec_mergeh(zero, bytes);
214
 
215
        // Do the subtraction.
216
        shorts1 = vec_sub(shorts1, shorts2);
217
 
218
        // Save the data to the block, we assume the block is 16-byte aligned.
219
        vec_st(shorts1, 0, (vec_s16 *)block);
220
 
221
        s1    += stride;
222
        s2    += stride;
223
        block += 8;
224
    }
225
}
226
 
227
#endif /* HAVE_VSX */
228
 
229
#endif /* HAVE_ALTIVEC */
230
 
231
#if HAVE_VSX
232
static void get_pixels_vsx(int16_t *restrict block, const uint8_t *pixels,
233
                           ptrdiff_t line_size)
234
{
235
    int i;
236
    for (i = 0; i < 8; i++) {
237
        vec_s16 shorts = vsx_ld_u8_s16(0, pixels);
238
 
239
        vec_vsx_st(shorts, i * 16, block);
240
 
241
        pixels += line_size;
242
    }
243
}
244
 
245
static void diff_pixels_vsx(int16_t *restrict block, const uint8_t *s1,
246
                            const uint8_t *s2, int stride)
247
{
248
    int i;
249
    vec_s16 shorts1, shorts2;
250
    for (i = 0; i < 8; i++) {
251
        shorts1 = vsx_ld_u8_s16(0, s1);
252
        shorts2 = vsx_ld_u8_s16(0, s2);
253
 
254
        shorts1 = vec_sub(shorts1, shorts2);
255
 
256
        vec_vsx_st(shorts1, 0, block);
257
 
258
        s1    += stride;
259
        s2    += stride;
260
        block += 8;
261
    }
262
}
263
#endif /* HAVE_VSX */
264
 
265
av_cold void ff_pixblockdsp_init_ppc(PixblockDSPContext *c,
266
                                     AVCodecContext *avctx,
267
                                     unsigned high_bit_depth)
268
{
269
#if HAVE_ALTIVEC
270
    if (!PPC_ALTIVEC(av_get_cpu_flags()))
271
        return;
272
 
273
    c->diff_pixels = diff_pixels_altivec;
274
 
275
    if (!high_bit_depth) {
276
        c->get_pixels = get_pixels_altivec;
277
    }
278
#endif /* HAVE_ALTIVEC */
279
 
280
#if HAVE_VSX
281
    if (!PPC_VSX(av_get_cpu_flags()))
282
        return;
283
 
284
    c->diff_pixels = diff_pixels_vsx;
285
 
286
    if (!high_bit_depth)
287
        c->get_pixels = get_pixels_vsx;
288
#endif /* HAVE_VSX */
289
}