Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
6148 serge 1
/*
2
 * Simple IDCT
3
 *
4
 * Copyright (c) 2001 Michael Niedermayer 
5
 *
6
 * This file is part of FFmpeg.
7
 *
8
 * FFmpeg is free software; you can redistribute it and/or
9
 * modify it under the terms of the GNU Lesser General Public
10
 * License as published by the Free Software Foundation; either
11
 * version 2.1 of the License, or (at your option) any later version.
12
 *
13
 * FFmpeg is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16
 * Lesser General Public License for more details.
17
 *
18
 * You should have received a copy of the GNU Lesser General Public
19
 * License along with FFmpeg; if not, write to the Free Software
20
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21
 */
22
 
23
/**
24
 * @file
25
 * simpleidct in C.
26
 */
27
 
28
/*
29
  based upon some commented-out C code from mpeg2dec (idct_mmx.c
30
  written by Aaron Holtzman)
31
 */
32
 
33
#include "bit_depth_template.c"
34
 
35
/*
 * Per-BIT_DEPTH configuration of the IDCT below.
 *
 * Earlier definitions are dropped first, so the constants can be redefined
 * when this file is processed again with a different BIT_DEPTH.
 */
#undef W1
#undef W2
#undef W3
#undef W4
#undef W5
#undef W6
#undef W7
#undef ROW_SHIFT
#undef COL_SHIFT
#undef DC_SHIFT
#undef MUL
#undef MAC

#if BIT_DEPTH == 8

/*
 * W<i> = cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5
 * NOTE(review): W4 is one less than the formula gives (16384); this looks
 * deliberate -- confirm before "correcting" it.
 */
#define W1  22725
#define W2  21407
#define W3  19266
#define W4  16383
#define W5  12873
#define W6  8867
#define W7  4520

#define ROW_SHIFT 11
#define COL_SHIFT 20
#define DC_SHIFT 3

#define MUL(a, b)    MUL16(a, b)
#define MAC(a, b, c) MAC16(a, b, c)

#elif BIT_DEPTH == 10 || BIT_DEPTH == 12

#if BIT_DEPTH == 10
/* Same cosine table scaled by (1<<16); W4 is again one below the formula. */
#define W1 90901
#define W2 85627
#define W3 77062
#define W4 65535
#define W5 51491
#define W6 35468
#define W7 18081

#define ROW_SHIFT 15
#define COL_SHIFT 20
#define DC_SHIFT 1
#else
/* Same cosine table scaled by (1<<15); W4 is again one below the formula. */
#define W1 45451
#define W2 42813
#define W3 38531
#define W4 32767
#define W5 25746
#define W6 17734
#define W7 9041

#define ROW_SHIFT 16
#define COL_SHIFT 17
/* Parenthesized so the negative value is safe in any expression context. */
#define DC_SHIFT (-1)
#endif

/*
 * Multiply in unsigned arithmetic: with these constants a product such as
 * W1 * 32767 (BIT_DEPTH 10) does not fit in a signed int, and signed
 * overflow is undefined. Unsigned arithmetic wraps, and converting back to
 * int yields the same value whenever the signed product was representable.
 */
#define MUL(a, b)    ((int)((unsigned)(a) * (b)))
#define MAC(a, b, c) ((a) += (unsigned)(b) * (c))

#else

#error "Unsupported bitdepth"

#endif
101
 
102
static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift)
103
{
104
    int a0, a1, a2, a3, b0, b1, b2, b3;
105
 
106
#if HAVE_FAST_64BIT
107
#define ROW0_MASK (0xffffLL << 48 * HAVE_BIGENDIAN)
108
    if (((((uint64_t *)row)[0] & ~ROW0_MASK) | ((uint64_t *)row)[1]) == 0) {
109
        uint64_t temp;
110
        if (DC_SHIFT - extra_shift > 0) {
111
            temp = (row[0] << (DC_SHIFT - extra_shift)) & 0xffff;
112
        } else {
113
            temp = (row[0] >> (extra_shift - DC_SHIFT)) & 0xffff;
114
        }
115
        temp += temp << 16;
116
        temp += temp << 32;
117
        ((uint64_t *)row)[0] = temp;
118
        ((uint64_t *)row)[1] = temp;
119
        return;
120
    }
121
#else
122
    if (!(((uint32_t*)row)[1] |
123
          ((uint32_t*)row)[2] |
124
          ((uint32_t*)row)[3] |
125
          row[1])) {
126
        uint32_t temp;
127
        if (DC_SHIFT - extra_shift > 0) {
128
            temp = (row[0] << (DC_SHIFT - extra_shift)) & 0xffff;
129
        } else {
130
            temp = (row[0] >> (extra_shift - DC_SHIFT)) & 0xffff;
131
        }
132
        temp += temp << 16;
133
        ((uint32_t*)row)[0]=((uint32_t*)row)[1] =
134
            ((uint32_t*)row)[2]=((uint32_t*)row)[3] = temp;
135
        return;
136
    }
137
#endif
138
 
139
    a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1));
140
    a1 = a0;
141
    a2 = a0;
142
    a3 = a0;
143
 
144
    a0 += W2 * row[2];
145
    a1 += W6 * row[2];
146
    a2 -= W6 * row[2];
147
    a3 -= W2 * row[2];
148
 
149
    b0 = MUL(W1, row[1]);
150
    MAC(b0, W3, row[3]);
151
    b1 = MUL(W3, row[1]);
152
    MAC(b1, -W7, row[3]);
153
    b2 = MUL(W5, row[1]);
154
    MAC(b2, -W1, row[3]);
155
    b3 = MUL(W7, row[1]);
156
    MAC(b3, -W5, row[3]);
157
 
158
    if (AV_RN64A(row + 4)) {
159
        a0 +=   W4*row[4] + W6*row[6];
160
        a1 += - W4*row[4] - W2*row[6];
161
        a2 += - W4*row[4] + W2*row[6];
162
        a3 +=   W4*row[4] - W6*row[6];
163
 
164
        MAC(b0,  W5, row[5]);
165
        MAC(b0,  W7, row[7]);
166
 
167
        MAC(b1, -W1, row[5]);
168
        MAC(b1, -W5, row[7]);
169
 
170
        MAC(b2,  W7, row[5]);
171
        MAC(b2,  W3, row[7]);
172
 
173
        MAC(b3,  W3, row[5]);
174
        MAC(b3, -W1, row[7]);
175
    }
176
 
177
    row[0] = (a0 + b0) >> (ROW_SHIFT + extra_shift);
178
    row[7] = (a0 - b0) >> (ROW_SHIFT + extra_shift);
179
    row[1] = (a1 + b1) >> (ROW_SHIFT + extra_shift);
180
    row[6] = (a1 - b1) >> (ROW_SHIFT + extra_shift);
181
    row[2] = (a2 + b2) >> (ROW_SHIFT + extra_shift);
182
    row[5] = (a2 - b2) >> (ROW_SHIFT + extra_shift);
183
    row[3] = (a3 + b3) >> (ROW_SHIFT + extra_shift);
184
    row[4] = (a3 - b3) >> (ROW_SHIFT + extra_shift);
185
}
186
 
187
/*
 * Column-pass arithmetic shared by the idctSparseCol* functions.
 *
 * Expands in a scope that provides `col` (coefficients of one column,
 * 8 elements apart) and the int variables a0..a3 and b0..b3. On exit
 * a0..a3 hold the even part (including the rounding bias for COL_SHIFT)
 * and b0..b3 the odd part. Terms for col[8*4]..col[8*7] are added only
 * when the corresponding coefficient is non-zero.
 */
#define IDCT_COLS do {                                          \
        /* even part: DC plus rounding bias */                  \
        a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4));         \
        a3 = a2 = a1 = a0;                                      \
                                                                \
        a0 += W2*col[8*2];                                      \
        a1 += W6*col[8*2];                                      \
        a2 -= W6*col[8*2];                                      \
        a3 -= W2*col[8*2];                                      \
                                                                \
        /* odd part from rows 1 and 3 */                        \
        b0 = MUL(W1, col[8*1]);                                 \
        MAC(b0,  W3, col[8*3]);                                 \
        b1 = MUL(W3, col[8*1]);                                 \
        MAC(b1, -W7, col[8*3]);                                 \
        b2 = MUL(W5, col[8*1]);                                 \
        MAC(b2, -W1, col[8*3]);                                 \
        b3 = MUL(W7, col[8*1]);                                 \
        MAC(b3, -W5, col[8*3]);                                 \
                                                                \
        if (col[8*4] != 0) {                                    \
            a0 += W4*col[8*4];                                  \
            a1 -= W4*col[8*4];                                  \
            a2 -= W4*col[8*4];                                  \
            a3 += W4*col[8*4];                                  \
        }                                                       \
                                                                \
        if (col[8*5] != 0) {                                    \
            MAC(b0,  W5, col[8*5]);                             \
            MAC(b1, -W1, col[8*5]);                             \
            MAC(b2,  W7, col[8*5]);                             \
            MAC(b3,  W3, col[8*5]);                             \
        }                                                       \
                                                                \
        if (col[8*6] != 0) {                                    \
            a0 += W6*col[8*6];                                  \
            a1 -= W2*col[8*6];                                  \
            a2 += W2*col[8*6];                                  \
            a3 -= W6*col[8*6];                                  \
        }                                                       \
                                                                \
        if (col[8*7] != 0) {                                    \
            MAC(b0,  W7, col[8*7]);                             \
            MAC(b1, -W5, col[8*7]);                             \
            MAC(b2,  W3, col[8*7]);                             \
            MAC(b3, -W1, col[8*7]);                             \
        }                                                       \
    } while (0)
236
 
237
/**
 * Column pass for one column; the clipped results are stored as pixels.
 *
 * @param dest       first output pixel of the column
 * @param line_size  distance between consecutive output rows, in pixels
 * @param col        first coefficient of the column (elements 8 apart)
 */
static inline void FUNC(idctSparseColPut)(pixel *dest, int line_size,
                                          int16_t *col)
{
    int a0, a1, a2, a3, b0, b1, b2, b3;
    int out[8];
    int y;

    IDCT_COLS;

    /* Output rows y and 7 - y share the same even/odd pair. */
    out[0] = a0 + b0;
    out[1] = a1 + b1;
    out[2] = a2 + b2;
    out[3] = a3 + b3;
    out[4] = a3 - b3;
    out[5] = a2 - b2;
    out[6] = a1 - b1;
    out[7] = a0 - b0;

    for (y = 0; y < 8; y++)
        dest[y * line_size] = av_clip_pixel(out[y] >> COL_SHIFT);
}
260
 
261
/**
 * Column pass for one column; each result is added to the pixel already
 * in dest and the sum is clipped before being stored back.
 *
 * @param dest       first pixel of the column to update
 * @param line_size  distance between consecutive rows of dest, in pixels
 * @param col        first coefficient of the column (elements 8 apart)
 */
static inline void FUNC(idctSparseColAdd)(pixel *dest, int line_size,
                                          int16_t *col)
{
    int a0, a1, a2, a3, b0, b1, b2, b3;
    int out[8];
    int y;

    IDCT_COLS;

    /* Output rows y and 7 - y share the same even/odd pair. */
    out[0] = a0 + b0;
    out[1] = a1 + b1;
    out[2] = a2 + b2;
    out[3] = a3 + b3;
    out[4] = a3 - b3;
    out[5] = a2 - b2;
    out[6] = a1 - b1;
    out[7] = a0 - b0;

    for (y = 0; y < 8; y++) {
        pixel *px = dest + y * line_size;

        px[0] = av_clip_pixel(px[0] + (out[y] >> COL_SHIFT));
    }
}
284
 
285
static inline void FUNC(idctSparseCol)(int16_t *col)
286
{
287
    int a0, a1, a2, a3, b0, b1, b2, b3;
288
 
289
    IDCT_COLS;
290
 
291
    col[0 ] = ((a0 + b0) >> COL_SHIFT);
292
    col[8 ] = ((a1 + b1) >> COL_SHIFT);
293
    col[16] = ((a2 + b2) >> COL_SHIFT);
294
    col[24] = ((a3 + b3) >> COL_SHIFT);
295
    col[32] = ((a3 - b3) >> COL_SHIFT);
296
    col[40] = ((a2 - b2) >> COL_SHIFT);
297
    col[48] = ((a1 - b1) >> COL_SHIFT);
298
    col[56] = ((a0 - b0) >> COL_SHIFT);
299
}
300
 
301
void FUNC(ff_simple_idct_put)(uint8_t *dest_, int line_size, int16_t *block)
302
{
303
    pixel *dest = (pixel *)dest_;
304
    int i;
305
 
306
    line_size /= sizeof(pixel);
307
 
308
    for (i = 0; i < 8; i++)
309
        FUNC(idctRowCondDC)(block + i*8, 0);
310
 
311
    for (i = 0; i < 8; i++)
312
        FUNC(idctSparseColPut)(dest + i, line_size, block + i);
313
}
314
 
315
void FUNC(ff_simple_idct_add)(uint8_t *dest_, int line_size, int16_t *block)
316
{
317
    pixel *dest = (pixel *)dest_;
318
    int i;
319
 
320
    line_size /= sizeof(pixel);
321
 
322
    for (i = 0; i < 8; i++)
323
        FUNC(idctRowCondDC)(block + i*8, 0);
324
 
325
    for (i = 0; i < 8; i++)
326
        FUNC(idctSparseColAdd)(dest + i, line_size, block + i);
327
}
328
 
329
void FUNC(ff_simple_idct)(int16_t *block)
330
{
331
    int i;
332
 
333
    for (i = 0; i < 8; i++)
334
        FUNC(idctRowCondDC)(block + i*8, 0);
335
 
336
    for (i = 0; i < 8; i++)
337
        FUNC(idctSparseCol)(block + i);
338
}