Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
4349 Serge 1
/*
2
 * Simple IDCT (Alpha optimized)
3
 *
4
 * Copyright (c) 2001 Michael Niedermayer 
5
 *
6
 * based upon some commented-out C code from mpeg2dec (idct_mmx.c
7
 * written by Aaron Holtzman )
8
 *
9
 * Alpha optimizations by Måns Rullgård 
10
 *                     and Falk Hueffner 
11
 *
12
 * This file is part of FFmpeg.
13
 *
14
 * FFmpeg is free software; you can redistribute it and/or
15
 * modify it under the terms of the GNU Lesser General Public
16
 * License as published by the Free Software Foundation; either
17
 * version 2.1 of the License, or (at your option) any later version.
18
 *
19
 * FFmpeg is distributed in the hope that it will be useful,
20
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22
 * Lesser General Public License for more details.
23
 *
24
 * You should have received a copy of the GNU Lesser General Public
25
 * License along with FFmpeg; if not, write to the Free Software
26
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
27
 */
28
 
29
#include "dsputil_alpha.h"
30
#include "asm.h"
31
 
32
/*
 * Fixed-point transform constants:
 *     Wi = cos(i * M_PI / 16) * sqrt(2) * (1 << 14)
 * The exact value of W4 is 16384; 16383 is used deliberately because it
 * works around accumulating rounding errors for some encoders.
 */
#define W1 22725
#define W2 21407
#define W3 19266
#define W4 16383
#define W5 12873
#define W6  8867
#define W7  4520

/* Right shifts applied to the results of the row and the column pass. */
#define ROW_SHIFT 11
#define COL_SHIFT 20
44
 
45
/* 0: all entries 0, 1: only first entry nonzero, 2: otherwise  */
46
static inline int idct_row(int16_t *row)
47
{
48
    int a0, a1, a2, a3, b0, b1, b2, b3, t;
49
    uint64_t l, r, t2;
50
    l = ldq(row);
51
    r = ldq(row + 4);
52
 
53
    if (l == 0 && r == 0)
54
        return 0;
55
 
56
    a0 = W4 * sextw(l) + (1 << (ROW_SHIFT - 1));
57
 
58
    if (((l & ~0xffffUL) | r) == 0) {
59
        a0 >>= ROW_SHIFT;
60
        t2 = (uint16_t) a0;
61
        t2 |= t2 << 16;
62
        t2 |= t2 << 32;
63
 
64
        stq(t2, row);
65
        stq(t2, row + 4);
66
        return 1;
67
    }
68
 
69
    a1 = a0;
70
    a2 = a0;
71
    a3 = a0;
72
 
73
    t = extwl(l, 4);            /* row[2] */
74
    if (t != 0) {
75
        t = sextw(t);
76
        a0 += W2 * t;
77
        a1 += W6 * t;
78
        a2 -= W6 * t;
79
        a3 -= W2 * t;
80
    }
81
 
82
    t = extwl(r, 0);            /* row[4] */
83
    if (t != 0) {
84
        t = sextw(t);
85
        a0 += W4 * t;
86
        a1 -= W4 * t;
87
        a2 -= W4 * t;
88
        a3 += W4 * t;
89
    }
90
 
91
    t = extwl(r, 4);            /* row[6] */
92
    if (t != 0) {
93
        t = sextw(t);
94
        a0 += W6 * t;
95
        a1 -= W2 * t;
96
        a2 += W2 * t;
97
        a3 -= W6 * t;
98
    }
99
 
100
    t = extwl(l, 2);            /* row[1] */
101
    if (t != 0) {
102
        t = sextw(t);
103
        b0 = W1 * t;
104
        b1 = W3 * t;
105
        b2 = W5 * t;
106
        b3 = W7 * t;
107
    } else {
108
        b0 = 0;
109
        b1 = 0;
110
        b2 = 0;
111
        b3 = 0;
112
    }
113
 
114
    t = extwl(l, 6);            /* row[3] */
115
    if (t) {
116
        t = sextw(t);
117
        b0 += W3 * t;
118
        b1 -= W7 * t;
119
        b2 -= W1 * t;
120
        b3 -= W5 * t;
121
    }
122
 
123
 
124
    t = extwl(r, 2);            /* row[5] */
125
    if (t) {
126
        t = sextw(t);
127
        b0 += W5 * t;
128
        b1 -= W1 * t;
129
        b2 += W7 * t;
130
        b3 += W3 * t;
131
    }
132
 
133
    t = extwl(r, 6);            /* row[7] */
134
    if (t) {
135
        t = sextw(t);
136
        b0 += W7 * t;
137
        b1 -= W5 * t;
138
        b2 += W3 * t;
139
        b3 -= W1 * t;
140
    }
141
 
142
    row[0] = (a0 + b0) >> ROW_SHIFT;
143
    row[1] = (a1 + b1) >> ROW_SHIFT;
144
    row[2] = (a2 + b2) >> ROW_SHIFT;
145
    row[3] = (a3 + b3) >> ROW_SHIFT;
146
    row[4] = (a3 - b3) >> ROW_SHIFT;
147
    row[5] = (a2 - b2) >> ROW_SHIFT;
148
    row[6] = (a1 - b1) >> ROW_SHIFT;
149
    row[7] = (a0 - b0) >> ROW_SHIFT;
150
 
151
    return 2;
152
}
153
 
154
static inline void idct_col(int16_t *col)
155
{
156
    int a0, a1, a2, a3, b0, b1, b2, b3;
157
 
158
    col[0] += (1 << (COL_SHIFT - 1)) / W4;
159
 
160
    a0 = W4 * col[8 * 0];
161
    a1 = W4 * col[8 * 0];
162
    a2 = W4 * col[8 * 0];
163
    a3 = W4 * col[8 * 0];
164
 
165
    if (col[8 * 2]) {
166
        a0 += W2 * col[8 * 2];
167
        a1 += W6 * col[8 * 2];
168
        a2 -= W6 * col[8 * 2];
169
        a3 -= W2 * col[8 * 2];
170
    }
171
 
172
    if (col[8 * 4]) {
173
        a0 += W4 * col[8 * 4];
174
        a1 -= W4 * col[8 * 4];
175
        a2 -= W4 * col[8 * 4];
176
        a3 += W4 * col[8 * 4];
177
    }
178
 
179
    if (col[8 * 6]) {
180
        a0 += W6 * col[8 * 6];
181
        a1 -= W2 * col[8 * 6];
182
        a2 += W2 * col[8 * 6];
183
        a3 -= W6 * col[8 * 6];
184
    }
185
 
186
    if (col[8 * 1]) {
187
        b0 = W1 * col[8 * 1];
188
        b1 = W3 * col[8 * 1];
189
        b2 = W5 * col[8 * 1];
190
        b3 = W7 * col[8 * 1];
191
    } else {
192
        b0 = 0;
193
        b1 = 0;
194
        b2 = 0;
195
        b3 = 0;
196
    }
197
 
198
    if (col[8 * 3]) {
199
        b0 += W3 * col[8 * 3];
200
        b1 -= W7 * col[8 * 3];
201
        b2 -= W1 * col[8 * 3];
202
        b3 -= W5 * col[8 * 3];
203
    }
204
 
205
    if (col[8 * 5]) {
206
        b0 += W5 * col[8 * 5];
207
        b1 -= W1 * col[8 * 5];
208
        b2 += W7 * col[8 * 5];
209
        b3 += W3 * col[8 * 5];
210
    }
211
 
212
    if (col[8 * 7]) {
213
        b0 += W7 * col[8 * 7];
214
        b1 -= W5 * col[8 * 7];
215
        b2 += W3 * col[8 * 7];
216
        b3 -= W1 * col[8 * 7];
217
    }
218
 
219
    col[8 * 0] = (a0 + b0) >> COL_SHIFT;
220
    col[8 * 7] = (a0 - b0) >> COL_SHIFT;
221
    col[8 * 1] = (a1 + b1) >> COL_SHIFT;
222
    col[8 * 6] = (a1 - b1) >> COL_SHIFT;
223
    col[8 * 2] = (a2 + b2) >> COL_SHIFT;
224
    col[8 * 5] = (a2 - b2) >> COL_SHIFT;
225
    col[8 * 3] = (a3 + b3) >> COL_SHIFT;
226
    col[8 * 4] = (a3 - b3) >> COL_SHIFT;
227
}
228
 
229
/* If all rows but the first one are zero after row transformation,
230
   all rows will be identical after column transformation.  */
231
static inline void idct_col2(int16_t *col)
232
{
233
    int i;
234
    uint64_t l, r;
235
 
236
    for (i = 0; i < 8; ++i) {
237
        int a0 = col[i] + (1 << (COL_SHIFT - 1)) / W4;
238
 
239
        a0 *= W4;
240
        col[i] = a0 >> COL_SHIFT;
241
    }
242
 
243
    l = ldq(col + 0 * 4); r = ldq(col + 1 * 4);
244
    stq(l, col +  2 * 4); stq(r, col +  3 * 4);
245
    stq(l, col +  4 * 4); stq(r, col +  5 * 4);
246
    stq(l, col +  6 * 4); stq(r, col +  7 * 4);
247
    stq(l, col +  8 * 4); stq(r, col +  9 * 4);
248
    stq(l, col + 10 * 4); stq(r, col + 11 * 4);
249
    stq(l, col + 12 * 4); stq(r, col + 13 * 4);
250
    stq(l, col + 14 * 4); stq(r, col + 15 * 4);
251
}
252
 
253
/*
 * In-place 2-D IDCT of the 64 coefficients at block[0..63], stored as eight
 * rows of eight.  All rows are transformed first; the classification that
 * idct_row() returns for each row then selects the cheapest column pass.
 */
void ff_simple_idct_axp(int16_t *block)
{
    int only_first_row = 1;     /* rows 1..7 were entirely zero */
    int all_rows_flat  = 1;     /* no row needed the full row transform */
    int k;

    for (k = 0; k < 8; k++) {
        const int kind = idct_row(block + 8 * k);

        if (kind == 2)
            all_rows_flat = 0;
        if (k > 0 && kind > 0)
            only_first_row = 0;
    }

    if (only_first_row) {
        idct_col2(block);
        return;
    }

    if (!all_rows_flat) {
        /* General case: transform each of the eight columns. */
        for (k = 0; k < 8; k++)
            idct_col(block + k);
        return;
    }

    /* Each row holds a single repeated value, so all columns are equal:
       transform column 0 only and replicate each result across its row. */
    idct_col(block);
    for (k = 0; k < 8; k++) {
        int16_t *row = block + 8 * k;
        uint64_t fill = (uint16_t) row[0];

        fill |= fill << 16;
        fill |= fill << 32;
        stq(fill, row);
        stq(fill, row + 4);
    }
}
292
 
293
/*
 * Runs ff_simple_idct_axp() on block (in place) and then hands the result,
 * together with dest and line_size, to put_pixels_clamped_axp_p.
 * NOTE(review): presumably this stores the clamped samples to dest with a
 * row stride of line_size -- confirm against put_pixels_clamped_axp_p.
 */
void ff_simple_idct_put_axp(uint8_t *dest, int line_size, int16_t *block)
{
    ff_simple_idct_axp(block);
    put_pixels_clamped_axp_p(block, dest, line_size);
}
298
 
299
/*
 * Runs ff_simple_idct_axp() on block (in place) and then hands the result,
 * together with dest and line_size, to add_pixels_clamped_axp_p.
 * NOTE(review): presumably this adds the samples to dest with clamping and
 * a row stride of line_size -- confirm against add_pixels_clamped_axp_p.
 */
void ff_simple_idct_add_axp(uint8_t *dest, int line_size, int16_t *block)
{
    ff_simple_idct_axp(block);
    add_pixels_clamped_axp_p(block, dest, line_size);
}