Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
6148 | serge | 1 | /* |
2 | * Simple IDCT |
||
3 | * |
||
4 | * Copyright (c) 2001 Michael Niedermayer |
||
5 | * |
||
6 | * This file is part of FFmpeg. |
||
7 | * |
||
8 | * FFmpeg is free software; you can redistribute it and/or |
||
9 | * modify it under the terms of the GNU Lesser General Public |
||
10 | * License as published by the Free Software Foundation; either |
||
11 | * version 2.1 of the License, or (at your option) any later version. |
||
12 | * |
||
13 | * FFmpeg is distributed in the hope that it will be useful, |
||
14 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
15 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||
16 | * Lesser General Public License for more details. |
||
17 | * |
||
18 | * You should have received a copy of the GNU Lesser General Public |
||
19 | * License along with FFmpeg; if not, write to the Free Software |
||
20 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||
21 | */ |
||
22 | |||
23 | /** |
||
24 | * @file |
||
25 | * simpleidct in C. |
||
26 | */ |
||
27 | |||
28 | /* |
||
29 | based upon some commented-out C code from mpeg2dec (idct_mmx.c, |
||
30 | written by Aaron Holtzman) |
||
31 | */ |
||
32 | |||
33 | #include "bit_depth_template.c" |
||
34 | |||
35 | #undef W1 |
||
36 | #undef W2 |
||
37 | #undef W3 |
||
38 | #undef W4 |
||
39 | #undef W5 |
||
40 | #undef W6 |
||
41 | #undef W7 |
||
42 | #undef ROW_SHIFT |
||
43 | #undef COL_SHIFT |
||
44 | #undef DC_SHIFT |
||
45 | #undef MUL |
||
46 | #undef MAC |
||
47 | |||
48 | #if BIT_DEPTH == 8 |
||
49 | |||
50 | #define W1 22725 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 |
||
51 | #define W2 21407 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 |
||
52 | #define W3 19266 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 |
||
53 | #define W4 16383 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 |
||
54 | #define W5 12873 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 |
||
55 | #define W6 8867 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 |
||
56 | #define W7 4520 //cos(i*M_PI/16)*sqrt(2)*(1<<14) + 0.5 |
||
57 | |||
58 | #define ROW_SHIFT 11 |
||
59 | #define COL_SHIFT 20 |
||
60 | #define DC_SHIFT 3 |
||
61 | |||
62 | #define MUL(a, b) MUL16(a, b) |
||
63 | #define MAC(a, b, c) MAC16(a, b, c) |
||
64 | |||
65 | #elif BIT_DEPTH == 10 || BIT_DEPTH == 12 |
||
66 | |||
67 | #if BIT_DEPTH == 10 |
||
68 | #define W1 90901 |
||
69 | #define W2 85627 |
||
70 | #define W3 77062 |
||
71 | #define W4 65535 |
||
72 | #define W5 51491 |
||
73 | #define W6 35468 |
||
74 | #define W7 18081 |
||
75 | |||
76 | #define ROW_SHIFT 15 |
||
77 | #define COL_SHIFT 20 |
||
78 | #define DC_SHIFT 1 |
||
79 | #else |
||
80 | #define W1 45451 |
||
81 | #define W2 42813 |
||
82 | #define W3 38531 |
||
83 | #define W4 32767 |
||
84 | #define W5 25746 |
||
85 | #define W6 17734 |
||
86 | #define W7 9041 |
||
87 | |||
88 | #define ROW_SHIFT 16 |
||
89 | #define COL_SHIFT 17 |
||
90 | #define DC_SHIFT -1 |
||
91 | #endif |
||
92 | |||
93 | #define MUL(a, b) ((a) * (b)) |
||
94 | #define MAC(a, b, c) ((a) += (b) * (c)) |
||
95 | |||
96 | #else |
||
97 | |||
98 | #error "Unsupported bitdepth" |
||
99 | |||
100 | #endif |
||
101 | |||
102 | static inline void FUNC(idctRowCondDC)(int16_t *row, int extra_shift) |
||
103 | { |
||
104 | int a0, a1, a2, a3, b0, b1, b2, b3; |
||
105 | |||
106 | #if HAVE_FAST_64BIT |
||
107 | #define ROW0_MASK (0xffffLL << 48 * HAVE_BIGENDIAN) |
||
108 | if (((((uint64_t *)row)[0] & ~ROW0_MASK) | ((uint64_t *)row)[1]) == 0) { |
||
109 | uint64_t temp; |
||
110 | if (DC_SHIFT - extra_shift > 0) { |
||
111 | temp = (row[0] << (DC_SHIFT - extra_shift)) & 0xffff; |
||
112 | } else { |
||
113 | temp = (row[0] >> (extra_shift - DC_SHIFT)) & 0xffff; |
||
114 | } |
||
115 | temp += temp << 16; |
||
116 | temp += temp << 32; |
||
117 | ((uint64_t *)row)[0] = temp; |
||
118 | ((uint64_t *)row)[1] = temp; |
||
119 | return; |
||
120 | } |
||
121 | #else |
||
122 | if (!(((uint32_t*)row)[1] | |
||
123 | ((uint32_t*)row)[2] | |
||
124 | ((uint32_t*)row)[3] | |
||
125 | row[1])) { |
||
126 | uint32_t temp; |
||
127 | if (DC_SHIFT - extra_shift > 0) { |
||
128 | temp = (row[0] << (DC_SHIFT - extra_shift)) & 0xffff; |
||
129 | } else { |
||
130 | temp = (row[0] >> (extra_shift - DC_SHIFT)) & 0xffff; |
||
131 | } |
||
132 | temp += temp << 16; |
||
133 | ((uint32_t*)row)[0]=((uint32_t*)row)[1] = |
||
134 | ((uint32_t*)row)[2]=((uint32_t*)row)[3] = temp; |
||
135 | return; |
||
136 | } |
||
137 | #endif |
||
138 | |||
139 | a0 = (W4 * row[0]) + (1 << (ROW_SHIFT - 1)); |
||
140 | a1 = a0; |
||
141 | a2 = a0; |
||
142 | a3 = a0; |
||
143 | |||
144 | a0 += W2 * row[2]; |
||
145 | a1 += W6 * row[2]; |
||
146 | a2 -= W6 * row[2]; |
||
147 | a3 -= W2 * row[2]; |
||
148 | |||
149 | b0 = MUL(W1, row[1]); |
||
150 | MAC(b0, W3, row[3]); |
||
151 | b1 = MUL(W3, row[1]); |
||
152 | MAC(b1, -W7, row[3]); |
||
153 | b2 = MUL(W5, row[1]); |
||
154 | MAC(b2, -W1, row[3]); |
||
155 | b3 = MUL(W7, row[1]); |
||
156 | MAC(b3, -W5, row[3]); |
||
157 | |||
158 | if (AV_RN64A(row + 4)) { |
||
159 | a0 += W4*row[4] + W6*row[6]; |
||
160 | a1 += - W4*row[4] - W2*row[6]; |
||
161 | a2 += - W4*row[4] + W2*row[6]; |
||
162 | a3 += W4*row[4] - W6*row[6]; |
||
163 | |||
164 | MAC(b0, W5, row[5]); |
||
165 | MAC(b0, W7, row[7]); |
||
166 | |||
167 | MAC(b1, -W1, row[5]); |
||
168 | MAC(b1, -W5, row[7]); |
||
169 | |||
170 | MAC(b2, W7, row[5]); |
||
171 | MAC(b2, W3, row[7]); |
||
172 | |||
173 | MAC(b3, W3, row[5]); |
||
174 | MAC(b3, -W1, row[7]); |
||
175 | } |
||
176 | |||
177 | row[0] = (a0 + b0) >> (ROW_SHIFT + extra_shift); |
||
178 | row[7] = (a0 - b0) >> (ROW_SHIFT + extra_shift); |
||
179 | row[1] = (a1 + b1) >> (ROW_SHIFT + extra_shift); |
||
180 | row[6] = (a1 - b1) >> (ROW_SHIFT + extra_shift); |
||
181 | row[2] = (a2 + b2) >> (ROW_SHIFT + extra_shift); |
||
182 | row[5] = (a2 - b2) >> (ROW_SHIFT + extra_shift); |
||
183 | row[3] = (a3 + b3) >> (ROW_SHIFT + extra_shift); |
||
184 | row[4] = (a3 - b3) >> (ROW_SHIFT + extra_shift); |
||
185 | } |
||
186 | |||
/*
 * Column-pass arithmetic shared by the idctSparseCol* functions.
 *
 * Expects the expanding scope to provide an int16_t pointer named col
 * (one column, elements 8 apart) and int variables a0..a3 / b0..b3.
 * On exit a0..a3 hold the even-part sums (including the rounding bias)
 * and b0..b3 the odd-part sums; the caller combines them as aN +/- bN
 * and shifts by COL_SHIFT. Contributions of col[8*4]..col[8*7] are
 * skipped individually when the coefficient is zero.
 */
#define IDCT_COLS do {                                              \
        a0 = W4 * (col[8*0] + ((1<<(COL_SHIFT-1))/W4));             \
        a3 = a2 = a1 = a0;                                          \
                                                                    \
        a0 += W2*col[8*2];                                          \
        a1 += W6*col[8*2];                                          \
        a2 -= W6*col[8*2];                                          \
        a3 -= W2*col[8*2];                                          \
                                                                    \
        b0 = MUL(W1, col[8*1]);                                     \
        MAC(b0,  W3, col[8*3]);                                     \
        b1 = MUL(W3, col[8*1]);                                     \
        MAC(b1, -W7, col[8*3]);                                     \
        b2 = MUL(W5, col[8*1]);                                     \
        MAC(b2, -W1, col[8*3]);                                     \
        b3 = MUL(W7, col[8*1]);                                     \
        MAC(b3, -W5, col[8*3]);                                     \
                                                                    \
        if (col[8*4]) {                                             \
            a0 += W4*col[8*4];                                      \
            a1 -= W4*col[8*4];                                      \
            a2 -= W4*col[8*4];                                      \
            a3 += W4*col[8*4];                                      \
        }                                                           \
                                                                    \
        if (col[8*5]) {                                             \
            MAC(b0,  W5, col[8*5]);                                 \
            MAC(b1, -W1, col[8*5]);                                 \
            MAC(b2,  W7, col[8*5]);                                 \
            MAC(b3,  W3, col[8*5]);                                 \
        }                                                           \
                                                                    \
        if (col[8*6]) {                                             \
            a0 += W6*col[8*6];                                      \
            a1 -= W2*col[8*6];                                      \
            a2 += W2*col[8*6];                                      \
            a3 -= W6*col[8*6];                                      \
        }                                                           \
                                                                    \
        if (col[8*7]) {                                             \
            MAC(b0,  W7, col[8*7]);                                 \
            MAC(b1, -W5, col[8*7]);                                 \
            MAC(b2,  W3, col[8*7]);                                 \
            MAC(b3, -W1, col[8*7]);                                 \
        }                                                           \
    } while (0)
||
236 | |||
237 | static inline void FUNC(idctSparseColPut)(pixel *dest, int line_size, |
||
238 | int16_t *col) |
||
239 | { |
||
240 | int a0, a1, a2, a3, b0, b1, b2, b3; |
||
241 | |||
242 | IDCT_COLS; |
||
243 | |||
244 | dest[0] = av_clip_pixel((a0 + b0) >> COL_SHIFT); |
||
245 | dest += line_size; |
||
246 | dest[0] = av_clip_pixel((a1 + b1) >> COL_SHIFT); |
||
247 | dest += line_size; |
||
248 | dest[0] = av_clip_pixel((a2 + b2) >> COL_SHIFT); |
||
249 | dest += line_size; |
||
250 | dest[0] = av_clip_pixel((a3 + b3) >> COL_SHIFT); |
||
251 | dest += line_size; |
||
252 | dest[0] = av_clip_pixel((a3 - b3) >> COL_SHIFT); |
||
253 | dest += line_size; |
||
254 | dest[0] = av_clip_pixel((a2 - b2) >> COL_SHIFT); |
||
255 | dest += line_size; |
||
256 | dest[0] = av_clip_pixel((a1 - b1) >> COL_SHIFT); |
||
257 | dest += line_size; |
||
258 | dest[0] = av_clip_pixel((a0 - b0) >> COL_SHIFT); |
||
259 | } |
||
260 | |||
261 | static inline void FUNC(idctSparseColAdd)(pixel *dest, int line_size, |
||
262 | int16_t *col) |
||
263 | { |
||
264 | int a0, a1, a2, a3, b0, b1, b2, b3; |
||
265 | |||
266 | IDCT_COLS; |
||
267 | |||
268 | dest[0] = av_clip_pixel(dest[0] + ((a0 + b0) >> COL_SHIFT)); |
||
269 | dest += line_size; |
||
270 | dest[0] = av_clip_pixel(dest[0] + ((a1 + b1) >> COL_SHIFT)); |
||
271 | dest += line_size; |
||
272 | dest[0] = av_clip_pixel(dest[0] + ((a2 + b2) >> COL_SHIFT)); |
||
273 | dest += line_size; |
||
274 | dest[0] = av_clip_pixel(dest[0] + ((a3 + b3) >> COL_SHIFT)); |
||
275 | dest += line_size; |
||
276 | dest[0] = av_clip_pixel(dest[0] + ((a3 - b3) >> COL_SHIFT)); |
||
277 | dest += line_size; |
||
278 | dest[0] = av_clip_pixel(dest[0] + ((a2 - b2) >> COL_SHIFT)); |
||
279 | dest += line_size; |
||
280 | dest[0] = av_clip_pixel(dest[0] + ((a1 - b1) >> COL_SHIFT)); |
||
281 | dest += line_size; |
||
282 | dest[0] = av_clip_pixel(dest[0] + ((a0 - b0) >> COL_SHIFT)); |
||
283 | } |
||
284 | |||
285 | static inline void FUNC(idctSparseCol)(int16_t *col) |
||
286 | { |
||
287 | int a0, a1, a2, a3, b0, b1, b2, b3; |
||
288 | |||
289 | IDCT_COLS; |
||
290 | |||
291 | col[0 ] = ((a0 + b0) >> COL_SHIFT); |
||
292 | col[8 ] = ((a1 + b1) >> COL_SHIFT); |
||
293 | col[16] = ((a2 + b2) >> COL_SHIFT); |
||
294 | col[24] = ((a3 + b3) >> COL_SHIFT); |
||
295 | col[32] = ((a3 - b3) >> COL_SHIFT); |
||
296 | col[40] = ((a2 - b2) >> COL_SHIFT); |
||
297 | col[48] = ((a1 - b1) >> COL_SHIFT); |
||
298 | col[56] = ((a0 - b0) >> COL_SHIFT); |
||
299 | } |
||
300 | |||
301 | void FUNC(ff_simple_idct_put)(uint8_t *dest_, int line_size, int16_t *block) |
||
302 | { |
||
303 | pixel *dest = (pixel *)dest_; |
||
304 | int i; |
||
305 | |||
306 | line_size /= sizeof(pixel); |
||
307 | |||
308 | for (i = 0; i < 8; i++) |
||
309 | FUNC(idctRowCondDC)(block + i*8, 0); |
||
310 | |||
311 | for (i = 0; i < 8; i++) |
||
312 | FUNC(idctSparseColPut)(dest + i, line_size, block + i); |
||
313 | } |
||
314 | |||
315 | void FUNC(ff_simple_idct_add)(uint8_t *dest_, int line_size, int16_t *block) |
||
316 | { |
||
317 | pixel *dest = (pixel *)dest_; |
||
318 | int i; |
||
319 | |||
320 | line_size /= sizeof(pixel); |
||
321 | |||
322 | for (i = 0; i < 8; i++) |
||
323 | FUNC(idctRowCondDC)(block + i*8, 0); |
||
324 | |||
325 | for (i = 0; i < 8; i++) |
||
326 | FUNC(idctSparseColAdd)(dest + i, line_size, block + i); |
||
327 | } |
||
328 | |||
329 | void FUNC(ff_simple_idct)(int16_t *block) |
||
330 | { |
||
331 | int i; |
||
332 | |||
333 | for (i = 0; i < 8; i++) |
||
334 | FUNC(idctRowCondDC)(block + i*8, 0); |
||
335 | |||
336 | for (i = 0; i < 8; i++) |
||
337 | FUNC(idctSparseCol)(block + i); |
||
338 | }>>>>>>(COL_SHIFT-1))/W4));><(COL_SHIFT-1))/W4));>><>><>><>><>><>><>><>14)><14)>14)><14)>14)><14)>14)><14)>14)><14)>14)><14)>14)><14)> |