Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
4349 | Serge | 1 | /* |
2 | * (c) 2001 Fabrice Bellard |
||
3 | * 2007 Marc Hoffman |
||
4 | * |
||
5 | * This file is part of FFmpeg. |
||
6 | * |
||
7 | * FFmpeg is free software; you can redistribute it and/or |
||
8 | * modify it under the terms of the GNU Lesser General Public |
||
9 | * License as published by the Free Software Foundation; either |
||
10 | * version 2.1 of the License, or (at your option) any later version. |
||
11 | * |
||
12 | * FFmpeg is distributed in the hope that it will be useful, |
||
13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
||
14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
||
15 | * Lesser General Public License for more details. |
||
16 | * |
||
17 | * You should have received a copy of the GNU Lesser General Public |
||
18 | * License along with FFmpeg; if not, write to the Free Software |
||
19 | * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
||
20 | */ |
||
21 | |||
22 | /** |
||
23 | * @file |
||
24 | * DCT test (c) 2001 Fabrice Bellard |
||
25 | * Started from sample code by Juan J. Sierralta P. |
||
26 | */ |
||
27 | |||
#include "config.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#if HAVE_UNISTD_H
#include <unistd.h>
#endif
#include <math.h>

#include "libavutil/cpu.h"
#include "libavutil/common.h"
#include "libavutil/lfg.h"
#include "libavutil/time.h"

#include "dct.h"
#include "simple_idct.h"
#include "aandcttab.h"
#include "faandct.h"
#include "faanidct.h"
#include "x86/idct_xvid.h"
#include "dctref.h"
||
49 | |||
50 | #undef printf |
||
51 | |||
/*
 * Prototypes of architecture-specific transforms. Each one is referenced
 * from fdct_tab[] / idct_tab[] only inside the matching #if guard.
 * All of them take a pointer to the coefficient block they work on.
 */

/* BFIN */
void ff_bfin_idct(int16_t *block);
void ff_bfin_fdct(int16_t *block);

/* ALTIVEC */
void ff_fdct_altivec(int16_t *block);

/* ARM */
void ff_j_rev_dct_arm(int16_t *block);
void ff_simple_idct_arm(int16_t *block);
void ff_simple_idct_armv5te(int16_t *block);
void ff_simple_idct_armv6(int16_t *block);
void ff_simple_idct_neon(int16_t *block);

/* ALPHA */
void ff_simple_idct_axp(int16_t *block);
||
67 | |||
/**
 * Coefficient ordering an implementation expects on input.
 * permute() uses this value to pick the reordering applied to a test block;
 * dct_error() additionally rescales the output of SCALE_PERM entries.
 */
enum formattag {
    NO_PERM,
    MMX_PERM,
    MMX_SIMPLE_PERM,
    SCALE_PERM,
    SSE2_PERM,
    PARTTRANS_PERM,
    TRANSPOSE_PERM
};

/** One entry of the tables listing the transforms to test. */
struct algo {
    /** Label printed in the reports; a NULL name terminates a table. */
    const char *name;
    /** Transform under test, applied in place to a 64-coefficient block. */
    void (*func)(int16_t *block);
    /** Input ordering expected by func. */
    enum formattag format;
    /** AV_CPU_FLAG_* bits that must all be present for the entry to run. */
    int mm_support;
    /** Nonzero: exceeding the accuracy limits is reported but not an error. */
    int nonspec;
};
||
76 | |||
77 | static int cpu_flags; |
||
78 | |||
79 | static const struct algo fdct_tab[] = { |
||
80 | { "REF-DBL", ff_ref_fdct, NO_PERM }, |
||
81 | { "FAAN", ff_faandct, NO_PERM }, |
||
82 | { "IJG-AAN-INT", ff_fdct_ifast, SCALE_PERM }, |
||
83 | { "IJG-LLM-INT", ff_jpeg_fdct_islow_8, NO_PERM }, |
||
84 | |||
85 | #if HAVE_MMX_INLINE |
||
86 | { "MMX", ff_fdct_mmx, NO_PERM, AV_CPU_FLAG_MMX }, |
||
87 | #endif |
||
88 | #if HAVE_MMXEXT_INLINE |
||
89 | { "MMXEXT", ff_fdct_mmxext, NO_PERM, AV_CPU_FLAG_MMXEXT }, |
||
90 | #endif |
||
91 | #if HAVE_SSE2_INLINE |
||
92 | { "SSE2", ff_fdct_sse2, NO_PERM, AV_CPU_FLAG_SSE2 }, |
||
93 | #endif |
||
94 | |||
95 | #if HAVE_ALTIVEC |
||
96 | { "altivecfdct", ff_fdct_altivec, NO_PERM, AV_CPU_FLAG_ALTIVEC }, |
||
97 | #endif |
||
98 | |||
99 | #if ARCH_BFIN |
||
100 | { "BFINfdct", ff_bfin_fdct, NO_PERM }, |
||
101 | #endif |
||
102 | |||
103 | { 0 } |
||
104 | }; |
||
105 | |||
#if ARCH_X86_64 && HAVE_MMX && HAVE_YASM
void ff_prores_idct_put_10_sse2(uint16_t *dst, int linesize,
                                int16_t *block, int16_t *qmat);

/**
 * Adapter that gives ff_prores_idct_put_10_sse2() the in-place
 * void (*)(int16_t *) signature stored in struct algo.
 *
 * The 64 input coefficients are copied from dst into a scratch block; the
 * routine is then called with dst as its output, a linesize of 16 and a
 * quantisation matrix whose entries are all 4.
 *
 * @param dst 64 coefficients on entry, overwritten with the output
 */
static void ff_prores_idct_put_10_sse2_wrap(int16_t *dst)
{
    DECLARE_ALIGNED(16, static int16_t, qmat)[64];
    DECLARE_ALIGNED(16, static int16_t, tmp)[64];
    int i;

    for (i = 0; i < 64; i++) {
        qmat[i] = 4;
        tmp[i]  = dst[i];
    }

    /* int16_t * and uint16_t * are not compatible pointer types, so the
     * conversion has to be spelled out. */
    ff_prores_idct_put_10_sse2((uint16_t *) dst, 16, tmp, qmat);
}
#endif
||
122 | |||
123 | static const struct algo idct_tab[] = { |
||
124 | { "FAANI", ff_faanidct, NO_PERM }, |
||
125 | { "REF-DBL", ff_ref_idct, NO_PERM }, |
||
126 | { "INT", ff_j_rev_dct, MMX_PERM }, |
||
127 | { "SIMPLE-C", ff_simple_idct_8, NO_PERM }, |
||
128 | |||
129 | #if HAVE_MMX_INLINE |
||
130 | { "SIMPLE-MMX", ff_simple_idct_mmx, MMX_SIMPLE_PERM, AV_CPU_FLAG_MMX }, |
||
131 | { "XVID-MMX", ff_idct_xvid_mmx, NO_PERM, AV_CPU_FLAG_MMX, 1 }, |
||
132 | #endif |
||
133 | #if HAVE_MMXEXT_INLINE |
||
134 | { "XVID-MMXEXT", ff_idct_xvid_mmxext, NO_PERM, AV_CPU_FLAG_MMXEXT, 1 }, |
||
135 | #endif |
||
136 | #if HAVE_SSE2_INLINE |
||
137 | { "XVID-SSE2", ff_idct_xvid_sse2, SSE2_PERM, AV_CPU_FLAG_SSE2, 1 }, |
||
138 | #if ARCH_X86_64 && HAVE_YASM |
||
139 | { "PR-SSE2", ff_prores_idct_put_10_sse2_wrap, TRANSPOSE_PERM, AV_CPU_FLAG_SSE2, 1 }, |
||
140 | #endif |
||
141 | #endif |
||
142 | |||
143 | #if ARCH_BFIN |
||
144 | { "BFINidct", ff_bfin_idct, NO_PERM }, |
||
145 | #endif |
||
146 | |||
147 | #if ARCH_ARM |
||
148 | { "SIMPLE-ARM", ff_simple_idct_arm, NO_PERM }, |
||
149 | { "INT-ARM", ff_j_rev_dct_arm, MMX_PERM }, |
||
150 | #endif |
||
151 | #if HAVE_ARMV5TE |
||
152 | { "SIMPLE-ARMV5TE", ff_simple_idct_armv5te,NO_PERM, AV_CPU_FLAG_ARMV5TE }, |
||
153 | #endif |
||
154 | #if HAVE_ARMV6 |
||
155 | { "SIMPLE-ARMV6", ff_simple_idct_armv6, MMX_PERM, AV_CPU_FLAG_ARMV6 }, |
||
156 | #endif |
||
157 | #if HAVE_NEON |
||
158 | { "SIMPLE-NEON", ff_simple_idct_neon, PARTTRANS_PERM, AV_CPU_FLAG_NEON }, |
||
159 | #endif |
||
160 | |||
161 | #if ARCH_ALPHA |
||
162 | { "SIMPLE-ALPHA", ff_simple_idct_axp, NO_PERM }, |
||
163 | #endif |
||
164 | |||
165 | { 0 } |
||
166 | }; |
||
167 | |||
/* Shift used by dct_error() when rescaling the output of SCALE_PERM entries. */
#define AANSCALE_BITS 12

/* Number of random blocks per accuracy run. */
#define NB_ITS 20000
/* Number of transform calls between two clock reads in the speed loops. */
#define NB_ITS_SPEED 50000

/* Destination index for each source coefficient under MMX_PERM;
 * computed at start-up by idct_mmx_init(). */
static short idct_mmx_perm[64];

/* Destination index for each source coefficient under MMX_SIMPLE_PERM. */
static short idct_simple_mmx_perm[64] = {
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};

/* Under SSE2_PERM the row (index bits 0x38) is kept and the position
 * inside the row (index bits 7) is replaced through this table. */
static const uint8_t idct_sse2_row_perm[8] = {
    0, 4, 1, 5, 2, 6, 3, 7
};
||
187 | |||
188 | static void idct_mmx_init(void) |
||
189 | { |
||
190 | int i; |
||
191 | |||
192 | /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */ |
||
193 | for (i = 0; i < 64; i++) { |
||
194 | idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2); |
||
195 | } |
||
196 | } |
||
197 | |||
198 | DECLARE_ALIGNED(16, static int16_t, block)[64]; |
||
199 | DECLARE_ALIGNED(8, static int16_t, block1)[64]; |
||
200 | |||
201 | static void init_block(int16_t block[64], int test, int is_idct, AVLFG *prng, int vals) |
||
202 | { |
||
203 | int i, j; |
||
204 | |||
205 | memset(block, 0, 64 * sizeof(*block)); |
||
206 | |||
207 | switch (test) { |
||
208 | case 0: |
||
209 | for (i = 0; i < 64; i++) |
||
210 | block[i] = (av_lfg_get(prng) % (2*vals)) -vals; |
||
211 | if (is_idct) { |
||
212 | ff_ref_fdct(block); |
||
213 | for (i = 0; i < 64; i++) |
||
214 | block[i] >>= 3; |
||
215 | } |
||
216 | break; |
||
217 | case 1: |
||
218 | j = av_lfg_get(prng) % 10 + 1; |
||
219 | for (i = 0; i < j; i++) { |
||
220 | int idx = av_lfg_get(prng) % 64; |
||
221 | block[idx] = av_lfg_get(prng) % (2*vals) -vals; |
||
222 | } |
||
223 | break; |
||
224 | case 2: |
||
225 | block[ 0] = av_lfg_get(prng) % (16*vals) - (8*vals); |
||
226 | block[63] = (block[0] & 1) ^ 1; |
||
227 | break; |
||
228 | } |
||
229 | } |
||
230 | |||
231 | static void permute(int16_t dst[64], const int16_t src[64], int perm) |
||
232 | { |
||
233 | int i; |
||
234 | |||
235 | if (perm == MMX_PERM) { |
||
236 | for (i = 0; i < 64; i++) |
||
237 | dst[idct_mmx_perm[i]] = src[i]; |
||
238 | } else if (perm == MMX_SIMPLE_PERM) { |
||
239 | for (i = 0; i < 64; i++) |
||
240 | dst[idct_simple_mmx_perm[i]] = src[i]; |
||
241 | } else if (perm == SSE2_PERM) { |
||
242 | for (i = 0; i < 64; i++) |
||
243 | dst[(i & 0x38) | idct_sse2_row_perm[i & 7]] = src[i]; |
||
244 | } else if (perm == PARTTRANS_PERM) { |
||
245 | for (i = 0; i < 64; i++) |
||
246 | dst[(i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3)] = src[i]; |
||
247 | } else if (perm == TRANSPOSE_PERM) { |
||
248 | for (i = 0; i < 64; i++) |
||
249 | dst[(i>>3) | ((i<<3)&0x38)] = src[i]; |
||
250 | } else { |
||
251 | for (i = 0; i < 64; i++) |
||
252 | dst[i] = src[i]; |
||
253 | } |
||
254 | } |
||
255 | |||
256 | static int dct_error(const struct algo *dct, int test, int is_idct, int speed, const int bits) |
||
257 | { |
||
258 | void (*ref)(int16_t *block) = is_idct ? ff_ref_idct : ff_ref_fdct; |
||
259 | int it, i, scale; |
||
260 | int err_inf, v; |
||
261 | int64_t err2, ti, ti1, it1, err_sum = 0; |
||
262 | int64_t sysErr[64], sysErrMax = 0; |
||
263 | int maxout = 0; |
||
264 | int blockSumErrMax = 0, blockSumErr; |
||
265 | AVLFG prng; |
||
266 | const int vals=1< |
||
267 | double omse, ome; |
||
268 | int spec_err; |
||
269 | |||
270 | av_lfg_init(&prng, 1); |
||
271 | |||
272 | err_inf = 0; |
||
273 | err2 = 0; |
||
274 | for (i = 0; i < 64; i++) |
||
275 | sysErr[i] = 0; |
||
276 | for (it = 0; it < NB_ITS; it++) { |
||
277 | init_block(block1, test, is_idct, &prng, vals); |
||
278 | permute(block, block1, dct->format); |
||
279 | |||
280 | dct->func(block); |
||
281 | emms_c(); |
||
282 | |||
283 | if (dct->format == SCALE_PERM) { |
||
284 | for (i = 0; i < 64; i++) { |
||
285 | scale = 8 * (1 << (AANSCALE_BITS + 11)) / ff_aanscales[i]; |
||
286 | block[i] = (block[i] * scale) >> AANSCALE_BITS; |
||
287 | } |
||
288 | } |
||
289 | |||
290 | ref(block1); |
||
291 | |||
292 | blockSumErr = 0; |
||
293 | for (i = 0; i < 64; i++) { |
||
294 | int err = block[i] - block1[i]; |
||
295 | err_sum += err; |
||
296 | v = abs(err); |
||
297 | if (v > err_inf) |
||
298 | err_inf = v; |
||
299 | err2 += v * v; |
||
300 | sysErr[i] += block[i] - block1[i]; |
||
301 | blockSumErr += v; |
||
302 | if (abs(block[i]) > maxout) |
||
303 | maxout = abs(block[i]); |
||
304 | } |
||
305 | if (blockSumErrMax < blockSumErr) |
||
306 | blockSumErrMax = blockSumErr; |
||
307 | } |
||
308 | for (i = 0; i < 64; i++) |
||
309 | sysErrMax = FFMAX(sysErrMax, FFABS(sysErr[i])); |
||
310 | |||
311 | for (i = 0; i < 64; i++) { |
||
312 | if (i % 8 == 0) |
||
313 | printf("\n"); |
||
314 | printf("%7d ", (int) sysErr[i]); |
||
315 | } |
||
316 | printf("\n"); |
||
317 | |||
318 | omse = (double) err2 / NB_ITS / 64; |
||
319 | ome = (double) err_sum / NB_ITS / 64; |
||
320 | |||
321 | spec_err = is_idct && (err_inf > 1 || omse > 0.02 || fabs(ome) > 0.0015); |
||
322 | |||
323 | printf("%s %s: max_err=%d omse=%0.8f ome=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n", |
||
324 | is_idct ? "IDCT" : "DCT", dct->name, err_inf, |
||
325 | omse, ome, (double) sysErrMax / NB_ITS, |
||
326 | maxout, blockSumErrMax); |
||
327 | |||
328 | if (spec_err && !dct->nonspec) |
||
329 | return 1; |
||
330 | |||
331 | if (!speed) |
||
332 | return 0; |
||
333 | |||
334 | /* speed test */ |
||
335 | |||
336 | init_block(block, test, is_idct, &prng, vals); |
||
337 | permute(block1, block, dct->format); |
||
338 | |||
339 | ti = av_gettime(); |
||
340 | it1 = 0; |
||
341 | do { |
||
342 | for (it = 0; it < NB_ITS_SPEED; it++) { |
||
343 | memcpy(block, block1, sizeof(block)); |
||
344 | dct->func(block); |
||
345 | } |
||
346 | emms_c(); |
||
347 | it1 += NB_ITS_SPEED; |
||
348 | ti1 = av_gettime() - ti; |
||
349 | } while (ti1 < 1000000); |
||
350 | |||
351 | printf("%s %s: %0.1f kdct/s\n", is_idct ? "IDCT" : "DCT", dct->name, |
||
352 | (double) it1 * 1000.0 / (double) ti1); |
||
353 | |||
354 | return 0; |
||
355 | } |
||
356 | |||
357 | DECLARE_ALIGNED(8, static uint8_t, img_dest)[64]; |
||
358 | DECLARE_ALIGNED(8, static uint8_t, img_dest1)[64]; |
||
359 | |||
360 | static void idct248_ref(uint8_t *dest, int linesize, int16_t *block) |
||
361 | { |
||
362 | static int init; |
||
363 | static double c8[8][8]; |
||
364 | static double c4[4][4]; |
||
365 | double block1[64], block2[64], block3[64]; |
||
366 | double s, sum, v; |
||
367 | int i, j, k; |
||
368 | |||
369 | if (!init) { |
||
370 | init = 1; |
||
371 | |||
372 | for (i = 0; i < 8; i++) { |
||
373 | sum = 0; |
||
374 | for (j = 0; j < 8; j++) { |
||
375 | s = (i == 0) ? sqrt(1.0 / 8.0) : sqrt(1.0 / 4.0); |
||
376 | c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0); |
||
377 | sum += c8[i][j] * c8[i][j]; |
||
378 | } |
||
379 | } |
||
380 | |||
381 | for (i = 0; i < 4; i++) { |
||
382 | sum = 0; |
||
383 | for (j = 0; j < 4; j++) { |
||
384 | s = (i == 0) ? sqrt(1.0 / 4.0) : sqrt(1.0 / 2.0); |
||
385 | c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0); |
||
386 | sum += c4[i][j] * c4[i][j]; |
||
387 | } |
||
388 | } |
||
389 | } |
||
390 | |||
391 | /* butterfly */ |
||
392 | s = 0.5 * sqrt(2.0); |
||
393 | for (i = 0; i < 4; i++) { |
||
394 | for (j = 0; j < 8; j++) { |
||
395 | block1[8 * (2 * i) + j] = |
||
396 | (block[8 * (2 * i) + j] + block[8 * (2 * i + 1) + j]) * s; |
||
397 | block1[8 * (2 * i + 1) + j] = |
||
398 | (block[8 * (2 * i) + j] - block[8 * (2 * i + 1) + j]) * s; |
||
399 | } |
||
400 | } |
||
401 | |||
402 | /* idct8 on lines */ |
||
403 | for (i = 0; i < 8; i++) { |
||
404 | for (j = 0; j < 8; j++) { |
||
405 | sum = 0; |
||
406 | for (k = 0; k < 8; k++) |
||
407 | sum += c8[k][j] * block1[8 * i + k]; |
||
408 | block2[8 * i + j] = sum; |
||
409 | } |
||
410 | } |
||
411 | |||
412 | /* idct4 */ |
||
413 | for (i = 0; i < 8; i++) { |
||
414 | for (j = 0; j < 4; j++) { |
||
415 | /* top */ |
||
416 | sum = 0; |
||
417 | for (k = 0; k < 4; k++) |
||
418 | sum += c4[k][j] * block2[8 * (2 * k) + i]; |
||
419 | block3[8 * (2 * j) + i] = sum; |
||
420 | |||
421 | /* bottom */ |
||
422 | sum = 0; |
||
423 | for (k = 0; k < 4; k++) |
||
424 | sum += c4[k][j] * block2[8 * (2 * k + 1) + i]; |
||
425 | block3[8 * (2 * j + 1) + i] = sum; |
||
426 | } |
||
427 | } |
||
428 | |||
429 | /* clamp and store the result */ |
||
430 | for (i = 0; i < 8; i++) { |
||
431 | for (j = 0; j < 8; j++) { |
||
432 | v = block3[8 * i + j]; |
||
433 | if (v < 0) v = 0; |
||
434 | else if (v > 255) v = 255; |
||
435 | dest[i * linesize + j] = (int) rint(v); |
||
436 | } |
||
437 | } |
||
438 | } |
||
439 | |||
440 | static void idct248_error(const char *name, |
||
441 | void (*idct248_put)(uint8_t *dest, int line_size, |
||
442 | int16_t *block), |
||
443 | int speed) |
||
444 | { |
||
445 | int it, i, it1, ti, ti1, err_max, v; |
||
446 | AVLFG prng; |
||
447 | |||
448 | av_lfg_init(&prng, 1); |
||
449 | |||
450 | /* just one test to see if code is correct (precision is less |
||
451 | important here) */ |
||
452 | err_max = 0; |
||
453 | for (it = 0; it < NB_ITS; it++) { |
||
454 | /* XXX: use forward transform to generate values */ |
||
455 | for (i = 0; i < 64; i++) |
||
456 | block1[i] = av_lfg_get(&prng) % 256 - 128; |
||
457 | block1[0] += 1024; |
||
458 | |||
459 | for (i = 0; i < 64; i++) |
||
460 | block[i] = block1[i]; |
||
461 | idct248_ref(img_dest1, 8, block); |
||
462 | |||
463 | for (i = 0; i < 64; i++) |
||
464 | block[i] = block1[i]; |
||
465 | idct248_put(img_dest, 8, block); |
||
466 | |||
467 | for (i = 0; i < 64; i++) { |
||
468 | v = abs((int) img_dest[i] - (int) img_dest1[i]); |
||
469 | if (v == 255) |
||
470 | printf("%d %d\n", img_dest[i], img_dest1[i]); |
||
471 | if (v > err_max) |
||
472 | err_max = v; |
||
473 | } |
||
474 | #if 0 |
||
475 | printf("ref=\n"); |
||
476 | for(i=0;i<8;i++) { |
||
477 | int j; |
||
478 | for(j=0;j<8;j++) { |
||
479 | printf(" %3d", img_dest1[i*8+j]); |
||
480 | } |
||
481 | printf("\n"); |
||
482 | } |
||
483 | |||
484 | printf("out=\n"); |
||
485 | for(i=0;i<8;i++) { |
||
486 | int j; |
||
487 | for(j=0;j<8;j++) { |
||
488 | printf(" %3d", img_dest[i*8+j]); |
||
489 | } |
||
490 | printf("\n"); |
||
491 | } |
||
492 | #endif |
||
493 | } |
||
494 | printf("%s %s: err_inf=%d\n", 1 ? "IDCT248" : "DCT248", name, err_max); |
||
495 | |||
496 | if (!speed) |
||
497 | return; |
||
498 | |||
499 | ti = av_gettime(); |
||
500 | it1 = 0; |
||
501 | do { |
||
502 | for (it = 0; it < NB_ITS_SPEED; it++) { |
||
503 | for (i = 0; i < 64; i++) |
||
504 | block[i] = block1[i]; |
||
505 | idct248_put(img_dest, 8, block); |
||
506 | } |
||
507 | emms_c(); |
||
508 | it1 += NB_ITS_SPEED; |
||
509 | ti1 = av_gettime() - ti; |
||
510 | } while (ti1 < 1000000); |
||
511 | |||
512 | printf("%s %s: %0.1f kdct/s\n", 1 ? "IDCT248" : "DCT248", name, |
||
513 | (double) it1 * 1000.0 / (double) ti1); |
||
514 | } |
||
515 | |||
/** Print the command-line usage summary to standard output. */
static void help(void)
{
    printf("dct-test [-i] [<test-number>] [<bits>]\n"
           "test-number 0 -> test with random matrixes\n"
           " 1 -> test with random sparse matrixes\n"
           " 2 -> do 3. test from mpeg4 std\n"
           "bits Number of time domain bits to use, 8 is default\n"
           "-i test IDCT implementations\n"
           "-4 test IDCT248 implementations\n"
           "-t speed test\n");
}
||
527 | |||
528 | #if !HAVE_GETOPT |
||
529 | #include "compat/getopt.c" |
||
530 | #endif |
||
531 | |||
532 | int main(int argc, char **argv) |
||
533 | { |
||
534 | int test_idct = 0, test_248_dct = 0; |
||
535 | int c, i; |
||
536 | int test = 1; |
||
537 | int speed = 0; |
||
538 | int err = 0; |
||
539 | int bits=8; |
||
540 | |||
541 | cpu_flags = av_get_cpu_flags(); |
||
542 | |||
543 | ff_ref_dct_init(); |
||
544 | idct_mmx_init(); |
||
545 | |||
546 | for (;;) { |
||
547 | c = getopt(argc, argv, "ih4t"); |
||
548 | if (c == -1) |
||
549 | break; |
||
550 | switch (c) { |
||
551 | case 'i': |
||
552 | test_idct = 1; |
||
553 | break; |
||
554 | case '4': |
||
555 | test_248_dct = 1; |
||
556 | break; |
||
557 | case 't': |
||
558 | speed = 1; |
||
559 | break; |
||
560 | default: |
||
561 | case 'h': |
||
562 | help(); |
||
563 | return 0; |
||
564 | } |
||
565 | } |
||
566 | |||
567 | if (optind < argc) |
||
568 | test = atoi(argv[optind]); |
||
569 | if(optind+1 < argc) bits= atoi(argv[optind+1]); |
||
570 | |||
571 | printf("ffmpeg DCT/IDCT test\n"); |
||
572 | |||
573 | if (test_248_dct) { |
||
574 | idct248_error("SIMPLE-C", ff_simple_idct248_put, speed); |
||
575 | } else { |
||
576 | const struct algo *algos = test_idct ? idct_tab : fdct_tab; |
||
577 | for (i = 0; algos[i].name; i++) |
||
578 | if (!(~cpu_flags & algos[i].mm_support)) { |
||
579 | err |= dct_error(&algos[i], test, test_idct, speed, bits); |
||
580 | } |
||
581 | } |
||
582 | |||
583 | if (err) |
||
584 | printf("Error: %d.\n", err); |
||
585 | |||
586 | return !!err; |
||
587 | }>>>>>8;j++)>8;i++)>8;j++)>8;i++)>>>>>>>>>>>>>>>>>>>>>>>>>>>>><>>>> |