Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
4349 Serge 1
/*
2
 * (c) 2001 Fabrice Bellard
3
 *     2007 Marc Hoffman 
4
 *
5
 * This file is part of FFmpeg.
6
 *
7
 * FFmpeg is free software; you can redistribute it and/or
8
 * modify it under the terms of the GNU Lesser General Public
9
 * License as published by the Free Software Foundation; either
10
 * version 2.1 of the License, or (at your option) any later version.
11
 *
12
 * FFmpeg is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
 * Lesser General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU Lesser General Public
18
 * License along with FFmpeg; if not, write to the Free Software
19
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
 */
21
 
22
/**
23
 * @file
24
 * DCT test (c) 2001 Fabrice Bellard
25
 * Started from sample code by Juan J. Sierralta P.
26
 */
27
 
28
#include "config.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#if HAVE_UNISTD_H
#include <unistd.h>
#endif
#include <math.h>

#include "libavutil/cpu.h"
#include "libavutil/common.h"
#include "libavutil/lfg.h"
#include "libavutil/time.h"

#include "dct.h"
#include "simple_idct.h"
#include "aandcttab.h"
#include "faandct.h"
#include "faanidct.h"
#include "x86/idct_xvid.h"
#include "dctref.h"
49
 
50
/* NOTE(review): presumably a project header redefines printf and this test
 * wants the plain libc one -- confirm against the included headers. */
#undef printf

/*
 * Prototypes for architecture-specific transforms that are referenced from
 * the tables below. Each one takes the coefficient block it transforms and
 * returns nothing; the test reads the result back from the same buffer.
 */

// BFIN
void ff_bfin_idct(int16_t *block);
void ff_bfin_fdct(int16_t *block);

// ALTIVEC
void ff_fdct_altivec(int16_t *block);

// ARM
void ff_j_rev_dct_arm(int16_t *data);
void ff_simple_idct_arm(int16_t *data);
void ff_simple_idct_armv5te(int16_t *data);
void ff_simple_idct_armv6(int16_t *data);
void ff_simple_idct_neon(int16_t *data);

// ALPHA
void ff_simple_idct_axp(int16_t *data);
67
 
68
/**
 * One DCT/IDCT implementation to be tested.
 */
struct algo {
    /** Label printed in the report; a NULL name terminates a table. */
    const char *name;
    /** Transform under test; called on the block, result read back from it. */
    void (*func)(int16_t *block);
    /**
     * Input layout the implementation expects. permute() reorders the input
     * accordingly; SCALE_PERM instead triggers an output rescale in
     * dct_error().
     */
    enum formattag { NO_PERM, MMX_PERM, MMX_SIMPLE_PERM, SCALE_PERM,
                     SSE2_PERM, PARTTRANS_PERM, TRANSPOSE_PERM } format;
    /** AV_CPU_FLAG_* bits that must all be present; 0 means always run. */
    int mm_support;
    /** Non-zero: exceeding the precision thresholds does not fail the run. */
    int nonspec;
};

/* CPU capability bits, filled in by main() from av_get_cpu_flags(). */
static int cpu_flags;
78
 
79
static const struct algo fdct_tab[] = {
80
    { "REF-DBL",        ff_ref_fdct,           NO_PERM    },
81
    { "FAAN",           ff_faandct,            NO_PERM    },
82
    { "IJG-AAN-INT",    ff_fdct_ifast,         SCALE_PERM },
83
    { "IJG-LLM-INT",    ff_jpeg_fdct_islow_8,  NO_PERM    },
84
 
85
#if HAVE_MMX_INLINE
86
    { "MMX",            ff_fdct_mmx,           NO_PERM,   AV_CPU_FLAG_MMX     },
87
#endif
88
#if HAVE_MMXEXT_INLINE
89
    { "MMXEXT",         ff_fdct_mmxext,        NO_PERM,   AV_CPU_FLAG_MMXEXT  },
90
#endif
91
#if HAVE_SSE2_INLINE
92
    { "SSE2",           ff_fdct_sse2,          NO_PERM,   AV_CPU_FLAG_SSE2    },
93
#endif
94
 
95
#if HAVE_ALTIVEC
96
    { "altivecfdct",    ff_fdct_altivec,       NO_PERM,   AV_CPU_FLAG_ALTIVEC },
97
#endif
98
 
99
#if ARCH_BFIN
100
    { "BFINfdct",       ff_bfin_fdct,          NO_PERM  },
101
#endif
102
 
103
    { 0 }
104
};
105
 
106
#if ARCH_X86_64 && HAVE_MMX && HAVE_YASM
void ff_prores_idct_put_10_sse2(uint16_t *dst, int linesize,
                                int16_t *block, int16_t *qmat);

/**
 * Adapter giving ff_prores_idct_put_10_sse2() the single-argument signature
 * used by struct algo.
 *
 * The coefficients in dst are copied to a scratch block, a flat quantiser
 * matrix of 4 is supplied, and the result is written back over dst.
 *
 * @param dst 64 coefficients on entry, 64 output samples on return
 */
static void ff_prores_idct_put_10_sse2_wrap(int16_t *dst)
{
    DECLARE_ALIGNED(16, static int16_t, qmat)[64];
    DECLARE_ALIGNED(16, static int16_t, tmp)[64];
    int i;

    for (i = 0; i < 64; i++) {
        qmat[i] = 4;
        tmp[i]  = dst[i];
    }
    /* The callee is declared with uint16_t *dst; make the signedness
     * reinterpretation explicit instead of relying on an implicit
     * incompatible-pointer conversion.
     * NOTE(review): linesize 16 is presumably 8 samples * 2 bytes -- confirm. */
    ff_prores_idct_put_10_sse2((uint16_t *) dst, 16, tmp, qmat);
}
#endif
122
 
123
static const struct algo idct_tab[] = {
124
    { "FAANI",          ff_faanidct,           NO_PERM  },
125
    { "REF-DBL",        ff_ref_idct,           NO_PERM  },
126
    { "INT",            ff_j_rev_dct,          MMX_PERM },
127
    { "SIMPLE-C",       ff_simple_idct_8,      NO_PERM  },
128
 
129
#if HAVE_MMX_INLINE
130
    { "SIMPLE-MMX",     ff_simple_idct_mmx,  MMX_SIMPLE_PERM, AV_CPU_FLAG_MMX },
131
    { "XVID-MMX",       ff_idct_xvid_mmx,      NO_PERM,   AV_CPU_FLAG_MMX,  1 },
132
#endif
133
#if HAVE_MMXEXT_INLINE
134
    { "XVID-MMXEXT",    ff_idct_xvid_mmxext,   NO_PERM,   AV_CPU_FLAG_MMXEXT, 1 },
135
#endif
136
#if HAVE_SSE2_INLINE
137
    { "XVID-SSE2",      ff_idct_xvid_sse2,     SSE2_PERM, AV_CPU_FLAG_SSE2, 1 },
138
#if ARCH_X86_64 && HAVE_YASM
139
    { "PR-SSE2",        ff_prores_idct_put_10_sse2_wrap,     TRANSPOSE_PERM, AV_CPU_FLAG_SSE2, 1 },
140
#endif
141
#endif
142
 
143
#if ARCH_BFIN
144
    { "BFINidct",       ff_bfin_idct,          NO_PERM  },
145
#endif
146
 
147
#if ARCH_ARM
148
    { "SIMPLE-ARM",     ff_simple_idct_arm,    NO_PERM  },
149
    { "INT-ARM",        ff_j_rev_dct_arm,      MMX_PERM },
150
#endif
151
#if HAVE_ARMV5TE
152
    { "SIMPLE-ARMV5TE", ff_simple_idct_armv5te,NO_PERM,   AV_CPU_FLAG_ARMV5TE },
153
#endif
154
#if HAVE_ARMV6
155
    { "SIMPLE-ARMV6",   ff_simple_idct_armv6,  MMX_PERM,  AV_CPU_FLAG_ARMV6   },
156
#endif
157
#if HAVE_NEON
158
    { "SIMPLE-NEON",    ff_simple_idct_neon, PARTTRANS_PERM, AV_CPU_FLAG_NEON },
159
#endif
160
 
161
#if ARCH_ALPHA
162
    { "SIMPLE-ALPHA",   ff_simple_idct_axp,    NO_PERM },
163
#endif
164
 
165
    { 0 }
166
};
167
 
168
/* Fractional bits used when dct_error() undoes the scaling of SCALE_PERM
 * transforms (scale is computed from ff_aanscales[], then shifted out). */
#define AANSCALE_BITS 12

/* Number of random blocks per accuracy measurement. */
#define NB_ITS 20000
/* Transforms executed between two clock reads in the speed test. */
#define NB_ITS_SPEED 50000
172
 
173
/* MMX_PERM destination index for each source coefficient; filled at run time
 * by idct_mmx_init(). */
static short idct_mmx_perm[64];

/* MMX_SIMPLE_PERM destination index for each source coefficient. Only ever
 * read, hence const. */
static const short idct_simple_mmx_perm[64] = {
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};

/* SSE2_PERM: destination column for each source column; rows are kept. */
static const uint8_t idct_sse2_row_perm[8] = { 0, 4, 1, 5, 2, 6, 3, 7 };

/**
 * Fill idct_mmx_perm[]. Must run before permute() is used with MMX_PERM.
 *
 * The row bits (0x38) are kept. Within a row, column bits 1-2 move down to
 * bits 0-1 and column bit 0 moves up to bit 2.
 */
static void idct_mmx_init(void)
{
    int i;

    /* the mmx/mmxext idct uses a reordered input, so we patch scan tables */
    for (i = 0; i < 64; i++)
        idct_mmx_perm[i] = (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
}
197
 
198
DECLARE_ALIGNED(16, static int16_t, block)[64];
199
DECLARE_ALIGNED(8,  static int16_t, block1)[64];
200
 
201
/**
 * Fill a 64-entry block with test input.
 *
 * @param block   buffer to fill; always zeroed first
 * @param test    0: every entry random in [-vals, vals); for IDCT tests the
 *                   block is then passed through ff_ref_fdct() and each
 *                   entry shifted right by 3
 *                1: between 1 and 10 randomly chosen entries are set to
 *                   random values in [-vals, vals), the rest stay zero
 *                2: block[0] random in [-8*vals, 8*vals) and block[63] set
 *                   to the inverse of block[0]'s low bit
 *                any other value leaves the block all zero
 * @param is_idct non-zero when the block will feed an inverse transform
 * @param prng    random generator state, advanced by this call
 * @param vals    amplitude bound; must be positive, since it is used as a
 *                modulus
 */
static void init_block(int16_t block[64], int test, int is_idct, AVLFG *prng, int vals)
{
    int i, j;

    memset(block, 0, 64 * sizeof(*block));

    switch (test) {
    case 0:
        for (i = 0; i < 64; i++)
            block[i] = (av_lfg_get(prng) % (2*vals)) -vals;
        if (is_idct) {
            ff_ref_fdct(block);
            for (i = 0; i < 64; i++)
                block[i] >>= 3;
        }
        break;
    case 1:
        j = av_lfg_get(prng) % 10 + 1;
        for (i = 0; i < j; i++) {
            int idx = av_lfg_get(prng) % 64;
            block[idx] = av_lfg_get(prng) % (2*vals) -vals;
        }
        break;
    case 2:
        block[ 0] = av_lfg_get(prng) % (16*vals) - (8*vals);
        block[63] = (block[0] & 1) ^ 1;
        break;
    default:
        /* unknown test number: keep the zeroed block */
        break;
    }
}
230
 
231
static void permute(int16_t dst[64], const int16_t src[64], int perm)
232
{
233
    int i;
234
 
235
    if (perm == MMX_PERM) {
236
        for (i = 0; i < 64; i++)
237
            dst[idct_mmx_perm[i]] = src[i];
238
    } else if (perm == MMX_SIMPLE_PERM) {
239
        for (i = 0; i < 64; i++)
240
            dst[idct_simple_mmx_perm[i]] = src[i];
241
    } else if (perm == SSE2_PERM) {
242
        for (i = 0; i < 64; i++)
243
            dst[(i & 0x38) | idct_sse2_row_perm[i & 7]] = src[i];
244
    } else if (perm == PARTTRANS_PERM) {
245
        for (i = 0; i < 64; i++)
246
            dst[(i & 0x24) | ((i & 3) << 3) | ((i >> 3) & 3)] = src[i];
247
    } else if (perm == TRANSPOSE_PERM) {
248
        for (i = 0; i < 64; i++)
249
            dst[(i>>3) | ((i<<3)&0x38)] = src[i];
250
    } else {
251
        for (i = 0; i < 64; i++)
252
            dst[i] = src[i];
253
    }
254
}
255
 
256
static int dct_error(const struct algo *dct, int test, int is_idct, int speed, const int bits)
257
{
258
    void (*ref)(int16_t *block) = is_idct ? ff_ref_idct : ff_ref_fdct;
259
    int it, i, scale;
260
    int err_inf, v;
261
    int64_t err2, ti, ti1, it1, err_sum = 0;
262
    int64_t sysErr[64], sysErrMax = 0;
263
    int maxout = 0;
264
    int blockSumErrMax = 0, blockSumErr;
265
    AVLFG prng;
266
    const int vals=1<
267
    double omse, ome;
268
    int spec_err;
269
 
270
    av_lfg_init(&prng, 1);
271
 
272
    err_inf = 0;
273
    err2 = 0;
274
    for (i = 0; i < 64; i++)
275
        sysErr[i] = 0;
276
    for (it = 0; it < NB_ITS; it++) {
277
        init_block(block1, test, is_idct, &prng, vals);
278
        permute(block, block1, dct->format);
279
 
280
        dct->func(block);
281
        emms_c();
282
 
283
        if (dct->format == SCALE_PERM) {
284
            for (i = 0; i < 64; i++) {
285
                scale = 8 * (1 << (AANSCALE_BITS + 11)) / ff_aanscales[i];
286
                block[i] = (block[i] * scale) >> AANSCALE_BITS;
287
            }
288
        }
289
 
290
        ref(block1);
291
 
292
        blockSumErr = 0;
293
        for (i = 0; i < 64; i++) {
294
            int err = block[i] - block1[i];
295
            err_sum += err;
296
            v = abs(err);
297
            if (v > err_inf)
298
                err_inf = v;
299
            err2 += v * v;
300
            sysErr[i] += block[i] - block1[i];
301
            blockSumErr += v;
302
            if (abs(block[i]) > maxout)
303
                maxout = abs(block[i]);
304
        }
305
        if (blockSumErrMax < blockSumErr)
306
            blockSumErrMax = blockSumErr;
307
    }
308
    for (i = 0; i < 64; i++)
309
        sysErrMax = FFMAX(sysErrMax, FFABS(sysErr[i]));
310
 
311
    for (i = 0; i < 64; i++) {
312
        if (i % 8 == 0)
313
            printf("\n");
314
        printf("%7d ", (int) sysErr[i]);
315
    }
316
    printf("\n");
317
 
318
    omse = (double) err2 / NB_ITS / 64;
319
    ome  = (double) err_sum / NB_ITS / 64;
320
 
321
    spec_err = is_idct && (err_inf > 1 || omse > 0.02 || fabs(ome) > 0.0015);
322
 
323
    printf("%s %s: max_err=%d omse=%0.8f ome=%0.8f syserr=%0.8f maxout=%d blockSumErr=%d\n",
324
           is_idct ? "IDCT" : "DCT", dct->name, err_inf,
325
           omse, ome, (double) sysErrMax / NB_ITS,
326
           maxout, blockSumErrMax);
327
 
328
    if (spec_err && !dct->nonspec)
329
        return 1;
330
 
331
    if (!speed)
332
        return 0;
333
 
334
    /* speed test */
335
 
336
    init_block(block, test, is_idct, &prng, vals);
337
    permute(block1, block, dct->format);
338
 
339
    ti = av_gettime();
340
    it1 = 0;
341
    do {
342
        for (it = 0; it < NB_ITS_SPEED; it++) {
343
            memcpy(block, block1, sizeof(block));
344
            dct->func(block);
345
        }
346
        emms_c();
347
        it1 += NB_ITS_SPEED;
348
        ti1 = av_gettime() - ti;
349
    } while (ti1 < 1000000);
350
 
351
    printf("%s %s: %0.1f kdct/s\n", is_idct ? "IDCT" : "DCT", dct->name,
352
           (double) it1 * 1000.0 / (double) ti1);
353
 
354
    return 0;
355
}
356
 
357
DECLARE_ALIGNED(8, static uint8_t, img_dest)[64];
358
DECLARE_ALIGNED(8, static uint8_t, img_dest1)[64];
359
 
360
static void idct248_ref(uint8_t *dest, int linesize, int16_t *block)
361
{
362
    static int init;
363
    static double c8[8][8];
364
    static double c4[4][4];
365
    double block1[64], block2[64], block3[64];
366
    double s, sum, v;
367
    int i, j, k;
368
 
369
    if (!init) {
370
        init = 1;
371
 
372
        for (i = 0; i < 8; i++) {
373
            sum = 0;
374
            for (j = 0; j < 8; j++) {
375
                s = (i == 0) ? sqrt(1.0 / 8.0) : sqrt(1.0 / 4.0);
376
                c8[i][j] = s * cos(M_PI * i * (j + 0.5) / 8.0);
377
                sum += c8[i][j] * c8[i][j];
378
            }
379
        }
380
 
381
        for (i = 0; i < 4; i++) {
382
            sum = 0;
383
            for (j = 0; j < 4; j++) {
384
                s = (i == 0) ? sqrt(1.0 / 4.0) : sqrt(1.0 / 2.0);
385
                c4[i][j] = s * cos(M_PI * i * (j + 0.5) / 4.0);
386
                sum += c4[i][j] * c4[i][j];
387
            }
388
        }
389
    }
390
 
391
    /* butterfly */
392
    s = 0.5 * sqrt(2.0);
393
    for (i = 0; i < 4; i++) {
394
        for (j = 0; j < 8; j++) {
395
            block1[8 * (2 * i) + j] =
396
                (block[8 * (2 * i) + j] + block[8 * (2 * i + 1) + j]) * s;
397
            block1[8 * (2 * i + 1) + j] =
398
                (block[8 * (2 * i) + j] - block[8 * (2 * i + 1) + j]) * s;
399
        }
400
    }
401
 
402
    /* idct8 on lines */
403
    for (i = 0; i < 8; i++) {
404
        for (j = 0; j < 8; j++) {
405
            sum = 0;
406
            for (k = 0; k < 8; k++)
407
                sum += c8[k][j] * block1[8 * i + k];
408
            block2[8 * i + j] = sum;
409
        }
410
    }
411
 
412
    /* idct4 */
413
    for (i = 0; i < 8; i++) {
414
        for (j = 0; j < 4; j++) {
415
            /* top */
416
            sum = 0;
417
            for (k = 0; k < 4; k++)
418
                sum += c4[k][j] * block2[8 * (2 * k) + i];
419
            block3[8 * (2 * j) + i] = sum;
420
 
421
            /* bottom */
422
            sum = 0;
423
            for (k = 0; k < 4; k++)
424
                sum += c4[k][j] * block2[8 * (2 * k + 1) + i];
425
            block3[8 * (2 * j + 1) + i] = sum;
426
        }
427
    }
428
 
429
    /* clamp and store the result */
430
    for (i = 0; i < 8; i++) {
431
        for (j = 0; j < 8; j++) {
432
            v = block3[8 * i + j];
433
            if      (v < 0)   v = 0;
434
            else if (v > 255) v = 255;
435
            dest[i * linesize + j] = (int) rint(v);
436
        }
437
    }
438
}
439
 
440
static void idct248_error(const char *name,
441
                          void (*idct248_put)(uint8_t *dest, int line_size,
442
                                              int16_t *block),
443
                          int speed)
444
{
445
    int it, i, it1, ti, ti1, err_max, v;
446
    AVLFG prng;
447
 
448
    av_lfg_init(&prng, 1);
449
 
450
    /* just one test to see if code is correct (precision is less
451
       important here) */
452
    err_max = 0;
453
    for (it = 0; it < NB_ITS; it++) {
454
        /* XXX: use forward transform to generate values */
455
        for (i = 0; i < 64; i++)
456
            block1[i] = av_lfg_get(&prng) % 256 - 128;
457
        block1[0] += 1024;
458
 
459
        for (i = 0; i < 64; i++)
460
            block[i] = block1[i];
461
        idct248_ref(img_dest1, 8, block);
462
 
463
        for (i = 0; i < 64; i++)
464
            block[i] = block1[i];
465
        idct248_put(img_dest, 8, block);
466
 
467
        for (i = 0; i < 64; i++) {
468
            v = abs((int) img_dest[i] - (int) img_dest1[i]);
469
            if (v == 255)
470
                printf("%d %d\n", img_dest[i], img_dest1[i]);
471
            if (v > err_max)
472
                err_max = v;
473
        }
474
#if 0
475
        printf("ref=\n");
476
        for(i=0;i<8;i++) {
477
            int j;
478
            for(j=0;j<8;j++) {
479
                printf(" %3d", img_dest1[i*8+j]);
480
            }
481
            printf("\n");
482
        }
483
 
484
        printf("out=\n");
485
        for(i=0;i<8;i++) {
486
            int j;
487
            for(j=0;j<8;j++) {
488
                printf(" %3d", img_dest[i*8+j]);
489
            }
490
            printf("\n");
491
        }
492
#endif
493
    }
494
    printf("%s %s: err_inf=%d\n", 1 ? "IDCT248" : "DCT248", name, err_max);
495
 
496
    if (!speed)
497
        return;
498
 
499
    ti = av_gettime();
500
    it1 = 0;
501
    do {
502
        for (it = 0; it < NB_ITS_SPEED; it++) {
503
            for (i = 0; i < 64; i++)
504
                block[i] = block1[i];
505
            idct248_put(img_dest, 8, block);
506
        }
507
        emms_c();
508
        it1 += NB_ITS_SPEED;
509
        ti1 = av_gettime() - ti;
510
    } while (ti1 < 1000000);
511
 
512
    printf("%s %s: %0.1f kdct/s\n", 1 ? "IDCT248" : "DCT248", name,
513
           (double) it1 * 1000.0 / (double) ti1);
514
}
515
 
516
/**
 * Print the command-line usage summary to standard output.
 */
static void help(void)
{
    /* The two positional arguments are named after the entries described
     * below; the usage line previously showed them as empty brackets. */
    printf("dct-test [-i] [test-number] [bits]\n"
           "test-number 0 -> test with random matrixes\n"
           "            1 -> test with random sparse matrixes\n"
           "            2 -> do 3. test from mpeg4 std\n"
           "bits        Number of time domain bits to use, 8 is default\n"
           "-i          test IDCT implementations\n"
           "-4          test IDCT248 implementations\n"
           "-t          speed test\n");
}
527
 
528
#if !HAVE_GETOPT
529
#include "compat/getopt.c"
530
#endif
531
 
532
int main(int argc, char **argv)
533
{
534
    int test_idct = 0, test_248_dct = 0;
535
    int c, i;
536
    int test = 1;
537
    int speed = 0;
538
    int err = 0;
539
    int bits=8;
540
 
541
    cpu_flags = av_get_cpu_flags();
542
 
543
    ff_ref_dct_init();
544
    idct_mmx_init();
545
 
546
    for (;;) {
547
        c = getopt(argc, argv, "ih4t");
548
        if (c == -1)
549
            break;
550
        switch (c) {
551
        case 'i':
552
            test_idct = 1;
553
            break;
554
        case '4':
555
            test_248_dct = 1;
556
            break;
557
        case 't':
558
            speed = 1;
559
            break;
560
        default:
561
        case 'h':
562
            help();
563
            return 0;
564
        }
565
    }
566
 
567
    if (optind < argc)
568
        test = atoi(argv[optind]);
569
    if(optind+1 < argc) bits= atoi(argv[optind+1]);
570
 
571
    printf("ffmpeg DCT/IDCT test\n");
572
 
573
    if (test_248_dct) {
574
        idct248_error("SIMPLE-C", ff_simple_idct248_put, speed);
575
    } else {
576
        const struct algo *algos = test_idct ? idct_tab : fdct_tab;
577
        for (i = 0; algos[i].name; i++)
578
            if (!(~cpu_flags & algos[i].mm_support)) {
579
                err |= dct_error(&algos[i], test, test_idct, speed, bits);
580
            }
581
    }
582
 
583
    if (err)
584
        printf("Error: %d.\n", err);
585
 
586
    return !!err;
587
}