Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
4349 Serge 1
/*
2
 * Copyright (C) 2001-2003 Michael Niedermayer (michaelni@gmx.at)
3
 *
4
 * AltiVec optimizations (C) 2004 Romain Dolbeau 
5
 *
6
 * This file is part of FFmpeg.
7
 *
8
 * FFmpeg is free software; you can redistribute it and/or modify
9
 * it under the terms of the GNU General Public License as published by
10
 * the Free Software Foundation; either version 2 of the License, or
11
 * (at your option) any later version.
12
 *
13
 * FFmpeg is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16
 * GNU General Public License for more details.
17
 *
18
 * You should have received a copy of the GNU General Public License
19
 * along with FFmpeg; if not, write to the Free Software
20
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21
 */
22
 
23
/**
24
 * @file
25
 * postprocessing.
26
 */
27
 
28
/*
29
                        C       MMX     MMX2    3DNow   AltiVec
30
isVertDC                Ec      Ec                      Ec
31
isVertMinMaxOk          Ec      Ec                      Ec
32
doVertLowPass           E               e       e       Ec
33
doVertDefFilter         Ec      Ec      e       e       Ec
34
isHorizDC               Ec      Ec                      Ec
35
isHorizMinMaxOk         a       E                       Ec
36
doHorizLowPass          E               e       e       Ec
37
doHorizDefFilter        Ec      Ec      e       e       Ec
38
do_a_deblock            Ec      E       Ec      E
39
deRing                  E               e       e*      Ecp
40
Vertical RKAlgo1        E               a       a
41
Horizontal RKAlgo1                      a       a
42
Vertical X1#            a               E       E
43
Horizontal X1#          a               E       E
44
LinIpolDeinterlace      e               E       E*
45
CubicIpolDeinterlace    a               e       e*
46
LinBlendDeinterlace     e               E       E*
47
MedianDeinterlace#      E       Ec      Ec
48
TempDeNoiser#           E               e       e       Ec
49
 
50
* I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work
51
# more or less selfinvented filters so the exactness is not too meaningful
52
E = Exact implementation
53
e = almost exact implementation (slightly different rounding,...)
54
a = alternative / approximate impl
55
c = checked against the other implementations (-vo md5)
56
p = partially optimized, still some work to do
57
*/
58
 
59
/*
60
TODO:
61
reduce the time wasted on the mem transfer
62
unroll stuff if instructions depend too much on the prior one
63
move YScale thing to the end instead of fixing QP
64
write a faster and higher quality deblocking filter :)
65
make the mainloop more flexible (variable number of blocks at once
66
        (the if/else stuff per block is slowing things down)
67
compare the quality & speed of all filters
68
split this huge file
69
optimize c versions
70
try to unroll inner for(x=0 ... loop to avoid these damn if(x ... checks
71
...
72
*/
73
 
74
//Changelog: use git log
75
 
76
#include "config.h"
#include "libavutil/avutil.h"
#include "libavutil/avassert.h"
#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
//#undef HAVE_MMXEXT_INLINE
//#define HAVE_AMD3DNOW_INLINE
//#undef HAVE_MMX_INLINE
//#undef ARCH_X86
//#define DEBUG_BRIGHTNESS
#include "postprocess.h"
#include "postprocess_internal.h"
#include "libavutil/avstring.h"
91
 
92
unsigned postproc_version(void)
93
{
94
    av_assert0(LIBPOSTPROC_VERSION_MICRO >= 100);
95
    return LIBPOSTPROC_VERSION_INT;
96
}
97
 
98
const char *postproc_configuration(void)
99
{
100
    return FFMPEG_CONFIGURATION;
101
}
102
 
103
const char *postproc_license(void)
104
{
105
#define LICENSE_PREFIX "libpostproc license: "
106
    return LICENSE_PREFIX FFMPEG_LICENSE + sizeof(LICENSE_PREFIX) - 1;
107
}
108
 
109
#if HAVE_ALTIVEC_H
110
#include <altivec.h>
111
#endif
112
 
113
#define GET_MODE_BUFFER_SIZE 500
114
#define OPTIONS_ARRAY_SIZE 10
115
#define BLOCK_SIZE 8
116
#define TEMP_STRIDE 8
117
//#define NUM_BLOCKS_AT_ONCE 16 //not used yet
118
 
119
#if ARCH_X86 && HAVE_INLINE_ASM
120
DECLARE_ASM_CONST(8, uint64_t, w05)= 0x0005000500050005LL;
121
DECLARE_ASM_CONST(8, uint64_t, w04)= 0x0004000400040004LL;
122
DECLARE_ASM_CONST(8, uint64_t, w20)= 0x0020002000200020LL;
123
DECLARE_ASM_CONST(8, uint64_t, b00)= 0x0000000000000000LL;
124
DECLARE_ASM_CONST(8, uint64_t, b01)= 0x0101010101010101LL;
125
DECLARE_ASM_CONST(8, uint64_t, b02)= 0x0202020202020202LL;
126
DECLARE_ASM_CONST(8, uint64_t, b08)= 0x0808080808080808LL;
127
DECLARE_ASM_CONST(8, uint64_t, b80)= 0x8080808080808080LL;
128
#endif
129
 
130
DECLARE_ASM_CONST(8, int, deringThreshold)= 20;
131
 
132
 
133
static const struct PPFilter filters[]=
134
{
135
    {"hb", "hdeblock",              1, 1, 3, H_DEBLOCK},
136
    {"vb", "vdeblock",              1, 2, 4, V_DEBLOCK},
137
/*  {"hr", "rkhdeblock",            1, 1, 3, H_RK1_FILTER},
138
    {"vr", "rkvdeblock",            1, 2, 4, V_RK1_FILTER},*/
139
    {"h1", "x1hdeblock",            1, 1, 3, H_X1_FILTER},
140
    {"v1", "x1vdeblock",            1, 2, 4, V_X1_FILTER},
141
    {"ha", "ahdeblock",             1, 1, 3, H_A_DEBLOCK},
142
    {"va", "avdeblock",             1, 2, 4, V_A_DEBLOCK},
143
    {"dr", "dering",                1, 5, 6, DERING},
144
    {"al", "autolevels",            0, 1, 2, LEVEL_FIX},
145
    {"lb", "linblenddeint",         1, 1, 4, LINEAR_BLEND_DEINT_FILTER},
146
    {"li", "linipoldeint",          1, 1, 4, LINEAR_IPOL_DEINT_FILTER},
147
    {"ci", "cubicipoldeint",        1, 1, 4, CUBIC_IPOL_DEINT_FILTER},
148
    {"md", "mediandeint",           1, 1, 4, MEDIAN_DEINT_FILTER},
149
    {"fd", "ffmpegdeint",           1, 1, 4, FFMPEG_DEINT_FILTER},
150
    {"l5", "lowpass5",              1, 1, 4, LOWPASS5_DEINT_FILTER},
151
    {"tn", "tmpnoise",              1, 7, 8, TEMP_NOISE_FILTER},
152
    {"fq", "forcequant",            1, 0, 0, FORCE_QUANT},
153
    {"be", "bitexact",              1, 0, 0, BITEXACT},
154
    {NULL, NULL,0,0,0,0} //End Marker
155
};
156
 
157
/*
 * Alias table used by the mode parser: entries come in pairs
 * (alias name, filter list it expands to). A single NULL in the
 * alias position terminates the table.
 */
static const char *replaceTable[]=
{
    "default",      "hb:a,vb:a,dr:a",
    "de",           "hb:a,vb:a,dr:a",
    "fast",         "h1:a,v1:a,dr:a",
    "fa",           "h1:a,v1:a,dr:a",
    "ac",           "ha:a:128:7,va:a,dr:a",
    NULL //End Marker
};
166
 
167
 
168
#if ARCH_X86 && HAVE_INLINE_ASM
/* Thin wrappers that each emit one x86 prefetch instruction for address p. */

/** Emit a "prefetchnta" instruction for the address p. */
static inline void prefetchnta(void *p)
{
    __asm__ volatile(   "prefetchnta (%0)\n\t"
        : : "r" (p)
    );
}

/** Emit a "prefetcht0" instruction for the address p. */
static inline void prefetcht0(void *p)
{
    __asm__ volatile(   "prefetcht0 (%0)\n\t"
        : : "r" (p)
    );
}

/** Emit a "prefetcht1" instruction for the address p. */
static inline void prefetcht1(void *p)
{
    __asm__ volatile(   "prefetcht1 (%0)\n\t"
        : : "r" (p)
    );
}

/** Emit a "prefetcht2" instruction for the address p. */
static inline void prefetcht2(void *p)
{
    __asm__ volatile(   "prefetcht2 (%0)\n\t"
        : : "r" (p)
    );
}
#endif
197
 
198
/* The horizontal functions exist only in C because the MMX
199
 * code is faster with vertical filters and transposing. */
200
 
201
/**
202
 * Check if the given 8x8 Block is mostly "flat"
203
 */
204
static inline int isHorizDC_C(const uint8_t src[], int stride, const PPContext *c)
205
{
206
    int numEq= 0;
207
    int y;
208
    const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
209
    const int dcThreshold= dcOffset*2 + 1;
210
 
211
    for(y=0; y
212
        if(((unsigned)(src[0] - src[1] + dcOffset)) < dcThreshold) numEq++;
213
        if(((unsigned)(src[1] - src[2] + dcOffset)) < dcThreshold) numEq++;
214
        if(((unsigned)(src[2] - src[3] + dcOffset)) < dcThreshold) numEq++;
215
        if(((unsigned)(src[3] - src[4] + dcOffset)) < dcThreshold) numEq++;
216
        if(((unsigned)(src[4] - src[5] + dcOffset)) < dcThreshold) numEq++;
217
        if(((unsigned)(src[5] - src[6] + dcOffset)) < dcThreshold) numEq++;
218
        if(((unsigned)(src[6] - src[7] + dcOffset)) < dcThreshold) numEq++;
219
        src+= stride;
220
    }
221
    return numEq > c->ppMode.flatnessThreshold;
222
}
223
 
224
/**
225
 * Check if the middle 8x8 Block in the given 8x16 block is flat
226
 */
227
static inline int isVertDC_C(const uint8_t src[], int stride, const PPContext *c)
228
{
229
    int numEq= 0;
230
    int y;
231
    const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
232
    const int dcThreshold= dcOffset*2 + 1;
233
 
234
    src+= stride*4; // src points to begin of the 8x8 Block
235
    for(y=0; y
236
        if(((unsigned)(src[0] - src[0+stride] + dcOffset)) < dcThreshold) numEq++;
237
        if(((unsigned)(src[1] - src[1+stride] + dcOffset)) < dcThreshold) numEq++;
238
        if(((unsigned)(src[2] - src[2+stride] + dcOffset)) < dcThreshold) numEq++;
239
        if(((unsigned)(src[3] - src[3+stride] + dcOffset)) < dcThreshold) numEq++;
240
        if(((unsigned)(src[4] - src[4+stride] + dcOffset)) < dcThreshold) numEq++;
241
        if(((unsigned)(src[5] - src[5+stride] + dcOffset)) < dcThreshold) numEq++;
242
        if(((unsigned)(src[6] - src[6+stride] + dcOffset)) < dcThreshold) numEq++;
243
        if(((unsigned)(src[7] - src[7+stride] + dcOffset)) < dcThreshold) numEq++;
244
        src+= stride;
245
    }
246
    return numEq > c->ppMode.flatnessThreshold;
247
}
248
 
249
/**
 * Sampled range check over 8 rows of an 8-pixel-wide block.
 *
 * One pixel pair, five columns apart, is tested per row (the pair used
 * rotates every row). The test fails as soon as the difference of a pair
 * lies outside [-2*QP, 2*QP]; the unsigned cast turns values below the
 * lower bound into large numbers so one comparison covers both bounds.
 *
 * @param src    first pixel of the block
 * @param stride offset between the start of two consecutive rows
 * @param QP     quantiser used to scale the allowed range
 * @return 1 if all sampled pairs are within range, 0 otherwise
 */
static inline int isHorizMinMaxOk_C(const uint8_t src[], int stride, int QP)
{
    int i;
    for(i=0; i<2; i++){
        if((unsigned)(src[0] - src[5] + 2*QP) > 4*QP) return 0;
        src += stride;
        if((unsigned)(src[2] - src[7] + 2*QP) > 4*QP) return 0;
        src += stride;
        if((unsigned)(src[4] - src[1] + 2*QP) > 4*QP) return 0;
        src += stride;
        if((unsigned)(src[6] - src[3] + 2*QP) > 4*QP) return 0;
        src += stride;
    }
    return 1;
}
264
 
265
static inline int isVertMinMaxOk_C(const uint8_t src[], int stride, int QP)
266
{
267
    int x;
268
    src+= stride*4;
269
    for(x=0; x
270
        if((unsigned)(src[  x + 0*stride] - src[  x + 5*stride] + 2*QP) > 4*QP) return 0;
271
        if((unsigned)(src[1+x + 2*stride] - src[1+x + 7*stride] + 2*QP) > 4*QP) return 0;
272
        if((unsigned)(src[2+x + 4*stride] - src[2+x + 1*stride] + 2*QP) > 4*QP) return 0;
273
        if((unsigned)(src[3+x + 6*stride] - src[3+x + 3*stride] + 2*QP) > 4*QP) return 0;
274
    }
275
    return 1;
276
}
277
 
278
static inline int horizClassify_C(const uint8_t src[], int stride, const PPContext *c)
279
{
280
    if( isHorizDC_C(src, stride, c) ){
281
        if( isHorizMinMaxOk_C(src, stride, c->QP) )
282
            return 1;
283
        else
284
            return 0;
285
    }else{
286
        return 2;
287
    }
288
}
289
 
290
static inline int vertClassify_C(const uint8_t src[], int stride, const PPContext *c)
291
{
292
    if( isVertDC_C(src, stride, c) ){
293
        if( isVertMinMaxOk_C(src, stride, c->QP) )
294
            return 1;
295
        else
296
            return 0;
297
    }else{
298
        return 2;
299
    }
300
}
301
 
302
static inline void doHorizDefFilter_C(uint8_t dst[], int stride, const PPContext *c)
303
{
304
    int y;
305
    for(y=0; y
306
        const int middleEnergy= 5*(dst[4] - dst[3]) + 2*(dst[2] - dst[5]);
307
 
308
        if(FFABS(middleEnergy) < 8*c->QP){
309
            const int q=(dst[3] - dst[4])/2;
310
            const int leftEnergy=  5*(dst[2] - dst[1]) + 2*(dst[0] - dst[3]);
311
            const int rightEnergy= 5*(dst[6] - dst[5]) + 2*(dst[4] - dst[7]);
312
 
313
            int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
314
            d= FFMAX(d, 0);
315
 
316
            d= (5*d + 32) >> 6;
317
            d*= FFSIGN(-middleEnergy);
318
 
319
            if(q>0)
320
            {
321
                d= d<0 ? 0 : d;
322
                d= d>q ? q : d;
323
            }
324
            else
325
            {
326
                d= d>0 ? 0 : d;
327
                d= d
328
            }
329
 
330
            dst[3]-= d;
331
            dst[4]+= d;
332
        }
333
        dst+= stride;
334
    }
335
}
336
 
337
/**
338
 * Do a horizontal low pass filter on the 10x8 block (dst points to middle 8x8 Block)
339
 * using the 9-Tap Filter (1,1,2,2,4,2,2,1,1)/16 (C version)
340
 */
341
static inline void doHorizLowPass_C(uint8_t dst[], int stride, const PPContext *c)
342
{
343
    int y;
344
    for(y=0; y
345
        const int first= FFABS(dst[-1] - dst[0]) < c->QP ? dst[-1] : dst[0];
346
        const int last= FFABS(dst[8] - dst[7]) < c->QP ? dst[8] : dst[7];
347
 
348
        int sums[10];
349
        sums[0] = 4*first + dst[0] + dst[1] + dst[2] + 4;
350
        sums[1] = sums[0] - first  + dst[3];
351
        sums[2] = sums[1] - first  + dst[4];
352
        sums[3] = sums[2] - first  + dst[5];
353
        sums[4] = sums[3] - first  + dst[6];
354
        sums[5] = sums[4] - dst[0] + dst[7];
355
        sums[6] = sums[5] - dst[1] + last;
356
        sums[7] = sums[6] - dst[2] + last;
357
        sums[8] = sums[7] - dst[3] + last;
358
        sums[9] = sums[8] - dst[4] + last;
359
 
360
        dst[0]= (sums[0] + sums[2] + 2*dst[0])>>4;
361
        dst[1]= (sums[1] + sums[3] + 2*dst[1])>>4;
362
        dst[2]= (sums[2] + sums[4] + 2*dst[2])>>4;
363
        dst[3]= (sums[3] + sums[5] + 2*dst[3])>>4;
364
        dst[4]= (sums[4] + sums[6] + 2*dst[4])>>4;
365
        dst[5]= (sums[5] + sums[7] + 2*dst[5])>>4;
366
        dst[6]= (sums[6] + sums[8] + 2*dst[6])>>4;
367
        dst[7]= (sums[7] + sums[9] + 2*dst[7])>>4;
368
 
369
        dst+= stride;
370
    }
371
}
372
 
373
/**
374
 * Experimental Filter 1 (Horizontal)
375
 * will not damage linear gradients
376
 * Flat blocks should look like they were passed through the (1,1,2,2,4,2,2,1,1) 9-Tap filter
377
 * can only smooth blocks at the expected locations (it cannot smooth them if they did move)
378
 * MMX2 version does correct clipping C version does not
379
 * not identical with the vertical one
380
 */
381
static inline void horizX1Filter(uint8_t *src, int stride, int QP)
382
{
383
    int y;
384
    static uint64_t lut[256];
385
    if(!lut[255])
386
    {
387
        int i;
388
        for(i=0; i<256; i++)
389
        {
390
            int v= i < 128 ? 2*i : 2*(i-256);
391
/*
392
//Simulate 112242211 9-Tap filter
393
            uint64_t a= (v/16)  & 0xFF;
394
            uint64_t b= (v/8)   & 0xFF;
395
            uint64_t c= (v/4)   & 0xFF;
396
            uint64_t d= (3*v/8) & 0xFF;
397
*/
398
//Simulate piecewise linear interpolation
399
            uint64_t a= (v/16)   & 0xFF;
400
            uint64_t b= (v*3/16) & 0xFF;
401
            uint64_t c= (v*5/16) & 0xFF;
402
            uint64_t d= (7*v/16) & 0xFF;
403
            uint64_t A= (0x100 - a)&0xFF;
404
            uint64_t B= (0x100 - b)&0xFF;
405
            uint64_t C= (0x100 - c)&0xFF;
406
            uint64_t D= (0x100 - c)&0xFF;
407
 
408
            lut[i]   = (a<<56) | (b<<48) | (c<<40) | (d<<32) |
409
                       (D<<24) | (C<<16) | (B<<8)  | (A);
410
            //lut[i] = (v<<32) | (v<<24);
411
        }
412
    }
413
 
414
    for(y=0; y
415
        int a= src[1] - src[2];
416
        int b= src[3] - src[4];
417
        int c= src[5] - src[6];
418
 
419
        int d= FFMAX(FFABS(b) - (FFABS(a) + FFABS(c))/2, 0);
420
 
421
        if(d < QP){
422
            int v = d * FFSIGN(-b);
423
 
424
            src[1] +=v/8;
425
            src[2] +=v/4;
426
            src[3] +=3*v/8;
427
            src[4] -=3*v/8;
428
            src[5] -=v/4;
429
            src[6] -=v/8;
430
        }
431
        src+=stride;
432
    }
433
}
434
 
435
/**
436
 * accurate deblock filter
437
 */
438
static av_always_inline void do_a_deblock_C(uint8_t *src, int step,
439
                                            int stride, const PPContext *c)
440
{
441
    int y;
442
    const int QP= c->QP;
443
    const int dcOffset= ((c->nonBQP*c->ppMode.baseDcDiff)>>8) + 1;
444
    const int dcThreshold= dcOffset*2 + 1;
445
//START_TIMER
446
    src+= step*4; // src points to begin of the 8x8 Block
447
    for(y=0; y<8; y++){
448
        int numEq= 0;
449
 
450
        if(((unsigned)(src[-1*step] - src[0*step] + dcOffset)) < dcThreshold) numEq++;
451
        if(((unsigned)(src[ 0*step] - src[1*step] + dcOffset)) < dcThreshold) numEq++;
452
        if(((unsigned)(src[ 1*step] - src[2*step] + dcOffset)) < dcThreshold) numEq++;
453
        if(((unsigned)(src[ 2*step] - src[3*step] + dcOffset)) < dcThreshold) numEq++;
454
        if(((unsigned)(src[ 3*step] - src[4*step] + dcOffset)) < dcThreshold) numEq++;
455
        if(((unsigned)(src[ 4*step] - src[5*step] + dcOffset)) < dcThreshold) numEq++;
456
        if(((unsigned)(src[ 5*step] - src[6*step] + dcOffset)) < dcThreshold) numEq++;
457
        if(((unsigned)(src[ 6*step] - src[7*step] + dcOffset)) < dcThreshold) numEq++;
458
        if(((unsigned)(src[ 7*step] - src[8*step] + dcOffset)) < dcThreshold) numEq++;
459
        if(numEq > c->ppMode.flatnessThreshold){
460
            int min, max, x;
461
 
462
            if(src[0] > src[step]){
463
                max= src[0];
464
                min= src[step];
465
            }else{
466
                max= src[step];
467
                min= src[0];
468
            }
469
            for(x=2; x<8; x+=2){
470
                if(src[x*step] > src[(x+1)*step]){
471
                        if(src[x    *step] > max) max= src[ x   *step];
472
                        if(src[(x+1)*step] < min) min= src[(x+1)*step];
473
                }else{
474
                        if(src[(x+1)*step] > max) max= src[(x+1)*step];
475
                        if(src[ x   *step] < min) min= src[ x   *step];
476
                }
477
            }
478
            if(max-min < 2*QP){
479
                const int first= FFABS(src[-1*step] - src[0]) < QP ? src[-1*step] : src[0];
480
                const int last= FFABS(src[8*step] - src[7*step]) < QP ? src[8*step] : src[7*step];
481
 
482
                int sums[10];
483
                sums[0] = 4*first + src[0*step] + src[1*step] + src[2*step] + 4;
484
                sums[1] = sums[0] - first       + src[3*step];
485
                sums[2] = sums[1] - first       + src[4*step];
486
                sums[3] = sums[2] - first       + src[5*step];
487
                sums[4] = sums[3] - first       + src[6*step];
488
                sums[5] = sums[4] - src[0*step] + src[7*step];
489
                sums[6] = sums[5] - src[1*step] + last;
490
                sums[7] = sums[6] - src[2*step] + last;
491
                sums[8] = sums[7] - src[3*step] + last;
492
                sums[9] = sums[8] - src[4*step] + last;
493
 
494
                src[0*step]= (sums[0] + sums[2] + 2*src[0*step])>>4;
495
                src[1*step]= (sums[1] + sums[3] + 2*src[1*step])>>4;
496
                src[2*step]= (sums[2] + sums[4] + 2*src[2*step])>>4;
497
                src[3*step]= (sums[3] + sums[5] + 2*src[3*step])>>4;
498
                src[4*step]= (sums[4] + sums[6] + 2*src[4*step])>>4;
499
                src[5*step]= (sums[5] + sums[7] + 2*src[5*step])>>4;
500
                src[6*step]= (sums[6] + sums[8] + 2*src[6*step])>>4;
501
                src[7*step]= (sums[7] + sums[9] + 2*src[7*step])>>4;
502
            }
503
        }else{
504
            const int middleEnergy= 5*(src[4*step] - src[3*step]) + 2*(src[2*step] - src[5*step]);
505
 
506
            if(FFABS(middleEnergy) < 8*QP){
507
                const int q=(src[3*step] - src[4*step])/2;
508
                const int leftEnergy=  5*(src[2*step] - src[1*step]) + 2*(src[0*step] - src[3*step]);
509
                const int rightEnergy= 5*(src[6*step] - src[5*step]) + 2*(src[4*step] - src[7*step]);
510
 
511
                int d= FFABS(middleEnergy) - FFMIN( FFABS(leftEnergy), FFABS(rightEnergy) );
512
                d= FFMAX(d, 0);
513
 
514
                d= (5*d + 32) >> 6;
515
                d*= FFSIGN(-middleEnergy);
516
 
517
                if(q>0){
518
                    d= d<0 ? 0 : d;
519
                    d= d>q ? q : d;
520
                }else{
521
                    d= d>0 ? 0 : d;
522
                    d= d
523
                }
524
 
525
                src[3*step]-= d;
526
                src[4*step]+= d;
527
            }
528
        }
529
 
530
        src += stride;
531
    }
532
/*if(step==16){
533
    STOP_TIMER("step16")
534
}else{
535
    STOP_TIMER("stepX")
536
}*/
537
}
538
 
539
//Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one
//Plain C versions
//we always compile C for testing which needs bitexactness
#define TEMPLATE_PP_C 1
#include "postprocess_template.c"

#if HAVE_ALTIVEC
#   define TEMPLATE_PP_ALTIVEC 1
#   include "postprocess_altivec_template.c"
#   include "postprocess_template.c"
#endif

/* With runtime CPU detection every x86 variant is instantiated; without it
 * only the first variant enabled at build time is. */
#if ARCH_X86 && HAVE_INLINE_ASM
#    if CONFIG_RUNTIME_CPUDETECT
#        define TEMPLATE_PP_MMX 1
#        include "postprocess_template.c"
#        define TEMPLATE_PP_MMXEXT 1
#        include "postprocess_template.c"
#        define TEMPLATE_PP_3DNOW 1
#        include "postprocess_template.c"
#        define TEMPLATE_PP_SSE2 1
#        include "postprocess_template.c"
#    else
#        if HAVE_SSE2_INLINE
#            define TEMPLATE_PP_SSE2 1
#            include "postprocess_template.c"
#        elif HAVE_MMXEXT_INLINE
#            define TEMPLATE_PP_MMXEXT 1
#            include "postprocess_template.c"
#        elif HAVE_AMD3DNOW_INLINE
#            define TEMPLATE_PP_3DNOW 1
#            include "postprocess_template.c"
#        elif HAVE_MMX_INLINE
#            define TEMPLATE_PP_MMX 1
#            include "postprocess_template.c"
#        endif
#    endif
#endif
577
 
578
typedef void (*pp_fn)(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
579
                      const QP_STORE_T QPs[], int QPStride, int isColor, PPContext *c2);
580
 
581
static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[], int dstStride, int width, int height,
582
        const QP_STORE_T QPs[], int QPStride, int isColor, pp_mode *vm, pp_context *vc)
583
{
584
    pp_fn pp = postProcess_C;
585
    PPContext *c= (PPContext *)vc;
586
    PPMode *ppMode= (PPMode *)vm;
587
    c->ppMode= *ppMode; //FIXME
588
 
589
    if (!(ppMode->lumMode & BITEXACT)) {
590
#if CONFIG_RUNTIME_CPUDETECT
591
#if ARCH_X86 && HAVE_INLINE_ASM
592
        // ordered per speed fastest first
593
        if      (c->cpuCaps & AV_CPU_FLAG_SSE2)     pp = postProcess_SSE2;
594
        else if (c->cpuCaps & AV_CPU_FLAG_MMXEXT)   pp = postProcess_MMX2;
595
        else if (c->cpuCaps & AV_CPU_FLAG_3DNOW)    pp = postProcess_3DNow;
596
        else if (c->cpuCaps & AV_CPU_FLAG_MMX)      pp = postProcess_MMX;
597
#elif HAVE_ALTIVEC
598
        if      (c->cpuCaps & AV_CPU_FLAG_ALTIVEC)  pp = postProcess_altivec;
599
#endif
600
#else /* CONFIG_RUNTIME_CPUDETECT */
601
#if     HAVE_SSE2_INLINE
602
        pp = postProcess_SSE2;
603
#elif   HAVE_MMXEXT_INLINE
604
        pp = postProcess_MMX2;
605
#elif HAVE_AMD3DNOW_INLINE
606
        pp = postProcess_3DNow;
607
#elif HAVE_MMX_INLINE
608
        pp = postProcess_MMX;
609
#elif HAVE_ALTIVEC
610
        pp = postProcess_altivec;
611
#endif
612
#endif /* !CONFIG_RUNTIME_CPUDETECT */
613
    }
614
 
615
    pp(src, srcStride, dst, dstStride, width, height, QPs, QPStride, isColor, c);
616
}
617
 
618
/* -pp Command line Help
 *
 * Every line must end in '\n': the "help" mode prints this text one
 * newline-terminated line at a time.
 * The names listed here must match the filter table exactly, because the
 * parser compares them case-sensitively.
 */
const char pp_help[] =
"Available postprocessing filters:\n"
"Filters                        Options\n"
"short  long name       short   long option     Description\n"
"*      *               a       autoq           CPU power dependent enabler\n"
"                       c       chrom           chrominance filtering enabled\n"
"                       y       nochrom         chrominance filtering disabled\n"
"                       n       noluma          luma filtering disabled\n"
"hb     hdeblock        (2 threshold)           horizontal deblocking filter\n"
"       1. difference factor: default=32, higher -> more deblocking\n"
"       2. flatness threshold: default=39, lower -> more deblocking\n"
"                       the h & v deblocking filters share these\n"
"                       so you can't set different thresholds for h / v\n"
"vb     vdeblock        (2 threshold)           vertical deblocking filter\n"
"ha     ahdeblock       (2 threshold)           horizontal deblocking filter\n"
"va     avdeblock       (2 threshold)           vertical deblocking filter\n"
"h1     x1hdeblock                              experimental h deblock filter 1\n"
"v1     x1vdeblock                              experimental v deblock filter 1\n"
"dr     dering                                  deringing filter\n"
"al     autolevels                              automatic brightness / contrast\n"
"                       f        fullyrange     stretch luminance to (0..255)\n"
"lb     linblenddeint                           linear blend deinterlacer\n"
"li     linipoldeint                            linear interpolating deinterlace\n"
"ci     cubicipoldeint                          cubic interpolating deinterlacer\n"
"md     mediandeint                             median deinterlacer\n"
"fd     ffmpegdeint                             ffmpeg deinterlacer\n"
"l5     lowpass5                                FIR lowpass deinterlacer\n"
"de     default                                 hb:a,vb:a,dr:a\n"
"fa     fast                                    h1:a,v1:a,dr:a\n"
"ac                                             ha:a:128:7,va:a,dr:a\n"
"tn     tmpnoise        (3 threshold)           temporal noise reducer\n"
"                     1. <= 2. <= 3.            larger -> stronger filtering\n"
"fq     forcequant      <quantizer>             force quantizer\n"
"be     bitexact                                use only the plain C code\n"
"Usage:\n"
"<filterName>[:<option>[:<option>...]][[,|/][-]<filterName>[:<option>...]]...\n"
"long form example:\n"
"vdeblock:autoq/hdeblock:autoq/linblenddeint    default,-vdeblock\n"
"short form example:\n"
"vb:a/hb:a/lb                                   de,-vb\n"
"more examples:\n"
"tn:64:128:256\n"
"\n"
;
663
 
664
pp_mode *pp_get_mode_by_name_and_quality(const char *name, int quality)
665
{
666
    char temp[GET_MODE_BUFFER_SIZE];
667
    char *p= temp;
668
    static const char filterDelimiters[] = ",/";
669
    static const char optionDelimiters[] = ":|";
670
    struct PPMode *ppMode;
671
    char *filterToken;
672
 
673
    if (!name)  {
674
        av_log(NULL, AV_LOG_ERROR, "pp: Missing argument\n");
675
        return NULL;
676
    }
677
 
678
    if (!strcmp(name, "help")) {
679
        const char *p;
680
        for (p = pp_help; strchr(p, '\n'); p = strchr(p, '\n') + 1) {
681
            av_strlcpy(temp, p, FFMIN(sizeof(temp), strchr(p, '\n') - p + 2));
682
            av_log(NULL, AV_LOG_INFO, "%s", temp);
683
        }
684
        return NULL;
685
    }
686
 
687
    ppMode= av_malloc(sizeof(PPMode));
688
 
689
    ppMode->lumMode= 0;
690
    ppMode->chromMode= 0;
691
    ppMode->maxTmpNoise[0]= 700;
692
    ppMode->maxTmpNoise[1]= 1500;
693
    ppMode->maxTmpNoise[2]= 3000;
694
    ppMode->maxAllowedY= 234;
695
    ppMode->minAllowedY= 16;
696
    ppMode->baseDcDiff= 256/8;
697
    ppMode->flatnessThreshold= 56-16-1;
698
    ppMode->maxClippedThreshold= 0.01;
699
    ppMode->error=0;
700
 
701
    memset(temp, 0, GET_MODE_BUFFER_SIZE);
702
    av_strlcpy(temp, name, GET_MODE_BUFFER_SIZE - 1);
703
 
704
    av_log(NULL, AV_LOG_DEBUG, "pp: %s\n", name);
705
 
706
    for(;;){
707
        char *filterName;
708
        int q= 1000000; //PP_QUALITY_MAX;
709
        int chrom=-1;
710
        int luma=-1;
711
        char *option;
712
        char *options[OPTIONS_ARRAY_SIZE];
713
        int i;
714
        int filterNameOk=0;
715
        int numOfUnknownOptions=0;
716
        int enable=1; //does the user want us to enabled or disabled the filter
717
 
718
        filterToken= strtok(p, filterDelimiters);
719
        if(filterToken == NULL) break;
720
        p+= strlen(filterToken) + 1; // p points to next filterToken
721
        filterName= strtok(filterToken, optionDelimiters);
722
        av_log(NULL, AV_LOG_DEBUG, "pp: %s::%s\n", filterToken, filterName);
723
 
724
        if(*filterName == '-'){
725
            enable=0;
726
            filterName++;
727
        }
728
 
729
        for(;;){ //for all options
730
            option= strtok(NULL, optionDelimiters);
731
            if(option == NULL) break;
732
 
733
            av_log(NULL, AV_LOG_DEBUG, "pp: option: %s\n", option);
734
            if(!strcmp("autoq", option) || !strcmp("a", option)) q= quality;
735
            else if(!strcmp("nochrom", option) || !strcmp("y", option)) chrom=0;
736
            else if(!strcmp("chrom", option) || !strcmp("c", option)) chrom=1;
737
            else if(!strcmp("noluma", option) || !strcmp("n", option)) luma=0;
738
            else{
739
                options[numOfUnknownOptions] = option;
740
                numOfUnknownOptions++;
741
            }
742
            if(numOfUnknownOptions >= OPTIONS_ARRAY_SIZE-1) break;
743
        }
744
        options[numOfUnknownOptions] = NULL;
745
 
746
        /* replace stuff from the replace Table */
747
        for(i=0; replaceTable[2*i]!=NULL; i++){
748
            if(!strcmp(replaceTable[2*i], filterName)){
749
                int newlen= strlen(replaceTable[2*i + 1]);
750
                int plen;
751
                int spaceLeft;
752
 
753
                p--, *p=',';
754
 
755
                plen= strlen(p);
756
                spaceLeft= p - temp + plen;
757
                if(spaceLeft + newlen  >= GET_MODE_BUFFER_SIZE - 1){
758
                    ppMode->error++;
759
                    break;
760
                }
761
                memmove(p + newlen, p, plen+1);
762
                memcpy(p, replaceTable[2*i + 1], newlen);
763
                filterNameOk=1;
764
            }
765
        }
766
 
767
        for(i=0; filters[i].shortName!=NULL; i++){
768
            if(   !strcmp(filters[i].longName, filterName)
769
               || !strcmp(filters[i].shortName, filterName)){
770
                ppMode->lumMode &= ~filters[i].mask;
771
                ppMode->chromMode &= ~filters[i].mask;
772
 
773
                filterNameOk=1;
774
                if(!enable) break; // user wants to disable it
775
 
776
                if(q >= filters[i].minLumQuality && luma)
777
                    ppMode->lumMode|= filters[i].mask;
778
                if(chrom==1 || (chrom==-1 && filters[i].chromDefault))
779
                    if(q >= filters[i].minChromQuality)
780
                            ppMode->chromMode|= filters[i].mask;
781
 
782
                if(filters[i].mask == LEVEL_FIX){
783
                    int o;
784
                    ppMode->minAllowedY= 16;
785
                    ppMode->maxAllowedY= 234;
786
                    for(o=0; options[o]!=NULL; o++){
787
                        if(  !strcmp(options[o],"fullyrange")
788
                           ||!strcmp(options[o],"f")){
789
                            ppMode->minAllowedY= 0;
790
                            ppMode->maxAllowedY= 255;
791
                            numOfUnknownOptions--;
792
                        }
793
                    }
794
                }
795
                else if(filters[i].mask == TEMP_NOISE_FILTER)
796
                {
797
                    int o;
798
                    int numOfNoises=0;
799
 
800
                    for(o=0; options[o]!=NULL; o++){
801
                        char *tail;
802
                        ppMode->maxTmpNoise[numOfNoises]=
803
                            strtol(options[o], &tail, 0);
804
                        if(tail!=options[o]){
805
                            numOfNoises++;
806
                            numOfUnknownOptions--;
807
                            if(numOfNoises >= 3) break;
808
                        }
809
                    }
810
                }
811
                else if(filters[i].mask == V_DEBLOCK   || filters[i].mask == H_DEBLOCK
812
                     || filters[i].mask == V_A_DEBLOCK || filters[i].mask == H_A_DEBLOCK){
813
                    int o;
814
 
815
                    for(o=0; options[o]!=NULL && o<2; o++){
816
                        char *tail;
817
                        int val= strtol(options[o], &tail, 0);
818
                        if(tail==options[o]) break;
819
 
820
                        numOfUnknownOptions--;
821
                        if(o==0) ppMode->baseDcDiff= val;
822
                        else ppMode->flatnessThreshold= val;
823
                    }
824
                }
825
                else if(filters[i].mask == FORCE_QUANT){
826
                    int o;
827
                    ppMode->forcedQuant= 15;
828
 
829
                    for(o=0; options[o]!=NULL && o<1; o++){
830
                        char *tail;
831
                        int val= strtol(options[o], &tail, 0);
832
                        if(tail==options[o]) break;
833
 
834
                        numOfUnknownOptions--;
835
                        ppMode->forcedQuant= val;
836
                    }
837
                }
838
            }
839
        }
840
        if(!filterNameOk) ppMode->error++;
841
        ppMode->error += numOfUnknownOptions;
842
    }
843
 
844
    av_log(NULL, AV_LOG_DEBUG, "pp: lumMode=%X, chromMode=%X\n", ppMode->lumMode, ppMode->chromMode);
845
    if(ppMode->error){
846
        av_log(NULL, AV_LOG_ERROR, "%d errors in postprocess string \"%s\"\n", ppMode->error, name);
847
        av_free(ppMode);
848
        return NULL;
849
    }
850
    return ppMode;
851
}
852
 
853
/**
 * Release a postprocessing mode object.
 *
 * @param mode pointer to the mode to release; it is passed unchanged to
 *             av_free().
 */
void pp_free_mode(pp_mode *mode)
{
    av_free(mode);
}
856
 
857
/**
 * Throw away the buffer stored in *p and put a new av_mallocz() buffer in
 * its place. The previous contents are not carried over.
 *
 * @param p         address of the buffer pointer to replace
 * @param alignment accepted for the callers' benefit but not used here
 * @param size      number of bytes requested from av_mallocz()
 */
static void reallocAlign(void **p, int alignment, int size)
{
    void *stale = *p;

    av_free(stale);
    *p = av_mallocz(size);
}
861
 
862
/**
 * (Re)allocate every work buffer owned by a context for the given geometry.
 *
 * All buffers are replaced through reallocAlign(), so previous contents are
 * discarded. The new strides are recorded in the context first.
 *
 * @param c        context whose buffers are replaced
 * @param width    picture width in pixels
 * @param height   picture height in pixels
 * @param stride   line stride used to size the temporary picture buffers
 * @param qpStride row stride used to size the QP tables
 *
 * NOTE(review): allocation failures are not reported to the caller; the
 * affected pointers are simply left NULL.
 */
static void reallocBuffers(PPContext *c, int width, int height, int stride, int qpStride){
    const int mbWidth = (width+15)>>4;   // width in 16-pixel units, rounded up
    const int mbHeight= (height+15)>>4;  // height in 16-pixel units, rounded up
    int i;

    c->stride= stride;
    c->qpStride= qpStride;

    reallocAlign((void **)&c->tempDst, 8, stride*24+32);
    reallocAlign((void **)&c->tempSrc, 8, stride*24);
    reallocAlign((void **)&c->tempBlocks, 8, 2*16*8);
    reallocAlign((void **)&c->yHistogram, 8, 256*sizeof(uint64_t));
    // Only seed the histogram if the allocation succeeded; writing through a
    // NULL pointer here would crash before anyone could notice the failure.
    if(c->yHistogram){
        const int seed= width*height/64*15/256; // same value for every bin
        for(i=0; i<256; i++)
            c->yHistogram[i]= seed;
    }

    for(i=0; i<3; i++){
        //Note: The +17*1024 is just there so I do not have to worry about r/w over the end.
        reallocAlign((void **)&c->tempBlurred[i], 8, stride*mbHeight*16 + 17*1024);
        reallocAlign((void **)&c->tempBlurredPast[i], 8, 256*((height+7)&(~7))/2 + 17*1024);//FIXME size
    }

    reallocAlign((void **)&c->deintTemp, 8, 2*width+32);
    reallocAlign((void **)&c->nonBQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
    reallocAlign((void **)&c->stdQPTable, 8, qpStride*mbHeight*sizeof(QP_STORE_T));
    reallocAlign((void **)&c->forcedQPTable, 8, mbWidth*sizeof(QP_STORE_T));
}
888
 
889
/**
 * Name callback referenced by the AVClass descriptor defined below.
 *
 * @param ptr ignored
 * @return the constant string "postproc"
 */
static const char * context_to_name(void * ptr)
{
    static const char name[] = "postproc";

    (void)ptr;
    return name;
}
892
 
893
/* Class descriptor that pp_get_context() stores in every PPContext as
 * av_class. Positional initializers: the string "Postproc", the
 * context_to_name callback, and NULL. */
static const AVClass av_codec_context_class = { "Postproc", context_to_name, NULL };
894
 
895
pp_context *pp_get_context(int width, int height, int cpuCaps){
896
    PPContext *c= av_malloc(sizeof(PPContext));
897
    int stride= FFALIGN(width, 16);  //assumed / will realloc if needed
898
    int qpStride= (width+15)/16 + 2; //assumed / will realloc if needed
899
 
900
    memset(c, 0, sizeof(PPContext));
901
    c->av_class = &av_codec_context_class;
902
    if(cpuCaps&PP_FORMAT){
903
        c->hChromaSubSample= cpuCaps&0x3;
904
        c->vChromaSubSample= (cpuCaps>>4)&0x3;
905
    }else{
906
        c->hChromaSubSample= 1;
907
        c->vChromaSubSample= 1;
908
    }
909
    if (cpuCaps & PP_CPU_CAPS_AUTO) {
910
        c->cpuCaps = av_get_cpu_flags();
911
    } else {
912
        c->cpuCaps = 0;
913
        if (cpuCaps & PP_CPU_CAPS_MMX)      c->cpuCaps |= AV_CPU_FLAG_MMX;
914
        if (cpuCaps & PP_CPU_CAPS_MMX2)     c->cpuCaps |= AV_CPU_FLAG_MMXEXT;
915
        if (cpuCaps & PP_CPU_CAPS_3DNOW)    c->cpuCaps |= AV_CPU_FLAG_3DNOW;
916
        if (cpuCaps & PP_CPU_CAPS_ALTIVEC)  c->cpuCaps |= AV_CPU_FLAG_ALTIVEC;
917
    }
918
 
919
    reallocBuffers(c, width, height, stride, qpStride);
920
 
921
    c->frameNum=-1;
922
 
923
    return c;
924
}
925
 
926
void pp_free_context(void *vc){
927
    PPContext *c = (PPContext*)vc;
928
    int i;
929
 
930
    for(i=0; i<3; i++) av_free(c->tempBlurred[i]);
931
    for(i=0; i<3; i++) av_free(c->tempBlurredPast[i]);
932
 
933
    av_free(c->tempBlocks);
934
    av_free(c->yHistogram);
935
    av_free(c->tempDst);
936
    av_free(c->tempSrc);
937
    av_free(c->deintTemp);
938
    av_free(c->stdQPTable);
939
    av_free(c->nonBQPTable);
940
    av_free(c->forcedQPTable);
941
 
942
    memset(c, 0, sizeof(PPContext));
943
 
944
    av_free(c);
945
}
946
 
947
void  pp_postprocess(const uint8_t * src[3], const int srcStride[3],
948
                     uint8_t * dst[3], const int dstStride[3],
949
                     int width, int height,
950
                     const QP_STORE_T *QP_store,  int QPStride,
951
                     pp_mode *vm,  void *vc, int pict_type)
952
{
953
    int mbWidth = (width+15)>>4;
954
    int mbHeight= (height+15)>>4;
955
    PPMode *mode = (PPMode*)vm;
956
    PPContext *c = (PPContext*)vc;
957
    int minStride= FFMAX(FFABS(srcStride[0]), FFABS(dstStride[0]));
958
    int absQPStride = FFABS(QPStride);
959
 
960
    // c->stride and c->QPStride are always positive
961
    if(c->stride < minStride || c->qpStride < absQPStride)
962
        reallocBuffers(c, width, height,
963
                       FFMAX(minStride, c->stride),
964
                       FFMAX(c->qpStride, absQPStride));
965
 
966
    if(QP_store==NULL || (mode->lumMode & FORCE_QUANT)){
967
        int i;
968
        QP_store= c->forcedQPTable;
969
        absQPStride = QPStride = 0;
970
        if(mode->lumMode & FORCE_QUANT)
971
            for(i=0; iforcedQPTable[i]= mode->forcedQuant;
972
        else
973
            for(i=0; iforcedQPTable[i]= 1;
974
    }
975
 
976
    if(pict_type & PP_PICT_TYPE_QP2){
977
        int i;
978
        const int count= mbHeight * absQPStride;
979
        for(i=0; i<(count>>2); i++){
980
            ((uint32_t*)c->stdQPTable)[i] = (((const uint32_t*)QP_store)[i]>>1) & 0x7F7F7F7F;
981
        }
982
        for(i<<=2; i
983
            c->stdQPTable[i] = QP_store[i]>>1;
984
        }
985
        QP_store= c->stdQPTable;
986
        QPStride= absQPStride;
987
    }
988
 
989
    if(0){
990
        int x,y;
991
        for(y=0; y
992
            for(x=0; x
993
                av_log(c, AV_LOG_INFO, "%2d ", QP_store[x + y*QPStride]);
994
            }
995
            av_log(c, AV_LOG_INFO, "\n");
996
        }
997
        av_log(c, AV_LOG_INFO, "\n");
998
    }
999
 
1000
    if((pict_type&7)!=3){
1001
        if (QPStride >= 0){
1002
            int i;
1003
            const int count= mbHeight * QPStride;
1004
            for(i=0; i<(count>>2); i++){
1005
                ((uint32_t*)c->nonBQPTable)[i] = ((const uint32_t*)QP_store)[i] & 0x3F3F3F3F;
1006
            }
1007
            for(i<<=2; i
1008
                c->nonBQPTable[i] = QP_store[i] & 0x3F;
1009
            }
1010
        } else {
1011
            int i,j;
1012
            for(i=0; i
1013
                for(j=0; j
1014
                    c->nonBQPTable[i*absQPStride+j] = QP_store[i*QPStride+j] & 0x3F;
1015
                }
1016
            }
1017
        }
1018
    }
1019
 
1020
    av_log(c, AV_LOG_DEBUG, "using npp filters 0x%X/0x%X\n",
1021
           mode->lumMode, mode->chromMode);
1022
 
1023
    postProcess(src[0], srcStride[0], dst[0], dstStride[0],
1024
                width, height, QP_store, QPStride, 0, mode, c);
1025
 
1026
    width  = (width )>>c->hChromaSubSample;
1027
    height = (height)>>c->vChromaSubSample;
1028
 
1029
    if(mode->chromMode){
1030
        postProcess(src[1], srcStride[1], dst[1], dstStride[1],
1031
                    width, height, QP_store, QPStride, 1, mode, c);
1032
        postProcess(src[2], srcStride[2], dst[2], dstStride[2],
1033
                    width, height, QP_store, QPStride, 2, mode, c);
1034
    }
1035
    else if(srcStride[1] == dstStride[1] && srcStride[2] == dstStride[2]){
1036
        linecpy(dst[1], src[1], height, srcStride[1]);
1037
        linecpy(dst[2], src[2], height, srcStride[2]);
1038
    }else{
1039
        int y;
1040
        for(y=0; y
1041
            memcpy(&(dst[1][y*dstStride[1]]), &(src[1][y*srcStride[1]]), width);
1042
            memcpy(&(dst[2][y*dstStride[2]]), &(src[2][y*srcStride[2]]), width);
1043
        }
1044
    }
1045
}