Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
4349 Serge 1
/*
2
 * DSP utils
3
 * Copyright (c) 2000, 2001 Fabrice Bellard
4
 * Copyright (c) 2002-2004 Michael Niedermayer 
5
 *
6
 * gmc & q-pel & 32/64 bit based MC by Michael Niedermayer 
7
 *
8
 * This file is part of FFmpeg.
9
 *
10
 * FFmpeg is free software; you can redistribute it and/or
11
 * modify it under the terms of the GNU Lesser General Public
12
 * License as published by the Free Software Foundation; either
13
 * version 2.1 of the License, or (at your option) any later version.
14
 *
15
 * FFmpeg is distributed in the hope that it will be useful,
16
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18
 * Lesser General Public License for more details.
19
 *
20
 * You should have received a copy of the GNU Lesser General Public
21
 * License along with FFmpeg; if not, write to the Free Software
22
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23
 */
24
 
25
/**
26
 * @file
27
 * DSP utils
28
 */
29
 
30
#include "libavutil/attributes.h"
31
#include "libavutil/imgutils.h"
32
#include "libavutil/internal.h"
33
#include "avcodec.h"
34
#include "copy_block.h"
35
#include "dct.h"
36
#include "dsputil.h"
37
#include "simple_idct.h"
38
#include "faandct.h"
39
#include "faanidct.h"
40
#include "imgconvert.h"
41
#include "mathops.h"
42
#include "mpegvideo.h"
43
#include "config.h"
44
#include "diracdsp.h"
45
 
46
/* Squared-value lookup table, indexed as ff_squareTbl[256 + v];
 * zero-initialized here and presumably filled during DSP init —
 * the initialization code is not visible in this chunk. */
uint32_t ff_squareTbl[512] = {0, };
47
 
48
#define BIT_DEPTH 16
49
#include "dsputil_template.c"
50
#undef BIT_DEPTH
51
 
52
#define BIT_DEPTH 8
53
#include "dsputil_template.c"
54
 
55
// 0x7f7f7f7f or 0x7f7f7f7f7f7f7f7f or whatever, depending on the cpu's native arithmetic size
56
/* Per-byte constants replicated across the native word:
 * 0x7f7f7f7f (or 0x7f7f7f7f7f7f7f7f on 64-bit), etc. */
#define pb_7f (~0UL / 255 * 0x7f)
#define pb_80 (~0UL / 255 * 0x80)
58
 
59
/* Specific zigzag scan for 248 idct. NOTE that unlike the
60
   specification, we interleave the fields */
61
/* Specific zigzag scan for 2-4-8 IDCT. NOTE: unlike the specification,
 * the fields are interleaved here. (Table re-emitted to strip the
 * interleaved source-viewer line-number artifacts.) */
const uint8_t ff_zigzag248_direct[64] = {
     0,  8,  1,  9, 16, 24,  2, 10,
    17, 25, 32, 40, 48, 56, 33, 41,
    18, 26,  3, 11,  4, 12, 19, 27,
    34, 42, 49, 57, 50, 58, 35, 43,
    20, 28,  5, 13,  6, 14, 21, 29,
    36, 44, 51, 59, 52, 60, 37, 45,
    22, 30,  7, 15, 23, 31, 38, 46,
    53, 61, 54, 62, 39, 47, 55, 63,
};
71
 
72
/* not permutated inverse zigzag_direct + 1 for MMX quantizer */
73
DECLARE_ALIGNED(16, uint16_t, ff_inv_zigzag_direct16)[64];
74
 
75
/* Alternate (horizontal) coefficient scan order.
 * (Re-emitted to strip interleaved source-viewer line-number artifacts.) */
const uint8_t ff_alternate_horizontal_scan[64] = {
     0,  1,  2,  3,  8,  9, 16, 17,
    10, 11,  4,  5,  6,  7, 15, 14,
    13, 12, 19, 18, 24, 25, 32, 33,
    26, 27, 20, 21, 22, 23, 28, 29,
    30, 31, 34, 35, 40, 41, 48, 49,
    42, 43, 36, 37, 38, 39, 44, 45,
    46, 47, 50, 51, 56, 57, 58, 59,
    52, 53, 54, 55, 60, 61, 62, 63,
};
85
 
86
/* Alternate (vertical) coefficient scan order.
 * (Re-emitted to strip interleaved source-viewer line-number artifacts.) */
const uint8_t ff_alternate_vertical_scan[64] = {
     0,  8, 16, 24,  1,  9,  2, 10,
    17, 25, 32, 40, 48, 56, 57, 49,
    41, 33, 26, 18,  3, 11,  4, 12,
    19, 27, 34, 42, 50, 58, 35, 43,
    51, 59, 20, 28,  5, 13,  6, 14,
    21, 29, 36, 44, 52, 60, 37, 45,
    53, 61, 22, 30,  7, 15, 23, 31,
    38, 46, 54, 62, 39, 47, 55, 63,
};
96
 
97
/* Input permutation for the simple_idct_mmx */
98
/* Input permutation for simple_idct_mmx (FF_SIMPLE_IDCT_PERM).
 * (Re-emitted to strip interleaved source-viewer line-number artifacts.) */
static const uint8_t simple_mmx_permutation[64] = {
    0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D,
    0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D,
    0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D,
    0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F,
    0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F,
    0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D,
    0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F,
    0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
};
108
 
109
/* Per-row column permutation used to build FF_SSE2_IDCT_PERM below. */
static const uint8_t idct_sse2_row_perm[8] = {0, 4, 1, 5, 2, 6, 3, 7};
110
 
111
/**
 * Initialize a ScanTable: apply the IDCT coefficient permutation to the
 * scan order and precompute, for each scan position, the highest
 * permutated index seen so far (raster_end).
 */
av_cold void ff_init_scantable(uint8_t *permutation, ScanTable *st,
                               const uint8_t *src_scantable)
{
    int i, max;

    st->scantable = src_scantable;

    /* Permute the scan order through the IDCT input permutation. */
    for (i = 0; i < 64; i++)
        st->permutated[i] = permutation[src_scantable[i]];

    /* raster_end[i] = max permutated index over positions 0..i. */
    max = -1;
    for (i = 0; i < 64; i++) {
        if (st->permutated[i] > max)
            max = st->permutated[i];
        st->raster_end[i] = max;
    }
}
133
 
134
/**
 * Fill idct_permutation[0..63] with the coefficient reordering that the
 * selected IDCT implementation expects; logs an error for unknown types.
 */
av_cold void ff_init_scantable_permutation(uint8_t *idct_permutation,
                                           int idct_permutation_type)
{
    int n;

    switch (idct_permutation_type) {
    case FF_NO_IDCT_PERM:           /* identity */
        for (n = 0; n < 64; n++)
            idct_permutation[n] = n;
        break;
    case FF_LIBMPEG2_IDCT_PERM:     /* swap bit layout of the column index */
        for (n = 0; n < 64; n++)
            idct_permutation[n] = (n & 0x38) | ((n & 6) >> 1) | ((n & 1) << 2);
        break;
    case FF_SIMPLE_IDCT_PERM:       /* table-driven, see simple_mmx_permutation */
        for (n = 0; n < 64; n++)
            idct_permutation[n] = simple_mmx_permutation[n];
        break;
    case FF_TRANSPOSE_IDCT_PERM:    /* 8x8 transpose */
        for (n = 0; n < 64; n++)
            idct_permutation[n] = ((n & 7) << 3) | (n >> 3);
        break;
    case FF_PARTTRANS_IDCT_PERM:    /* partial transpose of low bit pairs */
        for (n = 0; n < 64; n++)
            idct_permutation[n] = (n & 0x24) | ((n & 3) << 3) | ((n >> 3) & 3);
        break;
    case FF_SSE2_IDCT_PERM:         /* row kept, columns via idct_sse2_row_perm */
        for (n = 0; n < 64; n++)
            idct_permutation[n] = (n & 0x38) | idct_sse2_row_perm[n & 7];
        break;
    default:
        av_log(NULL, AV_LOG_ERROR,
               "Internal error, IDCT permutation not set\n");
    }
}
168
 
169
/* Sum of all 256 pixel values of a 16x16 block at stride line_size. */
static int pix_sum_c(uint8_t *pix, int line_size)
{
    int row, col, total = 0;

    for (row = 0; row < 16; row++) {
        for (col = 0; col < 16; col++)
            total += pix[col];
        pix += line_size;
    }
    return total;
}
190
 
191
static int pix_norm1_c(uint8_t * pix, int line_size)
192
{
193
    int s, i, j;
194
    uint32_t *sq = ff_squareTbl + 256;
195
 
196
    s = 0;
197
    for (i = 0; i < 16; i++) {
198
        for (j = 0; j < 16; j += 8) {
199
#if 0
200
            s += sq[pix[0]];
201
            s += sq[pix[1]];
202
            s += sq[pix[2]];
203
            s += sq[pix[3]];
204
            s += sq[pix[4]];
205
            s += sq[pix[5]];
206
            s += sq[pix[6]];
207
            s += sq[pix[7]];
208
#else
209
#if HAVE_FAST_64BIT
210
            register uint64_t x=*(uint64_t*)pix;
211
            s += sq[x&0xff];
212
            s += sq[(x>>8)&0xff];
213
            s += sq[(x>>16)&0xff];
214
            s += sq[(x>>24)&0xff];
215
            s += sq[(x>>32)&0xff];
216
            s += sq[(x>>40)&0xff];
217
            s += sq[(x>>48)&0xff];
218
            s += sq[(x>>56)&0xff];
219
#else
220
            register uint32_t x=*(uint32_t*)pix;
221
            s += sq[x&0xff];
222
            s += sq[(x>>8)&0xff];
223
            s += sq[(x>>16)&0xff];
224
            s += sq[(x>>24)&0xff];
225
            x=*(uint32_t*)(pix+4);
226
            s += sq[x&0xff];
227
            s += sq[(x>>8)&0xff];
228
            s += sq[(x>>16)&0xff];
229
            s += sq[(x>>24)&0xff];
230
#endif
231
#endif
232
            pix += 8;
233
        }
234
        pix += line_size - 16;
235
    }
236
    return s;
237
}
238
 
239
/* Byte-swap w 32-bit words from src into dst (may be the same buffer).
 * Fix: the tail-loop header was garbled in this copy ("for(;i") —
 * restored to the upstream form "for (; i < w; i++)". */
static void bswap_buf(uint32_t *dst, const uint32_t *src, int w)
{
    int i;

    /* Main loop: eight words per iteration. */
    for (i = 0; i + 8 <= w; i += 8) {
        dst[i + 0] = av_bswap32(src[i + 0]);
        dst[i + 1] = av_bswap32(src[i + 1]);
        dst[i + 2] = av_bswap32(src[i + 2]);
        dst[i + 3] = av_bswap32(src[i + 3]);
        dst[i + 4] = av_bswap32(src[i + 4]);
        dst[i + 5] = av_bswap32(src[i + 5]);
        dst[i + 6] = av_bswap32(src[i + 6]);
        dst[i + 7] = av_bswap32(src[i + 7]);
    }
    /* Tail: remaining 0..7 words. */
    for (; i < w; i++)
        dst[i + 0] = av_bswap32(src[i + 0]);
}
256
 
257
/* Byte-swap len 16-bit values from src into dst. */
static void bswap16_buf(uint16_t *dst, const uint16_t *src, int len)
{
    const uint16_t *end = src + len;

    while (src < end)
        *dst++ = av_bswap16(*src++);
}
262
 
263
static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
264
{
265
    int s, i;
266
    uint32_t *sq = ff_squareTbl + 256;
267
 
268
    s = 0;
269
    for (i = 0; i < h; i++) {
270
        s += sq[pix1[0] - pix2[0]];
271
        s += sq[pix1[1] - pix2[1]];
272
        s += sq[pix1[2] - pix2[2]];
273
        s += sq[pix1[3] - pix2[3]];
274
        pix1 += line_size;
275
        pix2 += line_size;
276
    }
277
    return s;
278
}
279
 
280
static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h)
281
{
282
    int s, i;
283
    uint32_t *sq = ff_squareTbl + 256;
284
 
285
    s = 0;
286
    for (i = 0; i < h; i++) {
287
        s += sq[pix1[0] - pix2[0]];
288
        s += sq[pix1[1] - pix2[1]];
289
        s += sq[pix1[2] - pix2[2]];
290
        s += sq[pix1[3] - pix2[3]];
291
        s += sq[pix1[4] - pix2[4]];
292
        s += sq[pix1[5] - pix2[5]];
293
        s += sq[pix1[6] - pix2[6]];
294
        s += sq[pix1[7] - pix2[7]];
295
        pix1 += line_size;
296
        pix2 += line_size;
297
    }
298
    return s;
299
}
300
 
301
static int sse16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
302
{
303
    int s, i;
304
    uint32_t *sq = ff_squareTbl + 256;
305
 
306
    s = 0;
307
    for (i = 0; i < h; i++) {
308
        s += sq[pix1[ 0] - pix2[ 0]];
309
        s += sq[pix1[ 1] - pix2[ 1]];
310
        s += sq[pix1[ 2] - pix2[ 2]];
311
        s += sq[pix1[ 3] - pix2[ 3]];
312
        s += sq[pix1[ 4] - pix2[ 4]];
313
        s += sq[pix1[ 5] - pix2[ 5]];
314
        s += sq[pix1[ 6] - pix2[ 6]];
315
        s += sq[pix1[ 7] - pix2[ 7]];
316
        s += sq[pix1[ 8] - pix2[ 8]];
317
        s += sq[pix1[ 9] - pix2[ 9]];
318
        s += sq[pix1[10] - pix2[10]];
319
        s += sq[pix1[11] - pix2[11]];
320
        s += sq[pix1[12] - pix2[12]];
321
        s += sq[pix1[13] - pix2[13]];
322
        s += sq[pix1[14] - pix2[14]];
323
        s += sq[pix1[15] - pix2[15]];
324
 
325
        pix1 += line_size;
326
        pix2 += line_size;
327
    }
328
    return s;
329
}
330
 
331
/* Write the per-pixel difference of two 8x8 blocks (s1 - s2) into
 * a 64-element int16 block in raster order. */
static void diff_pixels_c(int16_t *av_restrict block, const uint8_t *s1,
                          const uint8_t *s2, int stride)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            block[col] = s1[col] - s2[col];
        s1 += stride;
        s2 += stride;
        block += 8;
    }
}
350
 
351
/* Store an 8x8 int16 block into pixels, clamping each value to [0,255]. */
static void put_pixels_clamped_c(const int16_t *block, uint8_t *av_restrict pixels,
                                 int line_size)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            pixels[col] = av_clip_uint8(block[col]);
        pixels += line_size;
        block += 8;
    }
}
371
 
372
/* Store the top-left 4x4 of an 8-wide int16 block, clamped to [0,255].
 * Note: block still advances by 8 per row (8-wide storage layout). */
static void put_pixels_clamped4_c(const int16_t *block, uint8_t *av_restrict pixels,
                                 int line_size)
{
    int row, col;

    for (row = 0; row < 4; row++) {
        for (col = 0; col < 4; col++)
            pixels[col] = av_clip_uint8(block[col]);
        pixels += line_size;
        block += 8;
    }
}
388
 
389
/* Store the top-left 2x2 of an 8-wide int16 block, clamped to [0,255].
 * Note: block still advances by 8 per row (8-wide storage layout). */
static void put_pixels_clamped2_c(const int16_t *block, uint8_t *av_restrict pixels,
                                 int line_size)
{
    int row, col;

    for (row = 0; row < 2; row++) {
        for (col = 0; col < 2; col++)
            pixels[col] = av_clip_uint8(block[col]);
        pixels += line_size;
        block += 8;
    }
}
403
 
404
static void put_signed_pixels_clamped_c(const int16_t *block,
405
                                        uint8_t *av_restrict pixels,
406
                                        int line_size)
407
{
408
    int i, j;
409
 
410
    for (i = 0; i < 8; i++) {
411
        for (j = 0; j < 8; j++) {
412
            if (*block < -128)
413
                *pixels = 0;
414
            else if (*block > 127)
415
                *pixels = 255;
416
            else
417
                *pixels = (uint8_t)(*block + 128);
418
            block++;
419
            pixels++;
420
        }
421
        pixels += (line_size - 8);
422
    }
423
}
424
 
425
/* Add an 8x8 int16 block onto pixels WITHOUT clamping (wraps modulo 256). */
static void add_pixels8_c(uint8_t *av_restrict pixels,
                          int16_t *block,
                          int line_size)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            pixels[col] += block[col];
        pixels += line_size;
        block += 8;
    }
}
444
 
445
/* Add an 8x8 int16 block onto pixels, clamping each sum to [0,255]. */
static void add_pixels_clamped_c(const int16_t *block, uint8_t *av_restrict pixels,
                                 int line_size)
{
    int row, col;

    for (row = 0; row < 8; row++) {
        for (col = 0; col < 8; col++)
            pixels[col] = av_clip_uint8(pixels[col] + block[col]);
        pixels += line_size;
        block += 8;
    }
}
464
 
465
/* Add the top-left 4x4 of an 8-wide int16 block onto pixels, clamped.
 * Note: block still advances by 8 per row (8-wide storage layout). */
static void add_pixels_clamped4_c(const int16_t *block, uint8_t *av_restrict pixels,
                          int line_size)
{
    int row, col;

    for (row = 0; row < 4; row++) {
        for (col = 0; col < 4; col++)
            pixels[col] = av_clip_uint8(pixels[col] + block[col]);
        pixels += line_size;
        block += 8;
    }
}
480
 
481
/* Add the top-left 2x2 of an 8-wide int16 block onto pixels, clamped.
 * Note: block still advances by 8 per row (8-wide storage layout). */
static void add_pixels_clamped2_c(const int16_t *block, uint8_t *av_restrict pixels,
                          int line_size)
{
    int row, col;

    for (row = 0; row < 2; row++) {
        for (col = 0; col < 2; col++)
            pixels[col] = av_clip_uint8(pixels[col] + block[col]);
        pixels += line_size;
        block += 8;
    }
}
494
 
495
/* Sum of absolute values of all 64 DCT coefficients. */
static int sum_abs_dctelem_c(int16_t *block)
{
    int idx, total = 0;

    for (idx = 0; idx < 64; idx++)
        total += FFABS(block[idx]);
    return total;
}
502
 
503
/* Fill a 16-wide block of height h with a constant byte value. */
static void fill_block16_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    uint8_t *row = block;
    int y;

    for (y = 0; y < h; y++, row += line_size)
        memset(row, value, 16);
}
512
 
513
/* Fill an 8-wide block of height h with a constant byte value. */
static void fill_block8_c(uint8_t *block, uint8_t value, int line_size, int h)
{
    uint8_t *row = block;
    int y;

    for (y = 0; y < h; y++, row += line_size)
        memset(row, value, 8);
}
522
 
523
/* Rounded averages of 2 / 4 values.
 * Fix: parenthesize macro arguments (hygiene) so operator-precedence of a
 * caller expression cannot change the result. */
#define avg2(a, b)       (((a) + (b) + 1) >> 1)
#define avg4(a, b, c, d) (((a) + (b) + (c) + (d) + 2) >> 2)
525
 
526
/* One-warp-point GMC: bilinear interpolation of an 8-pixel-wide strip with
 * 1/16-pel fractional offsets (x16, y16) and 8-bit rounding.
 * Fix: the loop header was garbled in this copy ("for(i=0; i") —
 * restored to the upstream form "for (i = 0; i < h; i++)". */
static void gmc1_c(uint8_t *dst, uint8_t *src, int stride, int h,
                   int x16, int y16, int rounder)
{
    /* Bilinear weights; A+B+C+D == 256. */
    const int A = (16 - x16) * (16 - y16);
    const int B = (     x16) * (16 - y16);
    const int C = (16 - x16) * (     y16);
    const int D = (     x16) * (     y16);
    int i;

    for (i = 0; i < h; i++) {
        dst[0] = (A * src[0] + B * src[1] + C * src[stride + 0] + D * src[stride + 1] + rounder) >> 8;
        dst[1] = (A * src[1] + B * src[2] + C * src[stride + 1] + D * src[stride + 2] + rounder) >> 8;
        dst[2] = (A * src[2] + B * src[3] + C * src[stride + 2] + D * src[stride + 3] + rounder) >> 8;
        dst[3] = (A * src[3] + B * src[4] + C * src[stride + 3] + D * src[stride + 4] + rounder) >> 8;
        dst[4] = (A * src[4] + B * src[5] + C * src[stride + 4] + D * src[stride + 5] + rounder) >> 8;
        dst[5] = (A * src[5] + B * src[6] + C * src[stride + 5] + D * src[stride + 6] + rounder) >> 8;
        dst[6] = (A * src[6] + B * src[7] + C * src[stride + 6] + D * src[stride + 7] + rounder) >> 8;
        dst[7] = (A * src[7] + B * src[8] + C * src[stride + 7] + D * src[stride + 8] + rounder) >> 8;
        dst += stride;
        src += stride;
    }
}
548
 
549
/* Global motion compensation with an affine vector field: for each output
 * pixel the source position is computed from (ox, oy) advanced by the
 * (dxx, dxy, dyx, dyy) derivatives, sampled bilinearly with edge clamping.
 * Fix: two lines were garbled in this copy ("const int s= 1<" and
 * "for(y=0; y") — restored to the upstream forms "1 << shift" and
 * "y < h". */
void ff_gmc_c(uint8_t *dst, uint8_t *src, int stride, int h, int ox, int oy,
              int dxx, int dxy, int dyx, int dyy, int shift, int r,
              int width, int height)
{
    int y, vx, vy;
    const int s = 1 << shift;   /* sub-pel scale */

    width--;                    /* convert to max valid coordinate */
    height--;

    for (y = 0; y < h; y++) {
        int x;

        vx = ox;
        vy = oy;
        for (x = 0; x < 8; x++) { // XXX FIXME optimize
            int src_x, src_y, frac_x, frac_y, index;

            src_x  = vx >> 16;
            src_y  = vy >> 16;
            frac_x = src_x & (s - 1);
            frac_y = src_y & (s - 1);
            src_x >>= shift;
            src_y >>= shift;

            if ((unsigned) src_x < width) {
                if ((unsigned) src_y < height) {
                    /* Fully inside: bilinear interpolation. */
                    index = src_x + src_y * stride;
                    dst[y * stride + x] = (  (  src[index         ] * (s - frac_x)
                                              + src[index       + 1] *      frac_x ) * (s - frac_y)
                                           + (  src[index + stride  ] * (s - frac_x)
                                              + src[index + stride + 1] *  frac_x ) *      frac_y
                                           + r) >> (shift * 2);
                } else {
                    /* Clamp vertically; interpolate horizontally only. */
                    index = src_x + av_clip(src_y, 0, height) * stride;
                    dst[y * stride + x] = ( (  src[index    ] * (s - frac_x)
                                             + src[index + 1] *      frac_x ) * s
                                           + r) >> (shift * 2);
                }
            } else {
                if ((unsigned) src_y < height) {
                    /* Clamp horizontally; interpolate vertically only. */
                    index = av_clip(src_x, 0, width) + src_y * stride;
                    dst[y * stride + x] = (  (  src[index         ] * (s - frac_y)
                                              + src[index + stride] *      frac_y ) * s
                                           + r) >> (shift * 2);
                } else {
                    /* Clamp both: nearest edge pixel. */
                    index = av_clip(src_x, 0, width) + av_clip(src_y, 0, height) * stride;
                    dst[y * stride + x] = src[index];
                }
            }

            vx += dxx;
            vy += dyx;
        }
        ox += dxy;
        oy += dyy;
    }
}
606
 
607
/* Full-pel copy: dispatch to the width-specific 8-bit copy routine;
 * unsupported widths are ignored, matching the original switch. */
static inline void put_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 2)
        put_pixels2_8_c(dst, src, stride, height);
    else if (width == 4)
        put_pixels4_8_c(dst, src, stride, height);
    else if (width == 8)
        put_pixels8_8_c(dst, src, stride, height);
    else if (width == 16)
        put_pixels16_8_c(dst, src, stride, height);
}
615
 
616
/* Third-pel MC (1,0): horizontal 2:1 weighted average of src[x], src[x+1]. */
static inline void put_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (683 * (2 * src[x] + src[x + 1] + 1)) >> 11;
        src += stride;
        dst += stride;
    }
}
626
 
627
/* Third-pel MC (2,0): horizontal 1:2 weighted average of src[x], src[x+1]. */
static inline void put_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (683 * (src[x] + 2 * src[x + 1] + 1)) >> 11;
        src += stride;
        dst += stride;
    }
}
637
 
638
/* Third-pel MC (0,1): vertical 2:1 weighted average of src[x], src[x+stride]. */
static inline void put_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (683 * (2 * src[x] + src[x + stride] + 1)) >> 11;
        src += stride;
        dst += stride;
    }
}
648
 
649
/* Third-pel MC (1,1): 2-D weighted average (weights 4/3/3/2). */
static inline void put_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (2731 * (4 * src[x] + 3 * src[x + 1] + 3 * src[x + stride] + 2 * src[x + stride + 1] + 6)) >> 15;
        src += stride;
        dst += stride;
    }
}
659
 
660
/* Third-pel MC (1,2): 2-D weighted average (weights 3/2/4/3). */
static inline void put_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (2731 * (3 * src[x] + 2 * src[x + 1] + 4 * src[x + stride] + 3 * src[x + stride + 1] + 6)) >> 15;
        src += stride;
        dst += stride;
    }
}
670
 
671
/* Third-pel MC (0,2): vertical 1:2 weighted average of src[x], src[x+stride]. */
static inline void put_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (683 * (src[x] + 2 * src[x + stride] + 1)) >> 11;
        src += stride;
        dst += stride;
    }
}
681
 
682
/* Third-pel MC (2,1): 2-D weighted average (weights 3/4/2/3). */
static inline void put_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (2731 * (3 * src[x] + 4 * src[x + 1] + 2 * src[x + stride] + 3 * src[x + stride + 1] + 6)) >> 15;
        src += stride;
        dst += stride;
    }
}
692
 
693
/* Third-pel MC (2,2): 2-D weighted average (weights 2/3/3/4). */
static inline void put_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (2731 * (2 * src[x] + 3 * src[x + 1] + 3 * src[x + stride] + 4 * src[x + stride + 1] + 6)) >> 15;
        src += stride;
        dst += stride;
    }
}
703
 
704
/* Full-pel averaging copy: dispatch to the width-specific 8-bit averager;
 * unsupported widths are ignored, matching the original switch. */
static inline void avg_tpel_pixels_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    if (width == 2)
        avg_pixels2_8_c(dst, src, stride, height);
    else if (width == 4)
        avg_pixels4_8_c(dst, src, stride, height);
    else if (width == 8)
        avg_pixels8_8_c(dst, src, stride, height);
    else if (width == 16)
        avg_pixels16_8_c(dst, src, stride, height);
}
712
 
713
/* Third-pel MC (1,0) with rounded averaging into the existing dst. */
static inline void avg_tpel_pixels_mc10_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((683 * (2 * src[x] + src[x + 1] + 1)) >> 11) + 1) >> 1;
        src += stride;
        dst += stride;
    }
}
723
 
724
/* Third-pel MC (2,0) with rounded averaging into the existing dst. */
static inline void avg_tpel_pixels_mc20_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((683 * (src[x] + 2 * src[x + 1] + 1)) >> 11) + 1) >> 1;
        src += stride;
        dst += stride;
    }
}
734
 
735
/* Third-pel MC (0,1) with rounded averaging into the existing dst. */
static inline void avg_tpel_pixels_mc01_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((683 * (2 * src[x] + src[x + stride] + 1)) >> 11) + 1) >> 1;
        src += stride;
        dst += stride;
    }
}
745
 
746
/* Third-pel MC (1,1) with rounded averaging into the existing dst. */
static inline void avg_tpel_pixels_mc11_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((2731 * (4 * src[x] + 3 * src[x + 1] + 3 * src[x + stride] + 2 * src[x + stride + 1] + 6)) >> 15) + 1) >> 1;
        src += stride;
        dst += stride;
    }
}
756
 
757
/* Third-pel MC (1,2) with rounded averaging into the existing dst. */
static inline void avg_tpel_pixels_mc12_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((2731 * (3 * src[x] + 2 * src[x + 1] + 4 * src[x + stride] + 3 * src[x + stride + 1] + 6)) >> 15) + 1) >> 1;
        src += stride;
        dst += stride;
    }
}
767
 
768
/* Third-pel MC (0,2) with rounded averaging into the existing dst. */
static inline void avg_tpel_pixels_mc02_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((683 * (src[x] + 2 * src[x + stride] + 1)) >> 11) + 1) >> 1;
        src += stride;
        dst += stride;
    }
}
778
 
779
/* Third-pel MC (2,1) with rounded averaging into the existing dst. */
static inline void avg_tpel_pixels_mc21_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((2731 * (3 * src[x] + 4 * src[x + 1] + 2 * src[x + stride] + 3 * src[x + stride + 1] + 6)) >> 15) + 1) >> 1;
        src += stride;
        dst += stride;
    }
}
789
 
790
/* Third-pel MC (2,2) with rounded averaging into the existing dst. */
static inline void avg_tpel_pixels_mc22_c(uint8_t *dst, const uint8_t *src, int stride, int width, int height){
    int x, y;
    for (y = 0; y < height; y++) {
        for (x = 0; x < width; x++)
            dst[x] = (dst[x] + ((2731 * (2 * src[x] + 3 * src[x + 1] + 3 * src[x + stride] + 4 * src[x + stride + 1] + 6)) >> 15) + 1) >> 1;
        src += stride;
        dst += stride;
    }
}
800
 
801
#define QPEL_MC(r, OPNAME, RND, OP) \
802
static void OPNAME ## mpeg4_qpel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
803
    const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
804
    int i;\
805
    for(i=0; i
806
    {\
807
        OP(dst[0], (src[0]+src[1])*20 - (src[0]+src[2])*6 + (src[1]+src[3])*3 - (src[2]+src[4]));\
808
        OP(dst[1], (src[1]+src[2])*20 - (src[0]+src[3])*6 + (src[0]+src[4])*3 - (src[1]+src[5]));\
809
        OP(dst[2], (src[2]+src[3])*20 - (src[1]+src[4])*6 + (src[0]+src[5])*3 - (src[0]+src[6]));\
810
        OP(dst[3], (src[3]+src[4])*20 - (src[2]+src[5])*6 + (src[1]+src[6])*3 - (src[0]+src[7]));\
811
        OP(dst[4], (src[4]+src[5])*20 - (src[3]+src[6])*6 + (src[2]+src[7])*3 - (src[1]+src[8]));\
812
        OP(dst[5], (src[5]+src[6])*20 - (src[4]+src[7])*6 + (src[3]+src[8])*3 - (src[2]+src[8]));\
813
        OP(dst[6], (src[6]+src[7])*20 - (src[5]+src[8])*6 + (src[4]+src[8])*3 - (src[3]+src[7]));\
814
        OP(dst[7], (src[7]+src[8])*20 - (src[6]+src[8])*6 + (src[5]+src[7])*3 - (src[4]+src[6]));\
815
        dst+=dstStride;\
816
        src+=srcStride;\
817
    }\
818
}\
819
\
820
static void OPNAME ## mpeg4_qpel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
821
    const int w=8;\
822
    const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
823
    int i;\
824
    for(i=0; i
825
    {\
826
        const int src0= src[0*srcStride];\
827
        const int src1= src[1*srcStride];\
828
        const int src2= src[2*srcStride];\
829
        const int src3= src[3*srcStride];\
830
        const int src4= src[4*srcStride];\
831
        const int src5= src[5*srcStride];\
832
        const int src6= src[6*srcStride];\
833
        const int src7= src[7*srcStride];\
834
        const int src8= src[8*srcStride];\
835
        OP(dst[0*dstStride], (src0+src1)*20 - (src0+src2)*6 + (src1+src3)*3 - (src2+src4));\
836
        OP(dst[1*dstStride], (src1+src2)*20 - (src0+src3)*6 + (src0+src4)*3 - (src1+src5));\
837
        OP(dst[2*dstStride], (src2+src3)*20 - (src1+src4)*6 + (src0+src5)*3 - (src0+src6));\
838
        OP(dst[3*dstStride], (src3+src4)*20 - (src2+src5)*6 + (src1+src6)*3 - (src0+src7));\
839
        OP(dst[4*dstStride], (src4+src5)*20 - (src3+src6)*6 + (src2+src7)*3 - (src1+src8));\
840
        OP(dst[5*dstStride], (src5+src6)*20 - (src4+src7)*6 + (src3+src8)*3 - (src2+src8));\
841
        OP(dst[6*dstStride], (src6+src7)*20 - (src5+src8)*6 + (src4+src8)*3 - (src3+src7));\
842
        OP(dst[7*dstStride], (src7+src8)*20 - (src6+src8)*6 + (src5+src7)*3 - (src4+src6));\
843
        dst++;\
844
        src++;\
845
    }\
846
}\
847
\
848
static void OPNAME ## mpeg4_qpel16_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){\
849
    const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
850
    int i;\
851
    \
852
    for(i=0; i
853
    {\
854
        OP(dst[ 0], (src[ 0]+src[ 1])*20 - (src[ 0]+src[ 2])*6 + (src[ 1]+src[ 3])*3 - (src[ 2]+src[ 4]));\
855
        OP(dst[ 1], (src[ 1]+src[ 2])*20 - (src[ 0]+src[ 3])*6 + (src[ 0]+src[ 4])*3 - (src[ 1]+src[ 5]));\
856
        OP(dst[ 2], (src[ 2]+src[ 3])*20 - (src[ 1]+src[ 4])*6 + (src[ 0]+src[ 5])*3 - (src[ 0]+src[ 6]));\
857
        OP(dst[ 3], (src[ 3]+src[ 4])*20 - (src[ 2]+src[ 5])*6 + (src[ 1]+src[ 6])*3 - (src[ 0]+src[ 7]));\
858
        OP(dst[ 4], (src[ 4]+src[ 5])*20 - (src[ 3]+src[ 6])*6 + (src[ 2]+src[ 7])*3 - (src[ 1]+src[ 8]));\
859
        OP(dst[ 5], (src[ 5]+src[ 6])*20 - (src[ 4]+src[ 7])*6 + (src[ 3]+src[ 8])*3 - (src[ 2]+src[ 9]));\
860
        OP(dst[ 6], (src[ 6]+src[ 7])*20 - (src[ 5]+src[ 8])*6 + (src[ 4]+src[ 9])*3 - (src[ 3]+src[10]));\
861
        OP(dst[ 7], (src[ 7]+src[ 8])*20 - (src[ 6]+src[ 9])*6 + (src[ 5]+src[10])*3 - (src[ 4]+src[11]));\
862
        OP(dst[ 8], (src[ 8]+src[ 9])*20 - (src[ 7]+src[10])*6 + (src[ 6]+src[11])*3 - (src[ 5]+src[12]));\
863
        OP(dst[ 9], (src[ 9]+src[10])*20 - (src[ 8]+src[11])*6 + (src[ 7]+src[12])*3 - (src[ 6]+src[13]));\
864
        OP(dst[10], (src[10]+src[11])*20 - (src[ 9]+src[12])*6 + (src[ 8]+src[13])*3 - (src[ 7]+src[14]));\
865
        OP(dst[11], (src[11]+src[12])*20 - (src[10]+src[13])*6 + (src[ 9]+src[14])*3 - (src[ 8]+src[15]));\
866
        OP(dst[12], (src[12]+src[13])*20 - (src[11]+src[14])*6 + (src[10]+src[15])*3 - (src[ 9]+src[16]));\
867
        OP(dst[13], (src[13]+src[14])*20 - (src[12]+src[15])*6 + (src[11]+src[16])*3 - (src[10]+src[16]));\
868
        OP(dst[14], (src[14]+src[15])*20 - (src[13]+src[16])*6 + (src[12]+src[16])*3 - (src[11]+src[15]));\
869
        OP(dst[15], (src[15]+src[16])*20 - (src[14]+src[16])*6 + (src[13]+src[15])*3 - (src[12]+src[14]));\
870
        dst+=dstStride;\
871
        src+=srcStride;\
872
    }\
873
}\
874
\
875
static void OPNAME ## mpeg4_qpel16_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride){\
876
    const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;\
877
    int i;\
878
    const int w=16;\
879
    for(i=0; i
880
    {\
881
        const int src0= src[0*srcStride];\
882
        const int src1= src[1*srcStride];\
883
        const int src2= src[2*srcStride];\
884
        const int src3= src[3*srcStride];\
885
        const int src4= src[4*srcStride];\
886
        const int src5= src[5*srcStride];\
887
        const int src6= src[6*srcStride];\
888
        const int src7= src[7*srcStride];\
889
        const int src8= src[8*srcStride];\
890
        const int src9= src[9*srcStride];\
891
        const int src10= src[10*srcStride];\
892
        const int src11= src[11*srcStride];\
893
        const int src12= src[12*srcStride];\
894
        const int src13= src[13*srcStride];\
895
        const int src14= src[14*srcStride];\
896
        const int src15= src[15*srcStride];\
897
        const int src16= src[16*srcStride];\
898
        OP(dst[ 0*dstStride], (src0 +src1 )*20 - (src0 +src2 )*6 + (src1 +src3 )*3 - (src2 +src4 ));\
899
        OP(dst[ 1*dstStride], (src1 +src2 )*20 - (src0 +src3 )*6 + (src0 +src4 )*3 - (src1 +src5 ));\
900
        OP(dst[ 2*dstStride], (src2 +src3 )*20 - (src1 +src4 )*6 + (src0 +src5 )*3 - (src0 +src6 ));\
901
        OP(dst[ 3*dstStride], (src3 +src4 )*20 - (src2 +src5 )*6 + (src1 +src6 )*3 - (src0 +src7 ));\
902
        OP(dst[ 4*dstStride], (src4 +src5 )*20 - (src3 +src6 )*6 + (src2 +src7 )*3 - (src1 +src8 ));\
903
        OP(dst[ 5*dstStride], (src5 +src6 )*20 - (src4 +src7 )*6 + (src3 +src8 )*3 - (src2 +src9 ));\
904
        OP(dst[ 6*dstStride], (src6 +src7 )*20 - (src5 +src8 )*6 + (src4 +src9 )*3 - (src3 +src10));\
905
        OP(dst[ 7*dstStride], (src7 +src8 )*20 - (src6 +src9 )*6 + (src5 +src10)*3 - (src4 +src11));\
906
        OP(dst[ 8*dstStride], (src8 +src9 )*20 - (src7 +src10)*6 + (src6 +src11)*3 - (src5 +src12));\
907
        OP(dst[ 9*dstStride], (src9 +src10)*20 - (src8 +src11)*6 + (src7 +src12)*3 - (src6 +src13));\
908
        OP(dst[10*dstStride], (src10+src11)*20 - (src9 +src12)*6 + (src8 +src13)*3 - (src7 +src14));\
909
        OP(dst[11*dstStride], (src11+src12)*20 - (src10+src13)*6 + (src9 +src14)*3 - (src8 +src15));\
910
        OP(dst[12*dstStride], (src12+src13)*20 - (src11+src14)*6 + (src10+src15)*3 - (src9 +src16));\
911
        OP(dst[13*dstStride], (src13+src14)*20 - (src12+src15)*6 + (src11+src16)*3 - (src10+src16));\
912
        OP(dst[14*dstStride], (src14+src15)*20 - (src13+src16)*6 + (src12+src16)*3 - (src11+src15));\
913
        OP(dst[15*dstStride], (src15+src16)*20 - (src14+src16)*6 + (src13+src15)*3 - (src12+src14));\
914
        dst++;\
915
        src++;\
916
    }\
917
}\
918
\
919
static void OPNAME ## qpel8_mc10_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
920
{\
921
    uint8_t half[64];\
922
    put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
923
    OPNAME ## pixels8_l2_8(dst, src, half, stride, stride, 8, 8);\
924
}\
925
\
926
static void OPNAME ## qpel8_mc20_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
927
{\
928
    OPNAME ## mpeg4_qpel8_h_lowpass(dst, src, stride, stride, 8);\
929
}\
930
\
931
static void OPNAME ## qpel8_mc30_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
932
{\
933
    uint8_t half[64];\
934
    put ## RND ## mpeg4_qpel8_h_lowpass(half, src, 8, stride, 8);\
935
    OPNAME ## pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);\
936
}\
937
\
938
static void OPNAME ## qpel8_mc01_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
939
{\
940
    uint8_t full[16*9];\
941
    uint8_t half[64];\
942
    copy_block9(full, src, 16, stride, 9);\
943
    put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
944
    OPNAME ## pixels8_l2_8(dst, full, half, stride, 16, 8, 8);\
945
}\
946
\
947
static void OPNAME ## qpel8_mc02_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
948
{\
949
    uint8_t full[16*9];\
950
    copy_block9(full, src, 16, stride, 9);\
951
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, full, stride, 16);\
952
}\
953
\
954
static void OPNAME ## qpel8_mc03_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
955
{\
956
    uint8_t full[16*9];\
957
    uint8_t half[64];\
958
    copy_block9(full, src, 16, stride, 9);\
959
    put ## RND ## mpeg4_qpel8_v_lowpass(half, full, 8, 16);\
960
    OPNAME ## pixels8_l2_8(dst, full+16, half, stride, 16, 8, 8);\
961
}\
962
void ff_ ## OPNAME ## qpel8_mc11_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
963
{\
964
    uint8_t full[16*9];\
965
    uint8_t halfH[72];\
966
    uint8_t halfV[64];\
967
    uint8_t halfHV[64];\
968
    copy_block9(full, src, 16, stride, 9);\
969
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
970
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
971
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
972
    OPNAME ## pixels8_l4_8(dst, full, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
973
}\
974
static void OPNAME ## qpel8_mc11_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
975
{\
976
    uint8_t full[16*9];\
977
    uint8_t halfH[72];\
978
    uint8_t halfHV[64];\
979
    copy_block9(full, src, 16, stride, 9);\
980
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
981
    put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
982
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
983
    OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
984
}\
985
void ff_ ## OPNAME ## qpel8_mc31_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
986
{\
987
    uint8_t full[16*9];\
988
    uint8_t halfH[72];\
989
    uint8_t halfV[64];\
990
    uint8_t halfHV[64];\
991
    copy_block9(full, src, 16, stride, 9);\
992
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
993
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
994
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
995
    OPNAME ## pixels8_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
996
}\
997
static void OPNAME ## qpel8_mc31_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
998
{\
999
    uint8_t full[16*9];\
1000
    uint8_t halfH[72];\
1001
    uint8_t halfHV[64];\
1002
    copy_block9(full, src, 16, stride, 9);\
1003
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1004
    put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
1005
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1006
    OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
1007
}\
1008
void ff_ ## OPNAME ## qpel8_mc13_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1009
{\
1010
    uint8_t full[16*9];\
1011
    uint8_t halfH[72];\
1012
    uint8_t halfV[64];\
1013
    uint8_t halfHV[64];\
1014
    copy_block9(full, src, 16, stride, 9);\
1015
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1016
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1017
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1018
    OPNAME ## pixels8_l4_8(dst, full+16, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1019
}\
1020
static void OPNAME ## qpel8_mc13_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1021
{\
1022
    uint8_t full[16*9];\
1023
    uint8_t halfH[72];\
1024
    uint8_t halfHV[64];\
1025
    copy_block9(full, src, 16, stride, 9);\
1026
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1027
    put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
1028
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1029
    OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1030
}\
1031
void ff_ ## OPNAME ## qpel8_mc33_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1032
{\
1033
    uint8_t full[16*9];\
1034
    uint8_t halfH[72];\
1035
    uint8_t halfV[64];\
1036
    uint8_t halfHV[64];\
1037
    copy_block9(full, src, 16, stride, 9);\
1038
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full  , 8, 16, 9);\
1039
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1040
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1041
    OPNAME ## pixels8_l4_8(dst, full+17, halfH+8, halfV, halfHV, stride, 16, 8, 8, 8, 8);\
1042
}\
1043
static void OPNAME ## qpel8_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1044
{\
1045
    uint8_t full[16*9];\
1046
    uint8_t halfH[72];\
1047
    uint8_t halfHV[64];\
1048
    copy_block9(full, src, 16, stride, 9);\
1049
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1050
    put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
1051
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1052
    OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1053
}\
1054
static void OPNAME ## qpel8_mc21_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1055
{\
1056
    uint8_t halfH[72];\
1057
    uint8_t halfHV[64];\
1058
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1059
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1060
    OPNAME ## pixels8_l2_8(dst, halfH, halfHV, stride, 8, 8, 8);\
1061
}\
1062
static void OPNAME ## qpel8_mc23_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1063
{\
1064
    uint8_t halfH[72];\
1065
    uint8_t halfHV[64];\
1066
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1067
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1068
    OPNAME ## pixels8_l2_8(dst, halfH+8, halfHV, stride, 8, 8, 8);\
1069
}\
1070
void ff_ ## OPNAME ## qpel8_mc12_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1071
{\
1072
    uint8_t full[16*9];\
1073
    uint8_t halfH[72];\
1074
    uint8_t halfV[64];\
1075
    uint8_t halfHV[64];\
1076
    copy_block9(full, src, 16, stride, 9);\
1077
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1078
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full, 8, 16);\
1079
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1080
    OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
1081
}\
1082
static void OPNAME ## qpel8_mc12_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1083
{\
1084
    uint8_t full[16*9];\
1085
    uint8_t halfH[72];\
1086
    copy_block9(full, src, 16, stride, 9);\
1087
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1088
    put ## RND ## pixels8_l2_8(halfH, halfH, full, 8, 8, 16, 9);\
1089
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1090
}\
1091
void ff_ ## OPNAME ## qpel8_mc32_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1092
{\
1093
    uint8_t full[16*9];\
1094
    uint8_t halfH[72];\
1095
    uint8_t halfV[64];\
1096
    uint8_t halfHV[64];\
1097
    copy_block9(full, src, 16, stride, 9);\
1098
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1099
    put ## RND ## mpeg4_qpel8_v_lowpass(halfV, full+1, 8, 16);\
1100
    put ## RND ## mpeg4_qpel8_v_lowpass(halfHV, halfH, 8, 8);\
1101
    OPNAME ## pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);\
1102
}\
1103
static void OPNAME ## qpel8_mc32_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1104
{\
1105
    uint8_t full[16*9];\
1106
    uint8_t halfH[72];\
1107
    copy_block9(full, src, 16, stride, 9);\
1108
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, full, 8, 16, 9);\
1109
    put ## RND ## pixels8_l2_8(halfH, halfH, full+1, 8, 8, 16, 9);\
1110
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1111
}\
1112
static void OPNAME ## qpel8_mc22_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1113
{\
1114
    uint8_t halfH[72];\
1115
    put ## RND ## mpeg4_qpel8_h_lowpass(halfH, src, 8, stride, 9);\
1116
    OPNAME ## mpeg4_qpel8_v_lowpass(dst, halfH, stride, 8);\
1117
}\
1118
\
1119
static void OPNAME ## qpel16_mc10_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1120
{\
1121
    uint8_t half[256];\
1122
    put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1123
    OPNAME ## pixels16_l2_8(dst, src, half, stride, stride, 16, 16);\
1124
}\
1125
\
1126
static void OPNAME ## qpel16_mc20_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1127
{\
1128
    OPNAME ## mpeg4_qpel16_h_lowpass(dst, src, stride, stride, 16);\
1129
}\
1130
\
1131
static void OPNAME ## qpel16_mc30_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1132
{\
1133
    uint8_t half[256];\
1134
    put ## RND ## mpeg4_qpel16_h_lowpass(half, src, 16, stride, 16);\
1135
    OPNAME ## pixels16_l2_8(dst, src+1, half, stride, stride, 16, 16);\
1136
}\
1137
\
1138
static void OPNAME ## qpel16_mc01_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1139
{\
1140
    uint8_t full[24*17];\
1141
    uint8_t half[256];\
1142
    copy_block17(full, src, 24, stride, 17);\
1143
    put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
1144
    OPNAME ## pixels16_l2_8(dst, full, half, stride, 24, 16, 16);\
1145
}\
1146
\
1147
static void OPNAME ## qpel16_mc02_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1148
{\
1149
    uint8_t full[24*17];\
1150
    copy_block17(full, src, 24, stride, 17);\
1151
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, full, stride, 24);\
1152
}\
1153
\
1154
static void OPNAME ## qpel16_mc03_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1155
{\
1156
    uint8_t full[24*17];\
1157
    uint8_t half[256];\
1158
    copy_block17(full, src, 24, stride, 17);\
1159
    put ## RND ## mpeg4_qpel16_v_lowpass(half, full, 16, 24);\
1160
    OPNAME ## pixels16_l2_8(dst, full+24, half, stride, 24, 16, 16);\
1161
}\
1162
void ff_ ## OPNAME ## qpel16_mc11_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1163
{\
1164
    uint8_t full[24*17];\
1165
    uint8_t halfH[272];\
1166
    uint8_t halfV[256];\
1167
    uint8_t halfHV[256];\
1168
    copy_block17(full, src, 24, stride, 17);\
1169
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1170
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1171
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1172
    OPNAME ## pixels16_l4_8(dst, full, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1173
}\
1174
static void OPNAME ## qpel16_mc11_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1175
{\
1176
    uint8_t full[24*17];\
1177
    uint8_t halfH[272];\
1178
    uint8_t halfHV[256];\
1179
    copy_block17(full, src, 24, stride, 17);\
1180
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1181
    put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1182
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1183
    OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1184
}\
1185
void ff_ ## OPNAME ## qpel16_mc31_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1186
{\
1187
    uint8_t full[24*17];\
1188
    uint8_t halfH[272];\
1189
    uint8_t halfV[256];\
1190
    uint8_t halfHV[256];\
1191
    copy_block17(full, src, 24, stride, 17);\
1192
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1193
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1194
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1195
    OPNAME ## pixels16_l4_8(dst, full+1, halfH, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1196
}\
1197
static void OPNAME ## qpel16_mc31_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1198
{\
1199
    uint8_t full[24*17];\
1200
    uint8_t halfH[272];\
1201
    uint8_t halfHV[256];\
1202
    copy_block17(full, src, 24, stride, 17);\
1203
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1204
    put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1205
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1206
    OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1207
}\
1208
void ff_ ## OPNAME ## qpel16_mc13_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1209
{\
1210
    uint8_t full[24*17];\
1211
    uint8_t halfH[272];\
1212
    uint8_t halfV[256];\
1213
    uint8_t halfHV[256];\
1214
    copy_block17(full, src, 24, stride, 17);\
1215
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1216
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1217
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1218
    OPNAME ## pixels16_l4_8(dst, full+24, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1219
}\
1220
static void OPNAME ## qpel16_mc13_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1221
{\
1222
    uint8_t full[24*17];\
1223
    uint8_t halfH[272];\
1224
    uint8_t halfHV[256];\
1225
    copy_block17(full, src, 24, stride, 17);\
1226
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1227
    put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1228
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1229
    OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1230
}\
1231
void ff_ ## OPNAME ## qpel16_mc33_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1232
{\
1233
    uint8_t full[24*17];\
1234
    uint8_t halfH[272];\
1235
    uint8_t halfV[256];\
1236
    uint8_t halfHV[256];\
1237
    copy_block17(full, src, 24, stride, 17);\
1238
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full  , 16, 24, 17);\
1239
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1240
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1241
    OPNAME ## pixels16_l4_8(dst, full+25, halfH+16, halfV, halfHV, stride, 24, 16, 16, 16, 16);\
1242
}\
1243
static void OPNAME ## qpel16_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1244
{\
1245
    uint8_t full[24*17];\
1246
    uint8_t halfH[272];\
1247
    uint8_t halfHV[256];\
1248
    copy_block17(full, src, 24, stride, 17);\
1249
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1250
    put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1251
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1252
    OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1253
}\
1254
static void OPNAME ## qpel16_mc21_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1255
{\
1256
    uint8_t halfH[272];\
1257
    uint8_t halfHV[256];\
1258
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1259
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1260
    OPNAME ## pixels16_l2_8(dst, halfH, halfHV, stride, 16, 16, 16);\
1261
}\
1262
static void OPNAME ## qpel16_mc23_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1263
{\
1264
    uint8_t halfH[272];\
1265
    uint8_t halfHV[256];\
1266
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1267
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1268
    OPNAME ## pixels16_l2_8(dst, halfH+16, halfHV, stride, 16, 16, 16);\
1269
}\
1270
void ff_ ## OPNAME ## qpel16_mc12_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1271
{\
1272
    uint8_t full[24*17];\
1273
    uint8_t halfH[272];\
1274
    uint8_t halfV[256];\
1275
    uint8_t halfHV[256];\
1276
    copy_block17(full, src, 24, stride, 17);\
1277
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1278
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full, 16, 24);\
1279
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1280
    OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
1281
}\
1282
static void OPNAME ## qpel16_mc12_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1283
{\
1284
    uint8_t full[24*17];\
1285
    uint8_t halfH[272];\
1286
    copy_block17(full, src, 24, stride, 17);\
1287
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1288
    put ## RND ## pixels16_l2_8(halfH, halfH, full, 16, 16, 24, 17);\
1289
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1290
}\
1291
void ff_ ## OPNAME ## qpel16_mc32_old_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1292
{\
1293
    uint8_t full[24*17];\
1294
    uint8_t halfH[272];\
1295
    uint8_t halfV[256];\
1296
    uint8_t halfHV[256];\
1297
    copy_block17(full, src, 24, stride, 17);\
1298
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1299
    put ## RND ## mpeg4_qpel16_v_lowpass(halfV, full+1, 16, 24);\
1300
    put ## RND ## mpeg4_qpel16_v_lowpass(halfHV, halfH, 16, 16);\
1301
    OPNAME ## pixels16_l2_8(dst, halfV, halfHV, stride, 16, 16, 16);\
1302
}\
1303
static void OPNAME ## qpel16_mc32_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1304
{\
1305
    uint8_t full[24*17];\
1306
    uint8_t halfH[272];\
1307
    copy_block17(full, src, 24, stride, 17);\
1308
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, full, 16, 24, 17);\
1309
    put ## RND ## pixels16_l2_8(halfH, halfH, full+1, 16, 16, 24, 17);\
1310
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1311
}\
1312
static void OPNAME ## qpel16_mc22_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)\
1313
{\
1314
    uint8_t halfH[272];\
1315
    put ## RND ## mpeg4_qpel16_h_lowpass(halfH, src, 16, stride, 17);\
1316
    OPNAME ## mpeg4_qpel16_v_lowpass(dst, halfH, stride, 16);\
1317
}
1318
 
1319
#define op_avg(a, b) a = (((a)+cm[((b) + 16)>>5]+1)>>1)
1320
#define op_avg_no_rnd(a, b) a = (((a)+cm[((b) + 15)>>5])>>1)
1321
#define op_put(a, b) a = cm[((b) + 16)>>5]
1322
#define op_put_no_rnd(a, b) a = cm[((b) + 15)>>5]
1323
 
1324
QPEL_MC(0, put_       , _       , op_put)
1325
QPEL_MC(1, put_no_rnd_, _no_rnd_, op_put_no_rnd)
1326
QPEL_MC(0, avg_       , _       , op_avg)
1327
//QPEL_MC(1, avg_no_rnd , _       , op_avg)
1328
#undef op_avg
1329
#undef op_avg_no_rnd
1330
#undef op_put
1331
#undef op_put_no_rnd
1332
 
1333
/**
 * Put (copy) an 8x8 pixel block; wrapper around the 8-bit template routine
 * generated by dsputil_template.c.
 */
void ff_put_pixels8x8_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    put_pixels8_8_c(dst, src, stride, 8);
}
1337
/**
 * Average an 8x8 pixel block into dst; wrapper around the 8-bit template
 * routine generated by dsputil_template.c.
 */
void ff_avg_pixels8x8_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    avg_pixels8_8_c(dst, src, stride, 8);
}
1341
/**
 * Put (copy) a 16x16 pixel block; wrapper around the 8-bit template routine.
 */
void ff_put_pixels16x16_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    put_pixels16_8_c(dst, src, stride, 16);
}
1345
/**
 * Average a 16x16 pixel block into dst; wrapper around the 8-bit template
 * routine.
 */
void ff_avg_pixels16x16_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    avg_pixels16_8_c(dst, src, stride, 16);
}
1349
 
1350
/* The (0,0) quarter-pel position needs no filtering: alias the mc00 entries
 * directly to the plain block copy/average helpers. */
#define put_qpel8_mc00_c  ff_put_pixels8x8_c
#define avg_qpel8_mc00_c  ff_avg_pixels8x8_c
#define put_qpel16_mc00_c ff_put_pixels16x16_c
#define avg_qpel16_mc00_c ff_avg_pixels16x16_c
#define put_no_rnd_qpel8_mc00_c  ff_put_pixels8x8_c
#define put_no_rnd_qpel16_mc00_c ff_put_pixels16x16_c
1356
 
1357
static void wmv2_mspel8_h_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int h){
1358
    const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
1359
    int i;
1360
 
1361
    for(i=0; i
1362
        dst[0]= cm[(9*(src[0] + src[1]) - (src[-1] + src[2]) + 8)>>4];
1363
        dst[1]= cm[(9*(src[1] + src[2]) - (src[ 0] + src[3]) + 8)>>4];
1364
        dst[2]= cm[(9*(src[2] + src[3]) - (src[ 1] + src[4]) + 8)>>4];
1365
        dst[3]= cm[(9*(src[3] + src[4]) - (src[ 2] + src[5]) + 8)>>4];
1366
        dst[4]= cm[(9*(src[4] + src[5]) - (src[ 3] + src[6]) + 8)>>4];
1367
        dst[5]= cm[(9*(src[5] + src[6]) - (src[ 4] + src[7]) + 8)>>4];
1368
        dst[6]= cm[(9*(src[6] + src[7]) - (src[ 5] + src[8]) + 8)>>4];
1369
        dst[7]= cm[(9*(src[7] + src[8]) - (src[ 6] + src[9]) + 8)>>4];
1370
        dst+=dstStride;
1371
        src+=srcStride;
1372
    }
1373
}
1374
 
1375
#if CONFIG_RV40_DECODER
/* RV40 treats the (3,3) quarter-pel position as the plain xy2 (center
 * half-pel) average, so these just forward to the template helpers. */
void ff_put_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    put_pixels16_xy2_8_c(dst, src, stride, 16);
}
void ff_avg_rv40_qpel16_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    avg_pixels16_xy2_8_c(dst, src, stride, 16);
}
void ff_put_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    put_pixels8_xy2_8_c(dst, src, stride, 8);
}
void ff_avg_rv40_qpel8_mc33_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    avg_pixels8_xy2_8_c(dst, src, stride, 8);
}
#endif /* CONFIG_RV40_DECODER */
1393
 
1394
#if CONFIG_DIRAC_DECODER
/* Generate the put/avg pixel helpers used by the Dirac decoder.
 * src[] carries the candidate source pointers: the plain variants use src[0],
 * the _l2 variants average src[0] and src[1], and the _l4 variants average
 * src[0]..src[3]. The 32-wide variants are built from two 16-wide calls. */
#define DIRAC_MC(OPNAME)\
void ff_ ## OPNAME ## _dirac_pixels8_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels8_8_c(dst, src[0], stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels16_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_8_c(dst, src[0], stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels32_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_8_c(dst   , src[0]   , stride, h);\
    OPNAME ## _pixels16_8_c(dst+16, src[0]+16, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels8_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels8_l2_8(dst, src[0], src[1], stride, stride, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels16_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_l2_8(dst, src[0], src[1], stride, stride, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels32_l2_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_l2_8(dst   , src[0]   , src[1]   , stride, stride, stride, h);\
    OPNAME ## _pixels16_l2_8(dst+16, src[0]+16, src[1]+16, stride, stride, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels8_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels8_l4_8(dst, src[0], src[1], src[2], src[3], stride, stride, stride, stride, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels16_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_l4_8(dst, src[0], src[1], src[2], src[3], stride, stride, stride, stride, stride, h);\
}\
void ff_ ## OPNAME ## _dirac_pixels32_l4_c(uint8_t *dst, const uint8_t *src[5], int stride, int h)\
{\
    OPNAME ## _pixels16_l4_8(dst   , src[0]   , src[1]   , src[2]   , src[3]   , stride, stride, stride, stride, stride, h);\
    OPNAME ## _pixels16_l4_8(dst+16, src[0]+16, src[1]+16, src[2]+16, src[3]+16, stride, stride, stride, stride, stride, h);\
}
DIRAC_MC(put)
DIRAC_MC(avg)
#endif
1438
 
1439
static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){
1440
    const uint8_t *cm = ff_cropTbl + MAX_NEG_CROP;
1441
    int i;
1442
 
1443
    for(i=0; i
1444
        const int src_1= src[ -srcStride];
1445
        const int src0 = src[0          ];
1446
        const int src1 = src[  srcStride];
1447
        const int src2 = src[2*srcStride];
1448
        const int src3 = src[3*srcStride];
1449
        const int src4 = src[4*srcStride];
1450
        const int src5 = src[5*srcStride];
1451
        const int src6 = src[6*srcStride];
1452
        const int src7 = src[7*srcStride];
1453
        const int src8 = src[8*srcStride];
1454
        const int src9 = src[9*srcStride];
1455
        dst[0*dstStride]= cm[(9*(src0 + src1) - (src_1 + src2) + 8)>>4];
1456
        dst[1*dstStride]= cm[(9*(src1 + src2) - (src0  + src3) + 8)>>4];
1457
        dst[2*dstStride]= cm[(9*(src2 + src3) - (src1  + src4) + 8)>>4];
1458
        dst[3*dstStride]= cm[(9*(src3 + src4) - (src2  + src5) + 8)>>4];
1459
        dst[4*dstStride]= cm[(9*(src4 + src5) - (src3  + src6) + 8)>>4];
1460
        dst[5*dstStride]= cm[(9*(src5 + src6) - (src4  + src7) + 8)>>4];
1461
        dst[6*dstStride]= cm[(9*(src6 + src7) - (src5  + src8) + 8)>>4];
1462
        dst[7*dstStride]= cm[(9*(src7 + src8) - (src6  + src9) + 8)>>4];
1463
        src++;
1464
        dst++;
1465
    }
1466
}
1467
 
1468
/* mspel (1,0): average the source block with the horizontal half-pel output. */
static void put_mspel8_mc10_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    uint8_t half[64];
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2_8(dst, src, half, stride, stride, 8, 8);
}
1474
 
1475
/* mspel (2,0): horizontal half-pel filter straight into dst. */
static void put_mspel8_mc20_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    wmv2_mspel8_h_lowpass(dst, src, stride, stride, 8);
}
1479
 
1480
/* mspel (3,0): average the right-shifted source with the horizontal half-pel. */
static void put_mspel8_mc30_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    uint8_t half[64];
    wmv2_mspel8_h_lowpass(half, src, 8, stride, 8);
    put_pixels8_l2_8(dst, src+1, half, stride, stride, 8, 8);
}
1486
 
1487
/* mspel (0,2): vertical half-pel filter straight into dst. */
static void put_mspel8_mc02_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    wmv2_mspel8_v_lowpass(dst, src, stride, stride, 8);
}
1491
 
1492
/* mspel (1,2): average of the vertical half-pel (halfV) and the H-then-V
 * filtered block (halfHV). halfH is computed one row early (src-stride, 11
 * rows) so the vertical pass on it has the context rows it needs; halfH+8
 * skips that leading row. */
static void put_mspel8_mc12_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    uint8_t halfH[88];
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
}
1502
/* mspel (3,2): like mc12 but the vertical half-pel is taken one pixel to the
 * right (src+1). */
static void put_mspel8_mc32_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    uint8_t halfH[88];
    uint8_t halfV[64];
    uint8_t halfHV[64];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(halfV, src+1, 8, stride, 8);
    wmv2_mspel8_v_lowpass(halfHV, halfH+8, 8, 8, 8);
    put_pixels8_l2_8(dst, halfV, halfHV, stride, 8, 8, 8);
}
1512
/* mspel (2,2): horizontal half-pel followed by vertical half-pel (center). */
static void put_mspel8_mc22_c(uint8_t *dst, uint8_t *src, ptrdiff_t stride)
{
    uint8_t halfH[88];
    wmv2_mspel8_h_lowpass(halfH, src-stride, 8, stride, 11);
    wmv2_mspel8_v_lowpass(dst, halfH+8, stride, 8, 8);
}
1518
 
1519
static void h263_v_loop_filter_c(uint8_t *src, int stride, int qscale){
1520
    if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
1521
    int x;
1522
    const int strength= ff_h263_loop_filter_strength[qscale];
1523
 
1524
    for(x=0; x<8; x++){
1525
        int d1, d2, ad1;
1526
        int p0= src[x-2*stride];
1527
        int p1= src[x-1*stride];
1528
        int p2= src[x+0*stride];
1529
        int p3= src[x+1*stride];
1530
        int d = (p0 - p3 + 4*(p2 - p1)) / 8;
1531
 
1532
        if     (d<-2*strength) d1= 0;
1533
        else if(d<-  strength) d1=-2*strength - d;
1534
        else if(d<   strength) d1= d;
1535
        else if(d< 2*strength) d1= 2*strength - d;
1536
        else                   d1= 0;
1537
 
1538
        p1 += d1;
1539
        p2 -= d1;
1540
        if(p1&256) p1= ~(p1>>31);
1541
        if(p2&256) p2= ~(p2>>31);
1542
 
1543
        src[x-1*stride] = p1;
1544
        src[x+0*stride] = p2;
1545
 
1546
        ad1= FFABS(d1)>>1;
1547
 
1548
        d2= av_clip((p0-p3)/4, -ad1, ad1);
1549
 
1550
        src[x-2*stride] = p0 - d2;
1551
        src[x+  stride] = p3 + d2;
1552
    }
1553
    }
1554
}
1555
 
1556
static void h263_h_loop_filter_c(uint8_t *src, int stride, int qscale){
1557
    if(CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
1558
    int y;
1559
    const int strength= ff_h263_loop_filter_strength[qscale];
1560
 
1561
    for(y=0; y<8; y++){
1562
        int d1, d2, ad1;
1563
        int p0= src[y*stride-2];
1564
        int p1= src[y*stride-1];
1565
        int p2= src[y*stride+0];
1566
        int p3= src[y*stride+1];
1567
        int d = (p0 - p3 + 4*(p2 - p1)) / 8;
1568
 
1569
        if     (d<-2*strength) d1= 0;
1570
        else if(d<-  strength) d1=-2*strength - d;
1571
        else if(d<   strength) d1= d;
1572
        else if(d< 2*strength) d1= 2*strength - d;
1573
        else                   d1= 0;
1574
 
1575
        p1 += d1;
1576
        p2 -= d1;
1577
        if(p1&256) p1= ~(p1>>31);
1578
        if(p2&256) p2= ~(p2>>31);
1579
 
1580
        src[y*stride-1] = p1;
1581
        src[y*stride+0] = p2;
1582
 
1583
        ad1= FFABS(d1)>>1;
1584
 
1585
        d2= av_clip((p0-p3)/4, -ad1, ad1);
1586
 
1587
        src[y*stride-2] = p0 - d2;
1588
        src[y*stride+1] = p3 + d2;
1589
    }
1590
    }
1591
}
1592
 
1593
/* Sum of absolute differences over a 16-pixel-wide block of height h. */
static inline int pix_abs16_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;

    s = 0;
    for(i=0;i<h;i++) {
        s += abs(pix1[0] - pix2[0]);
        s += abs(pix1[1] - pix2[1]);
        s += abs(pix1[2] - pix2[2]);
        s += abs(pix1[3] - pix2[3]);
        s += abs(pix1[4] - pix2[4]);
        s += abs(pix1[5] - pix2[5]);
        s += abs(pix1[6] - pix2[6]);
        s += abs(pix1[7] - pix2[7]);
        s += abs(pix1[8] - pix2[8]);
        s += abs(pix1[9] - pix2[9]);
        s += abs(pix1[10] - pix2[10]);
        s += abs(pix1[11] - pix2[11]);
        s += abs(pix1[12] - pix2[12]);
        s += abs(pix1[13] - pix2[13]);
        s += abs(pix1[14] - pix2[14]);
        s += abs(pix1[15] - pix2[15]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}

/* SAD vs. a half-pel horizontally interpolated reference (avg of x and x+1). */
static int pix_abs16_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;

    s = 0;
    for(i=0;i<h;i++) {
        s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
        s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
        s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
        s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
        s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
        s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
        s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
        s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
        s += abs(pix1[8] - avg2(pix2[8], pix2[9]));
        s += abs(pix1[9] - avg2(pix2[9], pix2[10]));
        s += abs(pix1[10] - avg2(pix2[10], pix2[11]));
        s += abs(pix1[11] - avg2(pix2[11], pix2[12]));
        s += abs(pix1[12] - avg2(pix2[12], pix2[13]));
        s += abs(pix1[13] - avg2(pix2[13], pix2[14]));
        s += abs(pix1[14] - avg2(pix2[14], pix2[15]));
        s += abs(pix1[15] - avg2(pix2[15], pix2[16]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}

/* SAD vs. a half-pel vertically interpolated reference (avg of rows y and y+1). */
static int pix_abs16_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;
    uint8_t *pix3 = pix2 + line_size;

    s = 0;
    for(i=0;i<h;i++) {
        s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
        s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
        s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
        s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
        s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
        s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
        s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
        s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
        s += abs(pix1[8] - avg2(pix2[8], pix3[8]));
        s += abs(pix1[9] - avg2(pix2[9], pix3[9]));
        s += abs(pix1[10] - avg2(pix2[10], pix3[10]));
        s += abs(pix1[11] - avg2(pix2[11], pix3[11]));
        s += abs(pix1[12] - avg2(pix2[12], pix3[12]));
        s += abs(pix1[13] - avg2(pix2[13], pix3[13]));
        s += abs(pix1[14] - avg2(pix2[14], pix3[14]));
        s += abs(pix1[15] - avg2(pix2[15], pix3[15]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return s;
}

/* SAD vs. a half-pel diagonally interpolated reference (4-tap average). */
static int pix_abs16_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;
    uint8_t *pix3 = pix2 + line_size;

    s = 0;
    for(i=0;i<h;i++) {
        s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
        s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
        s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
        s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
        s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
        s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
        s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
        s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
        s += abs(pix1[8] - avg4(pix2[8], pix2[9], pix3[8], pix3[9]));
        s += abs(pix1[9] - avg4(pix2[9], pix2[10], pix3[9], pix3[10]));
        s += abs(pix1[10] - avg4(pix2[10], pix2[11], pix3[10], pix3[11]));
        s += abs(pix1[11] - avg4(pix2[11], pix2[12], pix3[11], pix3[12]));
        s += abs(pix1[12] - avg4(pix2[12], pix2[13], pix3[12], pix3[13]));
        s += abs(pix1[13] - avg4(pix2[13], pix2[14], pix3[13], pix3[14]));
        s += abs(pix1[14] - avg4(pix2[14], pix2[15], pix3[14], pix3[15]));
        s += abs(pix1[15] - avg4(pix2[15], pix2[16], pix3[15], pix3[16]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return s;
}

/* Sum of absolute differences over an 8-pixel-wide block of height h. */
static inline int pix_abs8_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;

    s = 0;
    for(i=0;i<h;i++) {
        s += abs(pix1[0] - pix2[0]);
        s += abs(pix1[1] - pix2[1]);
        s += abs(pix1[2] - pix2[2]);
        s += abs(pix1[3] - pix2[3]);
        s += abs(pix1[4] - pix2[4]);
        s += abs(pix1[5] - pix2[5]);
        s += abs(pix1[6] - pix2[6]);
        s += abs(pix1[7] - pix2[7]);
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}

/* 8-wide SAD vs. half-pel horizontally interpolated reference. */
static int pix_abs8_x2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;

    s = 0;
    for(i=0;i<h;i++) {
        s += abs(pix1[0] - avg2(pix2[0], pix2[1]));
        s += abs(pix1[1] - avg2(pix2[1], pix2[2]));
        s += abs(pix1[2] - avg2(pix2[2], pix2[3]));
        s += abs(pix1[3] - avg2(pix2[3], pix2[4]));
        s += abs(pix1[4] - avg2(pix2[4], pix2[5]));
        s += abs(pix1[5] - avg2(pix2[5], pix2[6]));
        s += abs(pix1[6] - avg2(pix2[6], pix2[7]));
        s += abs(pix1[7] - avg2(pix2[7], pix2[8]));
        pix1 += line_size;
        pix2 += line_size;
    }
    return s;
}

/* 8-wide SAD vs. half-pel vertically interpolated reference. */
static int pix_abs8_y2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;
    uint8_t *pix3 = pix2 + line_size;

    s = 0;
    for(i=0;i<h;i++) {
        s += abs(pix1[0] - avg2(pix2[0], pix3[0]));
        s += abs(pix1[1] - avg2(pix2[1], pix3[1]));
        s += abs(pix1[2] - avg2(pix2[2], pix3[2]));
        s += abs(pix1[3] - avg2(pix2[3], pix3[3]));
        s += abs(pix1[4] - avg2(pix2[4], pix3[4]));
        s += abs(pix1[5] - avg2(pix2[5], pix3[5]));
        s += abs(pix1[6] - avg2(pix2[6], pix3[6]));
        s += abs(pix1[7] - avg2(pix2[7], pix3[7]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return s;
}

/* 8-wide SAD vs. half-pel diagonally interpolated reference. */
static int pix_abs8_xy2_c(void *v, uint8_t *pix1, uint8_t *pix2, int line_size, int h)
{
    int s, i;
    uint8_t *pix3 = pix2 + line_size;

    s = 0;
    for(i=0;i<h;i++) {
        s += abs(pix1[0] - avg4(pix2[0], pix2[1], pix3[0], pix3[1]));
        s += abs(pix1[1] - avg4(pix2[1], pix2[2], pix3[1], pix3[2]));
        s += abs(pix1[2] - avg4(pix2[2], pix2[3], pix3[2], pix3[3]));
        s += abs(pix1[3] - avg4(pix2[3], pix2[4], pix3[3], pix3[4]));
        s += abs(pix1[4] - avg4(pix2[4], pix2[5], pix3[4], pix3[5]));
        s += abs(pix1[5] - avg4(pix2[5], pix2[6], pix3[5], pix3[6]));
        s += abs(pix1[6] - avg4(pix2[6], pix2[7], pix3[6], pix3[7]));
        s += abs(pix1[7] - avg4(pix2[7], pix2[8], pix3[7], pix3[8]));
        pix1 += line_size;
        pix2 += line_size;
        pix3 += line_size;
    }
    return s;
}

static int nsse16_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
1794
    MpegEncContext *c = v;
1795
    int score1=0;
1796
    int score2=0;
1797
    int x,y;
1798
 
1799
    for(y=0; y
1800
        for(x=0; x<16; x++){
1801
            score1+= (s1[x  ] - s2[x ])*(s1[x  ] - s2[x ]);
1802
        }
1803
        if(y+1
1804
            for(x=0; x<15; x++){
1805
                score2+= FFABS(  s1[x  ] - s1[x  +stride]
1806
                             - s1[x+1] + s1[x+1+stride])
1807
                        -FFABS(  s2[x  ] - s2[x  +stride]
1808
                             - s2[x+1] + s2[x+1+stride]);
1809
            }
1810
        }
1811
        s1+= stride;
1812
        s2+= stride;
1813
    }
1814
 
1815
    if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
1816
    else  return score1 + FFABS(score2)*8;
1817
}
1818
 
1819
static int nsse8_c(void *v, uint8_t *s1, uint8_t *s2, int stride, int h){
1820
    MpegEncContext *c = v;
1821
    int score1=0;
1822
    int score2=0;
1823
    int x,y;
1824
 
1825
    for(y=0; y
1826
        for(x=0; x<8; x++){
1827
            score1+= (s1[x  ] - s2[x ])*(s1[x  ] - s2[x ]);
1828
        }
1829
        if(y+1
1830
            for(x=0; x<7; x++){
1831
                score2+= FFABS(  s1[x  ] - s1[x  +stride]
1832
                             - s1[x+1] + s1[x+1+stride])
1833
                        -FFABS(  s2[x  ] - s2[x  +stride]
1834
                             - s2[x+1] + s2[x+1+stride]);
1835
            }
1836
        }
1837
        s1+= stride;
1838
        s2+= stride;
1839
    }
1840
 
1841
    if(c) return score1 + FFABS(score2)*c->avctx->nsse_weight;
1842
    else  return score1 + FFABS(score2)*8;
1843
}
1844
 
1845
static int try_8x8basis_c(int16_t rem[64], int16_t weight[64], int16_t basis[64], int scale){
1846
    int i;
1847
    unsigned int sum=0;
1848
 
1849
    for(i=0; i<8*8; i++){
1850
        int b= rem[i] + ((basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT));
1851
        int w= weight[i];
1852
        b>>= RECON_SHIFT;
1853
        av_assert2(-512
1854
 
1855
        sum += (w*b)*(w*b)>>4;
1856
    }
1857
    return sum>>2;
1858
}
1859
 
1860
static void add_8x8basis_c(int16_t rem[64], int16_t basis[64], int scale){
1861
    int i;
1862
 
1863
    for(i=0; i<8*8; i++){
1864
        rem[i] += (basis[i]*scale + (1<<(BASIS_SHIFT - RECON_SHIFT-1)))>>(BASIS_SHIFT - RECON_SHIFT);
1865
    }
1866
}
1867
 
1868
/* Comparison function for FF_CMP_ZERO: always reports a perfect match. */
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
    return 0;
}

/* Fill cmp[0..5] with the comparison functions selected by the low byte of
 * type (an FF_CMP_* value), taken from the DSPContext's function tables. */
void ff_set_cmp(DSPContext* c, me_cmp_func *cmp, int type){
    int i;

    memset(cmp, 0, sizeof(void*)*6);

    for(i=0; i<6; i++){
        switch(type&0xFF){
        case FF_CMP_SAD:
            cmp[i]= c->sad[i];
            break;
        case FF_CMP_SATD:
            cmp[i]= c->hadamard8_diff[i];
            break;
        case FF_CMP_SSE:
            cmp[i]= c->sse[i];
            break;
        case FF_CMP_DCT:
            cmp[i]= c->dct_sad[i];
            break;
        case FF_CMP_DCT264:
            cmp[i]= c->dct264_sad[i];
            break;
        case FF_CMP_DCTMAX:
            cmp[i]= c->dct_max[i];
            break;
        case FF_CMP_PSNR:
            cmp[i]= c->quant_psnr[i];
            break;
        case FF_CMP_BIT:
            cmp[i]= c->bit[i];
            break;
        case FF_CMP_RD:
            cmp[i]= c->rd[i];
            break;
        case FF_CMP_VSAD:
            cmp[i]= c->vsad[i];
            break;
        case FF_CMP_VSSE:
            cmp[i]= c->vsse[i];
            break;
        case FF_CMP_ZERO:
            cmp[i]= zero_cmp;
            break;
        case FF_CMP_NSSE:
            cmp[i]= c->nsse[i];
            break;
#if CONFIG_DWT
        case FF_CMP_W53:
            cmp[i]= c->w53[i];
            break;
        case FF_CMP_W97:
            cmp[i]= c->w97[i];
            break;
#endif
        default:
            av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n");
        }
    }
}

static void add_bytes_c(uint8_t *dst, uint8_t *src, int w){
1933
    long i;
1934
    for(i=0; i<=w-(int)sizeof(long); i+=sizeof(long)){
1935
        long a = *(long*)(src+i);
1936
        long b = *(long*)(dst+i);
1937
        *(long*)(dst+i) = ((a&pb_7f) + (b&pb_7f)) ^ ((a^b)&pb_80);
1938
    }
1939
    for(; i
1940
        dst[i+0] += src[i+0];
1941
}
1942
 
1943
static void diff_bytes_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w){
1944
    long i;
1945
#if !HAVE_FAST_UNALIGNED
1946
    if((long)src2 & (sizeof(long)-1)){
1947
        for(i=0; i+7
1948
            dst[i+0] = src1[i+0]-src2[i+0];
1949
            dst[i+1] = src1[i+1]-src2[i+1];
1950
            dst[i+2] = src1[i+2]-src2[i+2];
1951
            dst[i+3] = src1[i+3]-src2[i+3];
1952
            dst[i+4] = src1[i+4]-src2[i+4];
1953
            dst[i+5] = src1[i+5]-src2[i+5];
1954
            dst[i+6] = src1[i+6]-src2[i+6];
1955
            dst[i+7] = src1[i+7]-src2[i+7];
1956
        }
1957
    }else
1958
#endif
1959
    for(i=0; i<=w-(int)sizeof(long); i+=sizeof(long)){
1960
        long a = *(long*)(src1+i);
1961
        long b = *(long*)(src2+i);
1962
        *(long*)(dst+i) = ((a|pb_80) - (b&pb_7f)) ^ ((a^b^pb_80)&pb_80);
1963
    }
1964
    for(; i
1965
        dst[i+0] = src1[i+0]-src2[i+0];
1966
}
1967
 
1968
/* HuffYUV median prediction decode: dst[i] = median(left, top, left+top-topleft)
 * + diff[i]; running left/left_top state is read and written through the pointers. */
static void add_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *diff, int w, int *left, int *left_top){
    int i;
    uint8_t l, lt;

    l= *left;
    lt= *left_top;

    for(i=0; i<w; i++){
        l= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF) + diff[i];
        lt= src1[i];
        dst[i]= l;
    }

    *left= l;
    *left_top= lt;
}

/* HuffYUV median prediction encode: dst[i] = src2[i] - median prediction,
 * mirroring add_hfyu_median_prediction_c. */
static void sub_hfyu_median_prediction_c(uint8_t *dst, const uint8_t *src1, const uint8_t *src2, int w, int *left, int *left_top){
    int i;
    uint8_t l, lt;

    l= *left;
    lt= *left_top;

    for(i=0; i<w; i++){
        const int pred= mid_pred(l, src1[i], (l + src1[i] - lt)&0xFF);
        lt= src1[i];
        l= src2[i];
        dst[i]= l - pred;
    }

    *left= l;
    *left_top= lt;
}

/* HuffYUV left prediction decode: running sum of src into dst starting from
 * acc; returns the final accumulator. Main loop is unrolled by two. */
static int add_hfyu_left_prediction_c(uint8_t *dst, const uint8_t *src, int w, int acc){
    int i;

    for(i=0; i<w-1; i++){
        acc+= src[i];
        dst[i]= acc;
        i++;
        acc+= src[i];
        dst[i]= acc;
    }

    for(; i<w; i++){
        acc+= src[i];
        dst[i]= acc;
    }

    return acc;
}

/* Byte offsets of the B,G,R,A components within a packed 32-bit pixel,
 * dependent on host endianness. */
#if HAVE_BIGENDIAN
#define B 3
#define G 2
#define R 1
#define A 0
#else
#define B 0
#define G 1
#define R 2
#define A 3
#endif
/* HuffYUV left prediction decode for packed BGR32: independent running sums
 * per component; the four accumulators are read and written via the pointers. */
static void add_hfyu_left_prediction_bgr32_c(uint8_t *dst, const uint8_t *src, int w, int *red, int *green, int *blue, int *alpha){
    int i;
    int r,g,b,a;
    r= *red;
    g= *green;
    b= *blue;
    a= *alpha;

    for(i=0; i<w; i++){
        b+= src[4*i+B];
        g+= src[4*i+G];
        r+= src[4*i+R];
        a+= src[4*i+A];

        dst[4*i+B]= b;
        dst[4*i+G]= g;
        dst[4*i+R]= r;
        dst[4*i+A]= a;
    }

    *red= r;
    *green= g;
    *blue= b;
    *alpha= a;
}
#undef B
#undef G
#undef R
#undef A

/* Hadamard transform helpers: BUTTERFLY2 writes sum/difference of two inputs,
 * BUTTERFLY1 does the same in place, BUTTERFLYA yields |x+y| + |x-y|. */
#define BUTTERFLY2(o1,o2,i1,i2) \
o1= (i1)+(i2);\
o2= (i1)-(i2);

#define BUTTERFLY1(x,y) \
{\
    int a,b;\
    a= x;\
    b= y;\
    x= a+b;\
    y= a-b;\
}

#define BUTTERFLYA(x,y) (FFABS((x)+(y)) + FFABS((x)-(y)))

/* SATD: 8x8 Hadamard transform of the src-dst difference, summing the
 * absolute transform coefficients (rows pass, then columns pass). */
static int hadamard8_diff8x8_c(/*MpegEncContext*/ void *s, uint8_t *dst, uint8_t *src, int stride, int h){
    int i;
    int temp[64];
    int sum=0;

    av_assert2(h==8);

    for(i=0; i<8; i++){
        //FIXME try pointer walks
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0]-dst[stride*i+0],src[stride*i+1]-dst[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2]-dst[stride*i+2],src[stride*i+3]-dst[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4]-dst[stride*i+4],src[stride*i+5]-dst[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6]-dst[stride*i+6],src[stride*i+7]-dst[stride*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    for(i=0; i<8; i++){
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }
    return sum;
}

/* Intra SATD: Hadamard transform of the source block itself; the DC
 * coefficient (mean) is subtracted from the total at the end. */
static int hadamard8_intra8x8_c(/*MpegEncContext*/ void *s, uint8_t *src, uint8_t *dummy, int stride, int h){
    int i;
    int temp[64];
    int sum=0;

    av_assert2(h==8);

    for(i=0; i<8; i++){
        //FIXME try pointer walks
        BUTTERFLY2(temp[8*i+0], temp[8*i+1], src[stride*i+0],src[stride*i+1]);
        BUTTERFLY2(temp[8*i+2], temp[8*i+3], src[stride*i+2],src[stride*i+3]);
        BUTTERFLY2(temp[8*i+4], temp[8*i+5], src[stride*i+4],src[stride*i+5]);
        BUTTERFLY2(temp[8*i+6], temp[8*i+7], src[stride*i+6],src[stride*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+2]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+3]);
        BUTTERFLY1(temp[8*i+4], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+5], temp[8*i+7]);

        BUTTERFLY1(temp[8*i+0], temp[8*i+4]);
        BUTTERFLY1(temp[8*i+1], temp[8*i+5]);
        BUTTERFLY1(temp[8*i+2], temp[8*i+6]);
        BUTTERFLY1(temp[8*i+3], temp[8*i+7]);
    }

    for(i=0; i<8; i++){
        BUTTERFLY1(temp[8*0+i], temp[8*1+i]);
        BUTTERFLY1(temp[8*2+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*5+i]);
        BUTTERFLY1(temp[8*6+i], temp[8*7+i]);

        BUTTERFLY1(temp[8*0+i], temp[8*2+i]);
        BUTTERFLY1(temp[8*1+i], temp[8*3+i]);
        BUTTERFLY1(temp[8*4+i], temp[8*6+i]);
        BUTTERFLY1(temp[8*5+i], temp[8*7+i]);

        sum +=
             BUTTERFLYA(temp[8*0+i], temp[8*4+i])
            +BUTTERFLYA(temp[8*1+i], temp[8*5+i])
            +BUTTERFLYA(temp[8*2+i], temp[8*6+i])
            +BUTTERFLYA(temp[8*3+i], temp[8*7+i]);
    }

    sum -= FFABS(temp[8*0] + temp[8*4]); // -mean

    return sum;
}

static int dct_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2172
    MpegEncContext * const s= (MpegEncContext *)c;
2173
    LOCAL_ALIGNED_16(int16_t, temp, [64]);
2174
 
2175
    av_assert2(h==8);
2176
 
2177
    s->dsp.diff_pixels(temp, src1, src2, stride);
2178
    s->dsp.fdct(temp);
2179
    return s->dsp.sum_abs_dctelem(temp);
2180
}
2181
 
2182
#if CONFIG_GPL
/* One H.264-style integer 8x8 DCT pass over SRC(0..7) into DST(0..7). */
#define DCT8_1D {\
    const int s07 = SRC(0) + SRC(7);\
    const int s16 = SRC(1) + SRC(6);\
    const int s25 = SRC(2) + SRC(5);\
    const int s34 = SRC(3) + SRC(4);\
    const int a0 = s07 + s34;\
    const int a1 = s16 + s25;\
    const int a2 = s07 - s34;\
    const int a3 = s16 - s25;\
    const int d07 = SRC(0) - SRC(7);\
    const int d16 = SRC(1) - SRC(6);\
    const int d25 = SRC(2) - SRC(5);\
    const int d34 = SRC(3) - SRC(4);\
    const int a4 = d16 + d25 + (d07 + (d07>>1));\
    const int a5 = d07 - d34 - (d25 + (d25>>1));\
    const int a6 = d07 + d34 - (d16 + (d16>>1));\
    const int a7 = d16 - d25 + (d34 + (d34>>1));\
    DST(0,  a0 + a1     ) ;\
    DST(1,  a4 + (a7>>2)) ;\
    DST(2,  a2 + (a3>>1)) ;\
    DST(3,  a5 + (a6>>2)) ;\
    DST(4,  a0 - a1     ) ;\
    DST(5,  a6 - (a5>>2)) ;\
    DST(6, (a2>>1) - a3 ) ;\
    DST(7, (a4>>2) - a7 ) ;\
}

/* SAD in the H.264 8x8 transform domain: row pass in place, column pass
 * accumulating absolute values directly via the DST macro. */
static int dct264_sad8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
    MpegEncContext * const s= (MpegEncContext *)c;
    int16_t dct[8][8];
    int i;
    int sum=0;

    s->dsp.diff_pixels(dct[0], src1, src2, stride);

#define SRC(x) dct[i][x]
#define DST(x,v) dct[i][x]= v
    for( i = 0; i < 8; i++ )
        DCT8_1D
#undef SRC
#undef DST

#define SRC(x) dct[x][i]
#define DST(x,v) sum += FFABS(v)
    for( i = 0; i < 8; i++ )
        DCT8_1D
#undef SRC
#undef DST
    return sum;
}
#endif

static int dct_max8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2236
    MpegEncContext * const s= (MpegEncContext *)c;
2237
    LOCAL_ALIGNED_16(int16_t, temp, [64]);
2238
    int sum=0, i;
2239
 
2240
    av_assert2(h==8);
2241
 
2242
    s->dsp.diff_pixels(temp, src1, src2, stride);
2243
    s->dsp.fdct(temp);
2244
 
2245
    for(i=0; i<64; i++)
2246
        sum= FFMAX(sum, FFABS(temp[i]));
2247
 
2248
    return sum;
2249
}
2250
 
2251
static int quant_psnr8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2252
    MpegEncContext * const s= (MpegEncContext *)c;
2253
    LOCAL_ALIGNED_16(int16_t, temp, [64*2]);
2254
    int16_t * const bak = temp+64;
2255
    int sum=0, i;
2256
 
2257
    av_assert2(h==8);
2258
    s->mb_intra=0;
2259
 
2260
    s->dsp.diff_pixels(temp, src1, src2, stride);
2261
 
2262
    memcpy(bak, temp, 64*sizeof(int16_t));
2263
 
2264
    s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
2265
    s->dct_unquantize_inter(s, temp, 0, s->qscale);
2266
    ff_simple_idct_8(temp); //FIXME
2267
 
2268
    for(i=0; i<64; i++)
2269
        sum+= (temp[i]-bak[i])*(temp[i]-bak[i]);
2270
 
2271
    return sum;
2272
}
2273
 
2274
static int rd8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2275
    MpegEncContext * const s= (MpegEncContext *)c;
2276
    const uint8_t *scantable= s->intra_scantable.permutated;
2277
    LOCAL_ALIGNED_16(int16_t, temp, [64]);
2278
    LOCAL_ALIGNED_16(uint8_t, lsrc1, [64]);
2279
    LOCAL_ALIGNED_16(uint8_t, lsrc2, [64]);
2280
    int i, last, run, bits, level, distortion, start_i;
2281
    const int esc_length= s->ac_esc_length;
2282
    uint8_t * length;
2283
    uint8_t * last_length;
2284
 
2285
    av_assert2(h==8);
2286
 
2287
    copy_block8(lsrc1, src1, 8, stride, 8);
2288
    copy_block8(lsrc2, src2, 8, stride, 8);
2289
 
2290
    s->dsp.diff_pixels(temp, lsrc1, lsrc2, 8);
2291
 
2292
    s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
2293
 
2294
    bits=0;
2295
 
2296
    if (s->mb_intra) {
2297
        start_i = 1;
2298
        length     = s->intra_ac_vlc_length;
2299
        last_length= s->intra_ac_vlc_last_length;
2300
        bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
2301
    } else {
2302
        start_i = 0;
2303
        length     = s->inter_ac_vlc_length;
2304
        last_length= s->inter_ac_vlc_last_length;
2305
    }
2306
 
2307
    if(last>=start_i){
2308
        run=0;
2309
        for(i=start_i; i
2310
            int j= scantable[i];
2311
            level= temp[j];
2312
 
2313
            if(level){
2314
                level+=64;
2315
                if((level&(~127)) == 0){
2316
                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
2317
                }else
2318
                    bits+= esc_length;
2319
                run=0;
2320
            }else
2321
                run++;
2322
        }
2323
        i= scantable[last];
2324
 
2325
        level= temp[i] + 64;
2326
 
2327
        av_assert2(level - 64);
2328
 
2329
        if((level&(~127)) == 0){
2330
            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
2331
        }else
2332
            bits+= esc_length;
2333
 
2334
    }
2335
 
2336
    if(last>=0){
2337
        if(s->mb_intra)
2338
            s->dct_unquantize_intra(s, temp, 0, s->qscale);
2339
        else
2340
            s->dct_unquantize_inter(s, temp, 0, s->qscale);
2341
    }
2342
 
2343
    s->dsp.idct_add(lsrc2, 8, temp);
2344
 
2345
    distortion= s->dsp.sse[1](NULL, lsrc2, lsrc1, 8, 8);
2346
 
2347
    return distortion + ((bits*s->qscale*s->qscale*109 + 64)>>7);
2348
}
2349
 
2350
static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride, int h){
2351
    MpegEncContext * const s= (MpegEncContext *)c;
2352
    const uint8_t *scantable= s->intra_scantable.permutated;
2353
    LOCAL_ALIGNED_16(int16_t, temp, [64]);
2354
    int i, last, run, bits, level, start_i;
2355
    const int esc_length= s->ac_esc_length;
2356
    uint8_t * length;
2357
    uint8_t * last_length;
2358
 
2359
    av_assert2(h==8);
2360
 
2361
    s->dsp.diff_pixels(temp, src1, src2, stride);
2362
 
2363
    s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i);
2364
 
2365
    bits=0;
2366
 
2367
    if (s->mb_intra) {
2368
        start_i = 1;
2369
        length     = s->intra_ac_vlc_length;
2370
        last_length= s->intra_ac_vlc_last_length;
2371
        bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma
2372
    } else {
2373
        start_i = 0;
2374
        length     = s->inter_ac_vlc_length;
2375
        last_length= s->inter_ac_vlc_last_length;
2376
    }
2377
 
2378
    if(last>=start_i){
2379
        run=0;
2380
        for(i=start_i; i
2381
            int j= scantable[i];
2382
            level= temp[j];
2383
 
2384
            if(level){
2385
                level+=64;
2386
                if((level&(~127)) == 0){
2387
                    bits+= length[UNI_AC_ENC_INDEX(run, level)];
2388
                }else
2389
                    bits+= esc_length;
2390
                run=0;
2391
            }else
2392
                run++;
2393
        }
2394
        i= scantable[last];
2395
 
2396
        level= temp[i] + 64;
2397
 
2398
        av_assert2(level - 64);
2399
 
2400
        if((level&(~127)) == 0){
2401
            bits+= last_length[UNI_AC_ENC_INDEX(run, level)];
2402
        }else
2403
            bits+= esc_length;
2404
    }
2405
 
2406
    return bits;
2407
}
2408
 
2409
/* Intra vertical SAD: sum of absolute differences between vertically
 * adjacent pixels of one block; instantiated for widths 8 and 16. */
#define VSAD_INTRA(size) \
static int vsad_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    int score=0;                                                                                            \
    int x,y;                                                                                                \
                                                                                                            \
    for(y=1; y<h; y++){                                                                                     \
        for(x=0; x<size; x+=4){                                                                             \
            score+= FFABS(s[x  ] - s[x  +stride]) + FFABS(s[x+1] - s[x+1+stride])                           \
                   +FFABS(s[x+2] - s[x+2+stride]) + FFABS(s[x+3] - s[x+3+stride]);                          \
        }                                                                                                   \
        s+= stride;                                                                                         \
    }                                                                                                       \
                                                                                                            \
    return score;                                                                                           \
}
VSAD_INTRA(8)
VSAD_INTRA(16)

/* Inter vertical SAD, 16 wide: absolute vertical-gradient difference
 * between the two blocks. */
static int vsad16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int score=0;
    int x,y;

    for(y=1; y<h; y++){
        for(x=0; x<16; x++){
            score+= FFABS(s1[x  ] - s2[x ] - s1[x  +stride] + s2[x +stride]);
        }
        s1+= stride;
        s2+= stride;
    }

    return score;
}

#define SQ(a) ((a)*(a))
/* Intra vertical SSE: squared differences between vertically adjacent
 * pixels; instantiated for widths 8 and 16. */
#define VSSE_INTRA(size) \
static int vsse_intra##size##_c(/*MpegEncContext*/ void *c, uint8_t *s, uint8_t *dummy, int stride, int h){ \
    int score=0;                                                                                            \
    int x,y;                                                                                                \
                                                                                                            \
    for(y=1; y<h; y++){                                                                                     \
        for(x=0; x<size; x+=4){                                                                             \
            score+= SQ(s[x  ] - s[x  +stride]) + SQ(s[x+1] - s[x+1+stride])                                 \
                   +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]);                                \
        }                                                                                                   \
        s+= stride;                                                                                         \
    }                                                                                                       \
                                                                                                            \
    return score;                                                                                           \
}
VSSE_INTRA(8)
VSSE_INTRA(16)

/* Inter vertical SSE, 16 wide: squared vertical-gradient difference
 * between the two blocks. */
static int vsse16_c(/*MpegEncContext*/ void *c, uint8_t *s1, uint8_t *s2, int stride, int h){
    int score=0;
    int x,y;

    for(y=1; y<h; y++){
        for(x=0; x<16; x++){
            score+= SQ(s1[x  ] - s2[x ] - s1[x  +stride] + s2[x +stride]);
        }
        s1+= stride;
        s2+= stride;
    }

    return score;
}

/* Sum of squared differences between an int8 array and an int16 array. */
static int ssd_int8_vs_int16_c(const int8_t *pix1, const int16_t *pix2,
                               int size){
    int score=0;
    int i;
    for(i=0; i<size; i++)
        score += (pix1[i]-pix2[i])*(pix1[i]-pix2[i]);
    return score;
}

/* Build a 16x16 comparison function from an 8x8 one by summing the four
 * (or two, when h==8) 8x8 sub-blocks. */
#define WRAPPER8_16_SQ(name8, name16)\
static int name16(void /*MpegEncContext*/ *s, uint8_t *dst, uint8_t *src, int stride, int h){\
    int score=0;\
    score +=name8(s, dst           , src           , stride, 8);\
    score +=name8(s, dst+8         , src+8         , stride, 8);\
    if(h==16){\
        dst += 8*stride;\
        src += 8*stride;\
        score +=name8(s, dst           , src           , stride, 8);\
        score +=name8(s, dst+8         , src+8         , stride, 8);\
    }\
    return score;\
}

WRAPPER8_16_SQ(hadamard8_diff8x8_c, hadamard8_diff16_c)
WRAPPER8_16_SQ(hadamard8_intra8x8_c, hadamard8_intra16_c)
WRAPPER8_16_SQ(dct_sad8x8_c, dct_sad16_c)
#if CONFIG_GPL
WRAPPER8_16_SQ(dct264_sad8x8_c, dct264_sad16_c)
#endif
WRAPPER8_16_SQ(dct_max8x8_c, dct_max16_c)
WRAPPER8_16_SQ(quant_psnr8x8_c, quant_psnr16_c)
WRAPPER8_16_SQ(rd8x8_c, rd16_c)
WRAPPER8_16_SQ(bit8x8_c, bit16_c)

/**
 * Clip one float, given as its raw IEEE-754 bit pattern, to [min, max]
 * where min < 0 < max (mini/maxi are the bit patterns of min/max,
 * maxisign is maxi with the sign bit flipped).
 * With opposite-sign bounds, unsigned comparison of raw bits works:
 * any value whose bits exceed mini has the sign bit set and a larger
 * magnitude than min, i.e. it lies below min; flipping the sign bit
 * orders the non-negative range so values above max compare greater
 * than maxisign.
 */
static inline uint32_t clipf_c_one(uint32_t a, uint32_t mini,
                   uint32_t maxi, uint32_t maxisign)
{

    if(a > mini) return mini;
    else if((a^(1U<<31)) > maxisign) return maxi;
    else return a;
}
/**
 * Clip len floats to [*min, *max] for the opposite-sign case
 * (*min < 0 < *max), working on raw IEEE-754 bit patterns via
 * clipf_c_one().  len is assumed to be a multiple of 8 (8-way unroll).
 * NOTE(review): the uint32_t* casts type-pun float storage, which is not
 * strict-aliasing clean; kept byte-compatible with the original code.
 * The scrape had truncated the loop header; bound is len, step 8.
 */
static void vector_clipf_c_opposite_sign(float *dst, const float *src, float *min, float *max, int len){
    int i;
    uint32_t mini = *(uint32_t*)min;
    uint32_t maxi = *(uint32_t*)max;
    uint32_t maxisign = maxi ^ (1U<<31);
    uint32_t *dsti = (uint32_t*)dst;
    const uint32_t *srci = (const uint32_t*)src;

    for (i = 0; i < len; i += 8) {
        dsti[i + 0] = clipf_c_one(srci[i + 0], mini, maxi, maxisign);
        dsti[i + 1] = clipf_c_one(srci[i + 1], mini, maxi, maxisign);
        dsti[i + 2] = clipf_c_one(srci[i + 2], mini, maxi, maxisign);
        dsti[i + 3] = clipf_c_one(srci[i + 3], mini, maxi, maxisign);
        dsti[i + 4] = clipf_c_one(srci[i + 4], mini, maxi, maxisign);
        dsti[i + 5] = clipf_c_one(srci[i + 5], mini, maxi, maxisign);
        dsti[i + 6] = clipf_c_one(srci[i + 6], mini, maxi, maxisign);
        dsti[i + 7] = clipf_c_one(srci[i + 7], mini, maxi, maxisign);
    }
}
/**
 * Clip len floats in src to [min, max], writing to dst.
 * len is assumed to be a multiple of 8 (8-way unrolled loop).
 * The opposite-sign case (min < 0 < max) is dispatched to the
 * bit-pattern fast path; otherwise av_clipf is applied per element.
 */
static void vector_clipf_c(float *dst, const float *src, float min, float max, int len){
    int i;
    if(min < 0 && max > 0) {
        vector_clipf_c_opposite_sign(dst, src, &min, &max, len);
    } else {
        for(i=0; i < len; i+=8) {
            dst[i    ] = av_clipf(src[i    ], min, max);
            dst[i + 1] = av_clipf(src[i + 1], min, max);
            dst[i + 2] = av_clipf(src[i + 2], min, max);
            dst[i + 3] = av_clipf(src[i + 3], min, max);
            dst[i + 4] = av_clipf(src[i + 4], min, max);
            dst[i + 5] = av_clipf(src[i + 5], min, max);
            dst[i + 6] = av_clipf(src[i + 6], min, max);
            dst[i + 7] = av_clipf(src[i + 7], min, max);
        }
    }
}
/**
 * Dot product of two int16 vectors.
 * @param order element count
 * @return sum of v1[i]*v2[i]; accumulated in a plain int, matching the
 *         original contract (callers bound the inputs against overflow).
 */
static int32_t scalarproduct_int16_c(const int16_t * v1, const int16_t * v2, int order)
{
    int res = 0;

    while (order--)
        res += *v1++ * *v2++;

    return res;
}
/**
 * Fused dot product and multiply-add: returns sum of v1[i]*v2[i]
 * (using v1's value BEFORE the update) while updating
 * v1[i] += mul * v3[i] in the same pass.
 * @param order element count
 */
static int32_t scalarproduct_and_madd_int16_c(int16_t *v1, const int16_t *v2, const int16_t *v3, int order, int mul)
{
    int res = 0;
    while (order--) {
        res   += *v1 * *v2++;
        *v1++ += mul * *v3++;
    }
    return res;
}
/**
 * Apply a symmetric int16 window to an int16 signal: only the first
 * len/2 window coefficients are stored; window[i] is applied to both
 * input[i] and input[len-i-1].  Products are scaled by Q15 rounding
 * ((x + 2^14) >> 15).
 * @param len total sample count (assumed even — TODO confirm callers)
 */
static void apply_window_int16_c(int16_t *output, const int16_t *input,
                                 const int16_t *window, unsigned int len)
{
    int i;
    int len2 = len >> 1;

    for (i = 0; i < len2; i++) {
        int16_t w       = window[i];
        output[i]       = (MUL16(input[i],       w) + (1 << 14)) >> 15;
        output[len-i-1] = (MUL16(input[len-i-1], w) + (1 << 14)) >> 15;
    }
}
/**
 * Clip len int32 samples to [min, max].
 * NOTE(review): do/while with 8-way unrolling — assumes len is a nonzero
 * multiple of 8; len == 0 would wrap the unsigned counter. Kept as is.
 */
static void vector_clip_int32_c(int32_t *dst, const int32_t *src, int32_t min,
                                int32_t max, unsigned int len)
{
    do {
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        *dst++ = av_clip(*src++, min, max);
        len -= 8;
    } while (len > 0);
}
/* 8x8 jrev IDCT, then store the clamped result into the destination. */
static void jref_idct_put(uint8_t *dest, int line_size, int16_t *block)
{
    ff_j_rev_dct (block);
    put_pixels_clamped_c(block, dest, line_size);
}
/* 8x8 jrev IDCT, then add the clamped result onto the destination. */
static void jref_idct_add(uint8_t *dest, int line_size, int16_t *block)
{
    ff_j_rev_dct (block);
    add_pixels_clamped_c(block, dest, line_size);
}
/* 4x4 lowres jrev IDCT, store clamped (lowres==1 path). */
static void ff_jref_idct4_put(uint8_t *dest, int line_size, int16_t *block)
{
    ff_j_rev_dct4 (block);
    put_pixels_clamped4_c(block, dest, line_size);
}
/* 4x4 lowres jrev IDCT, add clamped (lowres==1 path). */
static void ff_jref_idct4_add(uint8_t *dest, int line_size, int16_t *block)
{
    ff_j_rev_dct4 (block);
    add_pixels_clamped4_c(block, dest, line_size);
}
/* 2x2 lowres jrev IDCT, store clamped (lowres==2 path). */
static void ff_jref_idct2_put(uint8_t *dest, int line_size, int16_t *block)
{
    ff_j_rev_dct2 (block);
    put_pixels_clamped2_c(block, dest, line_size);
}
/* 2x2 lowres jrev IDCT, add clamped (lowres==2 path). */
static void ff_jref_idct2_add(uint8_t *dest, int line_size, int16_t *block)
{
    ff_j_rev_dct2 (block);
    add_pixels_clamped2_c(block, dest, line_size);
}
/* 1x1 lowres "IDCT": only the DC coefficient survives; round and store. */
static void ff_jref_idct1_put(uint8_t *dest, int line_size, int16_t *block)
{
    dest[0] = av_clip_uint8((block[0] + 4)>>3);
}
/* 1x1 lowres "IDCT": round the DC coefficient and add onto dest. */
static void ff_jref_idct1_add(uint8_t *dest, int line_size, int16_t *block)
{
    dest[0] = av_clip_uint8(dest[0] + ((block[0] + 4)>>3));
}
/* init static data */
2647
av_cold void ff_dsputil_static_init(void)
2648
{
2649
    int i;
2650
 
2651
    for(i=0;i<512;i++) {
2652
        ff_squareTbl[i] = (i - 256) * (i - 256);
2653
    }
2654
 
2655
    for(i=0; i<64; i++) ff_inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
2656
}
2657
 
2658
int ff_check_alignment(void){
2659
    static int did_fail=0;
2660
    LOCAL_ALIGNED_16(int, aligned, [4]);
2661
 
2662
    if((intptr_t)aligned & 15){
2663
        if(!did_fail){
2664
#if HAVE_MMX || HAVE_ALTIVEC
2665
            av_log(NULL, AV_LOG_ERROR,
2666
                "Compiler did not align stack variables. Libavcodec has been miscompiled\n"
2667
                "and may be very slow or crash. This is not a bug in libavcodec,\n"
2668
                "but in the compiler. You may try recompiling using gcc >= 4.2.\n"
2669
                "Do not report crashes to FFmpeg developers.\n");
2670
#endif
2671
            did_fail=1;
2672
        }
2673
        return -1;
2674
    }
2675
    return 0;
2676
}
2677
 
2678
/**
 * Initialize a DSPContext with the C reference implementations, then let
 * each architecture override individual entries with optimized versions.
 * Function-pointer selection depends on avctx (bit depth, dct/idct algo,
 * lowres level, codec type).
 */
av_cold void ff_dsputil_init(DSPContext* c, AVCodecContext *avctx)
{
    ff_check_alignment();

#if CONFIG_ENCODERS
    /* forward DCT selection (encoders only) */
    if (avctx->bits_per_raw_sample == 10) {
        c->fdct    = ff_jpeg_fdct_islow_10;
        c->fdct248 = ff_fdct248_islow_10;
    } else {
        if(avctx->dct_algo==FF_DCT_FASTINT) {
            c->fdct    = ff_fdct_ifast;
            c->fdct248 = ff_fdct_ifast248;
        }
        else if(avctx->dct_algo==FF_DCT_FAAN) {
            c->fdct    = ff_faandct;
            c->fdct248 = ff_faandct248;
        }
        else {
            c->fdct    = ff_jpeg_fdct_islow_8; //slow/accurate/default
            c->fdct248 = ff_fdct248_islow_8;
        }
    }
#endif //CONFIG_ENCODERS

    /* inverse DCT selection: lowres uses reduced-size jrev IDCTs */
    if(avctx->lowres==1){
        c->idct_put= ff_jref_idct4_put;
        c->idct_add= ff_jref_idct4_add;
        c->idct    = ff_j_rev_dct4;
        c->idct_permutation_type= FF_NO_IDCT_PERM;
    }else if(avctx->lowres==2){
        c->idct_put= ff_jref_idct2_put;
        c->idct_add= ff_jref_idct2_add;
        c->idct    = ff_j_rev_dct2;
        c->idct_permutation_type= FF_NO_IDCT_PERM;
    }else if(avctx->lowres==3){
        c->idct_put= ff_jref_idct1_put;
        c->idct_add= ff_jref_idct1_add;
        c->idct    = ff_j_rev_dct1;
        c->idct_permutation_type= FF_NO_IDCT_PERM;
    }else{
        if (avctx->bits_per_raw_sample == 10) {
            c->idct_put              = ff_simple_idct_put_10;
            c->idct_add              = ff_simple_idct_add_10;
            c->idct                  = ff_simple_idct_10;
            c->idct_permutation_type = FF_NO_IDCT_PERM;
        } else if (avctx->bits_per_raw_sample == 12) {
            c->idct_put              = ff_simple_idct_put_12;
            c->idct_add              = ff_simple_idct_add_12;
            c->idct                  = ff_simple_idct_12;
            c->idct_permutation_type = FF_NO_IDCT_PERM;
        } else {
        if(avctx->idct_algo==FF_IDCT_INT){
            c->idct_put= jref_idct_put;
            c->idct_add= jref_idct_add;
            c->idct    = ff_j_rev_dct;
            c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
        }else if(avctx->idct_algo==FF_IDCT_FAAN){
            c->idct_put= ff_faanidct_put;
            c->idct_add= ff_faanidct_add;
            c->idct    = ff_faanidct;
            c->idct_permutation_type= FF_NO_IDCT_PERM;
        }else{ //accurate/default
            c->idct_put = ff_simple_idct_put_8;
            c->idct_add = ff_simple_idct_add_8;
            c->idct     = ff_simple_idct_8;
            c->idct_permutation_type= FF_NO_IDCT_PERM;
        }
        }
    }

    /* pixel helpers */
    c->diff_pixels = diff_pixels_c;
    c->put_pixels_clamped = put_pixels_clamped_c;
    c->put_signed_pixels_clamped = put_signed_pixels_clamped_c;
    c->add_pixels_clamped = add_pixels_clamped_c;
    c->sum_abs_dctelem = sum_abs_dctelem_c;
    c->gmc1 = gmc1_c;
    c->gmc = ff_gmc_c;
    c->pix_sum = pix_sum_c;
    c->pix_norm1 = pix_norm1_c;

    c->fill_block_tab[0] = fill_block16_c;
    c->fill_block_tab[1] = fill_block8_c;

    /* TODO [0] 16  [1] 8 */
    c->pix_abs[0][0] = pix_abs16_c;
    c->pix_abs[0][1] = pix_abs16_x2_c;
    c->pix_abs[0][2] = pix_abs16_y2_c;
    c->pix_abs[0][3] = pix_abs16_xy2_c;
    c->pix_abs[1][0] = pix_abs8_c;
    c->pix_abs[1][1] = pix_abs8_x2_c;
    c->pix_abs[1][2] = pix_abs8_y2_c;
    c->pix_abs[1][3] = pix_abs8_xy2_c;

    /* third-pel motion compensation (SVQ3); indices 3,7,11+ unused */
    c->put_tpel_pixels_tab[ 0] = put_tpel_pixels_mc00_c;
    c->put_tpel_pixels_tab[ 1] = put_tpel_pixels_mc10_c;
    c->put_tpel_pixels_tab[ 2] = put_tpel_pixels_mc20_c;
    c->put_tpel_pixels_tab[ 4] = put_tpel_pixels_mc01_c;
    c->put_tpel_pixels_tab[ 5] = put_tpel_pixels_mc11_c;
    c->put_tpel_pixels_tab[ 6] = put_tpel_pixels_mc21_c;
    c->put_tpel_pixels_tab[ 8] = put_tpel_pixels_mc02_c;
    c->put_tpel_pixels_tab[ 9] = put_tpel_pixels_mc12_c;
    c->put_tpel_pixels_tab[10] = put_tpel_pixels_mc22_c;

    c->avg_tpel_pixels_tab[ 0] = avg_tpel_pixels_mc00_c;
    c->avg_tpel_pixels_tab[ 1] = avg_tpel_pixels_mc10_c;
    c->avg_tpel_pixels_tab[ 2] = avg_tpel_pixels_mc20_c;
    c->avg_tpel_pixels_tab[ 4] = avg_tpel_pixels_mc01_c;
    c->avg_tpel_pixels_tab[ 5] = avg_tpel_pixels_mc11_c;
    c->avg_tpel_pixels_tab[ 6] = avg_tpel_pixels_mc21_c;
    c->avg_tpel_pixels_tab[ 8] = avg_tpel_pixels_mc02_c;
    c->avg_tpel_pixels_tab[ 9] = avg_tpel_pixels_mc12_c;
    c->avg_tpel_pixels_tab[10] = avg_tpel_pixels_mc22_c;

/* fill one 16-entry quarter-pel MC table from the generated _mcXY_c set */
#define dspfunc(PFX, IDX, NUM) \
    c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_c; \
    c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_c; \
    c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_c; \
    c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_c; \
    c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_c; \
    c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_c; \
    c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_c; \
    c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_c; \
    c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_c; \
    c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_c; \
    c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_c; \
    c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_c; \
    c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_c; \
    c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_c; \
    c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_c; \
    c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_c

    dspfunc(put_qpel, 0, 16);
    dspfunc(put_no_rnd_qpel, 0, 16);

    dspfunc(avg_qpel, 0, 16);
    /* dspfunc(avg_no_rnd_qpel, 0, 16); */

    dspfunc(put_qpel, 1, 8);
    dspfunc(put_no_rnd_qpel, 1, 8);

    dspfunc(avg_qpel, 1, 8);
    /* dspfunc(avg_no_rnd_qpel, 1, 8); */

#undef dspfunc

    c->put_mspel_pixels_tab[0]= ff_put_pixels8x8_c;
    c->put_mspel_pixels_tab[1]= put_mspel8_mc10_c;
    c->put_mspel_pixels_tab[2]= put_mspel8_mc20_c;
    c->put_mspel_pixels_tab[3]= put_mspel8_mc30_c;
    c->put_mspel_pixels_tab[4]= put_mspel8_mc02_c;
    c->put_mspel_pixels_tab[5]= put_mspel8_mc12_c;
    c->put_mspel_pixels_tab[6]= put_mspel8_mc22_c;
    c->put_mspel_pixels_tab[7]= put_mspel8_mc32_c;

/* [0] = 16-wide variant, [1] = 8x8 variant of a comparison function */
#define SET_CMP_FUNC(name) \
    c->name[0]= name ## 16_c;\
    c->name[1]= name ## 8x8_c;

    SET_CMP_FUNC(hadamard8_diff)
    c->hadamard8_diff[4]= hadamard8_intra16_c;
    c->hadamard8_diff[5]= hadamard8_intra8x8_c;
    SET_CMP_FUNC(dct_sad)
    SET_CMP_FUNC(dct_max)
#if CONFIG_GPL
    SET_CMP_FUNC(dct264_sad)
#endif
    c->sad[0]= pix_abs16_c;
    c->sad[1]= pix_abs8_c;
    c->sse[0]= sse16_c;
    c->sse[1]= sse8_c;
    c->sse[2]= sse4_c;
    SET_CMP_FUNC(quant_psnr)
    SET_CMP_FUNC(rd)
    SET_CMP_FUNC(bit)
    c->vsad[0]= vsad16_c;
    c->vsad[4]= vsad_intra16_c;
    c->vsad[5]= vsad_intra8_c;
    c->vsse[0]= vsse16_c;
    c->vsse[4]= vsse_intra16_c;
    c->vsse[5]= vsse_intra8_c;
    c->nsse[0]= nsse16_c;
    c->nsse[1]= nsse8_c;
#if CONFIG_SNOW_DECODER || CONFIG_SNOW_ENCODER
    ff_dsputil_init_dwt(c);
#endif

    c->ssd_int8_vs_int16 = ssd_int8_vs_int16_c;

    c->add_bytes= add_bytes_c;
    c->diff_bytes= diff_bytes_c;
    c->add_hfyu_median_prediction= add_hfyu_median_prediction_c;
    c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
    c->add_hfyu_left_prediction  = add_hfyu_left_prediction_c;
    c->add_hfyu_left_prediction_bgr32 = add_hfyu_left_prediction_bgr32_c;
    c->bswap_buf= bswap_buf;
    c->bswap16_buf = bswap16_buf;

    if (CONFIG_H263_DECODER || CONFIG_H263_ENCODER) {
        c->h263_h_loop_filter= h263_h_loop_filter_c;
        c->h263_v_loop_filter= h263_v_loop_filter_c;
    }

    c->try_8x8basis= try_8x8basis_c;
    c->add_8x8basis= add_8x8basis_c;

    c->vector_clipf = vector_clipf_c;
    c->scalarproduct_int16 = scalarproduct_int16_c;
    c->scalarproduct_and_madd_int16 = scalarproduct_and_madd_int16_c;
    c->apply_window_int16 = apply_window_int16_c;
    c->vector_clip_int32 = vector_clip_int32_c;

    c->shrink[0]= av_image_copy_plane;
    c->shrink[1]= ff_shrink22;
    c->shrink[2]= ff_shrink44;
    c->shrink[3]= ff_shrink88;

    c->add_pixels8 = add_pixels8_c;

#undef FUNC
#undef FUNCC
#define FUNC(f, depth) f ## _ ## depth
#define FUNCC(f, depth) f ## _ ## depth ## _c

    c->draw_edges                    = FUNCC(draw_edges, 8);
    c->clear_block                   = FUNCC(clear_block, 8);
    c->clear_blocks                  = FUNCC(clear_blocks, 8);

#define BIT_DEPTH_FUNCS(depth) \
    c->get_pixels                    = FUNCC(get_pixels,   depth);

    /* bit-depth dependent entry points: >8-bit video uses the 16-bit
     * template instantiation of get_pixels */
    switch (avctx->bits_per_raw_sample) {
    case 9:
    case 10:
    case 12:
    case 14:
        BIT_DEPTH_FUNCS(16);
        break;
    default:
        if(avctx->bits_per_raw_sample<=8 || avctx->codec_type != AVMEDIA_TYPE_VIDEO) {
            BIT_DEPTH_FUNCS(8);
        }
        break;
    }


    /* per-architecture overrides of the C implementations above */
    if (ARCH_ALPHA)
        ff_dsputil_init_alpha(c, avctx);
    if (ARCH_ARM)
        ff_dsputil_init_arm(c, avctx);
    if (ARCH_BFIN)
        ff_dsputil_init_bfin(c, avctx);
    if (ARCH_PPC)
        ff_dsputil_init_ppc(c, avctx);
    if (ARCH_SH4)
        ff_dsputil_init_sh4(c, avctx);
    if (HAVE_VIS)
        ff_dsputil_init_vis(c, avctx);
    if (ARCH_X86)
        ff_dsputil_init_x86(c, avctx);

    ff_init_scantable_permutation(c->idct_permutation,
                                  c->idct_permutation_type);
}
/* Deprecated public alias for ff_dsputil_init(). */
av_cold void dsputil_init(DSPContext* c, AVCodecContext *avctx)
{
    ff_dsputil_init(c, avctx);
}
/* Library-private alias for ff_dsputil_init(). */
av_cold void avpriv_dsputil_init(DSPContext *c, AVCodecContext *avctx)
{
    ff_dsputil_init(c, avctx);
}