Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
4349 Serge 1
/*
2
 * VC-1 and WMV3 decoder - DSP functions
3
 * Copyright (c) 2006 Konstantin Shishkov
4
 *
5
 * This file is part of FFmpeg.
6
 *
7
 * FFmpeg is free software; you can redistribute it and/or
8
 * modify it under the terms of the GNU Lesser General Public
9
 * License as published by the Free Software Foundation; either
10
 * version 2.1 of the License, or (at your option) any later version.
11
 *
12
 * FFmpeg is distributed in the hope that it will be useful,
13
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15
 * Lesser General Public License for more details.
16
 *
17
 * You should have received a copy of the GNU Lesser General Public
18
 * License along with FFmpeg; if not, write to the Free Software
19
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20
 */
21
 
22
/**
 * @file
 * VC-1 and WMV3 decoder - DSP functions
 */
27
 
28
#include "libavutil/avassert.h"
29
#include "libavutil/common.h"
30
#include "h264chroma.h"
31
#include "rnd_avg.h"
32
#include "vc1dsp.h"
33
 
34
 
35
/**
 * Apply the overlap transform across a horizontal edge.
 *
 * Processes 8 adjacent columns. In each column the four samples at rows
 * -2, -1, 0 and +1 relative to src are smoothed; the two samples next to
 * the edge are clipped to 8 bits. The rounding term toggles between 1 and
 * 0 from one column to the next.
 *
 * @param src    pointer to the first sample of row 0 (first row after the edge)
 * @param stride distance between vertically adjacent samples
 */
static void vc1_v_overlap_c(uint8_t *src, int stride)
{
    int col;
    int rnd = 1;

    for (col = 0; col < 8; col++, rnd = !rnd) {
        uint8_t *const px = src + col;
        const int a  = px[-2 * stride];
        const int b  = px[-stride];
        const int c  = px[0];
        const int d  = px[stride];
        const int d1 = (a - d + 3 + rnd) >> 3;
        const int d2 = (a - d + b - c + 4 - rnd) >> 3;

        px[-2 * stride] = a - d1;
        px[-stride]     = av_clip_uint8(b - d2);
        px[0]           = av_clip_uint8(c + d2);
        px[stride]      = d + d1;
    }
}
59
 
60
/**
 * Apply the overlap transform across a vertical edge.
 *
 * Processes 8 consecutive rows. In each row the four samples at columns
 * -2, -1, 0 and +1 relative to src are smoothed; the two samples next to
 * the edge are clipped to 8 bits. The rounding term toggles between 1 and
 * 0 from one row to the next.
 *
 * @param src    pointer to the first sample of column 0 (first column after the edge)
 * @param stride distance between vertically adjacent samples
 */
static void vc1_h_overlap_c(uint8_t *src, int stride)
{
    int row;
    int rnd = 1;

    for (row = 0; row < 8; row++, rnd = !rnd) {
        uint8_t *const px = src + row * stride;
        const int a  = px[-2];
        const int b  = px[-1];
        const int c  = px[0];
        const int d  = px[1];
        const int d1 = (a - d + 3 + rnd) >> 3;
        const int d2 = (a - d + b - c + 4 - rnd) >> 3;

        px[-2] = a - d1;
        px[-1] = av_clip_uint8(b - d2);
        px[0]  = av_clip_uint8(c + d2);
        px[1]  = d + d1;
    }
}
84
 
85
/**
 * Apply the overlap transform between two vertically adjacent coefficient
 * blocks stored as signed 16-bit values with 8 entries per row.
 *
 * For each of the 8 columns, rows 6 and 7 of top (offsets 48 and 56) and
 * rows 0 and 1 of bottom (offsets 0 and 8) are updated. The two rounding
 * terms alternate between 4 and 3 from column to column and always sum to 7.
 *
 * @param top    upper block, modified in place
 * @param bottom lower block, modified in place
 */
static void vc1_v_s_overlap_c(int16_t *top, int16_t *bottom)
{
    int col;

    for (col = 0; col < 8; col++) {
        const int rnd1 = (col & 1) ? 3 : 4;
        const int rnd2 = 7 - rnd1;
        const int a    = top[48 + col];
        const int b    = top[56 + col];
        const int c    = bottom[col];
        const int d    = bottom[8 + col];
        const int d1   = a - d;
        const int d2   = a - d + b - c;

        top[48 + col]   = ((a << 3) - d1 + rnd1) >> 3;
        top[56 + col]   = ((b << 3) - d2 + rnd2) >> 3;
        bottom[col]     = ((c << 3) + d2 + rnd1) >> 3;
        bottom[8 + col] = ((d << 3) + d1 + rnd2) >> 3;
    }
}
110
 
111
/**
 * Apply the overlap transform between two horizontally adjacent coefficient
 * blocks stored as signed 16-bit values with 8 entries per row.
 *
 * For each of the 8 rows, columns 6 and 7 of left and columns 0 and 1 of
 * right are updated. The two rounding terms alternate between 4 and 3 from
 * row to row and always sum to 7.
 *
 * @param left  left block, modified in place
 * @param right right block, modified in place
 */
static void vc1_h_s_overlap_c(int16_t *left, int16_t *right)
{
    int row;

    for (row = 0; row < 8; row++) {
        int16_t *const l = left  + 8 * row;
        int16_t *const r = right + 8 * row;
        const int rnd1 = (row & 1) ? 3 : 4;
        const int rnd2 = 7 - rnd1;
        const int a    = l[6];
        const int b    = l[7];
        const int c    = r[0];
        const int d    = r[1];
        const int d1   = a - d;
        const int d2   = a - d + b - c;

        l[6] = ((a << 3) - d1 + rnd1) >> 3;
        l[7] = ((b << 3) - d2 + rnd2) >> 3;
        r[0] = ((c << 3) + d2 + rnd1) >> 3;
        r[1] = ((d << 3) + d1 + rnd2) >> 3;
    }
}
136
 
137
/**
138
 * VC-1 in-loop deblocking filter for one line
139
 * @param src source block type
140
 * @param stride block stride
141
 * @param pq block quantizer
142
 * @return whether other 3 pairs should be filtered or not
143
 * @see 8.6
144
 */
145
static av_always_inline int vc1_filter_line(uint8_t* src, int stride, int pq){
146
    int a0 = (2*(src[-2*stride] - src[ 1*stride]) - 5*(src[-1*stride] - src[ 0*stride]) + 4) >> 3;
147
    int a0_sign = a0 >> 31;        /* Store sign */
148
    a0 = (a0 ^ a0_sign) - a0_sign; /* a0 = FFABS(a0); */
149
    if(a0 < pq){
150
        int a1 = FFABS((2*(src[-4*stride] - src[-1*stride]) - 5*(src[-3*stride] - src[-2*stride]) + 4) >> 3);
151
        int a2 = FFABS((2*(src[ 0*stride] - src[ 3*stride]) - 5*(src[ 1*stride] - src[ 2*stride]) + 4) >> 3);
152
        if(a1 < a0 || a2 < a0){
153
            int clip = src[-1*stride] - src[ 0*stride];
154
            int clip_sign = clip >> 31;
155
            clip = ((clip ^ clip_sign) - clip_sign)>>1;
156
            if(clip){
157
                int a3 = FFMIN(a1, a2);
158
                int d = 5 * (a3 - a0);
159
                int d_sign = (d >> 31);
160
                d = ((d ^ d_sign) - d_sign) >> 3;
161
                d_sign ^= a0_sign;
162
 
163
                if( d_sign ^ clip_sign )
164
                    d = 0;
165
                else{
166
                    d = FFMIN(d, clip);
167
                    d = (d ^ d_sign) - d_sign;          /* Restore sign */
168
                    src[-1*stride] = av_clip_uint8(src[-1*stride] - d);
169
                    src[ 0*stride] = av_clip_uint8(src[ 0*stride] + d);
170
                }
171
                return 1;
172
            }
173
        }
174
    }
175
    return 0;
176
}
177
 
178
/**
 * VC-1 in-loop deblocking filter.
 *
 * Lines are handled in groups of 4. The third line of each group is
 * filtered first; only if vc1_filter_line() reports success for it are the
 * remaining three lines of the group filtered.
 *
 * @param src    pointer to the first sample after the edge on the first line
 * @param step   distance between consecutive lines along the edge
 * @param stride distance between consecutive samples of one line (across the edge)
 * @param len    edge length to filter; the loop advances 4 lines at a time
 * @param pq     block quantizer
 * @see 8.6
 */
static inline void vc1_loop_filter(uint8_t *src, int step, int stride, int len, int pq)
{
    int done;

    for (done = 0; done < len; done += 4, src += 4 * step) {
        if (!vc1_filter_line(src + 2 * step, stride, pq))
            continue;

        vc1_filter_line(src + 0 * step, stride, pq);
        vc1_filter_line(src + 1 * step, stride, pq);
        vc1_filter_line(src + 3 * step, stride, pq);
    }
}
202
 
203
/** Deblock 4 lines that lie 1 sample apart, with filter taps spaced stride apart. */
static void vc1_v_loop_filter4_c(uint8_t *src, int stride, int pq)
{
    const int line_step = 1;
    const int edge_len  = 4;

    vc1_loop_filter(src, line_step, stride, edge_len, pq);
}
207
 
208
/** Deblock 4 lines that lie stride apart, with filter taps spaced 1 sample apart. */
static void vc1_h_loop_filter4_c(uint8_t *src, int stride, int pq)
{
    const int tap_step = 1;
    const int edge_len = 4;

    vc1_loop_filter(src, stride, tap_step, edge_len, pq);
}
212
 
213
/** Deblock 8 lines that lie 1 sample apart, with filter taps spaced stride apart. */
static void vc1_v_loop_filter8_c(uint8_t *src, int stride, int pq)
{
    const int line_step = 1;
    const int edge_len  = 8;

    vc1_loop_filter(src, line_step, stride, edge_len, pq);
}
217
 
218
/** Deblock 8 lines that lie stride apart, with filter taps spaced 1 sample apart. */
static void vc1_h_loop_filter8_c(uint8_t *src, int stride, int pq)
{
    const int tap_step = 1;
    const int edge_len = 8;

    vc1_loop_filter(src, stride, tap_step, edge_len, pq);
}
222
 
223
/** Deblock 16 lines that lie 1 sample apart, with filter taps spaced stride apart. */
static void vc1_v_loop_filter16_c(uint8_t *src, int stride, int pq)
{
    const int line_step = 1;
    const int edge_len  = 16;

    vc1_loop_filter(src, line_step, stride, edge_len, pq);
}
227
 
228
/** Deblock 16 lines that lie stride apart, with filter taps spaced 1 sample apart. */
static void vc1_h_loop_filter16_c(uint8_t *src, int stride, int pq)
{
    const int tap_step = 1;
    const int edge_len = 16;

    vc1_loop_filter(src, stride, tap_step, edge_len, pq);
}
232
 
233
/**
 * Inverse transform of an 8x8 block that has only a DC coefficient.
 *
 * The DC value is scaled in two rounding steps and then added, with
 * clipping to 8 bits, to every sample of the 8x8 destination area.
 *
 * @param dest     top-left sample of the destination area
 * @param linesize distance between vertically adjacent destination samples
 * @param block    coefficient block; only block[0] is read
 */
static void vc1_inv_trans_8x8_dc_c(uint8_t *dest, int linesize, int16_t *block)
{
    int row, col;
    int dc = block[0];

    dc = (3 * dc +  1) >> 1;
    dc = (3 * dc + 16) >> 5;

    for (row = 0; row < 8; row++, dest += linesize)
        for (col = 0; col < 8; col++)
            dest[col] = av_clip_uint8(dest[col] + dc);
}
253
 
254
/**
 * In-place inverse transform of an 8x8 coefficient block.
 *
 * Two 8-point passes are run. The first reads each column of block and
 * writes the result as a row of a temporary buffer (rounding 4, shift 3).
 * The second reads each column of that buffer and writes the result back
 * as a column of block (rounding 64, shift 7, with an extra +1 on the
 * lower four outputs).
 *
 * @param block 64 coefficients, 8 per row; overwritten with the result
 */
static void vc1_inv_trans_8x8_c(int16_t block[64])
{
    int16_t temp[64];
    int col, k;

    /* Pass 1: column 'col' of block -> row 'col' of temp. */
    for (col = 0; col < 8; col++) {
        const int16_t *in  = block + col;
        int16_t       *out = temp + 8 * col;
        const int ea = 12 * (in[ 0] + in[32]) + 4;
        const int eb = 12 * (in[ 0] - in[32]) + 4;
        const int ec = 16 * in[16] +  6 * in[48];
        const int ed =  6 * in[16] - 16 * in[48];
        const int even[4] = { ea + ec, eb + ed, eb - ed, ea - ec };
        const int odd[4]  = {
            16 * in[ 8] + 15 * in[24] +  9 * in[40] +  4 * in[56],
            15 * in[ 8] -  4 * in[24] - 16 * in[40] -  9 * in[56],
             9 * in[ 8] - 16 * in[24] +  4 * in[40] + 15 * in[56],
             4 * in[ 8] -  9 * in[24] + 15 * in[40] - 16 * in[56],
        };

        for (k = 0; k < 4; k++) {
            out[k]     = (even[k] + odd[k]) >> 3;
            out[7 - k] = (even[k] - odd[k]) >> 3;
        }
    }

    /* Pass 2: column 'col' of temp -> column 'col' of block. */
    for (col = 0; col < 8; col++) {
        const int16_t *in  = temp + col;
        int16_t       *out = block + col;
        const int ea = 12 * (in[ 0] + in[32]) + 64;
        const int eb = 12 * (in[ 0] - in[32]) + 64;
        const int ec = 16 * in[16] +  6 * in[48];
        const int ed =  6 * in[16] - 16 * in[48];
        const int even[4] = { ea + ec, eb + ed, eb - ed, ea - ec };
        const int odd[4]  = {
            16 * in[ 8] + 15 * in[24] +  9 * in[40] +  4 * in[56],
            15 * in[ 8] -  4 * in[24] - 16 * in[40] -  9 * in[56],
             9 * in[ 8] - 16 * in[24] +  4 * in[40] + 15 * in[56],
             4 * in[ 8] -  9 * in[24] + 15 * in[40] - 16 * in[56],
        };

        for (k = 0; k < 4; k++) {
            out[8 * k]       = (even[k] + odd[k]) >> 7;
            out[8 * (7 - k)] = (even[k] - odd[k] + 1) >> 7;
        }
    }
}
322
 
323
/**
 * Inverse transform of an 8x4 part of a block that has only a DC coefficient.
 *
 * The DC value is scaled in two rounding steps and then added, with
 * clipping to 8 bits, to 4 rows of 8 destination samples.
 *
 * @param dest     top-left sample of the destination area
 * @param linesize distance between vertically adjacent destination samples
 * @param block    coefficient block; only block[0] is read
 */
static void vc1_inv_trans_8x4_dc_c(uint8_t *dest, int linesize, int16_t *block)
{
    int row, col;
    int dc = block[0];

    dc = ( 3 * dc +  1) >> 1;
    dc = (17 * dc + 64) >> 7;

    for (row = 0; row < 4; row++, dest += linesize)
        for (col = 0; col < 8; col++)
            dest[col] = av_clip_uint8(dest[col] + dc);
}
343
 
344
/**
 * Inverse transform of an 8x4 part of a block, added to the destination.
 *
 * The first pass runs an 8-point transform over each of the first 4 rows
 * of block in place (rounding 4, shift 3). The second pass runs a 4-point
 * transform down each of the 8 columns (rounding 64, shift 7) and adds the
 * result, clipped to 8 bits, to the destination samples.
 *
 * @param dest     top-left sample of the destination area
 * @param linesize distance between vertically adjacent destination samples
 * @param block    coefficients, 8 per row; the first 4 rows are overwritten
 */
static void vc1_inv_trans_8x4_c(uint8_t *dest, int linesize, int16_t *block)
{
    int row, col, k;

    /* Pass 1: 8-point transform of each row, in place. */
    for (row = 0; row < 4; row++) {
        int16_t *line = block + 8 * row;
        const int ea = 12 * (line[0] + line[4]) + 4;
        const int eb = 12 * (line[0] - line[4]) + 4;
        const int ec = 16 * line[2] +  6 * line[6];
        const int ed =  6 * line[2] - 16 * line[6];
        const int even[4] = { ea + ec, eb + ed, eb - ed, ea - ec };
        const int odd[4]  = {
            16 * line[1] + 15 * line[3] +  9 * line[5] +  4 * line[7],
            15 * line[1] -  4 * line[3] - 16 * line[5] -  9 * line[7],
             9 * line[1] - 16 * line[3] +  4 * line[5] + 15 * line[7],
             4 * line[1] -  9 * line[3] + 15 * line[5] - 16 * line[7],
        };

        for (k = 0; k < 4; k++) {
            line[k]     = (even[k] + odd[k]) >> 3;
            line[7 - k] = (even[k] - odd[k]) >> 3;
        }
    }

    /* Pass 2: 4-point transform of each column, accumulated into dest. */
    for (col = 0; col < 8; col++) {
        const int16_t *in  = block + col;
        uint8_t       *out = dest + col;
        const int sum  = 17 * (in[ 0] + in[16]) + 64;
        const int diff = 17 * (in[ 0] - in[16]) + 64;
        const int oa   = 22 * in[ 8] + 10 * in[24];
        const int ob   = 22 * in[24] - 10 * in[ 8];

        out[0 * linesize] = av_clip_uint8(out[0 * linesize] + ((sum  + oa) >> 7));
        out[1 * linesize] = av_clip_uint8(out[1 * linesize] + ((diff - ob) >> 7));
        out[2 * linesize] = av_clip_uint8(out[2 * linesize] + ((diff + ob) >> 7));
        out[3 * linesize] = av_clip_uint8(out[3 * linesize] + ((sum  - oa) >> 7));
    }
}
397
 
398
/**
 * Inverse transform of a 4x8 part of a block that has only a DC coefficient.
 *
 * The DC value is scaled in two rounding steps and then added, with
 * clipping to 8 bits, to 8 rows of 4 destination samples.
 *
 * @param dest     top-left sample of the destination area
 * @param linesize distance between vertically adjacent destination samples
 * @param block    coefficient block; only block[0] is read
 */
static void vc1_inv_trans_4x8_dc_c(uint8_t *dest, int linesize, int16_t *block)
{
    int row, col;
    int dc = block[0];

    dc = (17 * dc +  4) >> 3;
    dc = (12 * dc + 64) >> 7;

    for (row = 0; row < 8; row++, dest += linesize)
        for (col = 0; col < 4; col++)
            dest[col] = av_clip_uint8(dest[col] + dc);
}
414
 
415
/**
 * Inverse transform of a 4x8 part of a block, added to the destination.
 *
 * The first pass runs a 4-point transform over the first 4 entries of each
 * of the 8 rows of block in place (rounding 4, shift 3). The second pass
 * runs an 8-point transform down each of the 4 columns (rounding 64,
 * shift 7, with an extra +1 on the lower four outputs) and adds the
 * result, clipped to 8 bits, to the destination samples.
 *
 * @param dest     top-left sample of the destination area
 * @param linesize distance between vertically adjacent destination samples
 * @param block    coefficients, 8 per row; columns 0..3 are overwritten
 */
static void vc1_inv_trans_4x8_c(uint8_t *dest, int linesize, int16_t *block)
{
    int row, col, k;

    /* Pass 1: 4-point transform of each row, in place. */
    for (row = 0; row < 8; row++) {
        int16_t *line = block + 8 * row;
        const int sum  = 17 * (line[0] + line[2]) + 4;
        const int diff = 17 * (line[0] - line[2]) + 4;
        const int oa   = 22 * line[1] + 10 * line[3];
        const int ob   = 22 * line[3] - 10 * line[1];

        line[0] = (sum  + oa) >> 3;
        line[1] = (diff - ob) >> 3;
        line[2] = (diff + ob) >> 3;
        line[3] = (sum  - oa) >> 3;
    }

    /* Pass 2: 8-point transform of each column, accumulated into dest. */
    for (col = 0; col < 4; col++) {
        const int16_t *in  = block + col;
        uint8_t       *out = dest + col;
        const int ea = 12 * (in[ 0] + in[32]) + 64;
        const int eb = 12 * (in[ 0] - in[32]) + 64;
        const int ec = 16 * in[16] +  6 * in[48];
        const int ed =  6 * in[16] - 16 * in[48];
        const int even[4] = { ea + ec, eb + ed, eb - ed, ea - ec };
        const int odd[4]  = {
            16 * in[ 8] + 15 * in[24] +  9 * in[40] +  4 * in[56],
            15 * in[ 8] -  4 * in[24] - 16 * in[40] -  9 * in[56],
             9 * in[ 8] - 16 * in[24] +  4 * in[40] + 15 * in[56],
             4 * in[ 8] -  9 * in[24] + 15 * in[40] - 16 * in[56],
        };

        /* Rows 0..3 use even + odd. */
        for (k = 0; k < 4; k++)
            out[k * linesize] = av_clip_uint8(out[k * linesize] +
                                              ((even[k] + odd[k]) >> 7));
        /* Rows 4..7 mirror them with even - odd and one extra unit of rounding. */
        for (k = 0; k < 4; k++)
            out[(4 + k) * linesize] = av_clip_uint8(out[(4 + k) * linesize] +
                                                    ((even[3 - k] - odd[3 - k] + 1) >> 7));
    }
}
468
 
469
/**
 * Inverse transform of a 4x4 part of a block that has only a DC coefficient.
 *
 * The DC value is scaled in two rounding steps and then added, with
 * clipping to 8 bits, to 4 rows of 4 destination samples.
 *
 * @param dest     top-left sample of the destination area
 * @param linesize distance between vertically adjacent destination samples
 * @param block    coefficient block; only block[0] is read
 */
static void vc1_inv_trans_4x4_dc_c(uint8_t *dest, int linesize, int16_t *block)
{
    int row, col;
    int dc = block[0];

    dc = (17 * dc +  4) >> 3;
    dc = (17 * dc + 64) >> 7;

    for (row = 0; row < 4; row++, dest += linesize)
        for (col = 0; col < 4; col++)
            dest[col] = av_clip_uint8(dest[col] + dc);
}
485
 
486
/**
 * Inverse transform of a 4x4 part of a block, added to the destination.
 *
 * The first pass runs a 4-point transform over the first 4 entries of each
 * of the first 4 rows of block in place (rounding 4, shift 3). The second
 * pass runs the same transform down each of the 4 columns (rounding 64,
 * shift 7) and adds the result, clipped to 8 bits, to the destination.
 *
 * @param dest     top-left sample of the destination area
 * @param linesize distance between vertically adjacent destination samples
 * @param block    coefficients, 8 per row; the top-left 4x4 is overwritten
 */
static void vc1_inv_trans_4x4_c(uint8_t *dest, int linesize, int16_t *block)
{
    int row, col;

    /* Pass 1: rows, in place. */
    for (row = 0; row < 4; row++) {
        int16_t *line = block + 8 * row;
        const int sum  = 17 * (line[0] + line[2]) + 4;
        const int diff = 17 * (line[0] - line[2]) + 4;
        const int oa   = 22 * line[1] + 10 * line[3];
        const int ob   = 22 * line[3] - 10 * line[1];

        line[0] = (sum  + oa) >> 3;
        line[1] = (diff - ob) >> 3;
        line[2] = (diff + ob) >> 3;
        line[3] = (sum  - oa) >> 3;
    }

    /* Pass 2: columns, accumulated into dest. */
    for (col = 0; col < 4; col++) {
        const int16_t *in  = block + col;
        uint8_t       *out = dest + col;
        const int sum  = 17 * (in[ 0] + in[16]) + 64;
        const int diff = 17 * (in[ 0] - in[16]) + 64;
        const int oa   = 22 * in[ 8] + 10 * in[24];
        const int ob   = 22 * in[24] - 10 * in[ 8];

        out[0 * linesize] = av_clip_uint8(out[0 * linesize] + ((sum  + oa) >> 7));
        out[1 * linesize] = av_clip_uint8(out[1 * linesize] + ((diff - ob) >> 7));
        out[2 * linesize] = av_clip_uint8(out[2 * linesize] + ((diff + ob) >> 7));
        out[3 * linesize] = av_clip_uint8(out[3 * linesize] + ((sum  - oa) >> 7));
    }
}
525
 
526
/* motion compensation functions */
527
/** Filter in case of 2 filters */
528
#define VC1_MSPEL_FILTER_16B(DIR, TYPE)                                 \
529
static av_always_inline int vc1_mspel_ ## DIR ## _filter_16bits(const TYPE *src, int stride, int mode) \
530
{                                                                       \
531
    switch(mode){                                                       \
532
    case 0: /* no shift - should not occur */                           \
533
        return 0;                                                       \
534
    case 1: /* 1/4 shift */                                             \
535
        return -4*src[-stride] + 53*src[0] + 18*src[stride] - 3*src[stride*2]; \
536
    case 2: /* 1/2 shift */                                             \
537
        return -src[-stride] + 9*src[0] + 9*src[stride] - src[stride*2]; \
538
    case 3: /* 3/4 shift */                                             \
539
        return -3*src[-stride] + 18*src[0] + 53*src[stride] - 4*src[stride*2]; \
540
    }                                                                   \
541
    return 0; /* should not occur */                                    \
542
}
543
 
544
VC1_MSPEL_FILTER_16B(ver, uint8_t)
545
VC1_MSPEL_FILTER_16B(hor, int16_t)
546
 
547
 
548
/** Filter used to interpolate fractional pel values
549
 */
550
static av_always_inline int vc1_mspel_filter(const uint8_t *src, int stride, int mode, int r)
551
{
552
    switch(mode){
553
    case 0: //no shift
554
        return src[0];
555
    case 1: // 1/4 shift
556
        return (-4*src[-stride] + 53*src[0] + 18*src[stride] - 3*src[stride*2] + 32 - r) >> 6;
557
    case 2: // 1/2 shift
558
        return (-src[-stride] + 9*src[0] + 9*src[stride] - src[stride*2] + 8 - r) >> 4;
559
    case 3: // 3/4 shift
560
        return (-3*src[-stride] + 18*src[0] + 53*src[stride] - 4*src[stride*2] + 32 - r) >> 6;
561
    }
562
    return 0; //should not occur
563
}
564
 
565
/**
 * Generate the 8x8 motion compensation routines for one store operation.
 *
 * <OPNAME>vc1_mspel_mc() does bicubic interpolation of an 8x8 block:
 *  - vmode and hmode both non-zero: a vertical pass writes 8 rows of 11
 *    intermediate values into a temporary buffer, then a horizontal pass
 *    over that buffer produces the output;
 *  - only vmode non-zero: vertical filter only;
 *  - otherwise: horizontal filter with hmode.
 *
 * <OPNAME>pixels8x8_c() stores an 8x8 block using two 32-bit operations
 * per row; its rnd argument is not used.
 *
 * @param OP     per-sample store operation, used as OP(destination, value)
 * @param OP4    32-bit store operation, used as OP4(destination, value)
 * @param OPNAME prefix of the generated function names
 */
#define VC1_MSPEL_MC(OP, OP4, OPNAME)\
static av_always_inline void OPNAME ## vc1_mspel_mc(uint8_t *dst, const uint8_t *src, ptrdiff_t stride, int hmode, int vmode, int rnd)\
{\
    int x, y;\
\
    if (vmode && hmode) { /* both filters: vertical into tmp, then horizontal */\
        static const int shift_value[] = { 0, 5, 1, 5 };\
        const int shift = (shift_value[hmode] + shift_value[vmode]) >> 1;\
        int16_t   tmp[11 * 8];\
        int16_t  *line = tmp;\
        int       r    = (1 << (shift - 1)) + rnd - 1;\
\
        src -= 1;\
        for (y = 0; y < 8; y++) {\
            for (x = 0; x < 11; x++)\
                line[x] = (vc1_mspel_ver_filter_16bits(src + x, stride, vmode) + r) >> shift;\
            src  += stride;\
            line += 11;\
        }\
\
        r    = 64 - rnd;\
        line = tmp + 1;\
        for (y = 0; y < 8; y++) {\
            for (x = 0; x < 8; x++)\
                OP(dst[x], (vc1_mspel_hor_filter_16bits(line + x, 1, hmode) + r) >> 7);\
            dst  += stride;\
            line += 11;\
        }\
        return;\
    }\
\
    if (vmode) { /* vertical filter only, output 8 lines to dst */\
        const int r = 1 - rnd;\
\
        for (y = 0; y < 8; y++) {\
            for (x = 0; x < 8; x++)\
                OP(dst[x], vc1_mspel_filter(src + x, stride, vmode, r));\
            src += stride;\
            dst += stride;\
        }\
        return;\
    }\
\
    /* horizontal mode with no vertical mode */\
    for (y = 0; y < 8; y++) {\
        for (x = 0; x < 8; x++)\
            OP(dst[x], vc1_mspel_filter(src + x, 1, hmode, rnd));\
        dst += stride;\
        src += stride;\
    }\
}\
static void OPNAME ## pixels8x8_c(uint8_t *block, const uint8_t *pixels, ptrdiff_t line_size, int rnd)\
{\
    int row;\
\
    for (row = 0; row < 8; row++) {\
        OP4(*(uint32_t *)(block),     AV_RN32(pixels));\
        OP4(*(uint32_t *)(block + 4), AV_RN32(pixels + 4));\
        block  += line_size;\
        pixels += line_size;\
    }\
}
631
 
632
#define op_put(a, b) a = av_clip_uint8(b)
633
#define op_avg(a, b) a = (a + av_clip_uint8(b) + 1) >> 1
634
#define op4_avg(a, b) a = rnd_avg32(a, b)
635
#define op4_put(a, b) a = b
636
 
637
VC1_MSPEL_MC(op_put, op4_put, put_)
638
VC1_MSPEL_MC(op_avg, op4_avg, avg_)
639
 
640
/* pixel functions - really are entry points to vc1_mspel_mc */

/**
 * Define <OPNAME>vc1_mspel_mc<a><b>_c(), which forwards to
 * <OPNAME>vc1_mspel_mc() with hmode = a and vmode = b.
 */
#define VC1_MSPEL_ENTRY(OPNAME, a, b)                                         \
static void OPNAME ## vc1_mspel_mc ## a ## b ## _c(uint8_t *dst,              \
                                                   const uint8_t *src,        \
                                                   ptrdiff_t stride, int rnd) \
{                                                                             \
    OPNAME ## vc1_mspel_mc(dst, src, stride, a, b, rnd);                      \
}

/** Define both the put_ and the avg_ entry point for the (a, b) mode pair. */
#define PUT_VC1_MSPEL(a, b)                                                   \
VC1_MSPEL_ENTRY(put_, a, b)                                                   \
VC1_MSPEL_ENTRY(avg_, a, b)

PUT_VC1_MSPEL(1, 0)
PUT_VC1_MSPEL(2, 0)
PUT_VC1_MSPEL(3, 0)

PUT_VC1_MSPEL(0, 1)
PUT_VC1_MSPEL(1, 1)
PUT_VC1_MSPEL(2, 1)
PUT_VC1_MSPEL(3, 1)

PUT_VC1_MSPEL(0, 2)
PUT_VC1_MSPEL(1, 2)
PUT_VC1_MSPEL(2, 2)
PUT_VC1_MSPEL(3, 2)

PUT_VC1_MSPEL(0, 3)
PUT_VC1_MSPEL(1, 3)
PUT_VC1_MSPEL(2, 3)
PUT_VC1_MSPEL(3, 3)
674
 
675
/**
 * Bilinear chroma motion compensation, 8 samples wide, "no rounding"
 * variant (bias of 32 - 4 before the final shift by 6).
 *
 * Each output sample is the weighted sum of the source sample, its right
 * neighbour, and the two samples one row below them.
 *
 * @param dst    destination, written 8 samples per row
 * @param src    source; 9 samples per row and one extra row are read
 * @param stride distance between vertically adjacent samples (dst and src)
 * @param h      number of rows to produce
 * @param x      horizontal fractional position, 0..7
 * @param y      vertical fractional position, 0..7
 */
static void put_no_rnd_vc1_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){
    const int A=(8-x)*(8-y);
    const int B=(  x)*(8-y);
    const int C=(8-x)*(  y);
    const int D=(  x)*(  y);
    int i, j;

    av_assert2(x<8 && y<8 && x>=0 && y>=0);

    for (i = 0; i < h; i++) {
        for (j = 0; j < 8; j++)
            dst[j] = (A*src[j] + B*src[j+1] + C*src[stride+j] + D*src[stride+j+1] + 32 - 4) >> 6;
        dst += stride;
        src += stride;
    }
}
698
 
699
/**
 * Bilinear chroma motion compensation, 4 samples wide, "no rounding"
 * variant (bias of 32 - 4 before the final shift by 6).
 *
 * Each output sample is the weighted sum of the source sample, its right
 * neighbour, and the two samples one row below them.
 *
 * @param dst    destination, written 4 samples per row
 * @param src    source; 5 samples per row and one extra row are read
 * @param stride distance between vertically adjacent samples (dst and src)
 * @param h      number of rows to produce
 * @param x      horizontal fractional position, 0..7
 * @param y      vertical fractional position, 0..7
 */
static void put_no_rnd_vc1_chroma_mc4_c(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y){
    const int A=(8-x)*(8-y);
    const int B=(  x)*(8-y);
    const int C=(8-x)*(  y);
    const int D=(  x)*(  y);
    int i, j;

    av_assert2(x<8 && y<8 && x>=0 && y>=0);

    for (i = 0; i < h; i++) {
        for (j = 0; j < 4; j++)
            dst[j] = (A*src[j] + B*src[j+1] + C*src[stride+j] + D*src[stride+j+1] + 32 - 4) >> 6;
        dst += stride;
        src += stride;
    }
}
718
 
719
/** Average of two values, rounded up. Arguments are parenthesized so any expression is safe. */
#define avg2(a,b) (((a) + (b) + 1) >> 1)

/**
 * Bilinear chroma motion compensation, 8 samples wide, "no rounding"
 * variant, averaged with the existing destination contents.
 *
 * The interpolated value (bias of 32 - 4 before the shift by 6) is combined
 * with the current destination sample through avg2().
 *
 * @param dst    destination, read and written 8 samples per row
 * @param src    source; 9 samples per row and one extra row are read
 * @param stride distance between vertically adjacent samples (dst and src)
 * @param h      number of rows to produce
 * @param x      horizontal fractional position, 0..7
 * @param y      vertical fractional position, 0..7
 */
static void avg_no_rnd_vc1_chroma_mc8_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){
    const int A=(8-x)*(8-y);
    const int B=(  x)*(8-y);
    const int C=(8-x)*(  y);
    const int D=(  x)*(  y);
    int i, j;

    av_assert2(x<8 && y<8 && x>=0 && y>=0);

    for (i = 0; i < h; i++) {
        for (j = 0; j < 8; j++)
            dst[j] = avg2(dst[j], ((A*src[j] + B*src[j+1] + C*src[stride+j] + D*src[stride+j+1] + 32 - 4) >> 6));
        dst += stride;
        src += stride;
    }
}
743
 
744
/**
 * Bilinear chroma motion compensation, 4 samples wide, "no rounding"
 * variant, averaged with the existing destination contents.
 *
 * The interpolated value (bias of 32 - 4 before the shift by 6) is combined
 * with the current destination sample through avg2().
 *
 * @param dst    destination, read and written 4 samples per row
 * @param src    source; 5 samples per row and one extra row are read
 * @param stride distance between vertically adjacent samples (dst and src)
 * @param h      number of rows to produce
 * @param x      horizontal fractional position, 0..7
 * @param y      vertical fractional position, 0..7
 */
static void avg_no_rnd_vc1_chroma_mc4_c(uint8_t *dst/*align 8*/, uint8_t *src/*align 1*/, int stride, int h, int x, int y){
    const int A=(8-x)*(8-y);
    const int B=(  x)*(8-y);
    const int C=(8-x)*(  y);
    const int D=(  x)*(  y);
    int i, j;

    av_assert2(x<8 && y<8 && x>=0 && y>=0);

    for (i = 0; i < h; i++) {
        for (j = 0; j < 4; j++)
            dst[j] = avg2(dst[j], ((A*src[j] + B*src[j+1] + C*src[stride+j] + D*src[stride+j+1] + 32 - 4) >> 6));
        dst += stride;
        src += stride;
    }
}
763
 
764
#if CONFIG_WMV3IMAGE_DECODER || CONFIG_VC1IMAGE_DECODER
765
 
766
/**
 * Horizontally resample one line with linear interpolation.
 *
 * offset is a 16.16 fixed-point source position: the upper bits select the
 * left source sample, the low 16 bits weight its right neighbour.
 *
 * @param dst     output samples, count of them are written
 * @param src     source line
 * @param offset  starting source position (16.16 fixed point)
 * @param advance increment added to the position after every output sample
 * @param count   number of output samples
 */
static void sprite_h_c(uint8_t *dst, const uint8_t *src, int offset, int advance, int count)
{
    for (; count != 0; count--, offset += advance) {
        const int pos   = offset >> 16;
        const int frac  = offset & 0xFFFF;
        const int left  = src[pos];
        const int right = src[pos + 1];

        *dst++ = left + (((right - left) * frac) >> 16);
    }
}
775
 
776
/**
 * Common worker for the vertical sprite routines.
 *
 * For every output sample: take a sample from src1a; if scaled is non-zero
 * blend it towards src1b by offset1/65536; if two_sprites is non-zero take
 * a sample from src2a (blended towards src2b by offset2/65536 when
 * scaled > 1) and blend the first result towards it by alpha/65536.
 *
 * @param dst         output line, width samples are written
 * @param src1a       first line of sprite 1
 * @param src1b       second line of sprite 1, read only when scaled != 0
 * @param offset1     blend weight between src1a and src1b
 * @param two_sprites non-zero to mix in a second sprite
 * @param src2a       first line of sprite 2, read only when two_sprites != 0
 * @param src2b       second line of sprite 2, read only when scaled > 1
 * @param offset2     blend weight between src2a and src2b
 * @param alpha       blend weight between sprite 1 and sprite 2
 * @param scaled      0: no line blending, 1: sprite 1 only, 2: both sprites
 * @param width       number of samples to produce
 */
static av_always_inline void sprite_v_template(uint8_t *dst, const uint8_t *src1a, const uint8_t *src1b, int offset1,
                                            int two_sprites, const uint8_t *src2a, const uint8_t *src2b, int offset2,
                                            int alpha, int scaled, int width)
{
    int i;

    for (i = 0; width != 0; width--, i++) {
        int px = src1a[i];

        if (scaled) {
            const int next = src1b[i];
            px = px + (((next - px) * offset1) >> 16);
        }
        if (two_sprites) {
            int px2 = src2a[i];

            if (scaled > 1) {
                const int next2 = src2b[i];
                px2 = px2 + (((next2 - px2) * offset2) >> 16);
            }
            px = px + (((px2 - px) * alpha) >> 16);
        }
        dst[i] = px;
    }
}
798
 
799
/** Single sprite: blend the two source lines src1a and src1b by offset. */
static void sprite_v_single_c(uint8_t *dst, const uint8_t *src1a, const uint8_t *src1b, int offset, int width)
{
    const int two_sprites = 0;
    const int scaled      = 1;

    sprite_v_template(dst, src1a, src1b, offset, two_sprites, NULL, NULL, 0, 0, scaled, width);
}
803
 
804
/** Two sprites, no line blending for either: mix src1a and src2a by alpha. */
static void sprite_v_double_noscale_c(uint8_t *dst, const uint8_t *src1a, const uint8_t *src2a, int alpha, int width)
{
    const int two_sprites = 1;
    const int scaled      = 0;

    sprite_v_template(dst, src1a, NULL, 0, two_sprites, src2a, NULL, 0, alpha, scaled, width);
}
808
 
809
/** Two sprites, line blending for sprite 1 only, then mixed with src2a by alpha. */
static void sprite_v_double_onescale_c(uint8_t *dst, const uint8_t *src1a, const uint8_t *src1b, int offset1,
                                                     const uint8_t *src2a, int alpha, int width)
{
    const int two_sprites = 1;
    const int scaled      = 1;

    sprite_v_template(dst, src1a, src1b, offset1, two_sprites, src2a, NULL, 0, alpha, scaled, width);
}
814
 
815
/** Two sprites, line blending for both, then mixed by alpha. */
static void sprite_v_double_twoscale_c(uint8_t *dst, const uint8_t *src1a, const uint8_t *src1b, int offset1,
                                                     const uint8_t *src2a, const uint8_t *src2b, int offset2,
                                       int alpha, int width)
{
    const int two_sprites = 1;
    const int scaled      = 2;

    sprite_v_template(dst, src1a, src1b, offset1, two_sprites, src2a, src2b, offset2, alpha, scaled, width);
}
821
 
822
#endif
823
 
824
/**
 * Fill a VC1DSPContext with the C implementations defined in this file,
 * then let the PPC / x86 init functions run when the corresponding ARCH_*
 * constant is non-zero.
 *
 * @param dsp context whose function pointers are set
 */
av_cold void ff_vc1dsp_init(VC1DSPContext *dsp)
{
/* Install the put_ and avg_ mspel routines for modes (a, b) at slot a + 4 * b. */
#define SET_MSPEL(a, b)                                                               \
    do {                                                                              \
        dsp->put_vc1_mspel_pixels_tab[(a) + 4 * (b)] = put_vc1_mspel_mc ## a ## b ## _c; \
        dsp->avg_vc1_mspel_pixels_tab[(a) + 4 * (b)] = avg_vc1_mspel_mc ## a ## b ## _c; \
    } while (0)

    /* inverse transforms */
    dsp->vc1_inv_trans_8x8    = vc1_inv_trans_8x8_c;
    dsp->vc1_inv_trans_4x8    = vc1_inv_trans_4x8_c;
    dsp->vc1_inv_trans_8x4    = vc1_inv_trans_8x4_c;
    dsp->vc1_inv_trans_4x4    = vc1_inv_trans_4x4_c;
    dsp->vc1_inv_trans_8x8_dc = vc1_inv_trans_8x8_dc_c;
    dsp->vc1_inv_trans_4x8_dc = vc1_inv_trans_4x8_dc_c;
    dsp->vc1_inv_trans_8x4_dc = vc1_inv_trans_8x4_dc_c;
    dsp->vc1_inv_trans_4x4_dc = vc1_inv_trans_4x4_dc_c;

    /* overlap smoothing */
    dsp->vc1_h_overlap   = vc1_h_overlap_c;
    dsp->vc1_v_overlap   = vc1_v_overlap_c;
    dsp->vc1_h_s_overlap = vc1_h_s_overlap_c;
    dsp->vc1_v_s_overlap = vc1_v_s_overlap_c;

    /* in-loop deblocking */
    dsp->vc1_v_loop_filter4  = vc1_v_loop_filter4_c;
    dsp->vc1_h_loop_filter4  = vc1_h_loop_filter4_c;
    dsp->vc1_v_loop_filter8  = vc1_v_loop_filter8_c;
    dsp->vc1_h_loop_filter8  = vc1_h_loop_filter8_c;
    dsp->vc1_v_loop_filter16 = vc1_v_loop_filter16_c;
    dsp->vc1_h_loop_filter16 = vc1_h_loop_filter16_c;

    /* luma motion compensation; slot 0 is the plain 8x8 copy / average */
    dsp->put_vc1_mspel_pixels_tab[0] = put_pixels8x8_c;
    dsp->avg_vc1_mspel_pixels_tab[0] = avg_pixels8x8_c;
    SET_MSPEL(1, 0);
    SET_MSPEL(2, 0);
    SET_MSPEL(3, 0);
    SET_MSPEL(0, 1);
    SET_MSPEL(1, 1);
    SET_MSPEL(2, 1);
    SET_MSPEL(3, 1);
    SET_MSPEL(0, 2);
    SET_MSPEL(1, 2);
    SET_MSPEL(2, 2);
    SET_MSPEL(3, 2);
    SET_MSPEL(0, 3);
    SET_MSPEL(1, 3);
    SET_MSPEL(2, 3);
    SET_MSPEL(3, 3);

    /* chroma motion compensation: slot 0 is 8 wide, slot 1 is 4 wide */
    dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = put_no_rnd_vc1_chroma_mc8_c;
    dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = avg_no_rnd_vc1_chroma_mc8_c;
    dsp->put_no_rnd_vc1_chroma_pixels_tab[1] = put_no_rnd_vc1_chroma_mc4_c;
    dsp->avg_no_rnd_vc1_chroma_pixels_tab[1] = avg_no_rnd_vc1_chroma_mc4_c;

#if CONFIG_WMV3IMAGE_DECODER || CONFIG_VC1IMAGE_DECODER
    dsp->sprite_h                 = sprite_h_c;
    dsp->sprite_v_single          = sprite_v_single_c;
    dsp->sprite_v_double_noscale  = sprite_v_double_noscale_c;
    dsp->sprite_v_double_onescale = sprite_v_double_onescale_c;
    dsp->sprite_v_double_twoscale = sprite_v_double_twoscale_c;
#endif

#undef SET_MSPEL

    /* platform-specific initializers run last */
    if (ARCH_PPC)
        ff_vc1dsp_init_ppc(dsp);
    if (ARCH_X86)
        ff_vc1dsp_init_x86(dsp);
}