Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
6147 serge 1
/*
2
 * DSP functions for Indeo Video Interactive codecs (Indeo4 and Indeo5)
3
 *
4
 * Copyright (c) 2009-2011 Maxim Poliakovski
5
 *
6
 * This file is part of FFmpeg.
7
 *
8
 * FFmpeg is free software; you can redistribute it and/or
9
 * modify it under the terms of the GNU Lesser General Public
10
 * License as published by the Free Software Foundation; either
11
 * version 2.1 of the License, or (at your option) any later version.
12
 *
13
 * FFmpeg is distributed in the hope that it will be useful,
14
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
16
 * Lesser General Public License for more details.
17
 *
18
 * You should have received a copy of the GNU Lesser General Public
19
 * License along with FFmpeg; if not, write to the Free Software
20
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21
 */
22
 
23
/**
24
 * @file
25
 * DSP functions (inverse transforms, motion compensation, wavelet recompositions)
26
 * for Indeo Video Interactive codecs.
27
 */
28
 
29
#include "avcodec.h"
30
#include "ivi.h"
31
#include "ivi_dsp.h"
32
 
33
void ff_ivi_recompose53(const IVIPlaneDesc *plane, uint8_t *dst,
34
                        const int dst_pitch)
35
{
36
    int             x, y, indx;
37
    int32_t         p0, p1, p2, p3, tmp0, tmp1, tmp2;
38
    int32_t         b0_1, b0_2, b1_1, b1_2, b1_3, b2_1, b2_2, b2_3, b2_4, b2_5, b2_6;
39
    int32_t         b3_1, b3_2, b3_3, b3_4, b3_5, b3_6, b3_7, b3_8, b3_9;
40
    int32_t         pitch, back_pitch;
41
    const short     *b0_ptr, *b1_ptr, *b2_ptr, *b3_ptr;
42
    const int       num_bands = 4;
43
 
44
    /* all bands should have the same pitch */
45
    pitch = plane->bands[0].pitch;
46
 
47
    /* pixels at the position "y-1" will be set to pixels at the "y" for the 1st iteration */
48
    back_pitch = 0;
49
 
50
    /* get pointers to the wavelet bands */
51
    b0_ptr = plane->bands[0].buf;
52
    b1_ptr = plane->bands[1].buf;
53
    b2_ptr = plane->bands[2].buf;
54
    b3_ptr = plane->bands[3].buf;
55
 
56
    for (y = 0; y < plane->height; y += 2) {
57
 
58
        if (y+2 >= plane->height)
59
            pitch= 0;
60
        /* load storage variables with values */
61
        if (num_bands > 0) {
62
            b0_1 = b0_ptr[0];
63
            b0_2 = b0_ptr[pitch];
64
        }
65
 
66
        if (num_bands > 1) {
67
            b1_1 = b1_ptr[back_pitch];
68
            b1_2 = b1_ptr[0];
69
            b1_3 = b1_1 - b1_2*6 + b1_ptr[pitch];
70
        }
71
 
72
        if (num_bands > 2) {
73
            b2_2 = b2_ptr[0];     // b2[x,  y  ]
74
            b2_3 = b2_2;          // b2[x+1,y  ] = b2[x,y]
75
            b2_5 = b2_ptr[pitch]; // b2[x  ,y+1]
76
            b2_6 = b2_5;          // b2[x+1,y+1] = b2[x,y+1]
77
        }
78
 
79
        if (num_bands > 3) {
80
            b3_2 = b3_ptr[back_pitch]; // b3[x  ,y-1]
81
            b3_3 = b3_2;               // b3[x+1,y-1] = b3[x  ,y-1]
82
            b3_5 = b3_ptr[0];          // b3[x  ,y  ]
83
            b3_6 = b3_5;               // b3[x+1,y  ] = b3[x  ,y  ]
84
            b3_8 = b3_2 - b3_5*6 + b3_ptr[pitch];
85
            b3_9 = b3_8;
86
        }
87
 
88
        for (x = 0, indx = 0; x < plane->width; x+=2, indx++) {
89
            if (x+2 >= plane->width) {
90
                b0_ptr --;
91
                b1_ptr --;
92
                b2_ptr --;
93
                b3_ptr --;
94
            }
95
 
96
            /* some values calculated in the previous iterations can */
97
            /* be reused in the next ones, so do appropriate copying */
98
            b2_1 = b2_2; // b2[x-1,y  ] = b2[x,  y  ]
99
            b2_2 = b2_3; // b2[x  ,y  ] = b2[x+1,y  ]
100
            b2_4 = b2_5; // b2[x-1,y+1] = b2[x  ,y+1]
101
            b2_5 = b2_6; // b2[x  ,y+1] = b2[x+1,y+1]
102
            b3_1 = b3_2; // b3[x-1,y-1] = b3[x  ,y-1]
103
            b3_2 = b3_3; // b3[x  ,y-1] = b3[x+1,y-1]
104
            b3_4 = b3_5; // b3[x-1,y  ] = b3[x  ,y  ]
105
            b3_5 = b3_6; // b3[x  ,y  ] = b3[x+1,y  ]
106
            b3_7 = b3_8; // vert_HPF(x-1)
107
            b3_8 = b3_9; // vert_HPF(x  )
108
 
109
            p0 = p1 = p2 = p3 = 0;
110
 
111
            /* process the LL-band by applying LPF both vertically and horizontally */
112
            if (num_bands > 0) {
113
                tmp0 = b0_1;
114
                tmp2 = b0_2;
115
                b0_1 = b0_ptr[indx+1];
116
                b0_2 = b0_ptr[pitch+indx+1];
117
                tmp1 = tmp0 + b0_1;
118
 
119
                p0 =  tmp0 << 4;
120
                p1 =  tmp1 << 3;
121
                p2 = (tmp0 + tmp2) << 3;
122
                p3 = (tmp1 + tmp2 + b0_2) << 2;
123
            }
124
 
125
            /* process the HL-band by applying HPF vertically and LPF horizontally */
126
            if (num_bands > 1) {
127
                tmp0 = b1_2;
128
                tmp1 = b1_1;
129
                b1_2 = b1_ptr[indx+1];
130
                b1_1 = b1_ptr[back_pitch+indx+1];
131
 
132
                tmp2 = tmp1 - tmp0*6 + b1_3;
133
                b1_3 = b1_1 - b1_2*6 + b1_ptr[pitch+indx+1];
134
 
135
                p0 += (tmp0 + tmp1) << 3;
136
                p1 += (tmp0 + tmp1 + b1_1 + b1_2) << 2;
137
                p2 +=  tmp2 << 2;
138
                p3 += (tmp2 + b1_3) << 1;
139
            }
140
 
141
            /* process the LH-band by applying LPF vertically and HPF horizontally */
142
            if (num_bands > 2) {
143
                b2_3 = b2_ptr[indx+1];
144
                b2_6 = b2_ptr[pitch+indx+1];
145
 
146
                tmp0 = b2_1 + b2_2;
147
                tmp1 = b2_1 - b2_2*6 + b2_3;
148
 
149
                p0 += tmp0 << 3;
150
                p1 += tmp1 << 2;
151
                p2 += (tmp0 + b2_4 + b2_5) << 2;
152
                p3 += (tmp1 + b2_4 - b2_5*6 + b2_6) << 1;
153
            }
154
 
155
            /* process the HH-band by applying HPF both vertically and horizontally */
156
            if (num_bands > 3) {
157
                b3_6 = b3_ptr[indx+1];            // b3[x+1,y  ]
158
                b3_3 = b3_ptr[back_pitch+indx+1]; // b3[x+1,y-1]
159
 
160
                tmp0 = b3_1 + b3_4;
161
                tmp1 = b3_2 + b3_5;
162
                tmp2 = b3_3 + b3_6;
163
 
164
                b3_9 = b3_3 - b3_6*6 + b3_ptr[pitch+indx+1];
165
 
166
                p0 += (tmp0 + tmp1) << 2;
167
                p1 += (tmp0 - tmp1*6 + tmp2) << 1;
168
                p2 += (b3_7 + b3_8) << 1;
169
                p3 +=  b3_7 - b3_8*6 + b3_9;
170
            }
171
 
172
            /* output four pixels */
173
            dst[x]             = av_clip_uint8((p0 >> 6) + 128);
174
            dst[x+1]           = av_clip_uint8((p1 >> 6) + 128);
175
            dst[dst_pitch+x]   = av_clip_uint8((p2 >> 6) + 128);
176
            dst[dst_pitch+x+1] = av_clip_uint8((p3 >> 6) + 128);
177
        }// for x
178
 
179
        dst += dst_pitch << 1;
180
 
181
        back_pitch = -pitch;
182
 
183
        b0_ptr += pitch + 1;
184
        b1_ptr += pitch + 1;
185
        b2_ptr += pitch + 1;
186
        b3_ptr += pitch + 1;
187
    }
188
}
189
 
190
void ff_ivi_recompose_haar(const IVIPlaneDesc *plane, uint8_t *dst,
191
                           const int dst_pitch)
192
{
193
    int             x, y, indx, b0, b1, b2, b3, p0, p1, p2, p3;
194
    const short     *b0_ptr, *b1_ptr, *b2_ptr, *b3_ptr;
195
    int32_t         pitch;
196
 
197
    /* all bands should have the same pitch */
198
    pitch = plane->bands[0].pitch;
199
 
200
    /* get pointers to the wavelet bands */
201
    b0_ptr = plane->bands[0].buf;
202
    b1_ptr = plane->bands[1].buf;
203
    b2_ptr = plane->bands[2].buf;
204
    b3_ptr = plane->bands[3].buf;
205
 
206
    for (y = 0; y < plane->height; y += 2) {
207
        for (x = 0, indx = 0; x < plane->width; x += 2, indx++) {
208
            /* load coefficients */
209
            b0 = b0_ptr[indx]; //should be: b0 = (num_bands > 0) ? b0_ptr[indx] : 0;
210
            b1 = b1_ptr[indx]; //should be: b1 = (num_bands > 1) ? b1_ptr[indx] : 0;
211
            b2 = b2_ptr[indx]; //should be: b2 = (num_bands > 2) ? b2_ptr[indx] : 0;
212
            b3 = b3_ptr[indx]; //should be: b3 = (num_bands > 3) ? b3_ptr[indx] : 0;
213
 
214
            /* haar wavelet recomposition */
215
            p0 = (b0 + b1 + b2 + b3 + 2) >> 2;
216
            p1 = (b0 + b1 - b2 - b3 + 2) >> 2;
217
            p2 = (b0 - b1 + b2 - b3 + 2) >> 2;
218
            p3 = (b0 - b1 - b2 + b3 + 2) >> 2;
219
 
220
            /* bias, convert and output four pixels */
221
            dst[x]                 = av_clip_uint8(p0 + 128);
222
            dst[x + 1]             = av_clip_uint8(p1 + 128);
223
            dst[dst_pitch + x]     = av_clip_uint8(p2 + 128);
224
            dst[dst_pitch + x + 1] = av_clip_uint8(p3 + 128);
225
        }// for x
226
 
227
        dst += dst_pitch << 1;
228
 
229
        b0_ptr += pitch;
230
        b1_ptr += pitch;
231
        b2_ptr += pitch;
232
        b3_ptr += pitch;
233
    }// for y
234
}
235
 
236
/**
 * Butterfly operation for the inverse Haar transform.
 *
 * o1 receives the halved sum and o2 the halved difference of s1 and s2.
 * The difference is parked in the scratch variable t until o1 has been
 * written, so o1/o2 may be the same objects as s1/s2.
 */
#define IVI_HAAR_BFLY(s1, s2, o1, o2, t) do {\
    t  = ((s1) - (s2)) >> 1;\
    o1 = ((s1) + (s2)) >> 1;\
    o2 = (t);\
} while (0)

/**
 * Inverse 8-point Haar transform.
 *
 * s1..s8 are the inputs (note the parameter order), d1..d8 the outputs and
 * t0..t8 caller-provided scratch variables. Every output is passed through
 * COMPENSATE(), which the user of the macro has to define.
 *
 * The first two inputs are doubled with a multiplication rather than a
 * left shift: shifting a negative signed value left is undefined behaviour.
 */
#define INV_HAAR8(s1, s5, s3, s7, s2, s4, s6, s8,\
                  d1, d2, d3, d4, d5, d6, d7, d8,\
                  t0, t1, t2, t3, t4, t5, t6, t7, t8) do {\
    t1 = (s1) * 2; t5 = (s5) * 2;\
    IVI_HAAR_BFLY(t1, t5, t1, t5, t0); IVI_HAAR_BFLY(t1, s3, t1, t3, t0);\
    IVI_HAAR_BFLY(t5, s7, t5, t7, t0); IVI_HAAR_BFLY(t1, s2, t1, t2, t0);\
    IVI_HAAR_BFLY(t3, s4, t3, t4, t0); IVI_HAAR_BFLY(t5, s6, t5, t6, t0);\
    IVI_HAAR_BFLY(t7, s8, t7, t8, t0);\
    d1 = COMPENSATE(t1);\
    d2 = COMPENSATE(t2);\
    d3 = COMPENSATE(t3);\
    d4 = COMPENSATE(t4);\
    d5 = COMPENSATE(t5);\
    d6 = COMPENSATE(t6);\
    d7 = COMPENSATE(t7);\
    d8 = COMPENSATE(t8);\
} while (0)

/**
 * Inverse 4-point Haar transform.
 *
 * s1, s3, s5, s7 are the inputs, d1..d4 the outputs and t0..t4
 * caller-provided scratch variables. Every output is passed through
 * COMPENSATE(), which the user of the macro has to define.
 */
#define INV_HAAR4(s1, s3, s5, s7, d1, d2, d3, d4, t0, t1, t2, t3, t4) do {\
    IVI_HAAR_BFLY(s1, s3, t0, t1, t4);\
    IVI_HAAR_BFLY(t0, s5, t2, t3, t4);\
    d1 = COMPENSATE(t2);\
    d2 = COMPENSATE(t3);\
    IVI_HAAR_BFLY(t1, s7, t2, t3, t4);\
    d3 = COMPENSATE(t2);\
    d4 = COMPENSATE(t3);\
} while (0)
269
 
270
/**
 * Two-dimensional inverse Haar transform of an 8x8 block.
 *
 * The 8-point transform is first applied to the columns of in[] (result
 * kept in a temporary 8x8 array), then to the rows of that array.
 *
 * The column pre-scaling uses a multiplication instead of a left shift,
 * because left-shifting a negative coefficient is undefined behaviour.
 *
 * @param[in]  in     8x8 coefficient block, 8 values per row
 * @param[out] out    output block, rows are pitch elements apart
 * @param[in]  pitch  distance between two output rows, in elements
 * @param[in]  flags  one entry per column; a zero entry makes the column
 *                    pass write zeros without running the transform
 */
void ff_ivi_inverse_haar_8x8(const int32_t *in, int16_t *out, uint32_t pitch,
                             const uint8_t *flags)
{
    int     i, shift, sp1, sp2, sp3, sp4;
    const int32_t *src;
    int32_t *dst;
    int     tmp[64];
    int     t0, t1, t2, t3, t4, t5, t6, t7, t8;

    /* apply the InvHaar8 to all columns */
#define COMPENSATE(x) (x)
    src = in;
    dst = tmp;
    for (i = 0; i < 8; i++) {
        if (flags[i]) {
            /* pre-scaling: columns 0..3 get their first four inputs doubled */
            shift = !(i & 4);
            sp1 = src[ 0] * (1 << shift);
            sp2 = src[ 8] * (1 << shift);
            sp3 = src[16] * (1 << shift);
            sp4 = src[24] * (1 << shift);
            INV_HAAR8(    sp1,     sp2,     sp3,     sp4,
                      src[32], src[40], src[48], src[56],
                      dst[ 0], dst[ 8], dst[16], dst[24],
                      dst[32], dst[40], dst[48], dst[56],
                      t0, t1, t2, t3, t4, t5, t6, t7, t8);
        } else {
            dst[ 0] = dst[ 8] = dst[16] = dst[24] =
            dst[32] = dst[40] = dst[48] = dst[56] = 0;
        }

        src++;
        dst++;
    }
#undef  COMPENSATE

    /* apply the InvHaar8 to all rows */
#define COMPENSATE(x) (x)
    src = tmp;
    for (i = 0; i < 8; i++) {
        if (   !src[0] && !src[1] && !src[2] && !src[3]
            && !src[4] && !src[5] && !src[6] && !src[7]) {
            /* row of zeros: write zeros without running the transform */
            memset(out, 0, 8 * sizeof(out[0]));
        } else {
            INV_HAAR8(src[0], src[1], src[2], src[3],
                      src[4], src[5], src[6], src[7],
                      out[0], out[1], out[2], out[3],
                      out[4], out[5], out[6], out[7],
                      t0, t1, t2, t3, t4, t5, t6, t7, t8);
        }
        src += 8;
        out += pitch;
    }
#undef  COMPENSATE
}
324
 
325
/**
 * Inverse 8-point Haar transform applied to each of the 8 rows of a block.
 *
 * @param[in]  in     8x8 coefficient block, 8 values per row
 * @param[out] out    output block, rows are pitch elements apart
 * @param[in]  pitch  distance between two output rows, in elements
 * @param[in]  flags  not used by this function
 */
void ff_ivi_row_haar8(const int32_t *in, int16_t *out, uint32_t pitch,
                      const uint8_t *flags)
{
    int row;
    int w0, w1, w2, w3, w4, w5, w6, w7, w8; /* scratch for INV_HAAR8 */

#define COMPENSATE(x) (x)
    for (row = 0; row < 8; row++, in += 8, out += pitch) {
        if ((in[0] | in[1] | in[2] | in[3] |
             in[4] | in[5] | in[6] | in[7]) == 0) {
            /* row of zeros: write zeros without running the transform */
            memset(out, 0, 8 * sizeof(*out));
            continue;
        }

        INV_HAAR8(in[0],  in[1],  in[2],  in[3],
                  in[4],  in[5],  in[6],  in[7],
                  out[0], out[1], out[2], out[3],
                  out[4], out[5], out[6], out[7],
                  w0, w1, w2, w3, w4, w5, w6, w7, w8);
    }
#undef  COMPENSATE
}
349
 
350
/**
 * Inverse 8-point Haar transform applied to each of the 8 columns of a block.
 *
 * @param[in]  in     8x8 coefficient block, 8 values per row
 * @param[out] out    output block, rows are pitch elements apart
 * @param[in]  pitch  distance between two output rows, in elements
 * @param[in]  flags  one entry per column; a zero entry makes the function
 *                    write zeros for that column without transforming it
 */
void ff_ivi_col_haar8(const int32_t *in, int16_t *out, uint32_t pitch,
                      const uint8_t *flags)
{
    int col;
    int w0, w1, w2, w3, w4, w5, w6, w7, w8; /* scratch for INV_HAAR8 */

#define COMPENSATE(x) (x)
    for (col = 0; col < 8; col++, in++, out++) {
        if (!flags[col]) {
            /* column flagged as empty: clear it in the output */
            out[0 * pitch] = 0;
            out[1 * pitch] = 0;
            out[2 * pitch] = 0;
            out[3 * pitch] = 0;
            out[4 * pitch] = 0;
            out[5 * pitch] = 0;
            out[6 * pitch] = 0;
            out[7 * pitch] = 0;
            continue;
        }

        INV_HAAR8(in[ 0], in[ 8], in[16], in[24],
                  in[32], in[40], in[48], in[56],
                  out[0 * pitch], out[1 * pitch],
                  out[2 * pitch], out[3 * pitch],
                  out[4 * pitch], out[5 * pitch],
                  out[6 * pitch], out[7 * pitch],
                  w0, w1, w2, w3, w4, w5, w6, w7, w8);
    }
#undef  COMPENSATE
}
378
 
379
/**
 * Two-dimensional inverse Haar transform of a 4x4 block.
 *
 * The 4-point transform is first applied to the columns of in[] (result
 * kept in a temporary 4x4 array), then to the rows of that array.
 *
 * The column pre-scaling uses a multiplication instead of a left shift,
 * because left-shifting a negative coefficient is undefined behaviour.
 *
 * @param[in]  in     4x4 coefficient block, 4 values per row
 * @param[out] out    output block, rows are pitch elements apart
 * @param[in]  pitch  distance between two output rows, in elements
 * @param[in]  flags  one entry per column; a zero entry makes the column
 *                    pass write zeros without running the transform
 */
void ff_ivi_inverse_haar_4x4(const int32_t *in, int16_t *out, uint32_t pitch,
                             const uint8_t *flags)
{
    int     i, shift, sp1, sp2;
    const int32_t *src;
    int32_t *dst;
    int     tmp[16];
    int     t0, t1, t2, t3, t4;

    /* apply the InvHaar4 to all columns */
#define COMPENSATE(x) (x)
    src = in;
    dst = tmp;
    for (i = 0; i < 4; i++) {
        if (flags[i]) {
            /* pre-scaling: columns 0..1 get their first two inputs doubled */
            shift = !(i & 2);
            sp1 = src[0] * (1 << shift);
            sp2 = src[4] * (1 << shift);
            INV_HAAR4(   sp1,    sp2, src[8], src[12],
                      dst[0], dst[4], dst[8], dst[12],
                      t0, t1, t2, t3, t4);
        } else {
            dst[0] = dst[4] = dst[8] = dst[12] = 0;
        }

        src++;
        dst++;
    }
#undef  COMPENSATE

    /* apply the InvHaar4 to all rows */
#define COMPENSATE(x) (x)
    src = tmp;
    for (i = 0; i < 4; i++) {
        if (!src[0] && !src[1] && !src[2] && !src[3]) {
            /* row of zeros: write zeros without running the transform */
            memset(out, 0, 4 * sizeof(out[0]));
        } else {
            INV_HAAR4(src[0], src[1], src[2], src[3],
                      out[0], out[1], out[2], out[3],
                      t0, t1, t2, t3, t4);
        }
        src += 4;
        out += pitch;
    }
#undef  COMPENSATE
}
425
 
426
/**
 * Inverse 4-point Haar transform applied to each of the 4 rows of a block.
 *
 * @param[in]  in     4x4 coefficient block, 4 values per row
 * @param[out] out    output block, rows are pitch elements apart
 * @param[in]  pitch  distance between two output rows, in elements
 * @param[in]  flags  not used by this function
 */
void ff_ivi_row_haar4(const int32_t *in, int16_t *out, uint32_t pitch,
                      const uint8_t *flags)
{
    int row;
    int w0, w1, w2, w3, w4; /* scratch for INV_HAAR4 */

#define COMPENSATE(x) (x)
    for (row = 0; row < 4; row++, in += 4, out += pitch) {
        if ((in[0] | in[1] | in[2] | in[3]) == 0) {
            /* row of zeros: write zeros without running the transform */
            memset(out, 0, 4 * sizeof(*out));
            continue;
        }

        INV_HAAR4(in[0], in[1], in[2], in[3],
                  out[0], out[1], out[2], out[3],
                  w0, w1, w2, w3, w4);
    }
#undef  COMPENSATE
}
447
 
448
/**
 * Inverse 4-point Haar transform applied to each of the 4 columns of a block.
 *
 * @param[in]  in     4x4 coefficient block, 4 values per row
 * @param[out] out    output block, rows are pitch elements apart
 * @param[in]  pitch  distance between two output rows, in elements
 * @param[in]  flags  one entry per column; a zero entry makes the function
 *                    write zeros for that column without transforming it
 */
void ff_ivi_col_haar4(const int32_t *in, int16_t *out, uint32_t pitch,
                      const uint8_t *flags)
{
    int col;
    int w0, w1, w2, w3, w4; /* scratch for INV_HAAR4 */

#define COMPENSATE(x) (x)
    for (col = 0; col < 4; col++, in++, out++) {
        if (!flags[col]) {
            /* column flagged as empty: clear it in the output */
            out[0 * pitch] = 0;
            out[1 * pitch] = 0;
            out[2 * pitch] = 0;
            out[3 * pitch] = 0;
            continue;
        }

        INV_HAAR4(in[0], in[4], in[8], in[12],
                  out[0 * pitch], out[1 * pitch],
                  out[2 * pitch], out[3 * pitch],
                  w0, w1, w2, w3, w4);
    }
#undef  COMPENSATE
}
471
 
472
/**
 * DC-only inverse Haar transform: fill a square block with one value.
 *
 * The fill value is the first input coefficient shifted right by 3.
 *
 * @param[in]  in        pointer to the DC coefficient
 * @param[out] out       output block, rows are pitch elements apart
 * @param[in]  pitch     distance between two output rows, in elements
 * @param[in]  blk_size  width and height of the block
 */
void ff_ivi_dc_haar_2d(const int32_t *in, int16_t *out, uint32_t pitch,
                       int blk_size)
{
    const int16_t fill = (in[0] + 0) >> 3;
    int16_t      *line = out;
    int           row, col;

    for (row = 0; row < blk_size; row++) {
        for (col = 0; col < blk_size; col++)
            line[col] = fill;
        line += pitch;
    }
}
485
 
486
/**
 * Butterfly operation for the inverse slant transform:
 * sum = a + b, diff = a - b. The difference is held in tmp until the sum
 * has been stored, so sum/diff may be the same objects as a/b.
 */
#define IVI_SLANT_BFLY(a, b, sum, diff, tmp) do {\
    tmp  = (a) - (b);\
    sum  = (a) + (b);\
    diff = (tmp);\
} while (0)

/**
 * Reflection a,b = 1/2, 5/4 for the inverse slant transform.
 * The first result is held in tmp until the second one has been stored.
 */
#define IVI_IREFLECT(a, b, r1, r2, tmp) do {\
    tmp = (((a) + (b)*2 + 2) >> 2) + (a);\
    r2  = (((a)*2 - (b) + 2) >> 2) - (b);\
    r1  = (tmp);\
} while (0)

/**
 * Reflection a,b = 1/2, 7/8 for the inverse slant transform.
 * The first result is held in tmp until the second one has been stored.
 */
#define IVI_SLANT_PART4(a, b, r1, r2, tmp) do {\
    tmp = (b) + (((a)*4  - (b) + 4) >> 3);\
    r2  = (a) + ((-(a) - (b)*4 + 4) >> 3);\
    r1  = (tmp);\
} while (0)

/**
 * Inverse slant8 transform.
 *
 * s1..s8 are the inputs (note the parameter order), d1..d8 the outputs and
 * t0..t8 caller-provided scratch variables. Every output is passed through
 * COMPENSATE(), which the user of the macro has to define.
 */
#define IVI_INV_SLANT8(s1, s4, s8, s5, s2, s6, s3, s7,\
                       d1, d2, d3, d4, d5, d6, d7, d8,\
                       t0, t1, t2, t3, t4, t5, t6, t7, t8) do {\
    IVI_SLANT_PART4(s4, s5, t4, t5, t0);\
\
    IVI_SLANT_BFLY(s1, t5, t1, t5, t0);\
    IVI_SLANT_BFLY(s2, s6, t2, t6, t0);\
    IVI_SLANT_BFLY(s7, s3, t7, t3, t0);\
    IVI_SLANT_BFLY(t4, s8, t4, t8, t0);\
\
    IVI_SLANT_BFLY(t1, t2, t1, t2, t0);\
    IVI_IREFLECT  (t4, t3, t4, t3, t0);\
    IVI_SLANT_BFLY(t5, t6, t5, t6, t0);\
    IVI_IREFLECT  (t8, t7, t8, t7, t0);\
    IVI_SLANT_BFLY(t1, t4, t1, t4, t0);\
    IVI_SLANT_BFLY(t2, t3, t2, t3, t0);\
    IVI_SLANT_BFLY(t5, t8, t5, t8, t0);\
    IVI_SLANT_BFLY(t6, t7, t6, t7, t0);\
\
    d1 = COMPENSATE(t1);\
    d2 = COMPENSATE(t2);\
    d3 = COMPENSATE(t3);\
    d4 = COMPENSATE(t4);\
    d5 = COMPENSATE(t5);\
    d6 = COMPENSATE(t6);\
    d7 = COMPENSATE(t7);\
    d8 = COMPENSATE(t8);\
} while (0)

/**
 * Inverse slant4 transform.
 *
 * s1..s4 are the inputs (note the parameter order), d1..d4 the outputs and
 * t0..t4 caller-provided scratch variables. Every output is passed through
 * COMPENSATE(), which the user of the macro has to define.
 */
#define IVI_INV_SLANT4(s1, s4, s2, s3, d1, d2, d3, d4, t0, t1, t2, t3, t4) do {\
    IVI_SLANT_BFLY(s1, s2, t1, t2, t0);\
    IVI_IREFLECT  (s4, s3, t4, t3, t0);\
\
    IVI_SLANT_BFLY(t1, t4, t1, t4, t0);\
    IVI_SLANT_BFLY(t2, t3, t2, t3, t0);\
\
    d1 = COMPENSATE(t1);\
    d2 = COMPENSATE(t2);\
    d3 = COMPENSATE(t3);\
    d4 = COMPENSATE(t4);\
} while (0)
535
 
536
/**
 * Two-dimensional inverse slant transform of an 8x8 block.
 *
 * The 8-point transform runs over the columns of in[] first (unscaled
 * result kept in a temporary 8x8 array) and then over the rows of that
 * array, where each output is rounded and halved.
 *
 * @param[in]  in     8x8 coefficient block, 8 values per row
 * @param[out] out    output block, rows are pitch elements apart
 * @param[in]  pitch  distance between two output rows, in elements
 * @param[in]  flags  one entry per column; a zero entry makes the column
 *                    pass write zeros without running the transform
 */
void ff_ivi_inverse_slant_8x8(const int32_t *in, int16_t *out, uint32_t pitch, const uint8_t *flags)
{
    int     scratch[64];
    int     k;
    int     w0, w1, w2, w3, w4, w5, w6, w7, w8; /* scratch for IVI_INV_SLANT8 */

    /* column pass: in[] -> scratch[] */
#define COMPENSATE(x) (x)
    for (k = 0; k < 8; k++) {
        const int32_t *c = in + k;
        int           *v = scratch + k;

        if (!flags[k]) {
            v[0] = v[8] = v[16] = v[24] = v[32] = v[40] = v[48] = v[56] = 0;
            continue;
        }

        IVI_INV_SLANT8(c[0], c[8], c[16], c[24], c[32], c[40], c[48], c[56],
                       v[0], v[8], v[16], v[24], v[32], v[40], v[48], v[56],
                       w0, w1, w2, w3, w4, w5, w6, w7, w8);
    }
#undef COMPENSATE

    /* row pass: scratch[] -> out[], outputs rounded and halved */
#define COMPENSATE(x) (((x) + 1)>>1)
    for (k = 0; k < 8; k++, out += pitch) {
        const int *r = scratch + 8 * k;

        if ((r[0] | r[1] | r[2] | r[3] | r[4] | r[5] | r[6] | r[7]) == 0) {
            /* row of zeros: write zeros without running the transform */
            memset(out, 0, 8*sizeof(out[0]));
            continue;
        }

        IVI_INV_SLANT8(r[0], r[1], r[2], r[3], r[4], r[5], r[6], r[7],
                       out[0], out[1], out[2], out[3], out[4], out[5], out[6], out[7],
                       w0, w1, w2, w3, w4, w5, w6, w7, w8);
    }
#undef COMPENSATE
}
575
 
576
/**
 * Two-dimensional inverse slant transform of a 4x4 block.
 *
 * The 4-point transform runs over the columns of in[] first (unscaled
 * result kept in a temporary 4x4 array) and then over the rows of that
 * array, where each output is rounded and halved.
 *
 * @param[in]  in     4x4 coefficient block, 4 values per row
 * @param[out] out    output block, rows are pitch elements apart
 * @param[in]  pitch  distance between two output rows, in elements
 * @param[in]  flags  one entry per column; a zero entry makes the column
 *                    pass write zeros without running the transform
 */
void ff_ivi_inverse_slant_4x4(const int32_t *in, int16_t *out, uint32_t pitch, const uint8_t *flags)
{
    int     scratch[16];
    int     k;
    int     w0, w1, w2, w3, w4; /* scratch for IVI_INV_SLANT4 */

    /* column pass: in[] -> scratch[] */
#define COMPENSATE(x) (x)
    for (k = 0; k < 4; k++) {
        const int32_t *c = in + k;
        int           *v = scratch + k;

        if (!flags[k]) {
            v[0] = v[4] = v[8] = v[12] = 0;
            continue;
        }

        IVI_INV_SLANT4(c[0], c[4], c[8], c[12],
                       v[0], v[4], v[8], v[12],
                       w0, w1, w2, w3, w4);
    }
#undef COMPENSATE

    /* row pass: scratch[] -> out[], outputs rounded and halved */
#define COMPENSATE(x) (((x) + 1)>>1)
    for (k = 0; k < 4; k++, out += pitch) {
        const int *r = scratch + 4 * k;

        if ((r[0] | r[1] | r[2] | r[3]) == 0) {
            /* row of zeros: write zeros without running the transform */
            out[0] = out[1] = out[2] = out[3] = 0;
            continue;
        }

        IVI_INV_SLANT4(r[0], r[1], r[2], r[3],
                       out[0], out[1], out[2], out[3],
                       w0, w1, w2, w3, w4);
    }
#undef COMPENSATE
}
615
 
616
/**
 * DC-only 2D inverse slant transform: fill a square block with one value.
 *
 * The fill value is the first input coefficient, rounded and halved.
 *
 * @param[in]  in        pointer to the DC coefficient
 * @param[out] out       output block, rows are pitch elements apart
 * @param[in]  pitch     distance between two output rows, in elements
 * @param[in]  blk_size  width and height of the block
 */
void ff_ivi_dc_slant_2d(const int32_t *in, int16_t *out, uint32_t pitch, int blk_size)
{
    const int16_t fill = (in[0] + 1) >> 1;
    int16_t      *line = out;
    int           row, col;

    for (row = 0; row < blk_size; row++) {
        for (col = 0; col < blk_size; col++)
            line[col] = fill;
        line += pitch;
    }
}
628
 
629
/**
 * Inverse 8-point slant transform applied to each of the 8 rows of a block;
 * every output is rounded and halved.
 *
 * @param[in]  in     8x8 coefficient block, 8 values per row
 * @param[out] out    output block, rows are pitch elements apart
 * @param[in]  pitch  distance between two output rows, in elements
 * @param[in]  flags  not used by this function
 */
void ff_ivi_row_slant8(const int32_t *in, int16_t *out, uint32_t pitch, const uint8_t *flags)
{
    int     row;
    int     w0, w1, w2, w3, w4, w5, w6, w7, w8; /* scratch for IVI_INV_SLANT8 */

#define COMPENSATE(x) (((x) + 1)>>1)
    for (row = 0; row < 8; row++, in += 8, out += pitch) {
        if ((in[0] | in[1] | in[2] | in[3] | in[4] | in[5] | in[6] | in[7]) == 0) {
            /* row of zeros: write zeros without running the transform */
            memset(out, 0, 8*sizeof(out[0]));
            continue;
        }

        IVI_INV_SLANT8( in[0],  in[1],  in[2],  in[3],  in[4],  in[5],  in[6],  in[7],
                       out[0], out[1], out[2], out[3], out[4], out[5], out[6], out[7],
                       w0, w1, w2, w3, w4, w5, w6, w7, w8);
    }
#undef COMPENSATE
}
648
 
649
/**
 * DC-only row slant transform: the first row of the block is filled with
 * the rounded, halved DC coefficient and all remaining rows with zeros.
 *
 * @param[in]  in        pointer to the DC coefficient
 * @param[out] out       output block, rows are pitch elements apart
 * @param[in]  pitch     distance between two output rows, in elements
 * @param[in]  blk_size  width and height of the block
 */
void ff_ivi_dc_row_slant(const int32_t *in, int16_t *out, uint32_t pitch, int blk_size)
{
    const int16_t dc = (in[0] + 1) >> 1;
    int           row, col;

    /* first row carries the DC value */
    for (col = 0; col < blk_size; col++)
        out[col] = dc;

    /* every following row is cleared */
    for (row = 1, out += pitch; row < blk_size; row++, out += pitch) {
        for (col = 0; col < blk_size; col++)
            out[col] = 0;
    }
}
666
 
667
/**
 * Inverse 8-point slant transform applied to each of the 8 columns of a
 * block; every output is rounded and halved.
 *
 * @param[in]  in     8x8 coefficient block, 8 values per row
 * @param[out] out    output block, rows are pitch elements apart
 * @param[in]  pitch  distance between two output rows, in elements
 * @param[in]  flags  one entry per column; a zero entry makes the function
 *                    write zeros for that column without transforming it
 */
void ff_ivi_col_slant8(const int32_t *in, int16_t *out, uint32_t pitch, const uint8_t *flags)
{
    /* offsets of output rows 2, 4 and 8 relative to row 0 */
    const int off2 = pitch << 1;
    const int off4 = pitch << 2;
    const int off8 = pitch << 3;
    int       col;
    int       w0, w1, w2, w3, w4, w5, w6, w7, w8; /* scratch for IVI_INV_SLANT8 */

#define COMPENSATE(x) (((x) + 1)>>1)
    for (col = 0; col < 8; col++, in++, out++) {
        if (!flags[col]) {
            /* column flagged as empty: clear it in the output */
            out[0]            = 0;
            out[pitch]        = 0;
            out[off2]         = 0;
            out[off2 + pitch] = 0;
            out[off4]         = 0;
            out[off4 + pitch] = 0;
            out[off4 + off2]  = 0;
            out[off8 - pitch] = 0;
            continue;
        }

        IVI_INV_SLANT8(in[0], in[8], in[16], in[24], in[32], in[40], in[48], in[56],
                       out[0], out[pitch], out[off2], out[off2 + pitch], out[off4],
                       out[off4 + pitch], out[off4 + off2], out[off8 - pitch],
                       w0, w1, w2, w3, w4, w5, w6, w7, w8);
    }
#undef COMPENSATE
}
693
 
694
/**
 * DC-only column slant transform: the first column of the block is filled
 * with the rounded, halved DC coefficient and all other columns with zeros.
 *
 * @param[in]  in        pointer to the DC coefficient
 * @param[out] out       output block, rows are pitch elements apart
 * @param[in]  pitch     distance between two output rows, in elements
 * @param[in]  blk_size  width and height of the block
 */
void ff_ivi_dc_col_slant(const int32_t *in, int16_t *out, uint32_t pitch, int blk_size)
{
    const int16_t dc = (in[0] + 1) >> 1;
    int16_t      *line = out;
    int           row, col;

    for (row = 0; row < blk_size; row++) {
        /* DC value in column 0, zeros in the rest of the row */
        line[0] = dc;
        for (col = 1; col < blk_size; col++)
            line[col] = 0;
        line += pitch;
    }
}
707
 
708
/**
 * Inverse 4-point slant transform applied to each of the 4 rows of a block;
 * every output is rounded and halved.
 *
 * @param[in]  in     4x4 coefficient block, 4 values per row
 * @param[out] out    output block, rows are pitch elements apart
 * @param[in]  pitch  distance between two output rows, in elements
 * @param[in]  flags  not used by this function
 */
void ff_ivi_row_slant4(const int32_t *in, int16_t *out, uint32_t pitch, const uint8_t *flags)
{
    int     row;
    int     w0, w1, w2, w3, w4; /* scratch for IVI_INV_SLANT4 */

#define COMPENSATE(x) (((x) + 1)>>1)
    for (row = 0; row < 4; row++, in += 4, out += pitch) {
        if ((in[0] | in[1] | in[2] | in[3]) == 0) {
            /* row of zeros: write zeros without running the transform */
            memset(out, 0, 4*sizeof(out[0]));
            continue;
        }

        IVI_INV_SLANT4( in[0],  in[1],  in[2],  in[3],
                       out[0], out[1], out[2], out[3],
                       w0, w1, w2, w3, w4);
    }
#undef COMPENSATE
}
727
 
728
/**
 * Inverse 4-point slant transform applied to each of the 4 columns of a
 * block; every output is rounded and halved.
 *
 * @param[in]  in     4x4 coefficient block, 4 values per row
 * @param[out] out    output block, rows are pitch elements apart
 * @param[in]  pitch  distance between two output rows, in elements
 * @param[in]  flags  one entry per column; a zero entry makes the function
 *                    write zeros for that column without transforming it
 */
void ff_ivi_col_slant4(const int32_t *in, int16_t *out, uint32_t pitch, const uint8_t *flags)
{
    /* offset of output row 2 relative to row 0 */
    const int off2 = pitch << 1;
    int       col;
    int       w0, w1, w2, w3, w4; /* scratch for IVI_INV_SLANT4 */

#define COMPENSATE(x) (((x) + 1)>>1)
    for (col = 0; col < 4; col++, in++, out++) {
        if (!flags[col]) {
            /* column flagged as empty: clear it in the output */
            out[0]            = 0;
            out[pitch]        = 0;
            out[off2]         = 0;
            out[off2 + pitch] = 0;
            continue;
        }

        IVI_INV_SLANT4(in[0], in[4], in[8], in[12],
                       out[0], out[pitch], out[off2], out[off2 + pitch],
                       w0, w1, w2, w3, w4);
    }
#undef COMPENSATE
}
750
 
751
/**
 * Copy an 8x8 block of values to the output without any transform.
 *
 * @param[in]  in     8x8 input block, 8 values per row
 * @param[out] out    output block, rows are pitch elements apart
 * @param[in]  pitch  distance between two output rows, in elements
 * @param[in]  flags  not used by this function
 */
void ff_ivi_put_pixels_8x8(const int32_t *in, int16_t *out, uint32_t pitch,
                           const uint8_t *flags)
{
    int row;

    for (row = 0; row < 8; row++) {
        const int32_t *from = in  + 8 * row;
        int16_t       *to   = out + row * pitch;
        int            col  = 0;

        while (col < 8) {
            to[col] = from[col];
            col++;
        }
    }
}
760
 
761
/**
 * Write an 8x8 block whose top-left element is the first input value and
 * whose other 63 elements are zero.
 *
 * @param[in]  in        pointer to the value for the top-left element
 * @param[out] out       output block, rows are pitch elements apart
 * @param[in]  pitch     distance between two output rows, in elements
 * @param[in]  blk_size  not used by this function; the block is always 8x8
 */
void ff_ivi_put_dc_pixel_8x8(const int32_t *in, int16_t *out, uint32_t pitch,
                             int blk_size)
{
    int16_t *line = out;
    int      row;

    /* first row: DC value followed by seven zeros */
    line[0] = in[0];
    memset(line + 1, 0, 7*sizeof(line[0]));

    /* rows 1..7 are cleared completely */
    for (row = 1; row < 8; row++) {
        line += pitch;
        memset(line, 0, 8*sizeof(line[0]));
    }
}
773
 
774
/**
 * Generates the motion compensation routines for one block size.
 *
 * Two functions are produced: a static worker with separate destination and
 * reference pitches, and the public ff_ivi_mc_<size>x<size><suffix>() that
 * calls the worker with the same pitch for both buffers.
 * OP(dst, value) decides how a predicted value is stored.
 *
 * mc_type: 0 - fullpel, 1 - horizontal halfpel, 2 - vertical halfpel,
 *          3 - horizontal and vertical halfpel; any other value writes
 *          nothing.
 */
#define IVI_MC_TEMPLATE(size, suffix, OP) \
static void ivi_mc_ ## size ##x## size ## suffix(int16_t *buf, \
                                                 uint32_t dpitch, \
                                                 const int16_t *ref_buf, \
                                                 uint32_t pitch, int mc_type) \
{ \
    const int16_t *below; /* reference row under the current one */ \
    int     row, col; \
\
    switch (mc_type) { \
    case 0: /* fullpel (no interpolation) */ \
        for (row = 0; row < size; row++) { \
            for (col = 0; col < size; col++) { \
                OP(buf[col], ref_buf[col]); \
            } \
            buf     += dpitch; \
            ref_buf += pitch; \
        } \
        break; \
    case 1: /* horizontal halfpel interpolation */ \
        for (row = 0; row < size; row++) { \
            for (col = 0; col < size; col++) { \
                OP(buf[col], (ref_buf[col] + ref_buf[col+1]) >> 1); \
            } \
            buf     += dpitch; \
            ref_buf += pitch; \
        } \
        break; \
    case 2: /* vertical halfpel interpolation */ \
        below = ref_buf + pitch; \
        for (row = 0; row < size; row++) { \
            for (col = 0; col < size; col++) { \
                OP(buf[col], (ref_buf[col] + below[col]) >> 1); \
            } \
            buf     += dpitch; \
            below   += pitch; \
            ref_buf += pitch; \
        } \
        break; \
    case 3: /* vertical and horizontal halfpel interpolation */ \
        below = ref_buf + pitch; \
        for (row = 0; row < size; row++) { \
            for (col = 0; col < size; col++) { \
                OP(buf[col], (ref_buf[col] + ref_buf[col+1] + below[col] + below[col+1]) >> 2); \
            } \
            buf     += dpitch; \
            below   += pitch; \
            ref_buf += pitch; \
        } \
        break; \
    } \
} \
\
void ff_ivi_mc_ ## size ##x## size ## suffix(int16_t *buf, const int16_t *ref_buf, \
                                             uint32_t pitch, int mc_type) \
{ \
    ivi_mc_ ## size ##x## size ## suffix(buf, pitch, ref_buf, pitch, mc_type); \
}

/**
 * Generates the averaging (two-reference) motion compensation routine for
 * one block size.
 *
 * The prediction from ref_buf is put into a temporary block, the prediction
 * from ref_buf2 is added to it, and half of each sum is stored to buf
 * with OP(dst, value).
 */
#define IVI_MC_AVG_TEMPLATE(size, suffix, OP) \
void ff_ivi_mc_avg_ ## size ##x## size ## suffix(int16_t *buf, \
                                                 const int16_t *ref_buf, \
                                                 const int16_t *ref_buf2, \
                                                 uint32_t pitch, \
                                                 int mc_type, int mc_type2) \
{ \
    int16_t sum[size * size]; \
    const int16_t *cur = sum; \
    int row, col; \
\
    ivi_mc_ ## size ##x## size ## _no_delta(sum, size, ref_buf, pitch, mc_type); \
    ivi_mc_ ## size ##x## size ## _delta(sum, size, ref_buf2, pitch, mc_type2); \
    for (row = 0; row < size; row++) { \
        for (col = 0; col < size; col++) { \
            OP(buf[col], cur[col] >> 1); \
        } \
        buf += pitch; \
        cur += size; \
    } \
}

/* store operations plugged into the templates */
#define OP_PUT(a, b)  (a) = (b)
#define OP_ADD(a, b)  (a) += (b)

IVI_MC_TEMPLATE(8, _no_delta, OP_PUT)
IVI_MC_TEMPLATE(8, _delta,    OP_ADD)
IVI_MC_TEMPLATE(4, _no_delta, OP_PUT)
IVI_MC_TEMPLATE(4, _delta,    OP_ADD)
IVI_MC_AVG_TEMPLATE(8, _no_delta, OP_PUT)
IVI_MC_AVG_TEMPLATE(8, _delta,    OP_ADD)
IVI_MC_AVG_TEMPLATE(4, _no_delta, OP_PUT)
IVI_MC_AVG_TEMPLATE(4, _delta,    OP_ADD)