Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
4349 Serge 1
/*
 * RoQ Video Encoder.
 *
 * Copyright (C) 2007 Vitor Sessak
 * Copyright (C) 2004-2007 Eric Lasota
 *    Based on RoQ specs (C) 2001 Tim Ferguson
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */
24
 
25
/**
 * @file
 * id RoQ encoder by Vitor. Based on the Switchblade3 library and the
 * Switchblade3 FFmpeg glue by Eric Lasota.
 */
30
 
31
/*
 * COSTS:
 * Level 1:
 *  SKIP - 2 bits
 *  MOTION - 2 + 8 bits
 *  CODEBOOK - 2 + 8 bits
 *  SUBDIVIDE - 2 + combined subcel cost
 *
 * Level 2:
 *  SKIP - 2 bits
 *  MOTION - 2 + 8 bits
 *  CODEBOOK - 2 + 8 bits
 *  SUBDIVIDE - 2 + 4*8 bits
 *
 * Maximum cost: 138 bits per cel
 *
 * Proper evaluation requires LCD fraction comparison, which requires
 * Squared Error (SE) loss * savings increase
 *
 * Maximum savings increase: 136 bits
 * Maximum SE loss without overflow: 31580641
 * Components in 8x8 supercel: 192
 * Maximum SE precision per component: 164482
 *    >65025, so no truncation is needed (phew)
 */
56
 
57
#include <string.h>

#include "libavutil/attributes.h"
#include "roqvideo.h"
#include "bytestream.h"
#include "elbg.h"
#include "internal.h"
#include "mathops.h"
65
 
66
/* Weight applied to chroma values when clustering and when scoring chroma
 * error; luma error is weighted by 4 in the distortion helpers. */
#define CHROMA_BIAS 1

/**
 * Maximum number of generated 4x4 codebooks. Can't be 256 to workaround a
 * Quake 3 bug.
 */
#define MAX_CBS_4x4 255

#define MAX_CBS_2x2 256 ///< Maximum number of 2x2 codebooks.

/* The cast is useful when multiplying it by INT_MAX */
#define ROQ_LAMBDA_SCALE ((uint64_t) FF_LAMBDA_SCALE)
78
 
79
/* Macroblock support functions */
80
static void unpack_roq_cell(roq_cell *cell, uint8_t u[4*3])
81
{
82
    memcpy(u  , cell->y, 4);
83
    memset(u+4, cell->u, 4);
84
    memset(u+8, cell->v, 4);
85
}
86
 
87
/**
 * Build a planar 4x4 block from the four unpacked 2x2 cells that a 4x4
 * codebook entry refers to.
 *
 * @param cb2   unpacked 2x2 codebook, 12 bytes per entry (see unpack_roq_cell)
 * @param qcell 4x4 codebook entry holding four indices into cb2
 * @param u     output, 48 bytes: three consecutive 4x4 planes
 */
static void unpack_roq_qcell(uint8_t cb2[], roq_qcell *qcell, uint8_t u[4*4*3])
{
    int i,cp;
    /* Top-left position of each 2x2 quadrant inside a row-major 4x4 plane */
    static const int offsets[4] = {0, 2, 8, 10};

    for (cp=0; cp<3; cp++)
        for (i=0; i<4; i++) {
            u[4*4*cp + offsets[i]  ] = cb2[qcell->idx[i]*2*2*3 + 4*cp  ];
            u[4*4*cp + offsets[i]+1] = cb2[qcell->idx[i]*2*2*3 + 4*cp+1];
            u[4*4*cp + offsets[i]+4] = cb2[qcell->idx[i]*2*2*3 + 4*cp+2];
            u[4*4*cp + offsets[i]+5] = cb2[qcell->idx[i]*2*2*3 + 4*cp+3];
        }
}
100
 
101
 
102
/**
 * Upscale a planar 4x4 block to 8x8 by pixel doubling.
 *
 * @param base input, three consecutive 4x4 planes (48 bytes)
 * @param u    output, three consecutive 8x8 planes (192 bytes); every input
 *             sample is replicated into a 2x2 square
 */
static void enlarge_roq_mb4(uint8_t base[3*16], uint8_t u[3*64])
{
    int x,y,cp;

    for(cp=0; cp<3; cp++)
        for(y=0; y<8; y++)
            for(x=0; x<8; x++)
                *u++ = base[(y/2)*4 + (x/2) + 16*cp];
}
111
 
112
/** @return x squared */
static inline int square(int x)
{
    return x*x;
}

/**
 * Sum of squared differences between two byte arrays.
 *
 * @param a     first array
 * @param b     second array
 * @param count number of bytes to compare; a non-positive count yields 0
 * @return sum over all positions of (b[i] - a[i])^2
 */
static inline int eval_sse(const uint8_t *a, const uint8_t *b, int count)
{
    int diff=0;
    int i;

    for (i=0; i<count; i++)
        diff += square(b[i] - a[i]);

    return diff;
}
126
 
127
// FIXME Could use DSPContext.sse, but it is not so speed critical (used
128
// just for motion estimation).
129
static int block_sse(uint8_t * const *buf1, uint8_t * const *buf2, int x1, int y1,
130
                     int x2, int y2, const int *stride1, const int *stride2, int size)
131
{
132
    int i, k;
133
    int sse=0;
134
 
135
    for (k=0; k<3; k++) {
136
        int bias = (k ? CHROMA_BIAS : 4);
137
        for (i=0; i
138
            sse += bias*eval_sse(buf1[k] + (y1+i)*stride1[k] + x1,
139
                                 buf2[k] + (y2+i)*stride2[k] + x2, size);
140
    }
141
 
142
    return sse;
143
}
144
 
145
/**
 * Distortion obtained by predicting the block at (x, y) of the frame being
 * encoded from the last frame displaced by a motion vector.
 *
 * @param enc  encoder context
 * @param x,y  top-left corner of the block in the frame being encoded
 * @param vect candidate motion vector
 * @param size block width and height
 * @return weighted squared error, or INT_MAX when a vector component is
 *         outside [-7, 7] or the displaced block leaves the frame
 */
static int eval_motion_dist(RoqContext *enc, int x, int y, motion_vect vect,
                             int size)
{
    int mx=vect.d[0];
    int my=vect.d[1];

    if (mx < -7 || mx > 7)
        return INT_MAX;

    if (my < -7 || my > 7)
        return INT_MAX;

    mx += x;
    my += y;

    /* The unsigned cast makes negative coordinates compare as huge values,
     * so one test rejects both underflow and overflow of the frame area. */
    if ((unsigned) mx > enc->width-size || (unsigned) my > enc->height-size)
        return INT_MAX;

    return block_sse(enc->frame_to_enc->data, enc->last_frame->data, x, y,
                     mx, my,
                     enc->frame_to_enc->linesize, enc->last_frame->linesize,
                     size);
}
168
 
169
/**
170
 * @return distortion between two macroblocks
171
 */
172
static inline int squared_diff_macroblock(uint8_t a[], uint8_t b[], int size)
173
{
174
    int cp, sdiff=0;
175
 
176
    for(cp=0;cp<3;cp++) {
177
        int bias = (cp ? CHROMA_BIAS : 4);
178
        sdiff += bias*eval_sse(a, b, size*size);
179
        a += size*size;
180
        b += size*size;
181
    }
182
 
183
    return sdiff;
184
}
185
 
186
typedef struct
187
{
188
    int eval_dist[4];
189
    int best_bit_use;
190
    int best_coding;
191
 
192
    int subCels[4];
193
    motion_vect motion;
194
    int cbEntry;
195
} SubcelEvaluation;
196
 
197
typedef struct
198
{
199
    int eval_dist[4];
200
    int best_coding;
201
 
202
    SubcelEvaluation subCels[4];
203
 
204
    motion_vect motion;
205
    int cbEntry;
206
 
207
    int sourceX, sourceY;
208
} CelEvaluation;
209
 
210
typedef struct
211
{
212
    int numCB4;
213
    int numCB2;
214
    int usedCB2[MAX_CBS_2x2];
215
    int usedCB4[MAX_CBS_4x4];
216
    uint8_t unpacked_cb2[MAX_CBS_2x2*2*2*3];
217
    uint8_t unpacked_cb4[MAX_CBS_4x4*4*4*3];
218
    uint8_t unpacked_cb4_enlarged[MAX_CBS_4x4*8*8*3];
219
} RoqCodebooks;
220
 
221
/**
222
 * Temporary vars
223
 */
224
typedef struct RoqTempData
225
{
226
    CelEvaluation *cel_evals;
227
 
228
    int f2i4[MAX_CBS_4x4];
229
    int i2f4[MAX_CBS_4x4];
230
    int f2i2[MAX_CBS_2x2];
231
    int i2f2[MAX_CBS_2x2];
232
 
233
    int mainChunkSize;
234
 
235
    int numCB4;
236
    int numCB2;
237
 
238
    RoqCodebooks codebooks;
239
 
240
    int *closest_cb2;
241
    int used_option[4];
242
} RoqTempdata;
243
 
244
/**
245
 * Initialize cel evaluators and set their source coordinates
246
 */
247
static void create_cel_evals(RoqContext *enc, RoqTempdata *tempData)
248
{
249
    int n=0, x, y, i;
250
 
251
    tempData->cel_evals = av_malloc(enc->width*enc->height/64 * sizeof(CelEvaluation));
252
 
253
    /* Map to the ROQ quadtree order */
254
    for (y=0; yheight; y+=16)
255
        for (x=0; xwidth; x+=16)
256
            for(i=0; i<4; i++) {
257
                tempData->cel_evals[n  ].sourceX = x + (i&1)*8;
258
                tempData->cel_evals[n++].sourceY = y + (i&2)*4;
259
            }
260
}
261
 
262
/**
263
 * Get macroblocks from parts of the image
264
 */
265
static void get_frame_mb(const AVFrame *frame, int x, int y, uint8_t mb[], int dim)
266
{
267
    int i, j, cp;
268
 
269
    for (cp=0; cp<3; cp++) {
270
        int stride = frame->linesize[cp];
271
        for (i=0; i
272
            for (j=0; j
273
                *mb++ = frame->data[cp][(y+i)*stride + x + j];
274
    }
275
}
276
 
277
/**
 * Find the codebook with the lowest distortion from an image.
 *
 * @param cluster  planar macroblock to match, 3*dim*dim bytes
 * @param cb       unpacked codebook, 3*dim*dim bytes per entry
 * @param numCB    number of entries in cb
 * @param outIndex receives the index of the best entry (0 if numCB is 0)
 * @param dim      macroblock width and height
 * @return distortion of the best entry, INT_MAX if no entry was examined
 */
static int index_mb(uint8_t cluster[], uint8_t cb[], int numCB,
                    int *outIndex, int dim)
{
    int i, lDiff = INT_MAX, pick=0;

    /* Diff against the others; the first entry wins ties */
    for (i=0; i<numCB; i++) {
        int diff = squared_diff_macroblock(cluster, cb + i*dim*dim*3, dim);
        if (diff < lDiff) {
            lDiff = diff;
            pick = i;
        }
    }

    *outIndex = pick;
    return lDiff;
}
297
 
298
/*
 * Evaluate one candidate motion vector for the block at (j, i) and keep it
 * if it beats the best candidate so far. Relies on the variables enc, i, j,
 * blocksize, diff, lowestdiff and bestpick of the expanding function.
 * MOTION is evaluated twice.
 */
#define EVAL_MOTION(MOTION) \
    do { \
        diff = eval_motion_dist(enc, j, i, MOTION, blocksize); \
            \
        if (diff < lowestdiff) { \
            lowestdiff = diff; \
            bestpick = MOTION; \
        } \
    } while(0)
307
 
308
/**
 * Estimate one motion vector per blocksize x blocksize block of the frame
 * and store the results in enc->this_motion4 or enc->this_motion8.
 *
 * Each block starts from the zero vector, tries predictors taken from the
 * previous frame's vectors and from already processed neighbours of the
 * current frame, and then refines the best candidate by repeatedly testing
 * its eight one-pixel neighbours until the distortion stops improving.
 *
 * @param enc       encoder context
 * @param blocksize 4 selects the 4x4 vector arrays, any other value the 8x8
 *                  ones; the 4x4 pass reads the 8x8 results of this frame
 */
static void motion_search(RoqContext *enc, int blocksize)
{
    static const motion_vect offsets[8] = {
        {{ 0,-1}},
        {{ 0, 1}},
        {{-1, 0}},
        {{ 1, 0}},
        {{-1, 1}},
        {{ 1,-1}},
        {{-1,-1}},
        {{ 1, 1}},
    };

    int diff, lowestdiff, oldbest;
    int off[3];
    motion_vect bestpick = {{0,0}};
    int i, j, k, offset;

    motion_vect *last_motion;
    motion_vect *this_motion;
    motion_vect vect, vect2;

    /* Number of blocks in the frame; bounds the last_motion lookups below */
    int max=(enc->width/blocksize)*enc->height/blocksize;

    if (blocksize == 4) {
        last_motion = enc->last_motion4;
        this_motion = enc->this_motion4;
    } else {
        last_motion = enc->last_motion8;
        this_motion = enc->this_motion8;
    }

    for (i=0; i<enc->height; i+=blocksize)
        for (j=0; j<enc->width; j+=blocksize) {
            lowestdiff = eval_motion_dist(enc, j, i, (motion_vect) {{0,0}},
                                          blocksize);
            bestpick.d[0] = 0;
            bestpick.d[1] = 0;

            /* Vector of the enclosing 8x8 block from this frame */
            if (blocksize == 4)
                EVAL_MOTION(enc->this_motion8[(i/8)*(enc->width/8) + j/8]);

            /* Previous frame: same block, the next one, and the one below */
            offset = (i/blocksize)*enc->width/blocksize + j/blocksize;
            if (offset < max && offset >= 0)
                EVAL_MOTION(last_motion[offset]);

            offset++;
            if (offset < max && offset >= 0)
                EVAL_MOTION(last_motion[offset]);

            offset = (i/blocksize + 1)*enc->width/blocksize + j/blocksize;
            if (offset < max && offset >= 0)
                EVAL_MOTION(last_motion[offset]);

            /* Current frame: the preceding block, then the block one row up
             * and the one following it */
            off[0]= (i/blocksize)*enc->width/blocksize + j/blocksize - 1;
            off[1]= off[0] - enc->width/blocksize + 1;
            off[2]= off[1] + 1;

            if (i) {

                /* Component-wise median of the three neighbours */
                for(k=0; k<2; k++)
                    vect.d[k]= mid_pred(this_motion[off[0]].d[k],
                                        this_motion[off[1]].d[k],
                                        this_motion[off[2]].d[k]);

                EVAL_MOTION(vect);
                for(k=0; k<3; k++)
                    EVAL_MOTION(this_motion[off[k]]);
            } else if(j)
                EVAL_MOTION(this_motion[off[0]]);

            vect = bestpick;

            /* Local refinement: stop once a full pass brings no improvement */
            oldbest = -1;
            while (oldbest != lowestdiff) {
                oldbest = lowestdiff;
                for (k=0; k<8; k++) {
                    vect2 = vect;
                    vect2.d[0] += offsets[k].d[0];
                    vect2.d[1] += offsets[k].d[1];
                    EVAL_MOTION(vect2);
                }
                vect = bestpick;
            }
            offset = (i/blocksize)*enc->width/blocksize + j/blocksize;
            this_motion[offset] = bestpick;
        }
}
396
 
397
/**
398
 * Get distortion for all options available to a subcel
399
 */
400
static void gather_data_for_subcel(SubcelEvaluation *subcel, int x,
401
                                   int y, RoqContext *enc, RoqTempdata *tempData)
402
{
403
    uint8_t mb4[4*4*3];
404
    uint8_t mb2[2*2*3];
405
    int cluster_index;
406
    int i, best_dist;
407
 
408
    static const int bitsUsed[4] = {2, 10, 10, 34};
409
 
410
    if (enc->framesSinceKeyframe >= 1) {
411
        subcel->motion = enc->this_motion4[y*enc->width/16 + x/4];
412
 
413
        subcel->eval_dist[RoQ_ID_FCC] =
414
            eval_motion_dist(enc, x, y,
415
                             enc->this_motion4[y*enc->width/16 + x/4], 4);
416
    } else
417
        subcel->eval_dist[RoQ_ID_FCC] = INT_MAX;
418
 
419
    if (enc->framesSinceKeyframe >= 2)
420
        subcel->eval_dist[RoQ_ID_MOT] = block_sse(enc->frame_to_enc->data,
421
                                                  enc->current_frame->data, x,
422
                                                  y, x, y,
423
                                                  enc->frame_to_enc->linesize,
424
                                                  enc->current_frame->linesize,
425
                                                  4);
426
    else
427
        subcel->eval_dist[RoQ_ID_MOT] = INT_MAX;
428
 
429
    cluster_index = y*enc->width/16 + x/4;
430
 
431
    get_frame_mb(enc->frame_to_enc, x, y, mb4, 4);
432
 
433
    subcel->eval_dist[RoQ_ID_SLD] = index_mb(mb4,
434
                                             tempData->codebooks.unpacked_cb4,
435
                                             tempData->codebooks.numCB4,
436
                                             &subcel->cbEntry, 4);
437
 
438
    subcel->eval_dist[RoQ_ID_CCC] = 0;
439
 
440
    for(i=0;i<4;i++) {
441
        subcel->subCels[i] = tempData->closest_cb2[cluster_index*4+i];
442
 
443
        get_frame_mb(enc->frame_to_enc, x+2*(i&1),
444
                     y+(i&2), mb2, 2);
445
 
446
        subcel->eval_dist[RoQ_ID_CCC] +=
447
            squared_diff_macroblock(tempData->codebooks.unpacked_cb2 + subcel->subCels[i]*2*2*3, mb2, 2);
448
    }
449
 
450
    best_dist = INT_MAX;
451
    for (i=0; i<4; i++)
452
        if (ROQ_LAMBDA_SCALE*subcel->eval_dist[i] + enc->lambda*bitsUsed[i] <
453
            best_dist) {
454
            subcel->best_coding = i;
455
            subcel->best_bit_use = bitsUsed[i];
456
            best_dist = ROQ_LAMBDA_SCALE*subcel->eval_dist[i] +
457
                enc->lambda*bitsUsed[i];
458
        }
459
}
460
 
461
/**
462
 * Get distortion for all options available to a cel
463
 */
464
static void gather_data_for_cel(CelEvaluation *cel, RoqContext *enc,
465
                                RoqTempdata *tempData)
466
{
467
    uint8_t mb8[8*8*3];
468
    int index = cel->sourceY*enc->width/64 + cel->sourceX/8;
469
    int i, j, best_dist, divide_bit_use;
470
 
471
    int bitsUsed[4] = {2, 10, 10, 0};
472
 
473
    if (enc->framesSinceKeyframe >= 1) {
474
        cel->motion = enc->this_motion8[index];
475
 
476
        cel->eval_dist[RoQ_ID_FCC] =
477
            eval_motion_dist(enc, cel->sourceX, cel->sourceY,
478
                             enc->this_motion8[index], 8);
479
    } else
480
        cel->eval_dist[RoQ_ID_FCC] = INT_MAX;
481
 
482
    if (enc->framesSinceKeyframe >= 2)
483
        cel->eval_dist[RoQ_ID_MOT] = block_sse(enc->frame_to_enc->data,
484
                                               enc->current_frame->data,
485
                                               cel->sourceX, cel->sourceY,
486
                                               cel->sourceX, cel->sourceY,
487
                                               enc->frame_to_enc->linesize,
488
                                               enc->current_frame->linesize,8);
489
    else
490
        cel->eval_dist[RoQ_ID_MOT] = INT_MAX;
491
 
492
    get_frame_mb(enc->frame_to_enc, cel->sourceX, cel->sourceY, mb8, 8);
493
 
494
    cel->eval_dist[RoQ_ID_SLD] =
495
        index_mb(mb8, tempData->codebooks.unpacked_cb4_enlarged,
496
                 tempData->codebooks.numCB4, &cel->cbEntry, 8);
497
 
498
    gather_data_for_subcel(cel->subCels + 0, cel->sourceX+0, cel->sourceY+0, enc, tempData);
499
    gather_data_for_subcel(cel->subCels + 1, cel->sourceX+4, cel->sourceY+0, enc, tempData);
500
    gather_data_for_subcel(cel->subCels + 2, cel->sourceX+0, cel->sourceY+4, enc, tempData);
501
    gather_data_for_subcel(cel->subCels + 3, cel->sourceX+4, cel->sourceY+4, enc, tempData);
502
 
503
    cel->eval_dist[RoQ_ID_CCC] = 0;
504
    divide_bit_use = 0;
505
    for (i=0; i<4; i++) {
506
        cel->eval_dist[RoQ_ID_CCC] +=
507
            cel->subCels[i].eval_dist[cel->subCels[i].best_coding];
508
        divide_bit_use += cel->subCels[i].best_bit_use;
509
    }
510
 
511
    best_dist = INT_MAX;
512
    bitsUsed[3] = 2 + divide_bit_use;
513
 
514
    for (i=0; i<4; i++)
515
        if (ROQ_LAMBDA_SCALE*cel->eval_dist[i] + enc->lambda*bitsUsed[i] <
516
            best_dist) {
517
            cel->best_coding = i;
518
            best_dist = ROQ_LAMBDA_SCALE*cel->eval_dist[i] +
519
                enc->lambda*bitsUsed[i];
520
        }
521
 
522
    tempData->used_option[cel->best_coding]++;
523
    tempData->mainChunkSize += bitsUsed[cel->best_coding];
524
 
525
    if (cel->best_coding == RoQ_ID_SLD)
526
        tempData->codebooks.usedCB4[cel->cbEntry]++;
527
 
528
    if (cel->best_coding == RoQ_ID_CCC)
529
        for (i=0; i<4; i++) {
530
            if (cel->subCels[i].best_coding == RoQ_ID_SLD)
531
                tempData->codebooks.usedCB4[cel->subCels[i].cbEntry]++;
532
            else if (cel->subCels[i].best_coding == RoQ_ID_CCC)
533
                for (j=0; j<4; j++)
534
                    tempData->codebooks.usedCB2[cel->subCels[i].subCels[j]]++;
535
        }
536
}
537
 
538
/**
 * Build the mapping between generated codebook indices and the compacted
 * indices written to the stream, keeping only entries that are in use.
 *
 * A used 4x4 entry also marks the four 2x2 entries it refers to as used.
 * Results are stored in tempData->i2f4/f2i4/i2f2/f2i2 and
 * tempData->numCB4/numCB2.
 */
static void remap_codebooks(RoqContext *enc, RoqTempdata *tempData)
{
    int i, j, idx=0;

    /* Make remaps for the final codebook usage */
    for (i=0; i<MAX_CBS_4x4; i++) {
        if (tempData->codebooks.usedCB4[i]) {
            tempData->i2f4[i] = idx;
            tempData->f2i4[idx] = i;
            for (j=0; j<4; j++)
                tempData->codebooks.usedCB2[enc->cb4x4[i].idx[j]]++;
            idx++;
        }
    }

    tempData->numCB4 = idx;

    idx = 0;
    for (i=0; i<MAX_CBS_2x2; i++) {
        if (tempData->codebooks.usedCB2[i]) {
            tempData->i2f2[i] = idx;
            tempData->f2i2[idx] = i;
            idx++;
        }
    }
    tempData->numCB2 = idx;

}
566
 
567
/**
568
 * Write codebook chunk
569
 */
570
static void write_codebooks(RoqContext *enc, RoqTempdata *tempData)
571
{
572
    int i, j;
573
    uint8_t **outp= &enc->out_buf;
574
 
575
    if (tempData->numCB2) {
576
        bytestream_put_le16(outp, RoQ_QUAD_CODEBOOK);
577
        bytestream_put_le32(outp, tempData->numCB2*6 + tempData->numCB4*4);
578
        bytestream_put_byte(outp, tempData->numCB4);
579
        bytestream_put_byte(outp, tempData->numCB2);
580
 
581
        for (i=0; inumCB2; i++) {
582
            bytestream_put_buffer(outp, enc->cb2x2[tempData->f2i2[i]].y, 4);
583
            bytestream_put_byte(outp, enc->cb2x2[tempData->f2i2[i]].u);
584
            bytestream_put_byte(outp, enc->cb2x2[tempData->f2i2[i]].v);
585
        }
586
 
587
        for (i=0; inumCB4; i++)
588
            for (j=0; j<4; j++)
589
                bytestream_put_byte(outp, tempData->i2f2[enc->cb4x4[tempData->f2i4[i]].idx[j]]);
590
 
591
    }
592
}
593
 
594
/**
 * Pack a motion vector into the single argument byte used in the stream.
 *
 * @param mot motion vector
 * @return (8 - d[0]) in the high nibble and (8 - d[1]) in the low nibble,
 *         each reduced modulo 16
 */
static inline uint8_t motion_arg(motion_vect mot)
{
    uint8_t ax = 8 - ((uint8_t) mot.d[0]);
    uint8_t ay = 8 - ((uint8_t) mot.d[1]);
    return ((ax&15)<<4) | (ay&15);
}
600
 
601
/**
 * Buffer that gathers eight 2-bit typecodes and their argument bytes so
 * they can be written out together (see write_typecode).
 */
typedef struct
{
    int typeSpool;              ///< typecodes packed from bit 15 downwards
    int typeSpoolLength;        ///< number of bits currently held in typeSpool
    uint8_t argumentSpool[64];  ///< argument bytes pending for those typecodes
    uint8_t *args;              ///< write position inside argumentSpool
    uint8_t **pout;             ///< output stream position
} CodingSpool;
609
 
610
/* NOTE: Typecodes must be spooled AFTER arguments!! */
611
static void write_typecode(CodingSpool *s, uint8_t type)
612
{
613
    s->typeSpool |= (type & 3) << (14 - s->typeSpoolLength);
614
    s->typeSpoolLength += 2;
615
    if (s->typeSpoolLength == 16) {
616
        bytestream_put_le16(s->pout, s->typeSpool);
617
        bytestream_put_buffer(s->pout, s->argumentSpool,
618
                              s->args - s->argumentSpool);
619
        s->typeSpoolLength = 0;
620
        s->typeSpool = 0;
621
        s->args = s->argumentSpool;
622
    }
623
}
624
 
625
/**
 * Write the video chunk for the evaluated cels and apply every coding
 * decision to the encoder's frame state through the ff_apply_* helpers.
 *
 * @param enc       encoder context; enc->out_buf is advanced
 * @param tempData  per-frame data holding the cel evaluations and remaps
 * @param w         unused
 * @param h         unused
 * @param numBlocks number of cels in tempData->cel_evals
 */
static void reconstruct_and_encode_image(RoqContext *enc, RoqTempdata *tempData, int w, int h, int numBlocks)
{
    int i, j, k;
    int x, y;
    int subX, subY;
    int dist=0; /* only consumed by the disabled debug code at the end */

    roq_qcell *qcell;
    CelEvaluation *eval;

    CodingSpool spool;

    spool.typeSpool=0;
    spool.typeSpoolLength=0;
    spool.args = spool.argumentSpool;
    spool.pout = &enc->out_buf;

    if (tempData->used_option[RoQ_ID_CCC]%2)
        tempData->mainChunkSize+=8; //FIXME

    /* Write the video chunk header */
    bytestream_put_le16(&enc->out_buf, RoQ_QUAD_VQ);
    bytestream_put_le32(&enc->out_buf, tempData->mainChunkSize/8);
    bytestream_put_byte(&enc->out_buf, 0x0);
    bytestream_put_byte(&enc->out_buf, 0x0);

    for (i=0; i<numBlocks; i++) {
        eval = tempData->cel_evals + i;

        x = eval->sourceX;
        y = eval->sourceY;
        dist += eval->eval_dist[eval->best_coding];

        /* Arguments are always spooled before the matching typecode */
        switch (eval->best_coding) {
        case RoQ_ID_MOT:
            write_typecode(&spool, RoQ_ID_MOT);
            break;

        case RoQ_ID_FCC:
            bytestream_put_byte(&spool.args, motion_arg(eval->motion));

            write_typecode(&spool, RoQ_ID_FCC);
            ff_apply_motion_8x8(enc, x, y,
                                eval->motion.d[0], eval->motion.d[1]);
            break;

        case RoQ_ID_SLD:
            bytestream_put_byte(&spool.args, tempData->i2f4[eval->cbEntry]);
            write_typecode(&spool, RoQ_ID_SLD);

            qcell = enc->cb4x4 + eval->cbEntry;
            ff_apply_vector_4x4(enc, x  , y  , enc->cb2x2 + qcell->idx[0]);
            ff_apply_vector_4x4(enc, x+4, y  , enc->cb2x2 + qcell->idx[1]);
            ff_apply_vector_4x4(enc, x  , y+4, enc->cb2x2 + qcell->idx[2]);
            ff_apply_vector_4x4(enc, x+4, y+4, enc->cb2x2 + qcell->idx[3]);
            break;

        case RoQ_ID_CCC:
            write_typecode(&spool, RoQ_ID_CCC);

            for (j=0; j<4; j++) {
                subX = x + 4*(j&1);
                subY = y + 2*(j&2);

                switch(eval->subCels[j].best_coding) {
                case RoQ_ID_MOT:
                    break;

                case RoQ_ID_FCC:
                    bytestream_put_byte(&spool.args,
                                        motion_arg(eval->subCels[j].motion));

                    ff_apply_motion_4x4(enc, subX, subY,
                                        eval->subCels[j].motion.d[0],
                                        eval->subCels[j].motion.d[1]);
                    break;

                case RoQ_ID_SLD:
                    bytestream_put_byte(&spool.args,
                                        tempData->i2f4[eval->subCels[j].cbEntry]);

                    qcell = enc->cb4x4 + eval->subCels[j].cbEntry;

                    ff_apply_vector_2x2(enc, subX  , subY  ,
                                        enc->cb2x2 + qcell->idx[0]);
                    ff_apply_vector_2x2(enc, subX+2, subY  ,
                                        enc->cb2x2 + qcell->idx[1]);
                    ff_apply_vector_2x2(enc, subX  , subY+2,
                                        enc->cb2x2 + qcell->idx[2]);
                    ff_apply_vector_2x2(enc, subX+2, subY+2,
                                        enc->cb2x2 + qcell->idx[3]);
                    break;

                case RoQ_ID_CCC:
                    for (k=0; k<4; k++) {
                        int cb_idx = eval->subCels[j].subCels[k];
                        bytestream_put_byte(&spool.args,
                                            tempData->i2f2[cb_idx]);

                        ff_apply_vector_2x2(enc, subX + 2*(k&1), subY + (k&2),
                                            enc->cb2x2 + cb_idx);
                    }
                    break;
                }
                write_typecode(&spool, eval->subCels[j].best_coding);
            }
            break;
        }
    }

    /* Flush the remainder of the argument/type spool */
    while (spool.typeSpoolLength)
        write_typecode(&spool, 0x0);

#if 0
    uint8_t *fdata[3] = {enc->frame_to_enc->data[0],
                           enc->frame_to_enc->data[1],
                           enc->frame_to_enc->data[2]};
    uint8_t *cdata[3] = {enc->current_frame->data[0],
                           enc->current_frame->data[1],
                           enc->current_frame->data[2]};
    av_log(enc->avctx, AV_LOG_ERROR, "Expected distortion: %i Actual: %i\n",
           dist,
           block_sse(fdata, cdata, 0, 0, 0, 0,
                     enc->frame_to_enc->linesize,
                     enc->current_frame->linesize,
                     enc->width));  //WARNING: Square dimensions implied...
#endif
}
754
 
755
 
756
/**
 * Create a single YUV cell from a 2x2 section of the image.
 *
 * @param block  output, 6 bytes: the four plane-0 samples in row-major
 *               order, then the rounded average of the four plane-1 samples,
 *               then the rounded average of the four plane-2 samples
 * @param data   the three plane pointers
 * @param top    row of the 2x2 section (same coordinate in every plane)
 * @param left   column of the 2x2 section
 * @param stride per-plane line sizes; planes 1 and 2 are both addressed
 *               with stride[1]
 */
static inline void frame_block_to_cell(uint8_t *block, uint8_t * const *data,
                                       int top, int left, const int *stride)
{
    int i, j, u=0, v=0;

    for (i=0; i<2; i++)
        for (j=0; j<2; j++) {
            int x = (top+i)*stride[0] + left + j;
            *block++ = data[0][x];
            x = (top+i)*stride[1] + left + j;
            u       += data[1][x];
            v       += data[2][x];
        }

    /* +2 rounds the average to nearest */
    *block++ = (u+2)/4;
    *block++ = (v+2)/4;
}
776
 
777
/**
778
 * Create YUV clusters for the entire image
779
 */
780
static void create_clusters(const AVFrame *frame, int w, int h, uint8_t *yuvClusters)
781
{
782
    int i, j, k, l;
783
 
784
    for (i=0; i
785
        for (j=0; j
786
            for (k=0; k < 2; k++)
787
                for (l=0; l < 2; l++)
788
                    frame_block_to_cell(yuvClusters + (l + 2*k)*6, frame->data,
789
                                        i+2*k, j+2*l, frame->linesize);
790
            yuvClusters += 24;
791
        }
792
}
793
 
794
/**
 * Run ELBG over the input points and convert the resulting codebook into
 * roq_cell entries.
 *
 * @param enc        encoder context (provides the random generator state)
 * @param tempdata   per-frame data; closest_cb2 receives the nearest-entry
 *                   table when size is not 4
 * @param points     input vectors, 6*size*size/4 ints each
 * @param inputCount number of input vectors
 * @param results    output, cbsize*size*size/4 cells
 * @param size       4 for the 4x4 codebook; any other value uses
 *                   tempdata->closest_cb2 as the nearest-entry table
 * @param cbsize     number of codebook entries to generate
 *
 * NOTE(review): allocation results are not checked; a failed av_malloc()
 * would be passed on to the ELBG functions. A proper fix needs an error
 * return, which would change this function's signature.
 */
static void generate_codebook(RoqContext *enc, RoqTempdata *tempdata,
                              int *points, int inputCount, roq_cell *results,
                              int size, int cbsize)
{
    int i, j, k;
    int c_size = size*size/4; /* number of 2x2 cells per codebook entry */
    int *buf;
    int *codebook = av_malloc(6*c_size*cbsize*sizeof(int));
    int *closest_cb;

    /* The nearest-entry table is only needed later for the 2x2 codebook */
    if (size == 4)
        closest_cb = av_malloc(6*c_size*inputCount*sizeof(int));
    else
        closest_cb = tempdata->closest_cb2;

    ff_init_elbg(points, 6*c_size, inputCount, codebook, cbsize, 1, closest_cb, &enc->randctx);
    ff_do_elbg(points, 6*c_size, inputCount, codebook, cbsize, 1, closest_cb, &enc->randctx);

    if (size == 4)
        av_free(closest_cb);

    buf = codebook;
    for (i=0; i<cbsize; i++)
        for (k=0; k<c_size; k++) {
            for(j=0; j<4; j++)
                results->y[j] = *buf++;

            /* Undo the chroma bias applied to the input points, rounding */
            results->u =    (*buf++ + CHROMA_BIAS/2)/CHROMA_BIAS;
            results->v =    (*buf++ + CHROMA_BIAS/2)/CHROMA_BIAS;
            results++;
        }

    av_free(codebook);
}
828
 
829
/**
 * Generate the 4x4 and 2x2 codebooks for the frame being encoded and fill
 * the unpacked codebook tables in tempData->codebooks.
 *
 * Also allocates tempData->closest_cb2, which the caller must free.
 *
 * NOTE(review): allocation results are not checked; a proper fix needs an
 * error return, which would change this function's signature.
 */
static void generate_new_codebooks(RoqContext *enc, RoqTempdata *tempData)
{
    int i,j;
    RoqCodebooks *codebooks = &tempData->codebooks;
    int max = enc->width*enc->height/16; /* number of 4x4 blocks */
    uint8_t mb2[3*4];
    roq_cell *results4 = av_malloc(sizeof(roq_cell)*MAX_CBS_4x4*4);
    /* 24 bytes per 4x4 block; this is a byte buffer, so size it by its own
     * element type rather than by sizeof(int) */
    uint8_t *yuvClusters=av_malloc(max*6*4*sizeof(*yuvClusters));
    int *points = av_malloc(max*6*4*sizeof(int));
    int bias;

    /* Subsample YUV data */
    create_clusters(enc->frame_to_enc, enc->width, enc->height, yuvClusters);

    /* Cast to integer and apply chroma bias */
    for (i=0; i<max*24; i++) {
        bias = ((i%6)<4) ? 1 : CHROMA_BIAS;
        points[i] = bias*yuvClusters[i];
    }

    /* Create 4x4 codebooks */
    generate_codebook(enc, tempData, points, max, results4, 4, MAX_CBS_4x4);

    codebooks->numCB4 = MAX_CBS_4x4;

    tempData->closest_cb2 = av_malloc(max*4*sizeof(int));

    /* Create 2x2 codebooks */
    generate_codebook(enc, tempData, points, max*4, enc->cb2x2, 2, MAX_CBS_2x2);

    codebooks->numCB2 = MAX_CBS_2x2;

    /* Unpack 2x2 codebook clusters */
    for (i=0; i<codebooks->numCB2; i++)
        unpack_roq_cell(enc->cb2x2 + i, codebooks->unpacked_cb2 + i*2*2*3);

    /* Index all 4x4 entries to the 2x2 entries, unpack, and enlarge */
    for (i=0; i<codebooks->numCB4; i++) {
        for (j=0; j<4; j++) {
            unpack_roq_cell(&results4[4*i + j], mb2);
            index_mb(mb2, codebooks->unpacked_cb2, codebooks->numCB2,
                     &enc->cb4x4[i].idx[j], 2);
        }
        unpack_roq_qcell(codebooks->unpacked_cb2, enc->cb4x4 + i,
                         codebooks->unpacked_cb4 + i*4*4*3);
        enlarge_roq_mb4(codebooks->unpacked_cb4 + i*4*4*3,
                        codebooks->unpacked_cb4_enlarged + i*8*8*3);
    }

    av_free(yuvClusters);
    av_free(points);
    av_free(results4);
}
882
 
883
/**
 * Encode the frame in enc->frame_to_enc: generate codebooks, search motion,
 * evaluate every cel, write the codebook and video chunks, and rotate the
 * frame and motion-vector history.
 *
 * If the video chunk would exceed 65535 bytes, lambda is multiplied by 1.5
 * and the cel evaluation is repeated until the chunk fits.
 */
static void roq_encode_video(RoqContext *enc)
{
    RoqTempdata *tempData = enc->tmpData;
    int i;

    memset(tempData, 0, sizeof(*tempData));

    create_cel_evals(enc, tempData);

    generate_new_codebooks(enc, tempData);

    /* The 8x8 pass must run first: the 4x4 pass reads its results */
    if (enc->framesSinceKeyframe >= 1) {
        motion_search(enc, 8);
        motion_search(enc, 4);
    }

 retry_encode:
    for (i=0; i<enc->width*enc->height/64; i++)
        gather_data_for_cel(tempData->cel_evals + i, enc, tempData);

    /* Quake 3 can't handle chunks bigger than 65535 bytes */
    if (tempData->mainChunkSize/8 > 65535) {
        av_log(enc->avctx, AV_LOG_ERROR,
               "Warning, generated a frame too big (%d > 65535), "
               "try using a smaller qscale value.\n",
               tempData->mainChunkSize/8);
        enc->lambda *= 1.5;
        /* Reset everything gather_data_for_cel() accumulates */
        tempData->mainChunkSize = 0;
        memset(tempData->used_option, 0, sizeof(tempData->used_option));
        memset(tempData->codebooks.usedCB4, 0,
               sizeof(tempData->codebooks.usedCB4));
        memset(tempData->codebooks.usedCB2, 0,
               sizeof(tempData->codebooks.usedCB2));

        goto retry_encode;
    }

    remap_codebooks(enc, tempData);

    write_codebooks(enc, tempData);

    reconstruct_and_encode_image(enc, tempData, enc->width, enc->height,
                                 enc->width*enc->height/64);

    enc->avctx->coded_frame = enc->current_frame;

    /* Rotate frame history */
    FFSWAP(AVFrame *, enc->current_frame, enc->last_frame);
    FFSWAP(motion_vect *, enc->last_motion4, enc->this_motion4);
    FFSWAP(motion_vect *, enc->last_motion8, enc->this_motion8);

    av_free(tempData->cel_evals);
    av_free(tempData->closest_cb2);

    enc->framesSinceKeyframe++;
}
939
 
940
/**
 * Release everything owned by the encoder's private context.
 *
 * Frees the motion-vector buffers, the per-frame scratch data and both
 * frames held by the context.
 *
 * @param avctx codec context whose priv_data is a RoqContext
 * @return always 0
 */
static av_cold int roq_encode_end(AVCodecContext *avctx)
{
    RoqContext *ctx = avctx->priv_data;

    /* Motion-vector buffers for both block sizes. */
    av_free(ctx->last_motion8);
    av_free(ctx->this_motion8);
    av_free(ctx->last_motion4);
    av_free(ctx->this_motion4);

    /* Per-frame scratch structure. */
    av_free(ctx->tmpData);

    /* Frame history. */
    av_frame_free(&ctx->last_frame);
    av_frame_free(&ctx->current_frame);

    return 0;
}
955
 
956
/**
 * Initialise the RoQ encoder's private context.
 *
 * Seeds the random generator, validates the picture dimensions (both must be
 * multiples of 16; a non-power-of-two size is only logged), and allocates the
 * two frames, the scratch data and the motion-vector buffers.
 *
 * @param avctx codec context whose priv_data is a RoqContext
 * @return 0 on success, AVERROR(EINVAL) for unsupported dimensions,
 *         AVERROR(ENOMEM) if any allocation fails
 */
static av_cold int roq_encode_init(AVCodecContext *avctx)
{
    RoqContext *enc = avctx->priv_data;

    av_lfg_init(&enc->randctx, 1);

    enc->framesSinceKeyframe = 0;
    if ((avctx->width & 0xf) || (avctx->height & 0xf)) {
        av_log(avctx, AV_LOG_ERROR, "Dimensions must be divisible by 16\n");
        return AVERROR(EINVAL);
    }

    if (((avctx->width)&(avctx->width-1))||((avctx->height)&(avctx->height-1)))
        av_log(avctx, AV_LOG_ERROR, "Warning: dimensions not power of two\n");

    enc->width = avctx->width;
    enc->height = avctx->height;

    enc->first_frame = 1;

    enc->last_frame    = av_frame_alloc();
    enc->current_frame = av_frame_alloc();
    if (!enc->last_frame || !enc->current_frame) {
        roq_encode_end(avctx);
        return AVERROR(ENOMEM);
    }

    enc->tmpData      = av_malloc(sizeof(RoqTempdata));

    /* One motion vector per 16 pixels (4x4) and per 64 pixels (8x8). */
    enc->this_motion4 =
        av_mallocz((enc->width*enc->height/16)*sizeof(motion_vect));

    enc->last_motion4 =
        av_malloc ((enc->width*enc->height/16)*sizeof(motion_vect));

    enc->this_motion8 =
        av_mallocz((enc->width*enc->height/64)*sizeof(motion_vect));

    enc->last_motion8 =
        av_malloc ((enc->width*enc->height/64)*sizeof(motion_vect));

    /* Any of the allocations above may have failed; the encode path uses
     * these buffers without further checks, so bail out now. */
    if (!enc->tmpData ||
        !enc->this_motion4 || !enc->last_motion4 ||
        !enc->this_motion8 || !enc->last_motion8) {
        roq_encode_end(avctx);
        return AVERROR(ENOMEM);
    }

    return 0;
}
1000
 
1001
/**
 * Append a RoQ "video info" chunk to the encoder's output buffer.
 *
 * Layout written, in order: 16-bit chunk id (RoQ_INFO), 32-bit payload size
 * (8), two zero bytes, 16-bit width, 16-bit height, and four fixed bytes.
 * enc->out_buf is advanced past the written data.
 *
 * @param enc encoder context providing out_buf, width and height
 */
static void roq_write_video_info_chunk(RoqContext *enc)
{
    /* Unused in Quake 3, mimics the output of the real encoder */
    static const uint8_t tail_bytes[4] = { 0x08, 0x00, 0x04, 0x00 };
    int k;

    /* Chunk header: id followed by the payload size of 8 bytes */
    bytestream_put_le16(&enc->out_buf, RoQ_INFO);
    bytestream_put_le32(&enc->out_buf, 8);

    /* Unused argument, written as two zero bytes */
    for (k = 0; k < 2; k++)
        bytestream_put_byte(&enc->out_buf, 0x00);

    /* Picture dimensions: width first, then height */
    bytestream_put_le16(&enc->out_buf, enc->width);
    bytestream_put_le16(&enc->out_buf, enc->height);

    for (k = 0; k < 4; k++)
        bytestream_put_byte(&enc->out_buf, tail_bytes[k]);
}
1025
 
1026
static int roq_encode_frame(AVCodecContext *avctx, AVPacket *pkt,
1027
                            const AVFrame *frame, int *got_packet)
1028
{
1029
    RoqContext *enc = avctx->priv_data;
1030
    int size, ret;
1031
 
1032
    enc->avctx = avctx;
1033
 
1034
    enc->frame_to_enc = frame;
1035
 
1036
    if (frame->quality)
1037
        enc->lambda = frame->quality - 1;
1038
    else
1039
        enc->lambda = 2*ROQ_LAMBDA_SCALE;
1040
 
1041
    /* 138 bits max per 8x8 block +
1042
     *     256 codebooks*(6 bytes 2x2 + 4 bytes 4x4) + 8 bytes frame header */
1043
    size = ((enc->width * enc->height / 64) * 138 + 7) / 8 + 256 * (6 + 4) + 8;
1044
    if ((ret = ff_alloc_packet2(avctx, pkt, size)) < 0)
1045
        return ret;
1046
    enc->out_buf = pkt->data;
1047
 
1048
    /* Check for I frame */
1049
    if (enc->framesSinceKeyframe == avctx->gop_size)
1050
        enc->framesSinceKeyframe = 0;
1051
 
1052
    if (enc->first_frame) {
1053
        /* Alloc memory for the reconstruction data (we must know the stride
1054
         for that) */
1055
        if ((ret = ff_get_buffer(avctx, enc->current_frame, 0)) < 0 ||
1056
            (ret = ff_get_buffer(avctx, enc->last_frame,    0)) < 0)
1057
            return ret;
1058
 
1059
        /* Before the first video frame, write a "video info" chunk */
1060
        roq_write_video_info_chunk(enc);
1061
 
1062
        enc->first_frame = 0;
1063
    }
1064
 
1065
    /* Encode the actual frame */
1066
    roq_encode_video(enc);
1067
 
1068
    pkt->size   = enc->out_buf - pkt->data;
1069
    if (enc->framesSinceKeyframe == 1)
1070
        pkt->flags |= AV_PKT_FLAG_KEY;
1071
    *got_packet = 1;
1072
 
1073
    return 0;
1074
}
1075
 
1076
AVCodec ff_roq_encoder = {
1077
    .name                 = "roqvideo",
1078
    .long_name            = NULL_IF_CONFIG_SMALL("id RoQ video"),
1079
    .type                 = AVMEDIA_TYPE_VIDEO,
1080
    .id                   = AV_CODEC_ID_ROQ,
1081
    .priv_data_size       = sizeof(RoqContext),
1082
    .init                 = roq_encode_init,
1083
    .encode2              = roq_encode_frame,
1084
    .close                = roq_encode_end,
1085
    .supported_framerates = (const AVRational[]){ {30,1}, {0,0} },
1086
    .pix_fmts             = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV444P,
1087
                                                        AV_PIX_FMT_NONE },
1088
};