Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
/*
 * Copyright (C) 2004 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include <limits.h>
#include <string.h>

#include "libavutil/intmath.h"
#include "libavutil/log.h"
#include "libavutil/opt.h"
#include "avcodec.h"
#include "dsputil.h"
#include "internal.h"
#include "snow_dwt.h"
#include "snow.h"

#include "rangecoder.h"
#include "mathops.h"

#include "mpegvideo.h"
#include "h263.h"

36
static av_cold int encode_init(AVCodecContext *avctx)
37
{
38
    SnowContext *s = avctx->priv_data;
39
    int plane_index, ret;
40
 
41
    if(avctx->strict_std_compliance > FF_COMPLIANCE_EXPERIMENTAL){
42
        av_log(avctx, AV_LOG_ERROR, "This codec is under development, files encoded with it may not be decodable with future versions!!!\n"
43
               "Use vstrict=-2 / -strict -2 to use it anyway.\n");
44
        return -1;
45
    }
46
 
47
    if(avctx->prediction_method == DWT_97
48
       && (avctx->flags & CODEC_FLAG_QSCALE)
49
       && avctx->global_quality == 0){
50
        av_log(avctx, AV_LOG_ERROR, "The 9/7 wavelet is incompatible with lossless mode.\n");
51
        return -1;
52
    }
53
 
54
    s->spatial_decomposition_type= avctx->prediction_method; //FIXME add decorrelator type r transform_type
55
 
56
    s->mv_scale       = (avctx->flags & CODEC_FLAG_QPEL) ? 2 : 4;
57
    s->block_max_depth= (avctx->flags & CODEC_FLAG_4MV ) ? 1 : 0;
58
 
59
    for(plane_index=0; plane_index<3; plane_index++){
60
        s->plane[plane_index].diag_mc= 1;
61
        s->plane[plane_index].htaps= 6;
62
        s->plane[plane_index].hcoeff[0]=  40;
63
        s->plane[plane_index].hcoeff[1]= -10;
64
        s->plane[plane_index].hcoeff[2]=   2;
65
        s->plane[plane_index].fast_mc= 1;
66
    }
67
 
68
    if ((ret = ff_snow_common_init(avctx)) < 0) {
69
        ff_snow_common_end(avctx->priv_data);
70
        return ret;
71
    }
72
    ff_snow_alloc_blocks(s);
73
 
74
    s->version=0;
75
 
76
    s->m.avctx   = avctx;
77
    s->m.flags   = avctx->flags;
78
    s->m.bit_rate= avctx->bit_rate;
79
 
80
    s->m.me.temp      =
81
    s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t));
82
    s->m.me.map       = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
83
    s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t));
84
    s->m.obmc_scratchpad= av_mallocz(MB_SIZE*MB_SIZE*12*sizeof(uint32_t));
85
    if (!s->m.me.scratchpad || !s->m.me.map || !s->m.me.score_map || !s->m.obmc_scratchpad)
86
        return AVERROR(ENOMEM);
87
 
88
    ff_h263_encode_init(&s->m); //mv_penalty
89
 
90
    s->max_ref_frames = FFMAX(FFMIN(avctx->refs, MAX_REF_FRAMES), 1);
91
 
92
    if(avctx->flags&CODEC_FLAG_PASS1){
93
        if(!avctx->stats_out)
94
            avctx->stats_out = av_mallocz(256);
95
 
96
        if (!avctx->stats_out)
97
            return AVERROR(ENOMEM);
98
    }
99
    if((avctx->flags&CODEC_FLAG_PASS2) || !(avctx->flags&CODEC_FLAG_QSCALE)){
100
        if(ff_rate_control_init(&s->m) < 0)
101
            return -1;
102
    }
103
    s->pass1_rc= !(avctx->flags & (CODEC_FLAG_QSCALE|CODEC_FLAG_PASS2));
104
 
105
    switch(avctx->pix_fmt){
106
    case AV_PIX_FMT_YUV444P:
107
//    case AV_PIX_FMT_YUV422P:
108
    case AV_PIX_FMT_YUV420P:
109
//    case AV_PIX_FMT_YUV411P:
110
    case AV_PIX_FMT_YUV410P:
111
        s->nb_planes = 3;
112
        s->colorspace_type= 0;
113
        break;
114
    case AV_PIX_FMT_GRAY8:
115
        s->nb_planes = 1;
116
        s->colorspace_type = 1;
117
        break;
118
/*    case AV_PIX_FMT_RGB32:
119
        s->colorspace= 1;
120
        break;*/
121
    default:
122
        av_log(avctx, AV_LOG_ERROR, "pixel format not supported\n");
123
        return -1;
124
    }
125
    avcodec_get_chroma_sub_sample(avctx->pix_fmt, &s->chroma_h_shift, &s->chroma_v_shift);
126
 
127
    ff_set_cmp(&s->dsp, s->dsp.me_cmp, s->avctx->me_cmp);
128
    ff_set_cmp(&s->dsp, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
129
 
130
    s->input_picture = av_frame_alloc();
131
    if (!s->input_picture)
132
        return AVERROR(ENOMEM);
133
    if ((ret = ff_get_buffer(s->avctx, s->input_picture, AV_GET_BUFFER_FLAG_REF)) < 0)
134
        return ret;
135
 
136
    if(s->avctx->me_method == ME_ITER){
137
        int i;
138
        int size= s->b_width * s->b_height << 2*s->block_max_depth;
139
        for(i=0; imax_ref_frames; i++){
140
            s->ref_mvs[i]= av_mallocz(size*sizeof(int16_t[2]));
141
            s->ref_scores[i]= av_mallocz(size*sizeof(uint32_t));
142
            if (!s->ref_mvs[i] || !s->ref_scores[i])
143
                return AVERROR(ENOMEM);
144
        }
145
    }
146
 
147
    return 0;
148
}
149
 
//near copy & paste from dsputil, FIXME
/**
 * Sum all samples of a w x h block.
 *
 * @param pix       pointer to the top-left sample of the block (read only)
 * @param line_size distance, in samples, between the starts of two rows
 * @param w         block width in samples
 * @param h         block height in rows
 * @return sum of the w*h samples (0 when w or h is not positive)
 */
static int pix_sum(const uint8_t * pix, int line_size, int w, int h)
{
    int s = 0;
    int i, j;

    for (i = 0; i < h; i++) {
        /* index from the row start so no pointer is ever formed beyond
         * the last row that is actually read */
        const uint8_t *row = pix + i * line_size;
        for (j = 0; j < w; j++)
            s += row[j];
    }
    return s;
}

166
//near copy & paste from dsputil, FIXME
167
static int pix_norm1(uint8_t * pix, int line_size, int w)
168
{
169
    int s, i, j;
170
    uint32_t *sq = ff_squareTbl + 256;
171
 
172
    s = 0;
173
    for (i = 0; i < w; i++) {
174
        for (j = 0; j < w; j ++) {
175
            s += sq[pix[0]];
176
            pix ++;
177
        }
178
        pix += line_size - w;
179
    }
180
    return s;
181
}
182
 
183
static inline int get_penalty_factor(int lambda, int lambda2, int type){
184
    switch(type&0xFF){
185
    default:
186
    case FF_CMP_SAD:
187
        return lambda>>FF_LAMBDA_SHIFT;
188
    case FF_CMP_DCT:
189
        return (3*lambda)>>(FF_LAMBDA_SHIFT+1);
190
    case FF_CMP_W53:
191
        return (4*lambda)>>(FF_LAMBDA_SHIFT);
192
    case FF_CMP_W97:
193
        return (2*lambda)>>(FF_LAMBDA_SHIFT);
194
    case FF_CMP_SATD:
195
    case FF_CMP_DCT264:
196
        return (2*lambda)>>FF_LAMBDA_SHIFT;
197
    case FF_CMP_RD:
198
    case FF_CMP_PSNR:
199
    case FF_CMP_SSE:
200
    case FF_CMP_NSSE:
201
        return lambda2>>FF_LAMBDA_SHIFT;
202
    case FF_CMP_BIT:
203
        return 1;
204
    }
205
}
206
 
207
//FIXME copy&paste
208
#define P_LEFT P[1]
209
#define P_TOP P[2]
210
#define P_TOPRIGHT P[3]
211
#define P_MEDIAN P[4]
212
#define P_MV1 P[9]
213
#define FLAG_QPEL   1 //must be 1
214
 
215
static int encode_q_branch(SnowContext *s, int level, int x, int y){
216
    uint8_t p_buffer[1024];
217
    uint8_t i_buffer[1024];
218
    uint8_t p_state[sizeof(s->block_state)];
219
    uint8_t i_state[sizeof(s->block_state)];
220
    RangeCoder pc, ic;
221
    uint8_t *pbbak= s->c.bytestream;
222
    uint8_t *pbbak_start= s->c.bytestream_start;
223
    int score, score2, iscore, i_len, p_len, block_s, sum, base_bits;
224
    const int w= s->b_width  << s->block_max_depth;
225
    const int h= s->b_height << s->block_max_depth;
226
    const int rem_depth= s->block_max_depth - level;
227
    const int index= (x + y*w) << rem_depth;
228
    const int block_w= 1<<(LOG2_MB_SIZE - level);
229
    int trx= (x+1)<
230
    int try= (y+1)<
231
    const BlockNode *left  = x ? &s->block[index-1] : &null_block;
232
    const BlockNode *top   = y ? &s->block[index-w] : &null_block;
233
    const BlockNode *right = trxblock[index+1] : &null_block;
234
    const BlockNode *bottom= tryblock[index+w] : &null_block;
235
    const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
236
    const BlockNode *tr    = y && trxblock[index-w+(1<
237
    int pl = left->color[0];
238
    int pcb= left->color[1];
239
    int pcr= left->color[2];
240
    int pmx, pmy;
241
    int mx=0, my=0;
242
    int l,cr,cb;
243
    const int stride= s->current_picture->linesize[0];
244
    const int uvstride= s->current_picture->linesize[1];
245
    uint8_t *current_data[3]= { s->input_picture->data[0] + (x + y*  stride)*block_w,
246
                                s->input_picture->data[1] + ((x*block_w)>>s->chroma_h_shift) + ((y*uvstride*block_w)>>s->chroma_v_shift),
247
                                s->input_picture->data[2] + ((x*block_w)>>s->chroma_h_shift) + ((y*uvstride*block_w)>>s->chroma_v_shift)};
248
    int P[10][2];
249
    int16_t last_mv[3][2];
250
    int qpel= !!(s->avctx->flags & CODEC_FLAG_QPEL); //unused
251
    const int shift= 1+qpel;
252
    MotionEstContext *c= &s->m.me;
253
    int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
254
    int mx_context= av_log2(2*FFABS(left->mx - top->mx));
255
    int my_context= av_log2(2*FFABS(left->my - top->my));
256
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
257
    int ref, best_ref, ref_score, ref_mx, ref_my;
258
 
259
    av_assert0(sizeof(s->block_state) >= 256);
260
    if(s->keyframe){
261
        set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
262
        return 0;
263
    }
264
 
265
//    clip predictors / edge ?
266
 
267
    P_LEFT[0]= left->mx;
268
    P_LEFT[1]= left->my;
269
    P_TOP [0]= top->mx;
270
    P_TOP [1]= top->my;
271
    P_TOPRIGHT[0]= tr->mx;
272
    P_TOPRIGHT[1]= tr->my;
273
 
274
    last_mv[0][0]= s->block[index].mx;
275
    last_mv[0][1]= s->block[index].my;
276
    last_mv[1][0]= right->mx;
277
    last_mv[1][1]= right->my;
278
    last_mv[2][0]= bottom->mx;
279
    last_mv[2][1]= bottom->my;
280
 
281
    s->m.mb_stride=2;
282
    s->m.mb_x=
283
    s->m.mb_y= 0;
284
    c->skip= 0;
285
 
286
    av_assert1(c->  stride ==   stride);
287
    av_assert1(c->uvstride == uvstride);
288
 
289
    c->penalty_factor    = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp);
290
    c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp);
291
    c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp);
292
    c->current_mv_penalty= c->mv_penalty[s->m.f_code=1] + MAX_MV;
293
 
294
    c->xmin = - x*block_w - 16+3;
295
    c->ymin = - y*block_w - 16+3;
296
    c->xmax = - (x+1)*block_w + (w<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-3;
297
    c->ymax = - (y+1)*block_w + (h<<(LOG2_MB_SIZE - s->block_max_depth)) + 16-3;
298
 
299
    if(P_LEFT[0]     > (c->xmax<xmax<
300
    if(P_LEFT[1]     > (c->ymax<ymax<
301
    if(P_TOP[0]      > (c->xmax<xmax<
302
    if(P_TOP[1]      > (c->ymax<ymax<
303
    if(P_TOPRIGHT[0] < (c->xmin<xmin<
304
    if(P_TOPRIGHT[0] > (c->xmax<xmax<
305
    if(P_TOPRIGHT[1] > (c->ymax<ymax<
306
 
307
    P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
308
    P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
309
 
310
    if (!y) {
311
        c->pred_x= P_LEFT[0];
312
        c->pred_y= P_LEFT[1];
313
    } else {
314
        c->pred_x = P_MEDIAN[0];
315
        c->pred_y = P_MEDIAN[1];
316
    }
317
 
318
    score= INT_MAX;
319
    best_ref= 0;
320
    for(ref=0; refref_frames; ref++){
321
        init_ref(c, current_data, s->last_picture[ref]->data, NULL, block_w*x, block_w*y, 0);
322
 
323
        ref_score= ff_epzs_motion_search(&s->m, &ref_mx, &ref_my, P, 0, /*ref_index*/ 0, last_mv,
324
                                         (1<<16)>>shift, level-LOG2_MB_SIZE+4, block_w);
325
 
326
        av_assert2(ref_mx >= c->xmin);
327
        av_assert2(ref_mx <= c->xmax);
328
        av_assert2(ref_my >= c->ymin);
329
        av_assert2(ref_my <= c->ymax);
330
 
331
        ref_score= c->sub_motion_search(&s->m, &ref_mx, &ref_my, ref_score, 0, 0, level-LOG2_MB_SIZE+4, block_w);
332
        ref_score= ff_get_mb_score(&s->m, ref_mx, ref_my, 0, 0, level-LOG2_MB_SIZE+4, block_w, 0);
333
        ref_score+= 2*av_log2(2*ref)*c->penalty_factor;
334
        if(s->ref_mvs[ref]){
335
            s->ref_mvs[ref][index][0]= ref_mx;
336
            s->ref_mvs[ref][index][1]= ref_my;
337
            s->ref_scores[ref][index]= ref_score;
338
        }
339
        if(score > ref_score){
340
            score= ref_score;
341
            best_ref= ref;
342
            mx= ref_mx;
343
            my= ref_my;
344
        }
345
    }
346
    //FIXME if mb_cmp != SSE then intra cannot be compared currently and mb_penalty vs. lambda2
347
 
348
  //  subpel search
349
    base_bits= get_rac_count(&s->c) - 8*(s->c.bytestream - s->c.bytestream_start);
350
    pc= s->c;
351
    pc.bytestream_start=
352
    pc.bytestream= p_buffer; //FIXME end/start? and at the other stoo
353
    memcpy(p_state, s->block_state, sizeof(s->block_state));
354
 
355
    if(level!=s->block_max_depth)
356
        put_rac(&pc, &p_state[4 + s_context], 1);
357
    put_rac(&pc, &p_state[1 + left->type + top->type], 0);
358
    if(s->ref_frames > 1)
359
        put_symbol(&pc, &p_state[128 + 1024 + 32*ref_context], best_ref, 0);
360
    pred_mv(s, &pmx, &pmy, best_ref, left, top, tr);
361
    put_symbol(&pc, &p_state[128 + 32*(mx_context + 16*!!best_ref)], mx - pmx, 1);
362
    put_symbol(&pc, &p_state[128 + 32*(my_context + 16*!!best_ref)], my - pmy, 1);
363
    p_len= pc.bytestream - pc.bytestream_start;
364
    score += (s->lambda2*(get_rac_count(&pc)-base_bits))>>FF_LAMBDA_SHIFT;
365
 
366
    block_s= block_w*block_w;
367
    sum = pix_sum(current_data[0], stride, block_w, block_w);
368
    l= (sum + block_s/2)/block_s;
369
    iscore = pix_norm1(current_data[0], stride, block_w) - 2*l*sum + l*l*block_s;
370
 
371
    if (s->nb_planes > 2) {
372
        block_s= block_w*block_w>>(s->chroma_h_shift + s->chroma_v_shift);
373
        sum = pix_sum(current_data[1], uvstride, block_w>>s->chroma_h_shift, block_w>>s->chroma_v_shift);
374
        cb= (sum + block_s/2)/block_s;
375
    //    iscore += pix_norm1(¤t_mb[1][0], uvstride, block_w>>1) - 2*cb*sum + cb*cb*block_s;
376
        sum = pix_sum(current_data[2], uvstride, block_w>>s->chroma_h_shift, block_w>>s->chroma_v_shift);
377
        cr= (sum + block_s/2)/block_s;
378
    //    iscore += pix_norm1(¤t_mb[2][0], uvstride, block_w>>1) - 2*cr*sum + cr*cr*block_s;
379
    }else
380
        cb = cr = 0;
381
 
382
    ic= s->c;
383
    ic.bytestream_start=
384
    ic.bytestream= i_buffer; //FIXME end/start? and at the other stoo
385
    memcpy(i_state, s->block_state, sizeof(s->block_state));
386
    if(level!=s->block_max_depth)
387
        put_rac(&ic, &i_state[4 + s_context], 1);
388
    put_rac(&ic, &i_state[1 + left->type + top->type], 1);
389
    put_symbol(&ic, &i_state[32],  l-pl , 1);
390
    if (s->nb_planes > 2) {
391
        put_symbol(&ic, &i_state[64], cb-pcb, 1);
392
        put_symbol(&ic, &i_state[96], cr-pcr, 1);
393
    }
394
    i_len= ic.bytestream - ic.bytestream_start;
395
    iscore += (s->lambda2*(get_rac_count(&ic)-base_bits))>>FF_LAMBDA_SHIFT;
396
 
397
//    assert(score==256*256*256*64-1);
398
    av_assert1(iscore < 255*255*256 + s->lambda2*10);
399
    av_assert1(iscore >= 0);
400
    av_assert1(l>=0 && l<=255);
401
    av_assert1(pl>=0 && pl<=255);
402
 
403
    if(level==0){
404
        int varc= iscore >> 8;
405
        int vard= score >> 8;
406
        if (vard <= 64 || vard < varc)
407
            c->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
408
        else
409
            c->scene_change_score+= s->m.qscale;
410
    }
411
 
412
    if(level!=s->block_max_depth){
413
        put_rac(&s->c, &s->block_state[4 + s_context], 0);
414
        score2 = encode_q_branch(s, level+1, 2*x+0, 2*y+0);
415
        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+0);
416
        score2+= encode_q_branch(s, level+1, 2*x+0, 2*y+1);
417
        score2+= encode_q_branch(s, level+1, 2*x+1, 2*y+1);
418
        score2+= s->lambda2>>FF_LAMBDA_SHIFT; //FIXME exact split overhead
419
 
420
        if(score2 < score && score2 < iscore)
421
            return score2;
422
    }
423
 
424
    if(iscore < score){
425
        pred_mv(s, &pmx, &pmy, 0, left, top, tr);
426
        memcpy(pbbak, i_buffer, i_len);
427
        s->c= ic;
428
        s->c.bytestream_start= pbbak_start;
429
        s->c.bytestream= pbbak + i_len;
430
        set_blocks(s, level, x, y, l, cb, cr, pmx, pmy, 0, BLOCK_INTRA);
431
        memcpy(s->block_state, i_state, sizeof(s->block_state));
432
        return iscore;
433
    }else{
434
        memcpy(pbbak, p_buffer, p_len);
435
        s->c= pc;
436
        s->c.bytestream_start= pbbak_start;
437
        s->c.bytestream= pbbak + p_len;
438
        set_blocks(s, level, x, y, pl, pcb, pcr, mx, my, best_ref, 0);
439
        memcpy(s->block_state, p_state, sizeof(s->block_state));
440
        return score;
441
    }
442
}
443
 
444
static void encode_q_branch2(SnowContext *s, int level, int x, int y){
445
    const int w= s->b_width  << s->block_max_depth;
446
    const int rem_depth= s->block_max_depth - level;
447
    const int index= (x + y*w) << rem_depth;
448
    int trx= (x+1)<
449
    BlockNode *b= &s->block[index];
450
    const BlockNode *left  = x ? &s->block[index-1] : &null_block;
451
    const BlockNode *top   = y ? &s->block[index-w] : &null_block;
452
    const BlockNode *tl    = y && x ? &s->block[index-w-1] : left;
453
    const BlockNode *tr    = y && trxblock[index-w+(1<
454
    int pl = left->color[0];
455
    int pcb= left->color[1];
456
    int pcr= left->color[2];
457
    int pmx, pmy;
458
    int ref_context= av_log2(2*left->ref) + av_log2(2*top->ref);
459
    int mx_context= av_log2(2*FFABS(left->mx - top->mx)) + 16*!!b->ref;
460
    int my_context= av_log2(2*FFABS(left->my - top->my)) + 16*!!b->ref;
461
    int s_context= 2*left->level + 2*top->level + tl->level + tr->level;
462
 
463
    if(s->keyframe){
464
        set_blocks(s, level, x, y, pl, pcb, pcr, 0, 0, 0, BLOCK_INTRA);
465
        return;
466
    }
467
 
468
    if(level!=s->block_max_depth){
469
        if(same_block(b,b+1) && same_block(b,b+w) && same_block(b,b+w+1)){
470
            put_rac(&s->c, &s->block_state[4 + s_context], 1);
471
        }else{
472
            put_rac(&s->c, &s->block_state[4 + s_context], 0);
473
            encode_q_branch2(s, level+1, 2*x+0, 2*y+0);
474
            encode_q_branch2(s, level+1, 2*x+1, 2*y+0);
475
            encode_q_branch2(s, level+1, 2*x+0, 2*y+1);
476
            encode_q_branch2(s, level+1, 2*x+1, 2*y+1);
477
            return;
478
        }
479
    }
480
    if(b->type & BLOCK_INTRA){
481
        pred_mv(s, &pmx, &pmy, 0, left, top, tr);
482
        put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 1);
483
        put_symbol(&s->c, &s->block_state[32], b->color[0]-pl , 1);
484
        if (s->nb_planes > 2) {
485
            put_symbol(&s->c, &s->block_state[64], b->color[1]-pcb, 1);
486
            put_symbol(&s->c, &s->block_state[96], b->color[2]-pcr, 1);
487
        }
488
        set_blocks(s, level, x, y, b->color[0], b->color[1], b->color[2], pmx, pmy, 0, BLOCK_INTRA);
489
    }else{
490
        pred_mv(s, &pmx, &pmy, b->ref, left, top, tr);
491
        put_rac(&s->c, &s->block_state[1 + (left->type&1) + (top->type&1)], 0);
492
        if(s->ref_frames > 1)
493
            put_symbol(&s->c, &s->block_state[128 + 1024 + 32*ref_context], b->ref, 0);
494
        put_symbol(&s->c, &s->block_state[128 + 32*mx_context], b->mx - pmx, 1);
495
        put_symbol(&s->c, &s->block_state[128 + 32*my_context], b->my - pmy, 1);
496
        set_blocks(s, level, x, y, pl, pcb, pcr, b->mx, b->my, b->ref, 0);
497
    }
498
}
499
 
500
static int get_dc(SnowContext *s, int mb_x, int mb_y, int plane_index){
501
    int i, x2, y2;
502
    Plane *p= &s->plane[plane_index];
503
    const int block_size = MB_SIZE >> s->block_max_depth;
504
    const int block_w    = plane_index ? block_size>>s->chroma_h_shift : block_size;
505
    const int block_h    = plane_index ? block_size>>s->chroma_v_shift : block_size;
506
    const uint8_t *obmc  = plane_index ? ff_obmc_tab[s->block_max_depth+s->chroma_h_shift] : ff_obmc_tab[s->block_max_depth];
507
    const int obmc_stride= plane_index ? (2*block_size)>>s->chroma_h_shift : 2*block_size;
508
    const int ref_stride= s->current_picture->linesize[plane_index];
509
    uint8_t *src= s-> input_picture->data[plane_index];
510
    IDWTELEM *dst= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4; //FIXME change to unsigned
511
    const int b_stride = s->b_width << s->block_max_depth;
512
    const int w= p->width;
513
    const int h= p->height;
514
    int index= mb_x + mb_y*b_stride;
515
    BlockNode *b= &s->block[index];
516
    BlockNode backup= *b;
517
    int ab=0;
518
    int aa=0;
519
 
520
    av_assert2(s->chroma_h_shift == s->chroma_v_shift); //obmc stuff above
521
 
522
    b->type|= BLOCK_INTRA;
523
    b->color[plane_index]= 0;
524
    memset(dst, 0, obmc_stride*obmc_stride*sizeof(IDWTELEM));
525
 
526
    for(i=0; i<4; i++){
527
        int mb_x2= mb_x + (i &1) - 1;
528
        int mb_y2= mb_y + (i>>1) - 1;
529
        int x= block_w*mb_x2 + block_w/2;
530
        int y= block_h*mb_y2 + block_h/2;
531
 
532
        add_yblock(s, 0, NULL, dst + (i&1)*block_w + (i>>1)*obmc_stride*block_h, NULL, obmc,
533
                    x, y, block_w, block_h, w, h, obmc_stride, ref_stride, obmc_stride, mb_x2, mb_y2, 0, 0, plane_index);
534
 
535
        for(y2= FFMAX(y, 0); y2
536
            for(x2= FFMAX(x, 0); x2
537
                int index= x2-(block_w*mb_x - block_w/2) + (y2-(block_h*mb_y - block_h/2))*obmc_stride;
538
                int obmc_v= obmc[index];
539
                int d;
540
                if(y<0) obmc_v += obmc[index + block_h*obmc_stride];
541
                if(x<0) obmc_v += obmc[index + block_w];
542
                if(y+block_h>h) obmc_v += obmc[index - block_h*obmc_stride];
543
                if(x+block_w>w) obmc_v += obmc[index - block_w];
544
                //FIXME precalculate this or simplify it somehow else
545
 
546
                d = -dst[index] + (1<<(FRAC_BITS-1));
547
                dst[index] = d;
548
                ab += (src[x2 + y2*ref_stride] - (d>>FRAC_BITS)) * obmc_v;
549
                aa += obmc_v * obmc_v; //FIXME precalculate this
550
            }
551
        }
552
    }
553
    *b= backup;
554
 
555
    return av_clip( ROUNDED_DIV(ab<
556
}
557
 
558
static inline int get_block_bits(SnowContext *s, int x, int y, int w){
559
    const int b_stride = s->b_width << s->block_max_depth;
560
    const int b_height = s->b_height<< s->block_max_depth;
561
    int index= x + y*b_stride;
562
    const BlockNode *b     = &s->block[index];
563
    const BlockNode *left  = x ? &s->block[index-1] : &null_block;
564
    const BlockNode *top   = y ? &s->block[index-b_stride] : &null_block;
565
    const BlockNode *tl    = y && x ? &s->block[index-b_stride-1] : left;
566
    const BlockNode *tr    = y && x+wblock[index-b_stride+w] : tl;
567
    int dmx, dmy;
568
//  int mx_context= av_log2(2*FFABS(left->mx - top->mx));
569
//  int my_context= av_log2(2*FFABS(left->my - top->my));
570
 
571
    if(x<0 || x>=b_stride || y>=b_height)
572
        return 0;
573
/*
574
1            0      0
575
01X          1-2    1
576
001XX        3-6    2-3
577
0001XXX      7-14   4-7
578
00001XXXX   15-30   8-15
579
*/
580
//FIXME try accurate rate
581
//FIXME intra and inter predictors if surrounding blocks are not the same type
582
    if(b->type & BLOCK_INTRA){
583
        return 3+2*( av_log2(2*FFABS(left->color[0] - b->color[0]))
584
                   + av_log2(2*FFABS(left->color[1] - b->color[1]))
585
                   + av_log2(2*FFABS(left->color[2] - b->color[2])));
586
    }else{
587
        pred_mv(s, &dmx, &dmy, b->ref, left, top, tr);
588
        dmx-= b->mx;
589
        dmy-= b->my;
590
        return 2*(1 + av_log2(2*FFABS(dmx)) //FIXME kill the 2* can be merged in lambda
591
                    + av_log2(2*FFABS(dmy))
592
                    + av_log2(2*b->ref));
593
    }
594
}
595
 
596
static int get_block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index, uint8_t (*obmc_edged)[MB_SIZE * 2]){
597
    Plane *p= &s->plane[plane_index];
598
    const int block_size = MB_SIZE >> s->block_max_depth;
599
    const int block_w    = plane_index ? block_size>>s->chroma_h_shift : block_size;
600
    const int block_h    = plane_index ? block_size>>s->chroma_v_shift : block_size;
601
    const int obmc_stride= plane_index ? (2*block_size)>>s->chroma_h_shift : 2*block_size;
602
    const int ref_stride= s->current_picture->linesize[plane_index];
603
    uint8_t *dst= s->current_picture->data[plane_index];
604
    uint8_t *src= s->  input_picture->data[plane_index];
605
    IDWTELEM *pred= (IDWTELEM*)s->m.obmc_scratchpad + plane_index*block_size*block_size*4;
606
    uint8_t *cur = s->scratchbuf;
607
    uint8_t *tmp = s->emu_edge_buffer;
608
    const int b_stride = s->b_width << s->block_max_depth;
609
    const int b_height = s->b_height<< s->block_max_depth;
610
    const int w= p->width;
611
    const int h= p->height;
612
    int distortion;
613
    int rate= 0;
614
    const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
615
    int sx= block_w*mb_x - block_w/2;
616
    int sy= block_h*mb_y - block_h/2;
617
    int x0= FFMAX(0,-sx);
618
    int y0= FFMAX(0,-sy);
619
    int x1= FFMIN(block_w*2, w-sx);
620
    int y1= FFMIN(block_h*2, h-sy);
621
    int i,x,y;
622
 
623
    av_assert2(s->chroma_h_shift == s->chroma_v_shift); //obmc and square assumtions below chckinhg only block_w
624
 
625
    ff_snow_pred_block(s, cur, tmp, ref_stride, sx, sy, block_w*2, block_h*2, &s->block[mb_x + mb_y*b_stride], plane_index, w, h);
626
 
627
    for(y=y0; y
628
        const uint8_t *obmc1= obmc_edged[y];
629
        const IDWTELEM *pred1 = pred + y*obmc_stride;
630
        uint8_t *cur1 = cur + y*ref_stride;
631
        uint8_t *dst1 = dst + sx + (sy+y)*ref_stride;
632
        for(x=x0; x
633
#if FRAC_BITS >= LOG2_OBMC_MAX
634
            int v = (cur1[x] * obmc1[x]) << (FRAC_BITS - LOG2_OBMC_MAX);
635
#else
636
            int v = (cur1[x] * obmc1[x] + (1<<(LOG2_OBMC_MAX - FRAC_BITS-1))) >> (LOG2_OBMC_MAX - FRAC_BITS);
637
#endif
638
            v = (v + pred1[x]) >> FRAC_BITS;
639
            if(v&(~255)) v= ~(v>>31);
640
            dst1[x] = v;
641
        }
642
    }
643
 
644
    /* copy the regions where obmc[] = (uint8_t)256 */
645
    if(LOG2_OBMC_MAX == 8
646
        && (mb_x == 0 || mb_x == b_stride-1)
647
        && (mb_y == 0 || mb_y == b_height-1)){
648
        if(mb_x == 0)
649
            x1 = block_w;
650
        else
651
            x0 = block_w;
652
        if(mb_y == 0)
653
            y1 = block_h;
654
        else
655
            y0 = block_h;
656
        for(y=y0; y
657
            memcpy(dst + sx+x0 + (sy+y)*ref_stride, cur + x0 + y*ref_stride, x1-x0);
658
    }
659
 
660
    if(block_w==16){
661
        /* FIXME rearrange dsputil to fit 32x32 cmp functions */
662
        /* FIXME check alignment of the cmp wavelet vs the encoding wavelet */
663
        /* FIXME cmps overlap but do not cover the wavelet's whole support.
664
         * So improving the score of one block is not strictly guaranteed
665
         * to improve the score of the whole frame, thus iterative motion
666
         * estimation does not always converge. */
667
        if(s->avctx->me_cmp == FF_CMP_W97)
668
            distortion = ff_w97_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
669
        else if(s->avctx->me_cmp == FF_CMP_W53)
670
            distortion = ff_w53_32_c(&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, 32);
671
        else{
672
            distortion = 0;
673
            for(i=0; i<4; i++){
674
                int off = sx+16*(i&1) + (sy+16*(i>>1))*ref_stride;
675
                distortion += s->dsp.me_cmp[0](&s->m, src + off, dst + off, ref_stride, 16);
676
            }
677
        }
678
    }else{
679
        av_assert2(block_w==8);
680
        distortion = s->dsp.me_cmp[0](&s->m, src + sx + sy*ref_stride, dst + sx + sy*ref_stride, ref_stride, block_w*2);
681
    }
682
 
683
    if(plane_index==0){
684
        for(i=0; i<4; i++){
685
/* ..RRr
686
 * .RXx.
687
 * rxx..
688
 */
689
            rate += get_block_bits(s, mb_x + (i&1) - (i>>1), mb_y + (i>>1), 1);
690
        }
691
        if(mb_x == b_stride-2)
692
            rate += get_block_bits(s, mb_x + 1, mb_y + 1, 1);
693
    }
694
    return distortion + rate*penalty_factor;
695
}
696
 
697
static int get_4block_rd(SnowContext *s, int mb_x, int mb_y, int plane_index){
698
    int i, y2;
699
    Plane *p= &s->plane[plane_index];
700
    const int block_size = MB_SIZE >> s->block_max_depth;
701
    const int block_w    = plane_index ? block_size>>s->chroma_h_shift : block_size;
702
    const int block_h    = plane_index ? block_size>>s->chroma_v_shift : block_size;
703
    const uint8_t *obmc  = plane_index ? ff_obmc_tab[s->block_max_depth+s->chroma_h_shift] : ff_obmc_tab[s->block_max_depth];
704
    const int obmc_stride= plane_index ? (2*block_size)>>s->chroma_h_shift : 2*block_size;
705
    const int ref_stride= s->current_picture->linesize[plane_index];
706
    uint8_t *dst= s->current_picture->data[plane_index];
707
    uint8_t *src= s-> input_picture->data[plane_index];
708
    //FIXME zero_dst is const but add_yblock changes dst if add is 0 (this is never the case for dst=zero_dst
709
    // const has only been removed from zero_dst to suppress a warning
710
    static IDWTELEM zero_dst[4096]; //FIXME
711
    const int b_stride = s->b_width << s->block_max_depth;
712
    const int w= p->width;
713
    const int h= p->height;
714
    int distortion= 0;
715
    int rate= 0;
716
    const int penalty_factor= get_penalty_factor(s->lambda, s->lambda2, s->avctx->me_cmp);
717
 
718
    av_assert2(s->chroma_h_shift == s->chroma_v_shift); //obmc and square assumtions below
719
 
720
    for(i=0; i<9; i++){
721
        int mb_x2= mb_x + (i%3) - 1;
722
        int mb_y2= mb_y + (i/3) - 1;
723
        int x= block_w*mb_x2 + block_w/2;
724
        int y= block_h*mb_y2 + block_h/2;
725
 
726
        add_yblock(s, 0, NULL, zero_dst, dst, obmc,
727
                   x, y, block_w, block_h, w, h, /*dst_stride*/0, ref_stride, obmc_stride, mb_x2, mb_y2, 1, 1, plane_index);
728
 
729
        //FIXME find a cleaner/simpler way to skip the outside stuff
730
        for(y2= y; y2<0; y2++)
731
            memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
732
        for(y2= h; y2
733
            memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, block_w);
734
        if(x<0){
735
            for(y2= y; y2
736
                memcpy(dst + x + y2*ref_stride, src + x + y2*ref_stride, -x);
737
        }
738
        if(x+block_w > w){
739
            for(y2= y; y2
740
                memcpy(dst + w + y2*ref_stride, src + w + y2*ref_stride, x+block_w - w);
741
        }
742
 
743
        av_assert1(block_w== 8 || block_w==16);
744
        distortion += s->dsp.me_cmp[block_w==8](&s->m, src + x + y*ref_stride, dst + x + y*ref_stride, ref_stride, block_h);
745
    }
746
 
747
    if(plane_index==0){
748
        BlockNode *b= &s->block[mb_x+mb_y*b_stride];
749
        int merged= same_block(b,b+1) && same_block(b,b+b_stride) && same_block(b,b+b_stride+1);
750
 
751
/* ..RRRr
752
 * .RXXx.
753
 * .RXXx.
754
 * rxxx.
755
 */
756
        if(merged)
757
            rate = get_block_bits(s, mb_x, mb_y, 2);
758
        for(i=merged?4:0; i<9; i++){
759
            static const int dxy[9][2] = {{0,0},{1,0},{0,1},{1,1},{2,0},{2,1},{-1,2},{0,2},{1,2}};
760
            rate += get_block_bits(s, mb_x + dxy[i][0], mb_y + dxy[i][1], 1);
761
        }
762
    }
763
    return distortion + rate*penalty_factor;
764
}
765
 
766
/**
 * Write the coefficients of one subband to the range coder s->c.
 *
 * A coefficient whose causal neighbourhood (left, top-left, top, top-right
 * and the co-located parent coefficient) is entirely zero is coded through
 * zero-run lengths; every other coefficient gets an explicit significance
 * flag whose context is derived from the neighbour magnitudes.
 *
 * The band is scanned twice: the first scan only collects the run lengths
 * into s->run_buffer so that their count can be written up front, the
 * second scan emits the symbols.
 *
 * @param parent      coefficients of the parent band, or NULL if there is none
 * @param stride      line stride of src in elements (the parent is addressed
 *                    with twice this stride)
 * @param orientation unused by the active code
 * @return 0 on success, -1 if fewer than w*40 bytes of output space remain
 *         when a row is started
 */
static int encode_subband_c0run(SnowContext *s, SubBand *b, const IDWTELEM *src, const IDWTELEM *parent, int stride, int orientation){
    const int w= b->width;
    const int h= b->height;
    int x, y;

    if(1){
        int run=0;
        int *runs = s->run_buffer;
        int run_index=0;
        int max_index;

        /* pass 1: gather the zero-run lengths */
        for(y=0; y<h; y++){
            for(x=0; x<w; x++){
                int v, p=0;
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
                v= src[x + y*stride];

                if(y){
                    t= src[x + (y-1)*stride];
                    if(x){
                        lt= src[x - 1 + (y-1)*stride];
                    }
                    if(x + 1 < w){
                        rt= src[x + 1 + (y-1)*stride];
                    }
                }
                if(x){
                    l= src[x - 1 + y*stride];
                    /*if(x > 1){
                        if(orientation==1) ll= src[y + (x-2)*stride];
                        else               ll= src[x - 2 + y*stride];
                    }*/
                }
                if(parent){
                    int px= x>>1;
                    int py= y>>1;
                    if(px<b->parent->width && py<b->parent->height)
                        p= parent[px + py*2*stride];
                }
                if(!(/*ll|*/l|lt|t|rt|p)){
                    if(v){
                        runs[run_index++]= run;
                        run=0;
                    }else{
                        run++;
                    }
                }
            }
        }
        max_index= run_index;
        runs[run_index++]= run;
        run_index=0;
        run= runs[run_index++];

        put_symbol2(&s->c, b->state[30], max_index, 0);
        if(run_index <= max_index)
            put_symbol2(&s->c, b->state[1], run, 3);

        /* pass 2: emit significance flags, runs, magnitudes and signs */
        for(y=0; y<h; y++){
            if(s->c.bytestream_end - s->c.bytestream < w*40){
                av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
                return -1;
            }
            for(x=0; x<w; x++){
                int v, p=0;
                int /*ll=0, */l=0, lt=0, t=0, rt=0;
                v= src[x + y*stride];

                if(y){
                    t= src[x + (y-1)*stride];
                    if(x){
                        lt= src[x - 1 + (y-1)*stride];
                    }
                    if(x + 1 < w){
                        rt= src[x + 1 + (y-1)*stride];
                    }
                }
                if(x){
                    l= src[x - 1 + y*stride];
                    /*if(x > 1){
                        if(orientation==1) ll= src[y + (x-2)*stride];
                        else               ll= src[x - 2 + y*stride];
                    }*/
                }
                if(parent){
                    int px= x>>1;
                    int py= y>>1;
                    if(px<b->parent->width && py<b->parent->height)
                        p= parent[px + py*2*stride];
                }
                if(/*ll|*/l|lt|t|rt|p){
                    int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));

                    put_rac(&s->c, &b->state[0][context], !!v);
                }else{
                    if(!run){
                        /* the current run is exhausted, so this coefficient is non-zero */
                        run= runs[run_index++];

                        if(run_index <= max_index)
                            put_symbol2(&s->c, b->state[1], run, 3);
                        av_assert2(v);
                    }else{
                        run--;
                        av_assert2(!v);
                    }
                }
                if(v){
                    int context= av_log2(/*FFABS(ll) + */3*FFABS(l) + FFABS(lt) + 2*FFABS(t) + FFABS(rt) + FFABS(p));
                    int l2= 2*FFABS(l) + (l<0);
                    int t2= 2*FFABS(t) + (t<0);

                    put_symbol2(&s->c, b->state[context + 2], FFABS(v)-1, context-4);
                    put_rac(&s->c, &b->state[0][16 + 1 + 3 + ff_quant3bA[l2&0xFF] + 3*ff_quant3bA[t2&0xFF]], v<0);
                }
            }
        }
    }
    return 0;
}
885
 
886
/**
 * Encode one subband; currently always delegates to encode_subband_c0run()
 * and returns its result. The commented-out calls name alternative coders
 * that are not enabled here.
 */
static int encode_subband(SnowContext *s, SubBand *b, const IDWTELEM *src, const IDWTELEM *parent, int stride, int orientation){
//    encode_subband_qtree(s, b, src, parent, stride, orientation);
//    encode_subband_z0run(s, b, src, parent, stride, orientation);
    return encode_subband_c0run(s, b, src, parent, stride, orientation);
//    encode_subband_dzr(s, b, src, parent, stride, orientation);
}
892
 
893
/**
 * Try one candidate (intra colour or motion vector) for the block at
 * (mb_x, mb_y) and keep it if it lowers the rate-distortion score.
 *
 * @param p       intra: the three colour components; inter: p[0]/p[1] are
 *                the motion vector components (p[2] is not read)
 * @param intra   non-zero to test an intra block, zero for an inter block
 * @param best_rd in/out: best score so far, updated when the candidate wins
 * @return 1 if the candidate was better and has been kept in the block,
 *         0 if it was rejected (block restored) or, for inter candidates,
 *         if the me_cache entry shows it was already tried in this generation
 */
static av_always_inline int check_block(SnowContext *s, int mb_x, int mb_y, int p[3], int intra, uint8_t (*obmc_edged)[MB_SIZE * 2], int *best_rd){
    const int b_stride= s->b_width << s->block_max_depth;
    BlockNode *block= &s->block[mb_x + mb_y * b_stride];
    BlockNode backup= *block;
    unsigned value;
    int rd, index;

    av_assert2(mb_x>=0 && mb_y>=0);
    av_assert2(mb_x<b_stride);

    if(intra){
        block->color[0] = p[0];
        block->color[1] = p[1];
        block->color[2] = p[2];
        block->type |= BLOCK_INTRA;
    }else{
        index= (p[0] + 31*p[1]) & (ME_CACHE_SIZE-1);
        /* multiply instead of shifting: p[1] may be negative and a left
         * shift of a negative value is undefined behaviour */
        value= s->me_cache_generation + (p[0]>>10) + p[1]*(1<<6) + (block->ref<<12);
        if(s->me_cache[index] == value)
            return 0;
        s->me_cache[index]= value;

        block->mx= p[0];
        block->my= p[1];
        block->type &= ~BLOCK_INTRA;
    }

    rd= get_block_rd(s, mb_x, mb_y, 0, obmc_edged);

//FIXME chroma
    if(rd < *best_rd){
        *best_rd= rd;
        return 1;
    }else{
        *block= backup;
        return 0;
    }
}
931
 
932
/* special case for int[2] args we discard afterwards,
933
 * fixes compilation problem with gcc 2.95 */
934
static av_always_inline int check_block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, uint8_t (*obmc_edged)[MB_SIZE * 2], int *best_rd){
935
    int p[2] = {p0, p1};
936
    return check_block(s, mb_x, mb_y, p, 0, obmc_edged, best_rd);
937
}
938
 
939
/**
 * Try giving the 2x2 group of blocks whose top-left member is (mb_x, mb_y)
 * one common inter candidate (motion vector p0/p1, reference ref).
 *
 * @param best_rd in/out: best score so far, updated when the candidate wins
 * @return 1 if the merged candidate was better and has been kept,
 *         0 if it was rejected (all four blocks restored) or if the me_cache
 *         entry shows it was already tried in this generation
 */
static av_always_inline int check_4block_inter(SnowContext *s, int mb_x, int mb_y, int p0, int p1, int ref, int *best_rd){
    const int b_stride= s->b_width << s->block_max_depth;
    BlockNode *block= &s->block[mb_x + mb_y * b_stride];
    BlockNode backup[4];
    unsigned value;
    int rd, index;

    /* We don't initialize backup[] during variable declaration, because
     * that fails to compile on MSVC: "cannot convert from 'BlockNode' to
     * 'int16_t'". */
    backup[0] = block[0];
    backup[1] = block[1];
    backup[2] = block[b_stride];
    backup[3] = block[b_stride + 1];

    av_assert2(mb_x>=0 && mb_y>=0);
    av_assert2(mb_x<b_stride);
    av_assert2(((mb_x|mb_y)&1) == 0);

    index= (p0 + 31*p1) & (ME_CACHE_SIZE-1);
    /* multiply instead of shifting: p1 may be negative and a left shift of
     * a negative value is undefined behaviour */
    value= s->me_cache_generation + (p0>>10) + p1*(1<<6) + (block->ref<<12);
    if(s->me_cache[index] == value)
        return 0;
    s->me_cache[index]= value;

    block->mx= p0;
    block->my= p1;
    block->ref= ref;
    block->type &= ~BLOCK_INTRA;
    block[1]= block[b_stride]= block[b_stride+1]= *block;

    rd= get_4block_rd(s, mb_x, mb_y, 0);

//FIXME chroma
    if(rd < *best_rd){
        *best_rd= rd;
        return 1;
    }else{
        block[0]= backup[0];
        block[1]= backup[1];
        block[b_stride]= backup[2];
        block[b_stride+1]= backup[3];
        return 0;
    }
}
984
 
985
/**
 * Iteratively refine the block parameters of the current frame.
 *
 * 1. Run encode_q_branch() over all blocks to obtain an initial estimate;
 *    the range coder and block_state are saved before and restored after,
 *    so this run leaves no trace in the coder state.
 * 2. For up to 25 passes, revisit every block that is not flagged
 *    BLOCK_OPT: test predicted, zero and neighbouring motion vectors for
 *    each reference frame, refine by a full-pel diamond and a sub-pel
 *    square search, then test the intra alternative. Whenever a block
 *    changes, its eight neighbours lose BLOCK_OPT so they are revisited.
 *    The loop stops early when a pass changes nothing.
 * 3. If block_max_depth is 1, try merging each 2x2 group of blocks.
 */
static void iterative_me(SnowContext *s){
    int pass, mb_x, mb_y;
    const int b_width = s->b_width  << s->block_max_depth;
    const int b_height= s->b_height << s->block_max_depth;
    const int b_stride= b_width;
    int color[3];

    {
        RangeCoder r = s->c;
        uint8_t state[sizeof(s->block_state)];
        memcpy(state, s->block_state, sizeof(s->block_state));
        for(mb_y= 0; mb_y<s->b_height; mb_y++)
            for(mb_x= 0; mb_x<s->b_width; mb_x++)
                encode_q_branch(s, 0, mb_x, mb_y);
        s->c = r;
        memcpy(s->block_state, state, sizeof(s->block_state));
    }

    for(pass=0; pass<25; pass++){
        int change= 0;

        for(mb_y= 0; mb_y<b_height; mb_y++){
            for(mb_x= 0; mb_x<b_width; mb_x++){
                int dia_change, i, j, ref;
                int best_rd= INT_MAX, ref_rd;
                BlockNode backup, ref_b;
                const int index= mb_x + mb_y * b_stride;
                BlockNode *block= &s->block[index];
                /* neighbours, NULL where they would fall outside the block grid */
                BlockNode *tb =                   mb_y            ? &s->block[index-b_stride  ] : NULL;
                BlockNode *lb = mb_x                              ? &s->block[index         -1] : NULL;
                BlockNode *rb = mb_x+1<b_width                    ? &s->block[index         +1] : NULL;
                BlockNode *bb =                   mb_y+1<b_height ? &s->block[index+b_stride  ] : NULL;
                BlockNode *tlb= mb_x           && mb_y            ? &s->block[index-b_stride-1] : NULL;
                BlockNode *trb= mb_x+1<b_width && mb_y            ? &s->block[index-b_stride+1] : NULL;
                BlockNode *blb= mb_x           && mb_y+1<b_height ? &s->block[index+b_stride-1] : NULL;
                BlockNode *brb= mb_x+1<b_width && mb_y+1<b_height ? &s->block[index+b_stride+1] : NULL;
                const int b_w= (MB_SIZE >> s->block_max_depth);
                uint8_t obmc_edged[MB_SIZE * 2][MB_SIZE * 2];

                if(pass && (block->type & BLOCK_OPT))
                    continue;
                block->type |= BLOCK_OPT;

                backup= *block;

                /* a generation of 0 (initially, or after the counter wrapped)
                 * could match stale entries, so start from an empty cache */
                if(!s->me_cache_generation)
                    memset(s->me_cache, 0, sizeof(s->me_cache));
                s->me_cache_generation += 1<<22;

                //FIXME precalculate
                {
                    /* build the OBMC window for this block; on picture edges
                     * the weights of the missing neighbour are folded in */
                    int x, y;
                    for (y = 0; y < b_w * 2; y++)
                        memcpy(obmc_edged[y], ff_obmc_tab[s->block_max_depth] + y * b_w * 2, b_w * 2);
                    if(mb_x==0)
                        for(y=0; y<b_w*2; y++)
                            memset(obmc_edged[y], obmc_edged[y][0] + obmc_edged[y][b_w-1], b_w);
                    if(mb_x==b_stride-1)
                        for(y=0; y<b_w*2; y++)
                            memset(obmc_edged[y]+b_w, obmc_edged[y][b_w] + obmc_edged[y][b_w*2-1], b_w);
                    if(mb_y==0){
                        for(x=0; x<b_w*2; x++)
                            obmc_edged[0][x] += obmc_edged[b_w-1][x];
                        for(y=1; y<b_w; y++)
                            memcpy(obmc_edged[y], obmc_edged[0], b_w*2);
                    }
                    if(mb_y==b_height-1){
                        for(x=0; x<b_w*2; x++)
                            obmc_edged[b_w*2-1][x] += obmc_edged[b_w][x];
                        for(y=b_w; y<b_w*2-1; y++)
                            memcpy(obmc_edged[y], obmc_edged[b_w*2-1], b_w*2);
                    }
                }

                //skip stuff outside the picture
                if(mb_x==0 || mb_y==0 || mb_x==b_width-1 || mb_y==b_height-1){
                    uint8_t *src= s->  input_picture->data[0];
                    uint8_t *dst= s->current_picture->data[0];
                    const int stride= s->current_picture->linesize[0];
                    const int block_w= MB_SIZE >> s->block_max_depth;
                    const int block_h= MB_SIZE >> s->block_max_depth;
                    const int sx= block_w*mb_x - block_w/2;
                    const int sy= block_h*mb_y - block_h/2;
                    const int w= s->plane[0].width;
                    const int h= s->plane[0].height;
                    int y;

                    /* copy the input into the reconstruction wherever the
                     * block window lies outside the picture, so those
                     * samples contribute no distortion */
                    for(y=sy; y<0; y++)
                        memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
                    for(y=h; y<sy+block_h*2; y++)
                        memcpy(dst + sx + y*stride, src + sx + y*stride, block_w*2);
                    if(sx<0){
                        for(y=sy; y<sy+block_h*2; y++)
                            memcpy(dst + sx + y*stride, src + sx + y*stride, -sx);
                    }
                    if(sx+block_w*2 > w){
                        for(y=sy; y<sy+block_h*2; y++)
                            memcpy(dst + w + y*stride, src + w + y*stride, sx+block_w*2 - w);
                    }
                }

                // intra(black) = neighbors' contribution to the current block
                for(i=0; i < s->nb_planes; i++)
                    color[i]= get_dc(s, mb_x, mb_y, i);

                // get previous score (cannot be cached due to OBMC)
                if(pass > 0 && (block->type&BLOCK_INTRA)){
                    int color0[3]= {block->color[0], block->color[1], block->color[2]};
                    check_block(s, mb_x, mb_y, color0, 1, obmc_edged, &best_rd);
                }else
                    check_block_inter(s, mb_x, mb_y, block->mx, block->my, obmc_edged, &best_rd);

                ref_b= *block;
                ref_rd= best_rd;
                for(ref=0; ref < s->ref_frames; ref++){
                    int16_t (*mvr)[2]= &s->ref_mvs[ref][index];
                    if(s->ref_scores[ref][index] > s->ref_scores[ref_b.ref][index]*3/2) //FIXME tune threshold
                        continue;
                    block->ref= ref;
                    best_rd= INT_MAX;

                    check_block_inter(s, mb_x, mb_y, mvr[0][0], mvr[0][1], obmc_edged, &best_rd);
                    check_block_inter(s, mb_x, mb_y, 0, 0, obmc_edged, &best_rd);
                    if(tb)
                        check_block_inter(s, mb_x, mb_y, mvr[-b_stride][0], mvr[-b_stride][1], obmc_edged, &best_rd);
                    if(lb)
                        check_block_inter(s, mb_x, mb_y, mvr[-1][0], mvr[-1][1], obmc_edged, &best_rd);
                    if(rb)
                        check_block_inter(s, mb_x, mb_y, mvr[1][0], mvr[1][1], obmc_edged, &best_rd);
                    if(bb)
                        check_block_inter(s, mb_x, mb_y, mvr[b_stride][0], mvr[b_stride][1], obmc_edged, &best_rd);

                    /* fullpel ME */
                    //FIXME avoid subpel interpolation / round to nearest integer
                    do{
                        dia_change=0;
                        for(i=0; i<FFMAX(s->avctx->dia_size, 1); i++){
                            for(j=0; j<i; j++){
                                dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my+(4*j), obmc_edged, &best_rd);
                                dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my-(4*j), obmc_edged, &best_rd);
                                dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+4*(i-j), block->my-(4*j), obmc_edged, &best_rd);
                                dia_change |= check_block_inter(s, mb_x, mb_y, block->mx-4*(i-j), block->my+(4*j), obmc_edged, &best_rd);
                            }
                        }
                    }while(dia_change);
                    /* subpel ME */
                    do{
                        static const int square[8][2]= {{+1, 0},{-1, 0},{ 0,+1},{ 0,-1},{+1,+1},{-1,-1},{+1,-1},{-1,+1},};
                        dia_change=0;
                        for(i=0; i<8; i++)
                            dia_change |= check_block_inter(s, mb_x, mb_y, block->mx+square[i][0], block->my+square[i][1], obmc_edged, &best_rd);
                    }while(dia_change);
                    //FIXME or try the standard 2 pass qpel or similar

                    mvr[0][0]= block->mx;
                    mvr[0][1]= block->my;
                    if(ref_rd > best_rd){
                        ref_rd= best_rd;
                        ref_b= *block;
                    }
                }
                best_rd= ref_rd;
                *block= ref_b;
                check_block(s, mb_x, mb_y, color, 1, obmc_edged, &best_rd);
                //FIXME RD style color selection
                if(!same_block(block, &backup)){
                    if(tb ) tb ->type &= ~BLOCK_OPT;
                    if(lb ) lb ->type &= ~BLOCK_OPT;
                    if(rb ) rb ->type &= ~BLOCK_OPT;
                    if(bb ) bb ->type &= ~BLOCK_OPT;
                    if(tlb) tlb->type &= ~BLOCK_OPT;
                    if(trb) trb->type &= ~BLOCK_OPT;
                    if(blb) blb->type &= ~BLOCK_OPT;
                    if(brb) brb->type &= ~BLOCK_OPT;
                    change ++;
                }
            }
        }
        /* progress statistics, not an error condition */
        av_log(s->avctx, AV_LOG_DEBUG, "pass:%d changed:%d\n", pass, change);
        if(!change)
            break;
    }

    if(s->block_max_depth == 1){
        int change= 0;
        for(mb_y= 0; mb_y<b_height; mb_y+=2){
            for(mb_x= 0; mb_x<b_width; mb_x+=2){
                int i;
                int best_rd, init_rd;
                const int index= mb_x + mb_y * b_stride;
                BlockNode *b[4];

                b[0]= &s->block[index];
                b[1]= b[0]+1;
                b[2]= b[0]+b_stride;
                b[3]= b[2]+1;
                /* nothing to merge when the four blocks already agree */
                if(same_block(b[0], b[1]) &&
                   same_block(b[0], b[2]) &&
                   same_block(b[0], b[3]))
                    continue;

                if(!s->me_cache_generation)
                    memset(s->me_cache, 0, sizeof(s->me_cache));
                s->me_cache_generation += 1<<22;

                init_rd= best_rd= get_4block_rd(s, mb_x, mb_y, 0);

                //FIXME more multiref search?
                check_4block_inter(s, mb_x, mb_y,
                                   (b[0]->mx + b[1]->mx + b[2]->mx + b[3]->mx + 2) >> 2,
                                   (b[0]->my + b[1]->my + b[2]->my + b[3]->my + 2) >> 2, 0, &best_rd);

                for(i=0; i<4; i++)
                    if(!(b[i]->type&BLOCK_INTRA))
                        check_4block_inter(s, mb_x, mb_y, b[i]->mx, b[i]->my, b[i]->ref, &best_rd);

                if(init_rd != best_rd)
                    change++;
            }
        }
        /* progress statistics, not an error condition */
        av_log(s->avctx, AV_LOG_DEBUG, "pass:4mv changed:%d\n", change*4);
    }
}
1208
 
1209
/**
 * Write the block tree of the current frame to the range coder.
 *
 * When the iterative motion estimator is selected and the frame is not a
 * keyframe, iterative_me() is run first (only if search is non-zero).
 * Blocks are then coded with encode_q_branch2() if the iterative method is
 * in use or no search is requested, and with encode_q_branch() otherwise.
 *
 * Coding stops early, after logging an error, if the remaining output
 * space drops below w*MB_SIZE*MB_SIZE*3 bytes at the start of a row; the
 * caller is not told about this.
 */
static void encode_blocks(SnowContext *s, int search){
    int x, y;
    int w= s->b_width;
    int h= s->b_height;

    if(s->avctx->me_method == ME_ITER && !s->keyframe && search)
        iterative_me(s);

    for(y=0; y<h; y++){
        if(s->c.bytestream_end - s->c.bytestream < w*MB_SIZE*MB_SIZE*3){ //FIXME nicer limit
            av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n");
            return;
        }
        for(x=0; x<w; x++){
            if(s->avctx->me_method == ME_ITER || !search)
                encode_q_branch2(s, 0, x, y);
            else
                encode_q_branch (s, 0, x, y);
        }
    }
}
1230
 
1231
/**
 * Quantize the coefficients of one subband from src into dst.
 *
 * In lossless mode (s->qlog == LOSSLESS_QLOG) the values are copied
 * unchanged. Otherwise values inside the dead zone [-thres1, thres1] become
 * zero and the rest are scaled by 2^QEXPSHIFT and divided by the step size
 * qmul, magnitude and sign being handled separately.
 *
 * @param stride line stride, in elements, shared by src and dst
 * @param bias   non-zero selects plain truncation; zero adds a rounding
 *               offset of 3/8 of the step before the division
 */
static void quantize(SnowContext *s, SubBand *b, IDWTELEM *dst, DWTELEM *src, int stride, int bias){
    const int w= b->width;
    const int h= b->height;
    const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
    const int qmul= ff_qexp[qlog&(QROOT-1)]<<((qlog>>QSHIFT) + ENCODER_EXTRA_BITS);
    int x,y, thres1, thres2;

    if(s->qlog == LOSSLESS_QLOG){
        for(y=0; y<h; y++)
            for(x=0; x<w; x++)
                dst[x + y*stride]= src[x + y*stride];
        return;
    }

    /* from here on "bias" holds the rounding offset, not the input flag */
    bias= bias ? 0 : (3*qmul)>>3;
    thres1= ((qmul - bias)>>QEXPSHIFT) - 1;
    thres2= 2*thres1;

    if(!bias){
        for(y=0; y<h; y++){
            for(x=0; x<w; x++){
                int i= src[x + y*stride];

                /* single unsigned compare for "i < -thres1 || i > thres1" */
                if((unsigned)(i+thres1) > thres2){
                    if(i>=0){
                        i<<= QEXPSHIFT;
                        i/= qmul; //FIXME optimize
                        dst[x + y*stride]=  i;
                    }else{
                        i= -i;
                        i<<= QEXPSHIFT;
                        i/= qmul; //FIXME optimize
                        dst[x + y*stride]= -i;
                    }
                }else
                    dst[x + y*stride]= 0;
            }
        }
    }else{
        for(y=0; y<h; y++){
            for(x=0; x<w; x++){
                int i= src[x + y*stride];

                if((unsigned)(i+thres1) > thres2){
                    if(i>=0){
                        i<<= QEXPSHIFT;
                        i= (i + bias) / qmul; //FIXME optimize
                        dst[x + y*stride]=  i;
                    }else{
                        i= -i;
                        i<<= QEXPSHIFT;
                        i= (i + bias) / qmul; //FIXME optimize
                        dst[x + y*stride]= -i;
                    }
                }else
                    dst[x + y*stride]= 0;
            }
        }
    }
}
1291
 
1292
/**
 * Dequantize one subband in place.
 *
 * Does nothing in lossless mode. Otherwise every non-zero value is scaled
 * by the step size qmul, offset by qadd (derived from s->qbias) and shifted
 * down by QEXPSHIFT; the sign is handled separately so that negative values
 * mirror positive ones. Zeros stay zero.
 *
 * @param stride line stride of src in elements
 */
static void dequantize(SnowContext *s, SubBand *b, IDWTELEM *src, int stride){
    const int w= b->width;
    const int h= b->height;
    const int qlog= av_clip(s->qlog + b->qlog, 0, QROOT*16);
    const int qmul= ff_qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
    const int qadd= (s->qbias*qmul)>>QBIAS_SHIFT;
    int x,y;

    if(s->qlog == LOSSLESS_QLOG) return;

    for(y=0; y<h; y++){
        for(x=0; x<w; x++){
            int i= src[x + y*stride];
            if(i<0){
                src[x + y*stride]= -((-i*qmul + qadd)>>(QEXPSHIFT)); //FIXME try different bias
            }else if(i>0){
                src[x + y*stride]=  (( i*qmul + qadd)>>(QEXPSHIFT));
            }
        }
    }
}
1313
 
1314
/**
 * Replace every coefficient of a subband by its difference from a spatial
 * prediction (in place).
 *
 * The band is walked from the bottom-right corner backwards, so the
 * left/top/top-right samples used as predictors still hold their original
 * values when they are read.
 *
 * Prediction: first column -> sample above; first row -> sample to the
 * left; otherwise, with use_median, the median of left, top and top-right
 * (falling back to left where there is no row above or no sample to the
 * right), and without it the median of left, top and left+top-topleft.
 * The top-left sample is left unchanged.
 *
 * @param s       unused
 * @param inverse unused
 */
static void decorrelate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
    const int w= b->width;
    const int h= b->height;
    int x,y;

    for(y=h-1; y>=0; y--){
        for(x=w-1; x>=0; x--){
            int i= x + y*stride;

            if(x){
                if(use_median){
                    if(y && x+1<w) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
                    else  src[i] -= src[i - 1];
                }else{
                    if(y) src[i] -= mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
                    else  src[i] -= src[i - 1];
                }
            }else{
                if(y) src[i] -= src[i - stride];
            }
        }
    }
}
1337
 
1338
/**
 * Add the spatial prediction back onto every coefficient of a subband
 * (in place), using the same predictors as decorrelate().
 *
 * The band is walked forwards from the top-left corner, so the
 * left/top/top-right samples used as predictors have already been
 * restored when they are read.
 *
 * @param s       unused
 * @param inverse unused
 */
static void correlate(SnowContext *s, SubBand *b, IDWTELEM *src, int stride, int inverse, int use_median){
    const int w= b->width;
    const int h= b->height;
    int x,y;

    for(y=0; y<h; y++){
        for(x=0; x<w; x++){
            int i= x + y*stride;

            if(x){
                if(use_median){
                    if(y && x+1<w) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - stride + 1]);
                    else  src[i] += src[i - 1];
                }else{
                    if(y) src[i] += mid_pred(src[i - 1], src[i - stride], src[i - 1] + src[i - stride] - src[i - 1 - stride]);
                    else  src[i] += src[i - 1];
                }
            }else{
                if(y) src[i] += src[i - stride];
            }
        }
    }
}
1361
 
1362
/**
 * Write the per-band qlog values to the header.
 *
 * Only the first min(nb_planes, 2) planes are written. Within a plane,
 * level 0 starts at orientation 0 and every other level at orientation 1;
 * orientation 2 is never written.
 */
static void encode_qlogs(SnowContext *s){
    int plane_index, level, orientation;

    for(plane_index=0; plane_index<FFMIN(s->nb_planes, 2); plane_index++){
        for(level=0; level<s->spatial_decomposition_count; level++){
            for(orientation=level ? 1:0; orientation<4; orientation++){
                if(orientation==2) continue;
                put_symbol(&s->c, s->header_state, s->plane[plane_index].band[level][orientation].qlog, 1);
            }
        }
    }
}
1374
 
1375
/**
 * Write the frame header to the range coder.
 *
 * Layout, in order:
 *  - keyframe flag (coded with a fresh local state);
 *  - on keyframes, or whenever always_reset is set, the coder contexts and
 *    all "last_*" reference values are reset first;
 *  - keyframes: the global stream parameters followed by the qlog table;
 *  - other frames: an update flag plus, if set, the motion compensation
 *    filter parameters, then a flag plus, if set, the new decomposition
 *    count and qlog table;
 *  - for every frame: decomposition type, qlog, mv_scale, qbias and
 *    block_max_depth, each as a difference from its last_* value.
 */
static void encode_header(SnowContext *s){
    int plane_index, i;
    uint8_t kstate[32];

    memset(kstate, MID_STATE, sizeof(kstate));

    put_rac(&s->c, kstate, s->keyframe);
    if(s->keyframe || s->always_reset){
        ff_snow_reset_contexts(s);
        s->last_spatial_decomposition_type=
        s->last_qlog=
        s->last_qbias=
        s->last_mv_scale=
        s->last_block_max_depth= 0;
        for(plane_index=0; plane_index<2; plane_index++){
            Plane *p= &s->plane[plane_index];
            p->last_htaps=0;
            p->last_diag_mc=0;
            memset(p->last_hcoeff, 0, sizeof(p->last_hcoeff));
        }
    }
    if(s->keyframe){
        put_symbol(&s->c, s->header_state, s->version, 0);
        put_rac(&s->c, s->header_state, s->always_reset);
        put_symbol(&s->c, s->header_state, s->temporal_decomposition_type, 0);
        put_symbol(&s->c, s->header_state, s->temporal_decomposition_count, 0);
        put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
        put_symbol(&s->c, s->header_state, s->colorspace_type, 0);
        if (s->nb_planes > 2) {
            put_symbol(&s->c, s->header_state, s->chroma_h_shift, 0);
            put_symbol(&s->c, s->header_state, s->chroma_v_shift, 0);
        }
        put_rac(&s->c, s->header_state, s->spatial_scalability);
//        put_rac(&s->c, s->header_state, s->rate_scalability);
        put_symbol(&s->c, s->header_state, s->max_ref_frames-1, 0);

        encode_qlogs(s);
    }

    if(!s->keyframe){
        int update_mc=0;
        /* resend the filter parameters only if any of them changed */
        for(plane_index=0; plane_index<FFMIN(s->nb_planes, 2); plane_index++){
            Plane *p= &s->plane[plane_index];
            update_mc |= p->last_htaps   != p->htaps;
            update_mc |= p->last_diag_mc != p->diag_mc;
            update_mc |= !!memcmp(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
        }
        put_rac(&s->c, s->header_state, update_mc);
        if(update_mc){
            for(plane_index=0; plane_index<FFMIN(s->nb_planes, 2); plane_index++){
                Plane *p= &s->plane[plane_index];
                put_rac(&s->c, s->header_state, p->diag_mc);
                put_symbol(&s->c, s->header_state, p->htaps/2-1, 0);
                for(i= p->htaps/2; i; i--)
                    put_symbol(&s->c, s->header_state, FFABS(p->hcoeff[i]), 0);
            }
        }
        if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
            put_rac(&s->c, s->header_state, 1);
            put_symbol(&s->c, s->header_state, s->spatial_decomposition_count, 0);
            encode_qlogs(s);
        }else
            put_rac(&s->c, s->header_state, 0);
    }

    put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1);
    put_symbol(&s->c, s->header_state, s->qlog            - s->last_qlog    , 1);
    put_symbol(&s->c, s->header_state, s->mv_scale        - s->last_mv_scale, 1);
    put_symbol(&s->c, s->header_state, s->qbias           - s->last_qbias   , 1);
    put_symbol(&s->c, s->header_state, s->block_max_depth - s->last_block_max_depth, 1);

}
1447
 
1448
/**
 * Remember the header values of the frame just coded in the last_* fields,
 * so that the next header can be written as differences.
 *
 * The per-plane motion compensation filter parameters (first two planes)
 * are only recorded for non-keyframes; the scalar values are recorded for
 * every frame.
 */
static void update_last_header_values(SnowContext *s){
    int plane_index;

    if(!s->keyframe){
        for(plane_index=0; plane_index<2; plane_index++){
            Plane *p= &s->plane[plane_index];
            p->last_diag_mc= p->diag_mc;
            p->last_htaps  = p->htaps;
            memcpy(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
        }
    }

    s->last_spatial_decomposition_type  = s->spatial_decomposition_type;
    s->last_qlog                        = s->qlog;
    s->last_qbias                       = s->qbias;
    s->last_mv_scale                    = s->mv_scale;
    s->last_block_max_depth             = s->block_max_depth;
    s->last_spatial_decomposition_count = s->spatial_decomposition_count;
}
1467
 
1468
static int qscale2qlog(int qscale){
1469
    return rint(QROOT*log2(qscale / (float)FF_QP2LAMBDA))
1470
           + 61*QROOT/8; ///< 64 > 60
1471
}
1472
 
1473
/**
 * Single-pass rate control: estimate the frame complexity, ask the generic
 * rate controller for a quantizer and adapt s->qlog and s->lambda to it.
 *
 * Side effects: the luma coefficients are copied from b->buf into b->ibuf
 * (the lowest band additionally decorrelated there), pict->quality,
 * s->lambda and s->qlog are updated, and the complexity is stored in
 * s->m.current_picture (mb_var_sum for I frames, mc_mb_var_sum otherwise).
 *
 * @return the change applied to s->qlog, or INT_MIN if the rate controller
 *         returned a negative quality
 */
static int ratecontrol_1pass(SnowContext *s, AVFrame *pict)
{
    /* Estimate the frame's complexity as a sum of weighted dwt coefficients.
     * FIXME we know exact mv bits at this point,
     * but ratecontrol isn't set up to include them. */
    uint32_t coef_sum= 0;
    int level, orientation, delta_qlog;

    for(level=0; level<s->spatial_decomposition_count; level++){
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
            SubBand *b= &s->plane[0].band[level][orientation];
            IDWTELEM *buf= b->ibuf;
            const int w= b->width;
            const int h= b->height;
            const int stride= b->stride;
            const int qlog= av_clip(2*QROOT + b->qlog, 0, QROOT*16);
            const int qmul= ff_qexp[qlog&(QROOT-1)]<<(qlog>>QSHIFT);
            const int qdiv= (1<<16)/qmul;
            int x, y;
            //FIXME this is ugly
            for(y=0; y<h; y++)
                for(x=0; x<w; x++)
                    buf[x+y*stride]= b->buf[x+y*stride];
            if(orientation==0)
                decorrelate(s, b, buf, stride, 1, 0);
            for(y=0; y<h; y++)
                for(x=0; x<w; x++)
                    coef_sum+= abs(buf[x+y*stride]) * qdiv >> 16;
        }
    }

    /* ugly, ratecontrol just takes a sqrt again */
    coef_sum = (uint64_t)coef_sum * coef_sum >> 16;
    av_assert0(coef_sum < INT_MAX);

    if(pict->pict_type == AV_PICTURE_TYPE_I){
        s->m.current_picture.mb_var_sum= coef_sum;
        s->m.current_picture.mc_mb_var_sum= 0;
    }else{
        s->m.current_picture.mc_mb_var_sum= coef_sum;
        s->m.current_picture.mb_var_sum= 0;
    }

    pict->quality= ff_rate_estimate_qscale(&s->m, 1);
    if (pict->quality < 0)
        return INT_MIN;
    s->lambda= pict->quality * 3/2;
    delta_qlog= qscale2qlog(pict->quality) - s->qlog;
    s->qlog+= delta_qlog;
    return delta_qlog;
}
1524
 
1525
/**
 * Derive a per-subband quantizer offset (b->qlog) for one plane.
 *
 * For every subband a single impulse of value 256*16 is written at the
 * subband centre, the inverse DWT is run over the whole plane, and the
 * energy (sum of squared samples) of the resulting image is measured.
 * The subband's qlog is then computed from that energy on a logarithmic
 * scale with QROOT steps per octave.
 *
 * @param s encoder context; s->spatial_idwt_buffer and s->temp_idwt_buffer
 *          are used as scratch space and are overwritten
 * @param p plane whose band[][] entries receive the new qlog values
 */
static void calculate_visual_weight(SnowContext *s, Plane *p){
    int width = p->width;
    int height= p->height;
    int level, orientation, x, y;

    for(level=0; level<s->spatial_decomposition_count; level++){
        /* orientation 0 (LL band) only exists at level 0 */
        for(orientation=level ? 1 : 0; orientation<4; orientation++){
            SubBand *b= &p->band[level][orientation];
            IDWTELEM *ibuf= b->ibuf;
            int64_t error=0;

            /* NOTE(review): this presumably also clears the previous impulse,
             * assuming b->ibuf points into spatial_idwt_buffer -- confirm. */
            memset(s->spatial_idwt_buffer, 0, sizeof(*s->spatial_idwt_buffer)*width*height);
            ibuf[b->width/2 + b->height/2*b->stride]= 256*16;
            ff_spatial_idwt(s->spatial_idwt_buffer, s->temp_idwt_buffer, width, height, width, s->spatial_decomposition_type, s->spatial_decomposition_count);
            for(y=0; y<height; y++){
                for(x=0; x<width; x++){
                    int64_t d= s->spatial_idwt_buffer[x + y*width]*16;
                    error += d*d;
                }
            }

            /* log base 2^(1/QROOT) of 352256/sqrt(error), rounded to nearest */
            b->qlog= (int)(log(352256.0/sqrt(error)) / log(pow(2.0, 1.0/QROOT))+0.5);
        }
    }
}
1550
 
1551
static int encode_frame(AVCodecContext *avctx, AVPacket *pkt,
1552
                        AVFrame *pict, int *got_packet)
1553
{
1554
    SnowContext *s = avctx->priv_data;
1555
    RangeCoder * const c= &s->c;
1556
    AVFrame *pic = pict;
1557
    const int width= s->avctx->width;
1558
    const int height= s->avctx->height;
1559
    int level, orientation, plane_index, i, y, ret;
1560
    uint8_t rc_header_bak[sizeof(s->header_state)];
1561
    uint8_t rc_block_bak[sizeof(s->block_state)];
1562
 
1563
    if ((ret = ff_alloc_packet2(avctx, pkt, s->b_width*s->b_height*MB_SIZE*MB_SIZE*3 + FF_MIN_BUFFER_SIZE)) < 0)
1564
        return ret;
1565
 
1566
    ff_init_range_encoder(c, pkt->data, pkt->size);
1567
    ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
1568
 
1569
    for(i=0; i < s->nb_planes; i++){
1570
        int hshift= i ? s->chroma_h_shift : 0;
1571
        int vshift= i ? s->chroma_v_shift : 0;
1572
        for(y=0; y<(height>>vshift); y++)
1573
            memcpy(&s->input_picture->data[i][y * s->input_picture->linesize[i]],
1574
                   &pict->data[i][y * pict->linesize[i]],
1575
                   width>>hshift);
1576
        s->dsp.draw_edges(s->input_picture->data[i], s->input_picture->linesize[i],
1577
                            width >> hshift, height >> vshift,
1578
                            EDGE_WIDTH >> hshift, EDGE_WIDTH >> vshift,
1579
                            EDGE_TOP | EDGE_BOTTOM);
1580
 
1581
    }
1582
    emms_c();
1583
    s->new_picture = pict;
1584
 
1585
    s->m.picture_number= avctx->frame_number;
1586
    if(avctx->flags&CODEC_FLAG_PASS2){
1587
        s->m.pict_type = pic->pict_type = s->m.rc_context.entry[avctx->frame_number].new_pict_type;
1588
        s->keyframe = pic->pict_type == AV_PICTURE_TYPE_I;
1589
        if(!(avctx->flags&CODEC_FLAG_QSCALE)) {
1590
            pic->quality = ff_rate_estimate_qscale(&s->m, 0);
1591
            if (pic->quality < 0)
1592
                return -1;
1593
        }
1594
    }else{
1595
        s->keyframe= avctx->gop_size==0 || avctx->frame_number % avctx->gop_size == 0;
1596
        s->m.pict_type = pic->pict_type = s->keyframe ? AV_PICTURE_TYPE_I : AV_PICTURE_TYPE_P;
1597
    }
1598
 
1599
    if(s->pass1_rc && avctx->frame_number == 0)
1600
        pic->quality = 2*FF_QP2LAMBDA;
1601
    if (pic->quality) {
1602
        s->qlog   = qscale2qlog(pic->quality);
1603
        s->lambda = pic->quality * 3/2;
1604
    }
1605
    if (s->qlog < 0 || (!pic->quality && (avctx->flags & CODEC_FLAG_QSCALE))) {
1606
        s->qlog= LOSSLESS_QLOG;
1607
        s->lambda = 0;
1608
    }//else keep previous frame's qlog until after motion estimation
1609
 
1610
    ff_snow_frame_start(s);
1611
    avctx->coded_frame= s->current_picture;
1612
 
1613
    s->m.current_picture_ptr= &s->m.current_picture;
1614
    s->m.last_picture.f.pts = s->m.current_picture.f.pts;
1615
    s->m.current_picture.f.pts = pict->pts;
1616
    if(pic->pict_type == AV_PICTURE_TYPE_P){
1617
        int block_width = (width +15)>>4;
1618
        int block_height= (height+15)>>4;
1619
        int stride= s->current_picture->linesize[0];
1620
 
1621
        av_assert0(s->current_picture->data[0]);
1622
        av_assert0(s->last_picture[0]->data[0]);
1623
 
1624
        s->m.avctx= s->avctx;
1625
        s->m.current_picture.f.data[0] = s->current_picture->data[0];
1626
        s->m.   last_picture.f.data[0] = s->last_picture[0]->data[0];
1627
        s->m.    new_picture.f.data[0] = s->  input_picture->data[0];
1628
        s->m.   last_picture_ptr= &s->m.   last_picture;
1629
        s->m.linesize=
1630
        s->m.   last_picture.f.linesize[0] =
1631
        s->m.    new_picture.f.linesize[0] =
1632
        s->m.current_picture.f.linesize[0] = stride;
1633
        s->m.uvlinesize= s->current_picture->linesize[1];
1634
        s->m.width = width;
1635
        s->m.height= height;
1636
        s->m.mb_width = block_width;
1637
        s->m.mb_height= block_height;
1638
        s->m.mb_stride=   s->m.mb_width+1;
1639
        s->m.b8_stride= 2*s->m.mb_width+1;
1640
        s->m.f_code=1;
1641
        s->m.pict_type = pic->pict_type;
1642
        s->m.me_method= s->avctx->me_method;
1643
        s->m.me.scene_change_score=0;
1644
        s->m.flags= s->avctx->flags;
1645
        s->m.quarter_sample= (s->avctx->flags & CODEC_FLAG_QPEL)!=0;
1646
        s->m.out_format= FMT_H263;
1647
        s->m.unrestricted_mv= 1;
1648
 
1649
        s->m.lambda = s->lambda;
1650
        s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7);
1651
        s->lambda2= s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT;
1652
 
1653
        s->m.dsp= s->dsp; //move
1654
        s->m.hdsp = s->hdsp;
1655
        ff_init_me(&s->m);
1656
        s->hdsp = s->m.hdsp;
1657
        s->dsp= s->m.dsp;
1658
    }
1659
 
1660
    if(s->pass1_rc){
1661
        memcpy(rc_header_bak, s->header_state, sizeof(s->header_state));
1662
        memcpy(rc_block_bak, s->block_state, sizeof(s->block_state));
1663
    }
1664
 
1665
redo_frame:
1666
 
1667
    if (pic->pict_type == AV_PICTURE_TYPE_I)
1668
        s->spatial_decomposition_count= 5;
1669
    else
1670
        s->spatial_decomposition_count= 5;
1671
 
1672
    while(   !(width >>(s->chroma_h_shift + s->spatial_decomposition_count))
1673
          || !(height>>(s->chroma_v_shift + s->spatial_decomposition_count)))
1674
        s->spatial_decomposition_count--;
1675
 
1676
    if (s->spatial_decomposition_count <= 0) {
1677
        av_log(avctx, AV_LOG_ERROR, "Resolution too low\n");
1678
        return AVERROR(EINVAL);
1679
    }
1680
 
1681
    s->m.pict_type = pic->pict_type;
1682
    s->qbias = pic->pict_type == AV_PICTURE_TYPE_P ? 2 : 0;
1683
 
1684
    ff_snow_common_init_after_header(avctx);
1685
 
1686
    if(s->last_spatial_decomposition_count != s->spatial_decomposition_count){
1687
        for(plane_index=0; plane_index < s->nb_planes; plane_index++){
1688
            calculate_visual_weight(s, &s->plane[plane_index]);
1689
        }
1690
    }
1691
 
1692
    encode_header(s);
1693
    s->m.misc_bits = 8*(s->c.bytestream - s->c.bytestream_start);
1694
    encode_blocks(s, 1);
1695
    s->m.mv_bits = 8*(s->c.bytestream - s->c.bytestream_start) - s->m.misc_bits;
1696
 
1697
    for(plane_index=0; plane_index < s->nb_planes; plane_index++){
1698
        Plane *p= &s->plane[plane_index];
1699
        int w= p->width;
1700
        int h= p->height;
1701
        int x, y;
1702
//        int bits= put_bits_count(&s->c.pb);
1703
 
1704
        if (!s->memc_only) {
1705
            //FIXME optimize
1706
            if(pict->data[plane_index]) //FIXME gray hack
1707
                for(y=0; y
1708
                    for(x=0; x
1709
                        s->spatial_idwt_buffer[y*w + x]= pict->data[plane_index][y*pict->linesize[plane_index] + x]<
1710
                    }
1711
                }
1712
            predict_plane(s, s->spatial_idwt_buffer, plane_index, 0);
1713
 
1714
            if(   plane_index==0
1715
               && pic->pict_type == AV_PICTURE_TYPE_P
1716
               && !(avctx->flags&CODEC_FLAG_PASS2)
1717
               && s->m.me.scene_change_score > s->avctx->scenechange_threshold){
1718
                ff_init_range_encoder(c, pkt->data, pkt->size);
1719
                ff_build_rac_states(c, 0.05*(1LL<<32), 256-8);
1720
                pic->pict_type= AV_PICTURE_TYPE_I;
1721
                s->keyframe=1;
1722
                s->current_picture->key_frame=1;
1723
                goto redo_frame;
1724
            }
1725
 
1726
            if(s->qlog == LOSSLESS_QLOG){
1727
                for(y=0; y
1728
                    for(x=0; x
1729
                        s->spatial_dwt_buffer[y*w + x]= (s->spatial_idwt_buffer[y*w + x] + (1<<(FRAC_BITS-1))-1)>>FRAC_BITS;
1730
                    }
1731
                }
1732
            }else{
1733
                for(y=0; y
1734
                    for(x=0; x
1735
                        s->spatial_dwt_buffer[y*w + x]=s->spatial_idwt_buffer[y*w + x]<
1736
                    }
1737
                }
1738
            }
1739
 
1740
            ff_spatial_dwt(s->spatial_dwt_buffer, s->temp_dwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
1741
 
1742
            if(s->pass1_rc && plane_index==0){
1743
                int delta_qlog = ratecontrol_1pass(s, pic);
1744
                if (delta_qlog <= INT_MIN)
1745
                    return -1;
1746
                if(delta_qlog){
1747
                    //reordering qlog in the bitstream would eliminate this reset
1748
                    ff_init_range_encoder(c, pkt->data, pkt->size);
1749
                    memcpy(s->header_state, rc_header_bak, sizeof(s->header_state));
1750
                    memcpy(s->block_state, rc_block_bak, sizeof(s->block_state));
1751
                    encode_header(s);
1752
                    encode_blocks(s, 0);
1753
                }
1754
            }
1755
 
1756
            for(level=0; levelspatial_decomposition_count; level++){
1757
                for(orientation=level ? 1 : 0; orientation<4; orientation++){
1758
                    SubBand *b= &p->band[level][orientation];
1759
 
1760
                    quantize(s, b, b->ibuf, b->buf, b->stride, s->qbias);
1761
                    if(orientation==0)
1762
                        decorrelate(s, b, b->ibuf, b->stride, pic->pict_type == AV_PICTURE_TYPE_P, 0);
1763
                    if (!s->no_bitstream)
1764
                    encode_subband(s, b, b->ibuf, b->parent ? b->parent->ibuf : NULL, b->stride, orientation);
1765
                    av_assert0(b->parent==NULL || b->parent->stride == b->stride*2);
1766
                    if(orientation==0)
1767
                        correlate(s, b, b->ibuf, b->stride, 1, 0);
1768
                }
1769
            }
1770
 
1771
            for(level=0; levelspatial_decomposition_count; level++){
1772
                for(orientation=level ? 1 : 0; orientation<4; orientation++){
1773
                    SubBand *b= &p->band[level][orientation];
1774
 
1775
                    dequantize(s, b, b->ibuf, b->stride);
1776
                }
1777
            }
1778
 
1779
            ff_spatial_idwt(s->spatial_idwt_buffer, s->temp_idwt_buffer, w, h, w, s->spatial_decomposition_type, s->spatial_decomposition_count);
1780
            if(s->qlog == LOSSLESS_QLOG){
1781
                for(y=0; y
1782
                    for(x=0; x
1783
                        s->spatial_idwt_buffer[y*w + x]<<=FRAC_BITS;
1784
                    }
1785
                }
1786
            }
1787
            predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
1788
        }else{
1789
            //ME/MC only
1790
            if(pic->pict_type == AV_PICTURE_TYPE_I){
1791
                for(y=0; y
1792
                    for(x=0; x
1793
                        s->current_picture->data[plane_index][y*s->current_picture->linesize[plane_index] + x]=
1794
                            pict->data[plane_index][y*pict->linesize[plane_index] + x];
1795
                    }
1796
                }
1797
            }else{
1798
                memset(s->spatial_idwt_buffer, 0, sizeof(IDWTELEM)*w*h);
1799
                predict_plane(s, s->spatial_idwt_buffer, plane_index, 1);
1800
            }
1801
        }
1802
        if(s->avctx->flags&CODEC_FLAG_PSNR){
1803
            int64_t error= 0;
1804
 
1805
            if(pict->data[plane_index]) //FIXME gray hack
1806
                for(y=0; y
1807
                    for(x=0; x
1808
                        int d= s->current_picture->data[plane_index][y*s->current_picture->linesize[plane_index] + x] - pict->data[plane_index][y*pict->linesize[plane_index] + x];
1809
                        error += d*d;
1810
                    }
1811
                }
1812
            s->avctx->error[plane_index] += error;
1813
            s->current_picture->error[plane_index] = error;
1814
        }
1815
 
1816
    }
1817
 
1818
    update_last_header_values(s);
1819
 
1820
    ff_snow_release_buffer(avctx);
1821
 
1822
    s->current_picture->coded_picture_number = avctx->frame_number;
1823
    s->current_picture->pict_type = pict->pict_type;
1824
    s->current_picture->quality = pict->quality;
1825
    s->m.frame_bits = 8*(s->c.bytestream - s->c.bytestream_start);
1826
    s->m.p_tex_bits = s->m.frame_bits - s->m.misc_bits - s->m.mv_bits;
1827
    s->m.current_picture.f.display_picture_number =
1828
    s->m.current_picture.f.coded_picture_number   = avctx->frame_number;
1829
    s->m.current_picture.f.quality                = pic->quality;
1830
    s->m.total_bits += 8*(s->c.bytestream - s->c.bytestream_start);
1831
    if(s->pass1_rc)
1832
        if (ff_rate_estimate_qscale(&s->m, 0) < 0)
1833
            return -1;
1834
    if(avctx->flags&CODEC_FLAG_PASS1)
1835
        ff_write_pass1_stats(&s->m);
1836
    s->m.last_pict_type = s->m.pict_type;
1837
    avctx->frame_bits = s->m.frame_bits;
1838
    avctx->mv_bits = s->m.mv_bits;
1839
    avctx->misc_bits = s->m.misc_bits;
1840
    avctx->p_tex_bits = s->m.p_tex_bits;
1841
 
1842
    emms_c();
1843
 
1844
    pkt->size = ff_rac_terminate(c);
1845
    if (avctx->coded_frame->key_frame)
1846
        pkt->flags |= AV_PKT_FLAG_KEY;
1847
    *got_packet = 1;
1848
 
1849
    return 0;
1850
}
1851
 
1852
/**
 * Release everything the encoder owns.
 *
 * Tears down the shared snow state and the rate-control state, then frees
 * the input picture copy and the stats_out buffer.
 *
 * @param avctx codec context whose priv_data is the SnowContext
 * @return always 0
 */
static av_cold int encode_end(AVCodecContext *avctx)
{
    SnowContext *s = avctx->priv_data;

    ff_snow_common_end(s);
    ff_rate_control_uninit(&s->m);
    av_frame_free(&s->input_picture);
    /* av_freep() also resets the pointer, so the context is not left
     * holding a dangling stats_out after close */
    av_freep(&avctx->stats_out);

    return 0;
}
1863
 
1864
#define OFFSET(x) offsetof(SnowContext, x)
1865
#define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
1866
static const AVOption options[] = {
1867
    { "memc_only",      "Only do ME/MC (I frames -> ref, P frame -> ME+MC).",   OFFSET(memc_only), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
1868
    { "no_bitstream",   "Skip final bitstream writeout.",                    OFFSET(no_bitstream), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE },
1869
    { NULL },
1870
};
1871
 
1872
static const AVClass snowenc_class = {
1873
    .class_name = "snow encoder",
1874
    .item_name  = av_default_item_name,
1875
    .option     = options,
1876
    .version    = LIBAVUTIL_VERSION_INT,
1877
};
1878
 
1879
AVCodec ff_snow_encoder = {
1880
    .name           = "snow",
1881
    .long_name      = NULL_IF_CONFIG_SMALL("Snow"),
1882
    .type           = AVMEDIA_TYPE_VIDEO,
1883
    .id             = AV_CODEC_ID_SNOW,
1884
    .priv_data_size = sizeof(SnowContext),
1885
    .init           = encode_init,
1886
    .encode2        = encode_frame,
1887
    .close          = encode_end,
1888
    .pix_fmts       = (const enum AVPixelFormat[]){
1889
        AV_PIX_FMT_YUV420P, AV_PIX_FMT_YUV410P, AV_PIX_FMT_YUV444P,
1890
        AV_PIX_FMT_GRAY8,
1891
        AV_PIX_FMT_NONE
1892
    },
1893
    .priv_class     = &snowenc_class,
1894
};
1895
 
1896
 
1897
#ifdef TEST
1898
#undef malloc
1899
#undef free
1900
#undef printf
1901
 
1902
#include "libavutil/lfg.h"
1903
#include "libavutil/mathematics.h"
1904
 
1905
int main(void){
1906
#define width  256
1907
#define height 256
1908
    int buffer[2][width*height];
1909
    SnowContext s;
1910
    int i;
1911
    AVLFG prng;
1912
    s.spatial_decomposition_count=6;
1913
    s.spatial_decomposition_type=1;
1914
 
1915
    s.temp_dwt_buffer  = av_mallocz(width * sizeof(DWTELEM));
1916
    s.temp_idwt_buffer = av_mallocz(width * sizeof(IDWTELEM));
1917
 
1918
    av_lfg_init(&prng, 1);
1919
 
1920
    printf("testing 5/3 DWT\n");
1921
    for(i=0; i
1922
        buffer[0][i] = buffer[1][i] = av_lfg_get(&prng) % 54321 - 12345;
1923
 
1924
    ff_spatial_dwt(buffer[0], s.temp_dwt_buffer, width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
1925
    ff_spatial_idwt((IDWTELEM*)buffer[0], s.temp_idwt_buffer, width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
1926
 
1927
    for(i=0; i
1928
        if(buffer[0][i]!= buffer[1][i]) printf("fsck: %6d %12d %7d\n",i, buffer[0][i], buffer[1][i]);
1929
 
1930
    printf("testing 9/7 DWT\n");
1931
    s.spatial_decomposition_type=0;
1932
    for(i=0; i
1933
        buffer[0][i] = buffer[1][i] = av_lfg_get(&prng) % 54321 - 12345;
1934
 
1935
    ff_spatial_dwt(buffer[0], s.temp_dwt_buffer, width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
1936
    ff_spatial_idwt((IDWTELEM*)buffer[0], s.temp_idwt_buffer, width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
1937
 
1938
    for(i=0; i
1939
        if(FFABS(buffer[0][i] - buffer[1][i])>20) printf("fsck: %6d %12d %7d\n",i, buffer[0][i], buffer[1][i]);
1940
 
1941
    {
1942
    int level, orientation, x, y;
1943
    int64_t errors[8][4];
1944
    int64_t g=0;
1945
 
1946
        memset(errors, 0, sizeof(errors));
1947
        s.spatial_decomposition_count=3;
1948
        s.spatial_decomposition_type=0;
1949
        for(level=0; level
1950
            for(orientation=level ? 1 : 0; orientation<4; orientation++){
1951
                int w= width  >> (s.spatial_decomposition_count-level);
1952
                int h= height >> (s.spatial_decomposition_count-level);
1953
                int stride= width  << (s.spatial_decomposition_count-level);
1954
                DWTELEM *buf= buffer[0];
1955
                int64_t error=0;
1956
 
1957
                if(orientation&1) buf+=w;
1958
                if(orientation>1) buf+=stride>>1;
1959
 
1960
                memset(buffer[0], 0, sizeof(int)*width*height);
1961
                buf[w/2 + h/2*stride]= 256*256;
1962
                ff_spatial_idwt((IDWTELEM*)buffer[0], s.temp_idwt_buffer, width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
1963
                for(y=0; y
1964
                    for(x=0; x
1965
                        int64_t d= buffer[0][x + y*width];
1966
                        error += d*d;
1967
                        if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9 && level==2) printf("%8"PRId64" ", d);
1968
                    }
1969
                    if(FFABS(height/2-y)<9 && level==2) printf("\n");
1970
                }
1971
                error= (int)(sqrt(error)+0.5);
1972
                errors[level][orientation]= error;
1973
                if(g) g=av_gcd(g, error);
1974
                else g= error;
1975
            }
1976
        }
1977
        printf("static int const visual_weight[][4]={\n");
1978
        for(level=0; level
1979
            printf("  {");
1980
            for(orientation=0; orientation<4; orientation++){
1981
                printf("%8"PRId64",", errors[level][orientation]/g);
1982
            }
1983
            printf("},\n");
1984
        }
1985
        printf("};\n");
1986
        {
1987
            int level=2;
1988
            int w= width  >> (s.spatial_decomposition_count-level);
1989
            //int h= height >> (s.spatial_decomposition_count-level);
1990
            int stride= width  << (s.spatial_decomposition_count-level);
1991
            DWTELEM *buf= buffer[0];
1992
            int64_t error=0;
1993
 
1994
            buf+=w;
1995
            buf+=stride>>1;
1996
 
1997
            memset(buffer[0], 0, sizeof(int)*width*height);
1998
            for(y=0; y
1999
                for(x=0; x
2000
                    int tab[4]={0,2,3,1};
2001
                    buffer[0][x+width*y]= 256*256*tab[(x&1) + 2*(y&1)];
2002
                }
2003
            }
2004
            ff_spatial_dwt(buffer[0], s.temp_dwt_buffer, width, height, width, s.spatial_decomposition_type, s.spatial_decomposition_count);
2005
            for(y=0; y
2006
                for(x=0; x
2007
                    int64_t d= buffer[0][x + y*width];
2008
                    error += d*d;
2009
                    if(FFABS(width/2-x)<9 && FFABS(height/2-y)<9) printf("%8"PRId64" ", d);
2010
                }
2011
                if(FFABS(height/2-y)<9) printf("\n");
2012
            }
2013
        }
2014
 
2015
    }
2016
    return 0;
2017
}
2018
#endif /* TEST */