Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
5361 serge 1
/*
2
 * Copyright © 2012 Intel Corporation
3
 *
4
 * Permission is hereby granted, free of charge, to any person obtaining a
5
 * copy of this software and associated documentation files (the
6
 * "Software"), to deal in the Software without restriction, including
7
 * without limitation the rights to use, copy, modify, merge, publish,
8
 * distribute, sub license, and/or sell copies of the Software, and to
9
 * permit persons to whom the Software is furnished to do so, subject to
10
 * the following conditions:
11
 *
12
 * The above copyright notice and this permission notice (including the
13
 * next paragraph) shall be included in all copies or substantial portions
14
 * of the Software.
15
 *
16
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
19
 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
20
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23
 *
24
 * Authors:
25
 *    Xiang Haihao 
26
 *    Zhao Yakui 
27
 *
28
 */
29
 
30
#include 
31
#include 
32
#include 
33
#include 
34
#include 
35
 
36
#include "intel_batchbuffer.h"
37
#include "i965_defines.h"
38
#include "i965_structs.h"
39
#include "i965_drv_video.h"
40
#include "i965_encoder.h"
41
#include "i965_encoder_utils.h"
42
#include "gen6_mfc.h"
43
#include "gen6_vme.h"
44
#include "intel_media.h"
45
 
46
/*
 * Clamp the lvalue x in place to the inclusive range [min, max].
 *
 * Wrapped in do { } while (0) so that "BRC_CLIP(...);" behaves as a single
 * statement, including inside an unbraced if/else. Note that x, min and max
 * may each be evaluated more than once, so avoid arguments with side effects.
 */
#define BRC_CLIP(x, min, max)                                               \
    do {                                                                    \
        (x) = (((x) > (max)) ? (max) : (((x) < (min)) ? (min) : (x)));      \
    } while (0)
50
 
51
#define BRC_P_B_QP_DIFF 4
52
#define BRC_I_P_QP_DIFF 2
53
#define BRC_I_B_QP_DIFF (BRC_I_P_QP_DIFF + BRC_P_B_QP_DIFF)
54
 
55
#define BRC_PWEIGHT 0.6  /* weight if P slice with comparison to I slice */
56
#define BRC_BWEIGHT 0.25 /* weight if B slice with comparison to I slice */
57
 
58
#define BRC_QP_MAX_CHANGE 5 /* maximum qp modification */
59
#define BRC_CY 0.1 /* weight for */
60
#define BRC_CX_UNDERFLOW 5.
61
#define BRC_CX_OVERFLOW -4.
62
 
63
#define BRC_PI_0_5 1.5707963267948966192313216916398
64
 
65
#ifndef HAVE_LOG2F
66
#define log2f(x) (logf(x)/(float)M_LN2)
67
#endif
68
 
69
int intel_avc_enc_slice_type_fixup(int slice_type)
70
{
71
    if (slice_type == SLICE_TYPE_SP ||
72
        slice_type == SLICE_TYPE_P)
73
        slice_type = SLICE_TYPE_P;
74
    else if (slice_type == SLICE_TYPE_SI ||
75
             slice_type == SLICE_TYPE_I)
76
        slice_type = SLICE_TYPE_I;
77
    else {
78
        if (slice_type != SLICE_TYPE_B)
79
            WARN_ONCE("Invalid slice type for H.264 encoding!\n");
80
 
81
        slice_type = SLICE_TYPE_B;
82
    }
83
 
84
    return slice_type;
85
}
86
 
87
static void
88
intel_mfc_bit_rate_control_context_init(struct encode_state *encode_state,
89
                                        struct gen6_mfc_context *mfc_context)
90
{
91
    VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
92
    int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
93
    int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
94
    float fps =  pSequenceParameter->time_scale * 0.5 / pSequenceParameter->num_units_in_tick ;
95
    int inter_mb_size = pSequenceParameter->bits_per_second * 1.0 / (fps+4.0) / width_in_mbs / height_in_mbs;
96
    int intra_mb_size = inter_mb_size * 5.0;
97
    int i;
98
 
99
    mfc_context->bit_rate_control_context[SLICE_TYPE_I].target_mb_size = intra_mb_size;
100
    mfc_context->bit_rate_control_context[SLICE_TYPE_I].target_frame_size = intra_mb_size * width_in_mbs * height_in_mbs;
101
    mfc_context->bit_rate_control_context[SLICE_TYPE_P].target_mb_size = inter_mb_size;
102
    mfc_context->bit_rate_control_context[SLICE_TYPE_P].target_frame_size = inter_mb_size * width_in_mbs * height_in_mbs;
103
    mfc_context->bit_rate_control_context[SLICE_TYPE_B].target_mb_size = inter_mb_size;
104
    mfc_context->bit_rate_control_context[SLICE_TYPE_B].target_frame_size = inter_mb_size * width_in_mbs * height_in_mbs;
105
 
106
    for(i = 0 ; i < 3; i++) {
107
        mfc_context->bit_rate_control_context[i].QpPrimeY = 26;
108
        mfc_context->bit_rate_control_context[i].MaxQpNegModifier = 6;
109
        mfc_context->bit_rate_control_context[i].MaxQpPosModifier = 6;
110
        mfc_context->bit_rate_control_context[i].GrowInit = 6;
111
        mfc_context->bit_rate_control_context[i].GrowResistance = 4;
112
        mfc_context->bit_rate_control_context[i].ShrinkInit = 6;
113
        mfc_context->bit_rate_control_context[i].ShrinkResistance = 4;
114
 
115
        mfc_context->bit_rate_control_context[i].Correct[0] = 8;
116
        mfc_context->bit_rate_control_context[i].Correct[1] = 4;
117
        mfc_context->bit_rate_control_context[i].Correct[2] = 2;
118
        mfc_context->bit_rate_control_context[i].Correct[3] = 2;
119
        mfc_context->bit_rate_control_context[i].Correct[4] = 4;
120
        mfc_context->bit_rate_control_context[i].Correct[5] = 8;
121
    }
122
 
123
    mfc_context->bit_rate_control_context[SLICE_TYPE_I].TargetSizeInWord = (intra_mb_size + 16)/ 16;
124
    mfc_context->bit_rate_control_context[SLICE_TYPE_P].TargetSizeInWord = (inter_mb_size + 16)/ 16;
125
    mfc_context->bit_rate_control_context[SLICE_TYPE_B].TargetSizeInWord = (inter_mb_size + 16)/ 16;
126
 
127
    mfc_context->bit_rate_control_context[SLICE_TYPE_I].MaxSizeInWord = mfc_context->bit_rate_control_context[SLICE_TYPE_I].TargetSizeInWord * 1.5;
128
    mfc_context->bit_rate_control_context[SLICE_TYPE_P].MaxSizeInWord = mfc_context->bit_rate_control_context[SLICE_TYPE_P].TargetSizeInWord * 1.5;
129
    mfc_context->bit_rate_control_context[SLICE_TYPE_B].MaxSizeInWord = mfc_context->bit_rate_control_context[SLICE_TYPE_B].TargetSizeInWord * 1.5;
130
}
131
 
132
static void intel_mfc_brc_init(struct encode_state *encode_state,
133
                               struct intel_encoder_context* encoder_context)
134
{
135
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
136
    VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
137
    VAEncMiscParameterBuffer* pMiscParamHRD = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeHRD]->buffer;
138
    VAEncMiscParameterHRD* pParameterHRD = (VAEncMiscParameterHRD*)pMiscParamHRD->data;
139
    double bitrate = pSequenceParameter->bits_per_second;
140
    double framerate = (double)pSequenceParameter->time_scale /(2 * (double)pSequenceParameter->num_units_in_tick);
141
    int inum = 1, pnum = 0, bnum = 0; /* Gop structure: number of I, P, B frames in the Gop. */
142
    int intra_period = pSequenceParameter->intra_period;
143
    int ip_period = pSequenceParameter->ip_period;
144
    double qp1_size = 0.1 * 8 * 3 * (pSequenceParameter->picture_width_in_mbs<<4) * (pSequenceParameter->picture_height_in_mbs<<4)/2;
145
    double qp51_size = 0.001 * 8 * 3 * (pSequenceParameter->picture_width_in_mbs<<4) * (pSequenceParameter->picture_height_in_mbs<<4)/2;
146
    double bpf;
147
 
148
    if (pSequenceParameter->ip_period) {
149
        pnum = (intra_period + ip_period - 1)/ip_period - 1;
150
        bnum = intra_period - inum - pnum;
151
    }
152
 
153
    mfc_context->brc.mode = encoder_context->rate_control_mode;
154
 
155
    mfc_context->brc.target_frame_size[SLICE_TYPE_I] = (int)((double)((bitrate * intra_period)/framerate) /
156
                                                             (double)(inum + BRC_PWEIGHT * pnum + BRC_BWEIGHT * bnum));
157
    mfc_context->brc.target_frame_size[SLICE_TYPE_P] = BRC_PWEIGHT * mfc_context->brc.target_frame_size[SLICE_TYPE_I];
158
    mfc_context->brc.target_frame_size[SLICE_TYPE_B] = BRC_BWEIGHT * mfc_context->brc.target_frame_size[SLICE_TYPE_I];
159
 
160
    mfc_context->brc.gop_nums[SLICE_TYPE_I] = inum;
161
    mfc_context->brc.gop_nums[SLICE_TYPE_P] = pnum;
162
    mfc_context->brc.gop_nums[SLICE_TYPE_B] = bnum;
163
 
164
    bpf = mfc_context->brc.bits_per_frame = bitrate/framerate;
165
 
166
    mfc_context->hrd.buffer_size = (double)pParameterHRD->buffer_size;
167
    mfc_context->hrd.current_buffer_fullness =
168
        (double)(pParameterHRD->initial_buffer_fullness < mfc_context->hrd.buffer_size)?
169
        pParameterHRD->initial_buffer_fullness: mfc_context->hrd.buffer_size/2.;
170
    mfc_context->hrd.target_buffer_fullness = (double)mfc_context->hrd.buffer_size/2.;
171
    mfc_context->hrd.buffer_capacity = (double)mfc_context->hrd.buffer_size/qp1_size;
172
    mfc_context->hrd.violation_noted = 0;
173
 
174
    if ((bpf > qp51_size) && (bpf < qp1_size)) {
175
        mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY = 51 - 50*(bpf - qp51_size)/(qp1_size - qp51_size);
176
    }
177
    else if (bpf >= qp1_size)
178
        mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY = 1;
179
    else if (bpf <= qp51_size)
180
        mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY = 51;
181
 
182
    mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY = mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY;
183
    mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY = mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY;
184
 
185
    BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY, 1, 51);
186
    BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY, 1, 51);
187
    BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY, 1, 51);
188
}
189
 
190
int intel_mfc_update_hrd(struct encode_state *encode_state,
191
                         struct gen6_mfc_context *mfc_context,
192
                         int frame_bits)
193
{
194
    double prev_bf = mfc_context->hrd.current_buffer_fullness;
195
 
196
    mfc_context->hrd.current_buffer_fullness -= frame_bits;
197
 
198
    if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness <= 0.) {
199
        mfc_context->hrd.current_buffer_fullness = prev_bf;
200
        return BRC_UNDERFLOW;
201
    }
202
 
203
    mfc_context->hrd.current_buffer_fullness += mfc_context->brc.bits_per_frame;
204
    if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness > mfc_context->hrd.buffer_size) {
205
        if (mfc_context->brc.mode == VA_RC_VBR)
206
            mfc_context->hrd.current_buffer_fullness = mfc_context->hrd.buffer_size;
207
        else {
208
            mfc_context->hrd.current_buffer_fullness = prev_bf;
209
            return BRC_OVERFLOW;
210
        }
211
    }
212
    return BRC_NO_HRD_VIOLATION;
213
}
214
 
215
int intel_mfc_brc_postpack(struct encode_state *encode_state,
216
                           struct gen6_mfc_context *mfc_context,
217
                           int frame_bits)
218
{
219
    gen6_brc_status sts = BRC_NO_HRD_VIOLATION;
220
    VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
221
    int slicetype = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
222
    int qpi = mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY;
223
    int qpp = mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY;
224
    int qpb = mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY;
225
    int qp; // quantizer of previously encoded slice of current type
226
    int qpn; // predicted quantizer for next frame of current type in integer format
227
    double qpf; // predicted quantizer for next frame of current type in float format
228
    double delta_qp; // QP correction
229
    int target_frame_size, frame_size_next;
230
    /* Notes:
231
     *  x - how far we are from HRD buffer borders
232
     *  y - how far we are from target HRD buffer fullness
233
     */
234
    double x, y;
235
    double frame_size_alpha;
236
 
237
    qp = mfc_context->bit_rate_control_context[slicetype].QpPrimeY;
238
 
239
    target_frame_size = mfc_context->brc.target_frame_size[slicetype];
240
    if (mfc_context->hrd.buffer_capacity < 5)
241
        frame_size_alpha = 0;
242
    else
243
        frame_size_alpha = (double)mfc_context->brc.gop_nums[slicetype];
244
    if (frame_size_alpha > 30) frame_size_alpha = 30;
245
    frame_size_next = target_frame_size + (double)(target_frame_size - frame_bits) /
246
        (double)(frame_size_alpha + 1.);
247
 
248
    /* frame_size_next: avoiding negative number and too small value */
249
    if ((double)frame_size_next < (double)(target_frame_size * 0.25))
250
        frame_size_next = (int)((double)target_frame_size * 0.25);
251
 
252
    qpf = (double)qp * target_frame_size / frame_size_next;
253
    qpn = (int)(qpf + 0.5);
254
 
255
    if (qpn == qp) {
256
        /* setting qpn we round qpf making mistakes: now we are trying to compensate this */
257
        mfc_context->brc.qpf_rounding_accumulator += qpf - qpn;
258
        if (mfc_context->brc.qpf_rounding_accumulator > 1.0) {
259
            qpn++;
260
            mfc_context->brc.qpf_rounding_accumulator = 0.;
261
        } else if (mfc_context->brc.qpf_rounding_accumulator < -1.0) {
262
            qpn--;
263
            mfc_context->brc.qpf_rounding_accumulator = 0.;
264
        }
265
    }
266
    /* making sure that QP is not changing too fast */
267
    if ((qpn - qp) > BRC_QP_MAX_CHANGE) qpn = qp + BRC_QP_MAX_CHANGE;
268
    else if ((qpn - qp) < -BRC_QP_MAX_CHANGE) qpn = qp - BRC_QP_MAX_CHANGE;
269
    /* making sure that with QP predictions we did do not leave QPs range */
270
    BRC_CLIP(qpn, 1, 51);
271
 
272
    /* checking wthether HRD compliance is still met */
273
    sts = intel_mfc_update_hrd(encode_state, mfc_context, frame_bits);
274
 
275
    /* calculating QP delta as some function*/
276
    x = mfc_context->hrd.target_buffer_fullness - mfc_context->hrd.current_buffer_fullness;
277
    if (x > 0) {
278
        x /= mfc_context->hrd.target_buffer_fullness;
279
        y = mfc_context->hrd.current_buffer_fullness;
280
    }
281
    else {
282
        x /= (mfc_context->hrd.buffer_size - mfc_context->hrd.target_buffer_fullness);
283
        y = mfc_context->hrd.buffer_size - mfc_context->hrd.current_buffer_fullness;
284
    }
285
    if (y < 0.01) y = 0.01;
286
    if (x > 1) x = 1;
287
    else if (x < -1) x = -1;
288
 
289
    delta_qp = BRC_QP_MAX_CHANGE*exp(-1/y)*sin(BRC_PI_0_5 * x);
290
    qpn = (int)(qpn + delta_qp + 0.5);
291
 
292
    /* making sure that with QP predictions we did do not leave QPs range */
293
    BRC_CLIP(qpn, 1, 51);
294
 
295
    if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation
296
        /* correcting QPs of slices of other types */
297
        if (slicetype == SLICE_TYPE_P) {
298
            if (abs(qpn + BRC_P_B_QP_DIFF - qpb) > 2)
299
                mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY += (qpn + BRC_P_B_QP_DIFF - qpb) >> 1;
300
            if (abs(qpn - BRC_I_P_QP_DIFF - qpi) > 2)
301
                mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY += (qpn - BRC_I_P_QP_DIFF - qpi) >> 1;
302
        } else if (slicetype == SLICE_TYPE_I) {
303
            if (abs(qpn + BRC_I_B_QP_DIFF - qpb) > 4)
304
                mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY += (qpn + BRC_I_B_QP_DIFF - qpb) >> 2;
305
            if (abs(qpn + BRC_I_P_QP_DIFF - qpp) > 2)
306
                mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY += (qpn + BRC_I_P_QP_DIFF - qpp) >> 2;
307
        } else { // SLICE_TYPE_B
308
            if (abs(qpn - BRC_P_B_QP_DIFF - qpp) > 2)
309
                mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY += (qpn - BRC_P_B_QP_DIFF - qpp) >> 1;
310
            if (abs(qpn - BRC_I_B_QP_DIFF - qpi) > 4)
311
                mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY += (qpn - BRC_I_B_QP_DIFF - qpi) >> 2;
312
        }
313
        BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY, 1, 51);
314
        BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY, 1, 51);
315
        BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY, 1, 51);
316
    } else if (sts == BRC_UNDERFLOW) { // underflow
317
        if (qpn <= qp) qpn = qp + 1;
318
        if (qpn > 51) {
319
            qpn = 51;
320
            sts = BRC_UNDERFLOW_WITH_MAX_QP; //underflow with maxQP
321
        }
322
    } else if (sts == BRC_OVERFLOW) {
323
        if (qpn >= qp) qpn = qp - 1;
324
        if (qpn < 1) { // < 0 (?) overflow with minQP
325
            qpn = 1;
326
            sts = BRC_OVERFLOW_WITH_MIN_QP; // bit stuffing to be done
327
        }
328
    }
329
 
330
    mfc_context->bit_rate_control_context[slicetype].QpPrimeY = qpn;
331
 
332
    return sts;
333
}
334
 
335
static void intel_mfc_hrd_context_init(struct encode_state *encode_state,
336
                                       struct intel_encoder_context *encoder_context)
337
{
338
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
339
    VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
340
    unsigned int rate_control_mode = encoder_context->rate_control_mode;
341
    int target_bit_rate = pSequenceParameter->bits_per_second;
342
 
343
    // current we only support CBR mode.
344
    if (rate_control_mode == VA_RC_CBR) {
345
        mfc_context->vui_hrd.i_bit_rate_value = target_bit_rate >> 10;
346
        mfc_context->vui_hrd.i_cpb_size_value = (target_bit_rate * 8) >> 10;
347
        mfc_context->vui_hrd.i_initial_cpb_removal_delay = mfc_context->vui_hrd.i_cpb_size_value * 0.5 * 1024 / target_bit_rate * 90000;
348
        mfc_context->vui_hrd.i_cpb_removal_delay = 2;
349
        mfc_context->vui_hrd.i_frame_number = 0;
350
 
351
        mfc_context->vui_hrd.i_initial_cpb_removal_delay_length = 24;
352
        mfc_context->vui_hrd.i_cpb_removal_delay_length = 24;
353
        mfc_context->vui_hrd.i_dpb_output_delay_length = 24;
354
    }
355
 
356
}
357
 
358
void
359
intel_mfc_hrd_context_update(struct encode_state *encode_state,
360
                             struct gen6_mfc_context *mfc_context)
361
{
362
    mfc_context->vui_hrd.i_frame_number++;
363
}
364
 
365
/*
 * Compare the total macroblock count of all submitted slices with the
 * macroblock count of the surface (dimensions rounded up to multiples
 * of 16).
 *
 * Returns 0 when the two counts match and 1 otherwise.
 */
int intel_mfc_interlace_check(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              struct intel_encoder_context *encoder_context)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
    int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
    int slice_mb_total = 0;
    int slice_idx;

    for (slice_idx = 0; slice_idx < encode_state->num_slice_params_ext; slice_idx++) {
        VAEncSliceParameterBufferH264 *slice =
            (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_idx]->buffer;

        slice_mb_total += slice->num_macroblocks;
    }

    return (slice_mb_total == (width_in_mbs * height_in_mbs)) ? 0 : 1;
}
386
 
387
/*
388
 * Check whether the parameters related with CBR are updated and decide whether
389
 * it needs to reinitialize the configuration related with CBR.
390
 * Currently it will check the following parameters:
391
 *      bits_per_second
392
 *      frame_rate
393
 *      gop_configuration(intra_period, ip_period, intra_idr_period)
394
 */
395
static bool intel_mfc_brc_updated_check(struct encode_state *encode_state,
396
                           struct intel_encoder_context *encoder_context)
397
{
398
    unsigned int rate_control_mode = encoder_context->rate_control_mode;
399
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
400
    double cur_fps, cur_bitrate;
401
    VAEncSequenceParameterBufferH264 *pSequenceParameter;
402
 
403
 
404
    if (rate_control_mode != VA_RC_CBR) {
405
        return false;
406
    }
407
 
408
    pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
409
 
410
    cur_bitrate = pSequenceParameter->bits_per_second;
411
    cur_fps = (double)pSequenceParameter->time_scale /
412
                (2 * (double)pSequenceParameter->num_units_in_tick);
413
 
414
    if ((cur_bitrate == mfc_context->brc.saved_bps) &&
415
        (cur_fps == mfc_context->brc.saved_fps) &&
416
        (pSequenceParameter->intra_period == mfc_context->brc.saved_intra_period) &&
417
        (pSequenceParameter->intra_idr_period == mfc_context->brc.saved_idr_period) &&
418
        (pSequenceParameter->intra_period == mfc_context->brc.saved_intra_period)) {
419
        /* the parameters related with CBR are not updaetd */
420
        return false;
421
    }
422
 
423
    mfc_context->brc.saved_ip_period = pSequenceParameter->ip_period;
424
    mfc_context->brc.saved_intra_period = pSequenceParameter->intra_period;
425
    mfc_context->brc.saved_idr_period = pSequenceParameter->intra_idr_period;
426
    mfc_context->brc.saved_fps = cur_fps;
427
    mfc_context->brc.saved_bps = cur_bitrate;
428
    return true;
429
}
430
 
431
void intel_mfc_brc_prepare(struct encode_state *encode_state,
432
                           struct intel_encoder_context *encoder_context)
433
{
434
    unsigned int rate_control_mode = encoder_context->rate_control_mode;
435
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
436
 
437
    if (rate_control_mode == VA_RC_CBR) {
438
        bool brc_updated;
439
        assert(encoder_context->codec != CODEC_MPEG2);
440
 
441
        brc_updated = intel_mfc_brc_updated_check(encode_state, encoder_context);
442
 
443
        /*Programing bit rate control */
444
        if ((mfc_context->bit_rate_control_context[SLICE_TYPE_I].MaxSizeInWord == 0) ||
445
             brc_updated) {
446
            intel_mfc_bit_rate_control_context_init(encode_state, mfc_context);
447
            intel_mfc_brc_init(encode_state, encoder_context);
448
        }
449
 
450
        /*Programing HRD control */
451
        if ((mfc_context->vui_hrd.i_cpb_size_value == 0) || brc_updated )
452
            intel_mfc_hrd_context_init(encode_state, encoder_context);
453
    }
454
}
455
 
456
/*
 * Compute how many leading bytes of a packed header consist of padding,
 * the start code prefix and the NAL unit header.
 *
 * buf holds the packed header and bits_length its length in bits; the
 * search covers the length rounded up to a multiple of 32 bits, minus the
 * last four bytes. The first 00 00 01 or 00 00 00 01 sequence is located,
 * and the returned count covers everything up to and including the NAL
 * unit header byte, plus three more bytes for NAL unit types 14, 20
 * and 21.
 *
 * Returns 0 (after a one-time warning) when no start code prefix is found.
 * A one-time warning is also issued when the count exceeds
 * HW_MAX_SKIP_LENGTH; the count is still returned unchanged.
 */
static int intel_avc_find_skipemulcnt(unsigned char *buf, int bits_length)
{
#define NAL_UNIT_TYPE_MASK 0x1f
#define HW_MAX_SKIP_LENGTH 15

    int byte_length = ALIGN(bits_length, 32) >> 3;
    int start_pos = -1;     /* offset of the first zero of the start code */
    int pos;
    int nal_unit_type;
    int skip_cnt;

    for (pos = 0; pos < byte_length - 4; pos++) {
        if (buf[pos] != 0 || buf[pos + 1] != 0)
            continue;

        /* 00 00 01, or 00 00 00 01 */
        if (buf[pos + 2] == 1 ||
            (buf[pos + 2] == 0 && buf[pos + 3] == 1)) {
            start_pos = pos;
            break;
        }
    }

    if (start_pos < 0) {
        /* warning message is complained. But anyway it will be inserted. */
        WARN_ONCE("Invalid packed header data. "
                   "Can't find the 000001 start_prefix code\n");
        return 0;
    }

    /* three bytes for the 00 00 01 form, four for 00 00 00 01 */
    skip_cnt = start_pos + ((buf[start_pos + 2] == 1) ? 3 : 4);

    /* the unit header byte is accounted */
    nal_unit_type = buf[skip_cnt] & NAL_UNIT_TYPE_MASK;
    skip_cnt += 1;

    switch (nal_unit_type) {
    case 14:
    case 20:
    case 21:
        /* more unit header bytes are accounted for MVC/SVC */
        skip_cnt += 3;
        break;
    default:
        break;
    }

    if (skip_cnt > HW_MAX_SKIP_LENGTH) {
        WARN_ONCE("Too many leading zeros are padded for packed data. "
                   "It is beyond the HW range.!!!\n");
    }

    return skip_cnt;
}
507
 
508
/*
 * Insert the application-supplied packed header of the given type into
 * slice_batch, if one was provided.
 *
 * The number of bytes to be skipped by emulation prevention is computed
 * from the header data itself, and emulation byte insertion is requested
 * only when the header does not already contain emulation bytes.
 *
 * Returns 1 when a packed header of that type was present and inserted,
 * 0 when none was supplied.
 */
static int
intel_mfc_avc_insert_packed_header(VADriverContextP ctx,
                                   struct encode_state *encode_state,
                                   struct intel_encoder_context *encoder_context,
                                   struct intel_batchbuffer *slice_batch,
                                   int packed_header_type)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    int idx = va_enc_packed_type_to_idx(packed_header_type);
    VAEncPackedHeaderParameterBuffer *param = NULL;
    unsigned int *header_data;
    unsigned int length_in_bits;
    unsigned int skip_emul_byte_cnt;

    if (!encode_state->packed_header_data[idx])
        return 0;

    header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;

    assert(encode_state->packed_header_param[idx]);
    param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
    length_in_bits = param->bit_length;

    skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
    mfc_context->insert_object(ctx,
                               encoder_context,
                               header_data,
                               ALIGN(length_in_bits, 32) >> 5,  /* length in dwords */
                               length_in_bits & 0x1f,           /* bits in the last dword */
                               skip_emul_byte_cnt,
                               0,
                               0,
                               !param->has_emulation_bytes,
                               slice_batch);
    return 1;
}

/*
 * Emit the SPS, PPS and SEI headers for the current picture into
 * slice_batch.
 *
 * Packed SPS, PPS and SEI headers supplied by the application are inserted
 * in that order. When no packed SEI header was supplied and the rate
 * control mode is CBR, a buffering/timing SEI message is built from the
 * VUI HRD state and inserted instead.
 */
void intel_mfc_avc_pipeline_header_programing(VADriverContextP ctx,
                                              struct encode_state *encode_state,
                                              struct intel_encoder_context *encoder_context,
                                              struct intel_batchbuffer *slice_batch)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    unsigned int rate_control_mode = encoder_context->rate_control_mode;
    unsigned char *sei_data = NULL;
    int length_in_bits;

    intel_mfc_avc_insert_packed_header(ctx, encode_state, encoder_context,
                                       slice_batch, VAEncPackedHeaderH264_SPS);
    intel_mfc_avc_insert_packed_header(ctx, encode_state, encoder_context,
                                       slice_batch, VAEncPackedHeaderH264_PPS);

    if (intel_mfc_avc_insert_packed_header(ctx, encode_state, encoder_context,
                                           slice_batch, VAEncPackedHeaderH264_SEI))
        return;

    if (rate_control_mode != VA_RC_CBR)
        return;

    /* No packed SEI from the application: generate the timing SEI here */
    length_in_bits = build_avc_sei_buffer_timing(
        mfc_context->vui_hrd.i_initial_cpb_removal_delay_length,
        mfc_context->vui_hrd.i_initial_cpb_removal_delay,
        0,
        mfc_context->vui_hrd.i_cpb_removal_delay_length,
        mfc_context->vui_hrd.i_cpb_removal_delay * mfc_context->vui_hrd.i_frame_number,
        mfc_context->vui_hrd.i_dpb_output_delay_length,
        0,
        &sei_data);
    mfc_context->insert_object(ctx,
                               encoder_context,
                               (unsigned int *)sei_data,
                               ALIGN(length_in_bits, 32) >> 5,
                               length_in_bits & 0x1f,
                               4,
                               0,
                               0,
                               1,
                               slice_batch);
    free(sei_data);
}
614
 
615
VAStatus intel_mfc_avc_prepare(VADriverContextP ctx,
616
                               struct encode_state *encode_state,
617
                               struct intel_encoder_context *encoder_context)
618
{
619
    struct i965_driver_data *i965 = i965_driver_data(ctx);
620
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
621
    struct object_surface *obj_surface;
622
    struct object_buffer *obj_buffer;
623
    GenAvcSurface *gen6_avc_surface;
624
    dri_bo *bo;
625
    VAStatus vaStatus = VA_STATUS_SUCCESS;
626
    int i, j, enable_avc_ildb = 0;
627
    VAEncSliceParameterBufferH264 *slice_param;
628
    struct i965_coded_buffer_segment *coded_buffer_segment;
629
    VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
630
    int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
631
    int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
632
 
633
    if (IS_GEN6(i965->intel.device_info)) {
634
	/* On the SNB it should be fixed to 128 for the DMV buffer */
635
	width_in_mbs = 128;
636
    }
637
 
638
    for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
639
        assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
640
        slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
641
 
642
        for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
643
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
644
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
645
                   (slice_param->slice_type == SLICE_TYPE_P) ||
646
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
647
                   (slice_param->slice_type == SLICE_TYPE_B));
648
 
649
            if (slice_param->disable_deblocking_filter_idc != 1) {
650
                enable_avc_ildb = 1;
651
                break;
652
            }
653
 
654
            slice_param++;
655
        }
656
    }
657
 
658
    /*Setup all the input&output object*/
659
 
660
    /* Setup current frame and current direct mv buffer*/
661
    obj_surface = encode_state->reconstructed_object;
662
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
663
 
664
    if ( obj_surface->private_data == NULL) {
665
        gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
666
        gen6_avc_surface->dmv_top =
667
            dri_bo_alloc(i965->intel.bufmgr,
668
                         "Buffer",
669
                         68 * width_in_mbs * height_in_mbs,
670
                         64);
671
        gen6_avc_surface->dmv_bottom =
672
            dri_bo_alloc(i965->intel.bufmgr,
673
                         "Buffer",
674
                         68 * width_in_mbs * height_in_mbs,
675
                         64);
676
        assert(gen6_avc_surface->dmv_top);
677
        assert(gen6_avc_surface->dmv_bottom);
678
        obj_surface->private_data = (void *)gen6_avc_surface;
679
        obj_surface->free_private_data = (void *)gen_free_avc_surface;
680
    }
681
    gen6_avc_surface = (GenAvcSurface *) obj_surface->private_data;
682
    mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo = gen6_avc_surface->dmv_top;
683
    mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 1].bo = gen6_avc_surface->dmv_bottom;
684
    dri_bo_reference(gen6_avc_surface->dmv_top);
685
    dri_bo_reference(gen6_avc_surface->dmv_bottom);
686
 
687
    if (enable_avc_ildb) {
688
        mfc_context->post_deblocking_output.bo = obj_surface->bo;
689
        dri_bo_reference(mfc_context->post_deblocking_output.bo);
690
    } else {
691
        mfc_context->pre_deblocking_output.bo = obj_surface->bo;
692
        dri_bo_reference(mfc_context->pre_deblocking_output.bo);
693
    }
694
 
695
    mfc_context->surface_state.width = obj_surface->orig_width;
696
    mfc_context->surface_state.height = obj_surface->orig_height;
697
    mfc_context->surface_state.w_pitch = obj_surface->width;
698
    mfc_context->surface_state.h_pitch = obj_surface->height;
699
 
700
    /* Setup reference frames and direct mv buffers*/
701
    for(i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++) {
702
        obj_surface = encode_state->reference_objects[i];
703
 
704
        if (obj_surface && obj_surface->bo) {
705
            mfc_context->reference_surfaces[i].bo = obj_surface->bo;
706
            dri_bo_reference(obj_surface->bo);
707
 
708
            /* Check DMV buffer */
709
            if ( obj_surface->private_data == NULL) {
710
 
711
                gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
712
                gen6_avc_surface->dmv_top =
713
                    dri_bo_alloc(i965->intel.bufmgr,
714
                                 "Buffer",
715
                                 68 * width_in_mbs * height_in_mbs,
716
                                 64);
717
                gen6_avc_surface->dmv_bottom =
718
                    dri_bo_alloc(i965->intel.bufmgr,
719
                                 "Buffer",
720
                                 68 * width_in_mbs * height_in_mbs,
721
                                 64);
722
                assert(gen6_avc_surface->dmv_top);
723
                assert(gen6_avc_surface->dmv_bottom);
724
                obj_surface->private_data = gen6_avc_surface;
725
                obj_surface->free_private_data = gen_free_avc_surface;
726
            }
727
 
728
            gen6_avc_surface = (GenAvcSurface *) obj_surface->private_data;
729
            /* Setup DMV buffer */
730
            mfc_context->direct_mv_buffers[i*2].bo = gen6_avc_surface->dmv_top;
731
            mfc_context->direct_mv_buffers[i*2+1].bo = gen6_avc_surface->dmv_bottom;
732
            dri_bo_reference(gen6_avc_surface->dmv_top);
733
            dri_bo_reference(gen6_avc_surface->dmv_bottom);
734
        } else {
735
            break;
736
        }
737
    }
738
 
739
    mfc_context->uncompressed_picture_source.bo = encode_state->input_yuv_object->bo;
740
    dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
741
 
742
    obj_buffer = encode_state->coded_buf_object;
743
    bo = obj_buffer->buffer_store->bo;
744
    mfc_context->mfc_indirect_pak_bse_object.bo = bo;
745
    mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
746
    mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
747
    dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
748
 
749
    dri_bo_map(bo, 1);
750
    coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
751
    coded_buffer_segment->mapped = 0;
752
    coded_buffer_segment->codec = encoder_context->codec;
753
    dri_bo_unmap(bo);
754
 
755
    return vaStatus;
756
}
757
/*
 * Convert a cost into the nearest LUT code.
 *
 * A LUT code is a pair of 4-bit fields, (shift, base): the high nibble is
 * the shift K and the low nibble is the base X, and together they stand
 * for the value X * 2^K.
 *
 * Writing value = 2^n * (1 + delta) with 0 <= delta < 1:
 *      2^K * X = 2^n * (1 + delta)
 *      K + log2(X) = n + log2(1 + delta)
 *      log2(X) = n - K + log2(1 + delta)
 * As X is in the range [1, 15]:
 *      4 > n - K + log2(1 + delta) >= 0
 *      =>  n + log2(1 + delta) >= K > n - 4 + log2(1 + delta)
 * so only the four shifts n-3 .. n have to be tried; the one with the
 * smallest absolute error wins (the smallest shift on a tie).
 *
 * value: the cost to encode.  Values <= 0 yield 0; values below 16 are
 *        returned unchanged (shift 0).
 * max:   a LUT code acting as upper limit.  When the encoded cost stands
 *        for more than max does, max is returned instead.
 *
 * The limit check is done on the approximated value itself and not on the
 * re-decoded code, so costs that would need a shift above 15 are clamped
 * to max as well instead of yielding a code that does not fit in a byte.
 */
int intel_format_lutvalue(int value, int max)
{
    unsigned int target, best_value, best_error, max_value;
    int ret, logvalue, shift;

    if (value <= 0)
        return 0;

    target = (unsigned int)value;

    /* logvalue = floor(log2(value)), computed without floating point */
    logvalue = 0;
    while ((target >> (logvalue + 1)) != 0)
        logvalue++;

    if (logvalue < 4) {
        /* Fits into the base nibble directly. */
        ret = value;
        best_value = target;
    } else {
        /* Fallback only; one of the four shifts below always yields a base. */
        ret = max;
        best_value = 0;
        best_error = target;

        for (shift = logvalue - 4 + 1; shift <= logvalue; shift++) {
            unsigned int base, approx, err;

            /* shift >= 1 here; round the discarded bits to nearest */
            base = (target + (1u << (shift - 1)) - 1) >> shift;
            if (base >= 16)
                continue;

            approx = base << shift;
            err = (approx > target) ? (approx - target) : (target - approx);
            if (err < best_error) {
                best_error = err;
                best_value = approx;
                ret = (shift << 4) | (int)base;
                if (err == 0)
                    break;
            }
        }
    }

    max_value = (unsigned int)(max & 0xf) << ((max & 0xf0) >> 4);
    if (best_value > max_value)
        ret = max;

    return ret;
}
812
 
813
 
814
#define		QP_MAX			52
815
 
816
 
817
static float intel_lambda_qp(int qp)
818
{
819
    float value, lambdaf;
820
    value = qp;
821
    value = value / 6 - 2;
822
    if (value < 0)
823
        value = 0;
824
    lambdaf = roundf(powf(2, value));
825
    return lambdaf;
826
}
827
 
828
 
829
/* Truncate a floating-point cost to an integer and encode it as a LUT byte. */
static uint8_t
intel_vme_encode_cost(float cost, int max)
{
    return (uint8_t)intel_format_lutvalue((int)cost, max);
}

/*
 * Fill the mode-cost and motion-vector-cost bytes of the VME state message
 * for the current AVC picture.
 *
 * The QP is pic_init_qp + slice_qp_delta of the first slice in CQP mode,
 * otherwise the QpPrimeY kept by the bit-rate-control context for the slice
 * type.  Every cost is a multiple of intel_lambda_qp(qp), encoded with
 * intel_format_lutvalue().  Nothing is written when the context has no
 * state message buffer.
 *
 * For P/B slices with qp <= 25 a fixed table is used instead of the
 * lambda-derived mode costs.
 */
void intel_vme_update_mbmv_cost(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
    VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
    int qp, j, mv_count;
    uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
    float lambda;

    int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);

    if (encoder_context->rate_control_mode == VA_RC_CQP)
        qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
    else
        qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;

    if (vme_state_message == NULL)
        return;

    assert(qp <= QP_MAX);
    lambda = intel_lambda_qp(qp);

    if (slice_type == SLICE_TYPE_I) {
        vme_state_message[MODE_INTRA_16X16] = 0;
        vme_state_message[MODE_INTRA_8X8] = intel_vme_encode_cost(lambda * 4, 0x8f);
        vme_state_message[MODE_INTRA_4X4] = intel_vme_encode_cost(lambda * 16, 0x8f);
        vme_state_message[MODE_INTRA_NONPRED] = intel_vme_encode_cost(lambda * 3, 0x6f);
        return;
    }

    /* Motion-vector costs: entry 0 is free, entries 1 and 2 use j = 1, 2,
     * the following entries use j = 4, 8, ..., 64. */
    vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(0, 0x6f);
    for (j = 1; j < 3; j++) {
        vme_state_message[MODE_INTER_MV0 + j] =
            intel_vme_encode_cost((log2f((float)(j + 1)) + 1.718f) * lambda, 0x6f);
    }
    mv_count = 3;
    for (j = 4; j <= 64; j *= 2) {
        vme_state_message[MODE_INTER_MV0 + mv_count] =
            intel_vme_encode_cost((log2f((float)(j + 1)) + 1.718f) * lambda, 0x6f);
        mv_count++;
    }

    if (qp <= 25) {
        /* Fixed mode costs for low QPs. */
        vme_state_message[MODE_INTRA_16X16] = 0x4a;
        vme_state_message[MODE_INTRA_8X8] = 0x4a;
        vme_state_message[MODE_INTRA_4X4] = 0x4a;
        vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
        vme_state_message[MODE_INTER_16X16] = 0x4a;
        vme_state_message[MODE_INTER_16X8] = 0x4a;
        vme_state_message[MODE_INTER_8X8] = 0x4a;
        vme_state_message[MODE_INTER_8X4] = 0x4a;
        vme_state_message[MODE_INTER_4X4] = 0x4a;
        vme_state_message[MODE_INTER_BWD] = 0x2a;
        return;
    }

    /* Intra mode costs inside an inter slice.  The 16x16 cost is
     * lambda * 10; it used to be computed into a float that was never
     * read, so the last MV cost was encoded here by mistake. */
    vme_state_message[MODE_INTRA_16X16] = intel_vme_encode_cost(lambda * 10, 0x8f);
    vme_state_message[MODE_INTRA_8X8] = intel_vme_encode_cost(lambda * 14, 0x8f);
    vme_state_message[MODE_INTRA_4X4] = intel_vme_encode_cost(lambda * 24, 0x8f);
    vme_state_message[MODE_INTRA_NONPRED] = intel_vme_encode_cost(lambda * 3.5, 0x6f);

    if (slice_type == SLICE_TYPE_P) {
        vme_state_message[MODE_INTER_16X16] = intel_vme_encode_cost(lambda * 2.5, 0x8f);
        vme_state_message[MODE_INTER_16X8] = intel_vme_encode_cost(lambda * 4, 0x8f);
        vme_state_message[MODE_INTER_8X8] = intel_vme_encode_cost(lambda * 1.5, 0x6f);
        vme_state_message[MODE_INTER_8X4] = intel_vme_encode_cost(lambda * 3, 0x6f);
        vme_state_message[MODE_INTER_4X4] = intel_vme_encode_cost(lambda * 5, 0x6f);
        /* BWD is not used in P-frame */
        vme_state_message[MODE_INTER_BWD] = 0;
    } else {
        vme_state_message[MODE_INTER_16X16] = intel_vme_encode_cost(lambda * 2.5, 0x8f);
        vme_state_message[MODE_INTER_16X8] = intel_vme_encode_cost(lambda * 5.5, 0x8f);
        vme_state_message[MODE_INTER_8X8] = intel_vme_encode_cost(lambda * 3.5, 0x6f);
        vme_state_message[MODE_INTER_8X4] = intel_vme_encode_cost(lambda * 5.0, 0x6f);
        vme_state_message[MODE_INTER_4X4] = intel_vme_encode_cost(lambda * 6.5, 0x6f);
        vme_state_message[MODE_INTER_BWD] = intel_vme_encode_cost(lambda * 1.5, 0x6f);
    }
}
940
 
941
 
942
#define		MB_SCOREBOARD_A		(1 << 0)
943
#define		MB_SCOREBOARD_B		(1 << 1)
944
#define		MB_SCOREBOARD_C		(1 << 2)
945
void
946
gen7_vme_scoreboard_init(VADriverContextP ctx, struct gen6_vme_context *vme_context)
947
{
948
    vme_context->gpe_context.vfe_desc5.scoreboard0.enable = 1;
949
    vme_context->gpe_context.vfe_desc5.scoreboard0.type = SCOREBOARD_STALLING;
950
    vme_context->gpe_context.vfe_desc5.scoreboard0.mask = (MB_SCOREBOARD_A |
951
                                                           MB_SCOREBOARD_B |
952
                                                           MB_SCOREBOARD_C);
953
 
954
    /* In VME prediction the current mb depends on the neighbour
955
     * A/B/C macroblock. So the left/up/up-right dependency should
956
     * be considered.
957
     */
958
    vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x0 = -1;
959
    vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y0 = 0;
960
    vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x1 = 0;
961
    vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y1 = -1;
962
    vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x2 = 1;
963
    vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y2 = -1;
964
 
965
    vme_context->gpe_context.vfe_desc7.dword = 0;
966
    return;
967
}
968
 
969
/*
 * Check whether the macroblock at (x_index, y_index) may be processed.
 *
 * Returns 0 when the position lies inside the mb_width x mb_height picture
 * and its raster index falls into the slice range
 * [first_mb, first_mb + num_mb); returns -1 otherwise.  The upper limit is
 * exclusive: index first_mb + num_mb is the first macroblock after the
 * slice.
 */
static inline int loop_in_bounds(int x_index, int y_index, int first_mb, int num_mb, int mb_width, int mb_height)
{
    int mb_index;

    if (x_index < 0 || x_index >= mb_width)
        return -1;
    if (y_index < 0 || y_index >= mb_height)
        return -1;

    mb_index = y_index * mb_width + x_index;
    if (mb_index < first_mb || mb_index >= (first_mb + num_mb))
        return -1;

    return 0;
}
983
 
984
void
985
gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx,
986
                                     struct encode_state *encode_state,
987
                                     int mb_width, int mb_height,
988
                                     int kernel,
989
                                     int transform_8x8_mode_flag,
990
                                     struct intel_encoder_context *encoder_context)
991
{
992
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
993
    int mb_row;
994
    int s;
995
    unsigned int *command_ptr;
996
 
997
#define		USE_SCOREBOARD		(1 << 21)
998
 
999
    dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
1000
    command_ptr = vme_context->vme_batchbuffer.bo->virtual;
1001
 
1002
    for (s = 0; s < encode_state->num_slice_params_ext; s++) {
1003
	VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
1004
	int first_mb = pSliceParameter->macroblock_address;
1005
	int num_mb = pSliceParameter->num_macroblocks;
1006
	unsigned int mb_intra_ub, score_dep;
1007
	int x_outer, y_outer, x_inner, y_inner;
1008
	int xtemp_outer = 0;
1009
 
1010
	x_outer = first_mb % mb_width;
1011
	y_outer = first_mb / mb_width;
1012
	mb_row = y_outer;
1013
 
1014
	for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
1015
	    x_inner = x_outer;
1016
	    y_inner = y_outer;
1017
	    for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
1018
		mb_intra_ub = 0;
1019
		score_dep = 0;
1020
		if (x_inner != 0) {
1021
		    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1022
		    score_dep |= MB_SCOREBOARD_A;
1023
		}
1024
		if (y_inner != mb_row) {
1025
		    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1026
		    score_dep |= MB_SCOREBOARD_B;
1027
		    if (x_inner != 0)
1028
			mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1029
		    if (x_inner != (mb_width -1)) {
1030
			mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1031
			score_dep |= MB_SCOREBOARD_C;
1032
                    }
1033
		}
1034
 
1035
            	*command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
1036
		*command_ptr++ = kernel;
1037
		*command_ptr++ = USE_SCOREBOARD;
1038
		/* Indirect data */
1039
		*command_ptr++ = 0;
1040
		/* the (X, Y) term of scoreboard */
1041
		*command_ptr++ = ((y_inner << 16) | x_inner);
1042
		*command_ptr++ = score_dep;
1043
		/*inline data */
1044
		*command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1045
		*command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
1046
		x_inner -= 2;
1047
		y_inner += 1;
1048
	    }
1049
	    x_outer += 1;
1050
	}
1051
 
1052
	xtemp_outer = mb_width - 2;
1053
	if (xtemp_outer < 0)
1054
            xtemp_outer = 0;
1055
	x_outer = xtemp_outer;
1056
	y_outer = first_mb / mb_width;
1057
	for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
1058
	    y_inner = y_outer;
1059
	    x_inner = x_outer;
1060
	    for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
1061
	    	mb_intra_ub = 0;
1062
		score_dep = 0;
1063
		if (x_inner != 0) {
1064
		    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1065
		    score_dep |= MB_SCOREBOARD_A;
1066
		}
1067
		if (y_inner != mb_row) {
1068
		    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1069
		    score_dep |= MB_SCOREBOARD_B;
1070
		    if (x_inner != 0)
1071
			mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1072
 
1073
		    if (x_inner != (mb_width -1)) {
1074
			mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1075
			score_dep |= MB_SCOREBOARD_C;
1076
                    }
1077
		}
1078
 
1079
            	*command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
1080
		*command_ptr++ = kernel;
1081
		*command_ptr++ = USE_SCOREBOARD;
1082
		/* Indirect data */
1083
		*command_ptr++ = 0;
1084
		/* the (X, Y) term of scoreboard */
1085
		*command_ptr++ = ((y_inner << 16) | x_inner);
1086
		*command_ptr++ = score_dep;
1087
		/*inline data */
1088
		*command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1089
		*command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
1090
 
1091
		x_inner -= 2;
1092
		y_inner += 1;
1093
	    }
1094
	    x_outer++;
1095
	    if (x_outer >= mb_width) {
1096
		y_outer += 1;
1097
		x_outer = xtemp_outer;
1098
	    }
1099
	}
1100
    }
1101
 
1102
    *command_ptr++ = 0;
1103
    *command_ptr++ = MI_BATCH_BUFFER_END;
1104
 
1105
    dri_bo_unmap(vme_context->vme_batchbuffer.bo);
1106
}
1107
 
1108
static uint8_t
1109
intel_get_ref_idx_state_1(VAPictureH264 *va_pic, unsigned int frame_store_id)
1110
{
1111
    unsigned int is_long_term =
1112
        !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
1113
    unsigned int is_top_field =
1114
        !!(va_pic->flags & VA_PICTURE_H264_TOP_FIELD);
1115
    unsigned int is_bottom_field =
1116
        !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
1117
 
1118
    return ((is_long_term                         << 6) |
1119
            ((is_top_field ^ is_bottom_field ^ 1) << 5) |
1120
            (frame_store_id                       << 1) |
1121
            ((is_top_field ^ 1) & is_bottom_field));
1122
}
1123
 
1124
void
1125
intel_mfc_avc_ref_idx_state(VADriverContextP ctx,
1126
                            struct encode_state *encode_state,
1127
                            struct intel_encoder_context *encoder_context)
1128
{
1129
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
1130
    struct intel_batchbuffer *batch = encoder_context->base.batch;
1131
    int slice_type;
1132
    struct object_surface *obj_surface;
1133
    unsigned int fref_entry, bref_entry;
1134
    int frame_index, i;
1135
    VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1136
 
1137
    fref_entry = 0x80808080;
1138
    bref_entry = 0x80808080;
1139
    slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
1140
 
1141
    if (slice_type == SLICE_TYPE_P || slice_type == SLICE_TYPE_B) {
1142
        int ref_idx_l0 = (vme_context->ref_index_in_mb[0] & 0xff);
1143
 
1144
        if (ref_idx_l0 > 3) {
1145
            WARN_ONCE("ref_idx_l0 is out of range\n");
1146
            ref_idx_l0 = 0;
1147
        }
1148
 
1149
        obj_surface = vme_context->used_reference_objects[0];
1150
        frame_index = -1;
1151
        for (i = 0; i < 16; i++) {
1152
            if (obj_surface &&
1153
                obj_surface == encode_state->reference_objects[i]) {
1154
                frame_index = i;
1155
                break;
1156
            }
1157
        }
1158
        if (frame_index == -1) {
1159
            WARN_ONCE("RefPicList0 is not found in DPB!\n");
1160
        } else {
1161
            int ref_idx_l0_shift = ref_idx_l0 * 8;
1162
            fref_entry &= ~(0xFF << ref_idx_l0_shift);
1163
            fref_entry += (intel_get_ref_idx_state_1(vme_context->used_references[0], frame_index) << ref_idx_l0_shift);
1164
        }
1165
    }
1166
 
1167
    if (slice_type == SLICE_TYPE_B) {
1168
        int ref_idx_l1 = (vme_context->ref_index_in_mb[1] & 0xff);
1169
 
1170
        if (ref_idx_l1 > 3) {
1171
            WARN_ONCE("ref_idx_l1 is out of range\n");
1172
            ref_idx_l1 = 0;
1173
        }
1174
 
1175
        obj_surface = vme_context->used_reference_objects[1];
1176
        frame_index = -1;
1177
        for (i = 0; i < 16; i++) {
1178
            if (obj_surface &&
1179
                obj_surface == encode_state->reference_objects[i]) {
1180
                frame_index = i;
1181
                break;
1182
            }
1183
        }
1184
        if (frame_index == -1) {
1185
            WARN_ONCE("RefPicList1 is not found in DPB!\n");
1186
        } else {
1187
            int ref_idx_l1_shift = ref_idx_l1 * 8;
1188
            bref_entry &= ~(0xFF << ref_idx_l1_shift);
1189
            bref_entry += (intel_get_ref_idx_state_1(vme_context->used_references[1], frame_index) << ref_idx_l1_shift);
1190
        }
1191
    }
1192
 
1193
    BEGIN_BCS_BATCH(batch, 10);
1194
    OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
1195
    OUT_BCS_BATCH(batch, 0);                  //Select L0
1196
    OUT_BCS_BATCH(batch, fref_entry);         //Only 1 reference
1197
    for(i = 0; i < 7; i++) {
1198
        OUT_BCS_BATCH(batch, 0x80808080);
1199
    }
1200
    ADVANCE_BCS_BATCH(batch);
1201
 
1202
    BEGIN_BCS_BATCH(batch, 10);
1203
    OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
1204
    OUT_BCS_BATCH(batch, 1);                  //Select L1
1205
    OUT_BCS_BATCH(batch, bref_entry);         //Only 1 reference
1206
    for(i = 0; i < 7; i++) {
1207
        OUT_BCS_BATCH(batch, 0x80808080);
1208
    }
1209
    ADVANCE_BCS_BATCH(batch);
1210
}
1211
 
1212
 
1213
/*
 * Fill the dword-indexed VME state message for an MPEG-2 picture: the
 * mode/MV cost entries (non-intra pictures only), the motion-vector range
 * chosen from the MPEG-2 level, and the picture size in macroblocks.
 */
void intel_vme_mpeg2_state_setup(VADriverContextP ctx,
                                 struct encode_state *encode_state,
                                 struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    uint32_t *vme_state_message = (uint32_t *)(vme_context->vme_state_message);
    VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
    VAEncSliceParameterBufferMPEG2 *slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
    VAEncPictureParameterBufferMPEG2 *pic_param;
    int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
    int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
    uint32_t mv_x, mv_y;

    /* Motion-vector range per level; unknown levels fall back to the
     * low-level range. */
    switch (vme_context->mpeg2_level) {
    case MPEG2_LEVEL_LOW:
        mv_x = 512;
        mv_y = 64;
        break;

    case MPEG2_LEVEL_MAIN:
        mv_x = 1024;
        mv_y = 128;
        break;

    case MPEG2_LEVEL_HIGH:
        mv_x = 2048;
        mv_y = 128;
        break;

    default:
        WARN_ONCE("Incorrect Mpeg2 level setting!\n");
        mv_x = 512;
        mv_y = 64;
        break;
    }

    pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;

    if (pic_param->picture_type != VAEncPictureTypeIntra) {
        int step, slot, mode_cost;
        float lambda = intel_lambda_qp(slice_param->quantiser_scale_code);
        float mv_costf;

        /* No Intra prediction. So it is zero */
        vme_state_message[MODE_INTRA_8X8] = 0;
        vme_state_message[MODE_INTRA_4X4] = 0;

        /* MV costs: slot 0 is free, slots 1 and 2 use step 1 and 2, the
         * following slots use step 4, 8, ..., 64. */
        vme_state_message[MODE_INTER_MV0] = 0;
        for (slot = 1; slot < 3; slot++) {
            mv_costf = (log2f((float)(slot + 1)) + 1.718f) * lambda;
            vme_state_message[MODE_INTER_MV0 + slot] =
                intel_format_lutvalue((int)mv_costf, 0x6f);
        }
        slot = 3;
        for (step = 4; step <= 64; step *= 2) {
            mv_costf = (log2f((float)(step + 1)) + 1.718f) * lambda;
            vme_state_message[MODE_INTER_MV0 + slot] =
                intel_format_lutvalue((int)mv_costf, 0x6f);
            slot++;
        }

        /* It can only perform the 16x16 search. So mode cost can be
         * ignored for the other modes, for example 16x8/8x8.
         */
        mode_cost = lambda;
        vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(mode_cost, 0x8f);
        vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(mode_cost, 0x8f);

        vme_state_message[MODE_INTER_16X8] = 0;
        vme_state_message[MODE_INTER_8X8] = 0;
        vme_state_message[MODE_INTER_8X4] = 0;
        vme_state_message[MODE_INTER_4X4] = 0;
        vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(mode_cost, 0x6f);
    }

    vme_state_message[MPEG2_MV_RANGE] = (mv_y << 16) | (mv_x);

    vme_state_message[MPEG2_PIC_WIDTH_HEIGHT] = (height_in_mbs << 16) |
        width_in_mbs;
}
1286
 
1287
void
1288
gen7_vme_mpeg2_walker_fill_vme_batchbuffer(VADriverContextP ctx,
1289
                                           struct encode_state *encode_state,
1290
                                           int mb_width, int mb_height,
1291
                                           int kernel,
1292
                                           struct intel_encoder_context *encoder_context)
1293
{
1294
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
1295
    unsigned int *command_ptr;
1296
 
1297
#define		MPEG2_SCOREBOARD		(1 << 21)
1298
 
1299
    dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
1300
    command_ptr = vme_context->vme_batchbuffer.bo->virtual;
1301
 
1302
    {
1303
	unsigned int mb_intra_ub, score_dep;
1304
	int x_outer, y_outer, x_inner, y_inner;
1305
	int xtemp_outer = 0;
1306
	int first_mb = 0;
1307
	int num_mb = mb_width * mb_height;
1308
 
1309
	x_outer = 0;
1310
	y_outer = 0;
1311
 
1312
 
1313
	for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
1314
	    x_inner = x_outer;
1315
	    y_inner = y_outer;
1316
	    for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
1317
		mb_intra_ub = 0;
1318
		score_dep = 0;
1319
		if (x_inner != 0) {
1320
		    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1321
		    score_dep |= MB_SCOREBOARD_A;
1322
		}
1323
		if (y_inner != 0) {
1324
		    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1325
		    score_dep |= MB_SCOREBOARD_B;
1326
 
1327
		    if (x_inner != 0)
1328
			mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1329
 
1330
		    if (x_inner != (mb_width -1)) {
1331
			mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1332
			score_dep |= MB_SCOREBOARD_C;
1333
		    }
1334
		}
1335
 
1336
            	*command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
1337
		*command_ptr++ = kernel;
1338
		*command_ptr++ = MPEG2_SCOREBOARD;
1339
		/* Indirect data */
1340
		*command_ptr++ = 0;
1341
		/* the (X, Y) term of scoreboard */
1342
		*command_ptr++ = ((y_inner << 16) | x_inner);
1343
		*command_ptr++ = score_dep;
1344
		/*inline data */
1345
		*command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1346
		*command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
1347
		x_inner -= 2;
1348
		y_inner += 1;
1349
	    }
1350
	    x_outer += 1;
1351
	}
1352
 
1353
	xtemp_outer = mb_width - 2;
1354
	if (xtemp_outer < 0)
1355
            xtemp_outer = 0;
1356
	x_outer = xtemp_outer;
1357
	y_outer = 0;
1358
	for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
1359
	    y_inner = y_outer;
1360
	    x_inner = x_outer;
1361
	    for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
1362
	    	mb_intra_ub = 0;
1363
		score_dep = 0;
1364
		if (x_inner != 0) {
1365
		    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
1366
		    score_dep |= MB_SCOREBOARD_A;
1367
		}
1368
		if (y_inner != 0) {
1369
		    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
1370
		    score_dep |= MB_SCOREBOARD_B;
1371
 
1372
		    if (x_inner != 0)
1373
			mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
1374
 
1375
		    if (x_inner != (mb_width -1)) {
1376
			mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
1377
			score_dep |= MB_SCOREBOARD_C;
1378
		    }
1379
		}
1380
 
1381
            	*command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
1382
		*command_ptr++ = kernel;
1383
		*command_ptr++ = MPEG2_SCOREBOARD;
1384
		/* Indirect data */
1385
		*command_ptr++ = 0;
1386
		/* the (X, Y) term of scoreboard */
1387
		*command_ptr++ = ((y_inner << 16) | x_inner);
1388
		*command_ptr++ = score_dep;
1389
		/*inline data */
1390
		*command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
1391
		*command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
1392
 
1393
		x_inner -= 2;
1394
		y_inner += 1;
1395
	    }
1396
	    x_outer++;
1397
	    if (x_outer >= mb_width) {
1398
		y_outer += 1;
1399
		x_outer = xtemp_outer;
1400
	    }
1401
	}
1402
    }
1403
 
1404
    *command_ptr++ = 0;
1405
    *command_ptr++ = MI_BATCH_BUFFER_END;
1406
 
1407
    dri_bo_unmap(vme_context->vme_batchbuffer.bo);
1408
    return;
1409
}
1410
 
1411
static int
1412
avc_temporal_find_surface(VAPictureH264 *curr_pic,
1413
                          VAPictureH264 *ref_list,
1414
                          int num_pictures,
1415
                          int dir)
1416
{
1417
    int i, found = -1, min = 0x7FFFFFFF;
1418
 
1419
    for (i = 0; i < num_pictures; i++) {
1420
        int tmp;
1421
 
1422
        if ((ref_list[i].flags & VA_PICTURE_H264_INVALID) ||
1423
            (ref_list[i].picture_id == VA_INVALID_SURFACE))
1424
            break;
1425
 
1426
        tmp = curr_pic->TopFieldOrderCnt - ref_list[i].TopFieldOrderCnt;
1427
 
1428
        if (dir)
1429
            tmp = -tmp;
1430
 
1431
        if (tmp > 0 && tmp < min) {
1432
            min = tmp;
1433
            found = i;
1434
        }
1435
    }
1436
 
1437
    return found;
1438
}
1439
 
1440
void
1441
intel_avc_vme_reference_state(VADriverContextP ctx,
1442
                              struct encode_state *encode_state,
1443
                              struct intel_encoder_context *encoder_context,
1444
                              int list_index,
1445
                              int surface_index,
1446
                              void (* vme_source_surface_state)(
1447
                                  VADriverContextP ctx,
1448
                                  int index,
1449
                                  struct object_surface *obj_surface,
1450
                                  struct intel_encoder_context *encoder_context))
1451
{
1452
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
1453
    struct object_surface *obj_surface = NULL;
1454
    struct i965_driver_data *i965 = i965_driver_data(ctx);
1455
    VASurfaceID ref_surface_id;
1456
    VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
1457
    VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
1458
    int max_num_references;
1459
    VAPictureH264 *curr_pic;
1460
    VAPictureH264 *ref_list;
1461
    int ref_idx;
1462
 
1463
    if (list_index == 0) {
1464
        max_num_references = pic_param->num_ref_idx_l0_active_minus1 + 1;
1465
        ref_list = slice_param->RefPicList0;
1466
    } else {
1467
        max_num_references = pic_param->num_ref_idx_l1_active_minus1 + 1;
1468
        ref_list = slice_param->RefPicList1;
1469
    }
1470
 
1471
    if (max_num_references == 1) {
1472
        if (list_index == 0) {
1473
            ref_surface_id = slice_param->RefPicList0[0].picture_id;
1474
            vme_context->used_references[0] = &slice_param->RefPicList0[0];
1475
        } else {
1476
            ref_surface_id = slice_param->RefPicList1[0].picture_id;
1477
            vme_context->used_references[1] = &slice_param->RefPicList1[0];
1478
        }
1479
 
1480
        if (ref_surface_id != VA_INVALID_SURFACE)
1481
            obj_surface = SURFACE(ref_surface_id);
1482
 
1483
        if (!obj_surface ||
1484
            !obj_surface->bo) {
1485
            obj_surface = encode_state->reference_objects[list_index];
1486
            vme_context->used_references[list_index] = &pic_param->ReferenceFrames[list_index];
1487
        }
1488
 
1489
        ref_idx = 0;
1490
    } else {
1491
        curr_pic = &pic_param->CurrPic;
1492
 
1493
        /* select the reference frame in temporal space */
1494
        ref_idx = avc_temporal_find_surface(curr_pic, ref_list, max_num_references, list_index == 1);
1495
        ref_surface_id = ref_list[ref_idx].picture_id;
1496
 
1497
        if (ref_surface_id != VA_INVALID_SURFACE) /* otherwise warning later */
1498
            obj_surface = SURFACE(ref_surface_id);
1499
 
1500
        vme_context->used_reference_objects[list_index] = obj_surface;
1501
        vme_context->used_references[list_index] = &ref_list[ref_idx];
1502
    }
1503
 
1504
    if (obj_surface &&
1505
        obj_surface->bo) {
1506
        assert(ref_idx >= 0);
1507
        vme_context->used_reference_objects[list_index] = obj_surface;
1508
        vme_source_surface_state(ctx, surface_index, obj_surface, encoder_context);
1509
        vme_context->ref_index_in_mb[list_index] = (ref_idx << 24 |
1510
                                                    ref_idx << 16 |
1511
                                                    ref_idx <<  8 |
1512
                                                    ref_idx);
1513
    } else {
1514
        vme_context->used_reference_objects[list_index] = NULL;
1515
        vme_context->used_references[list_index] = NULL;
1516
        vme_context->ref_index_in_mb[list_index] = 0;
1517
    }
1518
}
1519
 
1520
/*
 * Insert the packed header data belonging to slice slice_index into
 * slice_batch through mfc_context->insert_object().
 *
 * First every raw packed buffer recorded for the slice
 * (slice_rawdata_index / slice_rawdata_count) is inserted, except buffers
 * of type VAEncPackedHeaderSlice, with the "last header" argument set to 0.
 * Then the slice header is inserted with the "last header" argument set to
 * 1: the packed slice header recorded in slice_header_index if there is
 * one, otherwise a header built by build_avc_slice_header().
 */
void intel_avc_slice_insert_packed_data(VADriverContextP ctx,
                                        struct encode_state *encode_state,
                                        struct intel_encoder_context *encoder_context,
                                        int slice_index,
                                        struct intel_batchbuffer *slice_batch)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    VAEncPackedHeaderParameterBuffer *hdr_param = NULL;
    unsigned int *hdr_bits = NULL;
    unsigned int bit_count;
    unsigned int emul_skip;
    int raw_count, raw_first, n;
    int hdr_index = -1;   /* -1: the application supplied no packed slice header */

    if (encode_state->slice_header_index[slice_index] != 0)
        hdr_index = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);

    raw_count = encode_state->slice_rawdata_count[slice_index];
    raw_first = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);

    for (n = 0; n < raw_count; n++) {
        hdr_bits = (unsigned int *)encode_state->packed_header_data_ext[raw_first + n]->buffer;
        hdr_param = (VAEncPackedHeaderParameterBuffer *)
                        (encode_state->packed_header_params_ext[raw_first + n]->buffer);

        /* The slice header itself is inserted last, so it is not handled here. */
        if (hdr_param->type != VAEncPackedHeaderSlice) {
            bit_count = hdr_param->bit_length;
            emul_skip = intel_avc_find_skipemulcnt((unsigned char *)hdr_bits, bit_count);

            /* The slice header still follows, hence last-header flag = 0. */
            mfc_context->insert_object(ctx,
                                       encoder_context,
                                       hdr_bits,
                                       ALIGN(bit_count, 32) >> 5,
                                       bit_count & 0x1f,
                                       emul_skip,
                                       0,
                                       0,
                                       !hdr_param->has_emulation_bytes,
                                       slice_batch);
        }
    }

    if (hdr_index != -1) {
        /* Packed slice header provided: it closes the slice's header data,
         * hence last-header flag = 1.
         */
        hdr_bits = (unsigned int *)encode_state->packed_header_data_ext[hdr_index]->buffer;
        hdr_param = (VAEncPackedHeaderParameterBuffer *)
                        (encode_state->packed_header_params_ext[hdr_index]->buffer);
        bit_count = hdr_param->bit_length;

        emul_skip = intel_avc_find_skipemulcnt((unsigned char *)hdr_bits, bit_count);

        mfc_context->insert_object(ctx,
                                   encoder_context,
                                   hdr_bits,
                                   ALIGN(bit_count, 32) >> 5,
                                   bit_count & 0x1f,
                                   emul_skip,
                                   1,
                                   0,
                                   !hdr_param->has_emulation_bytes,
                                   slice_batch);
        return;
    }

    /* No slice header data was passed, so the driver generates one
     * (normal H264 slice header).
     */
    {
        VAEncSequenceParameterBufferH264 *seq =
            (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
        VAEncPictureParameterBufferH264 *pic =
            (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
        VAEncSliceParameterBufferH264 *slice =
            (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
        unsigned char *generated = NULL;
        int generated_bits = 0;

        generated_bits = build_avc_slice_header(seq, pic, slice, &generated);

        mfc_context->insert_object(ctx, encoder_context,
                                   (unsigned int *)generated,
                                   ALIGN(generated_bits, 32) >> 5,
                                   generated_bits & 0x1f,
                                   5,  /* first 5 bytes are start code + nal unit type */
                                   1, 0, 1, slice_batch);

        free(generated);
    }
}