Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright © 2012 Intel Corporation
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the
  6.  * "Software"), to deal in the Software without restriction, including
  7.  * without limitation the rights to use, copy, modify, merge, publish,
  8.  * distribute, sub license, and/or sell copies of the Software, and to
  9.  * permit persons to whom the Software is furnished to do so, subject to
  10.  * the following conditions:
  11.  *
  12.  * The above copyright notice and this permission notice (including the
  13.  * next paragraph) shall be included in all copies or substantial portions
  14.  * of the Software.
  15.  *
  16.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  17.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  18.  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  19.  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
  20.  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  21.  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  22.  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23.  *
  24.  * Authors:
  25.  *    Xiang Haihao <haihao.xiang@intel.com>
  26.  *    Zhao Yakui <yakui.zhao@intel.com>
  27.  *
  28.  */
  29.  
  30. #include <stdio.h>
  31. #include <stdlib.h>
  32. #include <string.h>
  33. #include <assert.h>
  34. #include <math.h>
  35.  
  36. #include "intel_batchbuffer.h"
  37. #include "i965_defines.h"
  38. #include "i965_structs.h"
  39. #include "i965_drv_video.h"
  40. #include "i965_encoder.h"
  41. #include "i965_encoder_utils.h"
  42. #include "gen6_mfc.h"
  43. #include "gen6_vme.h"
  44. #include "intel_media.h"
  45.  
  /*
   * Clamp the lvalue x to the inclusive range [min, max] and store the result
   * back into x.  The body is wrapped in do { } while (0) so that
   * "BRC_CLIP(...);" expands to exactly one statement and can be used as the
   * unbraced body of an if/else.
   * Note: x, min and max may each be evaluated more than once, so the
   * arguments must not have side effects.
   */
  #define BRC_CLIP(x, min, max)                                               \
      do {                                                                    \
          (x) = (((x) > (max)) ? (max) : (((x) < (min)) ? (min) : (x)));      \
      } while (0)

  /* QP offsets kept between slice types when the BRC corrects the other types. */
  #define BRC_P_B_QP_DIFF 4
  #define BRC_I_P_QP_DIFF 2
  #define BRC_I_B_QP_DIFF (BRC_I_P_QP_DIFF + BRC_P_B_QP_DIFF)

  /* Target frame size of a P / B frame relative to an I frame. */
  #define BRC_PWEIGHT 0.6  /* weight of a P slice compared to an I slice */
  #define BRC_BWEIGHT 0.25 /* weight of a B slice compared to an I slice */

  #define BRC_QP_MAX_CHANGE 5 /* maximum QP modification in one step */
  /*
   * NOTE(review): the original comment on BRC_CY was truncated ("weight for")
   * and the three constants below are not referenced in the part of the file
   * reviewed here; their exact meaning needs to be confirmed.
   */
  #define BRC_CY 0.1
  #define BRC_CX_UNDERFLOW 5.
  #define BRC_CX_OVERFLOW -4.

  /* pi / 2, used to scale the argument of sin() in the QP delta computation. */
  #define BRC_PI_0_5 1.5707963267948966192313216916398

  /* Fallback for C libraries that do not provide log2f(). */
  #ifndef HAVE_LOG2F
  #define log2f(x) (logf(x)/(float)M_LN2)
  #endif
  68.  
  69. int intel_avc_enc_slice_type_fixup(int slice_type)
  70. {
  71.     if (slice_type == SLICE_TYPE_SP ||
  72.         slice_type == SLICE_TYPE_P)
  73.         slice_type = SLICE_TYPE_P;
  74.     else if (slice_type == SLICE_TYPE_SI ||
  75.              slice_type == SLICE_TYPE_I)
  76.         slice_type = SLICE_TYPE_I;
  77.     else {
  78.         if (slice_type != SLICE_TYPE_B)
  79.             WARN_ONCE("Invalid slice type for H.264 encoding!\n");
  80.  
  81.         slice_type = SLICE_TYPE_B;
  82.     }
  83.  
  84.     return slice_type;
  85. }
  86.  
  87. static void
  88. intel_mfc_bit_rate_control_context_init(struct encode_state *encode_state,
  89.                                         struct gen6_mfc_context *mfc_context)
  90. {
  91.     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
  92.     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
  93.     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
  94.     float fps =  pSequenceParameter->time_scale * 0.5 / pSequenceParameter->num_units_in_tick ;
  95.     int inter_mb_size = pSequenceParameter->bits_per_second * 1.0 / (fps+4.0) / width_in_mbs / height_in_mbs;
  96.     int intra_mb_size = inter_mb_size * 5.0;
  97.     int i;
  98.  
  99.     mfc_context->bit_rate_control_context[SLICE_TYPE_I].target_mb_size = intra_mb_size;
  100.     mfc_context->bit_rate_control_context[SLICE_TYPE_I].target_frame_size = intra_mb_size * width_in_mbs * height_in_mbs;
  101.     mfc_context->bit_rate_control_context[SLICE_TYPE_P].target_mb_size = inter_mb_size;
  102.     mfc_context->bit_rate_control_context[SLICE_TYPE_P].target_frame_size = inter_mb_size * width_in_mbs * height_in_mbs;
  103.     mfc_context->bit_rate_control_context[SLICE_TYPE_B].target_mb_size = inter_mb_size;
  104.     mfc_context->bit_rate_control_context[SLICE_TYPE_B].target_frame_size = inter_mb_size * width_in_mbs * height_in_mbs;
  105.  
  106.     for(i = 0 ; i < 3; i++) {
  107.         mfc_context->bit_rate_control_context[i].QpPrimeY = 26;
  108.         mfc_context->bit_rate_control_context[i].MaxQpNegModifier = 6;
  109.         mfc_context->bit_rate_control_context[i].MaxQpPosModifier = 6;
  110.         mfc_context->bit_rate_control_context[i].GrowInit = 6;
  111.         mfc_context->bit_rate_control_context[i].GrowResistance = 4;
  112.         mfc_context->bit_rate_control_context[i].ShrinkInit = 6;
  113.         mfc_context->bit_rate_control_context[i].ShrinkResistance = 4;
  114.        
  115.         mfc_context->bit_rate_control_context[i].Correct[0] = 8;
  116.         mfc_context->bit_rate_control_context[i].Correct[1] = 4;
  117.         mfc_context->bit_rate_control_context[i].Correct[2] = 2;
  118.         mfc_context->bit_rate_control_context[i].Correct[3] = 2;
  119.         mfc_context->bit_rate_control_context[i].Correct[4] = 4;
  120.         mfc_context->bit_rate_control_context[i].Correct[5] = 8;
  121.     }
  122.    
  123.     mfc_context->bit_rate_control_context[SLICE_TYPE_I].TargetSizeInWord = (intra_mb_size + 16)/ 16;
  124.     mfc_context->bit_rate_control_context[SLICE_TYPE_P].TargetSizeInWord = (inter_mb_size + 16)/ 16;
  125.     mfc_context->bit_rate_control_context[SLICE_TYPE_B].TargetSizeInWord = (inter_mb_size + 16)/ 16;
  126.  
  127.     mfc_context->bit_rate_control_context[SLICE_TYPE_I].MaxSizeInWord = mfc_context->bit_rate_control_context[SLICE_TYPE_I].TargetSizeInWord * 1.5;
  128.     mfc_context->bit_rate_control_context[SLICE_TYPE_P].MaxSizeInWord = mfc_context->bit_rate_control_context[SLICE_TYPE_P].TargetSizeInWord * 1.5;
  129.     mfc_context->bit_rate_control_context[SLICE_TYPE_B].MaxSizeInWord = mfc_context->bit_rate_control_context[SLICE_TYPE_B].TargetSizeInWord * 1.5;
  130. }
  131.  
  132. static void intel_mfc_brc_init(struct encode_state *encode_state,
  133.                                struct intel_encoder_context* encoder_context)
  134. {
  135.     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
  136.     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
  137.     VAEncMiscParameterBuffer* pMiscParamHRD = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeHRD]->buffer;
  138.     VAEncMiscParameterHRD* pParameterHRD = (VAEncMiscParameterHRD*)pMiscParamHRD->data;
  139.     double bitrate = pSequenceParameter->bits_per_second;
  140.     double framerate = (double)pSequenceParameter->time_scale /(2 * (double)pSequenceParameter->num_units_in_tick);
  141.     int inum = 1, pnum = 0, bnum = 0; /* Gop structure: number of I, P, B frames in the Gop. */
  142.     int intra_period = pSequenceParameter->intra_period;
  143.     int ip_period = pSequenceParameter->ip_period;
  144.     double qp1_size = 0.1 * 8 * 3 * (pSequenceParameter->picture_width_in_mbs<<4) * (pSequenceParameter->picture_height_in_mbs<<4)/2;
  145.     double qp51_size = 0.001 * 8 * 3 * (pSequenceParameter->picture_width_in_mbs<<4) * (pSequenceParameter->picture_height_in_mbs<<4)/2;
  146.     double bpf;
  147.  
  148.     if (pSequenceParameter->ip_period) {
  149.         pnum = (intra_period + ip_period - 1)/ip_period - 1;
  150.         bnum = intra_period - inum - pnum;
  151.     }
  152.  
  153.     mfc_context->brc.mode = encoder_context->rate_control_mode;
  154.  
  155.     mfc_context->brc.target_frame_size[SLICE_TYPE_I] = (int)((double)((bitrate * intra_period)/framerate) /
  156.                                                              (double)(inum + BRC_PWEIGHT * pnum + BRC_BWEIGHT * bnum));
  157.     mfc_context->brc.target_frame_size[SLICE_TYPE_P] = BRC_PWEIGHT * mfc_context->brc.target_frame_size[SLICE_TYPE_I];
  158.     mfc_context->brc.target_frame_size[SLICE_TYPE_B] = BRC_BWEIGHT * mfc_context->brc.target_frame_size[SLICE_TYPE_I];
  159.  
  160.     mfc_context->brc.gop_nums[SLICE_TYPE_I] = inum;
  161.     mfc_context->brc.gop_nums[SLICE_TYPE_P] = pnum;
  162.     mfc_context->brc.gop_nums[SLICE_TYPE_B] = bnum;
  163.  
  164.     bpf = mfc_context->brc.bits_per_frame = bitrate/framerate;
  165.  
  166.     mfc_context->hrd.buffer_size = (double)pParameterHRD->buffer_size;
  167.     mfc_context->hrd.current_buffer_fullness =
  168.         (double)(pParameterHRD->initial_buffer_fullness < mfc_context->hrd.buffer_size)?
  169.         pParameterHRD->initial_buffer_fullness: mfc_context->hrd.buffer_size/2.;
  170.     mfc_context->hrd.target_buffer_fullness = (double)mfc_context->hrd.buffer_size/2.;
  171.     mfc_context->hrd.buffer_capacity = (double)mfc_context->hrd.buffer_size/qp1_size;
  172.     mfc_context->hrd.violation_noted = 0;
  173.  
  174.     if ((bpf > qp51_size) && (bpf < qp1_size)) {
  175.         mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY = 51 - 50*(bpf - qp51_size)/(qp1_size - qp51_size);
  176.     }
  177.     else if (bpf >= qp1_size)
  178.         mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY = 1;
  179.     else if (bpf <= qp51_size)
  180.         mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY = 51;
  181.  
  182.     mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY = mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY;
  183.     mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY = mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY;
  184.  
  185.     BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY, 1, 51);
  186.     BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY, 1, 51);
  187.     BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY, 1, 51);
  188. }
  189.  
  190. int intel_mfc_update_hrd(struct encode_state *encode_state,
  191.                          struct gen6_mfc_context *mfc_context,
  192.                          int frame_bits)
  193. {
  194.     double prev_bf = mfc_context->hrd.current_buffer_fullness;
  195.  
  196.     mfc_context->hrd.current_buffer_fullness -= frame_bits;
  197.  
  198.     if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness <= 0.) {
  199.         mfc_context->hrd.current_buffer_fullness = prev_bf;
  200.         return BRC_UNDERFLOW;
  201.     }
  202.    
  203.     mfc_context->hrd.current_buffer_fullness += mfc_context->brc.bits_per_frame;
  204.     if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness > mfc_context->hrd.buffer_size) {
  205.         if (mfc_context->brc.mode == VA_RC_VBR)
  206.             mfc_context->hrd.current_buffer_fullness = mfc_context->hrd.buffer_size;
  207.         else {
  208.             mfc_context->hrd.current_buffer_fullness = prev_bf;
  209.             return BRC_OVERFLOW;
  210.         }
  211.     }
  212.     return BRC_NO_HRD_VIOLATION;
  213. }
  214.  
  215. int intel_mfc_brc_postpack(struct encode_state *encode_state,
  216.                            struct gen6_mfc_context *mfc_context,
  217.                            int frame_bits)
  218. {
  219.     gen6_brc_status sts = BRC_NO_HRD_VIOLATION;
  220.     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
  221.     int slicetype = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
  222.     int qpi = mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY;
  223.     int qpp = mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY;
  224.     int qpb = mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY;
  225.     int qp; // quantizer of previously encoded slice of current type
  226.     int qpn; // predicted quantizer for next frame of current type in integer format
  227.     double qpf; // predicted quantizer for next frame of current type in float format
  228.     double delta_qp; // QP correction
  229.     int target_frame_size, frame_size_next;
  230.     /* Notes:
  231.      *  x - how far we are from HRD buffer borders
  232.      *  y - how far we are from target HRD buffer fullness
  233.      */
  234.     double x, y;
  235.     double frame_size_alpha;
  236.  
  237.     qp = mfc_context->bit_rate_control_context[slicetype].QpPrimeY;
  238.  
  239.     target_frame_size = mfc_context->brc.target_frame_size[slicetype];
  240.     if (mfc_context->hrd.buffer_capacity < 5)
  241.         frame_size_alpha = 0;
  242.     else
  243.         frame_size_alpha = (double)mfc_context->brc.gop_nums[slicetype];
  244.     if (frame_size_alpha > 30) frame_size_alpha = 30;
  245.     frame_size_next = target_frame_size + (double)(target_frame_size - frame_bits) /
  246.         (double)(frame_size_alpha + 1.);
  247.  
  248.     /* frame_size_next: avoiding negative number and too small value */
  249.     if ((double)frame_size_next < (double)(target_frame_size * 0.25))
  250.         frame_size_next = (int)((double)target_frame_size * 0.25);
  251.  
  252.     qpf = (double)qp * target_frame_size / frame_size_next;
  253.     qpn = (int)(qpf + 0.5);
  254.  
  255.     if (qpn == qp) {
  256.         /* setting qpn we round qpf making mistakes: now we are trying to compensate this */
  257.         mfc_context->brc.qpf_rounding_accumulator += qpf - qpn;
  258.         if (mfc_context->brc.qpf_rounding_accumulator > 1.0) {
  259.             qpn++;
  260.             mfc_context->brc.qpf_rounding_accumulator = 0.;
  261.         } else if (mfc_context->brc.qpf_rounding_accumulator < -1.0) {
  262.             qpn--;
  263.             mfc_context->brc.qpf_rounding_accumulator = 0.;
  264.         }
  265.     }
  266.     /* making sure that QP is not changing too fast */
  267.     if ((qpn - qp) > BRC_QP_MAX_CHANGE) qpn = qp + BRC_QP_MAX_CHANGE;
  268.     else if ((qpn - qp) < -BRC_QP_MAX_CHANGE) qpn = qp - BRC_QP_MAX_CHANGE;
  269.     /* making sure that with QP predictions we did do not leave QPs range */
  270.     BRC_CLIP(qpn, 1, 51);
  271.  
  272.     /* checking wthether HRD compliance is still met */
  273.     sts = intel_mfc_update_hrd(encode_state, mfc_context, frame_bits);
  274.  
  275.     /* calculating QP delta as some function*/
  276.     x = mfc_context->hrd.target_buffer_fullness - mfc_context->hrd.current_buffer_fullness;
  277.     if (x > 0) {
  278.         x /= mfc_context->hrd.target_buffer_fullness;
  279.         y = mfc_context->hrd.current_buffer_fullness;
  280.     }
  281.     else {
  282.         x /= (mfc_context->hrd.buffer_size - mfc_context->hrd.target_buffer_fullness);
  283.         y = mfc_context->hrd.buffer_size - mfc_context->hrd.current_buffer_fullness;
  284.     }
  285.     if (y < 0.01) y = 0.01;
  286.     if (x > 1) x = 1;
  287.     else if (x < -1) x = -1;
  288.  
  289.     delta_qp = BRC_QP_MAX_CHANGE*exp(-1/y)*sin(BRC_PI_0_5 * x);
  290.     qpn = (int)(qpn + delta_qp + 0.5);
  291.  
  292.     /* making sure that with QP predictions we did do not leave QPs range */
  293.     BRC_CLIP(qpn, 1, 51);
  294.  
  295.     if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation
  296.         /* correcting QPs of slices of other types */
  297.         if (slicetype == SLICE_TYPE_P) {
  298.             if (abs(qpn + BRC_P_B_QP_DIFF - qpb) > 2)
  299.                 mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY += (qpn + BRC_P_B_QP_DIFF - qpb) >> 1;
  300.             if (abs(qpn - BRC_I_P_QP_DIFF - qpi) > 2)
  301.                 mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY += (qpn - BRC_I_P_QP_DIFF - qpi) >> 1;
  302.         } else if (slicetype == SLICE_TYPE_I) {
  303.             if (abs(qpn + BRC_I_B_QP_DIFF - qpb) > 4)
  304.                 mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY += (qpn + BRC_I_B_QP_DIFF - qpb) >> 2;
  305.             if (abs(qpn + BRC_I_P_QP_DIFF - qpp) > 2)
  306.                 mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY += (qpn + BRC_I_P_QP_DIFF - qpp) >> 2;
  307.         } else { // SLICE_TYPE_B
  308.             if (abs(qpn - BRC_P_B_QP_DIFF - qpp) > 2)
  309.                 mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY += (qpn - BRC_P_B_QP_DIFF - qpp) >> 1;
  310.             if (abs(qpn - BRC_I_B_QP_DIFF - qpi) > 4)
  311.                 mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY += (qpn - BRC_I_B_QP_DIFF - qpi) >> 2;
  312.         }
  313.         BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY, 1, 51);
  314.         BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY, 1, 51);
  315.         BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY, 1, 51);
  316.     } else if (sts == BRC_UNDERFLOW) { // underflow
  317.         if (qpn <= qp) qpn = qp + 1;
  318.         if (qpn > 51) {
  319.             qpn = 51;
  320.             sts = BRC_UNDERFLOW_WITH_MAX_QP; //underflow with maxQP
  321.         }
  322.     } else if (sts == BRC_OVERFLOW) {
  323.         if (qpn >= qp) qpn = qp - 1;
  324.         if (qpn < 1) { // < 0 (?) overflow with minQP
  325.             qpn = 1;
  326.             sts = BRC_OVERFLOW_WITH_MIN_QP; // bit stuffing to be done
  327.         }
  328.     }
  329.  
  330.     mfc_context->bit_rate_control_context[slicetype].QpPrimeY = qpn;
  331.  
  332.     return sts;
  333. }
  334.  
  335. static void intel_mfc_hrd_context_init(struct encode_state *encode_state,
  336.                                        struct intel_encoder_context *encoder_context)
  337. {
  338.     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
  339.     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
  340.     unsigned int rate_control_mode = encoder_context->rate_control_mode;
  341.     int target_bit_rate = pSequenceParameter->bits_per_second;
  342.    
  343.     // current we only support CBR mode.
  344.     if (rate_control_mode == VA_RC_CBR) {
  345.         mfc_context->vui_hrd.i_bit_rate_value = target_bit_rate >> 10;
  346.         mfc_context->vui_hrd.i_cpb_size_value = (target_bit_rate * 8) >> 10;
  347.         mfc_context->vui_hrd.i_initial_cpb_removal_delay = mfc_context->vui_hrd.i_cpb_size_value * 0.5 * 1024 / target_bit_rate * 90000;
  348.         mfc_context->vui_hrd.i_cpb_removal_delay = 2;
  349.         mfc_context->vui_hrd.i_frame_number = 0;
  350.  
  351.         mfc_context->vui_hrd.i_initial_cpb_removal_delay_length = 24;
  352.         mfc_context->vui_hrd.i_cpb_removal_delay_length = 24;
  353.         mfc_context->vui_hrd.i_dpb_output_delay_length = 24;
  354.     }
  355.  
  356. }
  357.  
  358. void
  359. intel_mfc_hrd_context_update(struct encode_state *encode_state,
  360.                              struct gen6_mfc_context *mfc_context)
  361. {
  362.     mfc_context->vui_hrd.i_frame_number++;
  363. }
  364.  
  365. int intel_mfc_interlace_check(VADriverContextP ctx,
  366.                               struct encode_state *encode_state,
  367.                               struct intel_encoder_context *encoder_context)
  368. {
  369.     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
  370.     VAEncSliceParameterBufferH264 *pSliceParameter;
  371.     int i;
  372.     int mbCount = 0;
  373.     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
  374.     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
  375.  
  376.     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
  377.         pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[i]->buffer;
  378.         mbCount += pSliceParameter->num_macroblocks;
  379.     }
  380.    
  381.     if ( mbCount == ( width_in_mbs * height_in_mbs ) )
  382.         return 0;
  383.  
  384.     return 1;
  385. }
  386.  
  387. /*
  388.  * Check whether the parameters related with CBR are updated and decide whether
  389.  * it needs to reinitialize the configuration related with CBR.
  390.  * Currently it will check the following parameters:
  391.  *      bits_per_second
  392.  *      frame_rate
  393.  *      gop_configuration(intra_period, ip_period, intra_idr_period)
  394.  */
  395. static bool intel_mfc_brc_updated_check(struct encode_state *encode_state,
  396.                            struct intel_encoder_context *encoder_context)
  397. {
  398.     unsigned int rate_control_mode = encoder_context->rate_control_mode;
  399.     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
  400.     double cur_fps, cur_bitrate;
  401.     VAEncSequenceParameterBufferH264 *pSequenceParameter;
  402.  
  403.  
  404.     if (rate_control_mode != VA_RC_CBR) {
  405.         return false;
  406.     }
  407.  
  408.     pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
  409.  
  410.     cur_bitrate = pSequenceParameter->bits_per_second;
  411.     cur_fps = (double)pSequenceParameter->time_scale /
  412.                 (2 * (double)pSequenceParameter->num_units_in_tick);
  413.  
  414.     if ((cur_bitrate == mfc_context->brc.saved_bps) &&
  415.         (cur_fps == mfc_context->brc.saved_fps) &&
  416.         (pSequenceParameter->intra_period == mfc_context->brc.saved_intra_period) &&
  417.         (pSequenceParameter->intra_idr_period == mfc_context->brc.saved_idr_period) &&
  418.         (pSequenceParameter->intra_period == mfc_context->brc.saved_intra_period)) {
  419.         /* the parameters related with CBR are not updaetd */
  420.         return false;
  421.     }
  422.  
  423.     mfc_context->brc.saved_ip_period = pSequenceParameter->ip_period;
  424.     mfc_context->brc.saved_intra_period = pSequenceParameter->intra_period;
  425.     mfc_context->brc.saved_idr_period = pSequenceParameter->intra_idr_period;
  426.     mfc_context->brc.saved_fps = cur_fps;
  427.     mfc_context->brc.saved_bps = cur_bitrate;
  428.     return true;
  429. }
  430.  
  431. void intel_mfc_brc_prepare(struct encode_state *encode_state,
  432.                            struct intel_encoder_context *encoder_context)
  433. {
  434.     unsigned int rate_control_mode = encoder_context->rate_control_mode;
  435.     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
  436.  
  437.     if (rate_control_mode == VA_RC_CBR) {
  438.         bool brc_updated;
  439.         assert(encoder_context->codec != CODEC_MPEG2);
  440.  
  441.         brc_updated = intel_mfc_brc_updated_check(encode_state, encoder_context);
  442.  
  443.         /*Programing bit rate control */
  444.         if ((mfc_context->bit_rate_control_context[SLICE_TYPE_I].MaxSizeInWord == 0) ||
  445.              brc_updated) {
  446.             intel_mfc_bit_rate_control_context_init(encode_state, mfc_context);
  447.             intel_mfc_brc_init(encode_state, encoder_context);
  448.         }
  449.  
  450.         /*Programing HRD control */
  451.         if ((mfc_context->vui_hrd.i_cpb_size_value == 0) || brc_updated )
  452.             intel_mfc_hrd_context_init(encode_state, encoder_context);    
  453.     }
  454. }
  455.  
  /*
   * Compute how many leading bytes of a packed header are counted as "skip"
   * bytes for the header insertion.
   *
   * buf holds the packed header and bits_length is its length in bits; the
   * scan covers the length rounded up to a multiple of 32 bits.  The function
   * looks for the first 00 00 01 or 00 00 00 01 start code and returns the
   * number of bytes up to and including the NAL unit header byte (plus three
   * more bytes for NAL unit types 14, 20 and 21).
   *
   * Returns 0, after a one-time warning, when no start code is found.  A
   * one-time warning is also emitted when the count exceeds
   * HW_MAX_SKIP_LENGTH, but the count is still returned.
   */
  static int intel_avc_find_skipemulcnt(unsigned char *buf, int bits_length)
  {
  #define NAL_UNIT_TYPE_MASK 0x1f
  #define HW_MAX_SKIP_LENGTH 15

      const int byte_length = ALIGN(bits_length, 32) >> 3;
      const int scan_end = byte_length - 4;
      int pos;            /* offset of the first byte of the start code */
      int extra_zero;     /* 1 for the four byte form 00 00 00 01 */
      int nal_unit_type;
      int skip_cnt = 0;

      for (pos = 0; pos < scan_end; pos++) {
          if (buf[pos] != 0 || buf[pos + 1] != 0)
              continue;
          if (buf[pos + 2] == 1 ||
              (buf[pos + 2] == 0 && buf[pos + 3] == 1))
              break;
      }

      if (pos >= scan_end) {
          /* a warning is emitted, but the data will be inserted anyway */
          WARN_ONCE("Invalid packed header data. "
                     "Can't find the 000001 start_prefix code\n");
          return 0;
      }

      /* buf[pos] and buf[pos + 1] are zero here, so only the third byte matters */
      extra_zero = (buf[pos + 2] != 1);

      skip_cnt = pos + extra_zero + 3;

      /* the NAL unit header byte is counted as well */
      nal_unit_type = buf[skip_cnt] & NAL_UNIT_TYPE_MASK;
      skip_cnt += 1;

      /* more unit header bytes are counted for MVC/SVC */
      if (nal_unit_type == 14 || nal_unit_type == 20 || nal_unit_type == 21)
          skip_cnt += 3;

      if (skip_cnt > HW_MAX_SKIP_LENGTH) {
          WARN_ONCE("Too many leading zeros are padded for packed data. "
                     "It is beyond the HW range.!!!\n");
      }

      return skip_cnt;
  }
  507.  
  508. void intel_mfc_avc_pipeline_header_programing(VADriverContextP ctx,
  509.                                               struct encode_state *encode_state,
  510.                                               struct intel_encoder_context *encoder_context,
  511.                                               struct intel_batchbuffer *slice_batch)
  512. {
  513.     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
  514.     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
  515.     unsigned int rate_control_mode = encoder_context->rate_control_mode;
  516.     unsigned int skip_emul_byte_cnt;
  517.  
  518.     if (encode_state->packed_header_data[idx]) {
  519.         VAEncPackedHeaderParameterBuffer *param = NULL;
  520.         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
  521.         unsigned int length_in_bits;
  522.  
  523.         assert(encode_state->packed_header_param[idx]);
  524.         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
  525.         length_in_bits = param->bit_length;
  526.  
  527.         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
  528.         mfc_context->insert_object(ctx,
  529.                                    encoder_context,
  530.                                    header_data,
  531.                                    ALIGN(length_in_bits, 32) >> 5,
  532.                                    length_in_bits & 0x1f,
  533.                                    skip_emul_byte_cnt,
  534.                                    0,
  535.                                    0,
  536.                                    !param->has_emulation_bytes,
  537.                                    slice_batch);
  538.     }
  539.  
  540.     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);
  541.  
  542.     if (encode_state->packed_header_data[idx]) {
  543.         VAEncPackedHeaderParameterBuffer *param = NULL;
  544.         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
  545.         unsigned int length_in_bits;
  546.  
  547.         assert(encode_state->packed_header_param[idx]);
  548.         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
  549.         length_in_bits = param->bit_length;
  550.  
  551.         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
  552.  
  553.         mfc_context->insert_object(ctx,
  554.                                    encoder_context,
  555.                                    header_data,
  556.                                    ALIGN(length_in_bits, 32) >> 5,
  557.                                    length_in_bits & 0x1f,
  558.                                    skip_emul_byte_cnt,
  559.                                    0,
  560.                                    0,
  561.                                    !param->has_emulation_bytes,
  562.                                    slice_batch);
  563.     }
  564.    
  565.     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);
  566.  
  567.     if (encode_state->packed_header_data[idx]) {
  568.         VAEncPackedHeaderParameterBuffer *param = NULL;
  569.         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
  570.         unsigned int length_in_bits;
  571.  
  572.         assert(encode_state->packed_header_param[idx]);
  573.         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
  574.         length_in_bits = param->bit_length;
  575.  
  576.         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
  577.         mfc_context->insert_object(ctx,
  578.                                    encoder_context,
  579.                                    header_data,
  580.                                    ALIGN(length_in_bits, 32) >> 5,
  581.                                    length_in_bits & 0x1f,
  582.                                    skip_emul_byte_cnt,
  583.                                    0,
  584.                                    0,
  585.                                    !param->has_emulation_bytes,
  586.                                    slice_batch);
  587.     } else if (rate_control_mode == VA_RC_CBR) {
  588.         // this is frist AU
  589.         struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
  590.  
  591.         unsigned char *sei_data = NULL;
  592.    
  593.         int length_in_bits = build_avc_sei_buffer_timing(
  594.             mfc_context->vui_hrd.i_initial_cpb_removal_delay_length,
  595.             mfc_context->vui_hrd.i_initial_cpb_removal_delay,
  596.             0,
  597.             mfc_context->vui_hrd.i_cpb_removal_delay_length,                                                       mfc_context->vui_hrd.i_cpb_removal_delay * mfc_context->vui_hrd.i_frame_number,
  598.             mfc_context->vui_hrd.i_dpb_output_delay_length,
  599.             0,
  600.             &sei_data);
  601.         mfc_context->insert_object(ctx,
  602.                                    encoder_context,
  603.                                    (unsigned int *)sei_data,
  604.                                    ALIGN(length_in_bits, 32) >> 5,
  605.                                    length_in_bits & 0x1f,
  606.                                    4,  
  607.                                    0,  
  608.                                    0,  
  609.                                    1,
  610.                                    slice_batch);  
  611.         free(sei_data);
  612.     }
  613. }
  614.  
  615. VAStatus intel_mfc_avc_prepare(VADriverContextP ctx,
  616.                                struct encode_state *encode_state,
  617.                                struct intel_encoder_context *encoder_context)
  618. {
  619.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  620.     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
  621.     struct object_surface *obj_surface;
  622.     struct object_buffer *obj_buffer;
  623.     GenAvcSurface *gen6_avc_surface;
  624.     dri_bo *bo;
  625.     VAStatus vaStatus = VA_STATUS_SUCCESS;
  626.     int i, j, enable_avc_ildb = 0;
  627.     VAEncSliceParameterBufferH264 *slice_param;
  628.     struct i965_coded_buffer_segment *coded_buffer_segment;
  629.     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
  630.     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
  631.     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
  632.  
  633.     if (IS_GEN6(i965->intel.device_info)) {
  634.         /* On the SNB it should be fixed to 128 for the DMV buffer */
  635.         width_in_mbs = 128;
  636.     }
  637.  
  638.     for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
  639.         assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
  640.         slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;
  641.  
  642.         for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
  643.             assert((slice_param->slice_type == SLICE_TYPE_I) ||
  644.                    (slice_param->slice_type == SLICE_TYPE_SI) ||
  645.                    (slice_param->slice_type == SLICE_TYPE_P) ||
  646.                    (slice_param->slice_type == SLICE_TYPE_SP) ||
  647.                    (slice_param->slice_type == SLICE_TYPE_B));
  648.  
  649.             if (slice_param->disable_deblocking_filter_idc != 1) {
  650.                 enable_avc_ildb = 1;
  651.                 break;
  652.             }
  653.  
  654.             slice_param++;
  655.         }
  656.     }
  657.  
  658.     /*Setup all the input&output object*/
  659.  
  660.     /* Setup current frame and current direct mv buffer*/
  661.     obj_surface = encode_state->reconstructed_object;
  662.     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
  663.  
  664.     if ( obj_surface->private_data == NULL) {
  665.         gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
  666.         gen6_avc_surface->dmv_top =
  667.             dri_bo_alloc(i965->intel.bufmgr,
  668.                          "Buffer",
  669.                          68 * width_in_mbs * height_in_mbs,
  670.                          64);
  671.         gen6_avc_surface->dmv_bottom =
  672.             dri_bo_alloc(i965->intel.bufmgr,
  673.                          "Buffer",
  674.                          68 * width_in_mbs * height_in_mbs,
  675.                          64);
  676.         assert(gen6_avc_surface->dmv_top);
  677.         assert(gen6_avc_surface->dmv_bottom);
  678.         obj_surface->private_data = (void *)gen6_avc_surface;
  679.         obj_surface->free_private_data = (void *)gen_free_avc_surface;
  680.     }
  681.     gen6_avc_surface = (GenAvcSurface *) obj_surface->private_data;
  682.     mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo = gen6_avc_surface->dmv_top;
  683.     mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 1].bo = gen6_avc_surface->dmv_bottom;
  684.     dri_bo_reference(gen6_avc_surface->dmv_top);
  685.     dri_bo_reference(gen6_avc_surface->dmv_bottom);
  686.  
  687.     if (enable_avc_ildb) {
  688.         mfc_context->post_deblocking_output.bo = obj_surface->bo;
  689.         dri_bo_reference(mfc_context->post_deblocking_output.bo);
  690.     } else {
  691.         mfc_context->pre_deblocking_output.bo = obj_surface->bo;
  692.         dri_bo_reference(mfc_context->pre_deblocking_output.bo);
  693.     }
  694.  
  695.     mfc_context->surface_state.width = obj_surface->orig_width;
  696.     mfc_context->surface_state.height = obj_surface->orig_height;
  697.     mfc_context->surface_state.w_pitch = obj_surface->width;
  698.     mfc_context->surface_state.h_pitch = obj_surface->height;
  699.    
  700.     /* Setup reference frames and direct mv buffers*/
  701.     for(i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++) {
  702.         obj_surface = encode_state->reference_objects[i];
  703.        
  704.         if (obj_surface && obj_surface->bo) {
  705.             mfc_context->reference_surfaces[i].bo = obj_surface->bo;
  706.             dri_bo_reference(obj_surface->bo);
  707.  
  708.             /* Check DMV buffer */
  709.             if ( obj_surface->private_data == NULL) {
  710.                
  711.                 gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
  712.                 gen6_avc_surface->dmv_top =
  713.                     dri_bo_alloc(i965->intel.bufmgr,
  714.                                  "Buffer",
  715.                                  68 * width_in_mbs * height_in_mbs,
  716.                                  64);
  717.                 gen6_avc_surface->dmv_bottom =
  718.                     dri_bo_alloc(i965->intel.bufmgr,
  719.                                  "Buffer",
  720.                                  68 * width_in_mbs * height_in_mbs,
  721.                                  64);
  722.                 assert(gen6_avc_surface->dmv_top);
  723.                 assert(gen6_avc_surface->dmv_bottom);
  724.                 obj_surface->private_data = gen6_avc_surface;
  725.                 obj_surface->free_private_data = gen_free_avc_surface;
  726.             }
  727.    
  728.             gen6_avc_surface = (GenAvcSurface *) obj_surface->private_data;
  729.             /* Setup DMV buffer */
  730.             mfc_context->direct_mv_buffers[i*2].bo = gen6_avc_surface->dmv_top;
  731.             mfc_context->direct_mv_buffers[i*2+1].bo = gen6_avc_surface->dmv_bottom;
  732.             dri_bo_reference(gen6_avc_surface->dmv_top);
  733.             dri_bo_reference(gen6_avc_surface->dmv_bottom);
  734.         } else {
  735.             break;
  736.         }
  737.     }
  738.        
  739.     mfc_context->uncompressed_picture_source.bo = encode_state->input_yuv_object->bo;
  740.     dri_bo_reference(mfc_context->uncompressed_picture_source.bo);
  741.  
  742.     obj_buffer = encode_state->coded_buf_object;
  743.     bo = obj_buffer->buffer_store->bo;
  744.     mfc_context->mfc_indirect_pak_bse_object.bo = bo;
  745.     mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
  746.     mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
  747.     dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);
  748.    
  749.     dri_bo_map(bo, 1);
  750.     coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
  751.     coded_buffer_segment->mapped = 0;
  752.     coded_buffer_segment->codec = encoder_context->codec;
  753.     dri_bo_unmap(bo);
  754.  
  755.     return vaStatus;
  756. }
  /*
   * Convert a cost into the nearest value representable in the LUT format.
   *
   * A LUT entry is a pair of 4-bit fields packed as (shift << 4) | base and
   * stands for the value base << shift, i.e. 2^K * X with X in [1, 15].
   *
   * Derivation of the candidate shifts for a value with n = floor(log2(value)):
   *    2^K * X = 2^n * (1 + deltaX)
   *    K + log2(X) = n + log2(1 + deltaX)
   *    log2(X) = n - K + log2(1 + deltaX)
   *    As X is in the range [1, 15]:
   *      4 > n - K + log2(1 + deltaX) >= 0
   *      =>  n + log2(1 + deltaX) >= K > n - 4 + log2(1 + deltaX)
   * so only K = n - 3 .. n have to be tried.
   *
   * value: cost to encode; values <= 0 encode as 0.
   * max:   largest allowed LUT entry, in the same packed format.
   *
   * Returns the packed entry whose decoded value is closest to `value`, or
   * `max` when that decoded value exceeds the decoded value of `max`.
   *
   * The comparison against `max` uses the real decoded candidate, so very large
   * inputs (whose shift does not fit in 4 bits) are clamped to `max` instead of
   * yielding an entry wider than one byte. All arithmetic is unsigned so that
   * inputs up to INT_MAX cannot overflow.
   */
  int intel_format_lutvalue(int value, int max)
  {
      unsigned int target, max_decoded;
      unsigned int best_decoded = 0;
      int ret, logvalue;

      if (value <= 0)
          return 0;

      target = (unsigned int)value;

      /* Exact floor(log2(value)) without going through float. */
      logvalue = 0;
      while ((target >> (logvalue + 1)) != 0)
          logvalue++;

      if (logvalue < 4) {
          /* Fits in the base nibble with shift 0. */
          ret = value;
          best_decoded = target;
      } else {
          unsigned int best_error = target;
          int shift;

          ret = -1;
          for (shift = logvalue - 4 + 1; shift <= logvalue; shift++) {
              /* Rounded division by 2^shift (ties round down). */
              unsigned int base = (target + (1u << (shift - 1)) - 1) >> shift;
              unsigned int decoded, error;

              if (base >= 16)
                  continue;

              decoded = base << shift;
              error = (decoded > target) ? (decoded - target) : (target - decoded);
              if (error < best_error) {
                  best_error = error;
                  best_decoded = decoded;
                  ret = (shift << 4) | (int)base;
                  if (error == 0)
                      break;
              }
          }
      }

      max_decoded = (unsigned int)(max & 0xf) << ((max & 0xf0) >> 4);
      if (ret < 0 || best_decoded > max_decoded)
          ret = max;

      return ret;
  }
  812.  
  813.  
  814. #define         QP_MAX                  52
  815.  
  816.  
  817. static float intel_lambda_qp(int qp)
  818. {
  819.     float value, lambdaf;
  820.     value = qp;
  821.     value = value / 6 - 2;
  822.     if (value < 0)
  823.         value = 0;
  824.     lambdaf = roundf(powf(2, value));
  825.     return lambdaf;
  826. }
  827.  
  828.  
  829. void intel_vme_update_mbmv_cost(VADriverContextP ctx,
  830.                                 struct encode_state *encode_state,
  831.                                 struct intel_encoder_context *encoder_context)
  832. {
  833.     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
  834.     struct gen6_vme_context *vme_context = encoder_context->vme_context;
  835.     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
  836.     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
  837.     int qp, m_cost, j, mv_count;
  838.     uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
  839.     float   lambda, m_costf;
  840.  
  841.     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
  842.  
  843.    
  844.     if (encoder_context->rate_control_mode == VA_RC_CQP)
  845.         qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
  846.     else
  847.         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
  848.  
  849.     if (vme_state_message == NULL)
  850.         return;
  851.  
  852.     assert(qp <= QP_MAX);
  853.     lambda = intel_lambda_qp(qp);
  854.     if (slice_type == SLICE_TYPE_I) {
  855.         vme_state_message[MODE_INTRA_16X16] = 0;
  856.         m_cost = lambda * 4;
  857.         vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
  858.         m_cost = lambda * 16;
  859.         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
  860.         m_cost = lambda * 3;
  861.         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
  862.     } else {
  863.         m_cost = 0;
  864.         vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
  865.         for (j = 1; j < 3; j++) {
  866.             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
  867.             m_cost = (int)m_costf;
  868.             vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
  869.         }
  870.         mv_count = 3;
  871.         for (j = 4; j <= 64; j *= 2) {
  872.             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
  873.             m_cost = (int)m_costf;
  874.             vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
  875.             mv_count++;
  876.         }
  877.  
  878.         if (qp <= 25) {
  879.             vme_state_message[MODE_INTRA_16X16] = 0x4a;
  880.             vme_state_message[MODE_INTRA_8X8] = 0x4a;
  881.             vme_state_message[MODE_INTRA_4X4] = 0x4a;
  882.             vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
  883.             vme_state_message[MODE_INTER_16X16] = 0x4a;
  884.             vme_state_message[MODE_INTER_16X8] = 0x4a;
  885.             vme_state_message[MODE_INTER_8X8] = 0x4a;
  886.             vme_state_message[MODE_INTER_8X4] = 0x4a;
  887.             vme_state_message[MODE_INTER_4X4] = 0x4a;
  888.             vme_state_message[MODE_INTER_BWD] = 0x2a;
  889.             return;
  890.         }
  891.         m_costf = lambda * 10;
  892.         vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
  893.         m_cost = lambda * 14;
  894.         vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
  895.         m_cost = lambda * 24;
  896.         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
  897.         m_costf = lambda * 3.5;
  898.         m_cost = m_costf;
  899.         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
  900.         if (slice_type == SLICE_TYPE_P) {
  901.             m_costf = lambda * 2.5;
  902.             m_cost = m_costf;
  903.             vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
  904.             m_costf = lambda * 4;
  905.             m_cost = m_costf;
  906.             vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
  907.             m_costf = lambda * 1.5;
  908.             m_cost = m_costf;
  909.             vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
  910.             m_costf = lambda * 3;
  911.             m_cost = m_costf;
  912.             vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
  913.             m_costf = lambda * 5;
  914.             m_cost = m_costf;
  915.             vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
  916.             /* BWD is not used in P-frame */
  917.             vme_state_message[MODE_INTER_BWD] = 0;
  918.         } else {
  919.             m_costf = lambda * 2.5;
  920.             m_cost = m_costf;
  921.             vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
  922.             m_costf = lambda * 5.5;
  923.             m_cost = m_costf;
  924.             vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
  925.             m_costf = lambda * 3.5;
  926.             m_cost = m_costf;
  927.             vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
  928.             m_costf = lambda * 5.0;
  929.             m_cost = m_costf;
  930.             vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
  931.             m_costf = lambda * 6.5;
  932.             m_cost = m_costf;
  933.             vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
  934.             m_costf = lambda * 1.5;
  935.             m_cost = m_costf;
  936.             vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
  937.         }
  938.     }
  939. }
  940.  
  941.  
  942. #define         MB_SCOREBOARD_A         (1 << 0)
  943. #define         MB_SCOREBOARD_B         (1 << 1)
  944. #define         MB_SCOREBOARD_C         (1 << 2)
  945. void
  946. gen7_vme_scoreboard_init(VADriverContextP ctx, struct gen6_vme_context *vme_context)
  947. {
  948.     vme_context->gpe_context.vfe_desc5.scoreboard0.enable = 1;
  949.     vme_context->gpe_context.vfe_desc5.scoreboard0.type = SCOREBOARD_STALLING;
  950.     vme_context->gpe_context.vfe_desc5.scoreboard0.mask = (MB_SCOREBOARD_A |
  951.                                                            MB_SCOREBOARD_B |
  952.                                                            MB_SCOREBOARD_C);
  953.  
  954.     /* In VME prediction the current mb depends on the neighbour
  955.      * A/B/C macroblock. So the left/up/up-right dependency should
  956.      * be considered.
  957.      */
  958.     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x0 = -1;
  959.     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y0 = 0;
  960.     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x1 = 0;
  961.     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y1 = -1;
  962.     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x2 = 1;
  963.     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y2 = -1;
  964.        
  965.     vme_context->gpe_context.vfe_desc7.dword = 0;
  966.     return;
  967. }
  968.  
  /*
   * Check whether the macroblock at (x_index, y_index) lies inside the picture
   * and inside the slice covering the macroblock addresses
   * [first_mb, first_mb + num_mb).
   *
   * Returns 0 when the macroblock is in bounds and -1 when it is out of bounds
   * (so callers loop on `!loop_in_bounds(...)`).
   *
   * The upper slice limit is exclusive: first_mb + num_mb is the first
   * macroblock after the slice and must be rejected.
   */
  static inline int loop_in_bounds(int x_index, int y_index, int first_mb, int num_mb, int mb_width, int mb_height)
  {
      int mb_index;

      if (x_index < 0 || x_index >= mb_width)
          return -1;
      if (y_index < 0 || y_index >= mb_height)
          return -1;

      mb_index = y_index * mb_width + x_index;
      if (mb_index < first_mb || mb_index >= (first_mb + num_mb))
          return -1;

      return 0;
  }
  983.  
  984. void
  985. gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx,
  986.                                      struct encode_state *encode_state,
  987.                                      int mb_width, int mb_height,
  988.                                      int kernel,
  989.                                      int transform_8x8_mode_flag,
  990.                                      struct intel_encoder_context *encoder_context)
  991. {
  992.     struct gen6_vme_context *vme_context = encoder_context->vme_context;
  993.     int mb_row;
  994.     int s;
  995.     unsigned int *command_ptr;
  996.  
  997. #define         USE_SCOREBOARD          (1 << 21)
  998.  
  999.     dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
  1000.     command_ptr = vme_context->vme_batchbuffer.bo->virtual;
  1001.  
  1002.     for (s = 0; s < encode_state->num_slice_params_ext; s++) {
  1003.         VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
  1004.         int first_mb = pSliceParameter->macroblock_address;
  1005.         int num_mb = pSliceParameter->num_macroblocks;
  1006.         unsigned int mb_intra_ub, score_dep;
  1007.         int x_outer, y_outer, x_inner, y_inner;
  1008.         int xtemp_outer = 0;
  1009.  
  1010.         x_outer = first_mb % mb_width;
  1011.         y_outer = first_mb / mb_width;
  1012.         mb_row = y_outer;
  1013.                                  
  1014.         for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
  1015.             x_inner = x_outer;
  1016.             y_inner = y_outer;
  1017.             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
  1018.                 mb_intra_ub = 0;
  1019.                 score_dep = 0;
  1020.                 if (x_inner != 0) {
  1021.                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
  1022.                     score_dep |= MB_SCOREBOARD_A;
  1023.                 }
  1024.                 if (y_inner != mb_row) {
  1025.                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
  1026.                     score_dep |= MB_SCOREBOARD_B;
  1027.                     if (x_inner != 0)
  1028.                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
  1029.                     if (x_inner != (mb_width -1)) {
  1030.                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
  1031.                         score_dep |= MB_SCOREBOARD_C;
  1032.                     }
  1033.                 }
  1034.                                                        
  1035.                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
  1036.                 *command_ptr++ = kernel;
  1037.                 *command_ptr++ = USE_SCOREBOARD;
  1038.                 /* Indirect data */
  1039.                 *command_ptr++ = 0;
  1040.                 /* the (X, Y) term of scoreboard */
  1041.                 *command_ptr++ = ((y_inner << 16) | x_inner);
  1042.                 *command_ptr++ = score_dep;
  1043.                 /*inline data */
  1044.                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
  1045.                 *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
  1046.                 x_inner -= 2;
  1047.                 y_inner += 1;
  1048.             }
  1049.             x_outer += 1;
  1050.         }
  1051.  
  1052.         xtemp_outer = mb_width - 2;
  1053.         if (xtemp_outer < 0)
  1054.             xtemp_outer = 0;
  1055.         x_outer = xtemp_outer;
  1056.         y_outer = first_mb / mb_width;
  1057.         for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
  1058.             y_inner = y_outer;
  1059.             x_inner = x_outer;
  1060.             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
  1061.                 mb_intra_ub = 0;
  1062.                 score_dep = 0;
  1063.                 if (x_inner != 0) {
  1064.                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
  1065.                     score_dep |= MB_SCOREBOARD_A;
  1066.                 }
  1067.                 if (y_inner != mb_row) {
  1068.                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
  1069.                     score_dep |= MB_SCOREBOARD_B;
  1070.                     if (x_inner != 0)
  1071.                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
  1072.  
  1073.                     if (x_inner != (mb_width -1)) {
  1074.                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
  1075.                         score_dep |= MB_SCOREBOARD_C;
  1076.                     }
  1077.                 }
  1078.  
  1079.                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
  1080.                 *command_ptr++ = kernel;
  1081.                 *command_ptr++ = USE_SCOREBOARD;
  1082.                 /* Indirect data */
  1083.                 *command_ptr++ = 0;
  1084.                 /* the (X, Y) term of scoreboard */
  1085.                 *command_ptr++ = ((y_inner << 16) | x_inner);
  1086.                 *command_ptr++ = score_dep;
  1087.                 /*inline data */
  1088.                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
  1089.                 *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
  1090.  
  1091.                 x_inner -= 2;
  1092.                 y_inner += 1;
  1093.             }
  1094.             x_outer++;
  1095.             if (x_outer >= mb_width) {
  1096.                 y_outer += 1;
  1097.                 x_outer = xtemp_outer;
  1098.             }          
  1099.         }
  1100.     }
  1101.  
  1102.     *command_ptr++ = 0;
  1103.     *command_ptr++ = MI_BATCH_BUFFER_END;
  1104.  
  1105.     dri_bo_unmap(vme_context->vme_batchbuffer.bo);
  1106. }
  1107.  
  1108. static uint8_t
  1109. intel_get_ref_idx_state_1(VAPictureH264 *va_pic, unsigned int frame_store_id)
  1110. {
  1111.     unsigned int is_long_term =
  1112.         !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
  1113.     unsigned int is_top_field =
  1114.         !!(va_pic->flags & VA_PICTURE_H264_TOP_FIELD);
  1115.     unsigned int is_bottom_field =
  1116.         !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
  1117.  
  1118.     return ((is_long_term                         << 6) |
  1119.             ((is_top_field ^ is_bottom_field ^ 1) << 5) |
  1120.             (frame_store_id                       << 1) |
  1121.             ((is_top_field ^ 1) & is_bottom_field));
  1122. }
  1123.  
  1124. void
  1125. intel_mfc_avc_ref_idx_state(VADriverContextP ctx,
  1126.                             struct encode_state *encode_state,
  1127.                             struct intel_encoder_context *encoder_context)
  1128. {
  1129.     struct gen6_vme_context *vme_context = encoder_context->vme_context;
  1130.     struct intel_batchbuffer *batch = encoder_context->base.batch;
  1131.     int slice_type;
  1132.     struct object_surface *obj_surface;
  1133.     unsigned int fref_entry, bref_entry;
  1134.     int frame_index, i;
  1135.     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
  1136.  
  1137.     fref_entry = 0x80808080;
  1138.     bref_entry = 0x80808080;
  1139.     slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
  1140.  
  1141.     if (slice_type == SLICE_TYPE_P || slice_type == SLICE_TYPE_B) {
  1142.         int ref_idx_l0 = (vme_context->ref_index_in_mb[0] & 0xff);
  1143.  
  1144.         if (ref_idx_l0 > 3) {
  1145.             WARN_ONCE("ref_idx_l0 is out of range\n");
  1146.             ref_idx_l0 = 0;
  1147.         }
  1148.  
  1149.         obj_surface = vme_context->used_reference_objects[0];
  1150.         frame_index = -1;
  1151.         for (i = 0; i < 16; i++) {
  1152.             if (obj_surface &&
  1153.                 obj_surface == encode_state->reference_objects[i]) {
  1154.                 frame_index = i;
  1155.                 break;
  1156.             }
  1157.         }
  1158.         if (frame_index == -1) {
  1159.             WARN_ONCE("RefPicList0 is not found in DPB!\n");
  1160.         } else {
  1161.             int ref_idx_l0_shift = ref_idx_l0 * 8;
  1162.             fref_entry &= ~(0xFF << ref_idx_l0_shift);
  1163.             fref_entry += (intel_get_ref_idx_state_1(vme_context->used_references[0], frame_index) << ref_idx_l0_shift);
  1164.         }
  1165.     }
  1166.  
  1167.     if (slice_type == SLICE_TYPE_B) {
  1168.         int ref_idx_l1 = (vme_context->ref_index_in_mb[1] & 0xff);
  1169.  
  1170.         if (ref_idx_l1 > 3) {
  1171.             WARN_ONCE("ref_idx_l1 is out of range\n");
  1172.             ref_idx_l1 = 0;
  1173.         }
  1174.  
  1175.         obj_surface = vme_context->used_reference_objects[1];
  1176.         frame_index = -1;
  1177.         for (i = 0; i < 16; i++) {
  1178.             if (obj_surface &&
  1179.                 obj_surface == encode_state->reference_objects[i]) {
  1180.                 frame_index = i;
  1181.                 break;
  1182.             }
  1183.         }
  1184.         if (frame_index == -1) {
  1185.             WARN_ONCE("RefPicList1 is not found in DPB!\n");
  1186.         } else {
  1187.             int ref_idx_l1_shift = ref_idx_l1 * 8;
  1188.             bref_entry &= ~(0xFF << ref_idx_l1_shift);
  1189.             bref_entry += (intel_get_ref_idx_state_1(vme_context->used_references[1], frame_index) << ref_idx_l1_shift);
  1190.         }
  1191.     }
  1192.  
  1193.     BEGIN_BCS_BATCH(batch, 10);
  1194.     OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
  1195.     OUT_BCS_BATCH(batch, 0);                  //Select L0
  1196.     OUT_BCS_BATCH(batch, fref_entry);         //Only 1 reference
  1197.     for(i = 0; i < 7; i++) {
  1198.         OUT_BCS_BATCH(batch, 0x80808080);
  1199.     }
  1200.     ADVANCE_BCS_BATCH(batch);
  1201.  
  1202.     BEGIN_BCS_BATCH(batch, 10);
  1203.     OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
  1204.     OUT_BCS_BATCH(batch, 1);                  //Select L1
  1205.     OUT_BCS_BATCH(batch, bref_entry);         //Only 1 reference
  1206.     for(i = 0; i < 7; i++) {
  1207.         OUT_BCS_BATCH(batch, 0x80808080);
  1208.     }
  1209.     ADVANCE_BCS_BATCH(batch);
  1210. }
  1211.  
  1212.  
  1213. void intel_vme_mpeg2_state_setup(VADriverContextP ctx,
  1214.                                  struct encode_state *encode_state,
  1215.                                  struct intel_encoder_context *encoder_context)
  1216. {
  1217.     struct gen6_vme_context *vme_context = encoder_context->vme_context;
  1218.     uint32_t *vme_state_message = (uint32_t *)(vme_context->vme_state_message);
  1219.     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
  1220.     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
  1221.     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
  1222.     uint32_t mv_x, mv_y;
  1223.     VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
  1224.     VAEncPictureParameterBufferMPEG2 *pic_param = NULL;
  1225.     slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
  1226.  
  1227.     if (vme_context->mpeg2_level == MPEG2_LEVEL_LOW) {
  1228.         mv_x = 512;
  1229.         mv_y = 64;
  1230.     } else if (vme_context->mpeg2_level == MPEG2_LEVEL_MAIN) {
  1231.         mv_x = 1024;
  1232.         mv_y = 128;
  1233.     } else if (vme_context->mpeg2_level == MPEG2_LEVEL_HIGH) {
  1234.         mv_x = 2048;
  1235.         mv_y = 128;
  1236.     } else {
  1237.         WARN_ONCE("Incorrect Mpeg2 level setting!\n");
  1238.         mv_x = 512;
  1239.         mv_y = 64;
  1240.     }
  1241.  
  1242.     pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
  1243.     if (pic_param->picture_type != VAEncPictureTypeIntra) {
  1244.         int qp, m_cost, j, mv_count;
  1245.         float   lambda, m_costf;
  1246.         slice_param = (VAEncSliceParameterBufferMPEG2 *)
  1247.             encode_state->slice_params_ext[0]->buffer;
  1248.         qp = slice_param->quantiser_scale_code;
  1249.         lambda = intel_lambda_qp(qp);
  1250.         /* No Intra prediction. So it is zero */
  1251.         vme_state_message[MODE_INTRA_8X8] = 0;
  1252.         vme_state_message[MODE_INTRA_4X4] = 0;
  1253.         vme_state_message[MODE_INTER_MV0] = 0;
  1254.         for (j = 1; j < 3; j++) {
  1255.             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
  1256.             m_cost = (int)m_costf;
  1257.             vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
  1258.         }
  1259.         mv_count = 3;
  1260.         for (j = 4; j <= 64; j *= 2) {
  1261.             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
  1262.             m_cost = (int)m_costf;
  1263.             vme_state_message[MODE_INTER_MV0 + mv_count] =
  1264.                 intel_format_lutvalue(m_cost, 0x6f);
  1265.             mv_count++;
  1266.         }
  1267.         m_cost = lambda;
  1268.         /* It can only perform the 16x16 search. So mode cost can be ignored for
  1269.          * the other mode. for example: 16x8/8x8
  1270.          */
  1271.         vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
  1272.         vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
  1273.                        
  1274.         vme_state_message[MODE_INTER_16X8] = 0;
  1275.         vme_state_message[MODE_INTER_8X8] = 0;
  1276.         vme_state_message[MODE_INTER_8X4] = 0;
  1277.         vme_state_message[MODE_INTER_4X4] = 0;
  1278.         vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
  1279.  
  1280.     }
  1281.     vme_state_message[MPEG2_MV_RANGE] = (mv_y << 16) | (mv_x);
  1282.  
  1283.     vme_state_message[MPEG2_PIC_WIDTH_HEIGHT] = (height_in_mbs << 16) |
  1284.         width_in_mbs;
  1285. }
  1286.  
  1287. void
  1288. gen7_vme_mpeg2_walker_fill_vme_batchbuffer(VADriverContextP ctx,
  1289.                                            struct encode_state *encode_state,
  1290.                                            int mb_width, int mb_height,
  1291.                                            int kernel,
  1292.                                            struct intel_encoder_context *encoder_context)
  1293. {
  1294.     struct gen6_vme_context *vme_context = encoder_context->vme_context;
  1295.     unsigned int *command_ptr;
  1296.  
  1297. #define         MPEG2_SCOREBOARD                (1 << 21)
  1298.  
  1299.     dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
  1300.     command_ptr = vme_context->vme_batchbuffer.bo->virtual;
  1301.  
  1302.     {
  1303.         unsigned int mb_intra_ub, score_dep;
  1304.         int x_outer, y_outer, x_inner, y_inner;
  1305.         int xtemp_outer = 0;
  1306.         int first_mb = 0;
  1307.         int num_mb = mb_width * mb_height;
  1308.  
  1309.         x_outer = 0;
  1310.         y_outer = 0;
  1311.        
  1312.                                  
  1313.         for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
  1314.             x_inner = x_outer;
  1315.             y_inner = y_outer;
  1316.             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
  1317.                 mb_intra_ub = 0;
  1318.                 score_dep = 0;
  1319.                 if (x_inner != 0) {
  1320.                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
  1321.                     score_dep |= MB_SCOREBOARD_A;
  1322.                 }
  1323.                 if (y_inner != 0) {
  1324.                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
  1325.                     score_dep |= MB_SCOREBOARD_B;
  1326.  
  1327.                     if (x_inner != 0)
  1328.                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
  1329.  
  1330.                     if (x_inner != (mb_width -1)) {
  1331.                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
  1332.                         score_dep |= MB_SCOREBOARD_C;
  1333.                     }
  1334.                 }
  1335.                                                        
  1336.                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
  1337.                 *command_ptr++ = kernel;
  1338.                 *command_ptr++ = MPEG2_SCOREBOARD;
  1339.                 /* Indirect data */
  1340.                 *command_ptr++ = 0;
  1341.                 /* the (X, Y) term of scoreboard */
  1342.                 *command_ptr++ = ((y_inner << 16) | x_inner);
  1343.                 *command_ptr++ = score_dep;
  1344.                 /*inline data */
  1345.                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
  1346.                 *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
  1347.                 x_inner -= 2;
  1348.                 y_inner += 1;
  1349.             }
  1350.             x_outer += 1;
  1351.         }
  1352.  
  1353.         xtemp_outer = mb_width - 2;
  1354.         if (xtemp_outer < 0)
  1355.             xtemp_outer = 0;
  1356.         x_outer = xtemp_outer;
  1357.         y_outer = 0;
  1358.         for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
  1359.             y_inner = y_outer;
  1360.             x_inner = x_outer;
  1361.             for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
  1362.                 mb_intra_ub = 0;
  1363.                 score_dep = 0;
  1364.                 if (x_inner != 0) {
  1365.                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
  1366.                     score_dep |= MB_SCOREBOARD_A;
  1367.                 }
  1368.                 if (y_inner != 0) {
  1369.                     mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
  1370.                     score_dep |= MB_SCOREBOARD_B;
  1371.  
  1372.                     if (x_inner != 0)
  1373.                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
  1374.  
  1375.                     if (x_inner != (mb_width -1)) {
  1376.                         mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
  1377.                         score_dep |= MB_SCOREBOARD_C;
  1378.                     }
  1379.                 }
  1380.  
  1381.                 *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
  1382.                 *command_ptr++ = kernel;
  1383.                 *command_ptr++ = MPEG2_SCOREBOARD;
  1384.                 /* Indirect data */
  1385.                 *command_ptr++ = 0;
  1386.                 /* the (X, Y) term of scoreboard */
  1387.                 *command_ptr++ = ((y_inner << 16) | x_inner);
  1388.                 *command_ptr++ = score_dep;
  1389.                 /*inline data */
  1390.                 *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
  1391.                 *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
  1392.  
  1393.                 x_inner -= 2;
  1394.                 y_inner += 1;
  1395.             }
  1396.             x_outer++;
  1397.             if (x_outer >= mb_width) {
  1398.                 y_outer += 1;
  1399.                 x_outer = xtemp_outer;
  1400.             }          
  1401.         }
  1402.     }
  1403.  
  1404.     *command_ptr++ = 0;
  1405.     *command_ptr++ = MI_BATCH_BUFFER_END;
  1406.  
  1407.     dri_bo_unmap(vme_context->vme_batchbuffer.bo);
  1408.     return;
  1409. }
  1410.  
  1411. static int
  1412. avc_temporal_find_surface(VAPictureH264 *curr_pic,
  1413.                           VAPictureH264 *ref_list,
  1414.                           int num_pictures,
  1415.                           int dir)
  1416. {
  1417.     int i, found = -1, min = 0x7FFFFFFF;
  1418.  
  1419.     for (i = 0; i < num_pictures; i++) {
  1420.         int tmp;
  1421.  
  1422.         if ((ref_list[i].flags & VA_PICTURE_H264_INVALID) ||
  1423.             (ref_list[i].picture_id == VA_INVALID_SURFACE))
  1424.             break;
  1425.  
  1426.         tmp = curr_pic->TopFieldOrderCnt - ref_list[i].TopFieldOrderCnt;
  1427.  
  1428.         if (dir)
  1429.             tmp = -tmp;
  1430.  
  1431.         if (tmp > 0 && tmp < min) {
  1432.             min = tmp;
  1433.             found = i;
  1434.         }
  1435.     }
  1436.  
  1437.     return found;
  1438. }
  1439.  
  1440. void
  1441. intel_avc_vme_reference_state(VADriverContextP ctx,
  1442.                               struct encode_state *encode_state,
  1443.                               struct intel_encoder_context *encoder_context,
  1444.                               int list_index,
  1445.                               int surface_index,
  1446.                               void (* vme_source_surface_state)(
  1447.                                   VADriverContextP ctx,
  1448.                                   int index,
  1449.                                   struct object_surface *obj_surface,
  1450.                                   struct intel_encoder_context *encoder_context))
  1451. {
  1452.     struct gen6_vme_context *vme_context = encoder_context->vme_context;
  1453.     struct object_surface *obj_surface = NULL;
  1454.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1455.     VASurfaceID ref_surface_id;
  1456.     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
  1457.     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
  1458.     int max_num_references;
  1459.     VAPictureH264 *curr_pic;
  1460.     VAPictureH264 *ref_list;
  1461.     int ref_idx;
  1462.  
  1463.     if (list_index == 0) {
  1464.         max_num_references = pic_param->num_ref_idx_l0_active_minus1 + 1;
  1465.         ref_list = slice_param->RefPicList0;
  1466.     } else {
  1467.         max_num_references = pic_param->num_ref_idx_l1_active_minus1 + 1;
  1468.         ref_list = slice_param->RefPicList1;
  1469.     }
  1470.  
  1471.     if (max_num_references == 1) {
  1472.         if (list_index == 0) {
  1473.             ref_surface_id = slice_param->RefPicList0[0].picture_id;
  1474.             vme_context->used_references[0] = &slice_param->RefPicList0[0];
  1475.         } else {
  1476.             ref_surface_id = slice_param->RefPicList1[0].picture_id;
  1477.             vme_context->used_references[1] = &slice_param->RefPicList1[0];
  1478.         }
  1479.  
  1480.         if (ref_surface_id != VA_INVALID_SURFACE)
  1481.             obj_surface = SURFACE(ref_surface_id);
  1482.  
  1483.         if (!obj_surface ||
  1484.             !obj_surface->bo) {
  1485.             obj_surface = encode_state->reference_objects[list_index];
  1486.             vme_context->used_references[list_index] = &pic_param->ReferenceFrames[list_index];
  1487.         }
  1488.  
  1489.         ref_idx = 0;
  1490.     } else {
  1491.         curr_pic = &pic_param->CurrPic;
  1492.  
  1493.         /* select the reference frame in temporal space */
  1494.         ref_idx = avc_temporal_find_surface(curr_pic, ref_list, max_num_references, list_index == 1);
  1495.         ref_surface_id = ref_list[ref_idx].picture_id;
  1496.  
  1497.         if (ref_surface_id != VA_INVALID_SURFACE) /* otherwise warning later */
  1498.             obj_surface = SURFACE(ref_surface_id);
  1499.  
  1500.         vme_context->used_reference_objects[list_index] = obj_surface;
  1501.         vme_context->used_references[list_index] = &ref_list[ref_idx];
  1502.     }
  1503.  
  1504.     if (obj_surface &&
  1505.         obj_surface->bo) {
  1506.         assert(ref_idx >= 0);
  1507.         vme_context->used_reference_objects[list_index] = obj_surface;
  1508.         vme_source_surface_state(ctx, surface_index, obj_surface, encoder_context);
  1509.         vme_context->ref_index_in_mb[list_index] = (ref_idx << 24 |
  1510.                                                     ref_idx << 16 |
  1511.                                                     ref_idx <<  8 |
  1512.                                                     ref_idx);
  1513.     } else {
  1514.         vme_context->used_reference_objects[list_index] = NULL;
  1515.         vme_context->used_references[list_index] = NULL;
  1516.         vme_context->ref_index_in_mb[list_index] = 0;
  1517.     }
  1518. }
  1519.  
  1520. void intel_avc_slice_insert_packed_data(VADriverContextP ctx,
  1521.                                         struct encode_state *encode_state,
  1522.                                         struct intel_encoder_context *encoder_context,
  1523.                                         int slice_index,
  1524.                                         struct intel_batchbuffer *slice_batch)
  1525. {
  1526.     int count, i, start_index;
  1527.     unsigned int length_in_bits;
  1528.     VAEncPackedHeaderParameterBuffer *param = NULL;
  1529.     unsigned int *header_data = NULL;
  1530.     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
  1531.     int slice_header_index;
  1532.  
  1533.     if (encode_state->slice_header_index[slice_index] == 0)
  1534.         slice_header_index = -1;
  1535.     else
  1536.         slice_header_index = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
  1537.  
  1538.     count = encode_state->slice_rawdata_count[slice_index];
  1539.     start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
  1540.  
  1541.     for (i = 0; i < count; i++) {
  1542.         unsigned int skip_emul_byte_cnt;
  1543.  
  1544.         header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
  1545.  
  1546.         param = (VAEncPackedHeaderParameterBuffer *)
  1547.                     (encode_state->packed_header_params_ext[start_index + i]->buffer);
  1548.  
  1549.         /* skip the slice header packed data type as it is lastly inserted */
  1550.         if (param->type == VAEncPackedHeaderSlice)
  1551.             continue;
  1552.  
  1553.         length_in_bits = param->bit_length;
  1554.  
  1555.         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
  1556.  
  1557.         /* as the slice header is still required, the last header flag is set to
  1558.          * zero.
  1559.          */
  1560.         mfc_context->insert_object(ctx,
  1561.                                    encoder_context,
  1562.                                    header_data,
  1563.                                    ALIGN(length_in_bits, 32) >> 5,
  1564.                                    length_in_bits & 0x1f,
  1565.                                    skip_emul_byte_cnt,
  1566.                                    0,
  1567.                                    0,
  1568.                                    !param->has_emulation_bytes,
  1569.                                    slice_batch);
  1570.     }
  1571.  
  1572.     if (slice_header_index == -1) {
  1573.         unsigned char *slice_header = NULL;
  1574.         int slice_header_length_in_bits = 0;
  1575.         VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
  1576.         VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
  1577.         VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
  1578.  
  1579.         /* No slice header data is passed. And the driver needs to generate it */
  1580.         /* For the Normal H264 */
  1581.         slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter,
  1582.                                                              pPicParameter,
  1583.                                                              pSliceParameter,
  1584.                                                              &slice_header);
  1585.         mfc_context->insert_object(ctx, encoder_context,
  1586.                                    (unsigned int *)slice_header,
  1587.                                    ALIGN(slice_header_length_in_bits, 32) >> 5,
  1588.                                    slice_header_length_in_bits & 0x1f,
  1589.                                    5,  /* first 5 bytes are start code + nal unit type */
  1590.                                    1, 0, 1, slice_batch);
  1591.  
  1592.         free(slice_header);
  1593.     } else {
  1594.         unsigned int skip_emul_byte_cnt;
  1595.  
  1596.         header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer;
  1597.  
  1598.         param = (VAEncPackedHeaderParameterBuffer *)
  1599.                     (encode_state->packed_header_params_ext[slice_header_index]->buffer);
  1600.         length_in_bits = param->bit_length;
  1601.  
  1602.         /* as the slice header is the last header data for one slice,
  1603.          * the last header flag is set to one.
  1604.          */
  1605.         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
  1606.  
  1607.         mfc_context->insert_object(ctx,
  1608.                                    encoder_context,
  1609.                                    header_data,
  1610.                                    ALIGN(length_in_bits, 32) >> 5,
  1611.                                    length_in_bits & 0x1f,
  1612.                                    skip_emul_byte_cnt,
  1613.                                    1,
  1614.                                    0,
  1615.                                    !param->has_emulation_bytes,
  1616.                                    slice_batch);
  1617.     }
  1618.  
  1619.     return;
  1620. }
  1621.