Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright © 2012 Intel Corporation
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the
  6.  * "Software"), to deal in the Software without restriction, including
  7.  * without limitation the rights to use, copy, modify, merge, publish,
  8.  * distribute, sub license, and/or sell copies of the Software, and to
  9.  * permit persons to whom the Software is furnished to do so, subject to
  10.  * the following conditions:
  11.  *
  12.  * The above copyright notice and this permission notice (including the
  13.  * next paragraph) shall be included in all copies or substantial portions
  14.  * of the Software.
  15.  *
  16.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  17.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  18.  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  19.  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
  20.  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  21.  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  22.  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23.  *
  24.  * Authors:
  25.  *    Xiang Haihao <haihao.xiang@intel.com>
  26.  *    Zhao Yakui <yakui.zhao@intel.com>
  27.  *
  28.  */
  29.  
  30. #include <stdio.h>
  31. #include <stdlib.h>
  32. #include <string.h>
  33. #include <assert.h>
  34. #include <math.h>
  35.  
  36. #include "intel_batchbuffer.h"
  37. #include "i965_defines.h"
  38. #include "i965_structs.h"
  39. #include "i965_drv_video.h"
  40. #include "i965_encoder.h"
  41. #include "i965_encoder_utils.h"
  42. #include "gen6_mfc.h"
  43. #include "gen6_vme.h"
  44. #include "gen9_mfc.h"
  45. #include "intel_media.h"
  46.  
#ifndef HAVE_LOG2F
/* Fallback for C libraries without log2f(): log2(x) == ln(x) / ln(2). */
#define log2f(x) (logf(x)/(float)M_LN2)
#endif
  50.  
  51. int intel_avc_enc_slice_type_fixup(int slice_type)
  52. {
  53.     if (slice_type == SLICE_TYPE_SP ||
  54.         slice_type == SLICE_TYPE_P)
  55.         slice_type = SLICE_TYPE_P;
  56.     else if (slice_type == SLICE_TYPE_SI ||
  57.              slice_type == SLICE_TYPE_I)
  58.         slice_type = SLICE_TYPE_I;
  59.     else {
  60.         if (slice_type != SLICE_TYPE_B)
  61.             WARN_ONCE("Invalid slice type for H.264 encoding!\n");
  62.  
  63.         slice_type = SLICE_TYPE_B;
  64.     }
  65.  
  66.     return slice_type;
  67. }
  68.  
  69. static void
  70. intel_mfc_bit_rate_control_context_init(struct encode_state *encode_state,
  71.                                         struct gen6_mfc_context *mfc_context)
  72. {
  73.     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
  74.     int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
  75.     int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
  76.     float fps =  pSequenceParameter->time_scale * 0.5 / pSequenceParameter->num_units_in_tick ;
  77.     int inter_mb_size = pSequenceParameter->bits_per_second * 1.0 / (fps+4.0) / width_in_mbs / height_in_mbs;
  78.     int intra_mb_size = inter_mb_size * 5.0;
  79.     int i;
  80.  
  81.     mfc_context->bit_rate_control_context[SLICE_TYPE_I].target_mb_size = intra_mb_size;
  82.     mfc_context->bit_rate_control_context[SLICE_TYPE_I].target_frame_size = intra_mb_size * width_in_mbs * height_in_mbs;
  83.     mfc_context->bit_rate_control_context[SLICE_TYPE_P].target_mb_size = inter_mb_size;
  84.     mfc_context->bit_rate_control_context[SLICE_TYPE_P].target_frame_size = inter_mb_size * width_in_mbs * height_in_mbs;
  85.     mfc_context->bit_rate_control_context[SLICE_TYPE_B].target_mb_size = inter_mb_size;
  86.     mfc_context->bit_rate_control_context[SLICE_TYPE_B].target_frame_size = inter_mb_size * width_in_mbs * height_in_mbs;
  87.  
  88.     for(i = 0 ; i < 3; i++) {
  89.         mfc_context->bit_rate_control_context[i].QpPrimeY = 26;
  90.         mfc_context->bit_rate_control_context[i].MaxQpNegModifier = 6;
  91.         mfc_context->bit_rate_control_context[i].MaxQpPosModifier = 6;
  92.         mfc_context->bit_rate_control_context[i].GrowInit = 6;
  93.         mfc_context->bit_rate_control_context[i].GrowResistance = 4;
  94.         mfc_context->bit_rate_control_context[i].ShrinkInit = 6;
  95.         mfc_context->bit_rate_control_context[i].ShrinkResistance = 4;
  96.        
  97.         mfc_context->bit_rate_control_context[i].Correct[0] = 8;
  98.         mfc_context->bit_rate_control_context[i].Correct[1] = 4;
  99.         mfc_context->bit_rate_control_context[i].Correct[2] = 2;
  100.         mfc_context->bit_rate_control_context[i].Correct[3] = 2;
  101.         mfc_context->bit_rate_control_context[i].Correct[4] = 4;
  102.         mfc_context->bit_rate_control_context[i].Correct[5] = 8;
  103.     }
  104.    
  105.     mfc_context->bit_rate_control_context[SLICE_TYPE_I].TargetSizeInWord = (intra_mb_size + 16)/ 16;
  106.     mfc_context->bit_rate_control_context[SLICE_TYPE_P].TargetSizeInWord = (inter_mb_size + 16)/ 16;
  107.     mfc_context->bit_rate_control_context[SLICE_TYPE_B].TargetSizeInWord = (inter_mb_size + 16)/ 16;
  108.  
  109.     mfc_context->bit_rate_control_context[SLICE_TYPE_I].MaxSizeInWord = mfc_context->bit_rate_control_context[SLICE_TYPE_I].TargetSizeInWord * 1.5;
  110.     mfc_context->bit_rate_control_context[SLICE_TYPE_P].MaxSizeInWord = mfc_context->bit_rate_control_context[SLICE_TYPE_P].TargetSizeInWord * 1.5;
  111.     mfc_context->bit_rate_control_context[SLICE_TYPE_B].MaxSizeInWord = mfc_context->bit_rate_control_context[SLICE_TYPE_B].TargetSizeInWord * 1.5;
  112. }
  113.  
/*
 * Initialize the BRC (bit rate control) and HRD state from the sequence
 * parameters and the application-supplied HRD misc parameter buffer.
 * Computes per-slice-type frame budgets weighted by the GOP structure and
 * picks an initial QP by linearly interpolating between the estimated
 * frame sizes at QP 1 and QP 51.  No-op if no HRD misc parameter was given.
 */
static void intel_mfc_brc_init(struct encode_state *encode_state,
                               struct intel_encoder_context* encoder_context)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
    VAEncMiscParameterBuffer* pMiscParamHRD = NULL;
    VAEncMiscParameterHRD* pParameterHRD = NULL;
    double bitrate = pSequenceParameter->bits_per_second;
    /* time_scale / (2 * num_units_in_tick) = frames per second (field rate halved). */
    double framerate = (double)pSequenceParameter->time_scale /(2 * (double)pSequenceParameter->num_units_in_tick);
    int inum = 1, pnum = 0, bnum = 0; /* Gop structure: number of I, P, B frames in the Gop. */
    int intra_period = pSequenceParameter->intra_period;
    int ip_period = pSequenceParameter->ip_period;
    /* Rough frame-size estimates (in bits) for a 4:2:0 frame at QP 1 and QP 51:
     * 8 bits * 3 planes * luma pixels / 2, scaled by an empirical factor. */
    double qp1_size = 0.1 * 8 * 3 * (pSequenceParameter->picture_width_in_mbs<<4) * (pSequenceParameter->picture_height_in_mbs<<4)/2;
    double qp51_size = 0.001 * 8 * 3 * (pSequenceParameter->picture_width_in_mbs<<4) * (pSequenceParameter->picture_height_in_mbs<<4)/2;
    double bpf;

    /* Without an HRD misc parameter buffer there is nothing to initialize from. */
    if (!encode_state->misc_param[VAEncMiscParameterTypeHRD] || !encode_state->misc_param[VAEncMiscParameterTypeHRD]->buffer)
        return;

    pMiscParamHRD = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeHRD]->buffer;
    pParameterHRD = (VAEncMiscParameterHRD*)pMiscParamHRD->data;

    /* Derive P/B frame counts per GOP; with ip_period == 0 the GOP is all-intra. */
    if (pSequenceParameter->ip_period) {
        pnum = (intra_period + ip_period - 1)/ip_period - 1;
        bnum = intra_period - inum - pnum;
    }

    mfc_context->brc.mode = encoder_context->rate_control_mode;

    /* Split the GOP's bit budget across frame types using the BRC weights. */
    mfc_context->brc.target_frame_size[SLICE_TYPE_I] = (int)((double)((bitrate * intra_period)/framerate) /
                                                             (double)(inum + BRC_PWEIGHT * pnum + BRC_BWEIGHT * bnum));
    mfc_context->brc.target_frame_size[SLICE_TYPE_P] = BRC_PWEIGHT * mfc_context->brc.target_frame_size[SLICE_TYPE_I];
    mfc_context->brc.target_frame_size[SLICE_TYPE_B] = BRC_BWEIGHT * mfc_context->brc.target_frame_size[SLICE_TYPE_I];

    mfc_context->brc.gop_nums[SLICE_TYPE_I] = inum;
    mfc_context->brc.gop_nums[SLICE_TYPE_P] = pnum;
    mfc_context->brc.gop_nums[SLICE_TYPE_B] = bnum;

    bpf = mfc_context->brc.bits_per_frame = bitrate/framerate;

    mfc_context->hrd.buffer_size = (double)pParameterHRD->buffer_size;
    /* NOTE(review): the (double) cast applies to the comparison result, not the
     * operand; the net effect is "use initial fullness if it fits, else half
     * the buffer" — confirm the cast placement is intentional. */
    mfc_context->hrd.current_buffer_fullness =
        (double)(pParameterHRD->initial_buffer_fullness < mfc_context->hrd.buffer_size)?
        pParameterHRD->initial_buffer_fullness: mfc_context->hrd.buffer_size/2.;
    mfc_context->hrd.target_buffer_fullness = (double)mfc_context->hrd.buffer_size/2.;
    mfc_context->hrd.buffer_capacity = (double)mfc_context->hrd.buffer_size/qp1_size;
    mfc_context->hrd.violation_noted = 0;

    /* Pick an initial P-frame QP by interpolating between the QP51 and QP1
     * frame-size estimates, then clamp to the valid [1, 51] range. */
    if ((bpf > qp51_size) && (bpf < qp1_size)) {
        mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY = 51 - 50*(bpf - qp51_size)/(qp1_size - qp51_size);
    }
    else if (bpf >= qp1_size)
        mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY = 1;
    else if (bpf <= qp51_size)
        mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY = 51;

    mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY = mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY;
    mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY = mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY;

    BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY, 1, 51);
    BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY, 1, 51);
    BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY, 1, 51);
}
  177.  
  178. int intel_mfc_update_hrd(struct encode_state *encode_state,
  179.                          struct gen6_mfc_context *mfc_context,
  180.                          int frame_bits)
  181. {
  182.     double prev_bf = mfc_context->hrd.current_buffer_fullness;
  183.  
  184.     mfc_context->hrd.current_buffer_fullness -= frame_bits;
  185.  
  186.     if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness <= 0.) {
  187.         mfc_context->hrd.current_buffer_fullness = prev_bf;
  188.         return BRC_UNDERFLOW;
  189.     }
  190.    
  191.     mfc_context->hrd.current_buffer_fullness += mfc_context->brc.bits_per_frame;
  192.     if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness > mfc_context->hrd.buffer_size) {
  193.         if (mfc_context->brc.mode == VA_RC_VBR)
  194.             mfc_context->hrd.current_buffer_fullness = mfc_context->hrd.buffer_size;
  195.         else {
  196.             mfc_context->hrd.current_buffer_fullness = prev_bf;
  197.             return BRC_OVERFLOW;
  198.         }
  199.     }
  200.     return BRC_NO_HRD_VIOLATION;
  201. }
  202.  
/*
 * Post-encode BRC step: given the actual coded size of the just-encoded
 * frame, predict the QP for the next frame of the same slice type, apply
 * an HRD-fullness-based correction, propagate coupled QP adjustments to
 * the other slice types, and report any HRD violation so the caller can
 * decide whether to re-encode.  Returns a gen6_brc_status code.
 */
int intel_mfc_brc_postpack(struct encode_state *encode_state,
                           struct gen6_mfc_context *mfc_context,
                           int frame_bits)
{
    gen6_brc_status sts = BRC_NO_HRD_VIOLATION;
    VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
    int slicetype = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
    int qpi = mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY;
    int qpp = mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY;
    int qpb = mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY;
    int qp; // quantizer of previously encoded slice of current type
    int qpn; // predicted quantizer for next frame of current type in integer format
    double qpf; // predicted quantizer for next frame of current type in float format
    double delta_qp; // QP correction
    int target_frame_size, frame_size_next;
    /* Notes:
     *  x - how far we are from HRD buffer borders
     *  y - how far we are from target HRD buffer fullness
     */
    double x, y;
    double frame_size_alpha;

    qp = mfc_context->bit_rate_control_context[slicetype].QpPrimeY;

    target_frame_size = mfc_context->brc.target_frame_size[slicetype];
    /* frame_size_alpha damps the correction: the more frames of this type in
     * the GOP (capped at 30), the slower the per-frame adjustment. */
    if (mfc_context->hrd.buffer_capacity < 5)
        frame_size_alpha = 0;
    else
        frame_size_alpha = (double)mfc_context->brc.gop_nums[slicetype];
    if (frame_size_alpha > 30) frame_size_alpha = 30;
    frame_size_next = target_frame_size + (double)(target_frame_size - frame_bits) /
        (double)(frame_size_alpha + 1.);

    /* frame_size_next: avoiding negative number and too small value */
    if ((double)frame_size_next < (double)(target_frame_size * 0.25))
        frame_size_next = (int)((double)target_frame_size * 0.25);

    /* Scale QP by the ratio of desired to predicted size (larger prediction
     * -> lower QP) and round to the nearest integer. */
    qpf = (double)qp * target_frame_size / frame_size_next;
    qpn = (int)(qpf + 0.5);

    if (qpn == qp) {
        /* setting qpn we round qpf making mistakes: now we are trying to compensate this */
        mfc_context->brc.qpf_rounding_accumulator += qpf - qpn;
        if (mfc_context->brc.qpf_rounding_accumulator > 1.0) {
            qpn++;
            mfc_context->brc.qpf_rounding_accumulator = 0.;
        } else if (mfc_context->brc.qpf_rounding_accumulator < -1.0) {
            qpn--;
            mfc_context->brc.qpf_rounding_accumulator = 0.;
        }
    }
    /* making sure that QP is not changing too fast */
    if ((qpn - qp) > BRC_QP_MAX_CHANGE) qpn = qp + BRC_QP_MAX_CHANGE;
    else if ((qpn - qp) < -BRC_QP_MAX_CHANGE) qpn = qp - BRC_QP_MAX_CHANGE;
    /* making sure that with QP predictions we did do not leave QPs range */
    BRC_CLIP(qpn, 1, 51);

    /* checking whether HRD compliance is still met */
    sts = intel_mfc_update_hrd(encode_state, mfc_context, frame_bits);

    /* calculating QP delta as some function*/
    /* x in [-1, 1]: signed distance from target fullness, normalized to the
     * nearer buffer border; y: distance from that border (floored at 0.01
     * so exp(-1/y) stays finite). */
    x = mfc_context->hrd.target_buffer_fullness - mfc_context->hrd.current_buffer_fullness;
    if (x > 0) {
        x /= mfc_context->hrd.target_buffer_fullness;
        y = mfc_context->hrd.current_buffer_fullness;
    }
    else {
        x /= (mfc_context->hrd.buffer_size - mfc_context->hrd.target_buffer_fullness);
        y = mfc_context->hrd.buffer_size - mfc_context->hrd.current_buffer_fullness;
    }
    if (y < 0.01) y = 0.01;
    if (x > 1) x = 1;
    else if (x < -1) x = -1;

    /* Smooth correction: grows as fullness drifts from target (sin term) and
     * as we approach a buffer border (exp term). */
    delta_qp = BRC_QP_MAX_CHANGE*exp(-1/y)*sin(BRC_PI_0_5 * x);
    qpn = (int)(qpn + delta_qp + 0.5);

    /* making sure that with QP predictions we did do not leave QPs range */
    BRC_CLIP(qpn, 1, 51);

    if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation
        /* correcting QPs of slices of other types */
        /* Keep I/P/B QPs within their conventional offsets of each other,
         * nudging the other types by half (or a quarter of) the discrepancy. */
        if (slicetype == SLICE_TYPE_P) {
            if (abs(qpn + BRC_P_B_QP_DIFF - qpb) > 2)
                mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY += (qpn + BRC_P_B_QP_DIFF - qpb) >> 1;
            if (abs(qpn - BRC_I_P_QP_DIFF - qpi) > 2)
                mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY += (qpn - BRC_I_P_QP_DIFF - qpi) >> 1;
        } else if (slicetype == SLICE_TYPE_I) {
            if (abs(qpn + BRC_I_B_QP_DIFF - qpb) > 4)
                mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY += (qpn + BRC_I_B_QP_DIFF - qpb) >> 2;
            if (abs(qpn + BRC_I_P_QP_DIFF - qpp) > 2)
                mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY += (qpn + BRC_I_P_QP_DIFF - qpp) >> 2;
        } else { // SLICE_TYPE_B
            if (abs(qpn - BRC_P_B_QP_DIFF - qpp) > 2)
                mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY += (qpn - BRC_P_B_QP_DIFF - qpp) >> 1;
            if (abs(qpn - BRC_I_B_QP_DIFF - qpi) > 4)
                mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY += (qpn - BRC_I_B_QP_DIFF - qpi) >> 2;
        }
        BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY, 1, 51);
        BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY, 1, 51);
        BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_B].QpPrimeY, 1, 51);
    } else if (sts == BRC_UNDERFLOW) { // underflow
        /* Force QP upward; if already maxed, the caller must accept the frame. */
        if (qpn <= qp) qpn = qp + 1;
        if (qpn > 51) {
            qpn = 51;
            sts = BRC_UNDERFLOW_WITH_MAX_QP; //underflow with maxQP
        }
    } else if (sts == BRC_OVERFLOW) {
        /* Force QP downward; at minimum QP, bit stuffing is required. */
        if (qpn >= qp) qpn = qp - 1;
        if (qpn < 1) { // < 0 (?) overflow with minQP
            qpn = 1;
            sts = BRC_OVERFLOW_WITH_MIN_QP; // bit stuffing to be done
        }
    }

    mfc_context->bit_rate_control_context[slicetype].QpPrimeY = qpn;

    return sts;
}
  322.  
  323. static void intel_mfc_hrd_context_init(struct encode_state *encode_state,
  324.                                        struct intel_encoder_context *encoder_context)
  325. {
  326.     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
  327.     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
  328.     unsigned int rate_control_mode = encoder_context->rate_control_mode;
  329.     int target_bit_rate = pSequenceParameter->bits_per_second;
  330.    
  331.     // current we only support CBR mode.
  332.     if (rate_control_mode == VA_RC_CBR) {
  333.         mfc_context->vui_hrd.i_bit_rate_value = target_bit_rate >> 10;
  334.         mfc_context->vui_hrd.i_cpb_size_value = (target_bit_rate * 8) >> 10;
  335.         mfc_context->vui_hrd.i_initial_cpb_removal_delay = mfc_context->vui_hrd.i_cpb_size_value * 0.5 * 1024 / target_bit_rate * 90000;
  336.         mfc_context->vui_hrd.i_cpb_removal_delay = 2;
  337.         mfc_context->vui_hrd.i_frame_number = 0;
  338.  
  339.         mfc_context->vui_hrd.i_initial_cpb_removal_delay_length = 24;
  340.         mfc_context->vui_hrd.i_cpb_removal_delay_length = 24;
  341.         mfc_context->vui_hrd.i_dpb_output_delay_length = 24;
  342.     }
  343.  
  344. }
  345.  
  346. void
  347. intel_mfc_hrd_context_update(struct encode_state *encode_state,
  348.                              struct gen6_mfc_context *mfc_context)
  349. {
  350.     mfc_context->vui_hrd.i_frame_number++;
  351. }
  352.  
  353. int intel_mfc_interlace_check(VADriverContextP ctx,
  354.                               struct encode_state *encode_state,
  355.                               struct intel_encoder_context *encoder_context)
  356. {
  357.     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
  358.     VAEncSliceParameterBufferH264 *pSliceParameter;
  359.     int i;
  360.     int mbCount = 0;
  361.     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
  362.     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
  363.  
  364.     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
  365.         pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[i]->buffer;
  366.         mbCount += pSliceParameter->num_macroblocks;
  367.     }
  368.    
  369.     if ( mbCount == ( width_in_mbs * height_in_mbs ) )
  370.         return 0;
  371.  
  372.     return 1;
  373. }
  374.  
  375. /*
  376.  * Check whether the parameters related with CBR are updated and decide whether
  377.  * it needs to reinitialize the configuration related with CBR.
  378.  * Currently it will check the following parameters:
  379.  *      bits_per_second
  380.  *      frame_rate
  381.  *      gop_configuration(intra_period, ip_period, intra_idr_period)
  382.  */
  383. static bool intel_mfc_brc_updated_check(struct encode_state *encode_state,
  384.                            struct intel_encoder_context *encoder_context)
  385. {
  386.     unsigned int rate_control_mode = encoder_context->rate_control_mode;
  387.     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
  388.     double cur_fps, cur_bitrate;
  389.     VAEncSequenceParameterBufferH264 *pSequenceParameter;
  390.  
  391.  
  392.     if (rate_control_mode != VA_RC_CBR) {
  393.         return false;
  394.     }
  395.  
  396.     pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
  397.  
  398.     cur_bitrate = pSequenceParameter->bits_per_second;
  399.     cur_fps = (double)pSequenceParameter->time_scale /
  400.                 (2 * (double)pSequenceParameter->num_units_in_tick);
  401.  
  402.     if ((cur_bitrate == mfc_context->brc.saved_bps) &&
  403.         (cur_fps == mfc_context->brc.saved_fps) &&
  404.         (pSequenceParameter->intra_period == mfc_context->brc.saved_intra_period) &&
  405.         (pSequenceParameter->intra_idr_period == mfc_context->brc.saved_idr_period) &&
  406.         (pSequenceParameter->intra_period == mfc_context->brc.saved_intra_period)) {
  407.         /* the parameters related with CBR are not updaetd */
  408.         return false;
  409.     }
  410.  
  411.     mfc_context->brc.saved_ip_period = pSequenceParameter->ip_period;
  412.     mfc_context->brc.saved_intra_period = pSequenceParameter->intra_period;
  413.     mfc_context->brc.saved_idr_period = pSequenceParameter->intra_idr_period;
  414.     mfc_context->brc.saved_fps = cur_fps;
  415.     mfc_context->brc.saved_bps = cur_bitrate;
  416.     return true;
  417. }
  418.  
  419. void intel_mfc_brc_prepare(struct encode_state *encode_state,
  420.                            struct intel_encoder_context *encoder_context)
  421. {
  422.     unsigned int rate_control_mode = encoder_context->rate_control_mode;
  423.     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
  424.  
  425.     if (rate_control_mode == VA_RC_CBR) {
  426.         bool brc_updated;
  427.         assert(encoder_context->codec != CODEC_MPEG2);
  428.  
  429.         brc_updated = intel_mfc_brc_updated_check(encode_state, encoder_context);
  430.  
  431.         /*Programing bit rate control */
  432.         if ((mfc_context->bit_rate_control_context[SLICE_TYPE_I].MaxSizeInWord == 0) ||
  433.              brc_updated) {
  434.             intel_mfc_bit_rate_control_context_init(encode_state, mfc_context);
  435.             intel_mfc_brc_init(encode_state, encoder_context);
  436.         }
  437.  
  438.         /*Programing HRD control */
  439.         if ((mfc_context->vui_hrd.i_cpb_size_value == 0) || brc_updated )
  440.             intel_mfc_hrd_context_init(encode_state, encoder_context);    
  441.     }
  442. }
  443.  
/*
 * Determine how many leading bytes of a packed NAL unit the hardware
 * should skip when inserting emulation-prevention bytes: leading zeros,
 * the 3- or 4-byte start code, the NAL header byte, and extra header
 * bytes for MVC/SVC NAL types.  Returns 0 (and warns) when no start
 * code is found.
 */
static int intel_avc_find_skipemulcnt(unsigned char *buf, int bits_length)
{
    int i, found;
    int leading_zero_cnt, byte_length, zero_byte;
    int nal_unit_type;
    int skip_cnt = 0;

#define NAL_UNIT_TYPE_MASK 0x1f
#define HW_MAX_SKIP_LENGTH 15

    /* Round the bit length up to a 32-bit boundary, then convert to bytes. */
    byte_length = ALIGN(bits_length, 32) >> 3;


    /* Scan for a 000001 or 00000001 start code prefix. */
    leading_zero_cnt = 0;
    found = 0;
    for(i = 0; i < byte_length - 4; i++) {
        if (((buf[i] == 0) && (buf[i + 1] == 0) && (buf[i + 2] == 1)) ||
            ((buf[i] == 0) && (buf[i + 1] == 0) && (buf[i + 2] == 0) && (buf[i + 3] == 1))) {
                found = 1;
                break;
            }
        leading_zero_cnt++;
    }
    if (!found) {
        /* warning message is complained. But anyway it will be inserted. */
        WARN_ONCE("Invalid packed header data. "
                   "Can't find the 000001 start_prefix code\n");
        return 0;
    }
    i = leading_zero_cnt;

    /* 4-byte start code (00000001) carries one extra zero byte. */
    zero_byte = 0;
    if (!((buf[i] == 0) && (buf[i + 1] == 0) && (buf[i + 2] == 1)))
        zero_byte = 1;

    skip_cnt = leading_zero_cnt + zero_byte + 3;

    /* the unit header byte is accounted */
    nal_unit_type = (buf[skip_cnt]) & NAL_UNIT_TYPE_MASK;
    skip_cnt += 1;

    if (nal_unit_type == 14 || nal_unit_type == 20 || nal_unit_type == 21) {
        /* more unit header bytes are accounted for MVC/SVC */
        skip_cnt += 3;
    }
    if (skip_cnt > HW_MAX_SKIP_LENGTH) {
        WARN_ONCE("Too many leading zeros are padded for packed data. "
                   "It is beyond the HW range.!!!\n");
    }
    return skip_cnt;
}
  495.  
  496. void intel_mfc_avc_pipeline_header_programing(VADriverContextP ctx,
  497.                                               struct encode_state *encode_state,
  498.                                               struct intel_encoder_context *encoder_context,
  499.                                               struct intel_batchbuffer *slice_batch)
  500. {
  501.     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
  502.     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SPS);
  503.     unsigned int rate_control_mode = encoder_context->rate_control_mode;
  504.     unsigned int skip_emul_byte_cnt;
  505.  
  506.     if (encode_state->packed_header_data[idx]) {
  507.         VAEncPackedHeaderParameterBuffer *param = NULL;
  508.         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
  509.         unsigned int length_in_bits;
  510.  
  511.         assert(encode_state->packed_header_param[idx]);
  512.         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
  513.         length_in_bits = param->bit_length;
  514.  
  515.         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
  516.         mfc_context->insert_object(ctx,
  517.                                    encoder_context,
  518.                                    header_data,
  519.                                    ALIGN(length_in_bits, 32) >> 5,
  520.                                    length_in_bits & 0x1f,
  521.                                    skip_emul_byte_cnt,
  522.                                    0,
  523.                                    0,
  524.                                    !param->has_emulation_bytes,
  525.                                    slice_batch);
  526.     }
  527.  
  528.     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_PPS);
  529.  
  530.     if (encode_state->packed_header_data[idx]) {
  531.         VAEncPackedHeaderParameterBuffer *param = NULL;
  532.         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
  533.         unsigned int length_in_bits;
  534.  
  535.         assert(encode_state->packed_header_param[idx]);
  536.         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
  537.         length_in_bits = param->bit_length;
  538.  
  539.         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
  540.  
  541.         mfc_context->insert_object(ctx,
  542.                                    encoder_context,
  543.                                    header_data,
  544.                                    ALIGN(length_in_bits, 32) >> 5,
  545.                                    length_in_bits & 0x1f,
  546.                                    skip_emul_byte_cnt,
  547.                                    0,
  548.                                    0,
  549.                                    !param->has_emulation_bytes,
  550.                                    slice_batch);
  551.     }
  552.    
  553.     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderH264_SEI);
  554.  
  555.     if (encode_state->packed_header_data[idx]) {
  556.         VAEncPackedHeaderParameterBuffer *param = NULL;
  557.         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
  558.         unsigned int length_in_bits;
  559.  
  560.         assert(encode_state->packed_header_param[idx]);
  561.         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
  562.         length_in_bits = param->bit_length;
  563.  
  564.         skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
  565.         mfc_context->insert_object(ctx,
  566.                                    encoder_context,
  567.                                    header_data,
  568.                                    ALIGN(length_in_bits, 32) >> 5,
  569.                                    length_in_bits & 0x1f,
  570.                                    skip_emul_byte_cnt,
  571.                                    0,
  572.                                    0,
  573.                                    !param->has_emulation_bytes,
  574.                                    slice_batch);
  575.     } else if (rate_control_mode == VA_RC_CBR) {
  576.         // this is frist AU
  577.         struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
  578.  
  579.         unsigned char *sei_data = NULL;
  580.    
  581.         int length_in_bits = build_avc_sei_buffer_timing(
  582.             mfc_context->vui_hrd.i_initial_cpb_removal_delay_length,
  583.             mfc_context->vui_hrd.i_initial_cpb_removal_delay,
  584.             0,
  585.             mfc_context->vui_hrd.i_cpb_removal_delay_length,                                                       mfc_context->vui_hrd.i_cpb_removal_delay * mfc_context->vui_hrd.i_frame_number,
  586.             mfc_context->vui_hrd.i_dpb_output_delay_length,
  587.             0,
  588.             &sei_data);
  589.         mfc_context->insert_object(ctx,
  590.                                    encoder_context,
  591.                                    (unsigned int *)sei_data,
  592.                                    ALIGN(length_in_bits, 32) >> 5,
  593.                                    length_in_bits & 0x1f,
  594.                                    5,
  595.                                    0,  
  596.                                    0,  
  597.                                    1,
  598.                                    slice_batch);  
  599.         free(sei_data);
  600.     }
  601. }
  602.  
/*
 * intel_mfc_avc_prepare:
 *
 * Bind all buffer objects the MFC (PAK) stage needs for one AVC frame:
 *  - the reconstructed surface and its direct-MV (DMV) buffers,
 *  - up to MAX_MFC_REFERENCE_SURFACES reference surfaces and their DMVs,
 *  - the raw input YUV surface,
 *  - the coded (output) buffer, whose header segment is reset.
 *
 * Every dri_bo stored into mfc_context takes an extra reference here; the
 * matching unreference happens elsewhere (context teardown, not visible in
 * this file chunk).
 *
 * Always returns VA_STATUS_SUCCESS (vaStatus is never modified below).
 */
VAStatus intel_mfc_avc_prepare(VADriverContextP ctx,
                               struct encode_state *encode_state,
                               struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    struct object_surface *obj_surface;
    struct object_buffer *obj_buffer;
    GenAvcSurface *gen6_avc_surface;
    dri_bo *bo;
    VAStatus vaStatus = VA_STATUS_SUCCESS;
    int i, j, enable_avc_ildb = 0;
    VAEncSliceParameterBufferH264 *slice_param;
    struct i965_coded_buffer_segment *coded_buffer_segment;
    VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
    int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
    int height_in_mbs = pSequenceParameter->picture_height_in_mbs;

    if (IS_GEN6(i965->intel.device_info)) {
        /* On the SNB it should be fixed to 128 for the DMV buffer */
        width_in_mbs = 128;
    }

    /* In-loop deblocking is enabled as soon as any slice element does not
     * fully disable it (disable_deblocking_filter_idc != 1); the scan stops
     * at the first such slice. */
    for (j = 0; j < encode_state->num_slice_params_ext && enable_avc_ildb == 0; j++) {
        assert(encode_state->slice_params_ext && encode_state->slice_params_ext[j]->buffer);
        slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[j]->buffer;

        for (i = 0; i < encode_state->slice_params_ext[j]->num_elements; i++) {
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            if (slice_param->disable_deblocking_filter_idc != 1) {
                enable_avc_ildb = 1;
                break;
            }

            slice_param++;
        }
    }

    /*Setup all the input&output object*/

    /* Setup current frame and current direct mv buffer*/
    obj_surface = encode_state->reconstructed_object;
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    /* Lazily attach per-surface DMV buffers (top/bottom field) the first
     * time this surface is used; freed later via gen_free_avc_surface. */
    if ( obj_surface->private_data == NULL) {
        gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
        assert(gen6_avc_surface);
        gen6_avc_surface->dmv_top =
            dri_bo_alloc(i965->intel.bufmgr,
                         "Buffer",
                         68 * width_in_mbs * height_in_mbs,
                         64);
        gen6_avc_surface->dmv_bottom =
            dri_bo_alloc(i965->intel.bufmgr,
                         "Buffer",
                         68 * width_in_mbs * height_in_mbs,
                         64);
        assert(gen6_avc_surface->dmv_top);
        assert(gen6_avc_surface->dmv_bottom);
        obj_surface->private_data = (void *)gen6_avc_surface;
        obj_surface->free_private_data = (void *)gen_free_avc_surface;
    }
    /* The last two DMV slots always belong to the current frame. */
    gen6_avc_surface = (GenAvcSurface *) obj_surface->private_data;
    mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo = gen6_avc_surface->dmv_top;
    mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 1].bo = gen6_avc_surface->dmv_bottom;
    dri_bo_reference(gen6_avc_surface->dmv_top);
    dri_bo_reference(gen6_avc_surface->dmv_bottom);

    /* The reconstructed picture is written either after the in-loop
     * deblocking filter or before it, depending on the slice settings. */
    if (enable_avc_ildb) {
        mfc_context->post_deblocking_output.bo = obj_surface->bo;
        dri_bo_reference(mfc_context->post_deblocking_output.bo);
    } else {
        mfc_context->pre_deblocking_output.bo = obj_surface->bo;
        dri_bo_reference(mfc_context->pre_deblocking_output.bo);
    }

    mfc_context->surface_state.width = obj_surface->orig_width;
    mfc_context->surface_state.height = obj_surface->orig_height;
    mfc_context->surface_state.w_pitch = obj_surface->width;
    mfc_context->surface_state.h_pitch = obj_surface->height;

    /* Setup reference frames and direct mv buffers*/
    for(i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++) {
        obj_surface = encode_state->reference_objects[i];

        if (obj_surface && obj_surface->bo) {
            mfc_context->reference_surfaces[i].bo = obj_surface->bo;
            dri_bo_reference(obj_surface->bo);

            /* Check DMV buffer */
            if ( obj_surface->private_data == NULL) {

                gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
                assert(gen6_avc_surface);
                gen6_avc_surface->dmv_top =
                    dri_bo_alloc(i965->intel.bufmgr,
                                 "Buffer",
                                 68 * width_in_mbs * height_in_mbs,
                                 64);
                gen6_avc_surface->dmv_bottom =
                    dri_bo_alloc(i965->intel.bufmgr,
                                 "Buffer",
                                 68 * width_in_mbs * height_in_mbs,
                                 64);
                assert(gen6_avc_surface->dmv_top);
                assert(gen6_avc_surface->dmv_bottom);
                obj_surface->private_data = gen6_avc_surface;
                obj_surface->free_private_data = gen_free_avc_surface;
            }

            gen6_avc_surface = (GenAvcSurface *) obj_surface->private_data;
            /* Setup DMV buffer */
            mfc_context->direct_mv_buffers[i*2].bo = gen6_avc_surface->dmv_top;
            mfc_context->direct_mv_buffers[i*2+1].bo = gen6_avc_surface->dmv_bottom;
            dri_bo_reference(gen6_avc_surface->dmv_top);
            dri_bo_reference(gen6_avc_surface->dmv_bottom);
        } else {
            /* The reference list is packed: stop at the first empty slot. */
            break;
        }
    }

    mfc_context->uncompressed_picture_source.bo = encode_state->input_yuv_object->bo;
    dri_bo_reference(mfc_context->uncompressed_picture_source.bo);

    /* Output: the PAK bitstream lands in the coded buffer right after the
     * driver's header segment; end_offset leaves 0x1000 bytes of headroom. */
    obj_buffer = encode_state->coded_buf_object;
    bo = obj_buffer->buffer_store->bo;
    mfc_context->mfc_indirect_pak_bse_object.bo = bo;
    mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
    mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
    dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);

    /* Reset the coded-buffer header segment for this frame. */
    dri_bo_map(bo, 1);
    coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
    coded_buffer_segment->mapped = 0;
    coded_buffer_segment->codec = encoder_context->codec;
    dri_bo_unmap(bo);

    return vaStatus;
}
/*
 * The LUT uses a pair of 4-bit fields: (shift, base).
 * 2^k * x = value, where x (the base) is in the range [1, 15].
 * So it is necessary to convert each cost into the nearest LUT format.
 * The derivation is:
 * 2^k * x = 2^n * (1 + deltaX)
 *    k + log2(x) = n + log2(1 + deltaX)
 *    log2(x) = n - k + log2(1 + deltaX)
 *    As x is in the range of [1, 15]
 *      4 > n - k + log2(1 + deltaX) >= 0
 *      =>    n + log2(1 + deltaX)  >= k > n - 4 + log2(1 + deltaX)
 *    Then we can derive the corresponding k and get the nearest LUT format.
 */
  760. int intel_format_lutvalue(int value, int max)
  761. {
  762.     int ret;
  763.     int logvalue, temp1, temp2;
  764.  
  765.     if (value <= 0)
  766.         return 0;
  767.  
  768.     logvalue = (int)(log2f((float)value));
  769.     if (logvalue < 4) {
  770.         ret = value;
  771.     } else {
  772.         int error, temp_value, base, j, temp_err;
  773.         error = value;
  774.         j = logvalue - 4 + 1;
  775.         ret = -1;
  776.         for(; j <= logvalue; j++) {
  777.             if (j == 0) {
  778.                 base = value >> j;
  779.             } else {
  780.                 base = (value + (1 << (j - 1)) - 1) >> j;
  781.             }
  782.             if (base >= 16)
  783.                 continue;
  784.  
  785.             temp_value = base << j;
  786.             temp_err = abs(value - temp_value);
  787.             if (temp_err < error) {
  788.                 error = temp_err;
  789.                 ret = (j << 4) | base;
  790.                 if (temp_err == 0)
  791.                     break;
  792.             }
  793.         }
  794.     }
  795.     temp1 = (ret & 0xf) << ((ret & 0xf0) >> 4);
  796.     temp2 = (max & 0xf) << ((max & 0xf0) >> 4);
  797.     if (temp1 > temp2)
  798.         ret = max;
  799.     return ret;
  800.  
  801. }
  802.  
  803.  
  804. #define         QP_MAX                  52
  805. #define         VP8_QP_MAX              128
  806.  
  807.  
  808. static float intel_lambda_qp(int qp)
  809. {
  810.     float value, lambdaf;
  811.     value = qp;
  812.     value = value / 6 - 2;
  813.     if (value < 0)
  814.         value = 0;
  815.     lambdaf = roundf(powf(2, value));
  816.     return lambdaf;
  817. }
  818.  
  819.  
  820. void intel_vme_update_mbmv_cost(VADriverContextP ctx,
  821.                                 struct encode_state *encode_state,
  822.                                 struct intel_encoder_context *encoder_context)
  823. {
  824.     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
  825.     struct gen6_vme_context *vme_context = encoder_context->vme_context;
  826.     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
  827.     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
  828.     int qp, m_cost, j, mv_count;
  829.     uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
  830.     float   lambda, m_costf;
  831.  
  832.     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
  833.  
  834.    
  835.     if (encoder_context->rate_control_mode == VA_RC_CQP)
  836.         qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
  837.     else
  838.         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
  839.  
  840.     if (vme_state_message == NULL)
  841.         return;
  842.  
  843.     assert(qp <= QP_MAX);
  844.     lambda = intel_lambda_qp(qp);
  845.     if (slice_type == SLICE_TYPE_I) {
  846.         vme_state_message[MODE_INTRA_16X16] = 0;
  847.         m_cost = lambda * 4;
  848.         vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
  849.         m_cost = lambda * 16;
  850.         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
  851.         m_cost = lambda * 3;
  852.         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
  853.     } else {
  854.         m_cost = 0;
  855.         vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
  856.         for (j = 1; j < 3; j++) {
  857.             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
  858.             m_cost = (int)m_costf;
  859.             vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
  860.         }
  861.         mv_count = 3;
  862.         for (j = 4; j <= 64; j *= 2) {
  863.             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
  864.             m_cost = (int)m_costf;
  865.             vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
  866.             mv_count++;
  867.         }
  868.  
  869.         if (qp <= 25) {
  870.             vme_state_message[MODE_INTRA_16X16] = 0x4a;
  871.             vme_state_message[MODE_INTRA_8X8] = 0x4a;
  872.             vme_state_message[MODE_INTRA_4X4] = 0x4a;
  873.             vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
  874.             vme_state_message[MODE_INTER_16X16] = 0x4a;
  875.             vme_state_message[MODE_INTER_16X8] = 0x4a;
  876.             vme_state_message[MODE_INTER_8X8] = 0x4a;
  877.             vme_state_message[MODE_INTER_8X4] = 0x4a;
  878.             vme_state_message[MODE_INTER_4X4] = 0x4a;
  879.             vme_state_message[MODE_INTER_BWD] = 0x2a;
  880.             return;
  881.         }
  882.         m_costf = lambda * 10;
  883.         vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
  884.         m_cost = lambda * 14;
  885.         vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
  886.         m_cost = lambda * 24;
  887.         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
  888.         m_costf = lambda * 3.5;
  889.         m_cost = m_costf;
  890.         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
  891.         if (slice_type == SLICE_TYPE_P) {
  892.             m_costf = lambda * 2.5;
  893.             m_cost = m_costf;
  894.             vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
  895.             m_costf = lambda * 4;
  896.             m_cost = m_costf;
  897.             vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
  898.             m_costf = lambda * 1.5;
  899.             m_cost = m_costf;
  900.             vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
  901.             m_costf = lambda * 3;
  902.             m_cost = m_costf;
  903.             vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
  904.             m_costf = lambda * 5;
  905.             m_cost = m_costf;
  906.             vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
  907.             /* BWD is not used in P-frame */
  908.             vme_state_message[MODE_INTER_BWD] = 0;
  909.         } else {
  910.             m_costf = lambda * 2.5;
  911.             m_cost = m_costf;
  912.             vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
  913.             m_costf = lambda * 5.5;
  914.             m_cost = m_costf;
  915.             vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
  916.             m_costf = lambda * 3.5;
  917.             m_cost = m_costf;
  918.             vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
  919.             m_costf = lambda * 5.0;
  920.             m_cost = m_costf;
  921.             vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
  922.             m_costf = lambda * 6.5;
  923.             m_cost = m_costf;
  924.             vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
  925.             m_costf = lambda * 1.5;
  926.             m_cost = m_costf;
  927.             vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
  928.         }
  929.     }
  930. }
  931.  
  932. void intel_vme_vp8_update_mbmv_cost(VADriverContextP ctx,
  933.                                 struct encode_state *encode_state,
  934.                                 struct intel_encoder_context *encoder_context)
  935. {
  936.     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
  937.     struct gen6_vme_context *vme_context = encoder_context->vme_context;
  938.     VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
  939.     VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
  940.     int qp, m_cost, j, mv_count;
  941.     uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
  942.     float   lambda, m_costf;
  943.  
  944.     int is_key_frame = !pic_param->pic_flags.bits.frame_type;
  945.     int slice_type = (is_key_frame ? SLICE_TYPE_I : SLICE_TYPE_P);
  946.  
  947.     if (vme_state_message == NULL)
  948.         return;
  949.  
  950.     if (encoder_context->rate_control_mode == VA_RC_CQP)
  951.         qp = q_matrix->quantization_index[0];
  952.     else
  953.         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
  954.  
  955.     lambda = intel_lambda_qp(qp * QP_MAX / VP8_QP_MAX);
  956.     if (is_key_frame) {
  957.         vme_state_message[MODE_INTRA_16X16] = 0;
  958.         m_cost = lambda * 16;
  959.         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
  960.     } else {
  961.         m_cost = 0;
  962.         vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
  963.         for (j = 1; j < 3; j++) {
  964.             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
  965.             m_cost = (int)m_costf;
  966.             vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
  967.         }
  968.         mv_count = 3;
  969.         for (j = 4; j <= 64; j *= 2) {
  970.             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
  971.             m_cost = (int)m_costf;
  972.             vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
  973.             mv_count++;
  974.         }
  975.  
  976.         if (qp < 92 ) {
  977.             vme_state_message[MODE_INTRA_16X16] = 0x4a;
  978.             vme_state_message[MODE_INTRA_4X4] = 0x4a;
  979.             vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
  980.             vme_state_message[MODE_INTER_16X16] = 0x4a;
  981.             vme_state_message[MODE_INTER_16X8] = 0x4a;
  982.             vme_state_message[MODE_INTER_8X8] = 0x4a;
  983.             vme_state_message[MODE_INTER_4X4] = 0x4a;
  984.             return;
  985.         }
  986.         m_costf = lambda * 10;
  987.         vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
  988.         m_cost = lambda * 24;
  989.         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
  990.            
  991.         m_costf = lambda * 2.5;
  992.         m_cost = m_costf;
  993.         vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
  994.         m_costf = lambda * 4;
  995.         m_cost = m_costf;
  996.         vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
  997.         m_costf = lambda * 1.5;
  998.         m_cost = m_costf;
  999.         vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
  1000.         m_costf = lambda * 5;
  1001.         m_cost = m_costf;
  1002.         vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
  1003.         /* BWD is not used in P-frame */
  1004.         vme_state_message[MODE_INTER_BWD] = 0;
  1005.     }
  1006. }
  1007.  
  1008. #define         MB_SCOREBOARD_A         (1 << 0)
  1009. #define         MB_SCOREBOARD_B         (1 << 1)
  1010. #define         MB_SCOREBOARD_C         (1 << 2)
  1011. void
  1012. gen7_vme_scoreboard_init(VADriverContextP ctx, struct gen6_vme_context *vme_context)
  1013. {
  1014.     vme_context->gpe_context.vfe_desc5.scoreboard0.enable = 1;
  1015.     vme_context->gpe_context.vfe_desc5.scoreboard0.type = SCOREBOARD_STALLING;
  1016.     vme_context->gpe_context.vfe_desc5.scoreboard0.mask = (MB_SCOREBOARD_A |
  1017.                                                            MB_SCOREBOARD_B |
  1018.                                                            MB_SCOREBOARD_C);
  1019.  
  1020.     /* In VME prediction the current mb depends on the neighbour
  1021.      * A/B/C macroblock. So the left/up/up-right dependency should
  1022.      * be considered.
  1023.      */
  1024.     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x0 = -1;
  1025.     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y0 = 0;
  1026.     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x1 = 0;
  1027.     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y1 = -1;
  1028.     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_x2 = 1;
  1029.     vme_context->gpe_context.vfe_desc6.scoreboard1.delta_y2 = -1;
  1030.  
  1031.     vme_context->gpe_context.vfe_desc7.dword = 0;
  1032.     return;
  1033. }
  1034.  
  1035. /* check whether the mb of (x_index, y_index) is out of bound */
  1036. static inline int loop_in_bounds(int x_index, int y_index, int first_mb, int num_mb, int mb_width, int mb_height)
  1037. {
  1038.     int mb_index;
  1039.     if (x_index < 0 || x_index >= mb_width)
  1040.         return -1;
  1041.     if (y_index < 0 || y_index >= mb_height)
  1042.         return -1;
  1043.  
  1044.     mb_index = y_index * mb_width + x_index;
  1045.     if (mb_index < first_mb || mb_index > (first_mb + num_mb))
  1046.         return -1;
  1047.     return 0;
  1048. }
  1049.  
/*
 * gen7_vme_walker_fill_vme_batchbuffer:
 *
 * Fill the VME batch buffer with one MEDIA_OBJECT command per macroblock
 * of every slice, emitted in a diagonal wavefront order compatible with
 * the scoreboard set up in gen7_vme_scoreboard_init(): from each starting
 * MB the inner loop steps (x - 2, y + 1), so a MB is only emitted after
 * its A/B/C neighbours.  Two outer passes are used: the first walks the
 * diagonals starting from columns [first_mb_x .. mb_width-3] of the
 * slice's first row, the second the diagonals starting at column
 * mb_width-2 and below.  Each command carries the scoreboard (X, Y) term,
 * the dependency mask, and the inline MB position/flags data for the
 * given VME 'kernel'.
 */
void
gen7_vme_walker_fill_vme_batchbuffer(VADriverContextP ctx,
                                     struct encode_state *encode_state,
                                     int mb_width, int mb_height,
                                     int kernel,
                                     int transform_8x8_mode_flag,
                                     struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    int mb_row;
    int s;
    unsigned int *command_ptr;

#define         USE_SCOREBOARD          (1 << 21)

    dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
    command_ptr = vme_context->vme_batchbuffer.bo->virtual;

    for (s = 0; s < encode_state->num_slice_params_ext; s++) {
        VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[s]->buffer;
        int first_mb = pSliceParameter->macroblock_address;
        int num_mb = pSliceParameter->num_macroblocks;
        unsigned int mb_intra_ub, score_dep;
        int x_outer, y_outer, x_inner, y_inner;
        int xtemp_outer = 0;

        x_outer = first_mb % mb_width;
        y_outer = first_mb / mb_width;
        mb_row = y_outer;   /* first MB row of this slice */

        /* Pass 1: diagonals starting on the slice's first row, columns
         * [x_outer .. mb_width-3]. */
        for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
            x_inner = x_outer;
            y_inner = y_outer;
            /* Walk one diagonal: step down-left by (-2, +1) per MB. */
            for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
                /* Intra-prediction neighbour availability and scoreboard
                 * dependencies for this MB. */
                mb_intra_ub = 0;
                score_dep = 0;
                if (x_inner != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;    /* A: left */
                    score_dep |= MB_SCOREBOARD_A;
                }
                if (y_inner != mb_row) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;     /* B: top */
                    score_dep |= MB_SCOREBOARD_B;
                    if (x_inner != 0)
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D; /* D: top-left */
                    if (x_inner != (mb_width -1)) {
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C; /* C: top-right */
                        score_dep |= MB_SCOREBOARD_C;
                    }
                }

                *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
                *command_ptr++ = kernel;
                *command_ptr++ = USE_SCOREBOARD;
                /* Indirect data */
                *command_ptr++ = 0;
                /* the (X, Y) term of scoreboard */
                *command_ptr++ = ((y_inner << 16) | x_inner);
                *command_ptr++ = score_dep;
                /*inline data */
                *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
                *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));
                x_inner -= 2;
                y_inner += 1;
            }
            x_outer += 1;
        }

        /* Pass 2: remaining diagonals, starting at column mb_width-2 and
         * wrapping down row by row until the slice is exhausted. */
        xtemp_outer = mb_width - 2;
        if (xtemp_outer < 0)
            xtemp_outer = 0;
        x_outer = xtemp_outer;
        y_outer = first_mb / mb_width;
        for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
            y_inner = y_outer;
            x_inner = x_outer;
            /* Same diagonal walk and neighbour logic as pass 1. */
            for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
                mb_intra_ub = 0;
                score_dep = 0;
                if (x_inner != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
                    score_dep |= MB_SCOREBOARD_A;
                }
                if (y_inner != mb_row) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
                    score_dep |= MB_SCOREBOARD_B;
                    if (x_inner != 0)
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;

                    if (x_inner != (mb_width -1)) {
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                        score_dep |= MB_SCOREBOARD_C;
                    }
                }

                *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
                *command_ptr++ = kernel;
                *command_ptr++ = USE_SCOREBOARD;
                /* Indirect data */
                *command_ptr++ = 0;
                /* the (X, Y) term of scoreboard */
                *command_ptr++ = ((y_inner << 16) | x_inner);
                *command_ptr++ = score_dep;
                /*inline data */
                *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
                *command_ptr++ = ((1 << 18) | (1 << 16) | transform_8x8_mode_flag | (mb_intra_ub << 8));

                x_inner -= 2;
                y_inner += 1;
            }
            /* Advance the diagonal start: right along the last columns,
             * then down to the next row. */
            x_outer++;
            if (x_outer >= mb_width) {
                y_outer += 1;
                x_outer = xtemp_outer;
            }
        }
    }

    *command_ptr++ = 0;
    *command_ptr++ = MI_BATCH_BUFFER_END;

    dri_bo_unmap(vme_context->vme_batchbuffer.bo);
}
  1173.  
  1174. static uint8_t
  1175. intel_get_ref_idx_state_1(VAPictureH264 *va_pic, unsigned int frame_store_id)
  1176. {
  1177.     unsigned int is_long_term =
  1178.         !!(va_pic->flags & VA_PICTURE_H264_LONG_TERM_REFERENCE);
  1179.     unsigned int is_top_field =
  1180.         !!(va_pic->flags & VA_PICTURE_H264_TOP_FIELD);
  1181.     unsigned int is_bottom_field =
  1182.         !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD);
  1183.  
  1184.     return ((is_long_term                         << 6) |
  1185.             ((is_top_field ^ is_bottom_field ^ 1) << 5) |
  1186.             (frame_store_id                       << 1) |
  1187.             ((is_top_field ^ 1) & is_bottom_field));
  1188. }
  1189.  
/*
 * intel_mfc_avc_ref_idx_state:
 *
 * Emit the two MFX_AVC_REF_IDX_STATE commands (reference list L0 and L1)
 * for the current slice.  All 32 entries of each list start as 0x80
 * (invalid); only the single reference actually chosen by VME is patched
 * in, at the position given by the macroblock reference index, using the
 * byte layout built by intel_get_ref_idx_state_1().
 */
void
intel_mfc_avc_ref_idx_state(VADriverContextP ctx,
                            struct encode_state *encode_state,
                            struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    int slice_type;
    struct object_surface *obj_surface;
    unsigned int fref_entry, bref_entry;
    int frame_index, i;
    VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;

    /* 0x80 in every byte marks all four entries of a DW as invalid. */
    fref_entry = 0x80808080;
    bref_entry = 0x80808080;
    slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);

    /* L0 (forward) reference: used by both P and B slices. */
    if (slice_type == SLICE_TYPE_P || slice_type == SLICE_TYPE_B) {
        int ref_idx_l0 = (vme_context->ref_index_in_mb[0] & 0xff);

        if (ref_idx_l0 > 3) {
            WARN_ONCE("ref_idx_l0 is out of range\n");
            ref_idx_l0 = 0;
        }

        /* Locate the used L0 reference in the DPB to get its frame store id. */
        obj_surface = vme_context->used_reference_objects[0];
        frame_index = -1;
        for (i = 0; i < 16; i++) {
            if (obj_surface &&
                obj_surface == encode_state->reference_objects[i]) {
                frame_index = i;
                break;
            }
        }
        if (frame_index == -1) {
            WARN_ONCE("RefPicList0 is not found in DPB!\n");
        } else {
            /* Patch the entry at byte position ref_idx_l0 of the list. */
            int ref_idx_l0_shift = ref_idx_l0 * 8;
            fref_entry &= ~(0xFF << ref_idx_l0_shift);
            fref_entry += (intel_get_ref_idx_state_1(vme_context->used_references[0], frame_index) << ref_idx_l0_shift);
        }
        /* NOTE(review): warning on frame_index == 1 looks suspicious —
         * slot 1 is a valid DPB position; possibly meant as a debug check
         * for a specific reference-list layout.  Confirm intent. */
        if(frame_index == 1){
            WARN_ONCE("Input ref list is Wrong !\n");
        }
    }

    /* L1 (backward) reference: only for B slices. */
    if (slice_type == SLICE_TYPE_B) {
        int ref_idx_l1 = (vme_context->ref_index_in_mb[1] & 0xff);

        if (ref_idx_l1 > 3) {
            WARN_ONCE("ref_idx_l1 is out of range\n");
            ref_idx_l1 = 0;
        }

        /* Locate the used L1 reference in the DPB to get its frame store id. */
        obj_surface = vme_context->used_reference_objects[1];
        frame_index = -1;
        for (i = 0; i < 16; i++) {
            if (obj_surface &&
                obj_surface == encode_state->reference_objects[i]) {
                frame_index = i;
                break;
            }
        }
        if (frame_index == -1) {
            WARN_ONCE("RefPicList1 is not found in DPB!\n");
        } else {
            /* Patch the entry at byte position ref_idx_l1 of the list. */
            int ref_idx_l1_shift = ref_idx_l1 * 8;
            bref_entry &= ~(0xFF << ref_idx_l1_shift);
            bref_entry += (intel_get_ref_idx_state_1(vme_context->used_references[1], frame_index) << ref_idx_l1_shift);
        }
    }

    BEGIN_BCS_BATCH(batch, 10);
    OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
    OUT_BCS_BATCH(batch, 0);                  //Select L0
    OUT_BCS_BATCH(batch, fref_entry);         //Only 1 reference
    for(i = 0; i < 7; i++) {
        OUT_BCS_BATCH(batch, 0x80808080);
    }
    ADVANCE_BCS_BATCH(batch);

    BEGIN_BCS_BATCH(batch, 10);
    OUT_BCS_BATCH(batch, MFX_AVC_REF_IDX_STATE | 8);
    OUT_BCS_BATCH(batch, 1);                  //Select L1
    OUT_BCS_BATCH(batch, bref_entry);         //Only 1 reference
    for(i = 0; i < 7; i++) {
        OUT_BCS_BATCH(batch, 0x80808080);
    }
    ADVANCE_BCS_BATCH(batch);
}
  1280.  
  1281.  
  1282. void intel_vme_mpeg2_state_setup(VADriverContextP ctx,
  1283.                                  struct encode_state *encode_state,
  1284.                                  struct intel_encoder_context *encoder_context)
  1285. {
  1286.     struct gen6_vme_context *vme_context = encoder_context->vme_context;
  1287.     uint32_t *vme_state_message = (uint32_t *)(vme_context->vme_state_message);
  1288.     VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
  1289.     int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
  1290.     int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
  1291.     uint32_t mv_x, mv_y;
  1292.     VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
  1293.     VAEncPictureParameterBufferMPEG2 *pic_param = NULL;
  1294.     slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;
  1295.  
  1296.     if (vme_context->mpeg2_level == MPEG2_LEVEL_LOW) {
  1297.         mv_x = 512;
  1298.         mv_y = 64;
  1299.     } else if (vme_context->mpeg2_level == MPEG2_LEVEL_MAIN) {
  1300.         mv_x = 1024;
  1301.         mv_y = 128;
  1302.     } else if (vme_context->mpeg2_level == MPEG2_LEVEL_HIGH) {
  1303.         mv_x = 2048;
  1304.         mv_y = 128;
  1305.     } else {
  1306.         WARN_ONCE("Incorrect Mpeg2 level setting!\n");
  1307.         mv_x = 512;
  1308.         mv_y = 64;
  1309.     }
  1310.  
  1311.     pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
  1312.     if (pic_param->picture_type != VAEncPictureTypeIntra) {
  1313.         int qp, m_cost, j, mv_count;
  1314.         float   lambda, m_costf;
  1315.         slice_param = (VAEncSliceParameterBufferMPEG2 *)
  1316.             encode_state->slice_params_ext[0]->buffer;
  1317.         qp = slice_param->quantiser_scale_code;
  1318.         lambda = intel_lambda_qp(qp);
  1319.         /* No Intra prediction. So it is zero */
  1320.         vme_state_message[MODE_INTRA_8X8] = 0;
  1321.         vme_state_message[MODE_INTRA_4X4] = 0;
  1322.         vme_state_message[MODE_INTER_MV0] = 0;
  1323.         for (j = 1; j < 3; j++) {
  1324.             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
  1325.             m_cost = (int)m_costf;
  1326.             vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
  1327.         }
  1328.         mv_count = 3;
  1329.         for (j = 4; j <= 64; j *= 2) {
  1330.             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
  1331.             m_cost = (int)m_costf;
  1332.             vme_state_message[MODE_INTER_MV0 + mv_count] =
  1333.                 intel_format_lutvalue(m_cost, 0x6f);
  1334.             mv_count++;
  1335.         }
  1336.         m_cost = lambda;
  1337.         /* It can only perform the 16x16 search. So mode cost can be ignored for
  1338.          * the other mode. for example: 16x8/8x8
  1339.          */
  1340.         vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
  1341.         vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
  1342.  
  1343.         vme_state_message[MODE_INTER_16X8] = 0;
  1344.         vme_state_message[MODE_INTER_8X8] = 0;
  1345.         vme_state_message[MODE_INTER_8X4] = 0;
  1346.         vme_state_message[MODE_INTER_4X4] = 0;
  1347.         vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
  1348.  
  1349.     }
  1350.     vme_state_message[MPEG2_MV_RANGE] = (mv_y << 16) | (mv_x);
  1351.  
  1352.     vme_state_message[MPEG2_PIC_WIDTH_HEIGHT] = (height_in_mbs << 16) |
  1353.         width_in_mbs;
  1354. }
  1355.  
/*
 * Emit one MEDIA_OBJECT command per macroblock into the VME batch buffer
 * for MPEG-2.  The MBs are visited in a diagonal order (each inner step
 * moves x -= 2, y += 1) so that the scoreboard dependencies on the
 * A (left), B (top) and C (top-right) neighbours are produced before
 * they are consumed by the hardware.
 *
 * The walk has two phases: the first iterates diagonals whose start MB
 * lies on the top row (x_outer < mb_width - 2); the second covers the
 * remaining diagonals, restarting from column mb_width - 2 on each
 * successive row.
 *
 * NOTE(review): encode_state is not referenced in this function; it
 * appears to be kept for signature parity with the other walker
 * helpers — confirm against callers before removing.
 */
void
gen7_vme_mpeg2_walker_fill_vme_batchbuffer(VADriverContextP ctx,
                                           struct encode_state *encode_state,
                                           int mb_width, int mb_height,
                                           int kernel,
                                           struct intel_encoder_context *encoder_context)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    unsigned int *command_ptr;

/* scoreboard-enable bit placed in the third MEDIA_OBJECT dword */
#define         MPEG2_SCOREBOARD                (1 << 21)

    /* map for writing; commands are appended through command_ptr */
    dri_bo_map(vme_context->vme_batchbuffer.bo, 1);
    command_ptr = vme_context->vme_batchbuffer.bo->virtual;

    {
        unsigned int mb_intra_ub, score_dep;
        int x_outer, y_outer, x_inner, y_inner;
        int xtemp_outer = 0;
        int first_mb = 0;
        int num_mb = mb_width * mb_height;

        x_outer = 0;
        y_outer = 0;


        /* phase 1: diagonals starting on the top row */
        for (; x_outer < (mb_width -2 ) && !loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
            x_inner = x_outer;
            y_inner = y_outer;
            for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
                /* intra-prediction neighbour availability and the matching
                 * scoreboard dependency mask for this MB position */
                mb_intra_ub = 0;
                score_dep = 0;
                if (x_inner != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
                    score_dep |= MB_SCOREBOARD_A;
                }
                if (y_inner != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
                    score_dep |= MB_SCOREBOARD_B;

                    if (x_inner != 0)
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;

                    if (x_inner != (mb_width -1)) {
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                        score_dep |= MB_SCOREBOARD_C;
                    }
                }

                /* 8-dword MEDIA_OBJECT: header, kernel, scoreboard enable,
                 * indirect data, (X,Y) scoreboard term, dependency mask,
                 * then two inline data dwords */
                *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
                *command_ptr++ = kernel;
                *command_ptr++ = MPEG2_SCOREBOARD;
                /* Indirect data */
                *command_ptr++ = 0;
                /* the (X, Y) term of scoreboard */
                *command_ptr++ = ((y_inner << 16) | x_inner);
                *command_ptr++ = score_dep;
                /*inline data */
                *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
                *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));
                /* step down the diagonal */
                x_inner -= 2;
                y_inner += 1;
            }
            x_outer += 1;
        }

        /* phase 2: remaining diagonals, restarting at column mb_width - 2
         * (clamped to 0 for very narrow pictures) on each new row */
        xtemp_outer = mb_width - 2;
        if (xtemp_outer < 0)
            xtemp_outer = 0;
        x_outer = xtemp_outer;
        y_outer = 0;
        for (;!loop_in_bounds(x_outer, y_outer, first_mb, num_mb, mb_width, mb_height); ) {
            y_inner = y_outer;
            x_inner = x_outer;
            for (; !loop_in_bounds(x_inner, y_inner, first_mb, num_mb, mb_width, mb_height);) {
                mb_intra_ub = 0;
                score_dep = 0;
                if (x_inner != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
                    score_dep |= MB_SCOREBOARD_A;
                }
                if (y_inner != 0) {
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
                    score_dep |= MB_SCOREBOARD_B;

                    if (x_inner != 0)
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;

                    if (x_inner != (mb_width -1)) {
                        mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
                        score_dep |= MB_SCOREBOARD_C;
                    }
                }

                *command_ptr++ = (CMD_MEDIA_OBJECT | (8 - 2));
                *command_ptr++ = kernel;
                *command_ptr++ = MPEG2_SCOREBOARD;
                /* Indirect data */
                *command_ptr++ = 0;
                /* the (X, Y) term of scoreboard */
                *command_ptr++ = ((y_inner << 16) | x_inner);
                *command_ptr++ = score_dep;
                /*inline data */
                *command_ptr++ = (mb_width << 16 | y_inner << 8 | x_inner);
                *command_ptr++ = ((1 << 18) | (1 << 16) | (mb_intra_ub << 8));

                x_inner -= 2;
                y_inner += 1;
            }
            /* advance the diagonal start; wrap to the next row once the
             * right edge is passed */
            x_outer++;
            if (x_outer >= mb_width) {
                y_outer += 1;
                x_outer = xtemp_outer;
            }
        }
    }

    *command_ptr++ = 0;
    *command_ptr++ = MI_BATCH_BUFFER_END;

    dri_bo_unmap(vme_context->vme_batchbuffer.bo);
    return;
}
  1479.  
  1480. static int
  1481. avc_temporal_find_surface(VAPictureH264 *curr_pic,
  1482.                           VAPictureH264 *ref_list,
  1483.                           int num_pictures,
  1484.                           int dir)
  1485. {
  1486.     int i, found = -1, min = 0x7FFFFFFF;
  1487.  
  1488.     for (i = 0; i < num_pictures; i++) {
  1489.         int tmp;
  1490.  
  1491.         if ((ref_list[i].flags & VA_PICTURE_H264_INVALID) ||
  1492.             (ref_list[i].picture_id == VA_INVALID_SURFACE))
  1493.             break;
  1494.  
  1495.         tmp = curr_pic->TopFieldOrderCnt - ref_list[i].TopFieldOrderCnt;
  1496.  
  1497.         if (dir)
  1498.             tmp = -tmp;
  1499.  
  1500.         if (tmp > 0 && tmp < min) {
  1501.             min = tmp;
  1502.             found = i;
  1503.         }
  1504.     }
  1505.  
  1506.     return found;
  1507. }
  1508.  
  1509. void
  1510. intel_avc_vme_reference_state(VADriverContextP ctx,
  1511.                               struct encode_state *encode_state,
  1512.                               struct intel_encoder_context *encoder_context,
  1513.                               int list_index,
  1514.                               int surface_index,
  1515.                               void (* vme_source_surface_state)(
  1516.                                   VADriverContextP ctx,
  1517.                                   int index,
  1518.                                   struct object_surface *obj_surface,
  1519.                                   struct intel_encoder_context *encoder_context))
  1520. {
  1521.     struct gen6_vme_context *vme_context = encoder_context->vme_context;
  1522.     struct object_surface *obj_surface = NULL;
  1523.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1524.     VASurfaceID ref_surface_id;
  1525.     VAEncPictureParameterBufferH264 *pic_param = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
  1526.     VAEncSliceParameterBufferH264 *slice_param = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[0]->buffer;
  1527.     int max_num_references;
  1528.     VAPictureH264 *curr_pic;
  1529.     VAPictureH264 *ref_list;
  1530.     int ref_idx;
  1531.  
  1532.     if (list_index == 0) {
  1533.         max_num_references = pic_param->num_ref_idx_l0_active_minus1 + 1;
  1534.         ref_list = slice_param->RefPicList0;
  1535.     } else {
  1536.         max_num_references = pic_param->num_ref_idx_l1_active_minus1 + 1;
  1537.         ref_list = slice_param->RefPicList1;
  1538.     }
  1539.  
  1540.     if (max_num_references == 1) {
  1541.         if (list_index == 0) {
  1542.             ref_surface_id = slice_param->RefPicList0[0].picture_id;
  1543.             vme_context->used_references[0] = &slice_param->RefPicList0[0];
  1544.         } else {
  1545.             ref_surface_id = slice_param->RefPicList1[0].picture_id;
  1546.             vme_context->used_references[1] = &slice_param->RefPicList1[0];
  1547.         }
  1548.  
  1549.         if (ref_surface_id != VA_INVALID_SURFACE)
  1550.             obj_surface = SURFACE(ref_surface_id);
  1551.  
  1552.         if (!obj_surface ||
  1553.             !obj_surface->bo) {
  1554.             obj_surface = encode_state->reference_objects[list_index];
  1555.             vme_context->used_references[list_index] = &pic_param->ReferenceFrames[list_index];
  1556.         }
  1557.  
  1558.         ref_idx = 0;
  1559.     } else {
  1560.         curr_pic = &pic_param->CurrPic;
  1561.  
  1562.         /* select the reference frame in temporal space */
  1563.         ref_idx = avc_temporal_find_surface(curr_pic, ref_list, max_num_references, list_index == 1);
  1564.         ref_surface_id = ref_list[ref_idx].picture_id;
  1565.  
  1566.         if (ref_surface_id != VA_INVALID_SURFACE) /* otherwise warning later */
  1567.             obj_surface = SURFACE(ref_surface_id);
  1568.  
  1569.         vme_context->used_reference_objects[list_index] = obj_surface;
  1570.         vme_context->used_references[list_index] = &ref_list[ref_idx];
  1571.     }
  1572.  
  1573.     if (obj_surface &&
  1574.         obj_surface->bo) {
  1575.         assert(ref_idx >= 0);
  1576.         vme_context->used_reference_objects[list_index] = obj_surface;
  1577.         vme_source_surface_state(ctx, surface_index, obj_surface, encoder_context);
  1578.         vme_context->ref_index_in_mb[list_index] = (ref_idx << 24 |
  1579.                                                     ref_idx << 16 |
  1580.                                                     ref_idx <<  8 |
  1581.                                                     ref_idx);
  1582.     } else {
  1583.         vme_context->used_reference_objects[list_index] = NULL;
  1584.         vme_context->used_references[list_index] = NULL;
  1585.         vme_context->ref_index_in_mb[list_index] = 0;
  1586.     }
  1587. }
  1588.  
/*
 * Insert the packed header data belonging to one slice into the slice
 * batch: first every raw packed-data buffer attached to the slice
 * (skipping any packed slice header), then the slice header itself,
 * which must be emitted last.  If the application did not supply a
 * packed slice header, one is generated with build_avc_slice_header().
 */
void intel_avc_slice_insert_packed_data(VADriverContextP ctx,
                                        struct encode_state *encode_state,
                                        struct intel_encoder_context *encoder_context,
                                        int slice_index,
                                        struct intel_batchbuffer *slice_batch)
{
    int count, i, start_index;
    unsigned int length_in_bits;
    VAEncPackedHeaderParameterBuffer *param = NULL;
    unsigned int *header_data = NULL;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    int slice_header_index;

    /* slice_header_index == 0 means no app-supplied packed slice header */
    if (encode_state->slice_header_index[slice_index] == 0)
        slice_header_index = -1;
    else
        slice_header_index = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);

    count = encode_state->slice_rawdata_count[slice_index];
    start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);

    /* emit the raw packed data buffers attached to this slice */
    for (i = 0; i < count; i++) {
        unsigned int skip_emul_byte_cnt;

        header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;

        param = (VAEncPackedHeaderParameterBuffer *)
                    (encode_state->packed_header_params_ext[start_index + i]->buffer);

        /* skip the slice header packed data type as it is lastly inserted */
        if (param->type == VAEncPackedHeaderSlice)
            continue;

        length_in_bits = param->bit_length;

        skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);

        /* as the slice header is still required, the last header flag is set to
         * zero.
         */
        mfc_context->insert_object(ctx,
                                   encoder_context,
                                   header_data,
                                   ALIGN(length_in_bits, 32) >> 5,
                                   length_in_bits & 0x1f,
                                   skip_emul_byte_cnt,
                                   0,
                                   0,
                                   !param->has_emulation_bytes,
                                   slice_batch);
    }

    if (slice_header_index == -1) {
        unsigned char *slice_header = NULL;
        int slice_header_length_in_bits = 0;
        VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
        VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
        VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;

        /* No slice header data is passed. And the driver needs to generate it */
        /* For the Normal H264 */
        /* build_avc_slice_header() allocates slice_header; freed below */
        slice_header_length_in_bits = build_avc_slice_header(pSequenceParameter,
                                                             pPicParameter,
                                                             pSliceParameter,
                                                             &slice_header);
        mfc_context->insert_object(ctx, encoder_context,
                                   (unsigned int *)slice_header,
                                   ALIGN(slice_header_length_in_bits, 32) >> 5,
                                   slice_header_length_in_bits & 0x1f,
                                   5,  /* first 5 bytes are start code + nal unit type */
                                   1, 0, 1, slice_batch);

        free(slice_header);
    } else {
        unsigned int skip_emul_byte_cnt;

        header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer;

        param = (VAEncPackedHeaderParameterBuffer *)
                    (encode_state->packed_header_params_ext[slice_header_index]->buffer);
        length_in_bits = param->bit_length;

        /* as the slice header is the last header data for one slice,
         * the last header flag is set to one.
         */
        skip_emul_byte_cnt = intel_avc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);

        mfc_context->insert_object(ctx,
                                   encoder_context,
                                   header_data,
                                   ALIGN(length_in_bits, 32) >> 5,
                                   length_in_bits & 0x1f,
                                   skip_emul_byte_cnt,
                                   1,
                                   0,
                                   !param->has_emulation_bytes,
                                   slice_batch);
    }

    return;
}
  1690.  
  1691. /* HEVC */
  1692. static int
  1693. hevc_temporal_find_surface(VAPictureHEVC *curr_pic,
  1694.                            VAPictureHEVC *ref_list,
  1695.                            int num_pictures,
  1696.                            int dir)
  1697. {
  1698.     int i, found = -1, min = 0x7FFFFFFF;
  1699.  
  1700.     for (i = 0; i < num_pictures; i++) {
  1701.         int tmp;
  1702.  
  1703.         if ((ref_list[i].flags & VA_PICTURE_HEVC_INVALID) ||
  1704.             (ref_list[i].picture_id == VA_INVALID_SURFACE))
  1705.             break;
  1706.  
  1707.         tmp = curr_pic->pic_order_cnt - ref_list[i].pic_order_cnt;
  1708.  
  1709.         if (dir)
  1710.             tmp = -tmp;
  1711.  
  1712.         if (tmp > 0 && tmp < min) {
  1713.             min = tmp;
  1714.             found = i;
  1715.         }
  1716.     }
  1717.  
  1718.     return found;
  1719. }
  1720. void
  1721. intel_hevc_vme_reference_state(VADriverContextP ctx,
  1722.                                struct encode_state *encode_state,
  1723.                                struct intel_encoder_context *encoder_context,
  1724.                                int list_index,
  1725.                                int surface_index,
  1726.                                void (* vme_source_surface_state)(
  1727.                                    VADriverContextP ctx,
  1728.                                    int index,
  1729.                                    struct object_surface *obj_surface,
  1730.                                    struct intel_encoder_context *encoder_context))
  1731. {
  1732.     struct gen6_vme_context *vme_context = encoder_context->vme_context;
  1733.     struct object_surface *obj_surface = NULL;
  1734.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1735.     VASurfaceID ref_surface_id;
  1736.     VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
  1737.     VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
  1738.     int max_num_references;
  1739.     VAPictureHEVC *curr_pic;
  1740.     VAPictureHEVC *ref_list;
  1741.     int ref_idx;
  1742.  
  1743.     if (list_index == 0) {
  1744.         max_num_references = pic_param->num_ref_idx_l0_default_active_minus1 + 1;
  1745.         ref_list = slice_param->ref_pic_list0;
  1746.     } else {
  1747.         max_num_references = pic_param->num_ref_idx_l1_default_active_minus1 + 1;
  1748.         ref_list = slice_param->ref_pic_list1;
  1749.     }
  1750.  
  1751.     if (max_num_references == 1) {
  1752.         if (list_index == 0) {
  1753.             ref_surface_id = slice_param->ref_pic_list0[0].picture_id;
  1754.             vme_context->used_references[0] = &slice_param->ref_pic_list0[0];
  1755.         } else {
  1756.             ref_surface_id = slice_param->ref_pic_list1[0].picture_id;
  1757.             vme_context->used_references[1] = &slice_param->ref_pic_list1[0];
  1758.         }
  1759.  
  1760.         if (ref_surface_id != VA_INVALID_SURFACE)
  1761.             obj_surface = SURFACE(ref_surface_id);
  1762.  
  1763.         if (!obj_surface ||
  1764.             !obj_surface->bo) {
  1765.             obj_surface = encode_state->reference_objects[list_index];
  1766.             vme_context->used_references[list_index] = &pic_param->reference_frames[list_index];
  1767.         }
  1768.  
  1769.         ref_idx = 0;
  1770.     } else {
  1771.         curr_pic = &pic_param->decoded_curr_pic;
  1772.  
  1773.         /* select the reference frame in temporal space */
  1774.         ref_idx = hevc_temporal_find_surface(curr_pic, ref_list, max_num_references, list_index == 1);
  1775.         ref_surface_id = ref_list[ref_idx].picture_id;
  1776.  
  1777.         if (ref_surface_id != VA_INVALID_SURFACE) /* otherwise warning later */
  1778.             obj_surface = SURFACE(ref_surface_id);
  1779.  
  1780.         vme_context->used_reference_objects[list_index] = obj_surface;
  1781.         vme_context->used_references[list_index] = &ref_list[ref_idx];
  1782.     }
  1783.  
  1784.     if (obj_surface &&
  1785.         obj_surface->bo) {
  1786.         assert(ref_idx >= 0);
  1787.         vme_context->used_reference_objects[list_index] = obj_surface;
  1788.         vme_source_surface_state(ctx, surface_index, obj_surface, encoder_context);
  1789.         vme_context->ref_index_in_mb[list_index] = (ref_idx << 24 |
  1790.                 ref_idx << 16 |
  1791.                 ref_idx <<  8 |
  1792.                 ref_idx);
  1793.     } else {
  1794.         vme_context->used_reference_objects[list_index] = NULL;
  1795.         vme_context->used_references[list_index] = NULL;
  1796.         vme_context->ref_index_in_mb[list_index] = 0;
  1797.     }
  1798. }
  1799.  
  1800. void intel_vme_hevc_update_mbmv_cost(VADriverContextP ctx,
  1801.                                      struct encode_state *encode_state,
  1802.                                      struct intel_encoder_context *encoder_context)
  1803. {
  1804.     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
  1805.     struct gen6_vme_context *vme_context = encoder_context->vme_context;
  1806.     VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
  1807.     VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
  1808.     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
  1809.     int qp, m_cost, j, mv_count;
  1810.     uint8_t *vme_state_message = (uint8_t *)(vme_context->vme_state_message);
  1811.     float   lambda, m_costf;
  1812.  
  1813.     /* here no SI SP slice for HEVC, do not need slice fixup */
  1814.     int slice_type = slice_param->slice_type;
  1815.  
  1816.  
  1817.     qp = pic_param->pic_init_qp + slice_param->slice_qp_delta;
  1818.  
  1819.     if(encoder_context->rate_control_mode == VA_RC_CBR)
  1820.     {
  1821.         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
  1822.         if(slice_type == HEVC_SLICE_B) {
  1823.             if(pSequenceParameter->ip_period == 1)
  1824.             {
  1825.                 slice_type = HEVC_SLICE_P;
  1826.                 qp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
  1827.  
  1828.             }else if(mfc_context->vui_hrd.i_frame_number % pSequenceParameter->ip_period == 1){
  1829.                 slice_type = HEVC_SLICE_P;
  1830.                 qp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
  1831.             }
  1832.         }
  1833.  
  1834.     }
  1835.  
  1836.     if (vme_state_message == NULL)
  1837.         return;
  1838.  
  1839.     assert(qp <= QP_MAX);
  1840.     lambda = intel_lambda_qp(qp);
  1841.     if (slice_type == HEVC_SLICE_I) {
  1842.         vme_state_message[MODE_INTRA_16X16] = 0;
  1843.         m_cost = lambda * 4;
  1844.         vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
  1845.         m_cost = lambda * 16;
  1846.         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
  1847.         m_cost = lambda * 3;
  1848.         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
  1849.     } else {
  1850.         m_cost = 0;
  1851.         vme_state_message[MODE_INTER_MV0] = intel_format_lutvalue(m_cost, 0x6f);
  1852.         for (j = 1; j < 3; j++) {
  1853.             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
  1854.             m_cost = (int)m_costf;
  1855.             vme_state_message[MODE_INTER_MV0 + j] = intel_format_lutvalue(m_cost, 0x6f);
  1856.         }
  1857.         mv_count = 3;
  1858.         for (j = 4; j <= 64; j *= 2) {
  1859.             m_costf = (log2f((float)(j + 1)) + 1.718f) * lambda;
  1860.             m_cost = (int)m_costf;
  1861.             vme_state_message[MODE_INTER_MV0 + mv_count] = intel_format_lutvalue(m_cost, 0x6f);
  1862.             mv_count++;
  1863.         }
  1864.  
  1865.         if (qp <= 25) {
  1866.             vme_state_message[MODE_INTRA_16X16] = 0x4a;
  1867.             vme_state_message[MODE_INTRA_8X8] = 0x4a;
  1868.             vme_state_message[MODE_INTRA_4X4] = 0x4a;
  1869.             vme_state_message[MODE_INTRA_NONPRED] = 0x4a;
  1870.             vme_state_message[MODE_INTER_16X16] = 0x4a;
  1871.             vme_state_message[MODE_INTER_16X8] = 0x4a;
  1872.             vme_state_message[MODE_INTER_8X8] = 0x4a;
  1873.             vme_state_message[MODE_INTER_8X4] = 0x4a;
  1874.             vme_state_message[MODE_INTER_4X4] = 0x4a;
  1875.             vme_state_message[MODE_INTER_BWD] = 0x2a;
  1876.             return;
  1877.         }
  1878.         m_costf = lambda * 10;
  1879.         vme_state_message[MODE_INTRA_16X16] = intel_format_lutvalue(m_cost, 0x8f);
  1880.         m_cost = lambda * 14;
  1881.         vme_state_message[MODE_INTRA_8X8] = intel_format_lutvalue(m_cost, 0x8f);
  1882.         m_cost = lambda * 24;
  1883.         vme_state_message[MODE_INTRA_4X4] = intel_format_lutvalue(m_cost, 0x8f);
  1884.         m_costf = lambda * 3.5;
  1885.         m_cost = m_costf;
  1886.         vme_state_message[MODE_INTRA_NONPRED] = intel_format_lutvalue(m_cost, 0x6f);
  1887.         if (slice_type == HEVC_SLICE_P) {
  1888.             m_costf = lambda * 2.5;
  1889.             m_cost = m_costf;
  1890.             vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
  1891.             m_costf = lambda * 4;
  1892.             m_cost = m_costf;
  1893.             vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
  1894.             m_costf = lambda * 1.5;
  1895.             m_cost = m_costf;
  1896.             vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
  1897.             m_costf = lambda * 3;
  1898.             m_cost = m_costf;
  1899.             vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
  1900.             m_costf = lambda * 5;
  1901.             m_cost = m_costf;
  1902.             vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
  1903.             /* BWD is not used in P-frame */
  1904.             vme_state_message[MODE_INTER_BWD] = 0;
  1905.         } else {
  1906.             m_costf = lambda * 2.5;
  1907.             m_cost = m_costf;
  1908.             vme_state_message[MODE_INTER_16X16] = intel_format_lutvalue(m_cost, 0x8f);
  1909.             m_costf = lambda * 5.5;
  1910.             m_cost = m_costf;
  1911.             vme_state_message[MODE_INTER_16X8] = intel_format_lutvalue(m_cost, 0x8f);
  1912.             m_costf = lambda * 3.5;
  1913.             m_cost = m_costf;
  1914.             vme_state_message[MODE_INTER_8X8] = intel_format_lutvalue(m_cost, 0x6f);
  1915.             m_costf = lambda * 5.0;
  1916.             m_cost = m_costf;
  1917.             vme_state_message[MODE_INTER_8X4] = intel_format_lutvalue(m_cost, 0x6f);
  1918.             m_costf = lambda * 6.5;
  1919.             m_cost = m_costf;
  1920.             vme_state_message[MODE_INTER_4X4] = intel_format_lutvalue(m_cost, 0x6f);
  1921.             m_costf = lambda * 1.5;
  1922.             m_cost = m_costf;
  1923.             vme_state_message[MODE_INTER_BWD] = intel_format_lutvalue(m_cost, 0x6f);
  1924.         }
  1925.     }
  1926. }
  1927.