Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright © 2012 Intel Corporation
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the
  6.  * "Software"), to deal in the Software without restriction, including
  7.  * without limitation the rights to use, copy, modify, merge, publish,
  8.  * distribute, sub license, and/or sell copies of the Software, and to
  9.  * permit persons to whom the Software is furnished to do so, subject to
  10.  * the following conditions:
  11.  *
  12.  * The above copyright notice and this permission notice (including the
  13.  * next paragraph) shall be included in all copies or substantial portions
  14.  * of the Software.
  15.  *
  16.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  17.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  18.  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  19.  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
  20.  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  21.  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  22.  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23.  *
  24.  * Authors:
  25.  *    Zhao Yakui <yakui.zhao@intel.com>
  26.  *    Xiang Haihao <haihao.xiang@intel.com>
  27.  *
  28.  */
  29.  
  30. #include <stdio.h>
  31. #include <stdlib.h>
  32. #include <string.h>
  33. #include <math.h>
  34. #include <assert.h>
  35.  
  36. #include "intel_batchbuffer.h"
  37. #include "i965_defines.h"
  38. #include "i965_structs.h"
  39. #include "i965_drv_video.h"
  40. #include "i965_encoder.h"
  41. #include "i965_encoder_utils.h"
  42. #include "gen6_mfc.h"
  43. #include "gen6_vme.h"
  44. #include "intel_media.h"
  45. #include <va/va_enc_jpeg.h>
  46. #include "vp8_probs.h"
  47.  
  48. #define SURFACE_STATE_PADDED_SIZE               SURFACE_STATE_PADDED_SIZE_GEN8
  49. #define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * index)
  50. #define BINDING_TABLE_OFFSET(index)             (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * index)
  51.  
  52. #define MFC_SOFTWARE_HASWELL    1
  53.  
  54. #define B0_STEP_REV             2
  55. #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
  56.  
//Zigzag scan order of the Luma and Chroma components.
//Note: Jpeg Spec ISO/IEC 10918-1, Figure A.6 shows the zigzag order differently.
//The Spec is trying to show the zigzag pattern with number positions. The below
//table will use the pattern shown by A.6 and map the position of the elements in the array.
//zigzag_direct[i] gives the raster-scan index of the i-th coefficient in zigzag order.
static const uint32_t zigzag_direct[64] = {
    0,   1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
  71.  
//Default Luminance quantization table, stored in raster order.
//Source: Jpeg Spec ISO/IEC 10918-1, Annex K, Table K.1
static const uint8_t jpeg_luma_quant[64] = {
    16, 11, 10, 16, 24,  40,  51,  61,
    12, 12, 14, 19, 26,  58,  60,  55,
    14, 13, 16, 24, 40,  57,  69,  56,
    14, 17, 22, 29, 51,  87,  80,  62,
    18, 22, 37, 56, 68,  109, 103, 77,
    24, 35, 55, 64, 81,  104, 113, 92,
    49, 64, 78, 87, 103, 121, 120, 101,
    72, 92, 95, 98, 112, 100, 103, 99
};
  84.  
//Default Chroma quantization table, stored in raster order.
//Source: Jpeg Spec ISO/IEC 10918-1, Annex K, Table K.2
static const uint8_t jpeg_chroma_quant[64] = {
    17, 18, 24, 47, 99, 99, 99, 99,
    18, 21, 26, 66, 99, 99, 99, 99,
    24, 26, 56, 99, 99, 99, 99, 99,
    47, 66, 99, 99, 99, 99, 99, 99,
    99, 99, 99, 99, 99, 99, 99, 99,
    99, 99, 99, 99, 99, 99, 99, 99,
    99, 99, 99, 99, 99, 99, 99, 99,
    99, 99, 99, 99, 99, 99, 99, 99
};
  97.  
  98.  
/* Map a VA JPEG huffman table index (0 = luma, 1 = chroma) to the
 * corresponding MFX hardware huffman table id. */
static const int va_to_gen7_jpeg_hufftable[2] = {
    MFX_HUFFTABLE_ID_Y,
    MFX_HUFFTABLE_ID_UV
};
  103.  
/* Media kernel binary for building AVC intra-MB batchbuffer commands;
 * the .g7b file is a generated array of 4-DW instruction words. */
static const uint32_t gen8_mfc_batchbuffer_avc_intra[][4] = {
#include "shaders/utils/mfc_batchbuffer_avc_intra.g7b"
};
  107.  
/* Media kernel binary for building AVC inter-MB batchbuffer commands;
 * the .g7b file is a generated array of 4-DW instruction words. */
static const uint32_t gen8_mfc_batchbuffer_avc_inter[][4] = {
#include "shaders/utils/mfc_batchbuffer_avc_inter.g7b"
};
  111.  
/* Kernel descriptors (name, id, binary, size, cached bo) registered with
 * the GPE context for software batchbuffer generation. */
static struct i965_kernel gen8_mfc_kernels[] = {
    {
        "MFC AVC INTRA BATCHBUFFER ",
        MFC_BATCHBUFFER_AVC_INTRA,
        gen8_mfc_batchbuffer_avc_intra,
        sizeof(gen8_mfc_batchbuffer_avc_intra),
        NULL
    },

    {
        "MFC AVC INTER BATCHBUFFER ",
        MFC_BATCHBUFFER_AVC_INTER,
        gen8_mfc_batchbuffer_avc_inter,
        sizeof(gen8_mfc_batchbuffer_avc_inter),
        NULL
    },
};
  129.  
  130. #define         INTER_MODE_MASK         0x03
  131. #define         INTER_8X8               0x03
  132. #define         INTER_16X8              0x01
  133. #define         INTER_8X16              0x02
  134. #define         SUBMB_SHAPE_MASK        0x00FF00
  135. #define         INTER_16X16             0x00
  136.  
  137. #define         INTER_MV8               (4 << 20)
  138. #define         INTER_MV32              (6 << 20)
  139.  
  140.  
/*
 * Emit MFX_PIPE_MODE_SELECT (5 DWs) to put the MFX engine into encode
 * mode for the selected codec.  standard_select must be one of the
 * MFX_FORMAT_* values asserted below.  Pre/post deblocking outputs are
 * enabled based on which buffer the context has allocated; all
 * error-termination and clock-gating controls stay at their defaults.
 */
static void
gen8_mfc_pipe_mode_select(VADriverContextP ctx,
                          int standard_select,
                          struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;

    assert(standard_select == MFX_FORMAT_MPEG2 ||
           standard_select == MFX_FORMAT_AVC   ||
           standard_select == MFX_FORMAT_JPEG  ||
           standard_select == MFX_FORMAT_VP8);

    BEGIN_BCS_BATCH(batch, 5);

    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
                  (MFD_MODE_VLD << 15) | /* VLD mode */
                  (0 << 10) | /* Stream-Out Enable */
                  ((!!mfc_context->post_deblocking_output.bo) << 9)  | /* Post Deblocking Output */
                  ((!!mfc_context->pre_deblocking_output.bo) << 8)  | /* Pre Deblocking Output */
                  (0 << 6)  | /* frame statistics stream-out enable*/
                  (0 << 5)  | /* not in stitch mode */
                  (1 << 4)  | /* encoding mode */
                  (standard_select << 0));  /* standard select: avc or mpeg2 or jpeg*/
    OUT_BCS_BATCH(batch,
                  (0 << 7)  | /* expand NOA bus flag */
                  (0 << 6)  | /* disable slice-level clock gating */
                  (0 << 5)  | /* disable clock gating for NOA */
                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
                  (0 << 1)  |
                  (0 << 0));
    OUT_BCS_BATCH(batch, 0);    /* DW3: reserved */
    OUT_BCS_BATCH(batch, 0);    /* DW4: reserved */

    ADVANCE_BCS_BATCH(batch);
}
  181.  
/*
 * Emit MFX_SURFACE_STATE (6 DWs) describing the frame surface: a
 * Y-major tiled, planar 4:2:0 8-bit layout with interleaved U/V.
 * Dimensions, pitch and the chroma y-offset come from
 * mfc_context->surface_state, filled in by the caller beforehand.
 */
static void
gen8_mfc_surface_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;

    BEGIN_BCS_BATCH(batch, 6);

    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  ((mfc_context->surface_state.height - 1) << 18) |
                  ((mfc_context->surface_state.width - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
                  (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
                  (0 << 22) | /* surface object control state, FIXME??? */
                  ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
                  (0 << 2)  | /* must be 0 for interleave U/V */
                  (1 << 1)  | /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0));  /* tile walk, TILEWALK_YMAJOR */
    OUT_BCS_BATCH(batch,
                  (0 << 16) |                                                           /* must be 0 for interleave U/V */
                  (mfc_context->surface_state.h_pitch));                /* y offset for U(cb) */
    OUT_BCS_BATCH(batch, 0);
<br>
    ADVANCE_BCS_BATCH(batch);
}
  210.  
/*
 * Emit MFX_IND_OBJ_BASE_ADDR_STATE (26 DWs).  Programs the indirect MV
 * object to the VME output buffer (skipped for JPEG, which has no VME
 * stage) and points the PAK-BSE object base/upper-bound at the coded
 * buffer.  For VP8 the MFX bitstream upper bound (DW4-5) is also set.
 */
static void
gen8_mfc_ind_obj_base_addr_state(VADriverContextP ctx,
                                 struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    int vme_size;
    unsigned int bse_offset;

    BEGIN_BCS_BATCH(batch, 26);

    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
    /* the DW1-3 is for the MFX indirect bistream offset */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW4-5 is the MFX upper bound */
    if (encoder_context->codec == CODEC_VP8) {
        OUT_BCS_RELOC(batch,
                mfc_context->mfc_indirect_pak_bse_object.bo,
                I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                mfc_context->mfc_indirect_pak_bse_object.end_offset);
        OUT_BCS_BATCH(batch, 0);
    } else {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    if(encoder_context->codec != CODEC_JPEG) {
        vme_size = vme_context->vme_output.size_block * vme_context->vme_output.num_blocks;
        /* the DW6-10 is for MFX Indirect MV Object Base Address */
        OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        /* the upper bound of the MV object is the end of the VME output */
        OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, vme_size);
        OUT_BCS_BATCH(batch, 0);
    } else {
        /* No VME for JPEG */
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    /* the DW11-15 is for MFX IT-COFF. Not used on encoder */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW16-20 is for MFX indirect DBLK. Not used on encoder */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder*/
    /* JPEG writes from a non-zero offset within the coded buffer; other codecs start at 0 */
    bse_offset = (encoder_context->codec == CODEC_JPEG) ? (mfc_context->mfc_indirect_pak_bse_object.offset) : 0;
    OUT_BCS_RELOC(batch,
                  mfc_context->mfc_indirect_pak_bse_object.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  bse_offset);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    OUT_BCS_RELOC(batch,
                  mfc_context->mfc_indirect_pak_bse_object.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  mfc_context->mfc_indirect_pak_bse_object.end_offset);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
  289.  
/*
 * Emit MFX_AVC_IMG_STATE (16 DWs) for the current frame: MB counts,
 * QP-related flags from the VA picture parameter, entropy coding mode,
 * 8x8 transform flag, and fixed conformance/rate-control defaults.
 * Only progressive, frame-MB-only encoding is programmed here.
 */
static void
gen8_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state,
                       struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;

    int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
    int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;

    BEGIN_BCS_BATCH(batch, 16);

    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
    /*DW1. MB setting of frame */
    OUT_BCS_BATCH(batch,
                  ((width_in_mbs * height_in_mbs - 1) & 0xFFFF));
    OUT_BCS_BATCH(batch,
                  ((height_in_mbs - 1) << 16) |
                  ((width_in_mbs - 1) << 0));
    /* DW3 QP setting */
    OUT_BCS_BATCH(batch,
                  (0 << 24) |   /* Second Chroma QP Offset */
                  (0 << 16) |   /* Chroma QP Offset */
                  (0 << 14) |   /* Max-bit conformance Intra flag */
                  (0 << 13) |   /* Max Macroblock size conformance Inter flag */
                  (pPicParameter->pic_fields.bits.weighted_pred_flag << 12) |   /*Weighted_Pred_Flag */
                  (pPicParameter->pic_fields.bits.weighted_bipred_idc << 10) |  /* Weighted_BiPred_Idc */
                  (0 << 8)  |   /* FIXME: Image Structure */
                  (0 << 0) );   /* Current Decoed Image Frame Store ID, reserved in Encode mode */
    OUT_BCS_BATCH(batch,
                  (0 << 16) |   /* Mininum Frame size */
                  (0 << 15) |   /* Disable reading of Macroblock Status Buffer */
                  (0 << 14) |   /* Load BitStream Pointer only once, 1 slic 1 frame */
                  (0 << 13) |   /* CABAC 0 word insertion test enable */
                  (1 << 12) |   /* MVUnpackedEnable,compliant to DXVA */
                  (1 << 10) |   /* Chroma Format IDC, 4:2:0 */
                  (0 << 8)  |   /* FIXME: MbMvFormatFlag */
                  (pPicParameter->pic_fields.bits.entropy_coding_mode_flag << 7)  |   /*0:CAVLC encoding mode,1:CABAC*/
                  (0 << 6)  |   /* Only valid for VLD decoding mode */
                  (0 << 5)  |   /* Constrained Intra Predition Flag, from PPS */
                  (0 << 4)  |   /* Direct 8x8 inference flag */
                  (pPicParameter->pic_fields.bits.transform_8x8_mode_flag << 3)  |   /*8x8 or 4x4 IDCT Transform Mode Flag*/
                  (1 << 2)  |   /* Frame MB only flag */
                  (0 << 1)  |   /* MBAFF mode is in active */
                  (0 << 0));    /* Field picture flag */
    /* DW5 Trellis quantization */
    OUT_BCS_BATCH(batch, 0);    /* Mainly about MB rate control and debug, just ignoring */
    OUT_BCS_BATCH(batch,        /* Inter and Intra Conformance Max size limit */
                  (0xBB8 << 16) |       /* InterMbMaxSz */
                  (0xEE8) );            /* IntraMbMaxSz */
    OUT_BCS_BATCH(batch, 0);            /* Reserved */
    /* DW8. QP delta */
    OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
    OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
    /* DW10. Bit setting for MB */
    OUT_BCS_BATCH(batch, 0x8C000000);   /* NOTE(review): magic MB bit-rate-control defaults; meaning not derivable from this file */
    OUT_BCS_BATCH(batch, 0x00010000);
    /* DW12. */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0x02010100);
    /* DW14. For short format */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
  357.  
  358. static void
  359. gen8_mfc_qm_state(VADriverContextP ctx,
  360.                   int qm_type,
  361.                   unsigned int *qm,
  362.                   int qm_length,
  363.                   struct intel_encoder_context *encoder_context)
  364. {
  365.     struct intel_batchbuffer *batch = encoder_context->base.batch;
  366.     unsigned int qm_buffer[16];
  367.  
  368.     assert(qm_length <= 16);
  369.     assert(sizeof(*qm) == 4);
  370.     memcpy(qm_buffer, qm, qm_length * 4);
  371.  
  372.     BEGIN_BCS_BATCH(batch, 18);
  373.     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
  374.     OUT_BCS_BATCH(batch, qm_type << 0);
  375.     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
  376.     ADVANCE_BCS_BATCH(batch);
  377. }
  378.  
  379. static void
  380. gen8_mfc_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
  381. {
  382.     unsigned int qm[16] = {
  383.         0x10101010, 0x10101010, 0x10101010, 0x10101010,
  384.         0x10101010, 0x10101010, 0x10101010, 0x10101010,
  385.         0x10101010, 0x10101010, 0x10101010, 0x10101010,
  386.         0x10101010, 0x10101010, 0x10101010, 0x10101010
  387.     };
  388.  
  389.     gen8_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, encoder_context);
  390.     gen8_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, encoder_context);
  391.     gen8_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, encoder_context);
  392.     gen8_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, encoder_context);
  393. }
  394.  
  395. static void
  396. gen8_mfc_fqm_state(VADriverContextP ctx,
  397.                    int fqm_type,
  398.                    unsigned int *fqm,
  399.                    int fqm_length,
  400.                    struct intel_encoder_context *encoder_context)
  401. {
  402.     struct intel_batchbuffer *batch = encoder_context->base.batch;
  403.     unsigned int fqm_buffer[32];
  404.  
  405.     assert(fqm_length <= 32);
  406.     assert(sizeof(*fqm) == 4);
  407.     memcpy(fqm_buffer, fqm, fqm_length * 4);
  408.  
  409.     BEGIN_BCS_BATCH(batch, 34);
  410.     OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
  411.     OUT_BCS_BATCH(batch, fqm_type << 0);
  412.     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
  413.     ADVANCE_BCS_BATCH(batch);
  414. }
  415.  
  416. static void
  417. gen8_mfc_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
  418. {
  419.     unsigned int qm[32] = {
  420.         0x10001000, 0x10001000, 0x10001000, 0x10001000,
  421.         0x10001000, 0x10001000, 0x10001000, 0x10001000,
  422.         0x10001000, 0x10001000, 0x10001000, 0x10001000,
  423.         0x10001000, 0x10001000, 0x10001000, 0x10001000,
  424.         0x10001000, 0x10001000, 0x10001000, 0x10001000,
  425.         0x10001000, 0x10001000, 0x10001000, 0x10001000,
  426.         0x10001000, 0x10001000, 0x10001000, 0x10001000,
  427.         0x10001000, 0x10001000, 0x10001000, 0x10001000
  428.     };
  429.  
  430.     gen8_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, encoder_context);
  431.     gen8_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, encoder_context);
  432.     gen8_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, encoder_context);
  433.     gen8_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, encoder_context);
  434. }
  435.  
  436. static void
  437. gen8_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
  438.                            unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
  439.                            int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
  440.                            struct intel_batchbuffer *batch)
  441. {
  442.     if (batch == NULL)
  443.         batch = encoder_context->base.batch;
  444.  
  445.     if (data_bits_in_last_dw == 0)
  446.         data_bits_in_last_dw = 32;
  447.  
  448.     BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
  449.  
  450.     OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws + 2 - 2));
  451.     OUT_BCS_BATCH(batch,
  452.                   (0 << 16) |   /* always start at offset 0 */
  453.                   (data_bits_in_last_dw << 8) |
  454.                   (skip_emul_byte_count << 4) |
  455.                   (!!emulation_flag << 3) |
  456.                   ((!!is_last_header) << 2) |
  457.                   ((!!is_end_of_slice) << 1) |
  458.                   (0 << 0));    /* FIXME: ??? */
  459.     intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
  460.  
  461.     ADVANCE_BCS_BATCH(batch);
  462. }
  463.  
  464.  
/*
 * Per-frame (re)initialization of the MFC context: computes frame
 * dimensions in macroblocks from the codec-specific parameter buffer,
 * drops all previously held buffer objects, and allocates the scratch
 * buffers and auxiliary slice batchbuffer sized for this frame.
 */
static void gen8_mfc_init(VADriverContextP ctx,
                          struct encode_state *encode_state,
                          struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    dri_bo *bo;
    int i;
    int width_in_mbs = 0;
    int height_in_mbs = 0;
    int slice_batchbuffer_size;

    /* Frame size in MBs comes from a codec-specific parameter buffer. */
    if (encoder_context->codec == CODEC_H264 ||
        encoder_context->codec == CODEC_H264_MVC) {
        VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
        width_in_mbs = pSequenceParameter->picture_width_in_mbs;
        height_in_mbs = pSequenceParameter->picture_height_in_mbs;
    } else if (encoder_context->codec == CODEC_MPEG2) {
        VAEncSequenceParameterBufferMPEG2 *pSequenceParameter = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;

        assert(encoder_context->codec == CODEC_MPEG2);

        width_in_mbs = ALIGN(pSequenceParameter->picture_width, 16) / 16;
        height_in_mbs = ALIGN(pSequenceParameter->picture_height, 16) / 16;
    } else {
        assert(encoder_context->codec == CODEC_JPEG);
        VAEncPictureParameterBufferJPEG *pic_param = (VAEncPictureParameterBufferJPEG *)encode_state->pic_param_ext->buffer;

        width_in_mbs = ALIGN(pic_param->picture_width, 16) / 16;
        height_in_mbs = ALIGN(pic_param->picture_height, 16) / 16;
    }

    /* 64 bytes per MB of command space plus fixed slack and per-slice
     * header/tail room. */
    slice_batchbuffer_size = 64 * width_in_mbs * height_in_mbs + 4096 +
                (SLICE_HEADER + SLICE_TAIL) * encode_state->num_slice_params_ext;

    /*Encode common setup for MFC*/
    dri_bo_unreference(mfc_context->post_deblocking_output.bo);
    mfc_context->post_deblocking_output.bo = NULL;

    dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
    mfc_context->pre_deblocking_output.bo = NULL;

    dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
    mfc_context->uncompressed_picture_source.bo = NULL;

    dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
    mfc_context->mfc_indirect_pak_bse_object.bo = NULL;

    for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
        if (mfc_context->direct_mv_buffers[i].bo != NULL)
            dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
        mfc_context->direct_mv_buffers[i].bo = NULL;
    }

    for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
        if (mfc_context->reference_surfaces[i].bo != NULL)
            dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
        mfc_context->reference_surfaces[i].bo = NULL;
    }

    /* Scratch buffers sized per MB row (64 bytes per MB column). */
    dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "Buffer",
                      width_in_mbs * 64,
                      64);
    assert(bo);
    mfc_context->intra_row_store_scratch_buffer.bo = bo;

    dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "Buffer",
                      width_in_mbs * height_in_mbs * 16,
                      64);
    assert(bo);
    mfc_context->macroblock_status_buffer.bo = bo;

    dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "Buffer",
                      4 * width_in_mbs * 64,  /* 4 * width_in_mbs * 64 */
                      64);
    assert(bo);
    mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;

    dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "Buffer",
                      2 * width_in_mbs * 64, /* 2 * width_in_mbs * 64 */
                      0x1000);
    assert(bo);
    mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;

    dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
    mfc_context->mfc_batchbuffer_surface.bo = NULL;

    dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
    mfc_context->aux_batchbuffer_surface.bo = NULL;

    if (mfc_context->aux_batchbuffer)
        intel_batchbuffer_free(mfc_context->aux_batchbuffer);

    /* The aux batchbuffer doubles as a 16-byte-block surface for the
     * media kernels; take an extra reference on its bo. */
    mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, slice_batchbuffer_size);
    mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
    dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
    mfc_context->aux_batchbuffer_surface.pitch = 16;
    mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
    mfc_context->aux_batchbuffer_surface.size_block = 16;

    i965_gpe_context_init(ctx, &mfc_context->gpe_context);
}
  575.  
/*
 * Emit MFX_PIPE_BUF_ADDR_STATE (61 DWs): relocations for the pre/post
 * deblocking outputs, uncompressed source, MB status stream-out, the
 * intra-row and deblocking-filter row-store scratch buffers, and up to
 * 16 reference surfaces.  Unused slots and both ILDB buffers are
 * programmed as 0.
 */
static void
gen8_mfc_pipe_buf_addr_state(VADriverContextP ctx,
                             struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    int i;

    BEGIN_BCS_BATCH(batch, 61);

    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));

    /* the DW1-3 is for pre_deblocking */
    if (mfc_context->pre_deblocking_output.bo)
        OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);                                                                                        /* pre output addr   */

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    /* the DW4-6 is for the post_deblocking */

    if (mfc_context->post_deblocking_output.bo)
        OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);                                                                                       /* post output addr  */
    else
        OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW7-9 is for the uncompressed_picture */
    OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0); /* uncompressed data */

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW10-12 is for the mb status */
    OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0); /* StreamOut data*/

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW13-15 is for the intra_row_store_scratch */
    OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW16-18 is for the deblocking filter */
    OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW 19-50 is for Reference pictures*/
    for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
        if ( mfc_context->reference_surfaces[i].bo != NULL) {
            OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
                          I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                          0);
        } else {
            OUT_BCS_BATCH(batch, 0);
        }

        OUT_BCS_BATCH(batch, 0);
    }

    /* DW51: reserved */
    OUT_BCS_BATCH(batch, 0);

    /* The DW 52-54 is for the MB status buffer */
    OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);                                                                                   /* Macroblock status buffer*/

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW 55-57 is the ILDB buffer */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW 58-60 is the second ILDB buffer */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
  677.  
static void
gen8_mfc_avc_directmode_state(VADriverContextP ctx,
                              struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;

    int i;

    /* Emit MFX_AVC_DIRECTMODE_STATE: direct-mode MV buffer addresses and
     * the POC list used for B-slice direct/temporal prediction.
     * Fixed length: 71 DWs total (1 header + 70 payload). */
    BEGIN_BCS_BATCH(batch, 71);

    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));

    /* Reference frames and Current frames */
    /* the DW1-32 is for the direct MV for reference:
     * 16 iterations, each emitting an (address, upper-dword) pair.
     * Only even-indexed direct_mv_buffers are referenced; a missing bo
     * emits two zero DWs instead of a relocation. */
    for(i = 0; i < NUM_MFC_DMV_BUFFERS - 2; i += 2) {
        if ( mfc_context->direct_mv_buffers[i].bo != NULL) {
            OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0);
            OUT_BCS_BATCH(batch, 0);
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }
   
    /* DW33: reserved/zero */
    OUT_BCS_BATCH(batch, 0);

    /* the DW34-36 is the MV for the current reference */
    OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo,
                  I915_GEM_DOMAIN_INSTRUCTION, 0,
                  0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* POL list: 32 POC entries (two list entries share each value via i/2),
     * followed by two zero DWs (DW37-70). */
    for(i = 0; i < 32; i++) {
        OUT_BCS_BATCH(batch, i/2);
    }
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
  724.  
  725.  
static void
gen8_mfc_bsp_buf_base_addr_state(VADriverContextP ctx,
                                 struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;

    /* Emit MFX_BSP_BUF_BASE_ADDR_STATE (10 DWs): base addresses for the
     * bitstream-parser scratch buffers. Only the BSD/MPC row store is
     * backed by a real buffer here; the MPR row store and bitplane read
     * buffers are unused for encode and left zero. */
    BEGIN_BCS_BATCH(batch, 10);

    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
    /* DW1-3: BSD/MPC row store scratch buffer (address + upper dword + 0) */
    OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
       
    /* the DW4-6 is for MPR Row Store Scratch Buffer Base Address */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW7-9 is for Bitplane Read Buffer Base Address */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
  754.  
  755.  
/* Program all per-picture MFX state for an AVC encode pass.
 * The emission order follows the required hardware command sequence:
 * pipe mode -> surface -> indirect objects -> buffer addresses ->
 * image state -> QM/FQM matrices -> direct-mode state -> ref lists. */
static void gen8_mfc_avc_pipeline_picture_programing( VADriverContextP ctx,
                                                      struct encode_state *encode_state,
                                                      struct intel_encoder_context *encoder_context)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;

    mfc_context->pipe_mode_select(ctx, MFX_FORMAT_AVC, encoder_context);
    mfc_context->set_surface_state(ctx, encoder_context);
    mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
    gen8_mfc_pipe_buf_addr_state(ctx, encoder_context);
    gen8_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
    mfc_context->avc_img_state(ctx, encode_state, encoder_context);
    mfc_context->avc_qm_state(ctx, encoder_context);
    mfc_context->avc_fqm_state(ctx, encoder_context);
    gen8_mfc_avc_directmode_state(ctx, encoder_context);
    intel_mfc_avc_ref_idx_state(ctx, encode_state, encoder_context);
}
  773.  
  774.  
  775. static VAStatus gen8_mfc_run(VADriverContextP ctx,
  776.                              struct encode_state *encode_state,
  777.                              struct intel_encoder_context *encoder_context)
  778. {
  779.     struct intel_batchbuffer *batch = encoder_context->base.batch;
  780.  
  781.     intel_batchbuffer_flush(batch);             //run the pipeline
  782.  
  783.     return VA_STATUS_SUCCESS;
  784. }
  785.  
  786.  
  787. static VAStatus
  788. gen8_mfc_stop(VADriverContextP ctx,
  789.               struct encode_state *encode_state,
  790.               struct intel_encoder_context *encoder_context,
  791.               int *encoded_bits_size)
  792. {
  793.     VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN;
  794.     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
  795.     VACodedBufferSegment *coded_buffer_segment;
  796.    
  797.     vaStatus = i965_MapBuffer(ctx, pPicParameter->coded_buf, (void **)&coded_buffer_segment);
  798.     assert(vaStatus == VA_STATUS_SUCCESS);
  799.     *encoded_bits_size = coded_buffer_segment->size * 8;
  800.     i965_UnmapBuffer(ctx, pPicParameter->coded_buf);
  801.  
  802.     return VA_STATUS_SUCCESS;
  803. }
  804.  
  805.  
  806. static void
  807. gen8_mfc_avc_slice_state(VADriverContextP ctx,
  808.                          VAEncPictureParameterBufferH264 *pic_param,
  809.                          VAEncSliceParameterBufferH264 *slice_param,
  810.                          struct encode_state *encode_state,
  811.                          struct intel_encoder_context *encoder_context,
  812.                          int rate_control_enable,
  813.                          int qp,
  814.                          struct intel_batchbuffer *batch)
  815. {
  816.     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
  817.     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
  818.     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
  819.     int beginmb = slice_param->macroblock_address;
  820.     int endmb = beginmb + slice_param->num_macroblocks;
  821.     int beginx = beginmb % width_in_mbs;
  822.     int beginy = beginmb / width_in_mbs;
  823.     int nextx =  endmb % width_in_mbs;
  824.     int nexty = endmb / width_in_mbs;
  825.     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
  826.     int last_slice = (endmb == (width_in_mbs * height_in_mbs));
  827.     int maxQpN, maxQpP;
  828.     unsigned char correct[6], grow, shrink;
  829.     int i;
  830.     int weighted_pred_idc = 0;
  831.     unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
  832.     unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
  833.     int num_ref_l0 = 0, num_ref_l1 = 0;
  834.  
  835.     if (batch == NULL)
  836.         batch = encoder_context->base.batch;
  837.  
  838.     if (slice_type == SLICE_TYPE_I) {
  839.         luma_log2_weight_denom = 0;
  840.         chroma_log2_weight_denom = 0;
  841.     } else if (slice_type == SLICE_TYPE_P) {
  842.         weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
  843.         num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
  844.  
  845.         if (slice_param->num_ref_idx_active_override_flag)
  846.             num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
  847.     } else if (slice_type == SLICE_TYPE_B) {
  848.         weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
  849.         num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
  850.         num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1;
  851.  
  852.         if (slice_param->num_ref_idx_active_override_flag) {
  853.             num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
  854.             num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
  855.         }
  856.  
  857.         if (weighted_pred_idc == 2) {
  858.             /* 8.4.3 - Derivation process for prediction weights (8-279) */
  859.             luma_log2_weight_denom = 5;
  860.             chroma_log2_weight_denom = 5;
  861.         }
  862.     }
  863.  
  864.     maxQpN = mfc_context->bit_rate_control_context[slice_type].MaxQpNegModifier;
  865.     maxQpP = mfc_context->bit_rate_control_context[slice_type].MaxQpPosModifier;
  866.  
  867.     for (i = 0; i < 6; i++)
  868.         correct[i] = mfc_context->bit_rate_control_context[slice_type].Correct[i];
  869.  
  870.     grow = mfc_context->bit_rate_control_context[slice_type].GrowInit +
  871.         (mfc_context->bit_rate_control_context[slice_type].GrowResistance << 4);
  872.     shrink = mfc_context->bit_rate_control_context[slice_type].ShrinkInit +
  873.         (mfc_context->bit_rate_control_context[slice_type].ShrinkResistance << 4);
  874.  
  875.     BEGIN_BCS_BATCH(batch, 11);;
  876.  
  877.     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
  878.     OUT_BCS_BATCH(batch, slice_type);                   /*Slice Type: I:P:B Slice*/
  879.  
  880.     OUT_BCS_BATCH(batch,
  881.                   (num_ref_l0 << 16) |
  882.                   (num_ref_l1 << 24) |
  883.                   (chroma_log2_weight_denom << 8) |
  884.                   (luma_log2_weight_denom << 0));
  885.  
  886.     OUT_BCS_BATCH(batch,
  887.                   (weighted_pred_idc << 30) |
  888.                   (slice_param->direct_spatial_mv_pred_flag<<29) |             /*Direct Prediction Type*/
  889.                   (slice_param->disable_deblocking_filter_idc << 27) |
  890.                   (slice_param->cabac_init_idc << 24) |
  891.                   (qp<<16) |                    /*Slice Quantization Parameter*/
  892.                   ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
  893.                   ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
  894.     OUT_BCS_BATCH(batch,
  895.                   (beginy << 24) |                      /*First MB X&Y , the begin postion of current slice*/
  896.                   (beginx << 16) |
  897.                   slice_param->macroblock_address );
  898.     OUT_BCS_BATCH(batch, (nexty << 16) | nextx);                       /*Next slice first MB X&Y*/
  899.     OUT_BCS_BATCH(batch,
  900.                   (0/*rate_control_enable*/ << 31) |            /*in CBR mode RateControlCounterEnable = enable*/
  901.                   (1 << 30) |           /*ResetRateControlCounter*/
  902.                   (0 << 28) |           /*RC Triggle Mode = Always Rate Control*/
  903.                   (4 << 24) |     /*RC Stable Tolerance, middle level*/
  904.                   (0/*rate_control_enable*/ << 23) |     /*RC Panic Enable*/                
  905.                   (0 << 22) |     /*QP mode, don't modfiy CBP*/
  906.                   (0 << 21) |     /*MB Type Direct Conversion Enabled*/
  907.                   (0 << 20) |     /*MB Type Skip Conversion Enabled*/
  908.                   (last_slice << 19) |     /*IsLastSlice*/
  909.                   (0 << 18) |   /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
  910.                   (1 << 17) |       /*HeaderPresentFlag*/      
  911.                   (1 << 16) |       /*SliceData PresentFlag*/
  912.                   (1 << 15) |       /*TailPresentFlag*/
  913.                   (1 << 13) |       /*RBSP NAL TYPE*/  
  914.                   (0 << 12) );    /*CabacZeroWordInsertionEnable*/
  915.     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
  916.     OUT_BCS_BATCH(batch,
  917.                   (maxQpN << 24) |     /*Target QP - 24 is lowest QP*/
  918.                   (maxQpP << 16) |     /*Target QP + 20 is highest QP*/
  919.                   (shrink << 8)  |
  920.                   (grow << 0));  
  921.     OUT_BCS_BATCH(batch,
  922.                   (correct[5] << 20) |
  923.                   (correct[4] << 16) |
  924.                   (correct[3] << 12) |
  925.                   (correct[2] << 8) |
  926.                   (correct[1] << 4) |
  927.                   (correct[0] << 0));
  928.     OUT_BCS_BATCH(batch, 0);
  929.  
  930.     ADVANCE_BCS_BATCH(batch);
  931. }
  932.  
  933.  
  934. #ifdef MFC_SOFTWARE_HASWELL
  935.  
/* Emit one MFC_AVC_PAK_OBJECT (12 DWs) for an intra macroblock at MB
 * coordinates (x, y), using the VME output message 'msg' to build the
 * intra MB descriptor and prediction modes.
 * Returns the command length in dwords so the caller can track batch usage. */
static int
gen8_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb,
                              int qp,unsigned int *msg,
                              struct intel_encoder_context *encoder_context,
                              unsigned char target_mb_size, unsigned char max_mb_size,
                              struct intel_batchbuffer *batch)
{
    int len_in_dwords = 12;
    unsigned int intra_msg;
#define         INTRA_MSG_FLAG          (1 << 13)
#define         INTRA_MBTYPE_MASK       (0x1F0000)
    if (batch == NULL)
        batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, len_in_dwords);

    /* Rebuild the MB mode word: keep the low VME bits (masked to 0xC0FF),
     * set the intra flag, and fold the 5-bit MB type field down by 8. */
    intra_msg = msg[0] & 0xC0FF;
    intra_msg |= INTRA_MSG_FLAG;
    intra_msg |= ((msg[0] & INTRA_MBTYPE_MASK) >> 8);
    OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  (0 << 24) |           /* PackedMvNum, Debug*/
                  (0 << 20) |           /* No motion vector */
                  (1 << 19) |           /* CbpDcY */
                  (1 << 18) |           /* CbpDcU */
                  (1 << 17) |           /* CbpDcV */
                  intra_msg);

    OUT_BCS_BATCH(batch, (0xFFFF << 16) | (y << 8) | x);                /* Code Block Pattern for Y*/
    OUT_BCS_BATCH(batch, 0x000F000F);                                                   /* Code Block Pattern */
    OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp);      /* Last MB */

    /*Stuff for Intra MB*/
    OUT_BCS_BATCH(batch, msg[1]);                       /* We using Intra16x16 no 4x4 predmode*/
    OUT_BCS_BATCH(batch, msg[2]);
    OUT_BCS_BATCH(batch, msg[3]&0xFF);
   
    /*MaxSizeInWord and TargetSzieInWord*/
    OUT_BCS_BATCH(batch, (max_mb_size << 24) |
                  (target_mb_size << 16) );

    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);

    return len_in_dwords;
}
  985.  
/* Emit one MFC_AVC_PAK_OBJECT (12 DWs) for an inter macroblock.
 * 'msg' points at the VME output record for this MB; 'offset' is the
 * byte offset of the MB's motion vectors in the indirect MV buffer.
 * The MV array is first rewritten in place so its layout matches what
 * the PAK command expects. Returns the command length in dwords. */
static int
gen8_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp,
                              unsigned int *msg, unsigned int offset,
                              struct intel_encoder_context *encoder_context,
                              unsigned char target_mb_size,unsigned char max_mb_size, int slice_type,
                              struct intel_batchbuffer *batch)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    int len_in_dwords = 12;
    unsigned int inter_msg = 0;
    if (batch == NULL)
        batch = encoder_context->base.batch;
    {
#define MSG_MV_OFFSET   4
        unsigned int *mv_ptr;
        mv_ptr = msg + MSG_MV_OFFSET;
        /* MV of VME output is based on 16 sub-blocks. So it is necessary
         * to convert them to be compatible with the format of AVC_PAK
         * command.
         */
        if ((msg[0] & INTER_MODE_MASK) == INTER_8X16) {
            /* MV[0] and MV[2] are replicated */
            mv_ptr[4] = mv_ptr[0];
            mv_ptr[5] = mv_ptr[1];
            mv_ptr[2] = mv_ptr[8];
            mv_ptr[3] = mv_ptr[9];
            mv_ptr[6] = mv_ptr[8];
            mv_ptr[7] = mv_ptr[9];
        } else if ((msg[0] & INTER_MODE_MASK) == INTER_16X8) {
            /* MV[0] and MV[1] are replicated */
            mv_ptr[2] = mv_ptr[0];
            mv_ptr[3] = mv_ptr[1];
            mv_ptr[4] = mv_ptr[16];
            mv_ptr[5] = mv_ptr[17];
            mv_ptr[6] = mv_ptr[24];
            mv_ptr[7] = mv_ptr[25];
        } else if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
                   !(msg[1] & SUBMB_SHAPE_MASK)) {
            /* Don't touch MV[0] or MV[1] */
            mv_ptr[2] = mv_ptr[8];
            mv_ptr[3] = mv_ptr[9];
            mv_ptr[4] = mv_ptr[16];
            mv_ptr[5] = mv_ptr[17];
            mv_ptr[6] = mv_ptr[24];
            mv_ptr[7] = mv_ptr[25];
        }
    }

    BEGIN_BCS_BATCH(batch, len_in_dwords);

    OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));

    /* DW1: packed MV byte count - 32 bytes (8 MVs), or 128 bytes (32 MVs)
     * when the 8x8 MB uses sub-partition shapes. */
    inter_msg = 32;
    /* MV quantity */
    if ((msg[0] & INTER_MODE_MASK) == INTER_8X8) {
        if (msg[1] & SUBMB_SHAPE_MASK)
            inter_msg = 128;
    }
    OUT_BCS_BATCH(batch, inter_msg);         /* 32 MV*/
    OUT_BCS_BATCH(batch, offset);
    /* DW3: MB mode word - keep selected VME bits, force MV8 and the
     * CbpDcY/U/V flags; upgrade to MV32 for sub-partitioned 8x8. */
    inter_msg = msg[0] & (0x1F00FFFF);
    inter_msg |= INTER_MV8;
    inter_msg |= ((1 << 19) | (1 << 18) | (1 << 17));
    if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
        (msg[1] & SUBMB_SHAPE_MASK)) {
        inter_msg |= INTER_MV32;
    }

    OUT_BCS_BATCH(batch, inter_msg);

    OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);        /* Code Block Pattern for Y*/
    OUT_BCS_BATCH(batch, 0x000F000F);                         /* Code Block Pattern */
#if 0
    if ( slice_type == SLICE_TYPE_B) {
        OUT_BCS_BATCH(batch, (0xF<<28) | (end_mb << 26) | qp);  /* Last MB */
    } else {
        OUT_BCS_BATCH(batch, (end_mb << 26) | qp);      /* Last MB */
    }
#else
    OUT_BCS_BATCH(batch, (end_mb << 26) | qp);  /* Last MB */
#endif

    /* DW7: sub-MB prediction modes from the VME message */
    inter_msg = msg[1] >> 8;
    /*Stuff for Inter MB*/
    OUT_BCS_BATCH(batch, inter_msg);
    OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[0]);
    OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[1]);

    /*MaxSizeInWord and TargetSzieInWord*/
    OUT_BCS_BATCH(batch, (max_mb_size << 24) |
                  (target_mb_size << 16) );

    OUT_BCS_BATCH(batch, 0x0);

    ADVANCE_BCS_BATCH(batch);

    return len_in_dwords;
}
  1084.  
  1085. #define         AVC_INTRA_RDO_OFFSET    4
  1086. #define         AVC_INTER_RDO_OFFSET    10
  1087. #define         AVC_INTER_MSG_OFFSET    8      
  1088. #define         AVC_INTER_MV_OFFSET             48
  1089. #define         AVC_RDO_MASK            0xFFFF
  1090.  
  1091. static void
  1092. gen8_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
  1093.                                        struct encode_state *encode_state,
  1094.                                        struct intel_encoder_context *encoder_context,
  1095.                                        int slice_index,
  1096.                                        struct intel_batchbuffer *slice_batch)
  1097. {
  1098.     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
  1099.     struct gen6_vme_context *vme_context = encoder_context->vme_context;
  1100.     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
  1101.     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
  1102.     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
  1103.     unsigned int *msg = NULL, offset = 0;
  1104.     unsigned char *msg_ptr = NULL;
  1105.     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
  1106.     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
  1107.     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
  1108.     int i,x,y;
  1109.     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
  1110.     unsigned int rate_control_mode = encoder_context->rate_control_mode;
  1111.     unsigned int tail_data[] = { 0x0, 0x0 };
  1112.     int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
  1113.     int is_intra = slice_type == SLICE_TYPE_I;
  1114.     int qp_slice;
  1115.  
  1116.     qp_slice = qp;
  1117.     if (rate_control_mode == VA_RC_CBR) {
  1118.         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
  1119.         if (encode_state->slice_header_index[slice_index] == 0) {
  1120.             pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
  1121.             qp_slice = qp;
  1122.         }
  1123.     }
  1124.  
  1125.     /* only support for 8-bit pixel bit-depth */
  1126.     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
  1127.     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
  1128.     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
  1129.     assert(qp >= 0 && qp < 52);
  1130.  
  1131.     gen8_mfc_avc_slice_state(ctx,
  1132.                              pPicParameter,
  1133.                              pSliceParameter,
  1134.                              encode_state, encoder_context,
  1135.                              (rate_control_mode == VA_RC_CBR), qp_slice, slice_batch);
  1136.  
  1137.     if ( slice_index == 0)
  1138.         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
  1139.  
  1140.     intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch);
  1141.  
  1142.     dri_bo_map(vme_context->vme_output.bo , 1);
  1143.     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
  1144.  
  1145.     if (is_intra) {
  1146.         msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
  1147.     } else {
  1148.         msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
  1149.     }
  1150.    
  1151.     for (i = pSliceParameter->macroblock_address;
  1152.          i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) {
  1153.         int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1) );
  1154.         x = i % width_in_mbs;
  1155.         y = i / width_in_mbs;
  1156.         msg = (unsigned int *) (msg_ptr + i * vme_context->vme_output.size_block);
  1157.  
  1158.         if (is_intra) {
  1159.             assert(msg);
  1160.             gen8_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
  1161.         } else {
  1162.             int inter_rdo, intra_rdo;
  1163.             inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
  1164.             intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
  1165.             offset = i * vme_context->vme_output.size_block + AVC_INTER_MV_OFFSET;
  1166.             if (intra_rdo < inter_rdo) {
  1167.                 gen8_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
  1168.             } else {
  1169.                 msg += AVC_INTER_MSG_OFFSET;
  1170.                 gen8_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, msg, offset, encoder_context, 0, 0, pSliceParameter->slice_type, slice_batch);
  1171.             }
  1172.         }
  1173.     }
  1174.    
  1175.     dri_bo_unmap(vme_context->vme_output.bo);
  1176.  
  1177.     if ( last_slice ) {    
  1178.         mfc_context->insert_object(ctx, encoder_context,
  1179.                                    tail_data, 2, 8,
  1180.                                    2, 1, 1, 0, slice_batch);
  1181.     } else {
  1182.         mfc_context->insert_object(ctx, encoder_context,
  1183.                                    tail_data, 1, 8,
  1184.                                    1, 1, 1, 0, slice_batch);
  1185.     }
  1186. }
  1187.  
/* Build the per-slice PAK commands on the CPU into the auxiliary batch
 * buffer and hand back its bo.
 * The returned bo carries a reference taken before the batch wrapper is
 * freed; ownership transfers to the caller. The aux batchbuffer pointer
 * in mfc_context is cleared since its wrapper has been destroyed. */
static dri_bo *
gen8_mfc_avc_software_batchbuffer(VADriverContextP ctx,
                                  struct encode_state *encode_state,
                                  struct intel_encoder_context *encoder_context)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    struct intel_batchbuffer *batch;
    dri_bo *batch_bo;
    int i;

    batch = mfc_context->aux_batchbuffer;
    batch_bo = batch->buffer;
    /* One pass per slice parameter buffer. */
    for (i = 0; i < encode_state->num_slice_params_ext; i++) {
        gen8_mfc_avc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch);
    }

    intel_batchbuffer_align(batch, 8);
   
    /* Terminate the second-level batch. */
    BEGIN_BCS_BATCH(batch, 2);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
    ADVANCE_BCS_BATCH(batch);

    /* Keep the bo alive across the wrapper free; the reference belongs
     * to the caller from here on. */
    dri_bo_reference(batch_bo);
    intel_batchbuffer_free(batch);
    mfc_context->aux_batchbuffer = NULL;

    return batch_bo;
}
  1217.  
  1218. #else
  1219.  
/* Bind the input surfaces for the batchbuffer-generation GPU kernel:
 * the VME output (per-MB messages) and the aux batchbuffer surface that
 * holds the pre-built slice headers. */
static void
gen8_mfc_batchbuffer_surfaces_input(VADriverContextP ctx,
                                    struct encode_state *encode_state,
                                    struct intel_encoder_context *encoder_context)

{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;

    assert(vme_context->vme_output.bo);
    mfc_context->buffer_suface_setup(ctx,
                                     &mfc_context->gpe_context,
                                     &vme_context->vme_output,
                                     BINDING_TABLE_OFFSET(BIND_IDX_VME_OUTPUT),
                                     SURFACE_STATE_OFFSET(BIND_IDX_VME_OUTPUT));
    assert(mfc_context->aux_batchbuffer_surface.bo);
    mfc_context->buffer_suface_setup(ctx,
                                     &mfc_context->gpe_context,
                                     &mfc_context->aux_batchbuffer_surface,
                                     BINDING_TABLE_OFFSET(BIND_IDX_MFC_SLICE_HEADER),
                                     SURFACE_STATE_OFFSET(BIND_IDX_MFC_SLICE_HEADER));
}
  1242.  
/* Allocate and bind the output surface that the GPU kernel fills with
 * generated MFC batchbuffer commands. Sized for one command per MB plus
 * 8 extra command slots per slice and one terminator block.
 * NOTE(review): the dri_bo_alloc() result is not checked before use -
 * an OOM here would crash in buffer_suface_setup; confirm whether the
 * driver's allocation policy makes that acceptable. */
static void
gen8_mfc_batchbuffer_surfaces_output(VADriverContextP ctx,
                                     struct encode_state *encode_state,
                                     struct intel_encoder_context *encoder_context)

{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
    int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
    int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
    mfc_context->mfc_batchbuffer_surface.num_blocks = width_in_mbs * height_in_mbs + encode_state->num_slice_params_ext * 8 + 1;
    mfc_context->mfc_batchbuffer_surface.size_block = 16 * CMD_LEN_IN_OWORD; /* 3 OWORDs */
    mfc_context->mfc_batchbuffer_surface.pitch = 16;
    mfc_context->mfc_batchbuffer_surface.bo = dri_bo_alloc(i965->intel.bufmgr,
                                                           "MFC batchbuffer",
                                                           mfc_context->mfc_batchbuffer_surface.num_blocks * mfc_context->mfc_batchbuffer_surface.size_block,
                                                           0x1000);
    mfc_context->buffer_suface_setup(ctx,
                                     &mfc_context->gpe_context,
                                     &mfc_context->mfc_batchbuffer_surface,
                                     BINDING_TABLE_OFFSET(BIND_IDX_MFC_BATCHBUFFER),
                                     SURFACE_STATE_OFFSET(BIND_IDX_MFC_BATCHBUFFER));
}
  1267.  
/* Bind all surfaces (inputs then output) for the hardware batchbuffer-
 * generation kernel. */
static void
gen8_mfc_batchbuffer_surfaces_setup(VADriverContextP ctx,
                                    struct encode_state *encode_state,
                                    struct intel_encoder_context *encoder_context)
{
    gen8_mfc_batchbuffer_surfaces_input(ctx, encode_state, encoder_context);
    gen8_mfc_batchbuffer_surfaces_output(ctx, encode_state, encoder_context);
}
  1276.  
/* Fill the interface descriptor remap table (IDRT) for the GPE context:
 * one 32-byte descriptor per kernel, pointing at the kernel binary, the
 * shared binding table, and a 4-register constant URB read. */
static void
gen8_mfc_batchbuffer_idrt_setup(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    struct gen6_interface_descriptor_data *desc;
    int i;
    dri_bo *bo;

    bo = mfc_context->gpe_context.idrt.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);
    desc = bo->virtual;

    for (i = 0; i < mfc_context->gpe_context.num_kernels; i++) {
        struct i965_kernel *kernel;

        kernel = &mfc_context->gpe_context.kernels[i];
        assert(sizeof(*desc) == 32);

        /*Setup the descritor table*/
        memset(desc, 0, sizeof(*desc));
        /* Kernel start pointer is in 64-byte units. */
        desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
        desc->desc2.sampler_count = 0;
        desc->desc2.sampler_state_pointer = 0;
        desc->desc3.binding_table_entry_count = 2;
        /* Binding table pointer is in 32-byte units. */
        desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
        desc->desc4.constant_urb_entry_read_offset = 0;
        desc->desc4.constant_urb_entry_read_length = 4;

        /* Relocate the kernel start address; the presumed offset written
         * above is patched by the kernel driver at exec time. */
        dri_bo_emit_reloc(bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0,
                          i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
                          kernel->bo);
        desc++;
    }

    dri_bo_unmap(bo);
}
  1319.  
  1320. static void
  1321. gen8_mfc_batchbuffer_constant_setup(VADriverContextP ctx,
  1322.                                     struct encode_state *encode_state,
  1323.                                     struct intel_encoder_context *encoder_context)
  1324. {
  1325.     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
  1326.    
  1327.     (void)mfc_context;
  1328. }
  1329.  
  1330. static void
  1331. gen8_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch,
  1332.                                          int index,
  1333.                                          int head_offset,
  1334.                                          int batchbuffer_offset,
  1335.                                          int head_size,
  1336.                                          int tail_size,
  1337.                                          int number_mb_cmds,
  1338.                                          int first_object,
  1339.                                          int last_object,
  1340.                                          int last_slice,
  1341.                                          int mb_x,
  1342.                                          int mb_y,
  1343.                                          int width_in_mbs,
  1344.                                          int qp)
  1345. {
  1346.     BEGIN_BATCH(batch, 12);
  1347.    
  1348.     OUT_BATCH(batch, CMD_MEDIA_OBJECT | (12 - 2));
  1349.     OUT_BATCH(batch, index);
  1350.     OUT_BATCH(batch, 0);
  1351.     OUT_BATCH(batch, 0);
  1352.     OUT_BATCH(batch, 0);
  1353.     OUT_BATCH(batch, 0);
  1354.    
  1355.     /*inline data */
  1356.     OUT_BATCH(batch, head_offset);
  1357.     OUT_BATCH(batch, batchbuffer_offset);
  1358.     OUT_BATCH(batch,
  1359.               head_size << 16 |
  1360.               tail_size);
  1361.     OUT_BATCH(batch,
  1362.               number_mb_cmds << 16 |
  1363.               first_object << 2 |
  1364.               last_object << 1 |
  1365.               last_slice);
  1366.     OUT_BATCH(batch,
  1367.               mb_y << 8 |
  1368.               mb_x);
  1369.     OUT_BATCH(batch,
  1370.               qp << 16 |
  1371.               width_in_mbs);
  1372.  
  1373.     ADVANCE_BATCH(batch);
  1374. }
  1375.  
/*
 * Queue the MEDIA_OBJECT commands that make the MFC batchbuffer kernel
 * assemble the PAK commands for one slice.  The slice is processed in
 * chunks of up to 128 macroblocks: one MEDIA_OBJECT per chunk, each
 * carrying the chunk's macroblock position and the head/tail sizes
 * (in Owords) as inline data.
 */
static void
gen8_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx,
                                       struct intel_encoder_context *encoder_context,
                                       VAEncSliceParameterBufferH264 *slice_param,
                                       int head_offset,
                                       unsigned short head_size,
                                       unsigned short tail_size,
                                       int batchbuffer_offset,
                                       int qp,
                                       int last_slice)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
    int total_mbs = slice_param->num_macroblocks;
    int number_mb_cmds = 128;           /* macroblocks handled per MEDIA_OBJECT */
    int starting_mb = 0;
    int last_object = 0;
    int first_object = 1;
    int i;
    int mb_x, mb_y;
    /* Select the intra or inter variant of the MFC batchbuffer kernel. */
    int index = (slice_param->slice_type == SLICE_TYPE_I) ? MFC_BATCHBUFFER_AVC_INTRA : MFC_BATCHBUFFER_AVC_INTER;

    /* Full 128-MB chunks. */
    for (i = 0; i < total_mbs / number_mb_cmds; i++) {
        /* This chunk is the last one when it exactly exhausts the slice. */
        last_object = (total_mbs - starting_mb) == number_mb_cmds;
        mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
        mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
        assert(mb_x <= 255 && mb_y <= 255);

        starting_mb += number_mb_cmds;

        gen8_mfc_batchbuffer_emit_object_command(batch,
                                                 index,
                                                 head_offset,
                                                 batchbuffer_offset,
                                                 head_size,
                                                 tail_size,
                                                 number_mb_cmds,
                                                 first_object,
                                                 last_object,
                                                 last_slice,
                                                 mb_x,
                                                 mb_y,
                                                 width_in_mbs,
                                                 qp);

        /* The head (slice header etc.) is consumed by the first chunk only. */
        if (first_object) {
            head_offset += head_size;
            batchbuffer_offset += head_size;
        }

        /* The tail is appended after the final chunk. */
        if (last_object) {
            head_offset += tail_size;
            batchbuffer_offset += tail_size;
        }

        /* Advance past the PAK commands this chunk will generate. */
        batchbuffer_offset += number_mb_cmds * CMD_LEN_IN_OWORD;

        first_object = 0;
    }

    /* Remainder chunk (< 128 MBs), if the loop above did not finish the slice. */
    if (!last_object) {
        last_object = 1;
        number_mb_cmds = total_mbs % number_mb_cmds;
        mb_x = (slice_param->macroblock_address + starting_mb) % width_in_mbs;
        mb_y = (slice_param->macroblock_address + starting_mb) / width_in_mbs;
        assert(mb_x <= 255 && mb_y <= 255);
        starting_mb += number_mb_cmds;

        gen8_mfc_batchbuffer_emit_object_command(batch,
                                                 index,
                                                 head_offset,
                                                 batchbuffer_offset,
                                                 head_size,
                                                 tail_size,
                                                 number_mb_cmds,
                                                 first_object,
                                                 last_object,
                                                 last_slice,
                                                 mb_x,
                                                 mb_y,
                                                 width_in_mbs,
                                                 qp);
    }
}
  1461.                          
  1462. /*
  1463.  * return size in Owords (16bytes)
  1464.  */        
/*
 * Write the bitstream head (slice state, headers, packed data) and tail
 * for one slice into the aux batchbuffer, then queue the MEDIA_OBJECT
 * commands that let the hardware pack the macroblock data in between.
 *
 * Returns the slice's total size in Owords (16 bytes):
 * head + tail + CMD_LEN_IN_OWORD per macroblock.
 */
static int
gen8_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
                               struct encode_state *encode_state,
                               struct intel_encoder_context *encoder_context,
                               int slice_index,
                               int batchbuffer_offset)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
    VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
    VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
    VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
    int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
    int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
    /* Last slice == the one covering the frame's final macroblock. */
    int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
    int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
    unsigned int rate_control_mode = encoder_context->rate_control_mode;
    unsigned int tail_data[] = { 0x0, 0x0 };
    long head_offset;
    int old_used = intel_batchbuffer_used_size(slice_batch), used;
    unsigned short head_size, tail_size;
    int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
    int qp_slice;

    qp_slice = qp;
    if (rate_control_mode == VA_RC_CBR) {
        /* CBR: the BRC-chosen QP overrides the parameter-derived one. */
        qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
        if (encode_state->slice_header_index[slice_index] == 0) {
            /* Driver-generated slice header: propagate the BRC QP into it. */
            pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
            qp_slice = qp;
        }
    }

    /* only support for 8-bit pixel bit-depth */
    assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
    assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
    assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
    assert(qp >= 0 && qp < 52);

    /* Head starts where the aux batchbuffer currently ends (in Owords). */
    head_offset = old_used / 16;
    gen8_mfc_avc_slice_state(ctx,
                             pPicParameter,
                             pSliceParameter,
                             encode_state,
                             encoder_context,
                             (rate_control_mode == VA_RC_CBR),
                             qp_slice,
                             slice_batch);

    /* Sequence/picture level headers go in front of the first slice only. */
    if (slice_index == 0)
        intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);

    intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch);

    intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
    used = intel_batchbuffer_used_size(slice_batch);
    head_size = (used - old_used) / 16;
    old_used = used;

    /* tail */
    if (last_slice) {    
        mfc_context->insert_object(ctx,
                                   encoder_context,
                                   tail_data,
                                   2,
                                   8,
                                   2,
                                   1,
                                   1,
                                   0,
                                   slice_batch);
    } else {
        mfc_context->insert_object(ctx,
                                   encoder_context,
                                   tail_data,
                                   1,
                                   8,
                                   1,
                                   1,
                                   1,
                                   0,
                                   slice_batch);
    }

    intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
    used = intel_batchbuffer_used_size(slice_batch);
    tail_size = (used - old_used) / 16;

    /* Queue the MEDIA_OBJECTs that will pack this slice's macroblocks. */
    gen8_mfc_avc_batchbuffer_slice_command(ctx,
                                           encoder_context,
                                           pSliceParameter,
                                           head_offset,
                                           head_size,
                                           tail_size,
                                           batchbuffer_offset,
                                           qp,
                                           last_slice);

    return head_size + tail_size + pSliceParameter->num_macroblocks * CMD_LEN_IN_OWORD;
}
  1566.  
  1567. static void
  1568. gen8_mfc_avc_batchbuffer_pipeline(VADriverContextP ctx,
  1569.                                   struct encode_state *encode_state,
  1570.                                   struct intel_encoder_context *encoder_context)
  1571. {
  1572.     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
  1573.     struct intel_batchbuffer *batch = encoder_context->base.batch;
  1574.     int i, size, offset = 0;
  1575.     intel_batchbuffer_start_atomic(batch, 0x4000);
  1576.     gen6_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch);
  1577.  
  1578.     for ( i = 0; i < encode_state->num_slice_params_ext; i++) {
  1579.         size = gen8_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i, offset);
  1580.         offset += size;
  1581.     }
  1582.  
  1583.     intel_batchbuffer_end_atomic(batch);
  1584.     intel_batchbuffer_flush(batch);
  1585. }
  1586.  
/*
 * Build the hardware-generated AVC slice batchbuffer: surfaces/binding
 * table, interface descriptors, constants (a no-op on gen8), and finally
 * the media pipeline that produces the PAK commands.
 */
static void
gen8_mfc_build_avc_batchbuffer(VADriverContextP ctx,
                               struct encode_state *encode_state,
                               struct intel_encoder_context *encoder_context)
{
    gen8_mfc_batchbuffer_surfaces_setup(ctx, encode_state, encoder_context);
    gen8_mfc_batchbuffer_idrt_setup(ctx, encode_state, encoder_context);
    gen8_mfc_batchbuffer_constant_setup(ctx, encode_state, encoder_context);
    gen8_mfc_avc_batchbuffer_pipeline(ctx, encode_state, encoder_context);
}
  1597.  
  1598. static dri_bo *
  1599. gen8_mfc_avc_hardware_batchbuffer(VADriverContextP ctx,
  1600.                                   struct encode_state *encode_state,
  1601.                                   struct intel_encoder_context *encoder_context)
  1602. {
  1603.     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
  1604.  
  1605.     gen8_mfc_build_avc_batchbuffer(ctx, encode_state, encoder_context);
  1606.     dri_bo_reference(mfc_context->mfc_batchbuffer_surface.bo);
  1607.  
  1608.     return mfc_context->mfc_batchbuffer_surface.bo;
  1609. }
  1610.  
  1611. #endif
  1612.  
/*
 * Program the BCS (MFX) pipeline for one AVC frame: build the slice
 * batchbuffer (in software or via the hardware media kernels, depending
 * on MFC_SOFTWARE_HASWELL), do the picture-level programming, then chain
 * into the slice batchbuffer with MI_BATCH_BUFFER_START.
 */
static void
gen8_mfc_avc_pipeline_programing(VADriverContextP ctx,
                                 struct encode_state *encode_state,
                                 struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    dri_bo *slice_batch_bo;

    /* Interlaced content is not handled by this encoder path. */
    if ( intel_mfc_interlace_check(ctx, encode_state, encoder_context) ) {
        fprintf(stderr, "Current VA driver don't support interlace mode!\n");
        assert(0);
        return;
    }

#ifdef MFC_SOFTWARE_HASWELL
    slice_batch_bo = gen8_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
#else
    slice_batch_bo = gen8_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context);
#endif

    // begin programing
    intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
    intel_batchbuffer_emit_mi_flush(batch);
   
    // picture level programing
    gen8_mfc_avc_pipeline_picture_programing(ctx, encode_state, encoder_context);

    /* Jump into the slice batchbuffer built above. */
    BEGIN_BCS_BATCH(batch, 3);
    OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
    OUT_BCS_RELOC(batch,
                  slice_batch_bo,
                  I915_GEM_DOMAIN_COMMAND, 0,
                  0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);

    // end programing
    intel_batchbuffer_end_atomic(batch);

    /* Drop the reference returned by the batchbuffer builder. */
    dri_bo_unreference(slice_batch_bo);
}
  1654.  
  1655.  
/*
 * Top-level AVC encode entry point for gen8.
 *
 * Without rate control a single pass is made.  In CBR mode the frame is
 * re-encoded until bit-rate control reports no HRD violation; an
 * unrepairable over/underflow (already at min/max QP) is logged once and
 * the frame is accepted as-is.
 *
 * Returns VA_STATUS_SUCCESS in all cases that reach the end.
 */
static VAStatus
gen8_mfc_avc_encode_picture(VADriverContextP ctx,
                            struct encode_state *encode_state,
                            struct intel_encoder_context *encoder_context)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    unsigned int rate_control_mode = encoder_context->rate_control_mode;
    int current_frame_bits_size;
    int sts;

    for (;;) {
        gen8_mfc_init(ctx, encode_state, encoder_context);
        intel_mfc_avc_prepare(ctx, encode_state, encoder_context);
        /*Programing bcs pipeline*/
        gen8_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context);   //filling the pipeline
        gen8_mfc_run(ctx, encode_state, encoder_context);
        if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
            /* Read back the coded size and let BRC judge the frame. */
            gen8_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
            sts = intel_mfc_brc_postpack(encode_state, mfc_context, current_frame_bits_size);
            if (sts == BRC_NO_HRD_VIOLATION) {
                intel_mfc_hrd_context_update(encode_state, mfc_context);
                break;
            }
            else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
                /* QP already at its limit: warn once, keep the frame. */
                if (!mfc_context->hrd.violation_noted) {
                    fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP)? "overflow": "underflow");
                    mfc_context->hrd.violation_noted = 1;
                }
                return VA_STATUS_SUCCESS;
            }
            /* Otherwise: repairable violation -- loop and re-encode. */
        } else {
            break;
        }
    }

    return VA_STATUS_SUCCESS;
}
  1693.  
  1694. /*
  1695.  * MPEG-2
  1696.  */
  1697.  
/* Map the VA MPEG-2 picture type index (0 = I, 1 = P, 2 = B) to the
 * picture-type value written into MFX_MPEG2_PIC_STATE
 * (see gen8_mfc_mpeg2_pic_state). */
static const int
va_to_gen8_mpeg2_picture_type[3] = {
    1,  /* I */
    2,  /* P */
    3   /* B */
};
  1704.  
/*
 * Emit MFX_MPEG2_PIC_STATE for encoding: f_codes and picture-coding
 * extension flags, picture type, frame dimensions in macroblocks, and
 * the per-MB size thresholds.
 */
static void
gen8_mfc_mpeg2_pic_state(VADriverContextP ctx,
                         struct intel_encoder_context *encoder_context,
                         struct encode_state *encode_state)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    VAEncPictureParameterBufferMPEG2 *pic_param;
    int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
    int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
    VAEncSliceParameterBufferMPEG2 *slice_param = NULL;

    assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
    pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
    /* Only the first slice's parameters are consulted here (for the
     * quantiser-scale-code based threshold below). */
    slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;

    BEGIN_BCS_BATCH(batch, 13);
    OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
    OUT_BCS_BATCH(batch,
                  (pic_param->f_code[1][1] & 0xf) << 28 | /* f_code[1][1] */
                  (pic_param->f_code[1][0] & 0xf) << 24 | /* f_code[1][0] */
                  (pic_param->f_code[0][1] & 0xf) << 20 | /* f_code[0][1] */
                  (pic_param->f_code[0][0] & 0xf) << 16 | /* f_code[0][0] */
                  pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
                  pic_param->picture_coding_extension.bits.picture_structure << 12 |
                  pic_param->picture_coding_extension.bits.top_field_first << 11 |
                  pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
                  pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
                  pic_param->picture_coding_extension.bits.q_scale_type << 8 |
                  pic_param->picture_coding_extension.bits.intra_vlc_format << 7 |
                  pic_param->picture_coding_extension.bits.alternate_scan << 6);
    OUT_BCS_BATCH(batch,
                  0 << 14 |     /* LoadSlicePointerFlag, 0 means only loading bitstream pointer once */
                  va_to_gen8_mpeg2_picture_type[pic_param->picture_type] << 9 |
                  0);
    OUT_BCS_BATCH(batch,
                  1 << 31 |     /* slice concealment */
                  (height_in_mbs - 1) << 16 |
                  (width_in_mbs - 1));

    if (slice_param && slice_param->quantiser_scale_code >= 14)
        OUT_BCS_BATCH(batch, (3 << 1) | (1 << 4) | (5 << 8) | (1 << 12));
    else
        OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  0xFFF << 16 | /* InterMBMaxSize */
                  0xFFF << 0 |  /* IntraMBMaxSize */
                  0);
    /* remaining dwords unused for encode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  1763.  
  1764. static void
  1765. gen8_mfc_mpeg2_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
  1766. {
  1767.     unsigned char intra_qm[64] = {
  1768.         8, 16, 19, 22, 26, 27, 29, 34,
  1769.         16, 16, 22, 24, 27, 29, 34, 37,
  1770.         19, 22, 26, 27, 29, 34, 34, 38,
  1771.         22, 22, 26, 27, 29, 34, 37, 40,
  1772.         22, 26, 27, 29, 32, 35, 40, 48,
  1773.         26, 27, 29, 32, 35, 40, 48, 58,
  1774.         26, 27, 29, 34, 38, 46, 56, 69,
  1775.         27, 29, 35, 38, 46, 56, 69, 83
  1776.     };
  1777.  
  1778.     unsigned char non_intra_qm[64] = {
  1779.         16, 16, 16, 16, 16, 16, 16, 16,
  1780.         16, 16, 16, 16, 16, 16, 16, 16,
  1781.         16, 16, 16, 16, 16, 16, 16, 16,
  1782.         16, 16, 16, 16, 16, 16, 16, 16,
  1783.         16, 16, 16, 16, 16, 16, 16, 16,
  1784.         16, 16, 16, 16, 16, 16, 16, 16,
  1785.         16, 16, 16, 16, 16, 16, 16, 16,
  1786.         16, 16, 16, 16, 16, 16, 16, 16
  1787.     };
  1788.  
  1789.     gen8_mfc_qm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_qm, 16, encoder_context);
  1790.     gen8_mfc_qm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_qm, 16,encoder_context);
  1791. }
  1792.  
/*
 * Load the reciprocal ("F") quantiser matrices: each entry is 65536/q,
 * with a flat 0x1000 (= 65536/16) table for non-intra.  Each 128-byte
 * table is handed to gen8_mfc_fqm_state as 32 dwords.
 *
 * NOTE(review): several intra entries (e.g. 65536/0x18, 65536/0x13) do
 * not equal 65536/intra_qm[i] for the matrix programmed by
 * gen8_mfc_mpeg2_qm_state -- verify against the intended quantiser matrix.
 */
static void
gen8_mfc_mpeg2_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    unsigned short intra_fqm[64] = {
        65536/0x8, 65536/0x10, 65536/0x13, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b,
        65536/0x10, 65536/0x10, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1b, 65536/0x1b, 65536/0x1d,
        65536/0x13, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, 65536/0x1d, 65536/0x1d, 65536/0x23,
        65536/0x16, 65536/0x18, 65536/0x1b, 65536/0x1b, 65536/0x13, 65536/0x20, 65536/0x22, 65536/0x26,
        65536/0x1a, 65536/0x1b, 65536/0x13, 65536/0x13, 65536/0x20, 65536/0x23, 65536/0x26, 65536/0x2e,
        65536/0x1b, 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x23, 65536/0x28, 65536/0x2e, 65536/0x38,
        65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x25, 65536/0x28, 65536/0x30, 65536/0x38, 65536/0x45,
        65536/0x22, 65536/0x25, 65536/0x26, 65536/0x28, 65536/0x30, 65536/0x3a, 65536/0x45, 65536/0x53,
    };

    unsigned short non_intra_fqm[64] = {
        0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
        0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
        0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
        0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
        0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
        0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
        0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
        0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
    };

    gen8_mfc_fqm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_fqm, 32, encoder_context);
    gen8_mfc_fqm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_fqm, 32, encoder_context);
}
  1821.  
  1822. static void
  1823. gen8_mfc_mpeg2_slicegroup_state(VADriverContextP ctx,
  1824.                                 struct intel_encoder_context *encoder_context,
  1825.                                 int x, int y,
  1826.                                 int next_x, int next_y,
  1827.                                 int is_fisrt_slice_group,
  1828.                                 int is_last_slice_group,
  1829.                                 int intra_slice,
  1830.                                 int qp,
  1831.                                 struct intel_batchbuffer *batch)
  1832. {
  1833.     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
  1834.  
  1835.     if (batch == NULL)
  1836.         batch = encoder_context->base.batch;
  1837.  
  1838.     BEGIN_BCS_BATCH(batch, 8);
  1839.  
  1840.     OUT_BCS_BATCH(batch, MFC_MPEG2_SLICEGROUP_STATE | (8 - 2));
  1841.     OUT_BCS_BATCH(batch,
  1842.                   0 << 31 |                             /* MbRateCtrlFlag */
  1843.                   !!is_last_slice_group << 19 |         /* IsLastSliceGrp */
  1844.                   1 << 17 |                             /* Insert Header before the first slice group data */
  1845.                   1 << 16 |                             /* SliceData PresentFlag: always 1 */
  1846.                   1 << 15 |                             /* TailPresentFlag: always 1 */
  1847.                   0 << 14 |                             /* FirstSliceHdrDisabled: slice header for each slice */
  1848.                   !!intra_slice << 13 |                 /* IntraSlice */
  1849.                   !!intra_slice << 12 |                 /* IntraSliceFlag */
  1850.                   0);
  1851.     OUT_BCS_BATCH(batch,
  1852.                   next_y << 24 |
  1853.                   next_x << 16 |
  1854.                   y << 8 |
  1855.                   x << 0 |
  1856.                   0);
  1857.     OUT_BCS_BATCH(batch, qp);   /* FIXME: SliceGroupQp */
  1858.     /* bitstream pointer is only loaded once for the first slice of a frame when
  1859.      * LoadSlicePointerFlag is 0
  1860.      */
  1861.     OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
  1862.     OUT_BCS_BATCH(batch, 0);    /* FIXME: */
  1863.     OUT_BCS_BATCH(batch, 0);    /* FIXME: CorrectPoints */
  1864.     OUT_BCS_BATCH(batch, 0);    /* FIXME: CVxxx */
  1865.  
  1866.     ADVANCE_BCS_BATCH(batch);
  1867. }
  1868.  
/*
 * Emit one MFC_MPEG2_PAK_OBJECT for an intra macroblock at (x, y).
 * All four motion-vector slots are written as zero.
 *
 * Returns the command length in dwords so the caller can track batch usage.
 */
static int
gen8_mfc_mpeg2_pak_object_intra(VADriverContextP ctx,
                                struct intel_encoder_context *encoder_context,
                                int x, int y,
                                int first_mb_in_slice,
                                int last_mb_in_slice,
                                int first_mb_in_slice_group,
                                int last_mb_in_slice_group,
                                int mb_type,
                                int qp_scale_code,
                                int coded_block_pattern,
                                unsigned char target_size_in_word,
                                unsigned char max_size_in_word,
                                struct intel_batchbuffer *batch)
{
    int len_in_dwords = 9;

    /* Default to the encoder's main batch when no explicit one is given. */
    if (batch == NULL)
        batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, len_in_dwords);

    OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
    OUT_BCS_BATCH(batch,
                  0 << 24 |     /* PackedMvNum */
                  0 << 20 |     /* MvFormat */
                  7 << 17 |     /* CbpDcY/CbpDcU/CbpDcV */
                  0 << 15 |     /* TransformFlag: frame DCT */
                  0 << 14 |     /* FieldMbFlag */
                  1 << 13 |     /* IntraMbFlag */
                  mb_type << 8 |   /* MbType: Intra */
                  0 << 2 |      /* SkipMbFlag */
                  0 << 0 |      /* InterMbMode */
                  0);
    OUT_BCS_BATCH(batch, y << 16 | x);
    OUT_BCS_BATCH(batch,
                  max_size_in_word << 24 |
                  target_size_in_word << 16 |
                  coded_block_pattern << 6 |      /* CBP */
                  0);
    OUT_BCS_BATCH(batch,
                  last_mb_in_slice << 31 |
                  first_mb_in_slice << 30 |
                  0 << 27 |     /* EnableCoeffClamp */
                  last_mb_in_slice_group << 26 |
                  0 << 25 |     /* MbSkipConvDisable */
                  first_mb_in_slice_group << 24 |
                  0 << 16 |     /* MvFieldSelect */
                  qp_scale_code << 0 |
                  0);
    OUT_BCS_BATCH(batch, 0);    /* MV[0][0] */
    OUT_BCS_BATCH(batch, 0);    /* MV[1][0] */
    OUT_BCS_BATCH(batch, 0);    /* MV[0][1] */
    OUT_BCS_BATCH(batch, 0);    /* MV[1][1] */

    ADVANCE_BCS_BATCH(batch);

    return len_in_dwords;
}
  1928.  
  1929. /* Byte offset */
  1930. #define MPEG2_INTER_MV_OFFSET   48
  1931.  
  1932. static struct _mv_ranges
  1933. {
  1934.     int low;    /* in the unit of 1/2 pixel */
  1935.     int high;   /* in the unit of 1/2 pixel */
  1936. } mv_ranges[] = {
  1937.     {0, 0},
  1938.     {-16, 15},
  1939.     {-32, 31},
  1940.     {-64, 63},
  1941.     {-128, 127},
  1942.     {-256, 255},
  1943.     {-512, 511},
  1944.     {-1024, 1023},
  1945.     {-2048, 2047},
  1946.     {-4096, 4095}
  1947. };
  1948.  
  1949. static int
  1950. mpeg2_motion_vector(int mv, int pos, int display_max, int f_code)
  1951. {
  1952.     if (mv + pos * 16 * 2 < 0 ||
  1953.         mv + (pos + 1) * 16 * 2 > display_max * 2)
  1954.         mv = 0;
  1955.  
  1956.     if (f_code > 0 && f_code < 10) {
  1957.         if (mv < mv_ranges[f_code].low)
  1958.             mv = mv_ranges[f_code].low;
  1959.  
  1960.         if (mv > mv_ranges[f_code].high)
  1961.             mv = mv_ranges[f_code].high;
  1962.     }
  1963.  
  1964.     return mv;
  1965. }
  1966.  
  1967. static int
  1968. gen8_mfc_mpeg2_pak_object_inter(VADriverContextP ctx,
  1969.                                 struct encode_state *encode_state,
  1970.                                 struct intel_encoder_context *encoder_context,
  1971.                                 unsigned int *msg,
  1972.                                 int width_in_mbs, int height_in_mbs,
  1973.                                 int x, int y,
  1974.                                 int first_mb_in_slice,
  1975.                                 int last_mb_in_slice,
  1976.                                 int first_mb_in_slice_group,
  1977.                                 int last_mb_in_slice_group,
  1978.                                 int qp_scale_code,
  1979.                                 unsigned char target_size_in_word,
  1980.                                 unsigned char max_size_in_word,
  1981.                                 struct intel_batchbuffer *batch)
  1982. {
  1983.     VAEncPictureParameterBufferMPEG2 *pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
  1984.     int len_in_dwords = 9;
  1985.     short *mvptr, mvx0, mvy0, mvx1, mvy1;
  1986.    
  1987.     if (batch == NULL)
  1988.         batch = encoder_context->base.batch;
  1989.  
  1990.     mvptr = (short *)((unsigned char *)msg + MPEG2_INTER_MV_OFFSET);;
  1991.     mvx0 = mpeg2_motion_vector(mvptr[0] / 2, x, width_in_mbs * 16, pic_param->f_code[0][0]);
  1992.     mvy0 = mpeg2_motion_vector(mvptr[1] / 2, y, height_in_mbs * 16, pic_param->f_code[0][0]);
  1993.     mvx1 = mpeg2_motion_vector(mvptr[2] / 2, x, width_in_mbs * 16, pic_param->f_code[1][0]);
  1994.     mvy1 = mpeg2_motion_vector(mvptr[3] / 2, y, height_in_mbs * 16, pic_param->f_code[1][0]);
  1995.  
  1996.     BEGIN_BCS_BATCH(batch, len_in_dwords);
  1997.  
  1998.     OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
  1999.     OUT_BCS_BATCH(batch,
  2000.                   2 << 24 |     /* PackedMvNum */
  2001.                   7 << 20 |     /* MvFormat */
  2002.                   7 << 17 |     /* CbpDcY/CbpDcU/CbpDcV */
  2003.                   0 << 15 |     /* TransformFlag: frame DCT */
  2004.                   0 << 14 |     /* FieldMbFlag */
  2005.                   0 << 13 |     /* IntraMbFlag */
  2006.                   1 << 8 |      /* MbType: Frame-based */
  2007.                   0 << 2 |      /* SkipMbFlag */
  2008.                   0 << 0 |      /* InterMbMode */
  2009.                   0);
  2010.     OUT_BCS_BATCH(batch, y << 16 | x);
  2011.     OUT_BCS_BATCH(batch,
  2012.                   max_size_in_word << 24 |
  2013.                   target_size_in_word << 16 |
  2014.                   0x3f << 6 |   /* CBP */
  2015.                   0);
  2016.     OUT_BCS_BATCH(batch,
  2017.                   last_mb_in_slice << 31 |
  2018.                   first_mb_in_slice << 30 |
  2019.                   0 << 27 |     /* EnableCoeffClamp */
  2020.                   last_mb_in_slice_group << 26 |
  2021.                   0 << 25 |     /* MbSkipConvDisable */
  2022.                   first_mb_in_slice_group << 24 |
  2023.                   0 << 16 |     /* MvFieldSelect */
  2024.                   qp_scale_code << 0 |
  2025.                   0);
  2026.  
  2027.     OUT_BCS_BATCH(batch, (mvx0 & 0xFFFF) | mvy0 << 16);    /* MV[0][0] */
  2028.     OUT_BCS_BATCH(batch, (mvx1 & 0xFFFF) | mvy1 << 16);    /* MV[1][0] */
  2029.     OUT_BCS_BATCH(batch, 0);    /* MV[0][1] */
  2030.     OUT_BCS_BATCH(batch, 0);    /* MV[1][1] */
  2031.  
  2032.     ADVANCE_BCS_BATCH(batch);
  2033.  
  2034.     return len_in_dwords;
  2035. }
  2036.  
  2037. static void
  2038. intel_mfc_mpeg2_pipeline_header_programing(VADriverContextP ctx,
  2039.                                            struct encode_state *encode_state,
  2040.                                            struct intel_encoder_context *encoder_context,
  2041.                                            struct intel_batchbuffer *slice_batch)
  2042. {
  2043.     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
  2044.     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_SPS);
  2045.  
  2046.     if (encode_state->packed_header_data[idx]) {
  2047.         VAEncPackedHeaderParameterBuffer *param = NULL;
  2048.         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
  2049.         unsigned int length_in_bits;
  2050.  
  2051.         assert(encode_state->packed_header_param[idx]);
  2052.         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
  2053.         length_in_bits = param->bit_length;
  2054.  
  2055.         mfc_context->insert_object(ctx,
  2056.                                    encoder_context,
  2057.                                    header_data,
  2058.                                    ALIGN(length_in_bits, 32) >> 5,
  2059.                                    length_in_bits & 0x1f,
  2060.                                    5,   /* FIXME: check it */
  2061.                                    0,
  2062.                                    0,
  2063.                                    0,   /* Needn't insert emulation bytes for MPEG-2 */
  2064.                                    slice_batch);
  2065.     }
  2066.  
  2067.     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_PPS);
  2068.  
  2069.     if (encode_state->packed_header_data[idx]) {
  2070.         VAEncPackedHeaderParameterBuffer *param = NULL;
  2071.         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
  2072.         unsigned int length_in_bits;
  2073.  
  2074.         assert(encode_state->packed_header_param[idx]);
  2075.         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
  2076.         length_in_bits = param->bit_length;
  2077.  
  2078.         mfc_context->insert_object(ctx,
  2079.                                    encoder_context,
  2080.                                    header_data,
  2081.                                    ALIGN(length_in_bits, 32) >> 5,
  2082.                                    length_in_bits & 0x1f,
  2083.                                    5,   /* FIXME: check it */
  2084.                                    0,
  2085.                                    0,
  2086.                                    0,   /* Needn't insert emulation bytes for MPEG-2 */
  2087.                                    slice_batch);
  2088.     }
  2089. }
  2090.  
/*
 * Program one MPEG-2 slice group into slice_batch: slice-group state,
 * packed SPS/PPS headers (first group only), then one PAK object per
 * macroblock, chosen intra vs. inter from the VME cost estimates, and
 * finally the section/picture delimiter bytes.
 */
static void
gen8_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx,
                                    struct encode_state *encode_state,
                                    struct intel_encoder_context *encoder_context,
                                    int slice_index,
                                    VAEncSliceParameterBufferMPEG2 *next_slice_group_param,
                                    struct intel_batchbuffer *slice_batch)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
    VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
    unsigned char tail_delimiter[] = {MPEG2_DELIMITER0, MPEG2_DELIMITER1, MPEG2_DELIMITER2, MPEG2_DELIMITER3, MPEG2_DELIMITER4, 0, 0, 0};
    unsigned char section_delimiter[] = {0x0, 0x0, 0x0, 0x0};
    int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
    int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
    int i, j;
    int h_start_pos, v_start_pos, h_next_start_pos, v_next_start_pos;
    unsigned int *msg = NULL;
    unsigned char *msg_ptr = NULL;

    slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[slice_index]->buffer;
    h_start_pos = slice_param->macroblock_address % width_in_mbs;
    v_start_pos = slice_param->macroblock_address / width_in_mbs;
    /* An MPEG-2 slice must not span more than one macroblock row */
    assert(h_start_pos + slice_param->num_macroblocks <= width_in_mbs);

    /* Map the VME output so per-MB messages can be read below */
    dri_bo_map(vme_context->vme_output.bo , 0);
    msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;

    if (next_slice_group_param) {
        h_next_start_pos = next_slice_group_param->macroblock_address % width_in_mbs;
        v_next_start_pos = next_slice_group_param->macroblock_address / width_in_mbs;
    } else {
        /* Last group: "next" position is one row past the bottom */
        h_next_start_pos = 0;
        v_next_start_pos = height_in_mbs;
    }

    gen8_mfc_mpeg2_slicegroup_state(ctx,
                                    encoder_context,
                                    h_start_pos,
                                    v_start_pos,
                                    h_next_start_pos,
                                    v_next_start_pos,
                                    slice_index == 0,
                                    next_slice_group_param == NULL,
                                    slice_param->is_intra_slice,
                                    slice_param->quantiser_scale_code,
                                    slice_batch);

    if (slice_index == 0)
        intel_mfc_mpeg2_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);

    /* Insert '00' to make sure the header is valid */
    mfc_context->insert_object(ctx,
                               encoder_context,
                               (unsigned int*)section_delimiter,
                               1,
                               8,   /* 8bits in the last DWORD */
                               1,   /* 1 byte */
                               1,
                               0,
                               0,
                               slice_batch);

    for (i = 0; i < encode_state->slice_params_ext[slice_index]->num_elements; i++) {
        /* PAK for each macroblocks */
        for (j = 0; j < slice_param->num_macroblocks; j++) {
            int h_pos = (slice_param->macroblock_address + j) % width_in_mbs;
            int v_pos = (slice_param->macroblock_address + j) / width_in_mbs;
            int first_mb_in_slice = (j == 0);
            int last_mb_in_slice = (j == slice_param->num_macroblocks - 1);
            int first_mb_in_slice_group = (i == 0 && j == 0);
            int last_mb_in_slice_group = (i == encode_state->slice_params_ext[slice_index]->num_elements - 1 &&
                                          j == slice_param->num_macroblocks - 1);

            /* VME message for this macroblock */
            msg = (unsigned int *)(msg_ptr + (slice_param->macroblock_address + j) * vme_context->vme_output.size_block);

            if (slice_param->is_intra_slice) {
                gen8_mfc_mpeg2_pak_object_intra(ctx,
                                                encoder_context,
                                                h_pos, v_pos,
                                                first_mb_in_slice,
                                                last_mb_in_slice,
                                                first_mb_in_slice_group,
                                                last_mb_in_slice_group,
                                                0x1a,
                                                slice_param->quantiser_scale_code,
                                                0x3f,
                                                0,
                                                0xff,
                                                slice_batch);
            } else {
                /* Pick intra vs. inter coding per MB from the VME
                 * rate-distortion estimates */
                int inter_rdo, intra_rdo;
                inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
                intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;

                if (intra_rdo < inter_rdo)
                    gen8_mfc_mpeg2_pak_object_intra(ctx,
                                                     encoder_context,
                                                     h_pos, v_pos,
                                                     first_mb_in_slice,
                                                     last_mb_in_slice,
                                                     first_mb_in_slice_group,
                                                     last_mb_in_slice_group,
                                                     0x1a,
                                                     slice_param->quantiser_scale_code,
                                                     0x3f,
                                                     0,
                                                     0xff,
                                                     slice_batch);
                else
                    gen8_mfc_mpeg2_pak_object_inter(ctx,
                                                encode_state,
                                                encoder_context,
                                                msg,
                                                width_in_mbs, height_in_mbs,
                                                h_pos, v_pos,
                                                first_mb_in_slice,
                                                last_mb_in_slice,
                                                first_mb_in_slice_group,
                                                last_mb_in_slice_group,
                                                slice_param->quantiser_scale_code,
                                                0,
                                                0xff,
                                                slice_batch);
            }
        }

        slice_param++;
    }

    dri_bo_unmap(vme_context->vme_output.bo);

    /* tail data */
    if (next_slice_group_param == NULL) { /* end of a picture */
        mfc_context->insert_object(ctx,
                                   encoder_context,
                                   (unsigned int *)tail_delimiter,
                                   2,
                                   8,   /* 8bits in the last DWORD */
                                   5,   /* 5 bytes */
                                   1,
                                   1,
                                   0,
                                   slice_batch);
    } else {        /* end of a slice group */
        mfc_context->insert_object(ctx,
                                   encoder_context,
                                   (unsigned int *)section_delimiter,
                                   1,
                                   8,   /* 8bits in the last DWORD */
                                   1,   /* 1 byte */
                                   1,
                                   1,
                                   0,
                                   slice_batch);
    }
}
  2249.  
  2250. /*
  2251.  * A batch buffer for all slices, including slice state,
  2252.  * slice insert object and slice pak object commands
  2253.  *
  2254.  */
static dri_bo *
gen8_mfc_mpeg2_software_slice_batchbuffer(VADriverContextP ctx,
                                          struct encode_state *encode_state,
                                          struct intel_encoder_context *encoder_context)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    struct intel_batchbuffer *batch;
    VAEncSliceParameterBufferMPEG2 *next_slice_group_param = NULL;
    dri_bo *batch_bo;
    int i;

    batch = mfc_context->aux_batchbuffer;
    batch_bo = batch->buffer;

    /* Program every slice group; each one also gets a peek at the next
     * group's parameters (NULL for the final group of the picture). */
    for (i = 0; i < encode_state->num_slice_params_ext; i++) {
        if (i == encode_state->num_slice_params_ext - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[i + 1]->buffer;

        gen8_mfc_mpeg2_pipeline_slice_group(ctx, encode_state, encoder_context, i, next_slice_group_param, batch);
    }

    intel_batchbuffer_align(batch, 8);

    /* Terminate the second-level batch */
    BEGIN_BCS_BATCH(batch, 2);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
    ADVANCE_BCS_BATCH(batch);

    /* Keep the bo alive past the batchbuffer free: the extra reference
     * taken here is transferred to the caller, who must unreference it. */
    dri_bo_reference(batch_bo);
    intel_batchbuffer_free(batch);
    mfc_context->aux_batchbuffer = NULL;

    return batch_bo;
}
  2291.  
/*
 * Emit the picture-level MFX command sequence for MPEG-2 encoding:
 * pipe mode select, surface/indirect-object/buffer-address state, then
 * picture state and the (inverse) quantizer matrices.  The order
 * follows the hardware's required MFX programming sequence.
 */
static void
gen8_mfc_mpeg2_pipeline_picture_programing(VADriverContextP ctx,
                                           struct encode_state *encode_state,
                                           struct intel_encoder_context *encoder_context)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;

    mfc_context->pipe_mode_select(ctx, MFX_FORMAT_MPEG2, encoder_context);
    mfc_context->set_surface_state(ctx, encoder_context);
    mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
    gen8_mfc_pipe_buf_addr_state(ctx, encoder_context);
    gen8_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
    gen8_mfc_mpeg2_pic_state(ctx, encoder_context, encode_state);
    gen8_mfc_mpeg2_qm_state(ctx, encoder_context);
    gen8_mfc_mpeg2_fqm_state(ctx, encoder_context);
}
  2308.  
/*
 * Build the complete MPEG-2 encode command stream: generate the
 * second-level slice batchbuffer in software, emit the picture-level
 * state into the main BCS batch, then chain to the slice batch with an
 * MI_BATCH_BUFFER_START.
 */
static void
gen8_mfc_mpeg2_pipeline_programing(VADriverContextP ctx,
                                   struct encode_state *encode_state,
                                   struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    dri_bo *slice_batch_bo;

    /* Owns a reference on the returned bo; released at the end */
    slice_batch_bo = gen8_mfc_mpeg2_software_slice_batchbuffer(ctx, encode_state, encoder_context);

    // begin programing
    intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
    intel_batchbuffer_emit_mi_flush(batch);

    // picture level programing
    gen8_mfc_mpeg2_pipeline_picture_programing(ctx, encode_state, encoder_context);

    /* Chain into the software-built slice batch (second-level batch) */
    BEGIN_BCS_BATCH(batch, 4);
    OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
    OUT_BCS_RELOC(batch,
                  slice_batch_bo,
                  I915_GEM_DOMAIN_COMMAND, 0,
                  0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);

    // end programing
    intel_batchbuffer_end_atomic(batch);

    dri_bo_unreference(slice_batch_bo);
}
  2341.  
/*
 * Bind all buffer objects needed for one MPEG-2 encode: reconstructed
 * surface, forward/backward references, input YUV and the coded output
 * buffer.  Every bo stored in mfc_context gets its own reference.
 */
static VAStatus
intel_mfc_mpeg2_prepare(VADriverContextP ctx,
                        struct encode_state *encode_state,
                        struct intel_encoder_context *encoder_context)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    struct object_surface *obj_surface;
    struct object_buffer *obj_buffer;
    struct i965_coded_buffer_segment *coded_buffer_segment;
    VAStatus vaStatus = VA_STATUS_SUCCESS;
    dri_bo *bo;
    int i;

    /* reconstructed surface */
    obj_surface = encode_state->reconstructed_object;
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
    mfc_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(mfc_context->pre_deblocking_output.bo);
    mfc_context->surface_state.width = obj_surface->orig_width;
    mfc_context->surface_state.height = obj_surface->orig_height;
    mfc_context->surface_state.w_pitch = obj_surface->width;
    mfc_context->surface_state.h_pitch = obj_surface->height;

    /* forward reference */
    obj_surface = encode_state->reference_objects[0];

    if (obj_surface && obj_surface->bo) {
        mfc_context->reference_surfaces[0].bo = obj_surface->bo;
        dri_bo_reference(mfc_context->reference_surfaces[0].bo);
    } else
        mfc_context->reference_surfaces[0].bo = NULL;

    /* backward reference */
    obj_surface = encode_state->reference_objects[1];

    if (obj_surface && obj_surface->bo) {
        mfc_context->reference_surfaces[1].bo = obj_surface->bo;
        dri_bo_reference(mfc_context->reference_surfaces[1].bo);
    } else {
        /* Fall back to the forward reference when no backward one exists */
        mfc_context->reference_surfaces[1].bo = mfc_context->reference_surfaces[0].bo;

        if (mfc_context->reference_surfaces[1].bo)
            dri_bo_reference(mfc_context->reference_surfaces[1].bo);
    }

    /* Fill the remaining reference slots by alternating fwd/bwd */
    for (i = 2; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
        mfc_context->reference_surfaces[i].bo = mfc_context->reference_surfaces[i & 1].bo;

        if (mfc_context->reference_surfaces[i].bo)
            dri_bo_reference(mfc_context->reference_surfaces[i].bo);
    }

    /* input YUV surface */
    obj_surface = encode_state->input_yuv_object;
    mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
    dri_bo_reference(mfc_context->uncompressed_picture_source.bo);

    /* coded buffer: the bitstream is written after the segment header */
    obj_buffer = encode_state->coded_buf_object;
    bo = obj_buffer->buffer_store->bo;
    mfc_context->mfc_indirect_pak_bse_object.bo = bo;
    mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
    mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
    dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);

    /* set the internal flag to 0 to indicate the coded size is unknown */
    dri_bo_map(bo, 1);
    coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
    coded_buffer_segment->mapped = 0;
    coded_buffer_segment->codec = encoder_context->codec;
    dri_bo_unmap(bo);

    return vaStatus;
}
  2416.  
  2417. static VAStatus
  2418. gen8_mfc_mpeg2_encode_picture(VADriverContextP ctx,
  2419.                               struct encode_state *encode_state,
  2420.                               struct intel_encoder_context *encoder_context)
  2421. {
  2422.     gen8_mfc_init(ctx, encode_state, encoder_context);
  2423.     intel_mfc_mpeg2_prepare(ctx, encode_state, encoder_context);
  2424.     /*Programing bcs pipeline*/
  2425.     gen8_mfc_mpeg2_pipeline_programing(ctx, encode_state, encoder_context);
  2426.     gen8_mfc_run(ctx, encode_state, encoder_context);
  2427.  
  2428.     return VA_STATUS_SUCCESS;
  2429. }
  2430.  
  2431. /* JPEG encode methods */
  2432.  
/*
 * Bind the buffer objects needed for one JPEG encode: the input YUV
 * surface and the coded output buffer.  Unlike the MPEG-2 path, JPEG
 * needs no reconstructed or reference surfaces.
 */
static VAStatus
intel_mfc_jpeg_prepare(VADriverContextP ctx,
                        struct encode_state *encode_state,
                        struct intel_encoder_context *encoder_context)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    struct object_surface *obj_surface;
    struct object_buffer *obj_buffer;
    struct i965_coded_buffer_segment *coded_buffer_segment;
    VAStatus vaStatus = VA_STATUS_SUCCESS;
    dri_bo *bo;

    /* input YUV surface */
    obj_surface = encode_state->input_yuv_object;
    mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
    dri_bo_reference(mfc_context->uncompressed_picture_source.bo);

    /* coded buffer: the bitstream is written after the segment header */
    obj_buffer = encode_state->coded_buf_object;
    bo = obj_buffer->buffer_store->bo;
    mfc_context->mfc_indirect_pak_bse_object.bo = bo;
    mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
    mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
    dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);

    /* set the internal flag to 0 to indicate the coded size is unknown */
    dri_bo_map(bo, 1);
    coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
    coded_buffer_segment->mapped = 0;
    coded_buffer_segment->codec = encoder_context->codec;
    dri_bo_unmap(bo);

    return vaStatus;
}
  2467.  
  2468.  
  2469. static void
  2470. gen8_mfc_jpeg_set_surface_state(VADriverContextP ctx,
  2471.                         struct intel_encoder_context *encoder_context,
  2472.                         struct encode_state *encode_state)
  2473. {
  2474.     struct intel_batchbuffer *batch = encoder_context->base.batch;
  2475.     struct object_surface *obj_surface = encode_state->input_yuv_object;
  2476.     unsigned int input_fourcc;
  2477.     unsigned int y_cb_offset;
  2478.     unsigned int y_cr_offset;
  2479.     unsigned int surface_format;
  2480.  
  2481.     assert(obj_surface);
  2482.  
  2483.     y_cb_offset = obj_surface->y_cb_offset;
  2484.     y_cr_offset = obj_surface->y_cr_offset;
  2485.     input_fourcc = obj_surface->fourcc;
  2486.  
  2487.     surface_format = (obj_surface->fourcc == VA_FOURCC_Y800) ?
  2488.         MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
  2489.        
  2490.        
  2491.      switch (input_fourcc) {
  2492.         case VA_FOURCC_Y800: {
  2493.             surface_format = MFX_SURFACE_MONOCHROME;
  2494.             break;
  2495.         }
  2496.         case VA_FOURCC_NV12: {
  2497.             surface_format = MFX_SURFACE_PLANAR_420_8;
  2498.             break;
  2499.         }      
  2500.         case VA_FOURCC_UYVY: {
  2501.             surface_format = MFX_SURFACE_YCRCB_SWAPY;
  2502.             break;
  2503.         }
  2504.         case VA_FOURCC_YUY2: {
  2505.             surface_format = MFX_SURFACE_YCRCB_NORMAL;
  2506.             break;
  2507.         }
  2508.         case VA_FOURCC_RGBA:
  2509.         case VA_FOURCC_444P: {
  2510.             surface_format = MFX_SURFACE_R8G8B8A8_UNORM;
  2511.             break;
  2512.         }
  2513.     }
  2514.  
  2515.     BEGIN_BCS_BATCH(batch, 6);
  2516.  
  2517.     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
  2518.     OUT_BCS_BATCH(batch, 0);
  2519.     OUT_BCS_BATCH(batch,
  2520.                   ((obj_surface->orig_height - 1) << 18) |
  2521.                   ((obj_surface->orig_width - 1) << 4));
  2522.     OUT_BCS_BATCH(batch,
  2523.                   (surface_format << 28) | /* Surface Format */
  2524.                   (0 << 27) | /* must be 1 for interleave U/V, hardware requirement for AVC/VC1/MPEG and 0 for JPEG */
  2525.                   (0 << 22) | /* surface object control state, FIXME??? */
  2526.                   ((obj_surface->width - 1) << 3) | /* pitch */
  2527.                   (0 << 2)  | /* must be 0 for interleave U/V */
  2528.                   (1 << 1)  | /* must be tiled */
  2529.                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, TILEWALK_YMAJOR */
  2530.     OUT_BCS_BATCH(batch,
  2531.                   (0 << 16) | /* X offset for U(Cb), must be 0 */
  2532.                   (y_cb_offset << 0)); /* Y offset for U(Cb) */
  2533.     OUT_BCS_BATCH(batch,
  2534.                   (0 << 16) | /* X offset for V(Cr), must be 0 */
  2535.                   (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoeo for JPEG */
  2536.                  
  2537.  
  2538.     ADVANCE_BCS_BATCH(batch);
  2539. }
  2540.  
  2541. static void
  2542. gen8_mfc_jpeg_pic_state(VADriverContextP ctx,
  2543.                         struct intel_encoder_context *encoder_context,
  2544.                         struct encode_state *encode_state)
  2545. {
  2546.     struct intel_batchbuffer *batch = encoder_context->base.batch;
  2547.     struct object_surface *obj_surface = encode_state->input_yuv_object;
  2548.     VAEncPictureParameterBufferJPEG *pic_param;
  2549.     unsigned int  surface_format;
  2550.     unsigned int  frame_width_in_blks;
  2551.     unsigned int  frame_height_in_blks;
  2552.     unsigned int  pixels_in_horizontal_lastMCU;
  2553.     unsigned int  pixels_in_vertical_lastMCU;
  2554.     unsigned int  input_surface_format;
  2555.     unsigned int  output_mcu_format;
  2556.     unsigned int  picture_width;
  2557.     unsigned int  picture_height;  
  2558.  
  2559.     assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
  2560.     assert(obj_surface);
  2561.     pic_param = (VAEncPictureParameterBufferJPEG *)encode_state->pic_param_ext->buffer;
  2562.     surface_format = obj_surface->fourcc;
  2563.     picture_width = pic_param->picture_width;
  2564.     picture_height = pic_param->picture_height;
  2565.    
  2566.     switch (surface_format) {
  2567.         case VA_FOURCC_Y800: {
  2568.             input_surface_format = JPEG_ENC_SURFACE_Y8;
  2569.             output_mcu_format = JPEG_ENC_MCU_YUV400;
  2570.             break;
  2571.         }
  2572.         case VA_FOURCC_NV12: {
  2573.             input_surface_format = JPEG_ENC_SURFACE_NV12;
  2574.             output_mcu_format = JPEG_ENC_MCU_YUV420;
  2575.             break;
  2576.         }      
  2577.         case VA_FOURCC_UYVY: {
  2578.             input_surface_format = JPEG_ENC_SURFACE_UYVY;
  2579.             output_mcu_format = JPEG_ENC_MCU_YUV422H_2Y;
  2580.             break;
  2581.         }
  2582.         case VA_FOURCC_YUY2: {
  2583.             input_surface_format = JPEG_ENC_SURFACE_YUY2;
  2584.             output_mcu_format = JPEG_ENC_MCU_YUV422H_2Y;
  2585.             break;
  2586.         }
  2587.  
  2588.         case VA_FOURCC_RGBA:
  2589.         case VA_FOURCC_444P: {
  2590.             input_surface_format = JPEG_ENC_SURFACE_RGB;
  2591.             output_mcu_format = JPEG_ENC_MCU_RGB;
  2592.             break;
  2593.         }
  2594.         default : {
  2595.             input_surface_format = JPEG_ENC_SURFACE_NV12;
  2596.             output_mcu_format = JPEG_ENC_MCU_YUV420;
  2597.             break;
  2598.         }
  2599.     }
  2600.  
  2601.    
  2602.     switch (output_mcu_format) {
  2603.        
  2604.         case JPEG_ENC_MCU_YUV400:
  2605.         case JPEG_ENC_MCU_RGB: {
  2606.             pixels_in_horizontal_lastMCU = (picture_width % 8);
  2607.             pixels_in_vertical_lastMCU = (picture_height % 8);
  2608.  
  2609.             //H1=1,V1=1 for YUV400 and YUV444. So, compute these values accordingly
  2610.             frame_width_in_blks = ((picture_width + 7) / 8);
  2611.             frame_height_in_blks = ((picture_height + 7) / 8);
  2612.             break;
  2613.         }
  2614.        
  2615.         case JPEG_ENC_MCU_YUV420: {        
  2616.             if((picture_width % 2) == 0)
  2617.                 pixels_in_horizontal_lastMCU = picture_width % 16;
  2618.             else
  2619.                 pixels_in_horizontal_lastMCU   = ((picture_width % 16) + 1) % 16;
  2620.            
  2621.             if((picture_height % 2) == 0)
  2622.                 pixels_in_vertical_lastMCU     = picture_height % 16;
  2623.             else
  2624.                 pixels_in_vertical_lastMCU   = ((picture_height % 16) + 1) % 16;
  2625.  
  2626.             //H1=2,V1=2 for YUV420. So, compute these values accordingly
  2627.             frame_width_in_blks = ((picture_width + 15) / 16) * 2;
  2628.             frame_height_in_blks = ((picture_height + 15) / 16) * 2;
  2629.             break;
  2630.         }
  2631.        
  2632.         case JPEG_ENC_MCU_YUV422H_2Y: {
  2633.             if(picture_width % 2 == 0)
  2634.                 pixels_in_horizontal_lastMCU = picture_width % 16;
  2635.             else
  2636.                 pixels_in_horizontal_lastMCU = ((picture_width % 16) + 1) % 16;
  2637.            
  2638.             pixels_in_vertical_lastMCU = picture_height % 8;
  2639.            
  2640.             //H1=2,V1=1 for YUV422H_2Y. So, compute these values accordingly
  2641.             frame_width_in_blks = ((picture_width + 15) / 16) * 2;
  2642.             frame_height_in_blks = ((picture_height + 7) / 8);
  2643.             break;            
  2644.         }      
  2645.     } //end of switch
  2646.    
  2647.     BEGIN_BCS_BATCH(batch, 3);
  2648.     /* DWORD 0 */
  2649.     OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
  2650.     /* DWORD 1 */
  2651.     OUT_BCS_BATCH(batch,
  2652.                   ( pixels_in_horizontal_lastMCU << 26) |    /* Pixels In Horizontal Last MCU */
  2653.                   ( pixels_in_vertical_lastMCU << 21)   |    /* Pixels In Vertical Last MCU */
  2654.                   ( input_surface_format << 8)          |    /* Input Surface format */
  2655.                   ( output_mcu_format << 0));                /* Output MCU Structure */
  2656.     /* DWORD 2 */
  2657.     OUT_BCS_BATCH(batch,
  2658.                   ((frame_height_in_blks - 1) << 16)    |   /* Frame Height In Blks Minus 1 */
  2659.                   (JPEG_ENC_ROUND_QUANT_DEFAULT  << 13) |   /* Rounding Quant set to default value 0 */
  2660.                   ((frame_width_in_blks - 1) << 0));        /* Frame Width In Blks Minus 1 */
  2661.     ADVANCE_BCS_BATCH(batch);
  2662. }
  2663.  
  2664. static void
  2665. get_reciprocal_dword_qm(unsigned char *raster_qm, uint32_t *dword_qm)
  2666. {
  2667.     int i = 0, j = 0;
  2668.     short reciprocal_qm[64];
  2669.    
  2670.     for(i=0; i<64; i++) {
  2671.         reciprocal_qm[i] = 65535/(raster_qm[i]);          
  2672.     }
  2673.    
  2674.     for(i=0; i<64; i++) {
  2675.         dword_qm[j] = ((reciprocal_qm[i+1] <<16) | (reciprocal_qm[i]));
  2676.         j++;
  2677.         i++;
  2678.     }    
  2679.    
  2680. }
  2681.  
  2682.  
  2683. static void
  2684. gen8_mfc_jpeg_fqm_state(VADriverContextP ctx,
  2685.                         struct intel_encoder_context *encoder_context,
  2686.                         struct encode_state *encode_state)
  2687. {
  2688.     unsigned int quality = 0;
  2689.     uint32_t temp, i = 0, j = 0, dword_qm[32];
  2690.     VAEncPictureParameterBufferJPEG *pic_param;
  2691.     VAQMatrixBufferJPEG *qmatrix;
  2692.     unsigned char raster_qm[64], column_raster_qm[64];
  2693.     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
  2694.    
  2695.     assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
  2696.     pic_param = (VAEncPictureParameterBufferJPEG *)encode_state->pic_param_ext->buffer;
  2697.     quality = pic_param->quality;
  2698.    
  2699.     //If the app sends the qmatrix, use it, buffer it for using it with the next frames
  2700.     //The app can send qmatrix for the first frame and not send for the subsequent frames
  2701.     if(encode_state->q_matrix && encode_state->q_matrix->buffer) {
  2702.         qmatrix = (VAQMatrixBufferJPEG *)encode_state->q_matrix->buffer;
  2703.  
  2704.         mfc_context->buffered_qmatrix.load_lum_quantiser_matrix = 1;
  2705.         memcpy(mfc_context->buffered_qmatrix.lum_quantiser_matrix, qmatrix->lum_quantiser_matrix, 64 * (sizeof(unsigned char)));
  2706.  
  2707.         if(pic_param->num_components > 1) {
  2708.             mfc_context->buffered_qmatrix.load_chroma_quantiser_matrix = 1;
  2709.             memcpy(mfc_context->buffered_qmatrix.chroma_quantiser_matrix, qmatrix->chroma_quantiser_matrix, 64 * (sizeof(unsigned char)));
  2710.         } else {
  2711.             mfc_context->buffered_qmatrix.load_chroma_quantiser_matrix = 0;
  2712.         }
  2713.  
  2714.     } else {
  2715.         //If the app doesnt send the qmatrix, use the buffered/default qmatrix
  2716.         qmatrix = &mfc_context->buffered_qmatrix;
  2717.         qmatrix->load_lum_quantiser_matrix = 1;
  2718.         qmatrix->load_chroma_quantiser_matrix = (pic_param->num_components > 1) ? 1 : 0;
  2719.     }  
  2720.  
  2721.  
  2722.     //As per the design, normalization of the quality factor and scaling of the Quantization tables
  2723.     //based on the quality factor needs to be done in the driver before sending the values to the HW.
  2724.     //But note, the driver expects the scaled quantization tables (as per below logic) to be sent as
  2725.     //packed header information. The packed header is written as the header of the jpeg file. This
  2726.     //header information is used to decode the jpeg file. So, it is the app's responsibility to send
  2727.     //the correct header information (See build_packed_jpeg_header_buffer() in jpegenc.c in LibVa on
  2728.     //how to do this). QTables can be different for different applications. If no tables are provided,
  2729.     //the default tables in the driver are used.
  2730.  
  2731.     //Normalization of the quality factor
  2732.     if (quality > 100) quality=100;
  2733.     if (quality == 0)  quality=1;
  2734.     quality = (quality < 50) ? (5000/quality) : (200 - (quality*2));
  2735.    
  2736.     //Step 1. Apply Quality factor and clip to range [1, 255] for luma and chroma Quantization matrices
  2737.     //Step 2. HW expects the 1/Q[i] values in the qm sent, so get reciprocals
  2738.     //Step 3. HW also expects 32 dwords, hence combine 2 (1/Q) values into 1 dword
  2739.     //Step 4. Send the Quantization matrix to the HW, use gen8_mfc_fqm_state
  2740.    
  2741.     //For luma (Y or R)
  2742.     if(qmatrix->load_lum_quantiser_matrix) {
  2743.         //apply quality to lum_quantiser_matrix
  2744.         for(i=0; i < 64; i++) {
  2745.             temp = (qmatrix->lum_quantiser_matrix[i] * quality)/100;
  2746.             //clamp to range [1,255]
  2747.             temp = (temp > 255) ? 255 : temp;
  2748.             temp = (temp < 1) ? 1 : temp;
  2749.             qmatrix->lum_quantiser_matrix[i] = (unsigned char)temp;
  2750.         }      
  2751.        
  2752.         //For VAAPI, the VAQMatrixBuffer needs to be in zigzag order.
  2753.         //The App should send it in zigzag. Now, the driver has to extract the raster from it.
  2754.         for (j = 0; j < 64; j++)
  2755.             raster_qm[zigzag_direct[j]] = qmatrix->lum_quantiser_matrix[j];
  2756.  
  2757.         //Convert the raster order(row-ordered) to the column-raster (column by column).
  2758.         //To be consistent with the other encoders, send it in column order.
  2759.         //Need to double check if our HW expects col or row raster.
  2760.         for (j = 0; j < 64; j++) {
  2761.             int row = j / 8, col = j % 8;
  2762.             column_raster_qm[col * 8 + row] = raster_qm[j];
  2763.         }
  2764.        
  2765.         //Convert to raster QM to reciprocal. HW expects values in reciprocal.
  2766.         get_reciprocal_dword_qm(column_raster_qm, dword_qm);
  2767.        
  2768.         //send the luma qm to the command buffer
  2769.         gen8_mfc_fqm_state(ctx, MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX, dword_qm, 32, encoder_context);
  2770.     }
  2771.    
  2772.     //For Chroma, if chroma exists (Cb, Cr or G, B)
  2773.     if(qmatrix->load_chroma_quantiser_matrix) {
  2774.         //apply quality to chroma_quantiser_matrix
  2775.         for(i=0; i < 64; i++) {
  2776.             temp = (qmatrix->chroma_quantiser_matrix[i] * quality)/100;
  2777.             //clamp to range [1,255]
  2778.             temp = (temp > 255) ? 255 : temp;
  2779.             temp = (temp < 1) ? 1 : temp;
  2780.             qmatrix->chroma_quantiser_matrix[i] = (unsigned char)temp;
  2781.         }
  2782.        
  2783.         //For VAAPI, the VAQMatrixBuffer needs to be in zigzag order.
  2784.         //The App should send it in zigzag. Now, the driver has to extract the raster from it.
  2785.         for (j = 0; j < 64; j++)
  2786.             raster_qm[zigzag_direct[j]] = qmatrix->chroma_quantiser_matrix[j];
  2787.        
  2788.         //Convert the raster order(row-ordered) to the column-raster (column by column).
  2789.         //To be consistent with the other encoders, send it in column order.
  2790.         //Need to double check if our HW expects col or row raster.
  2791.         for (j = 0; j < 64; j++) {
  2792.             int row = j / 8, col = j % 8;
  2793.             column_raster_qm[col * 8 + row] = raster_qm[j];
  2794.         }
  2795.  
  2796.  
  2797.         //Convert to raster QM to reciprocal. HW expects values in reciprocal.
  2798.         get_reciprocal_dword_qm(column_raster_qm, dword_qm);
  2799.  
  2800.         //send the same chroma qm to the command buffer (for both U,V or G,B)
  2801.         gen8_mfc_fqm_state(ctx, MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX, dword_qm, 32, encoder_context);
  2802.         gen8_mfc_fqm_state(ctx, MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX, dword_qm, 32, encoder_context);        
  2803.     }
  2804. }
  2805.  
  2806.  
  2807. //Translation of Table K.5 into code: This method takes the huffval from the
  2808. //Huffmantable buffer and converts into index for the coefficients and size tables
  2809. uint8_t map_huffval_to_index(uint8_t huff_val)
  2810. {
  2811.     uint8_t index = 0;
  2812.  
  2813.     if(huff_val < 0xF0) {
  2814.         index = (((huff_val >> 4) & 0x0F) * 0xA) + (huff_val & 0x0F);
  2815.     } else {
  2816.         index = 1 + (((huff_val >> 4) & 0x0F) * 0xA) + (huff_val & 0x0F);
  2817.     }
  2818.  
  2819.     return index;
  2820. }
  2821.  
  2822.  
  2823. //Implementation of Flow chart Annex C  - Figure C.1
  2824. static void
  2825. generate_huffman_codesizes_table(uint8_t *bits, uint8_t *huff_size_table, uint8_t *lastK)
  2826. {
  2827.     uint8_t i=1, j=1, k=0;
  2828.  
  2829.     while(i <= 16) {
  2830.         while(j <= (uint8_t)bits[i-1]) {
  2831.             huff_size_table[k] = i;
  2832.             k = k+1;
  2833.             j = j+1;
  2834.         }
  2835.        
  2836.         i = i+1;
  2837.         j = 1;
  2838.     }
  2839.     huff_size_table[k] = 0;
  2840.     (*lastK) = k;    
  2841. }
  2842.  
  2843. //Implementation of Flow chart Annex C - Figure C.2
  2844. static void
  2845. generate_huffman_codes_table(uint8_t *huff_size_table, uint16_t *huff_code_table)
  2846. {
  2847.     uint8_t k=0;
  2848.     uint16_t code=0;
  2849.     uint8_t si=huff_size_table[k];
  2850.    
  2851.     while(huff_size_table[k] != 0) {
  2852.    
  2853.         while(huff_size_table[k] == si) {
  2854.            
  2855.             // An huffman code can never be 0xFFFF. Replace it with 0 if 0xFFFF
  2856.             if(code == 0xFFFF) {
  2857.                 code = 0x0000;
  2858.             }
  2859.  
  2860.             huff_code_table[k] = code;
  2861.             code = code+1;
  2862.             k = k+1;
  2863.         }
  2864.    
  2865.         code <<= 1;
  2866.         si = si+1;
  2867.     }
  2868.    
  2869. }
  2870.  
  2871. //Implementation of Flow chat Annex C - Figure C.3
  2872. static void
  2873. generate_ordered_codes_table(uint8_t *huff_vals, uint8_t *huff_size_table, uint16_t *huff_code_table, uint8_t type, uint8_t lastK)
  2874. {
  2875.     uint8_t huff_val_size=0, i=0, k=0;
  2876.    
  2877.     huff_val_size = (type == 0) ? 12 : 162;
  2878.     uint8_t huff_si_table[huff_val_size];
  2879.     uint16_t huff_co_table[huff_val_size];
  2880.    
  2881.     memset(huff_si_table, 0, sizeof(huff_si_table));
  2882.     memset(huff_co_table, 0, sizeof(huff_co_table));
  2883.    
  2884.     do {
  2885.         i = map_huffval_to_index(huff_vals[k]);
  2886.         huff_co_table[i] = huff_code_table[k];
  2887.         huff_si_table[i] = huff_size_table[k];
  2888.         k++;
  2889.     } while(k < lastK);
  2890.    
  2891.     memcpy(huff_size_table, huff_si_table, sizeof(uint8_t)*huff_val_size);
  2892.     memcpy(huff_code_table, huff_co_table, sizeof(uint16_t)*huff_val_size);
  2893. }
  2894.  
  2895.  
  2896. //This method converts the huffman table to code words which is needed by the HW
  2897. //Flowcharts from Jpeg Spec Annex C - Figure C.1, Figure C.2, Figure C.3 are used here
  2898. static void
  2899. convert_hufftable_to_codes(VAHuffmanTableBufferJPEGBaseline *huff_buffer, uint32_t *table, uint8_t type, uint8_t index)
  2900. {
  2901.     uint8_t lastK = 0, i=0;
  2902.     uint8_t huff_val_size = 0;
  2903.     uint8_t *huff_bits, *huff_vals;
  2904.  
  2905.     huff_val_size = (type == 0) ? 12 : 162;
  2906.     uint8_t huff_size_table[huff_val_size+1]; //The +1 for adding 0 at the end of huff_val_size
  2907.     uint16_t huff_code_table[huff_val_size];
  2908.  
  2909.     memset(huff_size_table, 0, sizeof(huff_size_table));
  2910.     memset(huff_code_table, 0, sizeof(huff_code_table));
  2911.  
  2912.     huff_bits = (type == 0) ? (huff_buffer->huffman_table[index].num_dc_codes) : (huff_buffer->huffman_table[index].num_ac_codes);
  2913.     huff_vals = (type == 0) ? (huff_buffer->huffman_table[index].dc_values) : (huff_buffer->huffman_table[index].ac_values);
  2914.    
  2915.  
  2916.     //Generation of table of Huffman code sizes
  2917.     generate_huffman_codesizes_table(huff_bits, huff_size_table, &lastK);
  2918.        
  2919.     //Generation of table of Huffman codes
  2920.     generate_huffman_codes_table(huff_size_table, huff_code_table);
  2921.        
  2922.     //Ordering procedure for encoding procedure code tables
  2923.     generate_ordered_codes_table(huff_vals, huff_size_table, huff_code_table, type, lastK);
  2924.  
  2925.     //HW expects Byte0: Code length; Byte1,Byte2: Code Word, Byte3: Dummy
  2926.     //Since IA is littlended, &, | and << accordingly to store the values in the DWord.
  2927.     for(i=0; i<huff_val_size; i++) {
  2928.         table[i] = 0;
  2929.         table[i] = ((huff_size_table[i] & 0xFF) | ((huff_code_table[i] & 0xFFFF) << 8));
  2930.     }
  2931.  
  2932. }
  2933.  
  2934. //send the huffman table using MFC_JPEG_HUFF_TABLE_STATE
  2935. static void
  2936. gen8_mfc_jpeg_huff_table_state(VADriverContextP ctx,
  2937.                                            struct encode_state *encode_state,
  2938.                                            struct intel_encoder_context *encoder_context,
  2939.                                            int num_tables)
  2940. {
  2941.     VAHuffmanTableBufferJPEGBaseline *huff_buffer;
  2942.     struct intel_batchbuffer *batch = encoder_context->base.batch;
  2943.     uint8_t index;
  2944.     uint32_t dc_table[12], ac_table[162];
  2945.    
  2946.     assert(encode_state->huffman_table && encode_state->huffman_table->buffer);
  2947.     huff_buffer = (VAHuffmanTableBufferJPEGBaseline *)encode_state->huffman_table->buffer;
  2948.  
  2949.     memset(dc_table, 0, 12);
  2950.     memset(ac_table, 0, 162);
  2951.  
  2952.     for (index = 0; index < num_tables; index++) {
  2953.         int id = va_to_gen7_jpeg_hufftable[index];
  2954.  
  2955.         if (!huff_buffer->load_huffman_table[index])
  2956.             continue;
  2957.      
  2958.         //load DC table with 12 DWords
  2959.         convert_hufftable_to_codes(huff_buffer, dc_table, 0, index);  //0 for Dc
  2960.  
  2961.         //load AC table with 162 DWords
  2962.         convert_hufftable_to_codes(huff_buffer, ac_table, 1, index);  //1 for AC
  2963.  
  2964.         BEGIN_BCS_BATCH(batch, 176);
  2965.         OUT_BCS_BATCH(batch, MFC_JPEG_HUFF_TABLE_STATE | (176 - 2));
  2966.         OUT_BCS_BATCH(batch, id); //Huff table id
  2967.  
  2968.         //DWord 2 - 13 has DC_TABLE
  2969.         intel_batchbuffer_data(batch, dc_table, 12*4);
  2970.  
  2971.         //Dword 14 -175 has AC_TABLE
  2972.         intel_batchbuffer_data(batch, ac_table, 162*4);
  2973.         ADVANCE_BCS_BATCH(batch);
  2974.     }    
  2975. }
  2976.  
  2977.  
  2978. //This method is used to compute the MCU count used for setting MFC_JPEG_SCAN_OBJECT
  2979. static void get_Y_sampling_factors(uint32_t surface_format, uint8_t *h_factor, uint8_t *v_factor)
  2980. {
  2981.     switch (surface_format) {
  2982.         case VA_FOURCC_Y800: {
  2983.             (* h_factor) = 1;
  2984.             (* v_factor) = 1;
  2985.             break;
  2986.         }
  2987.         case VA_FOURCC_NV12: {
  2988.             (* h_factor) = 2;            
  2989.             (* v_factor) = 2;
  2990.             break;
  2991.         }      
  2992.         case VA_FOURCC_UYVY: {
  2993.             (* h_factor) = 2;
  2994.             (* v_factor) = 1;
  2995.             break;
  2996.         }
  2997.         case VA_FOURCC_YUY2: {
  2998.             (* h_factor) = 2;
  2999.             (* v_factor) = 1;
  3000.             break;
  3001.         }
  3002.         case VA_FOURCC_RGBA:
  3003.         case VA_FOURCC_444P: {
  3004.             (* h_factor) = 1;
  3005.             (* v_factor) = 1;
  3006.             break;
  3007.         }
  3008.         default : { //May be  have to insert error handling here. For now just use as below
  3009.             (* h_factor) = 1;
  3010.             (* v_factor) = 1;
  3011.             break;
  3012.         }
  3013.     }
  3014. }
  3015.  
//set MFC_JPEG_SCAN_OBJECT: programs the single JPEG scan - MCU count,
//Huffman table selectors, restart interval and last-scan/header flags -
//into the BCS command buffer.
static void
gen8_mfc_jpeg_scan_object(VADriverContextP ctx,
                                           struct encode_state *encode_state,
                                           struct intel_encoder_context *encoder_context)
{
    uint32_t mcu_count, surface_format, Mx, My;
    uint8_t i, horizontal_sampling_factor, vertical_sampling_factor, huff_ac_table=0, huff_dc_table=0;
    uint8_t is_last_scan = 1;    //Jpeg has only 1 scan per frame. When last scan, HW inserts EOI code.
    uint8_t head_present_flag=1; //Header has tables and app data
    uint16_t num_components, restart_interval;   //Specifies number of MCUs in an ECS.
    VAEncSliceParameterBufferJPEG *slice_param;
    VAEncPictureParameterBufferJPEG *pic_param;
   
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct object_surface *obj_surface = encode_state->input_yuv_object;
   
    assert(encode_state->slice_params_ext[0] && encode_state->slice_params_ext[0]->buffer);
    assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
    assert(obj_surface);
    pic_param = (VAEncPictureParameterBufferJPEG *)encode_state->pic_param_ext->buffer;
    slice_param = (VAEncSliceParameterBufferJPEG *)encode_state->slice_params_ext[0]->buffer;
    surface_format = obj_surface->fourcc;
   
    get_Y_sampling_factors(surface_format, &horizontal_sampling_factor, &vertical_sampling_factor);
   
    // Mx = #MCUs in a row, My = #MCUs in a column
    // (one MCU covers h_factor*8 x v_factor*8 luma pixels; round up so a
    // partial MCU at the right/bottom edge still counts)
    Mx = (pic_param->picture_width + (horizontal_sampling_factor*8 -1))/(horizontal_sampling_factor*8);
    My = (pic_param->picture_height + (vertical_sampling_factor*8 -1))/(vertical_sampling_factor*8);
    mcu_count = (Mx * My);
 
    num_components = pic_param->num_components;    
    restart_interval = slice_param->restart_interval;
   
    //Depending on number of components and values set for table selectors,
    //only those bits are set in 24:22 for AC table, 20:18 for DC table
    //(one bit per component: component i's selector contributes bit i)
    for(i=0; i<num_components; i++) {
        huff_ac_table |= ((slice_param->components[i].ac_table_selector)<<i);
        huff_dc_table |= ((slice_param->components[i].dc_table_selector)<<i);
    }
   
   
    BEGIN_BCS_BATCH(batch, 3);
    /* DWORD 0 */
    OUT_BCS_BATCH(batch, MFC_JPEG_SCAN_OBJECT | (3 - 2));
    /* DWORD 1 */
    OUT_BCS_BATCH(batch, mcu_count << 0);       //MCU Count
    /* DWORD 2 */
    OUT_BCS_BATCH(batch,
                  (huff_ac_table << 22)     |   //Huffman AC Table
                  (huff_dc_table << 18)     |   //Huffman DC Table
                  (head_present_flag << 17) |   //Head present flag
                  (is_last_scan << 16)      |   //Is last scan
                  (restart_interval << 0));     //Restart Interval
    ADVANCE_BCS_BATCH(batch);
}
  3072.  
  3073. static void
  3074. gen8_mfc_jpeg_pak_insert_object(struct intel_encoder_context *encoder_context, unsigned int *insert_data,
  3075.                                 int length_in_dws, int data_bits_in_last_dw, int is_last_header,
  3076.                                 int is_end_of_slice)
  3077. {
  3078.     struct intel_batchbuffer *batch = encoder_context->base.batch;
  3079.     assert(batch);
  3080.    
  3081.     if (data_bits_in_last_dw == 0)
  3082.         data_bits_in_last_dw = 32;
  3083.  
  3084.     BEGIN_BCS_BATCH(batch, length_in_dws + 2);
  3085.  
  3086.     OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (length_in_dws + 2 - 2));
  3087.     //DWord 1
  3088.     OUT_BCS_BATCH(batch,
  3089.                   (0 << 16) |                    //DataByteOffset 0 for JPEG Encoder
  3090.                   (0 << 15) |                    //HeaderLengthExcludeFrmSize 0 for JPEG Encoder
  3091.                   (data_bits_in_last_dw << 8) |  //DataBitsInLastDW
  3092.                   (0 << 4) |                     //SkipEmulByteCount 0 for JPEG Encoder
  3093.                   (0 << 3) |                     //EmulationFlag 0 for JPEG Encoder
  3094.                   ((!!is_last_header) << 2) |    //LastHeaderFlag
  3095.                   ((!!is_end_of_slice) << 1) |   //EndOfSliceFlag
  3096.                   (1 << 0));                     //BitstreamStartReset 1 for JPEG Encoder
  3097.     //Data Paylaod
  3098.     intel_batchbuffer_data(batch, insert_data, length_in_dws*4);
  3099.  
  3100.     ADVANCE_BCS_BATCH(batch);
  3101. }
  3102.  
  3103.  
  3104. //send the jpeg headers to HW using MFX_PAK_INSERT_OBJECT
  3105. static void
  3106. gen8_mfc_jpeg_add_headers(VADriverContextP ctx,
  3107.                                            struct encode_state *encode_state,
  3108.                                            struct intel_encoder_context *encoder_context)
  3109. {
  3110.     if (encode_state->packed_header_data_ext) {
  3111.         VAEncPackedHeaderParameterBuffer *param = NULL;
  3112.         unsigned int *header_data = (unsigned int *)(*encode_state->packed_header_data_ext)->buffer;
  3113.         unsigned int length_in_bits;
  3114.  
  3115.         param = (VAEncPackedHeaderParameterBuffer *)(*encode_state->packed_header_params_ext)->buffer;
  3116.         length_in_bits = param->bit_length;
  3117.  
  3118.         gen8_mfc_jpeg_pak_insert_object(encoder_context,
  3119.                                         header_data,
  3120.                                         ALIGN(length_in_bits, 32) >> 5,
  3121.                                         length_in_bits & 0x1f,
  3122.                                         1,
  3123.                                         1);
  3124.     }
  3125. }
  3126.  
  3127. //Initialize the buffered_qmatrix with the default qmatrix in the driver.
  3128. //If the app sends the qmatrix, this will be replaced with the one app sends.
  3129. static void
  3130. jpeg_init_default_qmatrix(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
  3131. {
  3132.     int i=0;
  3133.     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
  3134.  
  3135.     //Load the the QM in zigzag order. If app sends QM, it is always in zigzag order.
  3136.     for(i=0; i<64; i++)
  3137.        mfc_context->buffered_qmatrix.lum_quantiser_matrix[i] = jpeg_luma_quant[zigzag_direct[i]];
  3138.  
  3139.     for(i=0; i<64; i++)
  3140.         mfc_context->buffered_qmatrix.chroma_quantiser_matrix[i] = jpeg_chroma_quant[zigzag_direct[i]];
  3141. }    
  3142.  
  3143. /* This is at the picture level */
  3144. static void
  3145. gen8_mfc_jpeg_pipeline_picture_programing(VADriverContextP ctx,
  3146.                                            struct encode_state *encode_state,
  3147.                                            struct intel_encoder_context *encoder_context)
  3148. {
  3149.     int i, j, component, max_selector = 0;
  3150.     VAEncSliceParameterBufferJPEG *slice_param;
  3151.    
  3152.     gen8_mfc_pipe_mode_select(ctx, MFX_FORMAT_JPEG, encoder_context);
  3153.     gen8_mfc_jpeg_set_surface_state(ctx, encoder_context, encode_state);
  3154.     gen8_mfc_pipe_buf_addr_state(ctx, encoder_context);
  3155.     gen8_mfc_ind_obj_base_addr_state(ctx, encoder_context);
  3156.     gen8_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
  3157.     gen8_mfc_jpeg_pic_state(ctx, encoder_context, encode_state);
  3158.    
  3159.     //do the slice level encoding here
  3160.     gen8_mfc_jpeg_fqm_state(ctx, encoder_context, encode_state);
  3161.  
  3162.     //I dont think I need this for loop. Just to be consistent with other encoding logic...
  3163.     for(i = 0; i < encode_state->num_slice_params_ext; i++) {
  3164.         assert(encode_state->slice_params && encode_state->slice_params_ext[i]->buffer);
  3165.         slice_param = (VAEncSliceParameterBufferJPEG *)encode_state->slice_params_ext[i]->buffer;
  3166.        
  3167.         for(j = 0; j < encode_state->slice_params_ext[i]->num_elements; j++) {
  3168.            
  3169.             for(component = 0; component < slice_param->num_components; component++) {
  3170.                 if(max_selector < slice_param->components[component].dc_table_selector)
  3171.                     max_selector = slice_param->components[component].dc_table_selector;
  3172.                
  3173.                 if (max_selector < slice_param->components[component].ac_table_selector)
  3174.                     max_selector = slice_param->components[component].ac_table_selector;
  3175.             }
  3176.            
  3177.             slice_param++;
  3178.         }
  3179.     }    
  3180.  
  3181.     assert(max_selector < 2);
  3182.     //send the huffman table using MFC_JPEG_HUFF_TABLE
  3183.     gen8_mfc_jpeg_huff_table_state(ctx, encode_state, encoder_context, max_selector+1);
  3184.     //set MFC_JPEG_SCAN_OBJECT
  3185.     gen8_mfc_jpeg_scan_object(ctx, encode_state, encoder_context);
  3186.     //add headers using MFX_PAK_INSERT_OBJECT (it is refered as MFX_INSERT_OBJECT in this driver code)
  3187.     gen8_mfc_jpeg_add_headers(ctx, encode_state, encoder_context);
  3188.        
  3189. }
  3190.  
//Builds the complete BCS command sequence for one JPEG frame: opens the
//batch, flushes the pipeline, emits the picture-level programming
//(which also covers the slice-level JPEG states) and closes the batch.
static void
gen8_mfc_jpeg_pipeline_programing(VADriverContextP ctx,
                                   struct encode_state *encode_state,
                                   struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
   
    // begin programing: reserve BCS batch space and flush the pipeline
    intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
    intel_batchbuffer_emit_mi_flush(batch);
   
    // picture level programing
    gen8_mfc_jpeg_pipeline_picture_programing(ctx, encode_state, encoder_context);

    // end programing
    intel_batchbuffer_end_atomic(batch);

}
  3209.  
  3210.  
//Top-level JPEG encode entry point: initialize the MFC context, prepare
//the frame resources, program the BCS pipeline and submit the batch.
//Always returns VA_STATUS_SUCCESS (HW status is not checked here).
static VAStatus
gen8_mfc_jpeg_encode_picture(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              struct intel_encoder_context *encoder_context)
{
    gen8_mfc_init(ctx, encode_state, encoder_context);
    intel_mfc_jpeg_prepare(ctx, encode_state, encoder_context);
    /*Programing bcs pipeline*/
    gen8_mfc_jpeg_pipeline_programing(ctx, encode_state, encoder_context);
    gen8_mfc_run(ctx, encode_state, encoder_context);

    return VA_STATUS_SUCCESS;
}
  3224.  
//Estimate the VP8 quantizer index that best hits target_frame_size,
//using the per-MB bits-per-qindex lookup table vp8_bits_per_mb
//(row 0 = key frame, row 1 = inter frame).
static int gen8_mfc_vp8_qindex_estimate(struct encode_state *encode_state,
                                        struct gen6_mfc_context *mfc_context,
                                        int target_frame_size,
                                        int is_key_frame)
{
    VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
    VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
    unsigned int max_qindex = pic_param->clamp_qindex_high;
    unsigned int min_qindex = pic_param->clamp_qindex_low;
    int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
    int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
    int target_mb_size;       //target bits per MB, scaled by 512 (<<9)
    int last_size_gap  = -1;  //overshoot of the previous (smaller) qindex step
    int per_mb_size_at_qindex;
    int target_qindex = min_qindex, i;

    /* make sure would not overflow when scaling by 512 (<<9) */
    if (target_frame_size >= (0x7fffffff >> 9))
        target_mb_size = (target_frame_size / width_in_mbs / height_in_mbs) << 9;
    else
        target_mb_size = (target_frame_size << 9) / width_in_mbs / height_in_mbs;

    //Walk qindex upward until the table's estimated MB size drops to or
    //below the target, then pick whichever of the two neighbouring
    //qindexes lands closer to the target.
    //NOTE(review): assumes vp8_bits_per_mb is non-increasing in qindex -
    //confirm against the table in vp8_probs.h.
    for (i = min_qindex; i <= max_qindex; i++) {
        per_mb_size_at_qindex = vp8_bits_per_mb[!is_key_frame][i];
        target_qindex = i;
        if (per_mb_size_at_qindex <= target_mb_size) {
            //Undershoot: if the previous qindex's overshoot was smaller,
            //step back to it. (last_size_gap == -1 on the first
            //iteration, so no step-back can happen then.)
            if (target_mb_size - per_mb_size_at_qindex < last_size_gap)
                target_qindex--;
            break;
        }
        else
            last_size_gap = per_mb_size_at_qindex - target_mb_size;
    }

    return target_qindex;
}
  3261.  
//Initialize per-slice-type bitrate-control targets (per-MB and per-frame
//bit budgets) from the sequence bitrate and the requested frame rate.
static void
gen8_mfc_vp8_bit_rate_control_context_init(struct encode_state *encode_state,
                                        struct gen6_mfc_context *mfc_context)
{
    VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
    VAEncMiscParameterBuffer *misc_param_frame_rate_buffer = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeFrameRate]->buffer;
    VAEncMiscParameterFrameRate* param_frame_rate = (VAEncMiscParameterFrameRate*)misc_param_frame_rate_buffer->data;
    int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
    int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
    float fps = param_frame_rate->framerate;
    //Average bits available per inter MB. NOTE(review): the +4.0 fps bias
    //and the 5x intra weighting below look like empirical tuning
    //constants - confirm against the rate-control design notes.
    int inter_mb_size = seq_param->bits_per_second * 1.0 / (fps+4.0) / width_in_mbs / height_in_mbs;
    int intra_mb_size = inter_mb_size * 5.0;

    //Per-MB and per-frame bit targets for key (I) and delta (P) frames.
    mfc_context->bit_rate_control_context[SLICE_TYPE_I].target_mb_size = intra_mb_size;
    mfc_context->bit_rate_control_context[SLICE_TYPE_I].target_frame_size = intra_mb_size * width_in_mbs * height_in_mbs;
    mfc_context->bit_rate_control_context[SLICE_TYPE_P].target_mb_size = inter_mb_size;
    mfc_context->bit_rate_control_context[SLICE_TYPE_P].target_frame_size = inter_mb_size * width_in_mbs * height_in_mbs;

    //Same targets expressed in 16-bit words, rounded up.
    mfc_context->bit_rate_control_context[SLICE_TYPE_I].TargetSizeInWord = (intra_mb_size + 16)/ 16;
    mfc_context->bit_rate_control_context[SLICE_TYPE_P].TargetSizeInWord = (inter_mb_size + 16)/ 16;

    //Allow 50% headroom over the target before a frame counts as oversized.
    mfc_context->bit_rate_control_context[SLICE_TYPE_I].MaxSizeInWord = mfc_context->bit_rate_control_context[SLICE_TYPE_I].TargetSizeInWord * 1.5;
    mfc_context->bit_rate_control_context[SLICE_TYPE_P].MaxSizeInWord = mfc_context->bit_rate_control_context[SLICE_TYPE_P].TargetSizeInWord * 1.5;
}
  3286.  
static void gen8_mfc_vp8_brc_init(struct encode_state *encode_state,
                               struct intel_encoder_context* encoder_context)
{
    /*
     * Initialize VP8 CBR bit rate control: per-GOP frame size targets for
     * I/P frames, initial quantizer index estimates, and the HRD buffer
     * model parameters.
     */
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
    VAEncMiscParameterBuffer* misc_param_hrd = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeHRD]->buffer;
    VAEncMiscParameterHRD* param_hrd = (VAEncMiscParameterHRD*)misc_param_hrd->data;
    VAEncMiscParameterBuffer* misc_param_frame_rate_buffer = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeFrameRate]->buffer;
    VAEncMiscParameterFrameRate* param_frame_rate = (VAEncMiscParameterFrameRate*)misc_param_frame_rate_buffer->data;
    double bitrate = seq_param->bits_per_second;
    unsigned int frame_rate = param_frame_rate->framerate;
    int inum = 1, pnum = 0; /* I- and P-frame counts per GOP */
    int intra_period = seq_param->intra_period;
    int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
    int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
    int max_frame_size =  (vp8_bits_per_mb[0][0] >> 9) * width_in_mbs * height_in_mbs;/* vp8_bits_per_mb entries are scaled by 512 (<<9) */

    pnum = intra_period  - 1;

    mfc_context->brc.mode = encoder_context->rate_control_mode;

    /* Split the GOP bit budget between the single I frame and the P
     * frames, with P frames weighted by BRC_PWEIGHT. */
    mfc_context->brc.target_frame_size[SLICE_TYPE_I] = (int)((double)((bitrate * intra_period)/frame_rate) /
                                                             (double)(inum + BRC_PWEIGHT * pnum ));
    mfc_context->brc.target_frame_size[SLICE_TYPE_P] = BRC_PWEIGHT * mfc_context->brc.target_frame_size[SLICE_TYPE_I];

    mfc_context->brc.gop_nums[SLICE_TYPE_I] = inum;
    mfc_context->brc.gop_nums[SLICE_TYPE_P] = pnum;

    mfc_context->brc.bits_per_frame = bitrate/frame_rate;

    /* Derive starting qindex per frame type from the target frame size. */
    mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY = gen8_mfc_vp8_qindex_estimate(encode_state,
                                                                   mfc_context,
                                                                   mfc_context->brc.target_frame_size[SLICE_TYPE_I],
                                                                   1);
    mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY = gen8_mfc_vp8_qindex_estimate(encode_state,
                                                                   mfc_context,
                                                                   mfc_context->brc.target_frame_size[SLICE_TYPE_P],
                                                                   0);

    mfc_context->hrd.buffer_size = (double)param_hrd->buffer_size;
    /* NOTE(review): the cast binds to the comparison, so the ternary
     * condition is the 0.0/1.0 result of (initial_buffer_fullness <
     * buffer_size).  The selected value matches the apparent intent
     * (use initial fullness if it fits, else half the buffer), but the
     * cast placement is misleading — confirm before restructuring. */
    mfc_context->hrd.current_buffer_fullness =
        (double)(param_hrd->initial_buffer_fullness < mfc_context->hrd.buffer_size)?
        param_hrd->initial_buffer_fullness: mfc_context->hrd.buffer_size/2.;
    mfc_context->hrd.target_buffer_fullness = (double)mfc_context->hrd.buffer_size/2.;
    mfc_context->hrd.buffer_capacity = (double)mfc_context->hrd.buffer_size/max_frame_size;
    mfc_context->hrd.violation_noted = 0;
}
  3334.  
static int gen8_mfc_vp8_brc_postpack(struct encode_state *encode_state,
                           struct gen6_mfc_context *mfc_context,
                           int frame_bits)
{
    /*
     * Post-encode BRC update: given the actual bit count of the encoded
     * frame (frame_bits), predict the quantizer index for the next frame
     * of the same type, apply an HRD-fullness-based correction, and check
     * HRD compliance.  Returns a gen6_brc_status: BRC_NO_HRD_VIOLATION on
     * success, or an under/overflow status the caller may act on (e.g.
     * re-encode or bit stuffing).
     */
    gen6_brc_status sts = BRC_NO_HRD_VIOLATION;
    VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
    int is_key_frame = !pic_param->pic_flags.bits.frame_type;
    int slicetype = (is_key_frame ? SLICE_TYPE_I : SLICE_TYPE_P);
    int qpi = mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY;
    int qpp = mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY;
    int qp; // quantizer of previously encoded slice of current type
    int qpn; // predicted quantizer for next frame of current type in integer format
    double qpf; // predicted quantizer for next frame of current type in float format
    double delta_qp; // QP correction
    int target_frame_size, frame_size_next;
    /* Notes:
     *  x - how far we are from HRD buffer borders
     *  y - how far we are from target HRD buffer fullness
     */
    double x, y;
    double frame_size_alpha;
    unsigned int max_qindex = pic_param->clamp_qindex_high;
    unsigned int min_qindex = pic_param->clamp_qindex_low;

    qp = mfc_context->bit_rate_control_context[slicetype].QpPrimeY;

    /* Smooth the next-frame size prediction; a small HRD buffer
     * (capacity < 5 frames) disables smoothing entirely. */
    target_frame_size = mfc_context->brc.target_frame_size[slicetype];
    if (mfc_context->hrd.buffer_capacity < 5)
        frame_size_alpha = 0;
    else
        frame_size_alpha = (double)mfc_context->brc.gop_nums[slicetype];
    if (frame_size_alpha > 30) frame_size_alpha = 30;
    frame_size_next = target_frame_size + (double)(target_frame_size - frame_bits) /
        (double)(frame_size_alpha + 1.);

    /* frame_size_next: avoiding negative number and too small value */
    if ((double)frame_size_next < (double)(target_frame_size * 0.25))
        frame_size_next = (int)((double)target_frame_size * 0.25);

    /* Scale QP proportionally to the size miss (assumes bits ~ 1/QP). */
    qpf = (double)qp * target_frame_size / frame_size_next;
    qpn = (int)(qpf + 0.5);

    if (qpn == qp) {
        /* setting qpn we round qpf making mistakes: now we are trying to compensate this */
        mfc_context->brc.qpf_rounding_accumulator += qpf - qpn;
        if (mfc_context->brc.qpf_rounding_accumulator > 1.0) {
            qpn++;
            mfc_context->brc.qpf_rounding_accumulator = 0.;
        } else if (mfc_context->brc.qpf_rounding_accumulator < -1.0) {
            qpn--;
            mfc_context->brc.qpf_rounding_accumulator = 0.;
        }
    }

    /* making sure that QP is not changing too fast */
    if ((qpn - qp) > BRC_QP_MAX_CHANGE) qpn = qp + BRC_QP_MAX_CHANGE;
    else if ((qpn - qp) < -BRC_QP_MAX_CHANGE) qpn = qp - BRC_QP_MAX_CHANGE;
    /* making sure that with QP predictions we do not leave the QP range */
    BRC_CLIP(qpn, min_qindex, max_qindex);

    /* checking whether HRD compliance is still met */
    sts = intel_mfc_update_hrd(encode_state, mfc_context, frame_bits);

    /* calculating QP delta as some function*/
    /* x in [-1,1]: signed distance from target fullness, normalized to the
     * nearer buffer border; y: remaining headroom on that side. */
    x = mfc_context->hrd.target_buffer_fullness - mfc_context->hrd.current_buffer_fullness;
    if (x > 0) {
        x /= mfc_context->hrd.target_buffer_fullness;
        y = mfc_context->hrd.current_buffer_fullness;
    }
    else {
        x /= (mfc_context->hrd.buffer_size - mfc_context->hrd.target_buffer_fullness);
        y = mfc_context->hrd.buffer_size - mfc_context->hrd.current_buffer_fullness;
    }
    if (y < 0.01) y = 0.01;
    if (x > 1) x = 1;
    else if (x < -1) x = -1;

    /* Smooth correction: grows as headroom y shrinks, direction from x. */
    delta_qp = BRC_QP_MAX_CHANGE*exp(-1/y)*sin(BRC_PI_0_5 * x);
    qpn = (int)(qpn + delta_qp + 0.5);

    /* making sure that with QP predictions we do not leave the QP range */
    BRC_CLIP(qpn, min_qindex, max_qindex);

    if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation
        /* correcting QPs of slices of other types */
        if (!is_key_frame) {
            if (abs(qpn - BRC_I_P_QP_DIFF - qpi) > 4)
                mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY += (qpn - BRC_I_P_QP_DIFF - qpi) >> 2;
        } else {
            if (abs(qpn + BRC_I_P_QP_DIFF - qpp) > 4)
                mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY += (qpn + BRC_I_P_QP_DIFF - qpp) >> 2;
        }
        BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_I].QpPrimeY, min_qindex, max_qindex);
        BRC_CLIP(mfc_context->bit_rate_control_context[SLICE_TYPE_P].QpPrimeY, min_qindex, max_qindex);
    } else if (sts == BRC_UNDERFLOW) { // underflow
        if (qpn <= qp) qpn = qp + 2;
        if (qpn > max_qindex) {
            qpn = max_qindex;
            sts = BRC_UNDERFLOW_WITH_MAX_QP; //underflow with maxQP
        }
    } else if (sts == BRC_OVERFLOW) {
        if (qpn >= qp) qpn = qp - 2;
        if (qpn < min_qindex) { // < 0 (?) overflow with minQP
            qpn = min_qindex;
            sts = BRC_OVERFLOW_WITH_MIN_QP; // bit stuffing to be done
        }
    }

    mfc_context->bit_rate_control_context[slicetype].QpPrimeY = qpn;

    return sts;
}
  3447.  
  3448. static void gen8_mfc_vp8_hrd_context_init(struct encode_state *encode_state,
  3449.                                        struct intel_encoder_context *encoder_context)
  3450. {
  3451.     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
  3452.     VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
  3453.     unsigned int rate_control_mode = encoder_context->rate_control_mode;
  3454.     int target_bit_rate = seq_param->bits_per_second;
  3455.  
  3456.     // current we only support CBR mode.
  3457.     if (rate_control_mode == VA_RC_CBR) {
  3458.         mfc_context->vui_hrd.i_bit_rate_value = target_bit_rate >> 10;
  3459.         mfc_context->vui_hrd.i_cpb_size_value = (target_bit_rate * 8) >> 10;
  3460.         mfc_context->vui_hrd.i_initial_cpb_removal_delay = mfc_context->vui_hrd.i_cpb_size_value * 0.5 * 1024 / target_bit_rate * 90000;
  3461.         mfc_context->vui_hrd.i_cpb_removal_delay = 2;
  3462.         mfc_context->vui_hrd.i_frame_number = 0;
  3463.  
  3464.         mfc_context->vui_hrd.i_initial_cpb_removal_delay_length = 24;
  3465.         mfc_context->vui_hrd.i_cpb_removal_delay_length = 24;
  3466.         mfc_context->vui_hrd.i_dpb_output_delay_length = 24;
  3467.     }
  3468.  
  3469. }
  3470.  
  3471. static void gen8_mfc_vp8_hrd_context_update(struct encode_state *encode_state,
  3472.                              struct gen6_mfc_context *mfc_context)
  3473. {
  3474.     mfc_context->vui_hrd.i_frame_number++;
  3475. }
  3476.  
  3477. /*
  3478.  * Check whether the parameters related with CBR are updated and decide whether
  3479.  * it needs to reinitialize the configuration related with CBR.
  3480.  * Currently it will check the following parameters:
  3481.  *      bits_per_second
  3482.  *      frame_rate
  3483.  *      gop_configuration(intra_period, ip_period, intra_idr_period)
  3484.  */
  3485. static bool gen8_mfc_vp8_brc_updated_check(struct encode_state *encode_state,
  3486.                            struct intel_encoder_context *encoder_context)
  3487. {
  3488.     unsigned int rate_control_mode = encoder_context->rate_control_mode;
  3489.     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
  3490.     double cur_fps, cur_bitrate;
  3491.     VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
  3492.     VAEncMiscParameterBuffer *misc_param_frame_rate_buf = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeFrameRate]->buffer;
  3493.     VAEncMiscParameterFrameRate *param_frame_rate = (VAEncMiscParameterFrameRate*)misc_param_frame_rate_buf->data;
  3494.     unsigned int frame_rate = param_frame_rate->framerate;
  3495.  
  3496.     if (rate_control_mode != VA_RC_CBR) {
  3497.         return false;
  3498.     }
  3499.  
  3500.     cur_bitrate = seq_param->bits_per_second;
  3501.     cur_fps = frame_rate;
  3502.  
  3503.     if ((cur_bitrate == mfc_context->brc.saved_bps) &&
  3504.         (cur_fps == mfc_context->brc.saved_fps) &&
  3505.         (seq_param->intra_period == mfc_context->brc.saved_intra_period)) {
  3506.         /* the parameters related with CBR are not updaetd */
  3507.         return false;
  3508.     }
  3509.  
  3510.     mfc_context->brc.saved_intra_period = seq_param->intra_period;
  3511.     mfc_context->brc.saved_fps = cur_fps;
  3512.     mfc_context->brc.saved_bps = cur_bitrate;
  3513.     return true;
  3514. }
  3515.  
  3516. static void gen8_mfc_vp8_brc_prepare(struct encode_state *encode_state,
  3517.                            struct intel_encoder_context *encoder_context)
  3518. {
  3519.     unsigned int rate_control_mode = encoder_context->rate_control_mode;
  3520.     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
  3521.  
  3522.     if (rate_control_mode == VA_RC_CBR) {
  3523.         bool brc_updated;
  3524.         assert(encoder_context->codec != CODEC_MPEG2);
  3525.  
  3526.         brc_updated = gen8_mfc_vp8_brc_updated_check(encode_state, encoder_context);
  3527.  
  3528.         /*Programing bit rate control */
  3529.         if ((mfc_context->bit_rate_control_context[SLICE_TYPE_I].MaxSizeInWord == 0) ||
  3530.              brc_updated) {
  3531.             gen8_mfc_vp8_bit_rate_control_context_init(encode_state, mfc_context);
  3532.             gen8_mfc_vp8_brc_init(encode_state, encoder_context);
  3533.         }
  3534.  
  3535.         /*Programing HRD control */
  3536.         if ((mfc_context->vui_hrd.i_cpb_size_value == 0) || brc_updated )
  3537.             gen8_mfc_vp8_hrd_context_init(encode_state, encoder_context);
  3538.     }
  3539. }
  3540.  
  3541. static void vp8_enc_state_init(struct gen6_mfc_context *mfc_context,
  3542.                                VAEncPictureParameterBufferVP8 *pic_param,
  3543.                                VAQMatrixBufferVP8 *q_matrix)
  3544. {
  3545.  
  3546.     int is_key_frame = !pic_param->pic_flags.bits.frame_type;
  3547.     unsigned char *coeff_probs_stream_in_buffer;
  3548.    
  3549.     mfc_context->vp8_state.frame_header_lf_update_pos = 0;
  3550.     mfc_context->vp8_state.frame_header_qindex_update_pos = 0;
  3551.     mfc_context->vp8_state.frame_header_token_update_pos = 0;
  3552.     mfc_context->vp8_state.frame_header_bin_mv_upate_pos = 0;
  3553.  
  3554.     mfc_context->vp8_state.prob_skip_false = 255;
  3555.     memset(mfc_context->vp8_state.mb_segment_tree_probs, 0, sizeof(mfc_context->vp8_state.mb_segment_tree_probs));
  3556.     memcpy(mfc_context->vp8_state.mv_probs, vp8_default_mv_context, sizeof(mfc_context->vp8_state.mv_probs));
  3557.    
  3558.     if (is_key_frame) {
  3559.         memcpy(mfc_context->vp8_state.y_mode_probs, vp8_kf_ymode_prob, sizeof(mfc_context->vp8_state.y_mode_probs));
  3560.         memcpy(mfc_context->vp8_state.uv_mode_probs, vp8_kf_uv_mode_prob, sizeof(mfc_context->vp8_state.uv_mode_probs));
  3561.  
  3562.         mfc_context->vp8_state.prob_intra = 255;
  3563.         mfc_context->vp8_state.prob_last = 128;
  3564.         mfc_context->vp8_state.prob_gf = 128;
  3565.     } else {
  3566.         memcpy(mfc_context->vp8_state.y_mode_probs, vp8_ymode_prob, sizeof(mfc_context->vp8_state.y_mode_probs));
  3567.         memcpy(mfc_context->vp8_state.uv_mode_probs, vp8_uv_mode_prob, sizeof(mfc_context->vp8_state.uv_mode_probs));
  3568.  
  3569.         mfc_context->vp8_state.prob_intra = 63;
  3570.         mfc_context->vp8_state.prob_last = 128;
  3571.         mfc_context->vp8_state.prob_gf = 128;
  3572.     }
  3573.    
  3574.     mfc_context->vp8_state.prob_skip_false = vp8_base_skip_false_prob[q_matrix->quantization_index[0]];
  3575.  
  3576.     dri_bo_map(mfc_context->vp8_state.coeff_probs_stream_in_bo, 1);
  3577.     coeff_probs_stream_in_buffer = (unsigned char *)mfc_context->vp8_state.coeff_probs_stream_in_bo->virtual;
  3578.     assert(coeff_probs_stream_in_buffer);
  3579.     memcpy(coeff_probs_stream_in_buffer, vp8_default_coef_probs, sizeof(vp8_default_coef_probs));
  3580.     dri_bo_unmap(mfc_context->vp8_state.coeff_probs_stream_in_bo);
  3581. }
  3582.  
static void vp8_enc_state_update(struct gen6_mfc_context *mfc_context,
                                 VAQMatrixBufferVP8 *q_matrix)
{
    /* Placeholder for inter-frame probability adaptation — currently a
     * no-op; the probability state is re-seeded each frame by
     * vp8_enc_state_init() instead.  Parameters are intentionally unused
     * for now. */

    /*some other probabilities need to be updated*/
}
  3589.  
  3590. extern void binarize_vp8_frame_header(VAEncSequenceParameterBufferVP8 *seq_param,
  3591.                            VAEncPictureParameterBufferVP8 *pic_param,
  3592.                            VAQMatrixBufferVP8 *q_matrix,
  3593.                            struct gen6_mfc_context *mfc_context,
  3594.                            struct intel_encoder_context *encoder_context);
  3595.  
  3596. static void vp8_enc_frame_header_binarize(struct encode_state *encode_state,
  3597.                                           struct intel_encoder_context *encoder_context,
  3598.                                           struct gen6_mfc_context *mfc_context)
  3599. {
  3600.     VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
  3601.     VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
  3602.     VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
  3603.     unsigned char *frame_header_buffer;
  3604.  
  3605.     binarize_vp8_frame_header(seq_param, pic_param, q_matrix, mfc_context, encoder_context);
  3606.  
  3607.     dri_bo_map(mfc_context->vp8_state.frame_header_bo, 1);
  3608.     frame_header_buffer = (unsigned char *)mfc_context->vp8_state.frame_header_bo->virtual;
  3609.     assert(frame_header_buffer);
  3610.     memcpy(frame_header_buffer, mfc_context->vp8_state.vp8_frame_header, (mfc_context->vp8_state.frame_header_bit_count + 7) / 8);
  3611.     dri_bo_unmap(mfc_context->vp8_state.frame_header_bo);
  3612. }
  3613.  
  3614. #define MAX_VP8_FRAME_HEADER_SIZE              0x2000
  3615. #define VP8_TOKEN_STATISTICS_BUFFER_SIZE       0x2000
  3616.  
  3617. static void gen8_mfc_vp8_init(VADriverContextP ctx,
  3618.                           struct encode_state *encode_state,
  3619.                           struct intel_encoder_context *encoder_context)
  3620. {
  3621.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  3622.     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
  3623.     dri_bo *bo;
  3624.     int i;
  3625.     int width_in_mbs = 0;
  3626.     int height_in_mbs = 0;
  3627.     int slice_batchbuffer_size;
  3628.     int is_key_frame, slice_type, rate_control_mode;
  3629.  
  3630.     VAEncSequenceParameterBufferVP8 *pSequenceParameter = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
  3631.     VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
  3632.     VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
  3633.  
  3634.     width_in_mbs = ALIGN(pSequenceParameter->frame_height, 16) / 16;
  3635.     height_in_mbs = ALIGN(pSequenceParameter->frame_height, 16) / 16;
  3636.  
  3637.     is_key_frame = !pic_param->pic_flags.bits.frame_type;
  3638.     slice_type = (is_key_frame ? SLICE_TYPE_I : SLICE_TYPE_P);
  3639.     rate_control_mode = encoder_context->rate_control_mode;
  3640.  
  3641.     if (rate_control_mode == VA_RC_CBR) {
  3642.         q_matrix->quantization_index[0] = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
  3643.         for (i = 1; i < 4; i++)
  3644.             q_matrix->quantization_index[i] = q_matrix->quantization_index[0];
  3645.         for (i = 0; i < 5; i++)
  3646.             q_matrix->quantization_index_delta[i] = 0;
  3647.     }
  3648.  
  3649.     slice_batchbuffer_size = 64 * width_in_mbs * height_in_mbs + 4096 +
  3650.         (SLICE_HEADER + SLICE_TAIL);
  3651.  
  3652.     /*Encode common setup for MFC*/
  3653.     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
  3654.     mfc_context->post_deblocking_output.bo = NULL;
  3655.  
  3656.     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
  3657.     mfc_context->pre_deblocking_output.bo = NULL;
  3658.  
  3659.     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
  3660.     mfc_context->uncompressed_picture_source.bo = NULL;
  3661.  
  3662.     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
  3663.     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
  3664.  
  3665.     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
  3666.         if ( mfc_context->direct_mv_buffers[i].bo != NULL)
  3667.             dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
  3668.         mfc_context->direct_mv_buffers[i].bo = NULL;
  3669.     }
  3670.  
  3671.     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
  3672.         if (mfc_context->reference_surfaces[i].bo != NULL)
  3673.             dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
  3674.         mfc_context->reference_surfaces[i].bo = NULL;
  3675.     }
  3676.  
  3677.     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
  3678.     bo = dri_bo_alloc(i965->intel.bufmgr,
  3679.                       "Buffer",
  3680.                       width_in_mbs * 64 * 16,
  3681.                       64);
  3682.     assert(bo);
  3683.     mfc_context->intra_row_store_scratch_buffer.bo = bo;
  3684.  
  3685.     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
  3686.     bo = dri_bo_alloc(i965->intel.bufmgr,
  3687.                       "Buffer",
  3688.                       width_in_mbs * height_in_mbs * 16,
  3689.                       64);
  3690.     assert(bo);
  3691.     mfc_context->macroblock_status_buffer.bo = bo;
  3692.  
  3693.     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
  3694.     bo = dri_bo_alloc(i965->intel.bufmgr,
  3695.                       "Buffer",
  3696.                       16 * width_in_mbs * 64,  /* 16 * width_in_mbs * 64 */
  3697.                       64);
  3698.     assert(bo);
  3699.     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
  3700.  
  3701.     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
  3702.     bo = dri_bo_alloc(i965->intel.bufmgr,
  3703.                       "Buffer",
  3704.                       16 * width_in_mbs * 64, /* 16 * width_in_mbs * 64 */
  3705.                       0x1000);
  3706.     assert(bo);
  3707.     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
  3708.  
  3709.     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
  3710.     mfc_context->mfc_batchbuffer_surface.bo = NULL;
  3711.  
  3712.     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
  3713.     mfc_context->aux_batchbuffer_surface.bo = NULL;
  3714.  
  3715.     if (mfc_context->aux_batchbuffer)
  3716.         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
  3717.  
  3718.     mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, slice_batchbuffer_size);
  3719.     mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
  3720.     dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
  3721.     mfc_context->aux_batchbuffer_surface.pitch = 16;
  3722.     mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
  3723.     mfc_context->aux_batchbuffer_surface.size_block = 16;
  3724.  
  3725.     i965_gpe_context_init(ctx, &mfc_context->gpe_context);
  3726.  
  3727.     /* alloc vp8 encoding buffers*/
  3728.     dri_bo_unreference(mfc_context->vp8_state.frame_header_bo);
  3729.     bo = dri_bo_alloc(i965->intel.bufmgr,
  3730.                       "Buffer",
  3731.                       MAX_VP8_FRAME_HEADER_SIZE,
  3732.                       0x1000);
  3733.     assert(bo);
  3734.     mfc_context->vp8_state.frame_header_bo = bo;
  3735.  
  3736.     mfc_context->vp8_state.intermediate_buffer_max_size = width_in_mbs * height_in_mbs * 384 * 9;
  3737.     for(i = 0; i < 8; i++) {
  3738.         mfc_context->vp8_state.intermediate_partition_offset[i] = width_in_mbs * height_in_mbs * 384 * (i + 1);
  3739.     }
  3740.     dri_bo_unreference(mfc_context->vp8_state.intermediate_bo);
  3741.     bo = dri_bo_alloc(i965->intel.bufmgr,
  3742.                       "Buffer",
  3743.                       mfc_context->vp8_state.intermediate_buffer_max_size,
  3744.                       0x1000);
  3745.     assert(bo);
  3746.     mfc_context->vp8_state.intermediate_bo = bo;
  3747.  
  3748.     dri_bo_unreference(mfc_context->vp8_state.stream_out_bo);
  3749.     bo = dri_bo_alloc(i965->intel.bufmgr,
  3750.                       "Buffer",
  3751.                       width_in_mbs * height_in_mbs * 16,
  3752.                       0x1000);
  3753.     assert(bo);
  3754.     mfc_context->vp8_state.stream_out_bo = bo;
  3755.  
  3756.     dri_bo_unreference(mfc_context->vp8_state.coeff_probs_stream_in_bo);
  3757.     bo = dri_bo_alloc(i965->intel.bufmgr,
  3758.                       "Buffer",
  3759.                       sizeof(vp8_default_coef_probs),
  3760.                       0x1000);
  3761.     assert(bo);
  3762.     mfc_context->vp8_state.coeff_probs_stream_in_bo = bo;
  3763.  
  3764.     dri_bo_unreference(mfc_context->vp8_state.token_statistics_bo);
  3765.     bo = dri_bo_alloc(i965->intel.bufmgr,
  3766.                       "Buffer",
  3767.                       VP8_TOKEN_STATISTICS_BUFFER_SIZE,
  3768.                       0x1000);
  3769.     assert(bo);
  3770.     mfc_context->vp8_state.token_statistics_bo = bo;
  3771.  
  3772.     dri_bo_unreference(mfc_context->vp8_state.mpc_row_store_bo);
  3773.     bo = dri_bo_alloc(i965->intel.bufmgr,
  3774.                       "Buffer",
  3775.                       width_in_mbs * 16 * 64,
  3776.                       0x1000);
  3777.     assert(bo);
  3778.     mfc_context->vp8_state.mpc_row_store_bo = bo;
  3779.  
  3780.     vp8_enc_state_init(mfc_context, pic_param, q_matrix);
  3781.     vp8_enc_frame_header_binarize(encode_state, encoder_context, mfc_context);
  3782. }
  3783.  
static VAStatus
intel_mfc_vp8_prepare(VADriverContextP ctx,
                        struct encode_state *encode_state,
                        struct intel_encoder_context *encoder_context)
{
    /*
     * Bind the per-frame surfaces and buffers into the MFC context:
     * reconstructed output, reference surfaces, input YUV, and the coded
     * buffer.  Each binding takes its own dri_bo reference; the previous
     * frame's references were dropped in gen8_mfc_vp8_init().
     * Returns VA_STATUS_SUCCESS (no failure path at present).
     */
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    struct object_surface *obj_surface;
    struct object_buffer *obj_buffer;
    struct i965_coded_buffer_segment *coded_buffer_segment;
    VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
    VAStatus vaStatus = VA_STATUS_SUCCESS;
    dri_bo *bo;
    int i;

    /* reconstructed surface: with the loop filter disabled (level 0) the
     * pre-deblocking output is the final picture; otherwise the
     * post-deblocking output is used. */
    obj_surface = encode_state->reconstructed_object;
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
    if (pic_param->loop_filter_level[0] == 0) {
        mfc_context->pre_deblocking_output.bo = obj_surface->bo;
        dri_bo_reference(mfc_context->pre_deblocking_output.bo);
    } else {
        mfc_context->post_deblocking_output.bo = obj_surface->bo;
        dri_bo_reference(mfc_context->post_deblocking_output.bo);
    }

    mfc_context->surface_state.width = obj_surface->orig_width;
    mfc_context->surface_state.height = obj_surface->orig_height;
    mfc_context->surface_state.w_pitch = obj_surface->width;
    mfc_context->surface_state.h_pitch = obj_surface->height;

    /* set vp8 reference frames; missing slots are left NULL */
    for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
        obj_surface = encode_state->reference_objects[i];

        if (obj_surface && obj_surface->bo) {
            mfc_context->reference_surfaces[i].bo = obj_surface->bo;
            dri_bo_reference(mfc_context->reference_surfaces[i].bo);
        } else {
            mfc_context->reference_surfaces[i].bo = NULL;
        }
    }

    /* input YUV surface */
    obj_surface = encode_state->input_yuv_object;
    mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
    dri_bo_reference(mfc_context->uncompressed_picture_source.bo);

    /* coded buffer: PAK output starts after the coded-buffer header and
     * is capped below the end of the buffer object. */
    obj_buffer = encode_state->coded_buf_object;
    bo = obj_buffer->buffer_store->bo;
    mfc_context->mfc_indirect_pak_bse_object.bo = bo;
    mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
    mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
    dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);

    /* the final VP8 frame is emitted into the same coded buffer */
    dri_bo_unreference(mfc_context->vp8_state.final_frame_bo);
    mfc_context->vp8_state.final_frame_bo = mfc_context->mfc_indirect_pak_bse_object.bo;
    mfc_context->vp8_state.final_frame_byte_offset = I965_CODEDBUFFER_HEADER_SIZE;
    dri_bo_reference(mfc_context->vp8_state.final_frame_bo);

    /* set the internal flag to 0 to indicate the coded size is unknown */
    dri_bo_map(bo, 1);
    coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
    coded_buffer_segment->mapped = 0;
    coded_buffer_segment->codec = encoder_context->codec;
    dri_bo_unmap(bo);

    return vaStatus;
}
  3853.  
/*
 * Emit the MFX_VP8_ENCODER_CFG command (30 dwords) for the current frame.
 *
 * Configures the VP8 bitstream encoder: RC pass/statistics flags,
 * per-MB bit-count limits, picture flags, the scaled frame dimensions,
 * and the layout of the packed frame header (total bit count plus the
 * qindex / loop-filter / token / MV update pointers recorded in
 * mfc_context->vp8_state when the frame header was packed).
 */
static void
gen8_mfc_vp8_encoder_cfg(VADriverContextP ctx,
                         struct encode_state *encode_state,
                         struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
    VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;

    BEGIN_BCS_BATCH(batch, 30);
    OUT_BCS_BATCH(batch, MFX_VP8_ENCODER_CFG | (30 - 2)); /* SKL should be 31-2 ? */

    OUT_BCS_BATCH(batch,
                  0 << 9 | /* compressed bitstream output disable */
                  1 << 7 | /* disable per-segment delta qindex and loop filter in RC */
                  1 << 6 | /* RC initial pass */
                  0 << 4 | /* update segment feature data flag */
                  1 << 3 | /* bitstream statistics output enable */
                  1 << 2 | /* token statistics output enable */
                  0 << 1 | /* final bitstream output disable */
                  0 << 0); /*DW1*/
   
    OUT_BCS_BATCH(batch, 0); /*DW2*/

    OUT_BCS_BATCH(batch,
                  0xfff << 16 | /* max intra mb bit count limit */
                  0xfff << 0  /* max inter mb bit count limit */
                  ); /*DW3*/

    /* DW4-DW21: RC thresholds/deltas, unused in this initial-pass setup */
    OUT_BCS_BATCH(batch, 0); /*DW4*/
    OUT_BCS_BATCH(batch, 0); /*DW5*/
    OUT_BCS_BATCH(batch, 0); /*DW6*/
    OUT_BCS_BATCH(batch, 0); /*DW7*/
    OUT_BCS_BATCH(batch, 0); /*DW8*/
    OUT_BCS_BATCH(batch, 0); /*DW9*/
    OUT_BCS_BATCH(batch, 0); /*DW10*/
    OUT_BCS_BATCH(batch, 0); /*DW11*/
    OUT_BCS_BATCH(batch, 0); /*DW12*/
    OUT_BCS_BATCH(batch, 0); /*DW13*/
    OUT_BCS_BATCH(batch, 0); /*DW14*/
    OUT_BCS_BATCH(batch, 0); /*DW15*/
    OUT_BCS_BATCH(batch, 0); /*DW16*/
    OUT_BCS_BATCH(batch, 0); /*DW17*/
    OUT_BCS_BATCH(batch, 0); /*DW18*/
    OUT_BCS_BATCH(batch, 0); /*DW19*/
    OUT_BCS_BATCH(batch, 0); /*DW20*/
    OUT_BCS_BATCH(batch, 0); /*DW21*/

    OUT_BCS_BATCH(batch,
                 pic_param->pic_flags.bits.show_frame << 23 |
                 pic_param->pic_flags.bits.version << 20
                 ); /*DW22*/

    /* DW23: scaled frame height (bits 31:16) and width (bits 15:0) */
    OUT_BCS_BATCH(batch,
                 (seq_param->frame_height_scale << 14 | seq_param->frame_height) << 16 |
                 (seq_param->frame_width_scale << 14 | seq_param->frame_width) << 0
                 );

    /*DW24*/
    OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_bit_count); /* frame header bit count */

    /*DW25*/
    OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_qindex_update_pos); /* frame header bin buffer qindex update pointer */

    /*DW26*/
    OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_lf_update_pos); /* frame header bin buffer loop filter update pointer*/

    /*DW27*/
    OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_token_update_pos); /* frame header bin buffer token update pointer */

    /*DW28*/
    OUT_BCS_BATCH(batch, mfc_context->vp8_state.frame_header_bin_mv_upate_pos); /*frame header bin buffer mv update pointer */

    /*DW29*/
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
  3933.  
/*
 * Emit the MFX_VP8_PIC_STATE command (38 dwords) for the current frame.
 *
 * Refreshes the context's VP8 entropy/mode state from the application
 * QP matrix first (vp8_enc_state_update), then programs frame geometry,
 * picture flags, loop filter levels, segment quantizer indices and
 * deltas, segmentation/mode/MV probabilities, and loop filter deltas.
 */
static void
gen8_mfc_vp8_pic_state(VADriverContextP ctx,
                       struct encode_state *encode_state,
                       struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
    VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
    VAQMatrixBufferVP8 *q_matrix = (VAQMatrixBufferVP8 *)encode_state->q_matrix->buffer;
    int i, j, log2num;

    /* number of token partitions is expressed as log2 in the VA-API flags */
    log2num = pic_param->pic_flags.bits.num_token_partitions;

    /*update mode and token probs*/
    vp8_enc_state_update(mfc_context, q_matrix);

    BEGIN_BCS_BATCH(batch, 38);
    OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2));
    /* DW1: frame size in MBs minus one (height high half, width low half) */
    OUT_BCS_BATCH(batch,
                  (ALIGN(seq_param->frame_height, 16) / 16 - 1) << 16 |
                  (ALIGN(seq_param->frame_width, 16) / 16 - 1) << 0);

    OUT_BCS_BATCH(batch,
                  log2num << 24 |
                  pic_param->sharpness_level << 16 |
                  pic_param->pic_flags.bits.sign_bias_alternate << 13 |
                  pic_param->pic_flags.bits.sign_bias_golden << 12 |
                  pic_param->pic_flags.bits.loop_filter_adj_enable << 11 |
                  pic_param->pic_flags.bits.mb_no_coeff_skip << 10 |
                  pic_param->pic_flags.bits.update_mb_segmentation_map << 9 |
                  pic_param->pic_flags.bits.segmentation_enabled << 8 |
                  !pic_param->pic_flags.bits.frame_type << 5 | /* 0 indicate an intra frame in VP8 stream/spec($9.1)*/
                  (pic_param->pic_flags.bits.version / 2) << 4 |
                  (pic_param->pic_flags.bits.version == 3) << 1 | /* full pixel mode for version 3 */
                  !!pic_param->pic_flags.bits.version << 0); /* version 0: 6 tap */

    /* DW3: per-segment loop filter levels */
    OUT_BCS_BATCH(batch,
                  pic_param->loop_filter_level[3] << 24 |
                  pic_param->loop_filter_level[2] << 16 |
                  pic_param->loop_filter_level[1] <<  8 |
                  pic_param->loop_filter_level[0] <<  0);

    /* DW4: per-segment quantizer indices */
    OUT_BCS_BATCH(batch,
                  q_matrix->quantization_index[3] << 24 |
                  q_matrix->quantization_index[2] << 16 |
                  q_matrix->quantization_index[1] <<  8 |
                  q_matrix->quantization_index[0] << 0);

    /* DW5-DW6: quantizer index deltas as sign-magnitude nibble pairs.
     * '(unsigned short)x >> 15' extracts the sign bit of the 16-bit
     * delta; abs() supplies the magnitude. */
    OUT_BCS_BATCH(batch,
                 ((unsigned short)(q_matrix->quantization_index_delta[4]) >> 15) << 28 |
                 abs(q_matrix->quantization_index_delta[4]) << 24 |
                 ((unsigned short)(q_matrix->quantization_index_delta[3]) >> 15) << 20 |
                 abs(q_matrix->quantization_index_delta[3]) << 16 |
                 ((unsigned short)(q_matrix->quantization_index_delta[2]) >> 15) << 12 |
                 abs(q_matrix->quantization_index_delta[2]) << 8 |
                 ((unsigned short)(q_matrix->quantization_index_delta[1]) >> 15) << 4 |
                 abs(q_matrix->quantization_index_delta[1]) << 0);

    OUT_BCS_BATCH(batch,
                 ((unsigned short)(q_matrix->quantization_index_delta[0]) >> 15) << 4 |
                 abs(q_matrix->quantization_index_delta[0]) << 0);
   
    /* DW7: quantizer index clamp range */
    OUT_BCS_BATCH(batch,
                 pic_param->clamp_qindex_high << 8 |
                 pic_param->clamp_qindex_low << 0);

    /* DW8-DW18: all bits set (0xffffffff) */
    for (i = 8; i < 19; i++) {
         OUT_BCS_BATCH(batch, 0xffffffff);
    }

    /* DW19: MB segmentation tree probabilities */
    OUT_BCS_BATCH(batch,
                  mfc_context->vp8_state.mb_segment_tree_probs[2] << 16 |
                  mfc_context->vp8_state.mb_segment_tree_probs[1] <<  8 |
                  mfc_context->vp8_state.mb_segment_tree_probs[0] <<  0);

    /* DW20: frame-level coding probabilities */
    OUT_BCS_BATCH(batch,
                  mfc_context->vp8_state.prob_skip_false << 24 |
                  mfc_context->vp8_state.prob_intra      << 16 |
                  mfc_context->vp8_state.prob_last       <<  8 |
                  mfc_context->vp8_state.prob_gf         <<  0);

    /* DW21-DW22: intra Y and UV mode probabilities */
    OUT_BCS_BATCH(batch,
                  mfc_context->vp8_state.y_mode_probs[3] << 24 |
                  mfc_context->vp8_state.y_mode_probs[2] << 16 |
                  mfc_context->vp8_state.y_mode_probs[1] <<  8 |
                  mfc_context->vp8_state.y_mode_probs[0] <<  0);

    OUT_BCS_BATCH(batch,
                  mfc_context->vp8_state.uv_mode_probs[2] << 16 |
                  mfc_context->vp8_state.uv_mode_probs[1] <<  8 |
                  mfc_context->vp8_state.uv_mode_probs[0] <<  0);
   
    /* MV update value, DW23-DW32.  Two components x 20 byte slots; the
     * last slot of each row (index 19) is emitted as 0 — presumably
     * only 19 MV probabilities per component are valid (TODO confirm
     * against the vp8_probs tables). */
    for (i = 0; i < 2; i++) {
        for (j = 0; j < 20; j += 4) {
            OUT_BCS_BATCH(batch,
                          (j + 3 == 19 ? 0 : mfc_context->vp8_state.mv_probs[i][j + 3]) << 24 |
                          mfc_context->vp8_state.mv_probs[i][j + 2] << 16 |
                          mfc_context->vp8_state.mv_probs[i][j + 1] <<  8 |
                          mfc_context->vp8_state.mv_probs[i][j + 0] <<  0);
        }
    }

    /* DW33-DW34: reference and mode loop-filter deltas, masked to 7 bits */
    OUT_BCS_BATCH(batch,
                  (pic_param->ref_lf_delta[3] & 0x7f) << 24 |
                  (pic_param->ref_lf_delta[2] & 0x7f) << 16 |
                  (pic_param->ref_lf_delta[1] & 0x7f) <<  8 |
                  (pic_param->ref_lf_delta[0] & 0x7f) <<  0);

    OUT_BCS_BATCH(batch,
                  (pic_param->mode_lf_delta[3] & 0x7f) << 24 |
                  (pic_param->mode_lf_delta[2] & 0x7f) << 16 |
                  (pic_param->mode_lf_delta[1] & 0x7f) <<  8 |
                  (pic_param->mode_lf_delta[0] & 0x7f) <<  0);

    /* DW35-DW37: reserved */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
  4056.  
  4057. #define OUT_VP8_BUFFER(bo, offset)                                      \
  4058.     if (bo)                                                             \
  4059.         OUT_BCS_RELOC(batch,                                            \
  4060.                       bo,                                               \
  4061.                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION, \
  4062.                       offset);                                           \
  4063.     else                                                                \
  4064.         OUT_BCS_BATCH(batch, 0);                                        \
  4065.     OUT_BCS_BATCH(batch, 0);                                            \
  4066.     OUT_BCS_BATCH(batch, 0);
  4067.  
/*
 * Emit the MFX_VP8_BSP_BUF_BASE_ADDR_STATE command (32 dwords), which
 * binds every buffer the VP8 bitstream processor reads or writes:
 * the packed frame header, the intermediate partition buffer (with
 * its 8 per-partition offsets and max size), the final frame output
 * (placed after the coded-buffer header), the stream-out buffer, the
 * coefficient-probability stream-in buffer, the token statistics
 * buffer, and the MPC row store.
 *
 * Each OUT_VP8_BUFFER() expands to 3 dwords (reloc-or-zero + 2 zeros).
 */
static void
gen8_mfc_vp8_bsp_buf_base_addr_state(VADriverContextP ctx,
                                     struct encode_state *encode_state,
                                     struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;

    BEGIN_BCS_BATCH(batch, 32);
    OUT_BCS_BATCH(batch, MFX_VP8_BSP_BUF_BASE_ADDR_STATE | (32 - 2));

    OUT_VP8_BUFFER(mfc_context->vp8_state.frame_header_bo, 0);

    OUT_VP8_BUFFER(mfc_context->vp8_state.intermediate_bo, 0);
    OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[0]);
    OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[1]);
    OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[2]);
    OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[3]);
    OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[4]);
    OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[5]);
    OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[6]);
    OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_partition_offset[7]);
    OUT_BCS_BATCH(batch, mfc_context->vp8_state.intermediate_buffer_max_size);

    /* final output lands after the i965 coded-buffer header */
    OUT_VP8_BUFFER(mfc_context->vp8_state.final_frame_bo, I965_CODEDBUFFER_HEADER_SIZE);
    OUT_BCS_BATCH(batch, 0);

    OUT_VP8_BUFFER(mfc_context->vp8_state.stream_out_bo, 0);
    OUT_VP8_BUFFER(mfc_context->vp8_state.coeff_probs_stream_in_bo, 0);
    OUT_VP8_BUFFER(mfc_context->vp8_state.token_statistics_bo, 0);
    OUT_VP8_BUFFER(mfc_context->vp8_state.mpc_row_store_bo, 0);

    ADVANCE_BCS_BATCH(batch);
}
  4102.  
/*
 * Program all picture-level MFX state for a VP8 frame.
 *
 * Emits, in the order the hardware expects: pipe mode select (VP8
 * format), surface state, indirect object base addresses, pipe and
 * BSP buffer base addresses, the VP8-specific BSP buffers, picture
 * state, and finally the encoder configuration.  Per-MB PAK objects
 * are emitted separately (see gen8_mfc_vp8_pak_pipeline).
 */
static void
gen8_mfc_vp8_pipeline_picture_programing(VADriverContextP ctx,
                                           struct encode_state *encode_state,
                                           struct intel_encoder_context *encoder_context)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;

    mfc_context->pipe_mode_select(ctx, MFX_FORMAT_VP8, encoder_context);
    mfc_context->set_surface_state(ctx, encoder_context);
    mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
    gen8_mfc_pipe_buf_addr_state(ctx, encoder_context);
    gen8_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
    gen8_mfc_vp8_bsp_buf_base_addr_state(ctx, encode_state, encoder_context);
    gen8_mfc_vp8_pic_state(ctx, encode_state,encoder_context);
    gen8_mfc_vp8_encoder_cfg(ctx, encode_state, encoder_context);
}
  4119.  
/* Lookup table translating a VME intra 16x16 (or chroma) prediction
 * mode index into the corresponding PAK mode encoding. */
static const unsigned char
vp8_intra_mb_mode_map[VME_MB_INTRA_MODE_COUNT] = {
    PAK_V_PRED,
    PAK_H_PRED,
    PAK_DC_PRED,
    PAK_TM_PRED
};
  4127.  
/* Lookup table translating a VME intra 4x4 sub-block prediction mode
 * index into the corresponding PAK B-mode encoding. */
static const unsigned char
vp8_intra_block_mode_map[VME_B_INTRA_MODE_COUNT] = {
    PAK_B_VE_PRED,
    PAK_B_HE_PRED,
    PAK_B_DC_PRED,
    PAK_B_LD_PRED,
    PAK_B_RD_PRED,
    PAK_B_VR_PRED,
    PAK_B_HD_PRED,
    PAK_B_VL_PRED,
    PAK_B_HU_PRED
};
  4140.  
  4141. static int inline gen8_mfc_vp8_intra_mb_mode_map(unsigned int vme_pred_mode, int is_luma_4x4)
  4142. {
  4143.     unsigned int i, pak_pred_mode = 0;
  4144.     unsigned int vme_sub_blocks_pred_mode[8], pak_sub_blocks_pred_mode[8]; /* 8 blocks's intra mode */
  4145.  
  4146.     if (!is_luma_4x4) {
  4147.         pak_pred_mode = vp8_intra_mb_mode_map[vme_pred_mode & 0x3];
  4148.     } else {
  4149.         for (i = 0; i < 8; i++) {
  4150.             vme_sub_blocks_pred_mode[i] = ((vme_pred_mode >> (4 * i)) & 0xf);
  4151.             assert(vme_sub_blocks_pred_mode[i] < VME_B_INTRA_MODE_COUNT);
  4152.             pak_sub_blocks_pred_mode[i] = vp8_intra_block_mode_map[vme_sub_blocks_pred_mode[i]];
  4153.             pak_pred_mode |= (pak_sub_blocks_pred_mode[i] << (4 * i));
  4154.         }
  4155.     }
  4156.  
  4157.     return pak_pred_mode;
  4158. }
/*
 * Emit one MFX_VP8_PAK_OBJECT command (7 dwords) for an intra macroblock
 * at MB coordinates (x, y).
 *
 * 'msg' is the VME output record for this MB; the fields consumed here:
 *   msg[0] bits 5:4 - VME intra MB mode (0 = 16x16, 2 = 4x4; the only
 *                     modes VP8 supports, hence the assert)
 *   msg[1], msg[2]  - packed luma prediction modes
 *   msg[3] bits 1:0 - chroma prediction mode
 * The VME modes are remapped to PAK encodings before being written.
 * When 'batch' is NULL, commands go to the encoder's base batch.
 */
static void
gen8_mfc_vp8_pak_object_intra(VADriverContextP ctx,
                              struct intel_encoder_context *encoder_context,
                              unsigned int *msg,
                              int x, int y,
                              struct intel_batchbuffer *batch)
{
    unsigned int vme_intra_mb_mode, vme_chroma_pred_mode;
    unsigned int pak_intra_mb_mode, pak_chroma_pred_mode;
    unsigned int vme_luma_pred_mode[2], pak_luma_pred_mode[2];

    if (batch == NULL)
        batch = encoder_context->base.batch;

    vme_intra_mb_mode = ((msg[0] & 0x30) >> 4);
    assert((vme_intra_mb_mode == 0) || (vme_intra_mb_mode == 2)); //vp8 only support intra_16x16 and intra_4x4
    /* VME 0/2 -> PAK 0/1 (16x16 vs 4x4 MB type) */
    pak_intra_mb_mode = (vme_intra_mb_mode >> 1);

    vme_luma_pred_mode[0] = msg[1];
    vme_luma_pred_mode[1] = msg[2];
    vme_chroma_pred_mode = msg[3] & 0x3;

    pak_luma_pred_mode[0] = gen8_mfc_vp8_intra_mb_mode_map(vme_luma_pred_mode[0], pak_intra_mb_mode);
    pak_luma_pred_mode[1] = gen8_mfc_vp8_intra_mb_mode_map(vme_luma_pred_mode[1], pak_intra_mb_mode);
    pak_chroma_pred_mode = gen8_mfc_vp8_intra_mb_mode_map(vme_chroma_pred_mode, 0);

    BEGIN_BCS_BATCH(batch, 7);

    OUT_BCS_BATCH(batch, MFX_VP8_PAK_OBJECT | (7 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  (0 << 20) |                    /* mv format: intra mb */
                  (0 << 18) |                    /* Segment ID */
                  (0 << 17) |                    /* disable coeff clamp */
                  (1 << 13) |                    /* intra mb flag */
                  (0 << 11) |                    /* refer picture select: last frame */
                  (pak_intra_mb_mode << 8) |     /* mb type */
                  (pak_chroma_pred_mode << 4) |  /* mb uv mode */
                  (0 << 2) |                     /* skip mb flag: disable */
                  0);

    OUT_BCS_BATCH(batch, (y << 16) | x);
    OUT_BCS_BATCH(batch, pak_luma_pred_mode[0]);
    OUT_BCS_BATCH(batch, pak_luma_pred_mode[1]);

    ADVANCE_BCS_BATCH(batch);
}
  4207.  
/*
 * Emit one MFX_VP8_PAK_OBJECT command (7 dwords) for an inter macroblock
 * at MB coordinates (x, y).
 *
 * Only inter 16x16 is supported (asserted).  The single 16x16 MV from
 * the VME record is doubled (VP8 stores luma MVs at double resolution)
 * and replicated into msg[0..15] at the start of the VME record, so
 * the hardware fetches the MVs indirectly from 'offset' (the record's
 * 64-byte-aligned position in the VME output buffer) rather than
 * inline.  NOTE: this mutates the caller's mapped VME output in place.
 * When 'batch' is NULL, commands go to the encoder's base batch.
 */
static void
gen8_mfc_vp8_pak_object_inter(VADriverContextP ctx,
                              struct intel_encoder_context *encoder_context,
                              unsigned int *msg,
                              int offset,
                              int x, int y,
                              struct intel_batchbuffer *batch)
{
    int i;

    if (batch == NULL)
        batch = encoder_context->base.batch;

    /* only support inter_16x16 now */
    assert((msg[AVC_INTER_MSG_OFFSET] & INTER_MODE_MASK) == INTER_16X16);
    /* for inter_16x16, all 16 MVs should be same,
     * and move mv to the vme mb start address to make sure offset is 64 bytes aligned
     * as vp8 spec, all vp8 luma motion vectors are doubled when stored
     */
    msg[0] = (((msg[AVC_INTER_MV_OFFSET/4] & 0xffff0000) << 1) | ((msg[AVC_INTER_MV_OFFSET/4] << 1) & 0xffff));

    for (i = 1; i < 16; i++) {
        msg[i] = msg[0];
    }
   
    BEGIN_BCS_BATCH(batch, 7);

    OUT_BCS_BATCH(batch, MFX_VP8_PAK_OBJECT | (7 - 2));
    OUT_BCS_BATCH(batch,
                  (0 << 29) |           /* enable inline mv data: disable */
                  64);                  /* indirect mv data length: 16 MVs */
    OUT_BCS_BATCH(batch,
                  offset);              /* indirect mv data start address */
    OUT_BCS_BATCH(batch,
                  (4 << 20) |           /* mv format: inter */
                  (0 << 18) |           /* Segment ID */
                  (0 << 17) |           /* coeff clamp: disable */
                  (0 << 13) |           /* intra mb flag: inter mb */
                  (0 << 11) |           /* refer picture select: last frame */
                  (0 << 8) |            /* mb type: 16x16 */
                  (0 << 4) |            /* mb uv mode: dc_pred */
                  (0 << 2) |            /* skip mb flag: disable */
                  0);

    OUT_BCS_BATCH(batch, (y << 16) | x);

    /*new mv*/
    OUT_BCS_BATCH(batch, 0x8);
    OUT_BCS_BATCH(batch, 0x8);

    ADVANCE_BCS_BATCH(batch);
}
  4260.  
/*
 * Emit a PAK object command for every macroblock of the frame.
 *
 * Maps the VME output buffer and walks the MBs in raster order.  For
 * an intra frame every MB is coded intra; otherwise the per-MB VME RDO
 * costs decide between intra and inter coding (ties go to inter).
 * 'slice_batch' receives the commands (NULL falls through to the
 * encoder's base batch inside the per-MB emitters).
 *
 * The buffer is mapped writable because the inter path rewrites the
 * MV block of each chosen record in place.
 */
static void
gen8_mfc_vp8_pak_pipeline(VADriverContextP ctx,
                          struct encode_state *encode_state,
                          struct intel_encoder_context *encoder_context,
                          struct intel_batchbuffer *slice_batch)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    VAEncSequenceParameterBufferVP8 *seq_param = (VAEncSequenceParameterBufferVP8 *)encode_state->seq_param_ext->buffer;
    VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
    int width_in_mbs = ALIGN(seq_param->frame_width, 16) / 16;
    int height_in_mbs = ALIGN(seq_param->frame_height, 16) / 16;
    unsigned int *msg = NULL;
    unsigned char *msg_ptr = NULL;
    unsigned int i, offset, is_intra_frame;

    /* VP8 frame_type 0 means key (intra) frame */
    is_intra_frame = !pic_param->pic_flags.bits.frame_type;

    dri_bo_map(vme_context->vme_output.bo , 1);
    msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;

    for( i = 0; i < width_in_mbs * height_in_mbs; i++) {
        int h_pos = i % width_in_mbs;
        int v_pos = i / width_in_mbs;
        /* each MB has a fixed-size VME output record */
        msg = (unsigned int *) (msg_ptr + i * vme_context->vme_output.size_block);
       
        if (is_intra_frame) {
            gen8_mfc_vp8_pak_object_intra(ctx,
                    encoder_context,
                    msg,
                    h_pos, v_pos,
                    slice_batch);
        } else {
            int inter_rdo, intra_rdo;
            inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
            intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;

            /* pick the cheaper coding mode by RDO cost */
            if (intra_rdo < inter_rdo) {
                gen8_mfc_vp8_pak_object_intra(ctx,
                        encoder_context,
                        msg,
                        h_pos, v_pos,
                        slice_batch);
            } else {
                offset = i * vme_context->vme_output.size_block;
                gen8_mfc_vp8_pak_object_inter(ctx,
                        encoder_context,
                        msg,
                        offset,
                        h_pos, v_pos,
                        slice_batch);
            }
        }
    }

    dri_bo_unmap(vme_context->vme_output.bo);
}
  4317.  
/*
 * Build a standalone batch buffer holding all VP8 PAK object commands
 * for the frame, terminated with MI_BATCH_BUFFER_END.
 *
 * Consumes mfc_context->aux_batchbuffer: its underlying bo is returned
 * (with a reference the caller must drop via dri_bo_unreference) and
 * the batchbuffer wrapper is freed and the pointer cleared, so the aux
 * batchbuffer cannot be reused after this call.
 */
static dri_bo *
gen8_mfc_vp8_software_batchbuffer(VADriverContextP ctx,
                                          struct encode_state *encode_state,
                                          struct intel_encoder_context *encoder_context)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    struct intel_batchbuffer *batch;
    dri_bo *batch_bo;

    batch = mfc_context->aux_batchbuffer;
    batch_bo = batch->buffer;

    gen8_mfc_vp8_pak_pipeline(ctx, encode_state, encoder_context, batch);

    /* pad to a qword boundary before the terminator */
    intel_batchbuffer_align(batch, 8);

    BEGIN_BCS_BATCH(batch, 2);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
    ADVANCE_BCS_BATCH(batch);

    /* keep the bo alive past the batchbuffer teardown below */
    dri_bo_reference(batch_bo);
    intel_batchbuffer_free(batch);
    mfc_context->aux_batchbuffer = NULL;

    return batch_bo;
}
  4348.  
/*
 * Program the full BCS pipeline for one VP8 frame.
 *
 * First builds the per-MB PAK commands into a secondary ("slice")
 * batch buffer, then emits the picture-level state into the main
 * batch and chains to the secondary buffer with a second-level
 * MI_BATCH_BUFFER_START.  The local reference taken on the slice
 * buffer is dropped once it has been relocated into the main batch.
 */
static void
gen8_mfc_vp8_pipeline_programing(VADriverContextP ctx,
                                   struct encode_state *encode_state,
                                   struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    dri_bo *slice_batch_bo;

    slice_batch_bo = gen8_mfc_vp8_software_batchbuffer(ctx, encode_state, encoder_context);

    // begin programing
    intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
    intel_batchbuffer_emit_mi_flush(batch);

    // picture level programing
    gen8_mfc_vp8_pipeline_picture_programing(ctx, encode_state, encoder_context);

    /* chain to the PAK object batch (second-level batch start) */
    BEGIN_BCS_BATCH(batch, 4);
    OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
    OUT_BCS_RELOC(batch,
                  slice_batch_bo,
                  I915_GEM_DOMAIN_COMMAND, 0,
                  0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);

    // end programing
    intel_batchbuffer_end_atomic(batch);

    dri_bo_unreference(slice_batch_bo);
}
  4381.  
/*
 * Compute the coded size in bytes of the just-encoded VP8 frame from
 * the hardware token statistics, and record it in the coded buffer
 * header so vaMapBuffer reports it to the application.
 *
 * The statistics buffer holds per-partition bit counts: entry 0 is the
 * first (mode/MV) partition, entries 1..partition_num are the token
 * partitions.  Returns the total coded byte count.
 */
static int gen8_mfc_calc_vp8_coded_buffer_size(VADriverContextP ctx,
                          struct encode_state *encode_state,
                          struct intel_encoder_context *encoder_context)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    VAEncPictureParameterBufferVP8 *pic_param = (VAEncPictureParameterBufferVP8 *)encode_state->pic_param_ext->buffer;
    unsigned char is_intra_frame = !pic_param->pic_flags.bits.frame_type;
    unsigned int *vp8_encoding_status, i, first_partition_bytes, token_partition_bytes, vp8_coded_bytes;
   
    /* num_token_partitions is log2 of the actual partition count */
    int partition_num = 1 << pic_param->pic_flags.bits.num_token_partitions;

    first_partition_bytes = token_partition_bytes = vp8_coded_bytes = 0;

    dri_bo_map(mfc_context->vp8_state.token_statistics_bo, 0);

    vp8_encoding_status = (unsigned int *)mfc_context->vp8_state.token_statistics_bo->virtual;
    /* bit counts rounded up to whole bytes */
    first_partition_bytes = (vp8_encoding_status[0] + 7) / 8;

    for (i = 1; i <= partition_num; i++)
        token_partition_bytes += (vp8_encoding_status[i] + 7) / 8;

    /*coded_bytes includes P0~P8 partitions bytes + uncompressed data bytes + partition_size bytes in bitstream + 3 extra bytes */
    /*it seems the last partition size in vp8 status buffer is smaller than reality. so add 3 extra bytes */
    /* uncompressed data chunk: 3 bytes always, +7 more on a key frame */
    vp8_coded_bytes = first_partition_bytes + token_partition_bytes + (3 + 7 * !!is_intra_frame) + (partition_num - 1) * 3 + 3;

    dri_bo_unmap(mfc_context->vp8_state.token_statistics_bo);

    /* publish the size through the coded-buffer segment header */
    dri_bo_map(mfc_context->vp8_state.final_frame_bo, 0);
    struct i965_coded_buffer_segment *coded_buffer_segment = (struct i965_coded_buffer_segment *)(mfc_context->vp8_state.final_frame_bo->virtual);
    coded_buffer_segment->base.size = vp8_coded_bytes;
    dri_bo_unmap(mfc_context->vp8_state.final_frame_bo);

    return vp8_coded_bytes;
}
  4416.  
/*
 * Top-level entry for encoding one VP8 picture.
 *
 * Allocates/initializes per-frame state, prepares reference and coded
 * buffers, programs the BCS pipeline, kicks the hardware, and computes
 * the resulting coded size.  Under CBR rate control, feeds the frame's
 * bit size to the BRC post-pack check and updates the HRD model on
 * success; an unrepairable over/underflow at the QP limits is logged
 * once and otherwise ignored.  Always returns VA_STATUS_SUCCESS.
 */
static VAStatus
gen8_mfc_vp8_encode_picture(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              struct intel_encoder_context *encoder_context)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    unsigned int rate_control_mode = encoder_context->rate_control_mode;
    int current_frame_bits_size;
    int sts;

    gen8_mfc_vp8_init(ctx, encode_state, encoder_context);
    intel_mfc_vp8_prepare(ctx, encode_state, encoder_context);
    /*Programing bcs pipeline*/
    gen8_mfc_vp8_pipeline_programing(ctx, encode_state, encoder_context);
    gen8_mfc_run(ctx, encode_state, encoder_context);
    /* coded size is reported in bytes; BRC works in bits */
    current_frame_bits_size = 8 * gen8_mfc_calc_vp8_coded_buffer_size(ctx, encode_state, encoder_context);

    if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
        sts = gen8_mfc_vp8_brc_postpack(encode_state, mfc_context, current_frame_bits_size);
        if (sts == BRC_NO_HRD_VIOLATION) {
            gen8_mfc_vp8_hrd_context_update(encode_state, mfc_context);
        }
        else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
            /* warn only once per context to avoid log spam */
            if (!mfc_context->hrd.violation_noted) {
                fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP)? "overflow": "underflow");
                mfc_context->hrd.violation_noted = 1;
            }
            return VA_STATUS_SUCCESS;
        }
    }

    return VA_STATUS_SUCCESS;
}
  4450.  
  4451. static void
  4452. gen8_mfc_context_destroy(void *context)
  4453. {
  4454.     struct gen6_mfc_context *mfc_context = context;
  4455.     int i;
  4456.  
  4457.     dri_bo_unreference(mfc_context->post_deblocking_output.bo);
  4458.     mfc_context->post_deblocking_output.bo = NULL;
  4459.  
  4460.     dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
  4461.     mfc_context->pre_deblocking_output.bo = NULL;
  4462.  
  4463.     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
  4464.     mfc_context->uncompressed_picture_source.bo = NULL;
  4465.  
  4466.     dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
  4467.     mfc_context->mfc_indirect_pak_bse_object.bo = NULL;
  4468.  
  4469.     for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
  4470.         dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
  4471.         mfc_context->direct_mv_buffers[i].bo = NULL;
  4472.     }
  4473.  
  4474.     dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
  4475.     mfc_context->intra_row_store_scratch_buffer.bo = NULL;
  4476.  
  4477.     dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
  4478.     mfc_context->macroblock_status_buffer.bo = NULL;
  4479.  
  4480.     dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
  4481.     mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
  4482.  
  4483.     dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
  4484.     mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
  4485.  
  4486.  
  4487.     for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
  4488.         dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
  4489.         mfc_context->reference_surfaces[i].bo = NULL;  
  4490.     }
  4491.  
  4492.     i965_gpe_context_destroy(&mfc_context->gpe_context);
  4493.  
  4494.     dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
  4495.     mfc_context->mfc_batchbuffer_surface.bo = NULL;
  4496.  
  4497.     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
  4498.     mfc_context->aux_batchbuffer_surface.bo = NULL;
  4499.  
  4500.     if (mfc_context->aux_batchbuffer)
  4501.         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
  4502.  
  4503.     mfc_context->aux_batchbuffer = NULL;
  4504.  
  4505.     dri_bo_unreference(mfc_context->vp8_state.coeff_probs_stream_in_bo);
  4506.     mfc_context->vp8_state.coeff_probs_stream_in_bo = NULL;
  4507.  
  4508.     dri_bo_unreference(mfc_context->vp8_state.final_frame_bo);
  4509.     mfc_context->vp8_state.final_frame_bo = NULL;
  4510.  
  4511.     dri_bo_unreference(mfc_context->vp8_state.frame_header_bo);
  4512.     mfc_context->vp8_state.frame_header_bo = NULL;
  4513.  
  4514.     dri_bo_unreference(mfc_context->vp8_state.intermediate_bo);
  4515.     mfc_context->vp8_state.intermediate_bo = NULL;
  4516.  
  4517.     dri_bo_unreference(mfc_context->vp8_state.mpc_row_store_bo);
  4518.     mfc_context->vp8_state.mpc_row_store_bo = NULL;
  4519.  
  4520.     dri_bo_unreference(mfc_context->vp8_state.stream_out_bo);
  4521.     mfc_context->vp8_state.stream_out_bo = NULL;
  4522.  
  4523.     dri_bo_unreference(mfc_context->vp8_state.token_statistics_bo);
  4524.     mfc_context->vp8_state.token_statistics_bo = NULL;
  4525.  
  4526.     free(mfc_context);
  4527. }
  4528.  
  4529. static VAStatus gen8_mfc_pipeline(VADriverContextP ctx,
  4530.                                   VAProfile profile,
  4531.                                   struct encode_state *encode_state,
  4532.                                   struct intel_encoder_context *encoder_context)
  4533. {
  4534.     VAStatus vaStatus;
  4535.  
  4536.     switch (profile) {
  4537.     case VAProfileH264ConstrainedBaseline:
  4538.     case VAProfileH264Main:
  4539.     case VAProfileH264High:
  4540.     case VAProfileH264MultiviewHigh:
  4541.     case VAProfileH264StereoHigh:
  4542.         vaStatus = gen8_mfc_avc_encode_picture(ctx, encode_state, encoder_context);
  4543.         break;
  4544.  
  4545.         /* FIXME: add for other profile */
  4546.     case VAProfileMPEG2Simple:
  4547.     case VAProfileMPEG2Main:
  4548.         vaStatus = gen8_mfc_mpeg2_encode_picture(ctx, encode_state, encoder_context);
  4549.         break;
  4550.  
  4551.     case VAProfileJPEGBaseline:
  4552.         jpeg_init_default_qmatrix(ctx, encoder_context);
  4553.         vaStatus = gen8_mfc_jpeg_encode_picture(ctx, encode_state, encoder_context);
  4554.         break;
  4555.  
  4556.     case VAProfileVP8Version0_3:
  4557.         vaStatus = gen8_mfc_vp8_encode_picture(ctx, encode_state, encoder_context);
  4558.         break;
  4559.  
  4560.     default:
  4561.         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
  4562.         break;
  4563.     }
  4564.  
  4565.     return vaStatus;
  4566. }
  4567.  
  4568. Bool gen8_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
  4569. {
  4570.     struct gen6_mfc_context *mfc_context = calloc(1, sizeof(struct gen6_mfc_context));
  4571.     assert(mfc_context);
  4572.     mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;
  4573.  
  4574.     mfc_context->gpe_context.idrt.max_entries = MAX_GPE_KERNELS;
  4575.     mfc_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
  4576.  
  4577.     mfc_context->gpe_context.curbe.length = 32 * 4;
  4578.  
  4579.     mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
  4580.     mfc_context->gpe_context.vfe_state.num_urb_entries = 16;
  4581.     mfc_context->gpe_context.vfe_state.gpgpu_mode = 0;
  4582.     mfc_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
  4583.     mfc_context->gpe_context.vfe_state.curbe_allocation_size = 37 - 1;
  4584.  
  4585.     i965_gpe_load_kernels(ctx,
  4586.                           &mfc_context->gpe_context,
  4587.                           gen8_mfc_kernels,
  4588.                           NUM_MFC_KERNEL);
  4589.  
  4590.     mfc_context->pipe_mode_select = gen8_mfc_pipe_mode_select;
  4591.     mfc_context->set_surface_state = gen8_mfc_surface_state;
  4592.     mfc_context->ind_obj_base_addr_state = gen8_mfc_ind_obj_base_addr_state;
  4593.     mfc_context->avc_img_state = gen8_mfc_avc_img_state;
  4594.     mfc_context->avc_qm_state = gen8_mfc_avc_qm_state;
  4595.     mfc_context->avc_fqm_state = gen8_mfc_avc_fqm_state;
  4596.     mfc_context->insert_object = gen8_mfc_avc_insert_object;
  4597.     mfc_context->buffer_suface_setup = gen8_gpe_buffer_suface_setup;
  4598.  
  4599.     encoder_context->mfc_context = mfc_context;
  4600.     encoder_context->mfc_context_destroy = gen8_mfc_context_destroy;
  4601.     encoder_context->mfc_pipeline = gen8_mfc_pipeline;
  4602.  
  4603.     if (encoder_context->codec == CODEC_VP8)
  4604.         encoder_context->mfc_brc_prepare = gen8_mfc_vp8_brc_prepare;
  4605.     else
  4606.         encoder_context->mfc_brc_prepare = intel_mfc_brc_prepare;
  4607.  
  4608.     return True;
  4609. }
  4610.