Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright © 2010-2012 Intel Corporation
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the
  6.  * "Software"), to deal in the Software without restriction, including
  7.  * without limitation the rights to use, copy, modify, merge, publish,
  8.  * distribute, sub license, and/or sell copies of the Software, and to
  9.  * permit persons to whom the Software is furnished to do so, subject to
  10.  * the following conditions:
  11.  *
  12.  * The above copyright notice and this permission notice (including the
  13.  * next paragraph) shall be included in all copies or substantial portions
  14.  * of the Software.
  15.  *
  16.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  17.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  18.  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  19.  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
  20.  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  21.  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  22.  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23.  *
  24.  * Authors:
  25.  *    Zhao Yakui <yakui.zhao@intel.com>
  26.  *    Xiang Haihao <haihao.xiang@intel.com>
  27.  *
  28.  */
  29.  
  30. #include <stdio.h>
  31. #include <stdlib.h>
  32. #include <string.h>
  33. #include <math.h>
  34. #include <assert.h>
  35.  
  36. #include "intel_batchbuffer.h"
  37. #include "i965_defines.h"
  38. #include "i965_structs.h"
  39. #include "i965_drv_video.h"
  40. #include "i965_encoder.h"
  41. #include "i965_encoder_utils.h"
  42. #include "gen6_mfc.h"
  43. #include "gen6_vme.h"
  44. #include "intel_media.h"
  45.  
/* Offsets (in DWs) and masks used to pick RDO costs, inter messages and
 * MVs out of the VME output records when building PAK objects. */
#define AVC_INTRA_RDO_OFFSET    4
#define AVC_INTER_RDO_OFFSET    10
#define AVC_INTER_MSG_OFFSET    8
#define AVC_INTER_MV_OFFSET     48
#define AVC_RDO_MASK            0xFFFF

/* 0: build the slice batchbuffer with the GPU media kernel; non-zero
 * would select the software (CPU) path on Haswell. */
#define MFC_SOFTWARE_HASWELL    0

/* Binding-table layout: surface states are padded to the larger of the
 * gen6/gen7 sizes, and the binding table itself follows the last
 * possible surface-state slot. */
#define SURFACE_STATE_PADDED_SIZE               MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7)
#define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * index)
#define BINDING_TABLE_OFFSET(index)             (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * index)

/* Haswell B0 stepping onwards uses wider (b-plus) MFX command layouts. */
#define B0_STEP_REV             2
#define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
/* Precompiled Haswell media kernel that expands per-MB VME output into
 * MFC PAK object commands (binary included from the shaders tree). */
static const uint32_t gen75_mfc_batchbuffer_avc[][4] = {
#include "shaders/utils/mfc_batchbuffer_hsw.g75b"
};
  64.  
/* Kernel table for the GPE context used by the batchbuffer path; a
 * single kernel handles AVC batchbuffer generation. */
static struct i965_kernel gen75_mfc_kernels[] = {
    {
        "MFC AVC INTRA BATCHBUFFER ",
        MFC_BATCHBUFFER_AVC_INTRA,
        gen75_mfc_batchbuffer_avc,
        sizeof(gen75_mfc_batchbuffer_avc),
        NULL
    },
};
  74.  
/* Fields of the VME inter-prediction message: macroblock partition
 * type (low two bits) and sub-macroblock shape byte. */
#define         INTER_MODE_MASK         0x03
#define         INTER_8X8               0x03
#define         INTER_16X8              0x01
#define         INTER_8X16              0x02
#define         SUBMB_SHAPE_MASK        0x00FF00

/* MV count encodings placed into the PAK object command. */
#define         INTER_MV8               (4 << 20)
#define         INTER_MV32              (6 << 20)

  84.  
/*
 * Emit MFX_PIPE_MODE_SELECT: put the MFX engine into encoding mode
 * (long command format) for AVC or MPEG-2, enabling whichever of the
 * pre-/post-deblocking reconstructed outputs has a buffer allocated.
 */
static void
gen75_mfc_pipe_mode_select(VADriverContextP ctx,
                           int standard_select,
                           struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    /* Only AVC and MPEG-2 are supported by this encoder path. */
    assert(standard_select == MFX_FORMAT_MPEG2 ||
           standard_select == MFX_FORMAT_AVC);

    BEGIN_BCS_BATCH(batch, 5);

    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  (MFX_LONG_MODE << 17) | /* Must be long format for encoder */
                  (MFD_MODE_VLD << 15) | /* VLD mode */
                  (0 << 10) | /* Stream-Out Enable */
                  ((!!mfc_context->post_deblocking_output.bo) << 9)  | /* Post Deblocking Output */
                  ((!!mfc_context->pre_deblocking_output.bo) << 8)  | /* Pre Deblocking Output */
                  (0 << 5)  | /* not in stitch mode */
                  (1 << 4)  | /* encoding mode */
                  (standard_select << 0));  /* standard select: avc or mpeg2 */
    /* DW2: clock gating and error-handling controls, all left at 0. */
    OUT_BCS_BATCH(batch,
                  (0 << 7)  | /* expand NOA bus flag */
                  (0 << 6)  | /* disable slice-level clock gating */
                  (0 << 5)  | /* disable clock gating for NOA */
                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
                  (0 << 1)  |
                  (0 << 0));
    /* DW3-4: reserved. */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
  121.  
/*
 * Emit MFX_SURFACE_STATE describing the reconstructed/source picture:
 * NV12 (planar 4:2:0, interleaved U/V), Y-major tiled, with dimensions
 * and pitch taken from mfc_context->surface_state.
 */
static void
gen75_mfc_surface_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;

    BEGIN_BCS_BATCH(batch, 6);

    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, 0);
    /* DW2: height/width are programmed minus one, per hardware spec. */
    OUT_BCS_BATCH(batch,
                  ((mfc_context->surface_state.height - 1) << 18) |
                  ((mfc_context->surface_state.width - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
                  (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
                  (0 << 22) | /* surface object control state, FIXME??? */
                  ((mfc_context->surface_state.w_pitch - 1) << 3) | /* pitch */
                  (0 << 2)  | /* must be 0 for interleave U/V */
                  (1 << 1)  | /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0));  /* tile walk, TILEWALK_YMAJOR */
    /* DW4: Y offset (in rows) to the start of the chroma plane. */
    OUT_BCS_BATCH(batch,
                  (0 << 16) |                                                           /* must be 0 for interleave U/V */
                  (mfc_context->surface_state.h_pitch));                /* y offset for U(cb) */
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
  150.  
/*
 * Emit the 26-DW (B0+ stepping) MFX_IND_OBJ_BASE_ADDR_STATE: points the
 * MFX engine at the VME output buffer (indirect MV objects) and at the
 * PAK-BSE output buffer where the compressed bitstream is written.
 */
static void
gen75_mfc_ind_obj_base_addr_state_bplus(VADriverContextP ctx,
                                        struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    struct gen6_vme_context *vme_context = encoder_context->vme_context;

    BEGIN_BCS_BATCH(batch, 26);

    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
    /* the DW1-3 is for the MFX indirect bistream offset */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    /* the DW4-5 is the MFX upper bound */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW6-10 is for MFX Indirect MV Object Base Address */
    OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
    OUT_BCS_BATCH(batch, 0);

    /* the DW11-15 is for MFX IT-COFF. Not used on encoder */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW16-20 is for MFX indirect DBLK. Not used on encoder */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW21-25 is for MFC Indirect PAK-BSE Object Base Address for Encoder*/
    OUT_BCS_RELOC(batch,
                  mfc_context->mfc_indirect_pak_bse_object.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* Upper bound of the PAK-BSE buffer (same bo, at end_offset). */
    OUT_BCS_RELOC(batch,
                  mfc_context->mfc_indirect_pak_bse_object.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  mfc_context->mfc_indirect_pak_bse_object.end_offset);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
  207.  
/*
 * Emit MFX_IND_OBJ_BASE_ADDR_STATE, dispatching to the 26-DW b-plus
 * layout on B0+ steppings; otherwise emits the legacy 11-DW form with
 * the same MV-object and PAK-BSE buffer programming.
 */
static void
gen75_mfc_ind_obj_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    if (IS_STEPPING_BPLUS(i965)) {
        gen75_mfc_ind_obj_base_addr_state_bplus(ctx, encoder_context);
        return;
    }

    BEGIN_BCS_BATCH(batch, 11);

    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    /* MFX Indirect MV Object Base Address */
    OUT_BCS_RELOC(batch, vme_context->vme_output.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    /*MFC Indirect PAK-BSE Object Base Address for Encoder*/
    OUT_BCS_RELOC(batch,
                  mfc_context->mfc_indirect_pak_bse_object.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    OUT_BCS_RELOC(batch,
                  mfc_context->mfc_indirect_pak_bse_object.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  mfc_context->mfc_indirect_pak_bse_object.end_offset);

    ADVANCE_BCS_BATCH(batch);
}
  245.  
/*
 * Emit MFX_AVC_IMG_STATE: per-picture AVC encode parameters — frame
 * dimensions in macroblocks, entropy coding mode, transform flags and
 * conformance limits, taken from the VA-API picture parameter buffer.
 */
static void
gen75_mfc_avc_img_state(VADriverContextP ctx, struct encode_state *encode_state,
                        struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;

    /* Round surface dimensions up to whole 16x16 macroblocks. */
    int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
    int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;

    BEGIN_BCS_BATCH(batch, 16);

    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
    /*DW1. MB setting of frame */
    OUT_BCS_BATCH(batch,
                  ((width_in_mbs * height_in_mbs - 1) & 0xFFFF));
    OUT_BCS_BATCH(batch,
                  ((height_in_mbs - 1) << 16) |
                  ((width_in_mbs - 1) << 0));
    /* DW3 QP setting */
    OUT_BCS_BATCH(batch,
                  (0 << 24) |   /* Second Chroma QP Offset */
                  (0 << 16) |   /* Chroma QP Offset */
                  (0 << 14) |   /* Max-bit conformance Intra flag */
                  (0 << 13) |   /* Max Macroblock size conformance Inter flag */
                  (pPicParameter->pic_fields.bits.weighted_pred_flag << 12) |   /*Weighted_Pred_Flag */
                  (pPicParameter->pic_fields.bits.weighted_bipred_idc << 10) |  /* Weighted_BiPred_Idc */
                  (0 << 8)  |   /* FIXME: Image Structure */
                  (0 << 0) );   /* Current Decoed Image Frame Store ID, reserved in Encode mode */
    OUT_BCS_BATCH(batch,
                  (0 << 16) |   /* Mininum Frame size */
                  (0 << 15) |   /* Disable reading of Macroblock Status Buffer */
                  (0 << 14) |   /* Load BitStream Pointer only once, 1 slic 1 frame */
                  (0 << 13) |   /* CABAC 0 word insertion test enable */
                  (1 << 12) |   /* MVUnpackedEnable,compliant to DXVA */
                  (1 << 10) |   /* Chroma Format IDC, 4:2:0 */
                  (0 << 8)  |   /* FIXME: MbMvFormatFlag */
                  (pPicParameter->pic_fields.bits.entropy_coding_mode_flag << 7)  |   /*0:CAVLC encoding mode,1:CABAC*/
                  (0 << 6)  |   /* Only valid for VLD decoding mode */
                  (0 << 5)  |   /* Constrained Intra Predition Flag, from PPS */
                  (0 << 4)  |   /* Direct 8x8 inference flag */
                  (pPicParameter->pic_fields.bits.transform_8x8_mode_flag << 3)  |   /*8x8 or 4x4 IDCT Transform Mode Flag*/
                  (1 << 2)  |   /* Frame MB only flag */
                  (0 << 1)  |   /* MBAFF mode is in active */
                  (0 << 0));    /* Field picture flag */
    /* DW5 Trellis quantization */
    OUT_BCS_BATCH(batch, 0);    /* Mainly about MB rate control and debug, just ignoring */
    OUT_BCS_BATCH(batch,        /* Inter and Intra Conformance Max size limit */
                  (0xBB8 << 16) |       /* InterMbMaxSz */
                  (0xEE8) );            /* IntraMbMaxSz */
    OUT_BCS_BATCH(batch, 0);            /* Reserved */
    /* DW8. QP delta */
    OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
    OUT_BCS_BATCH(batch, 0);            /* Slice QP Delta for bitrate control */
    /* DW10. Bit setting for MB */
    OUT_BCS_BATCH(batch, 0x8C000000);
    OUT_BCS_BATCH(batch, 0x00010000);
    /* DW12. */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0x02010100);
    /* DW14. For short format */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
  313.  
  314. static void
  315. gen75_mfc_qm_state(VADriverContextP ctx,
  316.                    int qm_type,
  317.                    unsigned int *qm,
  318.                    int qm_length,
  319.                    struct intel_encoder_context *encoder_context)
  320. {
  321.     struct intel_batchbuffer *batch = encoder_context->base.batch;
  322.     unsigned int qm_buffer[16];
  323.  
  324.     assert(qm_length <= 16);
  325.     assert(sizeof(*qm) == 4);
  326.     memcpy(qm_buffer, qm, qm_length * 4);
  327.  
  328.     BEGIN_BCS_BATCH(batch, 18);
  329.     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
  330.     OUT_BCS_BATCH(batch, qm_type << 0);
  331.     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
  332.     ADVANCE_BCS_BATCH(batch);
  333. }
  334.  
  335. static void
  336. gen75_mfc_avc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
  337. {
  338.     unsigned int qm[16] = {
  339.         0x10101010, 0x10101010, 0x10101010, 0x10101010,
  340.         0x10101010, 0x10101010, 0x10101010, 0x10101010,
  341.         0x10101010, 0x10101010, 0x10101010, 0x10101010,
  342.         0x10101010, 0x10101010, 0x10101010, 0x10101010
  343.     };
  344.  
  345.     gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 12, encoder_context);
  346.     gen75_mfc_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 12, encoder_context);
  347.     gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 16, encoder_context);
  348.     gen75_mfc_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 16, encoder_context);
  349. }
  350.  
  351. static void
  352. gen75_mfc_fqm_state(VADriverContextP ctx,
  353.                     int fqm_type,
  354.                     unsigned int *fqm,
  355.                     int fqm_length,
  356.                     struct intel_encoder_context *encoder_context)
  357. {
  358.     struct intel_batchbuffer *batch = encoder_context->base.batch;
  359.     unsigned int fqm_buffer[32];
  360.  
  361.     assert(fqm_length <= 32);
  362.     assert(sizeof(*fqm) == 4);
  363.     memcpy(fqm_buffer, fqm, fqm_length * 4);
  364.  
  365.     BEGIN_BCS_BATCH(batch, 34);
  366.     OUT_BCS_BATCH(batch, MFX_FQM_STATE | (34 - 2));
  367.     OUT_BCS_BATCH(batch, fqm_type << 0);
  368.     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
  369.     ADVANCE_BCS_BATCH(batch);
  370. }
  371.  
  372. static void
  373. gen75_mfc_avc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
  374. {
  375.     unsigned int qm[32] = {
  376.         0x10001000, 0x10001000, 0x10001000, 0x10001000,
  377.         0x10001000, 0x10001000, 0x10001000, 0x10001000,
  378.         0x10001000, 0x10001000, 0x10001000, 0x10001000,
  379.         0x10001000, 0x10001000, 0x10001000, 0x10001000,
  380.         0x10001000, 0x10001000, 0x10001000, 0x10001000,
  381.         0x10001000, 0x10001000, 0x10001000, 0x10001000,
  382.         0x10001000, 0x10001000, 0x10001000, 0x10001000,
  383.         0x10001000, 0x10001000, 0x10001000, 0x10001000
  384.     };
  385.  
  386.     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, qm, 24, encoder_context);
  387.     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, qm, 24, encoder_context);
  388.     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, qm, 32, encoder_context);
  389.     gen75_mfc_fqm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, qm, 32, encoder_context);
  390. }
  391.  
  392. static void
  393. gen75_mfc_avc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
  394.                             unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
  395.                             int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
  396.                             struct intel_batchbuffer *batch)
  397. {
  398.     if (batch == NULL)
  399.         batch = encoder_context->base.batch;
  400.  
  401.     BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);
  402.  
  403.     OUT_BCS_BATCH(batch, MFX_INSERT_OBJECT | (lenght_in_dws + 2 - 2));
  404.     OUT_BCS_BATCH(batch,
  405.                   (0 << 16) |   /* always start at offset 0 */
  406.                   (data_bits_in_last_dw << 8) |
  407.                   (skip_emul_byte_count << 4) |
  408.                   (!!emulation_flag << 3) |
  409.                   ((!!is_last_header) << 2) |
  410.                   ((!!is_end_of_slice) << 1) |
  411.                   (0 << 0));    /* FIXME: ??? */
  412.     intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);
  413.  
  414.     ADVANCE_BCS_BATCH(batch);
  415. }
  416.  
  417.  
/*
 * Per-frame MFC (re)initialization: releases all buffers left over from
 * the previous frame, then allocates the scratch/status buffers sized
 * from the sequence parameters and creates a fresh auxiliary
 * batchbuffer for slice data.  Codec may be H.264(/MVC) or MPEG-2.
 */
static void gen75_mfc_init(VADriverContextP ctx,
                           struct encode_state *encode_state,
                           struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    dri_bo *bo;
    int i;
    int width_in_mbs = 0;
    int height_in_mbs = 0;
    int slice_batchbuffer_size;

    /* Picture dimensions in macroblocks come from the codec-specific
     * sequence parameter buffer. */
    if (encoder_context->codec == CODEC_H264 ||
        encoder_context->codec == CODEC_H264_MVC) {
        VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
        width_in_mbs = pSequenceParameter->picture_width_in_mbs;
        height_in_mbs = pSequenceParameter->picture_height_in_mbs;
    } else {
        VAEncSequenceParameterBufferMPEG2 *pSequenceParameter = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;

        assert(encoder_context->codec == CODEC_MPEG2);

        /* MPEG-2 gives pixel dimensions; round up to whole MBs. */
        width_in_mbs = ALIGN(pSequenceParameter->picture_width, 16) / 16;
        height_in_mbs = ALIGN(pSequenceParameter->picture_height, 16) / 16;
    }

    /* Worst-case slice batch: 64 bytes per MB plus per-slice header and
     * tail commands, with 4KB slack. */
    slice_batchbuffer_size = 64 * width_in_mbs * height_in_mbs + 4096 +
                (SLICE_HEADER + SLICE_TAIL) * encode_state->num_slice_params_ext;

    /*Encode common setup for MFC*/
    dri_bo_unreference(mfc_context->post_deblocking_output.bo);
    mfc_context->post_deblocking_output.bo = NULL;

    dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
    mfc_context->pre_deblocking_output.bo = NULL;

    dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
    mfc_context->uncompressed_picture_source.bo = NULL;

    dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
    mfc_context->mfc_indirect_pak_bse_object.bo = NULL;

    /* Drop any direct-MV buffers held from the previous frame. */
    for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
        if (mfc_context->direct_mv_buffers[i].bo != NULL)
            dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
        mfc_context->direct_mv_buffers[i].bo = NULL;
    }

    /* Drop held references to reconstructed reference surfaces. */
    for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
        if (mfc_context->reference_surfaces[i].bo != NULL)
            dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
        mfc_context->reference_surfaces[i].bo = NULL;
    }

    /* Intra row-store scratch: 64 bytes per MB column. */
    dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "Buffer",
                      width_in_mbs * 64,
                      64);
    assert(bo);
    mfc_context->intra_row_store_scratch_buffer.bo = bo;

    /* Macroblock status buffer: 16 bytes per MB. */
    dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "Buffer",
                      width_in_mbs * height_in_mbs * 16,
                      64);
    assert(bo);
    mfc_context->macroblock_status_buffer.bo = bo;

    dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "Buffer",
                      4 * width_in_mbs * 64,  /* 4 * width_in_mbs * 64 */
                      64);
    assert(bo);
    mfc_context->deblocking_filter_row_store_scratch_buffer.bo = bo;

    dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "Buffer",
                      2 * width_in_mbs * 64, /* 2 * width_in_mbs * 64 */
                      0x1000);
    assert(bo);
    mfc_context->bsd_mpc_row_store_scratch_buffer.bo = bo;

    dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
    mfc_context->mfc_batchbuffer_surface.bo = NULL;

    dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
    mfc_context->aux_batchbuffer_surface.bo = NULL;

    if (mfc_context->aux_batchbuffer)
        intel_batchbuffer_free(mfc_context->aux_batchbuffer);

    /* Fresh auxiliary batchbuffer for slice commands; also expose its
     * backing bo as a 16-byte-block surface for the media kernel. */
    mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD,
                                                        slice_batchbuffer_size);
    mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
    dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
    mfc_context->aux_batchbuffer_surface.pitch = 16;
    mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
    mfc_context->aux_batchbuffer_surface.size_block = 16;

    i965_gpe_context_init(ctx, &mfc_context->gpe_context);
}
  523.  
/*
 * Emit the 61-DW (B0+ stepping) MFX_PIPE_BUF_ADDR_STATE: programs the
 * pre/post-deblocking outputs, source picture, MB status, row-store
 * scratch buffers and the reference picture list.  Each address slot is
 * 3 DWs on this layout (address + two upper/attribute DWs).
 */
static void
gen75_mfc_pipe_buf_addr_state_bplus(VADriverContextP ctx,
                                    struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    int i;

    BEGIN_BCS_BATCH(batch, 61);

    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));

    /* the DW1-3 is for pre_deblocking */
    if (mfc_context->pre_deblocking_output.bo)
        OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);                                                                                        /* pre output addr   */

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    /* the DW4-6 is for the post_deblocking */

    if (mfc_context->post_deblocking_output.bo)
        OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);                                                                                       /* post output addr  */
    else
        OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW7-9 is for the uncompressed_picture */
    OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0); /* uncompressed data */

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW10-12 is for the mb status */
    OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0); /* StreamOut data*/
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW13-15 is for the intra_row_store_scratch */
    OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW16-18 is for the deblocking filter */
    OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW 19-50 is for Reference pictures*/
    for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
        if ( mfc_context->reference_surfaces[i].bo != NULL) {
            OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
                          I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                          0);
        } else {
            OUT_BCS_BATCH(batch, 0);
        }
        OUT_BCS_BATCH(batch, 0);
    }
    OUT_BCS_BATCH(batch, 0);

    /* The DW 52-54 is for the MB status buffer */
    OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);                                                                                   /* Macroblock status buffer*/

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW 55-57 is the ILDB buffer */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW 58-60 is the second ILDB buffer */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  618.  
/*
 * Emit MFX_PIPE_BUF_ADDR_STATE, dispatching to the 61-DW b-plus layout
 * on B0+ steppings; otherwise emits the legacy 25-DW form (one DW per
 * address slot) with the same buffer programming.
 */
static void
gen75_mfc_pipe_buf_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    int i;

    if (IS_STEPPING_BPLUS(i965)) {
        gen75_mfc_pipe_buf_addr_state_bplus(ctx, encoder_context);
        return;
    }

    BEGIN_BCS_BATCH(batch, 25);

    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (25 - 2));

    if (mfc_context->pre_deblocking_output.bo)
        OUT_BCS_RELOC(batch, mfc_context->pre_deblocking_output.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);                                                                                        /* pre output addr   */

    if (mfc_context->post_deblocking_output.bo)
        OUT_BCS_RELOC(batch, mfc_context->post_deblocking_output.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);                                                                                       /* post output addr  */
    else
        OUT_BCS_BATCH(batch, 0);

    OUT_BCS_RELOC(batch, mfc_context->uncompressed_picture_source.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);                                                                                   /* uncompressed data */
    OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);                                                                                   /* StreamOut data*/
    OUT_BCS_RELOC(batch, mfc_context->intra_row_store_scratch_buffer.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    OUT_BCS_RELOC(batch, mfc_context->deblocking_filter_row_store_scratch_buffer.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    /* 7..22 Reference pictures*/
    for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
        if ( mfc_context->reference_surfaces[i].bo != NULL) {
            OUT_BCS_RELOC(batch, mfc_context->reference_surfaces[i].bo,
                          I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                          0);
        } else {
            OUT_BCS_BATCH(batch, 0);
        }
    }
    OUT_BCS_RELOC(batch, mfc_context->macroblock_status_buffer.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);                                                                                   /* Macroblock status buffer*/

    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
  680.  
/*
 * Emit MFX_AVC_DIRECTMODE_STATE (71 DWords total) for B+ stepping parts.
 * Programs the direct-MV buffer addresses for all reference frames and the
 * current frame, followed by the POC list.  The DWord order and count are
 * fixed by the hardware command layout and must not be changed.
 */
static void
gen75_mfc_avc_directmode_state_bplus(VADriverContextP ctx,
                                     struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;

    int i;

    BEGIN_BCS_BATCH(batch, 71);

    /* DW0: command opcode | (payload length - 2), per MFX command encoding */
    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));

    /* Reference frames and Current frames */
    /* DW1-32: direct-MV buffer addresses for the reference frames; each
     * entry occupies two DWords (address + upper/zero), so the loop steps
     * by 2 and covers the first NUM_MFC_DMV_BUFFERS - 2 buffers. */
    for(i = 0; i < NUM_MFC_DMV_BUFFERS - 2; i += 2) {
        if ( mfc_context->direct_mv_buffers[i].bo != NULL) {
            OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0);
            OUT_BCS_BATCH(batch, 0);
        } else {
            /* No buffer allocated: emit a null address pair */
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }
    /* DW33: padding/reserved */
    OUT_BCS_BATCH(batch, 0);

    /* DW34-36: direct-MV write buffer for the current frame (the
     * second-to-last slot of direct_mv_buffers) */
    OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[NUM_MFC_DMV_BUFFERS - 2].bo,
                  I915_GEM_DOMAIN_INSTRUCTION, 0,
                  0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW37-68: POC list — presumably picture order counts, two entries per
     * frame (hence i/2); NOTE(review): exact semantics per Intel PRM, confirm */
    for(i = 0; i < 32; i++) {
        OUT_BCS_BATCH(batch, i/2);
    }
    /* DW69-70: reserved */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
  726.  
/*
 * Emit MFX_AVC_DIRECTMODE_STATE for A-stepping parts (69 DWords), or
 * delegate to the B+ variant when the hardware stepping supports it.
 * On A stepping each direct-MV buffer takes a single address DWord.
 */
static void
gen75_mfc_avc_directmode_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    int i;

    /* B+ stepping uses a different (wider) command layout */
    if (IS_STEPPING_BPLUS(i965)) {
        gen75_mfc_avc_directmode_state_bplus(ctx, encoder_context);
        return;
    }

    BEGIN_BCS_BATCH(batch, 69);

    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));

    /* Reference frames and Current frames */
    /* One address DWord per direct-MV buffer; null for unallocated slots */
    for(i = 0; i < NUM_MFC_DMV_BUFFERS; i++) {
        if ( mfc_context->direct_mv_buffers[i].bo != NULL) {
            OUT_BCS_RELOC(batch, mfc_context->direct_mv_buffers[i].bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0);
        } else {
            OUT_BCS_BATCH(batch, 0);
        }
    }

    /* POC list — presumably picture order counts, two entries per frame
     * (hence i/2); NOTE(review): confirm semantics against the Intel PRM */
    for(i = 0; i < 32; i++) {
        OUT_BCS_BATCH(batch, i/2);
    }
    /* Trailing reserved DWords */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
  764.  
  765.  
/*
 * Emit MFX_BSP_BUF_BASE_ADDR_STATE (10 DWords) for B+ stepping parts.
 * Only the BSD/MPC row-store scratch buffer is programmed; the MPR
 * row-store and bitplane-read slots are left null (unused by the encoder).
 */
static void
gen75_mfc_bsp_buf_base_addr_state_bplus(VADriverContextP ctx,
                                        struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;

    BEGIN_BCS_BATCH(batch, 10);

    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
    /* DW1-3: BSD/MPC row-store scratch buffer base address */
    OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
       
    /* the DW4-6 is for MPR Row Store Scratch Buffer Base Address */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the DW7-9 is for Bitplane Read Buffer Base Address */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
  794.  
/*
 * Emit MFX_BSP_BUF_BASE_ADDR_STATE for A-stepping parts (4 DWords), or
 * delegate to the longer B+ layout when the stepping supports it.
 */
static void
gen75_mfc_bsp_buf_base_addr_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    if (IS_STEPPING_BPLUS(i965)) {
        gen75_mfc_bsp_buf_base_addr_state_bplus(ctx, encoder_context);
        return;
    }

    BEGIN_BCS_BATCH(batch, 4);

    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
    /* DW1: BSD/MPC row-store scratch buffer base address */
    OUT_BCS_RELOC(batch, mfc_context->bsd_mpc_row_store_scratch_buffer.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    /* DW2-3: remaining address slots unused by the encoder */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
  818.  
  819.  
/*
 * Program all per-picture MFX state for AVC encode.  The call order below
 * mirrors the hardware's expected state-programming sequence (pipe mode,
 * surfaces, buffer addresses, image/QM state, direct mode, ref lists) and
 * must be preserved.
 */
static void gen75_mfc_avc_pipeline_picture_programing( VADriverContextP ctx,
                                                       struct encode_state *encode_state,
                                                       struct intel_encoder_context *encoder_context)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;

    mfc_context->pipe_mode_select(ctx, MFX_FORMAT_AVC, encoder_context);
    mfc_context->set_surface_state(ctx, encoder_context);
    mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
    gen75_mfc_pipe_buf_addr_state(ctx, encoder_context);
    gen75_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
    mfc_context->avc_img_state(ctx, encode_state, encoder_context);
    mfc_context->avc_qm_state(ctx, encoder_context);
    mfc_context->avc_fqm_state(ctx, encoder_context);
    gen75_mfc_avc_directmode_state(ctx, encoder_context);
    intel_mfc_avc_ref_idx_state(ctx, encode_state, encoder_context);
}
  837.  
  838.  
  839. static VAStatus gen75_mfc_run(VADriverContextP ctx,
  840.                               struct encode_state *encode_state,
  841.                               struct intel_encoder_context *encoder_context)
  842. {
  843.     struct intel_batchbuffer *batch = encoder_context->base.batch;
  844.  
  845.     intel_batchbuffer_flush(batch);             //run the pipeline
  846.  
  847.     return VA_STATUS_SUCCESS;
  848. }
  849.  
  850.  
  851. static VAStatus
  852. gen75_mfc_stop(VADriverContextP ctx,
  853.                struct encode_state *encode_state,
  854.                struct intel_encoder_context *encoder_context,
  855.                int *encoded_bits_size)
  856. {
  857.     VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN;
  858.     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
  859.     VACodedBufferSegment *coded_buffer_segment;
  860.    
  861.     vaStatus = i965_MapBuffer(ctx, pPicParameter->coded_buf, (void **)&coded_buffer_segment);
  862.     assert(vaStatus == VA_STATUS_SUCCESS);
  863.     *encoded_bits_size = coded_buffer_segment->size * 8;
  864.     i965_UnmapBuffer(ctx, pPicParameter->coded_buf);
  865.  
  866.     return VA_STATUS_SUCCESS;
  867. }
  868.  
  869.  
/*
 * Emit MFX_AVC_SLICE_STATE (11 DWords) for one slice.
 *
 * Derives the slice geometry (first/next MB position, last-slice flag),
 * the weighted-prediction parameters per slice type, and the bit-rate
 * control thresholds from the per-slice-type BRC context, then programs
 * the hardware slice state.  If @batch is NULL the encoder's main batch
 * is used, allowing callers to redirect output to an aux batchbuffer.
 */
static void
gen75_mfc_avc_slice_state(VADriverContextP ctx,
                          VAEncPictureParameterBufferH264 *pic_param,
                          VAEncSliceParameterBufferH264 *slice_param,
                          struct encode_state *encode_state,
                          struct intel_encoder_context *encoder_context,
                          int rate_control_enable,
                          int qp,
                          struct intel_batchbuffer *batch)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
    int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
    int beginmb = slice_param->macroblock_address;
    int endmb = beginmb + slice_param->num_macroblocks;
    int beginx = beginmb % width_in_mbs;
    int beginy = beginmb / width_in_mbs;
    int nextx =  endmb % width_in_mbs;
    int nexty = endmb / width_in_mbs;
    int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
    int last_slice = (endmb == (width_in_mbs * height_in_mbs));
    int maxQpN, maxQpP;
    unsigned char correct[6], grow, shrink;
    int i;
    int weighted_pred_idc = 0;
    unsigned int luma_log2_weight_denom = slice_param->luma_log2_weight_denom;
    unsigned int chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;
    int num_ref_l0 = 0, num_ref_l1 = 0;

    if (batch == NULL)
        batch = encoder_context->base.batch;

    /* Per-slice-type prediction setup: I slices use no weighting; P uses
     * list 0 only; B uses both lists and may force the implicit weight
     * denominators. */
    if (slice_type == SLICE_TYPE_I) {
        luma_log2_weight_denom = 0;
        chroma_log2_weight_denom = 0;
    } else if (slice_type == SLICE_TYPE_P) {
        weighted_pred_idc = pic_param->pic_fields.bits.weighted_pred_flag;
        num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;

        /* Slice-level override of the active reference count, if present */
        if (slice_param->num_ref_idx_active_override_flag)
            num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
    } else if (slice_type == SLICE_TYPE_B) {
        weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;
        num_ref_l0 = pic_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_l1 = pic_param->num_ref_idx_l1_active_minus1 + 1;

        if (slice_param->num_ref_idx_active_override_flag) {
            num_ref_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
            num_ref_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
        }

        if (weighted_pred_idc == 2) {
            /* 8.4.3 - Derivation process for prediction weights (8-279) */
            luma_log2_weight_denom = 5;
            chroma_log2_weight_denom = 5;
        }
    }

    /* Bit-rate control thresholds for this slice type */
    maxQpN = mfc_context->bit_rate_control_context[slice_type].MaxQpNegModifier;
    maxQpP = mfc_context->bit_rate_control_context[slice_type].MaxQpPosModifier;

    for (i = 0; i < 6; i++)
        correct[i] = mfc_context->bit_rate_control_context[slice_type].Correct[i];

    /* Pack init value (low nibble) and resistance (high nibble) */
    grow = mfc_context->bit_rate_control_context[slice_type].GrowInit +
        (mfc_context->bit_rate_control_context[slice_type].GrowResistance << 4);
    shrink = mfc_context->bit_rate_control_context[slice_type].ShrinkInit +
        (mfc_context->bit_rate_control_context[slice_type].ShrinkResistance << 4);

    BEGIN_BCS_BATCH(batch, 11);;

    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2) );
    OUT_BCS_BATCH(batch, slice_type);                   /*Slice Type: I:P:B Slice*/

    /* DW2: active reference counts and weight denominators */
    OUT_BCS_BATCH(batch,
                  (num_ref_l0 << 16) |
                  (num_ref_l1 << 24) |
                  (chroma_log2_weight_denom << 8) |
                  (luma_log2_weight_denom << 0));

    OUT_BCS_BATCH(batch,
                  (weighted_pred_idc << 30) |
                  (slice_param->direct_spatial_mv_pred_flag<<29) |             /*Direct Prediction Type*/
                  (slice_param->disable_deblocking_filter_idc << 27) |
                  (slice_param->cabac_init_idc << 24) |
                  (qp<<16) |                    /*Slice Quantization Parameter*/
                  ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
                  ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
    OUT_BCS_BATCH(batch,
                  (beginy << 24) |                      /*First MB X&Y , the begin postion of current slice*/
                  (beginx << 16) |
                  slice_param->macroblock_address );
    OUT_BCS_BATCH(batch, (nexty << 16) | nextx);                       /*Next slice first MB X&Y*/
    OUT_BCS_BATCH(batch,
                  (0/*rate_control_enable*/ << 31) |            /*in CBR mode RateControlCounterEnable = enable*/
                  (1 << 30) |           /*ResetRateControlCounter*/
                  (0 << 28) |           /*RC Triggle Mode = Always Rate Control*/
                  (4 << 24) |     /*RC Stable Tolerance, middle level*/
                  (0/*rate_control_enable*/ << 23) |     /*RC Panic Enable*/                
                  (0 << 22) |     /*QP mode, don't modfiy CBP*/
                  (0 << 21) |     /*MB Type Direct Conversion Enabled*/
                  (0 << 20) |     /*MB Type Skip Conversion Enabled*/
                  (last_slice << 19) |     /*IsLastSlice*/
                  (0 << 18) |   /*BitstreamOutputFlag Compressed BitStream Output Disable Flag 0:enable 1:disable*/
                  (1 << 17) |       /*HeaderPresentFlag*/      
                  (1 << 16) |       /*SliceData PresentFlag*/
                  (1 << 15) |       /*TailPresentFlag*/
                  (1 << 13) |       /*RBSP NAL TYPE*/  
                  (0 << 12) );    /*CabacZeroWordInsertionEnable*/
    /* DW7: indirect PAK-BSE data start offset */
    OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
    /* DW8: QP clamp limits and grow/shrink parameters for rate control */
    OUT_BCS_BATCH(batch,
                  (maxQpN << 24) |     /*Target QP - 24 is lowest QP*/
                  (maxQpP << 16) |     /*Target QP + 20 is highest QP*/
                  (shrink << 8)  |
                  (grow << 0));  
    /* DW9: RC correction factors, 4 bits each */
    OUT_BCS_BATCH(batch,
                  (correct[5] << 20) |
                  (correct[4] << 16) |
                  (correct[3] << 12) |
                  (correct[2] << 8) |
                  (correct[1] << 4) |
                  (correct[0] << 0));
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
  996.  
  997.  
  998. #if MFC_SOFTWARE_HASWELL
  999.  
/*
 * Emit one MFC_AVC_PAK_OBJECT (12 DWords) for an intra macroblock at MB
 * position (x, y).  @msg points at the VME output record for this MB;
 * msg[0] carries the MB mode bits that are repacked into the PAK intra
 * message, msg[1..3] carry the intra prediction modes.
 *
 * Returns the command length in DWords so the caller can track batch usage.
 */
static int
gen75_mfc_avc_pak_object_intra(VADriverContextP ctx, int x, int y, int end_mb,
                               int qp,unsigned int *msg,
                               struct intel_encoder_context *encoder_context,
                               unsigned char target_mb_size, unsigned char max_mb_size,
                               struct intel_batchbuffer *batch)
{
    int len_in_dwords = 12;
    unsigned int intra_msg;
#define         INTRA_MSG_FLAG          (1 << 13)
#define         INTRA_MBTYPE_MASK       (0x1F0000)
    if (batch == NULL)
        batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, len_in_dwords);

    /* Repack the VME message: keep the low mode bits, set the intra flag,
     * and move the MB type field down 8 bits into PAK's expected position */
    intra_msg = msg[0] & 0xC0FF;
    intra_msg |= INTRA_MSG_FLAG;
    intra_msg |= ((msg[0] & INTRA_MBTYPE_MASK) >> 8);
    OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  (0 << 24) |           /* PackedMvNum, Debug*/
                  (0 << 20) |           /* No motion vector */
                  (1 << 19) |           /* CbpDcY */
                  (1 << 18) |           /* CbpDcU */
                  (1 << 17) |           /* CbpDcV */
                  intra_msg);

    OUT_BCS_BATCH(batch, (0xFFFF << 16) | (y << 8) | x);                /* Code Block Pattern for Y*/
    OUT_BCS_BATCH(batch, 0x000F000F);                                                   /* Code Block Pattern */               
    OUT_BCS_BATCH(batch, (0 << 27) | (end_mb << 26) | qp);      /* Last MB */

    /*Stuff for Intra MB*/
    OUT_BCS_BATCH(batch, msg[1]);                       /* We using Intra16x16 no 4x4 predmode*/       
    OUT_BCS_BATCH(batch, msg[2]);      
    OUT_BCS_BATCH(batch, msg[3]&0xFF); 
   
    /*MaxSizeInWord and TargetSzieInWord*/
    OUT_BCS_BATCH(batch, (max_mb_size << 24) |
                  (target_mb_size << 16) );

    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);

    return len_in_dwords;
}
  1049.  
/*
 * Emit one MFC_AVC_PAK_OBJECT (12 DWords) for an inter macroblock at MB
 * position (x, y).  @msg points at the VME output record; @offset is the
 * byte offset of this MB's motion vectors inside the VME output buffer
 * (programmed as the indirect MV address).
 *
 * Before emitting, the VME motion vectors (stored per 4x4 sub-block) are
 * compacted in place to the layout the PAK command expects for the given
 * partitioning (8x16 / 16x8 / 8x8 without sub-shapes).
 *
 * Returns the command length in DWords.
 */
static int
gen75_mfc_avc_pak_object_inter(VADriverContextP ctx, int x, int y, int end_mb, int qp,
                               unsigned int *msg, unsigned int offset,
                               struct intel_encoder_context *encoder_context,
                               unsigned char target_mb_size,unsigned char max_mb_size, int slice_type,
                               struct intel_batchbuffer *batch)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    int len_in_dwords = 12;
    unsigned int inter_msg = 0;
    if (batch == NULL)
        batch = encoder_context->base.batch;
    {
#define MSG_MV_OFFSET   4
        unsigned int *mv_ptr;
        mv_ptr = msg + MSG_MV_OFFSET;
        /* MV of VME output is based on 16 sub-blocks. So it is necessary
         * to convert them to be compatible with the format of AVC_PAK
         * command.
         */
        if ((msg[0] & INTER_MODE_MASK) == INTER_8X16) {
            /* MV[0] and MV[2] are replicated */
            mv_ptr[4] = mv_ptr[0];
            mv_ptr[5] = mv_ptr[1];
            mv_ptr[2] = mv_ptr[8];
            mv_ptr[3] = mv_ptr[9];
            mv_ptr[6] = mv_ptr[8];
            mv_ptr[7] = mv_ptr[9];
        } else if ((msg[0] & INTER_MODE_MASK) == INTER_16X8) {
            /* MV[0] and MV[1] are replicated */
            mv_ptr[2] = mv_ptr[0];
            mv_ptr[3] = mv_ptr[1];
            mv_ptr[4] = mv_ptr[16];
            mv_ptr[5] = mv_ptr[17];
            mv_ptr[6] = mv_ptr[24];
            mv_ptr[7] = mv_ptr[25];
        } else if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
                   !(msg[1] & SUBMB_SHAPE_MASK)) {
            /* Don't touch MV[0] or MV[1] */
            mv_ptr[2] = mv_ptr[8];
            mv_ptr[3] = mv_ptr[9];
            mv_ptr[4] = mv_ptr[16];
            mv_ptr[5] = mv_ptr[17];
            mv_ptr[6] = mv_ptr[24];
            mv_ptr[7] = mv_ptr[25];
        }
    }

    BEGIN_BCS_BATCH(batch, len_in_dwords);

    OUT_BCS_BATCH(batch, MFC_AVC_PAK_OBJECT | (len_in_dwords - 2));

    /* DW1: byte count of indirect MV data — 32 bytes (8 MVs) normally,
     * 128 bytes (32 MVs) for 8x8 with sub-partitions */
    inter_msg = 32;
    /* MV quantity */
    if ((msg[0] & INTER_MODE_MASK) == INTER_8X8) {
        if (msg[1] & SUBMB_SHAPE_MASK)
            inter_msg = 128;
    }
    OUT_BCS_BATCH(batch, inter_msg);         /* 32 MV*/
    OUT_BCS_BATCH(batch, offset);
    /* DW3: repacked inter MB mode — keep mode/shape bits from the VME
     * message, select the MV count flags, force CBP DC bits on */
    inter_msg = msg[0] & (0x1F00FFFF);
    inter_msg |= INTER_MV8;
    inter_msg |= ((1 << 19) | (1 << 18) | (1 << 17));
    if (((msg[0] & INTER_MODE_MASK) == INTER_8X8) &&
        (msg[1] & SUBMB_SHAPE_MASK)) {
        inter_msg |= INTER_MV32;
    }

    OUT_BCS_BATCH(batch, inter_msg);

    OUT_BCS_BATCH(batch, (0xFFFF<<16) | (y << 8) | x);        /* Code Block Pattern for Y*/
    OUT_BCS_BATCH(batch, 0x000F000F);                         /* Code Block Pattern */  
#if 0
    if ( slice_type == SLICE_TYPE_B) {
        OUT_BCS_BATCH(batch, (0xF<<28) | (end_mb << 26) | qp);  /* Last MB */
    } else {
        OUT_BCS_BATCH(batch, (end_mb << 26) | qp);      /* Last MB */
    }
#else
    OUT_BCS_BATCH(batch, (end_mb << 26) | qp);  /* Last MB */
#endif

    /* DW7: sub-MB prediction modes from the VME message */
    inter_msg = msg[1] >> 8;
    /*Stuff for Inter MB*/
    OUT_BCS_BATCH(batch, inter_msg);        
    OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[0]);
    OUT_BCS_BATCH(batch, vme_context->ref_index_in_mb[1]);

    /*MaxSizeInWord and TargetSzieInWord*/
    OUT_BCS_BATCH(batch, (max_mb_size << 24) |
                  (target_mb_size << 16) );

    OUT_BCS_BATCH(batch, 0x0);    

    ADVANCE_BCS_BATCH(batch);

    return len_in_dwords;
}
  1148.  
  1149. static void
  1150. gen75_mfc_avc_pipeline_slice_programing(VADriverContextP ctx,
  1151.                                         struct encode_state *encode_state,
  1152.                                         struct intel_encoder_context *encoder_context,
  1153.                                         int slice_index,
  1154.                                         struct intel_batchbuffer *slice_batch)
  1155. {
  1156.     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
  1157.     struct gen6_vme_context *vme_context = encoder_context->vme_context;
  1158.     VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
  1159.     VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
  1160.     VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
  1161.     unsigned int *msg = NULL, offset = 0;
  1162.     unsigned char *msg_ptr = NULL;
  1163.     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
  1164.     int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
  1165.     int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
  1166.     int i,x,y;
  1167.     int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
  1168.     unsigned int rate_control_mode = encoder_context->rate_control_mode;
  1169.     unsigned int tail_data[] = { 0x0, 0x0 };
  1170.     int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
  1171.     int is_intra = slice_type == SLICE_TYPE_I;
  1172.     int qp_slice;
  1173.  
  1174.     qp_slice = qp;
  1175.     if (rate_control_mode == VA_RC_CBR) {
  1176.         qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
  1177.         if (encode_state->slice_header_index[slice_index] == 0) {
  1178.             pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
  1179.             qp_slice = qp;
  1180.         }
  1181.     }
  1182.  
  1183.     /* only support for 8-bit pixel bit-depth */
  1184.     assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
  1185.     assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
  1186.     assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
  1187.     assert(qp >= 0 && qp < 52);
  1188.  
  1189.     gen75_mfc_avc_slice_state(ctx,
  1190.                               pPicParameter,
  1191.                               pSliceParameter,
  1192.                               encode_state, encoder_context,
  1193.                               (rate_control_mode == VA_RC_CBR), qp_slice, slice_batch);
  1194.  
  1195.     if ( slice_index == 0)
  1196.         intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);
  1197.  
  1198.     intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch);
  1199.  
  1200.     dri_bo_map(vme_context->vme_output.bo , 1);
  1201.     msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
  1202.  
  1203.     if (is_intra) {
  1204.         msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
  1205.     } else {
  1206.         msg = (unsigned int *) (msg_ptr + pSliceParameter->macroblock_address * vme_context->vme_output.size_block);
  1207.     }
  1208.    
  1209.     for (i = pSliceParameter->macroblock_address;
  1210.          i < pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks; i++) {
  1211.         int last_mb = (i == (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks - 1) );
  1212.         x = i % width_in_mbs;
  1213.         y = i / width_in_mbs;
  1214.         msg = (unsigned int *) (msg_ptr + i * vme_context->vme_output.size_block);
  1215.  
  1216.         if (is_intra) {
  1217.             assert(msg);
  1218.             gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
  1219.         } else {
  1220.             int inter_rdo, intra_rdo;
  1221.             inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
  1222.             intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
  1223.             offset = i * vme_context->vme_output.size_block + AVC_INTER_MV_OFFSET;
  1224.             if (intra_rdo < inter_rdo) {
  1225.                 gen75_mfc_avc_pak_object_intra(ctx, x, y, last_mb, qp, msg, encoder_context, 0, 0, slice_batch);
  1226.             } else {
  1227.                 msg += AVC_INTER_MSG_OFFSET;
  1228.                 gen75_mfc_avc_pak_object_inter(ctx, x, y, last_mb, qp, msg, offset, encoder_context, 0, 0, slice_type, slice_batch);
  1229.             }
  1230.         }
  1231.     }
  1232.    
  1233.     dri_bo_unmap(vme_context->vme_output.bo);
  1234.  
  1235.     if ( last_slice ) {    
  1236.         mfc_context->insert_object(ctx, encoder_context,
  1237.                                    tail_data, 2, 8,
  1238.                                    2, 1, 1, 0, slice_batch);
  1239.     } else {
  1240.         mfc_context->insert_object(ctx, encoder_context,
  1241.                                    tail_data, 1, 8,
  1242.                                    1, 1, 1, 0, slice_batch);
  1243.     }
  1244. }
  1245.  
  1246. static dri_bo *
  1247. gen75_mfc_avc_software_batchbuffer(VADriverContextP ctx,
  1248.                                    struct encode_state *encode_state,
  1249.                                    struct intel_encoder_context *encoder_context)
  1250. {
  1251.     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
  1252.     struct intel_batchbuffer *batch;
  1253.     dri_bo *batch_bo;
  1254.     int i;
  1255.     int buffer_size;
  1256.  
  1257.     batch = mfc_context->aux_batchbuffer;
  1258.     batch_bo = batch->buffer;
  1259.     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
  1260.         gen75_mfc_avc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch);
  1261.     }
  1262.  
  1263.     intel_batchbuffer_align(batch, 8);
  1264.    
  1265.     BEGIN_BCS_BATCH(batch, 2);
  1266.     OUT_BCS_BATCH(batch, 0);
  1267.     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
  1268.     ADVANCE_BCS_BATCH(batch);
  1269.  
  1270.     dri_bo_reference(batch_bo);
  1271.  
  1272.     intel_batchbuffer_free(batch);
  1273.     mfc_context->aux_batchbuffer = NULL;
  1274.  
  1275.     return batch_bo;
  1276. }
  1277.  
  1278. #else
  1279.  
/*
 * Bind the VME output buffer as the input surface for the MFC batchbuffer
 * kernel (read side of the hardware batchbuffer-generation path).
 */
static void
gen75_mfc_batchbuffer_surfaces_input(VADriverContextP ctx,
                                     struct encode_state *encode_state,
                                     struct intel_encoder_context *encoder_context)

{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;

    assert(vme_context->vme_output.bo);
    /* (sic) "buffer_suface_setup" is the project-wide field name */
    mfc_context->buffer_suface_setup(ctx,
                                     &mfc_context->gpe_context,
                                     &vme_context->vme_output,
                                     BINDING_TABLE_OFFSET(BIND_IDX_VME_OUTPUT),
                                     SURFACE_STATE_OFFSET(BIND_IDX_VME_OUTPUT));
}
  1296.  
/*
 * Bind the aux batchbuffer surface as the output surface for the MFC
 * batchbuffer kernel (write side of the hardware batchbuffer-generation
 * path).
 */
static void
gen75_mfc_batchbuffer_surfaces_output(VADriverContextP ctx,
                                      struct encode_state *encode_state,
                                      struct intel_encoder_context *encoder_context)

{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    assert(mfc_context->aux_batchbuffer_surface.bo);
    mfc_context->buffer_suface_setup(ctx,
                                     &mfc_context->gpe_context,
                                     &mfc_context->aux_batchbuffer_surface,
                                     BINDING_TABLE_OFFSET(BIND_IDX_MFC_BATCHBUFFER),
                                     SURFACE_STATE_OFFSET(BIND_IDX_MFC_BATCHBUFFER));
}
  1311.  
/* Set up both surfaces (VME input, batchbuffer output) for the MFC
 * batchbuffer kernel. */
static void
gen75_mfc_batchbuffer_surfaces_setup(VADriverContextP ctx,
                                     struct encode_state *encode_state,
                                     struct intel_encoder_context *encoder_context)
{
    gen75_mfc_batchbuffer_surfaces_input(ctx, encode_state, encoder_context);
    gen75_mfc_batchbuffer_surfaces_output(ctx, encode_state, encoder_context);
}
  1320.  
/*
 * Fill the interface descriptor remap table (IDRT) for the GPE context:
 * one 32-byte gen6_interface_descriptor_data entry per kernel, each
 * pointing at its kernel's start address with a relocation so the GPU
 * address is patched at execbuffer time.
 */
static void
gen75_mfc_batchbuffer_idrt_setup(VADriverContextP ctx,
                                 struct encode_state *encode_state,
                                 struct intel_encoder_context *encoder_context)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    struct gen6_interface_descriptor_data *desc;  
    int i;
    dri_bo *bo;

    /* Map the IDRT bo for CPU writes */
    bo = mfc_context->gpe_context.idrt.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);
    desc = bo->virtual;

    for (i = 0; i < mfc_context->gpe_context.num_kernels; i++) {
        struct i965_kernel *kernel;

        kernel = &mfc_context->gpe_context.kernels[i];
        /* Hardware requires a 32-byte descriptor layout */
        assert(sizeof(*desc) == 32);

        /*Setup the descritor table*/
        memset(desc, 0, sizeof(*desc));
        desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);    /* 64-byte aligned address */
        desc->desc2.sampler_count = 0;
        desc->desc2.sampler_state_pointer = 0;
        desc->desc3.binding_table_entry_count = 2;    /* VME input + batchbuffer output */
        desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET(0) >> 5);
        desc->desc4.constant_urb_entry_read_offset = 0;
        desc->desc4.constant_urb_entry_read_length = 4;
               
        /* Emit a relocation so desc0's kernel address is fixed up when the
         * kernel bo is pinned */
        dri_bo_emit_reloc(bo,  
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0,
                          i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
                          kernel->bo);
        desc++;
    }

    dri_bo_unmap(bo);
}
  1363.  
  1364. static void
  1365. gen75_mfc_batchbuffer_constant_setup(VADriverContextP ctx,
  1366.                                      struct encode_state *encode_state,
  1367.                                      struct intel_encoder_context *encoder_context)
  1368. {
  1369.     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
  1370.    
  1371.     (void)mfc_context;
  1372. }
  1373.  
  1374. #define AVC_PAK_LEN_IN_BYTE     48
  1375. #define AVC_PAK_LEN_IN_OWORD    3
  1376.  
/* Emit one 14-dword MEDIA_OBJECT command that asks the media kernel to
 * generate PAK commands for number_mb_cmds macroblocks starting at
 * (mb_x, mb_y), writing them at head_offset (in 16-byte Owords) in the
 * aux batchbuffer.
 */
static void
gen75_mfc_batchbuffer_emit_object_command(struct intel_batchbuffer *batch,
                                          uint32_t intra_flag,
                                          int head_offset,
                                          int number_mb_cmds,
                                          int slice_end_x,
                                          int slice_end_y,
                                          int mb_x,
                                          int mb_y,
                                          int width_in_mbs,
                                          int qp,
                                          uint32_t fwd_ref,
                                          uint32_t bwd_ref)
{
    uint32_t temp_value;
    BEGIN_BATCH(batch, 14);

    OUT_BATCH(batch, CMD_MEDIA_OBJECT | (14 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);

    /* Inline data consumed by the kernel (8 dwords): */
    /* dw0: write position in the aux batchbuffer, in Oword units */
    OUT_BATCH(batch, head_offset / 16);
    /* dw1: intra flag in the low bits, QP in bits 16+ */
    OUT_BATCH(batch, (intra_flag) | (qp << 16));
    /* dw2: current MB x/y and picture width in MBs */
    temp_value = (mb_x | (mb_y << 8) | (width_in_mbs << 16));
    OUT_BATCH(batch, temp_value);

    /* dw3: how many MB commands to generate in this run */
    OUT_BATCH(batch, number_mb_cmds);

    /* dw4: coordinates of the slice's last MB */
    OUT_BATCH(batch,
              ((slice_end_y << 8) | (slice_end_x)));
    /* dw5/dw6: forward/backward reference indices */
    OUT_BATCH(batch, fwd_ref);
    OUT_BATCH(batch, bwd_ref);

    OUT_BATCH(batch, MI_NOOP);

    ADVANCE_BATCH(batch);
}
  1418.  
  1419. static void
  1420. gen75_mfc_avc_batchbuffer_slice_command(VADriverContextP ctx,
  1421.                                         struct intel_encoder_context *encoder_context,
  1422.                                         VAEncSliceParameterBufferH264 *slice_param,
  1423.                                         int head_offset,
  1424.                                         int qp,
  1425.                                         int last_slice)
  1426. {
  1427.     struct intel_batchbuffer *batch = encoder_context->base.batch;
  1428.     struct gen6_vme_context *vme_context = encoder_context->vme_context;
  1429.     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
  1430.     int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
  1431.     int total_mbs = slice_param->num_macroblocks;
  1432.     int slice_type = intel_avc_enc_slice_type_fixup(slice_param->slice_type);
  1433.     int number_mb_cmds = 128;
  1434.     int starting_offset = 0;
  1435.     int mb_x, mb_y;
  1436.     int last_mb, slice_end_x, slice_end_y;
  1437.     int remaining_mb = total_mbs;
  1438.     uint32_t fwd_ref , bwd_ref, mb_flag;
  1439.  
  1440.     last_mb = slice_param->macroblock_address + total_mbs - 1;
  1441.     slice_end_x = last_mb % width_in_mbs;
  1442.     slice_end_y = last_mb / width_in_mbs;
  1443.  
  1444.     if (slice_type == SLICE_TYPE_I) {
  1445.         fwd_ref = 0;
  1446.         bwd_ref = 0;
  1447.         mb_flag = 1;
  1448.     } else {
  1449.         fwd_ref = vme_context->ref_index_in_mb[0];
  1450.         bwd_ref = vme_context->ref_index_in_mb[1];
  1451.         mb_flag = 0;
  1452.     }
  1453.  
  1454.     if (width_in_mbs >= 100) {
  1455.         number_mb_cmds = width_in_mbs / 5;
  1456.     } else if (width_in_mbs >= 80) {
  1457.         number_mb_cmds = width_in_mbs / 4;
  1458.     } else if (width_in_mbs >= 60) {
  1459.         number_mb_cmds = width_in_mbs / 3;
  1460.     } else if (width_in_mbs >= 40) {
  1461.         number_mb_cmds = width_in_mbs / 2;
  1462.     } else {
  1463.         number_mb_cmds = width_in_mbs;
  1464.     }
  1465.  
  1466.     do {
  1467.         if (number_mb_cmds >= remaining_mb) {
  1468.                 number_mb_cmds = remaining_mb;
  1469.         }
  1470.         mb_x = (slice_param->macroblock_address + starting_offset) % width_in_mbs;
  1471.         mb_y = (slice_param->macroblock_address + starting_offset) / width_in_mbs;
  1472.  
  1473.         gen75_mfc_batchbuffer_emit_object_command(batch,
  1474.                                                   mb_flag,
  1475.                                                   head_offset,
  1476.                                                   number_mb_cmds,
  1477.                                                   slice_end_x,
  1478.                                                   slice_end_y,
  1479.                                                   mb_x,
  1480.                                                   mb_y,
  1481.                                                   width_in_mbs,
  1482.                                                   qp,
  1483.                                                   fwd_ref,
  1484.                                                   bwd_ref);
  1485.  
  1486.         head_offset += (number_mb_cmds * AVC_PAK_LEN_IN_BYTE);
  1487.         remaining_mb -= number_mb_cmds;
  1488.         starting_offset += number_mb_cmds;
  1489.     } while (remaining_mb > 0);
  1490. }
  1491.                          
  1492. /*
  1493.  * return size in Owords (16bytes)
  1494.  */        
static void
gen75_mfc_avc_batchbuffer_slice(VADriverContextP ctx,
                                struct encode_state *encode_state,
                                struct intel_encoder_context *encoder_context,
                                int slice_index)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
    VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param_ext->buffer;
    VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param_ext->buffer;
    VAEncSliceParameterBufferH264 *pSliceParameter = (VAEncSliceParameterBufferH264 *)encode_state->slice_params_ext[slice_index]->buffer;
    int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
    int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
    /* Last slice iff it ends exactly at the bottom-right MB of the frame. */
    int last_slice = (pSliceParameter->macroblock_address + pSliceParameter->num_macroblocks) == (width_in_mbs * height_in_mbs);
    int qp = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
    unsigned int rate_control_mode = encoder_context->rate_control_mode;
    unsigned int tail_data[] = { 0x0, 0x0 };
    long head_offset;
    int slice_type = intel_avc_enc_slice_type_fixup(pSliceParameter->slice_type);
    int qp_slice;

    /* Under CBR the BRC-chosen QP overrides the application QP; the
     * slice_qp_delta is rewritten only when the slice header is built
     * by the driver (slice_header_index == 0).
     */
    qp_slice = qp;
    if (rate_control_mode == VA_RC_CBR) {
        qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
        if (encode_state->slice_header_index[slice_index] == 0) {
            pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
            qp_slice = qp;
        }
    }

    /* only support for 8-bit pixel bit-depth */
    assert(pSequenceParameter->bit_depth_luma_minus8 == 0);
    assert(pSequenceParameter->bit_depth_chroma_minus8 == 0);
    assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
    assert(qp >= 0 && qp < 52);

    gen75_mfc_avc_slice_state(ctx,
                              pPicParameter,
                              pSliceParameter,
                              encode_state,
                              encoder_context,
                              (rate_control_mode == VA_RC_CBR),
                              qp_slice,
                              slice_batch);

    /* SPS/PPS and other picture-level headers go in front of slice 0 only. */
    if (slice_index == 0)
        intel_mfc_avc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);

    intel_avc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch);

    intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
    head_offset = intel_batchbuffer_used_size(slice_batch);

    /* Reserve room in the aux batchbuffer for the PAK objects the media
     * kernel will write at head_offset (AVC_PAK_LEN_IN_BYTE per MB).
     */
    slice_batch->ptr += pSliceParameter->num_macroblocks * AVC_PAK_LEN_IN_BYTE;

    /* NOTE(review): head_offset is long but the callee takes int —
     * presumably fine for batchbuffer-sized offsets; confirm.
     */
    gen75_mfc_avc_batchbuffer_slice_command(ctx,
                                            encoder_context,
                                            pSliceParameter,
                                            head_offset,
                                            qp,
                                            last_slice);


    /* Aligned for tail */
    intel_batchbuffer_align(slice_batch, 16); /* aligned by an Oword */
    /* Tail/padding data: the last slice of the frame gets a longer tail
     * than intermediate slices (2 vs 1 units of tail_data).
     */
    if (last_slice) {
        mfc_context->insert_object(ctx,
                                   encoder_context,
                                   tail_data,
                                   2,
                                   8,
                                   2,
                                   1,
                                   1,
                                   0,
                                   slice_batch);
    } else {
        mfc_context->insert_object(ctx,
                                   encoder_context,
                                   tail_data,
                                   1,
                                   8,
                                   1,
                                   1,
                                   1,
                                   0,
                                   slice_batch);
    }

    return;
}
  1586.  
/* Run the GPE media pipeline that fills the aux batchbuffer: set up the
 * pipeline, emit the per-slice MEDIA_OBJECT commands, terminate the aux
 * buffer with MI_BATCH_BUFFER_END, and flush.
 */
static void
gen75_mfc_avc_batchbuffer_pipeline(VADriverContextP ctx,
                                   struct encode_state *encode_state,
                                   struct intel_encoder_context *encoder_context)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    int i;
    intel_batchbuffer_start_atomic(batch, 0x4000);
    gen6_gpe_pipeline_setup(ctx, &mfc_context->gpe_context, batch);

    for ( i = 0; i < encode_state->num_slice_params_ext; i++) {
        gen75_mfc_avc_batchbuffer_slice(ctx, encode_state, encoder_context, i);
    }
    {
        /* Terminate the aux batchbuffer (padding dword + END) and release
         * the wrapper; only the underlying bo — referenced separately by
         * gen75_mfc_avc_hardware_batchbuffer() — stays alive.
         */
        struct intel_batchbuffer *slice_batch = mfc_context->aux_batchbuffer;
        intel_batchbuffer_align(slice_batch, 8);
        BEGIN_BCS_BATCH(slice_batch, 2);
        OUT_BCS_BATCH(slice_batch, 0);
        OUT_BCS_BATCH(slice_batch, MI_BATCH_BUFFER_END);
        ADVANCE_BCS_BATCH(slice_batch);
        mfc_context->aux_batchbuffer = NULL;
        intel_batchbuffer_free(slice_batch);
    }
    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
  1614.  
  1615. static void
  1616. gen75_mfc_build_avc_batchbuffer(VADriverContextP ctx,
  1617.                                 struct encode_state *encode_state,
  1618.                                 struct intel_encoder_context *encoder_context)
  1619. {
  1620.     gen75_mfc_batchbuffer_surfaces_setup(ctx, encode_state, encoder_context);
  1621.     gen75_mfc_batchbuffer_idrt_setup(ctx, encode_state, encoder_context);
  1622.     gen75_mfc_batchbuffer_constant_setup(ctx, encode_state, encoder_context);
  1623.     gen75_mfc_avc_batchbuffer_pipeline(ctx, encode_state, encoder_context);
  1624. }
  1625.  
  1626. static dri_bo *
  1627. gen75_mfc_avc_hardware_batchbuffer(VADriverContextP ctx,
  1628.                                    struct encode_state *encode_state,
  1629.                                    struct intel_encoder_context *encoder_context)
  1630. {
  1631.     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
  1632.  
  1633.     dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
  1634.     gen75_mfc_build_avc_batchbuffer(ctx, encode_state, encoder_context);
  1635.  
  1636.     return mfc_context->aux_batchbuffer_surface.bo;
  1637. }
  1638.  
  1639. #endif
  1640.  
/* Program the whole AVC encode on the BCS ring: build the slice-level
 * batchbuffer (software or hardware path), emit picture-level state, and
 * chain to the slice batchbuffer with MI_BATCH_BUFFER_START.
 */
static void
gen75_mfc_avc_pipeline_programing(VADriverContextP ctx,
                                  struct encode_state *encode_state,
                                  struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    dri_bo *slice_batch_bo;

    /* Interlaced content is not supported by this path. */
    if ( intel_mfc_interlace_check(ctx, encode_state, encoder_context) ) {
        fprintf(stderr, "Current VA driver don't support interlace mode!\n");
        assert(0);
        return;
    }

#if MFC_SOFTWARE_HASWELL
    slice_batch_bo = gen75_mfc_avc_software_batchbuffer(ctx, encode_state, encoder_context);
#else
    slice_batch_bo = gen75_mfc_avc_hardware_batchbuffer(ctx, encode_state, encoder_context);
#endif

    // begin programing
    intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
    intel_batchbuffer_emit_mi_flush(batch);

    // picture level programing
    gen75_mfc_avc_pipeline_picture_programing(ctx, encode_state, encoder_context);

    /* Chain to the second-level (slice) batchbuffer; bit 8 selects the
     * non-privileged/second-level start per the MI_BATCH_BUFFER_START layout.
     */
    BEGIN_BCS_BATCH(batch, 2);
    OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
    OUT_BCS_RELOC(batch,
                  slice_batch_bo,
                  I915_GEM_DOMAIN_COMMAND, 0,
                  0);
    ADVANCE_BCS_BATCH(batch);

    // end programing
    intel_batchbuffer_end_atomic(batch);

    /* Drop the reference taken when the slice batchbuffer was created. */
    dri_bo_unreference(slice_batch_bo);
}
  1681.  
  1682.  
/* Top-level AVC encode entry: runs the MFC pipeline and, under CBR,
 * repeats the whole encode with BRC-adjusted QP until the coded size
 * satisfies the HRD, or gives up on an unrepairable violation.
 */
static VAStatus
gen75_mfc_avc_encode_picture(VADriverContextP ctx,
                             struct encode_state *encode_state,
                             struct intel_encoder_context *encoder_context)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    unsigned int rate_control_mode = encoder_context->rate_control_mode;
    int current_frame_bits_size;
    int sts;

    for (;;) {
        gen75_mfc_init(ctx, encode_state, encoder_context);
        intel_mfc_avc_prepare(ctx, encode_state, encoder_context);
        /*Programing bcs pipeline*/
        gen75_mfc_avc_pipeline_programing(ctx, encode_state, encoder_context);  //filling the pipeline
        gen75_mfc_run(ctx, encode_state, encoder_context);
        if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
            /* Wait for the frame and check the produced size against the HRD. */
            gen75_mfc_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
            sts = intel_mfc_brc_postpack(encode_state, mfc_context, current_frame_bits_size);
            if (sts == BRC_NO_HRD_VIOLATION) {
                intel_mfc_hrd_context_update(encode_state, mfc_context);
                break;
            }
            else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
                /* QP already at its limit: accept the frame, warn once. */
                if (!mfc_context->hrd.violation_noted) {
                    fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP)? "overflow": "underflow");
                    mfc_context->hrd.violation_noted = 1;
                }
                return VA_STATUS_SUCCESS;
            }
            /* Other statuses: BRC adjusted QP — loop and re-encode. */
        } else {
            break;
        }
    }

    return VA_STATUS_SUCCESS;
}
  1720.  
  1721. /*
  1722.  * MPEG-2
  1723.  */
  1724.  
/* Map VAEncPictureType (0=I, 1=P, 2=B) to the MFX_MPEG2_PIC_STATE
 * picture coding type field values.
 */
static const int
va_to_gen75_mpeg2_picture_type[3] = {
    1,  /* I */
    2,  /* P */
    3   /* B */
};
  1731.  
/* Emit the 13-dword MFX_MPEG2_PIC_STATE command from the application's
 * MPEG-2 picture parameters (f_codes, coding extension flags, frame size).
 */
static void
gen75_mfc_mpeg2_pic_state(VADriverContextP ctx,
                          struct intel_encoder_context *encoder_context,
                          struct encode_state *encode_state)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    VAEncPictureParameterBufferMPEG2 *pic_param;
    int width_in_mbs = (mfc_context->surface_state.width + 15) / 16;
    int height_in_mbs = (mfc_context->surface_state.height + 15) / 16;
    VAEncSliceParameterBufferMPEG2 *slice_param = NULL;

    assert(encode_state->pic_param_ext && encode_state->pic_param_ext->buffer);
    pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
    /* Only the first slice's quantiser is inspected below. */
    slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[0]->buffer;

    BEGIN_BCS_BATCH(batch, 13);
    OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
    OUT_BCS_BATCH(batch,
                  (pic_param->f_code[1][1] & 0xf) << 28 | /* f_code[1][1] */
                  (pic_param->f_code[1][0] & 0xf) << 24 | /* f_code[1][0] */
                  (pic_param->f_code[0][1] & 0xf) << 20 | /* f_code[0][1] */
                  (pic_param->f_code[0][0] & 0xf) << 16 | /* f_code[0][0] */
                  pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
                  pic_param->picture_coding_extension.bits.picture_structure << 12 |
                  pic_param->picture_coding_extension.bits.top_field_first << 11 |
                  pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
                  pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
                  pic_param->picture_coding_extension.bits.q_scale_type << 8 |
                  pic_param->picture_coding_extension.bits.intra_vlc_format << 7 |
                  pic_param->picture_coding_extension.bits.alternate_scan << 6);
    OUT_BCS_BATCH(batch,
                  0 << 14 |     /* LoadSlicePointerFlag, 0 means only loading bitstream pointer once */
                  va_to_gen75_mpeg2_picture_type[pic_param->picture_type] << 9 |
                  0);
    OUT_BCS_BATCH(batch,
                  1 << 31 |     /* slice concealment */
                  (height_in_mbs - 1) << 16 |
                  (width_in_mbs - 1));
    /* NOTE(review): magic dword enabled only for high quantiser scale codes
     * — presumably a rate-control/threshold tweak; confirm against the PRM.
     */
    if (slice_param && slice_param->quantiser_scale_code >= 14)
        OUT_BCS_BATCH(batch, (3 << 1) | (1 << 4) | (5 << 8) | (1 << 12));
    else
        OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  0xFFF << 16 | /* InterMBMaxSize */
                  0xFFF << 0 |  /* IntraMBMaxSize */
                  0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  1789.  
/* Upload the MPEG-2 quantizer matrices (default intra table from the
 * spec, flat 16s for non-intra) as 16 dwords each via gen75_mfc_qm_state.
 */
static void
gen75_mfc_mpeg2_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    /* Default intra quantizer matrix, raster order (8-bit entries). */
    unsigned char intra_qm[64] = {
        8, 16, 19, 22, 26, 27, 29, 34,
        16, 16, 22, 24, 27, 29, 34, 37,
        19, 22, 26, 27, 29, 34, 34, 38,
        22, 22, 26, 27, 29, 34, 37, 40,
        22, 26, 27, 29, 32, 35, 40, 48,
        26, 27, 29, 32, 35, 40, 48, 58,
        26, 27, 29, 34, 38, 46, 56, 69,
        27, 29, 35, 38, 46, 56, 69, 83
    };

    unsigned char non_intra_qm[64] = {
        16, 16, 16, 16, 16, 16, 16, 16,
        16, 16, 16, 16, 16, 16, 16, 16,
        16, 16, 16, 16, 16, 16, 16, 16,
        16, 16, 16, 16, 16, 16, 16, 16,
        16, 16, 16, 16, 16, 16, 16, 16,
        16, 16, 16, 16, 16, 16, 16, 16,
        16, 16, 16, 16, 16, 16, 16, 16,
        16, 16, 16, 16, 16, 16, 16, 16
    };

    /* 64 bytes are passed as 16 dwords; the cast relies on the arrays
     * being dword-aligned on the stack.
     */
    gen75_mfc_qm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_qm, 16, encoder_context);
    gen75_mfc_qm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_qm, 16,encoder_context);
}
  1818.  
/* Upload the forward (encoder-side) quantizer matrices: 16-bit reciprocal
 * values (65536 / quantizer step) packed as 32 dwords each.
 */
static void
gen75_mfc_mpeg2_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    /* NOTE(review): several entries here (e.g. the 65536/0x13 and
     * 65536/0x18 values in rows 4-5) do not exactly mirror intra_qm in
     * gen75_mfc_mpeg2_qm_state — verify against the upstream driver
     * whether these are intentional or transcription typos.
     */
    unsigned short intra_fqm[64] = {
        65536/0x8, 65536/0x10, 65536/0x13, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b,
        65536/0x10, 65536/0x10, 65536/0x16, 65536/0x16, 65536/0x1a, 65536/0x1b, 65536/0x1b, 65536/0x1d,
        65536/0x13, 65536/0x16, 65536/0x1a, 65536/0x1a, 65536/0x1b, 65536/0x1d, 65536/0x1d, 65536/0x23,
        65536/0x16, 65536/0x18, 65536/0x1b, 65536/0x1b, 65536/0x13, 65536/0x20, 65536/0x22, 65536/0x26,
        65536/0x1a, 65536/0x1b, 65536/0x13, 65536/0x13, 65536/0x20, 65536/0x23, 65536/0x26, 65536/0x2e,
        65536/0x1b, 65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x23, 65536/0x28, 65536/0x2e, 65536/0x38,
        65536/0x1d, 65536/0x22, 65536/0x22, 65536/0x25, 65536/0x28, 65536/0x30, 65536/0x38, 65536/0x45,
        65536/0x22, 65536/0x25, 65536/0x26, 65536/0x28, 65536/0x30, 65536/0x3a, 65536/0x45, 65536/0x53,
    };

    /* Flat non-intra matrix: 65536/16 == 0x1000 for every coefficient. */
    unsigned short non_intra_fqm[64] = {
        0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
        0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
        0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
        0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
        0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
        0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
        0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
        0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000, 0x1000,
    };

    /* 128 bytes per table are passed as 32 dwords. */
    gen75_mfc_fqm_state(ctx, MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX, (unsigned int *)intra_fqm, 32, encoder_context);
    gen75_mfc_fqm_state(ctx, MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX, (unsigned int *)non_intra_fqm, 32, encoder_context);
}
  1847.  
/* Emit the 8-dword MFC_MPEG2_SLICEGROUP_STATE for the slice group
 * spanning MB (x, y) to (next_x, next_y). If batch is NULL the
 * encoder's main batchbuffer is used.
 * Note: is_fisrt_slice_group [sic] is currently unused in the body.
 */
static void
gen75_mfc_mpeg2_slicegroup_state(VADriverContextP ctx,
                                 struct intel_encoder_context *encoder_context,
                                 int x, int y,
                                 int next_x, int next_y,
                                 int is_fisrt_slice_group,
                                 int is_last_slice_group,
                                 int intra_slice,
                                 int qp,
                                 struct intel_batchbuffer *batch)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;

    if (batch == NULL)
        batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, 8);

    OUT_BCS_BATCH(batch, MFC_MPEG2_SLICEGROUP_STATE | (8 - 2));
    OUT_BCS_BATCH(batch,
                  0 << 31 |                             /* MbRateCtrlFlag */
                  !!is_last_slice_group << 19 |         /* IsLastSliceGrp */
                  1 << 17 |                             /* Insert Header before the first slice group data */
                  1 << 16 |                             /* SliceData PresentFlag: always 1 */
                  1 << 15 |                             /* TailPresentFlag: always 1 */
                  0 << 14 |                             /* FirstSliceHdrDisabled: slice header for each slice */
                  !!intra_slice << 13 |                 /* IntraSlice */
                  !!intra_slice << 12 |                 /* IntraSliceFlag */
                  0);
    /* Start/end MB coordinates of this slice group, packed one byte each. */
    OUT_BCS_BATCH(batch,
                  next_y << 24 |
                  next_x << 16 |
                  y << 8 |
                  x << 0 |
                  0);
    OUT_BCS_BATCH(batch, qp);   /* FIXME: SliceGroupQp */
    /* bitstream pointer is only loaded once for the first slice of a frame when
     * LoadSlicePointerFlag is 0
     */
    OUT_BCS_BATCH(batch, mfc_context->mfc_indirect_pak_bse_object.offset);
    OUT_BCS_BATCH(batch, 0);    /* FIXME: */
    OUT_BCS_BATCH(batch, 0);    /* FIXME: CorrectPoints */
    OUT_BCS_BATCH(batch, 0);    /* FIXME: CVxxx */

    ADVANCE_BCS_BATCH(batch);
}
  1894.  
/* Emit a 9-dword MFC_MPEG2_PAK_OBJECT for one intra macroblock at (x, y).
 * All four motion vector dwords are zero for intra MBs.
 * Returns the number of dwords emitted (for batch-size accounting).
 */
static int
gen75_mfc_mpeg2_pak_object_intra(VADriverContextP ctx,
                                 struct intel_encoder_context *encoder_context,
                                 int x, int y,
                                 int first_mb_in_slice,
                                 int last_mb_in_slice,
                                 int first_mb_in_slice_group,
                                 int last_mb_in_slice_group,
                                 int mb_type,
                                 int qp_scale_code,
                                 int coded_block_pattern,
                                 unsigned char target_size_in_word,
                                 unsigned char max_size_in_word,
                                 struct intel_batchbuffer *batch)
{
    int len_in_dwords = 9;

    if (batch == NULL)
        batch = encoder_context->base.batch;

    BEGIN_BCS_BATCH(batch, len_in_dwords);

    OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
    OUT_BCS_BATCH(batch,
                  0 << 24 |     /* PackedMvNum */
                  0 << 20 |     /* MvFormat */
                  7 << 17 |     /* CbpDcY/CbpDcU/CbpDcV */
                  0 << 15 |     /* TransformFlag: frame DCT */
                  0 << 14 |     /* FieldMbFlag */
                  1 << 13 |     /* IntraMbFlag */
                  mb_type << 8 |   /* MbType: Intra */
                  0 << 2 |      /* SkipMbFlag */
                  0 << 0 |      /* InterMbMode */
                  0);
    /* Macroblock coordinates. */
    OUT_BCS_BATCH(batch, y << 16 | x);
    OUT_BCS_BATCH(batch,
                  max_size_in_word << 24 |
                  target_size_in_word << 16 |
                  coded_block_pattern << 6 |      /* CBP */
                  0);
    OUT_BCS_BATCH(batch,
                  last_mb_in_slice << 31 |
                  first_mb_in_slice << 30 |
                  0 << 27 |     /* EnableCoeffClamp */
                  last_mb_in_slice_group << 26 |
                  0 << 25 |     /* MbSkipConvDisable */
                  first_mb_in_slice_group << 24 |
                  0 << 16 |     /* MvFieldSelect */
                  qp_scale_code << 0 |
                  0);
    OUT_BCS_BATCH(batch, 0);    /* MV[0][0] */
    OUT_BCS_BATCH(batch, 0);    /* MV[1][0] */
    OUT_BCS_BATCH(batch, 0);    /* MV[0][1] */
    OUT_BCS_BATCH(batch, 0);    /* MV[1][1] */

    ADVANCE_BCS_BATCH(batch);

    return len_in_dwords;
}
  1954.  
  1955. #define MPEG2_INTER_MV_OFFSET   12
  1956.  
  1957. static struct _mv_ranges
  1958. {
  1959.     int low;    /* in the unit of 1/2 pixel */
  1960.     int high;   /* in the unit of 1/2 pixel */
  1961. } mv_ranges[] = {
  1962.     {0, 0},
  1963.     {-16, 15},
  1964.     {-32, 31},
  1965.     {-64, 63},
  1966.     {-128, 127},
  1967.     {-256, 255},
  1968.     {-512, 511},
  1969.     {-1024, 1023},
  1970.     {-2048, 2047},
  1971.     {-4096, 4095}
  1972. };
  1973.  
  1974. static int
  1975. mpeg2_motion_vector(int mv, int pos, int display_max, int f_code)
  1976. {
  1977.     if (mv + pos * 16 * 2 < 0 ||
  1978.         mv + (pos + 1) * 16 * 2 > display_max * 2)
  1979.         mv = 0;
  1980.  
  1981.     if (f_code > 0 && f_code < 10) {
  1982.         if (mv < mv_ranges[f_code].low)
  1983.             mv = mv_ranges[f_code].low;
  1984.  
  1985.         if (mv > mv_ranges[f_code].high)
  1986.             mv = mv_ranges[f_code].high;
  1987.     }
  1988.  
  1989.     return mv;
  1990. }
  1991.  
  1992. static int
  1993. gen75_mfc_mpeg2_pak_object_inter(VADriverContextP ctx,
  1994.                                  struct encode_state *encode_state,
  1995.                                  struct intel_encoder_context *encoder_context,
  1996.                                  unsigned int *msg,
  1997.                                  int width_in_mbs, int height_in_mbs,
  1998.                                  int x, int y,
  1999.                                  int first_mb_in_slice,
  2000.                                  int last_mb_in_slice,
  2001.                                  int first_mb_in_slice_group,
  2002.                                  int last_mb_in_slice_group,
  2003.                                  int qp_scale_code,
  2004.                                  unsigned char target_size_in_word,
  2005.                                  unsigned char max_size_in_word,
  2006.                                  struct intel_batchbuffer *batch)
  2007. {
  2008.     VAEncPictureParameterBufferMPEG2 *pic_param = (VAEncPictureParameterBufferMPEG2 *)encode_state->pic_param_ext->buffer;
  2009.     int len_in_dwords = 9;
  2010.     short *mvptr, mvx0, mvy0, mvx1, mvy1;
  2011.    
  2012.     if (batch == NULL)
  2013.         batch = encoder_context->base.batch;
  2014.  
  2015.     mvptr = (short *)(msg + MPEG2_INTER_MV_OFFSET);
  2016.     mvx0 = mpeg2_motion_vector(mvptr[0] / 2, x, width_in_mbs * 16, pic_param->f_code[0][0]);
  2017.     mvy0 = mpeg2_motion_vector(mvptr[1] / 2, y, height_in_mbs * 16, pic_param->f_code[0][0]);
  2018.     mvx1 = mpeg2_motion_vector(mvptr[2] / 2, x, width_in_mbs * 16, pic_param->f_code[1][0]);
  2019.     mvy1 = mpeg2_motion_vector(mvptr[3] / 2, y, height_in_mbs * 16, pic_param->f_code[1][0]);
  2020.  
  2021.     BEGIN_BCS_BATCH(batch, len_in_dwords);
  2022.  
  2023.     OUT_BCS_BATCH(batch, MFC_MPEG2_PAK_OBJECT | (len_in_dwords - 2));
  2024.     OUT_BCS_BATCH(batch,
  2025.                   2 << 24 |     /* PackedMvNum */
  2026.                   7 << 20 |     /* MvFormat */
  2027.                   7 << 17 |     /* CbpDcY/CbpDcU/CbpDcV */
  2028.                   0 << 15 |     /* TransformFlag: frame DCT */
  2029.                   0 << 14 |     /* FieldMbFlag */
  2030.                   0 << 13 |     /* IntraMbFlag */
  2031.                   1 << 8 |      /* MbType: Frame-based */
  2032.                   0 << 2 |      /* SkipMbFlag */
  2033.                   0 << 0 |      /* InterMbMode */
  2034.                   0);
  2035.     OUT_BCS_BATCH(batch, y << 16 | x);
  2036.     OUT_BCS_BATCH(batch,
  2037.                   max_size_in_word << 24 |
  2038.                   target_size_in_word << 16 |
  2039.                   0x3f << 6 |   /* CBP */
  2040.                   0);
  2041.     OUT_BCS_BATCH(batch,
  2042.                   last_mb_in_slice << 31 |
  2043.                   first_mb_in_slice << 30 |
  2044.                   0 << 27 |     /* EnableCoeffClamp */
  2045.                   last_mb_in_slice_group << 26 |
  2046.                   0 << 25 |     /* MbSkipConvDisable */
  2047.                   first_mb_in_slice_group << 24 |
  2048.                   0 << 16 |     /* MvFieldSelect */
  2049.                   qp_scale_code << 0 |
  2050.                   0);
  2051.  
  2052.     OUT_BCS_BATCH(batch, (mvx0 & 0xFFFF) | mvy0 << 16);    /* MV[0][0] */
  2053.     OUT_BCS_BATCH(batch, (mvx1 & 0xFFFF) | mvy1 << 16);    /* MV[1][0] */
  2054.     OUT_BCS_BATCH(batch, 0);    /* MV[0][1] */
  2055.     OUT_BCS_BATCH(batch, 0);    /* MV[1][1] */
  2056.  
  2057.     ADVANCE_BCS_BATCH(batch);
  2058.  
  2059.     return len_in_dwords;
  2060. }
  2061.  
  2062. static void
  2063. intel_mfc_mpeg2_pipeline_header_programing(VADriverContextP ctx,
  2064.                                            struct encode_state *encode_state,
  2065.                                            struct intel_encoder_context *encoder_context,
  2066.                                            struct intel_batchbuffer *slice_batch)
  2067. {
  2068.     struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
  2069.     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_SPS);
  2070.  
  2071.     if (encode_state->packed_header_data[idx]) {
  2072.         VAEncPackedHeaderParameterBuffer *param = NULL;
  2073.         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
  2074.         unsigned int length_in_bits;
  2075.  
  2076.         assert(encode_state->packed_header_param[idx]);
  2077.         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
  2078.         length_in_bits = param->bit_length;
  2079.  
  2080.         mfc_context->insert_object(ctx,
  2081.                                    encoder_context,
  2082.                                    header_data,
  2083.                                    ALIGN(length_in_bits, 32) >> 5,
  2084.                                    length_in_bits & 0x1f,
  2085.                                    5,   /* FIXME: check it */
  2086.                                    0,
  2087.                                    0,
  2088.                                    0,   /* Needn't insert emulation bytes for MPEG-2 */
  2089.                                    slice_batch);
  2090.     }
  2091.  
  2092.     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderMPEG2_PPS);
  2093.  
  2094.     if (encode_state->packed_header_data[idx]) {
  2095.         VAEncPackedHeaderParameterBuffer *param = NULL;
  2096.         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
  2097.         unsigned int length_in_bits;
  2098.  
  2099.         assert(encode_state->packed_header_param[idx]);
  2100.         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
  2101.         length_in_bits = param->bit_length;
  2102.  
  2103.         mfc_context->insert_object(ctx,
  2104.                                    encoder_context,
  2105.                                    header_data,
  2106.                                    ALIGN(length_in_bits, 32) >> 5,
  2107.                                    length_in_bits & 0x1f,
  2108.                                    5,   /* FIXME: check it */
  2109.                                    0,
  2110.                                    0,
  2111.                                    0,   /* Needn't insert emulation bytes for MPEG-2 */
  2112.                                    slice_batch);
  2113.     }
  2114. }
  2115.  
/*
 * Emit all MFC commands for one slice group: slice-group state, packed
 * SPS/PPS headers (first group only), one PAK object per macroblock in
 * every slice of the group, and a trailing delimiter.  For non-intra
 * slices the VME output is consulted per MB to choose between intra
 * and inter coding by comparing RDO costs.
 */
static void
gen75_mfc_mpeg2_pipeline_slice_group(VADriverContextP ctx,
                                     struct encode_state *encode_state,
                                     struct intel_encoder_context *encoder_context,
                                     int slice_index,
                                     VAEncSliceParameterBufferMPEG2 *next_slice_group_param,
                                     struct intel_batchbuffer *slice_batch)
{
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    VAEncSequenceParameterBufferMPEG2 *seq_param = (VAEncSequenceParameterBufferMPEG2 *)encode_state->seq_param_ext->buffer;
    VAEncSliceParameterBufferMPEG2 *slice_param = NULL;
    unsigned char tail_delimiter[] = {MPEG2_DELIMITER0, MPEG2_DELIMITER1, MPEG2_DELIMITER2, MPEG2_DELIMITER3, MPEG2_DELIMITER4, 0, 0, 0};
    unsigned char section_delimiter[] = {0x0, 0x0, 0x0, 0x0};
    int width_in_mbs = ALIGN(seq_param->picture_width, 16) / 16;
    int height_in_mbs = ALIGN(seq_param->picture_height, 16) / 16;
    int i, j;
    int h_start_pos, v_start_pos, h_next_start_pos, v_next_start_pos;
    unsigned int *msg = NULL;
    unsigned char *msg_ptr = NULL;

    slice_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[slice_index]->buffer;
    /* Convert the linear MB address into (x, y) MB coordinates. */
    h_start_pos = slice_param->macroblock_address % width_in_mbs;
    v_start_pos = slice_param->macroblock_address / width_in_mbs;
    /* A slice must not wrap past the end of a macroblock row. */
    assert(h_start_pos + slice_param->num_macroblocks <= width_in_mbs);

    /* Map VME output so per-MB RDO costs and MVs can be read below. */
    dri_bo_map(vme_context->vme_output.bo , 0);
    msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;

    if (next_slice_group_param) {
        h_next_start_pos = next_slice_group_param->macroblock_address % width_in_mbs;
        v_next_start_pos = next_slice_group_param->macroblock_address / width_in_mbs;
    } else {
        /* Last group: the "next" position is the end of the picture. */
        h_next_start_pos = 0;
        v_next_start_pos = height_in_mbs;
    }

    gen75_mfc_mpeg2_slicegroup_state(ctx,
                                     encoder_context,
                                     h_start_pos,
                                     v_start_pos,
                                     h_next_start_pos,
                                     v_next_start_pos,
                                     slice_index == 0,
                                     next_slice_group_param == NULL,
                                     slice_param->is_intra_slice,
                                     slice_param->quantiser_scale_code,
                                     slice_batch);

    /* Packed SPS/PPS headers go in front of the first slice group only. */
    if (slice_index == 0)
        intel_mfc_mpeg2_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);

    /* Insert '00' to make sure the header is valid */
    mfc_context->insert_object(ctx,
                               encoder_context,
                               (unsigned int*)section_delimiter,
                               1,
                               8,   /* 8bits in the last DWORD */
                               1,   /* 1 byte */
                               1,
                               0,
                               0,
                               slice_batch);

    /* One slice per element; slice_param advances at the loop bottom. */
    for (i = 0; i < encode_state->slice_params_ext[slice_index]->num_elements; i++) {
        /* PAK for each macroblocks */
        for (j = 0; j < slice_param->num_macroblocks; j++) {
            int h_pos = (slice_param->macroblock_address + j) % width_in_mbs;
            int v_pos = (slice_param->macroblock_address + j) / width_in_mbs;
            int first_mb_in_slice = (j == 0);
            int last_mb_in_slice = (j == slice_param->num_macroblocks - 1);
            int first_mb_in_slice_group = (i == 0 && j == 0);
            int last_mb_in_slice_group = (i == encode_state->slice_params_ext[slice_index]->num_elements - 1 &&
                                          j == slice_param->num_macroblocks - 1);

            /* VME message for this macroblock. */
            msg = (unsigned int *)(msg_ptr + (slice_param->macroblock_address + j) * vme_context->vme_output.size_block);

            if (slice_param->is_intra_slice) {
                gen75_mfc_mpeg2_pak_object_intra(ctx,
                                                 encoder_context,
                                                 h_pos, v_pos,
                                                 first_mb_in_slice,
                                                 last_mb_in_slice,
                                                 first_mb_in_slice_group,
                                                 last_mb_in_slice_group,
                                                 0x1a,
                                                 slice_param->quantiser_scale_code,
                                                 0x3f,
                                                 0,
                                                 0xff,
                                                 slice_batch);
            } else {
                /* Pick whichever of intra/inter has the lower RDO cost.
                 * NOTE(review): this reuses the AVC RDO offsets —
                 * presumably the MPEG-2 VME message matches that layout;
                 * confirm against the VME kernel. */
                int inter_rdo, intra_rdo;
                inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
                intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;

                if (intra_rdo < inter_rdo)
                    gen75_mfc_mpeg2_pak_object_intra(ctx,
                                                     encoder_context,
                                                     h_pos, v_pos,
                                                     first_mb_in_slice,
                                                     last_mb_in_slice,
                                                     first_mb_in_slice_group,
                                                     last_mb_in_slice_group,
                                                     0x1a,
                                                     slice_param->quantiser_scale_code,
                                                     0x3f,
                                                     0,
                                                     0xff,
                                                     slice_batch);
                else
                    gen75_mfc_mpeg2_pak_object_inter(ctx,
                                                     encode_state,
                                                     encoder_context,
                                                     msg,
                                                     width_in_mbs, height_in_mbs,
                                                     h_pos, v_pos,
                                                     first_mb_in_slice,
                                                     last_mb_in_slice,
                                                     first_mb_in_slice_group,
                                                     last_mb_in_slice_group,
                                                     slice_param->quantiser_scale_code,
                                                     0,
                                                     0xff,
                                                     slice_batch);
            }
        }

        slice_param++;
    }

    dri_bo_unmap(vme_context->vme_output.bo);

    /* tail data */
    if (next_slice_group_param == NULL) { /* end of a picture */
        mfc_context->insert_object(ctx,
                                   encoder_context,
                                   (unsigned int *)tail_delimiter,
                                   2,
                                   8,   /* 8bits in the last DWORD */
                                   5,   /* 5 bytes */
                                   1,
                                   1,
                                   0,
                                   slice_batch);
    } else {        /* end of a slice group */
        mfc_context->insert_object(ctx,
                                   encoder_context,
                                   (unsigned int *)section_delimiter,
                                   1,
                                   8,   /* 8bits in the last DWORD */
                                   1,   /* 1 byte */
                                   1,
                                   1,
                                   0,
                                   slice_batch);
    }
}
  2274.  
  2275. /*
  2276.  * A batch buffer for all slices, including slice state,
  2277.  * slice insert object and slice pak object commands
  2278.  *
  2279.  */
/*
 * Build the software slice batch buffer for the whole picture: one
 * slice-group section per slice parameter buffer, terminated with
 * MI_BATCH_BUFFER_END.  Ownership of the underlying BO is transferred
 * to the caller (which must unreference it); the context's aux
 * batchbuffer is consumed and cleared here.
 */
static dri_bo *
gen75_mfc_mpeg2_software_slice_batchbuffer(VADriverContextP ctx,
                                           struct encode_state *encode_state,
                                           struct intel_encoder_context *encoder_context)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    struct intel_batchbuffer *batch;
    VAEncSliceParameterBufferMPEG2 *next_slice_group_param = NULL;
    dri_bo *batch_bo;
    int i;

    batch = mfc_context->aux_batchbuffer;
    batch_bo = batch->buffer;

    for (i = 0; i < encode_state->num_slice_params_ext; i++) {
        /* Peek at the following group so the current one can link to it. */
        if (i == encode_state->num_slice_params_ext - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VAEncSliceParameterBufferMPEG2 *)encode_state->slice_params_ext[i + 1]->buffer;

        gen75_mfc_mpeg2_pipeline_slice_group(ctx, encode_state, encoder_context, i, next_slice_group_param, batch);
    }

    intel_batchbuffer_align(batch, 8);

    BEGIN_BCS_BATCH(batch, 2);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
    ADVANCE_BCS_BATCH(batch);

    /* Keep the BO alive past the batchbuffer teardown below. */
    dri_bo_reference(batch_bo);
    intel_batchbuffer_free(batch);
    mfc_context->aux_batchbuffer = NULL;

    return batch_bo;
}
  2316.  
/*
 * Emit the picture-level MFC state for MPEG-2 encoding: pipe mode,
 * surface state, indirect-object/buffer base addresses, picture state
 * and the (forward and inverse) quantizer matrices, in that order.
 */
static void
gen75_mfc_mpeg2_pipeline_picture_programing(VADriverContextP ctx,
                                            struct encode_state *encode_state,
                                            struct intel_encoder_context *encoder_context)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;

    mfc_context->pipe_mode_select(ctx, MFX_FORMAT_MPEG2, encoder_context);
    mfc_context->set_surface_state(ctx, encoder_context);
    mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
    gen75_mfc_pipe_buf_addr_state(ctx, encoder_context);
    gen75_mfc_bsp_buf_base_addr_state(ctx, encoder_context);
    gen75_mfc_mpeg2_pic_state(ctx, encoder_context, encode_state);
    gen75_mfc_mpeg2_qm_state(ctx, encoder_context);
    gen75_mfc_mpeg2_fqm_state(ctx, encoder_context);
}
  2333.  
/*
 * Top-level BCS programming for one MPEG-2 frame: build the software
 * slice batch first, then emit the picture-level state and chain to
 * the slice batch via MI_BATCH_BUFFER_START (second-level batch).
 */
static void
gen75_mfc_mpeg2_pipeline_programing(VADriverContextP ctx,
                                    struct encode_state *encode_state,
                                    struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    dri_bo *slice_batch_bo;

    slice_batch_bo = gen75_mfc_mpeg2_software_slice_batchbuffer(ctx, encode_state, encoder_context);

    // begin programing
    intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
    intel_batchbuffer_emit_mi_flush(batch);

    // picture level programing
    gen75_mfc_mpeg2_pipeline_picture_programing(ctx, encode_state, encoder_context);

    /* Chain to the per-slice batch buffer. */
    BEGIN_BCS_BATCH(batch, 2);
    OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8));
    OUT_BCS_RELOC(batch,
                  slice_batch_bo,
                  I915_GEM_DOMAIN_COMMAND, 0,
                  0);
    ADVANCE_BCS_BATCH(batch);

    // end programing
    intel_batchbuffer_end_atomic(batch);

    /* Drop the reference taken by the slice-batch builder. */
    dri_bo_unreference(slice_batch_bo);
}
  2364.  
/*
 * Collect and reference every buffer object needed to encode one
 * MPEG-2 frame: reconstructed surface, forward/backward reference
 * surfaces (missing slots are substituted with the available
 * reference), the raw input surface and the coded (output) buffer.
 * Also resets the coded-buffer status header.  Always returns
 * VA_STATUS_SUCCESS.
 */
static VAStatus
intel_mfc_mpeg2_prepare(VADriverContextP ctx,
                        struct encode_state *encode_state,
                        struct intel_encoder_context *encoder_context)
{
    struct gen6_mfc_context *mfc_context = encoder_context->mfc_context;
    struct object_surface *obj_surface;
    struct object_buffer *obj_buffer;
    struct i965_coded_buffer_segment *coded_buffer_segment;
    VAStatus vaStatus = VA_STATUS_SUCCESS;
    dri_bo *bo;
    int i;

    /* reconstructed surface */
    obj_surface = encode_state->reconstructed_object;
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
    mfc_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(mfc_context->pre_deblocking_output.bo);
    mfc_context->surface_state.width = obj_surface->orig_width;
    mfc_context->surface_state.height = obj_surface->orig_height;
    mfc_context->surface_state.w_pitch = obj_surface->width;
    mfc_context->surface_state.h_pitch = obj_surface->height;

    /* forward reference */
    obj_surface = encode_state->reference_objects[0];

    if (obj_surface && obj_surface->bo) {
        mfc_context->reference_surfaces[0].bo = obj_surface->bo;
        dri_bo_reference(mfc_context->reference_surfaces[0].bo);
    } else
        mfc_context->reference_surfaces[0].bo = NULL;

    /* backward reference */
    obj_surface = encode_state->reference_objects[1];

    if (obj_surface && obj_surface->bo) {
        mfc_context->reference_surfaces[1].bo = obj_surface->bo;
        dri_bo_reference(mfc_context->reference_surfaces[1].bo);
    } else {
        /* No backward reference: fall back to the forward one (if any). */
        mfc_context->reference_surfaces[1].bo = mfc_context->reference_surfaces[0].bo;

        if (mfc_context->reference_surfaces[1].bo)
            dri_bo_reference(mfc_context->reference_surfaces[1].bo);
    }

    /* Fill the remaining slots by alternating the two real references. */
    for (i = 2; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
        mfc_context->reference_surfaces[i].bo = mfc_context->reference_surfaces[i & 1].bo;

        if (mfc_context->reference_surfaces[i].bo)
            dri_bo_reference(mfc_context->reference_surfaces[i].bo);
    }

    /* input YUV surface */
    obj_surface = encode_state->input_yuv_object;
    mfc_context->uncompressed_picture_source.bo = obj_surface->bo;
    dri_bo_reference(mfc_context->uncompressed_picture_source.bo);

    /* coded buffer: bitstream goes after the status header segment */
    obj_buffer = encode_state->coded_buf_object;
    bo = obj_buffer->buffer_store->bo;
    mfc_context->mfc_indirect_pak_bse_object.bo = bo;
    mfc_context->mfc_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
    mfc_context->mfc_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
    dri_bo_reference(mfc_context->mfc_indirect_pak_bse_object.bo);

    /* set the internal flag to 0 to indicate the coded size is unknown */
    dri_bo_map(bo, 1);
    coded_buffer_segment = (struct i965_coded_buffer_segment *)bo->virtual;
    coded_buffer_segment->mapped = 0;
    coded_buffer_segment->codec = encoder_context->codec;
    dri_bo_unmap(bo);

    return vaStatus;
}
  2439.  
/*
 * Encode one MPEG-2 picture: (re)allocate internal scratch buffers,
 * reference the per-frame surfaces and coded buffer, program the BCS
 * pipeline and submit it.  Always reports VA_STATUS_SUCCESS.
 */
static VAStatus
gen75_mfc_mpeg2_encode_picture(VADriverContextP ctx,
                               struct encode_state *encode_state,
                               struct intel_encoder_context *encoder_context)
{
    gen75_mfc_init(ctx, encode_state, encoder_context);
    intel_mfc_mpeg2_prepare(ctx, encode_state, encoder_context);
    /*Programing bcs pipeline*/
    gen75_mfc_mpeg2_pipeline_programing(ctx, encode_state, encoder_context);
    gen75_mfc_run(ctx, encode_state, encoder_context);

    return VA_STATUS_SUCCESS;
}
  2453.  
/*
 * Destructor for the MFC context: drop every buffer-object reference
 * held by the context, tear down the GPE context and the auxiliary
 * batch buffer (if still allocated), then free the context itself.
 */
static void
gen75_mfc_context_destroy(void *context)
{
    struct gen6_mfc_context *mfc_context = context;
    int i;

    dri_bo_unreference(mfc_context->post_deblocking_output.bo);
    mfc_context->post_deblocking_output.bo = NULL;

    dri_bo_unreference(mfc_context->pre_deblocking_output.bo);
    mfc_context->pre_deblocking_output.bo = NULL;

    dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
    mfc_context->uncompressed_picture_source.bo = NULL;

    dri_bo_unreference(mfc_context->mfc_indirect_pak_bse_object.bo);
    mfc_context->mfc_indirect_pak_bse_object.bo = NULL;

    for (i = 0; i < NUM_MFC_DMV_BUFFERS; i++){
        dri_bo_unreference(mfc_context->direct_mv_buffers[i].bo);
        mfc_context->direct_mv_buffers[i].bo = NULL;
    }

    dri_bo_unreference(mfc_context->intra_row_store_scratch_buffer.bo);
    mfc_context->intra_row_store_scratch_buffer.bo = NULL;

    dri_bo_unreference(mfc_context->macroblock_status_buffer.bo);
    mfc_context->macroblock_status_buffer.bo = NULL;

    dri_bo_unreference(mfc_context->deblocking_filter_row_store_scratch_buffer.bo);
    mfc_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;

    dri_bo_unreference(mfc_context->bsd_mpc_row_store_scratch_buffer.bo);
    mfc_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;

    for (i = 0; i < MAX_MFC_REFERENCE_SURFACES; i++){
        dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
        mfc_context->reference_surfaces[i].bo = NULL;
    }

    i965_gpe_context_destroy(&mfc_context->gpe_context);

    dri_bo_unreference(mfc_context->mfc_batchbuffer_surface.bo);
    mfc_context->mfc_batchbuffer_surface.bo = NULL;

    dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
    mfc_context->aux_batchbuffer_surface.bo = NULL;

    /* The aux batchbuffer is normally consumed during encoding; free it
     * here only if teardown happens before it was used. */
    if (mfc_context->aux_batchbuffer)
        intel_batchbuffer_free(mfc_context->aux_batchbuffer);

    mfc_context->aux_batchbuffer = NULL;

    free(mfc_context);
}
  2509.  
  2510. static VAStatus gen75_mfc_pipeline(VADriverContextP ctx,
  2511.                                    VAProfile profile,
  2512.                                    struct encode_state *encode_state,
  2513.                                    struct intel_encoder_context *encoder_context)
  2514. {
  2515.     VAStatus vaStatus;
  2516.  
  2517.     switch (profile) {
  2518.     case VAProfileH264ConstrainedBaseline:
  2519.     case VAProfileH264Main:
  2520.     case VAProfileH264High:
  2521.     case VAProfileH264MultiviewHigh:
  2522.     case VAProfileH264StereoHigh:
  2523.         vaStatus = gen75_mfc_avc_encode_picture(ctx, encode_state, encoder_context);
  2524.         break;
  2525.  
  2526.         /* FIXME: add for other profile */
  2527.     case VAProfileMPEG2Simple:
  2528.     case VAProfileMPEG2Main:
  2529.         vaStatus = gen75_mfc_mpeg2_encode_picture(ctx, encode_state, encoder_context);
  2530.         break;
  2531.  
  2532.     default:
  2533.         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
  2534.         break;
  2535.     }
  2536.  
  2537.     return vaStatus;
  2538. }
  2539.  
/*
 * Allocate and initialize the gen7.5 MFC (PAK) context: GPE setup for
 * the batch-buffer kernel plus the vtable of state-emission callbacks
 * used by the encoder pipelines.  Returns False on allocation failure.
 */
Bool gen75_mfc_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
{
    struct gen6_mfc_context *mfc_context = calloc(1, sizeof(struct gen6_mfc_context));

    if (!mfc_context)
        return False;

    mfc_context->gpe_context.surface_state_binding_table.length = (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;

    mfc_context->gpe_context.idrt.max_entries = MAX_GPE_KERNELS;
    mfc_context->gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);

    mfc_context->gpe_context.curbe.length = 32 * 4;

    /* VFE dispatch parameters (hardware tuning constants; the "- 1"
     * forms suggest the registers are 0-based — NOTE(review): confirm
     * against the HSW PRM). */
    mfc_context->gpe_context.vfe_state.max_num_threads = 60 - 1;
    mfc_context->gpe_context.vfe_state.num_urb_entries = 16;
    mfc_context->gpe_context.vfe_state.gpgpu_mode = 0;
    mfc_context->gpe_context.vfe_state.urb_entry_size = 59 - 1;
    mfc_context->gpe_context.vfe_state.curbe_allocation_size = 37 - 1;

    i965_gpe_load_kernels(ctx,
                          &mfc_context->gpe_context,
                          gen75_mfc_kernels,
                          1);

    /* Hook up the gen7.5-specific state-emission callbacks. */
    mfc_context->pipe_mode_select = gen75_mfc_pipe_mode_select;
    mfc_context->set_surface_state = gen75_mfc_surface_state;
    mfc_context->ind_obj_base_addr_state = gen75_mfc_ind_obj_base_addr_state;
    mfc_context->avc_img_state = gen75_mfc_avc_img_state;
    mfc_context->avc_qm_state = gen75_mfc_avc_qm_state;
    mfc_context->avc_fqm_state = gen75_mfc_avc_fqm_state;
    mfc_context->insert_object = gen75_mfc_avc_insert_object;
    mfc_context->buffer_suface_setup = gen7_gpe_buffer_suface_setup;

    encoder_context->mfc_context = mfc_context;
    encoder_context->mfc_context_destroy = gen75_mfc_context_destroy;
    encoder_context->mfc_pipeline = gen75_mfc_pipeline;
    encoder_context->mfc_brc_prepare = intel_mfc_brc_prepare;

    return True;
}
  2581.