Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright © 2014 Intel Corporation
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the
  6.  * "Software"), to deal in the Software without restriction, including
  7.  * without limitation the rights to use, copy, modify, merge, publish,
  8.  * distribute, sub license, and/or sell copies of the Software, and to
  9.  * permit persons to whom the Software is furnished to do so, subject to
  10.  * the following conditions:
  11.  *
  12.  * The above copyright notice and this permission notice (including the
  13.  * next paragraph) shall be included in all copies or substantial portions
  14.  * of the Software.
  15.  *
  16.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  17.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  18.  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  19.  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
  20.  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  21.  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  22.  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23.  *
  24.  * Authors:
  25.  *    Qu Pengfei <Pengfei.Qu@intel.com>
  26.  *
  27.  */
  28.  
  29. #include <stdio.h>
  30. #include <stdlib.h>
  31. #include <string.h>
  32. #include <math.h>
  33. #include <assert.h>
  34.  
  35. #include "intel_batchbuffer.h"
  36. #include "i965_defines.h"
  37. #include "i965_structs.h"
  38. #include "i965_drv_video.h"
  39. #include "i965_encoder.h"
  40. #include "i965_encoder_utils.h"
  41. #include "gen9_mfc.h"
  42. #include "gen6_vme.h"
  43. #include "intel_media.h"
  44.  
/* Status of the software bit-rate controller (BRC) after checking a coded
 * frame against the HRD buffer model. */
typedef enum _gen6_brc_status {
    BRC_NO_HRD_VIOLATION = 0,      /* frame size within HRD limits */
    BRC_UNDERFLOW = 1,             /* coded frame too small (buffer underflow) */
    BRC_OVERFLOW = 2,              /* coded frame too large (buffer overflow) */
    BRC_UNDERFLOW_WITH_MAX_QP = 3, /* still underflows even at maximum QP */
    BRC_OVERFLOW_WITH_MIN_QP = 4,  /* still overflows even at minimum QP */
} gen6_brc_status;
  52.  
  53. /* BRC define */
  54. #define BRC_CLIP(x, min, max)                                   \
  55.     {                                                           \
  56.         x = ((x > (max)) ? (max) : ((x < (min)) ? (min) : x));  \
  57.     }
  58.  
#define BRC_P_B_QP_DIFF 4   /* QP offset of B slices relative to P slices */
#define BRC_I_P_QP_DIFF 2   /* QP offset of P slices relative to I slices */
#define BRC_I_B_QP_DIFF (BRC_I_P_QP_DIFF + BRC_P_B_QP_DIFF)

#define BRC_PWEIGHT 0.6  /* weight if P slice with comparison to I slice */
#define BRC_BWEIGHT 0.25 /* weight if B slice with comparison to I slice */

#define BRC_QP_MAX_CHANGE 5 /* maximum qp modification */
#define BRC_CY 0.1 /* weight for — original comment left unfinished; presumably a coefficient in the BRC QP-adjust formula, TODO confirm */
#define BRC_CX_UNDERFLOW 5.  /* correction term applied on HRD underflow */
#define BRC_CX_OVERFLOW -4.  /* correction term applied on HRD overflow */

#define BRC_PI_0_5 1.5707963267948966192313216916398 /* pi * 0.5 */
  72.  
  73. /* intel buffer write */
  74. #define ALLOC_ENCODER_BUFFER(gen_buffer, string, size) do {     \
  75.         dri_bo_unreference(gen_buffer->bo);                     \
  76.         gen_buffer->bo = dri_bo_alloc(i965->intel.bufmgr,       \
  77.                                       string,                   \
  78.                                       size,                     \
  79.                                       0x1000);                  \
  80.         assert(gen_buffer->bo);                                 \
  81.     } while (0);
  82.  
  83.  
/* Emit a buffer address into the BCS batch, or a zero DW when buf_bo is
 * NULL.  is_target selects a GPU write domain for the relocation (render
 * target) versus read-only use.  A second zero DW always follows the
 * relocation — presumably the upper 32 address bits, TODO confirm — and
 * when ma is set a third DW (memory address attributes) is emitted too. */
#define OUT_BUFFER_X(buf_bo, is_target, ma)  do {                         \
        if (buf_bo) {                                                   \
            OUT_BCS_RELOC(batch,                                        \
                          buf_bo,                                       \
                          I915_GEM_DOMAIN_INSTRUCTION,                       \
                          is_target ? I915_GEM_DOMAIN_INSTRUCTION : 0,       \
                          0);                                           \
        } else {                                                        \
            OUT_BCS_BATCH(batch, 0);                                    \
        }                                                               \
        OUT_BCS_BATCH(batch, 0);                                        \
        if (ma)                                                         \
            OUT_BCS_BATCH(batch, 0);                                    \
    } while (0)

/* Convenience wrappers: MA = with memory-attributes DW, NMA = without;
 * TARGET = GPU-writable, REFERENCE = read-only. */
#define OUT_BUFFER_MA_TARGET(buf_bo)       OUT_BUFFER_X(buf_bo, 1, 1)
#define OUT_BUFFER_MA_REFERENCE(buf_bo)    OUT_BUFFER_X(buf_bo, 0, 1)
#define OUT_BUFFER_NMA_TARGET(buf_bo)      OUT_BUFFER_X(buf_bo, 1, 0)
#define OUT_BUFFER_NMA_REFERENCE(buf_bo)   OUT_BUFFER_X(buf_bo, 0, 0)
  103.  
  104.  
/* Binding-table layout: Gen8 padded surface-state size, with the binding
 * table placed after MAX_MEDIA_SURFACES_GEN6 surface-state slots. */
#define SURFACE_STATE_PADDED_SIZE               SURFACE_STATE_PADDED_SIZE_GEN8
#define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * index)
#define BINDING_TABLE_OFFSET(index)             (SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * index)

#define HCP_SOFTWARE_SKYLAKE    1

#define NUM_HCPE_KERNEL 2

/* Inter-prediction partition codes and sub-MB shape mask — presumably
 * matching the VME output MB mode layout; TODO confirm against gen6_vme. */
#define     INTER_MODE_MASK     0x03
#define     INTER_8X8       0x03
#define     INTER_16X8      0x01
#define     INTER_8X16      0x02
#define     SUBMB_SHAPE_MASK    0x00FF00

#define     INTER_MV8       (4 << 20)
#define     INTER_MV32      (6 << 20)
  122.  
  123. /* HEVC */
  124.  
  125. /* utils */
/* Fill an HEVC IQ-matrix buffer with the "flat" default: every scaling
 * list entry (4x4, 8x8, 16x16, 32x32) and both DC terms set to 16, which
 * corresponds to no coefficient weighting. */
static void
hevc_gen_default_iq_matrix_encoder(VAQMatrixBufferHEVC *iq_matrix)
{
    /* Flat_4x4_16 */
    memset(&iq_matrix->scaling_lists_4x4, 16, sizeof(iq_matrix->scaling_lists_4x4));

    /* Flat_8x8_16 */
    memset(&iq_matrix->scaling_lists_8x8, 16, sizeof(iq_matrix->scaling_lists_8x8));

    /* Flat_16x16_16 */
    memset(&iq_matrix->scaling_lists_16x16, 16, sizeof(iq_matrix->scaling_lists_16x16));

    /* Flat_32x32_16 */
    memset(&iq_matrix->scaling_lists_32x32, 16, sizeof(iq_matrix->scaling_lists_32x32));

    /* Flat_16x16_dc_16 */
    memset(&iq_matrix->scaling_list_dc_16x16, 16, sizeof(iq_matrix->scaling_list_dc_16x16));

    /* Flat_32x32_dc_16 */
    memset(&iq_matrix->scaling_list_dc_32x32, 16, sizeof(iq_matrix->scaling_list_dc_32x32));
}
  147.  
  148. /* HEVC picture and slice state related */
  149.  
/* Emit HCP_PIPE_MODE_SELECT (4 DWs): put the HCP pipe into encode mode
 * for the selected standard.  Only HEVC is supported by this path. */
static void
gen9_hcpe_pipe_mode_select(VADriverContextP ctx,
                           int standard_select,
                           struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;

    assert(standard_select == HCP_CODEC_HEVC);

    BEGIN_BCS_BATCH(batch, 4);

    /* DW0: opcode; length field encodes total DWs minus 2 */
    OUT_BCS_BATCH(batch, HCP_PIPE_MODE_SELECT | (4 - 2));
    OUT_BCS_BATCH(batch,
                  (standard_select << 5) |
                  (0 << 3) | /* disable Pic Status / Error Report */
                  HCP_CODEC_SELECT_ENCODE);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
  171.  
/* Emit two HCP_SURFACE_STATE commands (3 DWs each) describing the planar
 * 4:2:0 8-bit layout: pitch and Y-to-CbCr plane offset.  Surface id 1 is
 * programmed first, then id 0; both use the reconstructed surface's
 * y_cb_offset and the context pitch.  NOTE(review): which id corresponds
 * to source vs. reconstructed is not visible here — confirm against the
 * HCP_SURFACE_STATE programming notes. */
static void
gen9_hcpe_surface_state(VADriverContextP ctx, struct encode_state *encode_state,
                        struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct object_surface *obj_surface = encode_state->reconstructed_object;
    struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;

    /* to do */
    unsigned int y_cb_offset;

    assert(obj_surface);

    y_cb_offset = obj_surface->y_cb_offset;

    BEGIN_BCS_BATCH(batch, 3);
    OUT_BCS_BATCH(batch, HCP_SURFACE_STATE | (3 - 2));
    OUT_BCS_BATCH(batch,
                  (1 << 28) |                   /* surface id */
                  (mfc_context->surface_state.w_pitch - 1));    /* pitch - 1 */
    OUT_BCS_BATCH(batch,
                  (SURFACE_FORMAT_PLANAR_420_8 << 28) |
                  y_cb_offset);
    ADVANCE_BCS_BATCH(batch);

    BEGIN_BCS_BATCH(batch, 3);
    OUT_BCS_BATCH(batch, HCP_SURFACE_STATE | (3 - 2));
    OUT_BCS_BATCH(batch,
                  (0 << 28) |                   /* surface id */
                  (mfc_context->surface_state.w_pitch - 1));    /* pitch - 1 */
    OUT_BCS_BATCH(batch,
                  (SURFACE_FORMAT_PLANAR_420_8 << 28) |
                  y_cb_offset);
    ADVANCE_BCS_BATCH(batch);
}
  207.  
/* Emit HCP_PIPE_BUF_ADDR_STATE (95 DWs): base addresses of every buffer
 * the HCP pipe uses — reconstructed output, deblocking/metadata/SAO
 * row-store scratch buffers, the collocated MV buffer, up to 8 reference
 * pictures, and the uncompressed source picture.  DW positions are fixed
 * by the command layout; the inline comments track them. */
static void
gen9_hcpe_pipe_buf_addr_state(VADriverContextP ctx, struct encode_state *encode_state,
                              struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
    struct object_surface *obj_surface;
    GenHevcSurface *hcpe_hevc_surface;
    dri_bo *bo;
    unsigned int i;

    BEGIN_BCS_BATCH(batch, 95);

    OUT_BCS_BATCH(batch, HCP_PIPE_BUF_ADDR_STATE | (95 - 2));

    /* the reconstructed picture must exist and carry its per-surface
     * HEVC private data (motion vector temporal buffer) */
    obj_surface = encode_state->reconstructed_object;
    assert(obj_surface && obj_surface->bo);
    hcpe_hevc_surface = obj_surface->private_data;
    assert(hcpe_hevc_surface && hcpe_hevc_surface->motion_vector_temporal_bo);

    OUT_BUFFER_MA_TARGET(obj_surface->bo); /* DW 1..3 */
    OUT_BUFFER_MA_TARGET(mfc_context->deblocking_filter_line_buffer.bo);/* DW 4..6 */
    OUT_BUFFER_MA_TARGET(mfc_context->deblocking_filter_tile_line_buffer.bo); /* DW 7..9 */
    OUT_BUFFER_MA_TARGET(mfc_context->deblocking_filter_tile_column_buffer.bo); /* DW 10..12 */
    OUT_BUFFER_MA_TARGET(mfc_context->metadata_line_buffer.bo);         /* DW 13..15 */
    OUT_BUFFER_MA_TARGET(mfc_context->metadata_tile_line_buffer.bo);    /* DW 16..18 */
    OUT_BUFFER_MA_TARGET(mfc_context->metadata_tile_column_buffer.bo);  /* DW 19..21 */
    OUT_BUFFER_MA_TARGET(mfc_context->sao_line_buffer.bo);              /* DW 22..24 */
    OUT_BUFFER_MA_TARGET(mfc_context->sao_tile_line_buffer.bo);         /* DW 25..27 */
    OUT_BUFFER_MA_TARGET(mfc_context->sao_tile_column_buffer.bo);       /* DW 28..30 */
    OUT_BUFFER_MA_TARGET(hcpe_hevc_surface->motion_vector_temporal_bo); /* DW 31..33 */
    OUT_BUFFER_MA_TARGET(NULL); /* DW 34..36, reserved */

    /* here only max 8 reference allowed */
    for (i = 0; i < ARRAY_ELEMS(mfc_context->reference_surfaces); i++) {
        bo = mfc_context->reference_surfaces[i].bo;

        if (bo) {
            OUT_BUFFER_NMA_REFERENCE(bo);
        } else
            OUT_BUFFER_NMA_REFERENCE(NULL);
    }
    OUT_BCS_BATCH(batch, 0);    /* DW 53, memory address attributes */

    OUT_BUFFER_MA_TARGET(mfc_context->uncompressed_picture_source.bo); /* DW 54..56, uncompressed picture source */
    OUT_BUFFER_MA_TARGET(NULL); /* DW 57..59, ignore  */
    OUT_BUFFER_MA_TARGET(NULL); /* DW 60..62, ignore  */
    OUT_BUFFER_MA_TARGET(NULL); /* DW 63..65, ignore  */

    /* collocated MV temporal buffers (all but the last slot) */
    for (i = 0; i < ARRAY_ELEMS(mfc_context->current_collocated_mv_temporal_buffer) - 1; i++) {
        bo = mfc_context->current_collocated_mv_temporal_buffer[i].bo;

        if (bo) {
            OUT_BUFFER_NMA_REFERENCE(bo);
        } else
            OUT_BUFFER_NMA_REFERENCE(NULL);
    }
    OUT_BCS_BATCH(batch, 0);    /* DW 82, memory address attributes */

    OUT_BUFFER_MA_TARGET(NULL);    /* DW 83..85, ignore for HEVC */
    OUT_BUFFER_MA_TARGET(NULL);    /* DW 86..88, ignore for HEVC */
    OUT_BUFFER_MA_TARGET(NULL);    /* DW 89..91, ignore for HEVC */
    OUT_BUFFER_MA_TARGET(NULL);    /* DW 92..94, ignore for HEVC */

    ADVANCE_BCS_BATCH(batch);
}
  274.  
/* Emit HCP_IND_OBJ_BASE_ADDR_STATE (14 DWs): indirect object bases —
 * the CU record buffer the PAK reads, and the PAK-BSE bitstream output
 * window (start offset and end/upper-bound offset in the same bo). */
static void
gen9_hcpe_ind_obj_base_addr_state(VADriverContextP ctx,
                                  struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;

    /* to do */
    BEGIN_BCS_BATCH(batch, 14);

    OUT_BCS_BATCH(batch, HCP_IND_OBJ_BASE_ADDR_STATE | (14 - 2));
    OUT_BUFFER_MA_REFERENCE(NULL);                 /* DW 1..3, ignored for encoder */
    OUT_BUFFER_NMA_REFERENCE(NULL);                /* DW 4..5, Upper Bound */
    OUT_BUFFER_MA_TARGET(mfc_context->hcp_indirect_cu_object.bo);                 /* DW 6..8, CU */
    /* DW 9..11, PAK-BSE: base address at the configured start offset */
    OUT_BCS_RELOC(batch,
                  mfc_context->hcp_indirect_pak_bse_object.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  mfc_context->hcp_indirect_pak_bse_object.offset);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    /* DW 12..13: PAK-BSE upper bound (end offset in the same bo) */
    OUT_BCS_RELOC(batch,
                  mfc_context->hcp_indirect_pak_bse_object.bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  mfc_context->hcp_indirect_pak_bse_object.end_offset);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
  304.  
  305. static void
  306. gen9_hcpe_fqm_state(VADriverContextP ctx,
  307.                     int size_id,
  308.                     int color_component,
  309.                     int pred_type,
  310.                     int dc,
  311.                     unsigned int *fqm,
  312.                     int fqm_length,
  313.                     struct intel_encoder_context *encoder_context)
  314. {
  315.     struct intel_batchbuffer *batch = encoder_context->base.batch;
  316.     unsigned int fqm_buffer[32];
  317.  
  318.     assert(fqm_length <= 32);
  319.     assert(sizeof(*fqm) == 4);
  320.     memset(fqm_buffer, 0, sizeof(fqm_buffer));
  321.     memcpy(fqm_buffer, fqm, fqm_length * 4);
  322.  
  323.     BEGIN_BCS_BATCH(batch, 34);
  324.  
  325.     OUT_BCS_BATCH(batch, HCP_FQM_STATE | (34 - 2));
  326.     OUT_BCS_BATCH(batch,
  327.                   dc << 16 |
  328.                   color_component << 3 |
  329.                   size_id << 1 |
  330.                   pred_type);
  331.     intel_batchbuffer_data(batch, fqm_buffer, 32 * 4);
  332.  
  333.     ADVANCE_BCS_BATCH(batch);
  334. }
  335.  
  336.  
  337. static void
  338. gen9_hcpe_hevc_fqm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
  339. {
  340.     unsigned int qm[32] = {
  341.         0x10001000, 0x10001000, 0x10001000, 0x10001000,
  342.         0x10001000, 0x10001000, 0x10001000, 0x10001000,
  343.         0x10001000, 0x10001000, 0x10001000, 0x10001000,
  344.         0x10001000, 0x10001000, 0x10001000, 0x10001000,
  345.         0x10001000, 0x10001000, 0x10001000, 0x10001000,
  346.         0x10001000, 0x10001000, 0x10001000, 0x10001000,
  347.         0x10001000, 0x10001000, 0x10001000, 0x10001000,
  348.         0x10001000, 0x10001000, 0x10001000, 0x10001000
  349.     };
  350.  
  351.     gen9_hcpe_fqm_state(ctx,
  352.                         0, 0, 0, 0,
  353.                         qm, 8,
  354.                         encoder_context);
  355.     gen9_hcpe_fqm_state(ctx,
  356.                         0, 0, 1, 0,
  357.                         qm, 8,
  358.                         encoder_context);
  359.     gen9_hcpe_fqm_state(ctx,
  360.                         1, 0, 0, 0,
  361.                         qm, 32,
  362.                         encoder_context);
  363.     gen9_hcpe_fqm_state(ctx,
  364.                         1, 0, 1, 0,
  365.                         qm, 32,
  366.                         encoder_context);
  367.     gen9_hcpe_fqm_state(ctx,
  368.                         2, 0, 0, 0x1000,
  369.                         qm, 0,
  370.                         encoder_context);
  371.     gen9_hcpe_fqm_state(ctx,
  372.                         2, 0, 1, 0x1000,
  373.                         qm, 0,
  374.                         encoder_context);
  375.     gen9_hcpe_fqm_state(ctx,
  376.                         3, 0, 0, 0x1000,
  377.                         qm, 0,
  378.                         encoder_context);
  379.     gen9_hcpe_fqm_state(ctx,
  380.                         3, 0, 1, 0x1000,
  381.                         qm, 0,
  382.                         encoder_context);
  383. }
  384.  
  385. static void
  386. gen9_hcpe_qm_state(VADriverContextP ctx,
  387.                    int size_id,
  388.                    int color_component,
  389.                    int pred_type,
  390.                    int dc,
  391.                    unsigned int *qm,
  392.                    int qm_length,
  393.                    struct intel_encoder_context *encoder_context)
  394. {
  395.     struct intel_batchbuffer *batch = encoder_context->base.batch;
  396.     unsigned int qm_buffer[16];
  397.  
  398.     assert(qm_length <= 16);
  399.     assert(sizeof(*qm) == 4);
  400.     memset(qm_buffer, 0, sizeof(qm_buffer));
  401.     memcpy(qm_buffer, qm, qm_length * 4);
  402.  
  403.     BEGIN_BCS_BATCH(batch, 18);
  404.  
  405.     OUT_BCS_BATCH(batch, HCP_QM_STATE | (18 - 2));
  406.     OUT_BCS_BATCH(batch,
  407.                   dc << 5 |
  408.                   color_component << 3 |
  409.                   size_id << 1 |
  410.                   pred_type);
  411.     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
  412.  
  413.     ADVANCE_BCS_BATCH(batch);
  414. }
  415.  
  416. static void
  417. gen9_hcpe_hevc_qm_state(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
  418. {
  419.  
  420.     int i;
  421.  
  422.     unsigned int qm[16] = {
  423.         0x10101010, 0x10101010, 0x10101010, 0x10101010,
  424.         0x10101010, 0x10101010, 0x10101010, 0x10101010,
  425.         0x10101010, 0x10101010, 0x10101010, 0x10101010,
  426.         0x10101010, 0x10101010, 0x10101010, 0x10101010
  427.     };
  428.  
  429.     for (i = 0; i < 6; i++) {
  430.         gen9_hcpe_qm_state(ctx,
  431.                            0, i % 3, i / 3, 0,
  432.                            qm, 4,
  433.                            encoder_context);
  434.     }
  435.  
  436.     for (i = 0; i < 6; i++) {
  437.         gen9_hcpe_qm_state(ctx,
  438.                            1, i % 3, i / 3, 0,
  439.                            qm, 16,
  440.                            encoder_context);
  441.     }
  442.  
  443.     for (i = 0; i < 6; i++) {
  444.         gen9_hcpe_qm_state(ctx,
  445.                            2, i % 3, i / 3, 16,
  446.                            qm, 16,
  447.                            encoder_context);
  448.     }
  449.  
  450.     for (i = 0; i < 2; i++) {
  451.         gen9_hcpe_qm_state(ctx,
  452.                            3, 0, i % 2, 16,
  453.                            qm, 16,
  454.                            encoder_context);
  455.     }
  456. }
  457.  
/* Emit HCP_PIC_STATE (19 DWs) for HEVC encoding: frame geometry in
 * min-CB units, CB/TB size ranges, PCM parameters, the per-picture coding
 * flags, and the PAK rate-control guard values (max LCU bits, frame
 * bitrate min/max, and the delta-QP clamping tables in DW 10..17). */
static void
gen9_hcpe_hevc_pic_state(VADriverContextP ctx, struct encode_state *encode_state,
                         struct intel_encoder_context *encoder_context)
{
    struct intel_batchbuffer *batch = encoder_context->base.batch;
    struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
    VAEncPictureParameterBufferHEVC *pic_param ;
    VAEncSequenceParameterBufferHEVC *seq_param ;

    int max_pcm_size_minus3 = 0, min_pcm_size_minus3 = 0;
    int pcm_sample_bit_depth_luma_minus1 = 7, pcm_sample_bit_depth_chroma_minus1 = 7;
    /*
     * 7.4.3.1
     *
     * When not present, the value of loop_filter_across_tiles_enabled_flag
     * is inferred to be equal to 1.
     */
    int loop_filter_across_tiles_enabled_flag = 0;
    pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
    seq_param = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;

    int log2_cu_size = seq_param->log2_min_luma_coding_block_size_minus3 + 3;
    int log2_ctb_size =  seq_param->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
    int ctb_size = 1 << log2_ctb_size;
    /* raw bits of one CTB — presumably 8-bit 4:2:0 (luma + half chroma) */
    double rawctubits = 8 * 3 * ctb_size * ctb_size / 2.0;
    /* cap a coded LCU at 5/3 of its raw size */
    int maxctubits = (int)(5 * rawctubits / 3) ;
    double bitrate = seq_param->bits_per_second * 1.0;
    double framebitrate = bitrate / 32 / 8; //32 byte unit
    int minframebitrate = 0;//(int) (framebitrate * 3 / 10);
    int maxframebitrate = (int)(framebitrate * 10 / 10);
    int maxdeltaframebitrate = 0x1c5c; //(int) (framebitrate * 1/ 10);
    int mindeltaframebitrate = 0; //(int) (framebitrate * 1/ 10);
    int minframesize = 0;//(int)(rawframebits * 1/50);

    if (seq_param->seq_fields.bits.pcm_enabled_flag) {
        max_pcm_size_minus3 = seq_param->log2_max_pcm_luma_coding_block_size_minus3;
        min_pcm_size_minus3 = seq_param->log2_min_pcm_luma_coding_block_size_minus3;
        pcm_sample_bit_depth_luma_minus1 = (seq_param->pcm_sample_bit_depth_luma_minus1 & 0x0f);
        pcm_sample_bit_depth_chroma_minus1 = (seq_param->pcm_sample_bit_depth_chroma_minus1 & 0x0f);
    } else {
        /* PCM disabled: still program a max size, clamped to 32x32 */
        max_pcm_size_minus3 = MIN(seq_param->log2_min_luma_coding_block_size_minus3 + seq_param->log2_diff_max_min_luma_coding_block_size, 2);
    }

    if (pic_param->pic_fields.bits.tiles_enabled_flag)
        loop_filter_across_tiles_enabled_flag = pic_param->pic_fields.bits.loop_filter_across_tiles_enabled_flag;

    /* set zero for encoder */
    loop_filter_across_tiles_enabled_flag = 0;

    BEGIN_BCS_BATCH(batch, 19);

    OUT_BCS_BATCH(batch, HCP_PIC_STATE | (19 - 2));

    /* DW 1: frame size in min-CB units */
    OUT_BCS_BATCH(batch,
                  mfc_context->pic_size.picture_height_in_min_cb_minus1 << 16 |
                  0 << 14 |
                  mfc_context->pic_size.picture_width_in_min_cb_minus1);
    /* DW 2: PCM, transform-block and coding-block size ranges */
    OUT_BCS_BATCH(batch,
                  max_pcm_size_minus3 << 10 |
                  min_pcm_size_minus3 << 8 |
                  (seq_param->log2_min_transform_block_size_minus2 +
                   seq_param->log2_diff_max_min_transform_block_size) << 6 |
                  seq_param->log2_min_transform_block_size_minus2 << 4 |
                  (seq_param->log2_min_luma_coding_block_size_minus3 +
                   seq_param->log2_diff_max_min_luma_coding_block_size) << 2 |
                  seq_param->log2_min_luma_coding_block_size_minus3);
    OUT_BCS_BATCH(batch, 0); /* DW 3, ignored */
    /* DW 4: per-picture coding tool flags */
    OUT_BCS_BATCH(batch,
                  0 << 27 | /* CU packet structure is 0 for SKL */
                  seq_param->seq_fields.bits.strong_intra_smoothing_enabled_flag << 26 |
                  pic_param->pic_fields.bits.transquant_bypass_enabled_flag << 25 |
                  seq_param->seq_fields.bits.amp_enabled_flag << 23 |
                  pic_param->pic_fields.bits.transform_skip_enabled_flag << 22 |
                  0 << 21 | /* 0 for encoder !(pic_param->decoded_curr_pic.flags & VA_PICTURE_HEVC_BOTTOM_FIELD)*/
                  0 << 20 |     /* 0 for encoder !!(pic_param->decoded_curr_pic.flags & VA_PICTURE_HEVC_FIELD_PIC)*/
                  pic_param->pic_fields.bits.weighted_pred_flag << 19 |
                  pic_param->pic_fields.bits.weighted_bipred_flag << 18 |
                  pic_param->pic_fields.bits.tiles_enabled_flag << 17 |                 /* 0 for encoder */
                  pic_param->pic_fields.bits.entropy_coding_sync_enabled_flag << 16 |
                  loop_filter_across_tiles_enabled_flag << 15 |
                  pic_param->pic_fields.bits.sign_data_hiding_enabled_flag << 13 |  /* 0 for encoder */
                  pic_param->log2_parallel_merge_level_minus2 << 10 |               /* 0 for encoder */
                  pic_param->pic_fields.bits.constrained_intra_pred_flag << 9 |     /* 0 for encoder */
                  seq_param->seq_fields.bits.pcm_loop_filter_disabled_flag << 8 |
                  (pic_param->diff_cu_qp_delta_depth & 0x03) << 6 |                 /* 0 for encoder */
                  pic_param->pic_fields.bits.cu_qp_delta_enabled_flag << 5 |        /* 0 for encoder */
                  seq_param->seq_fields.bits.pcm_enabled_flag << 4 |
                  seq_param->seq_fields.bits.sample_adaptive_offset_enabled_flag << 3 | /* 0 for encoder */
                  0);
    /* DW 5: bit depths, transform hierarchy depths, chroma QP offsets */
    OUT_BCS_BATCH(batch,
                  0 << 27 |                 /* 8 bit only for encoder */
                  0 << 24 |                 /* 8 bit only for encoder */
                  pcm_sample_bit_depth_luma_minus1 << 20 |
                  pcm_sample_bit_depth_chroma_minus1 << 16 |
                  seq_param->max_transform_hierarchy_depth_inter << 13 |    /*  for encoder */
                  seq_param->max_transform_hierarchy_depth_intra << 10 |    /*  for encoder */
                  (pic_param->pps_cr_qp_offset & 0x1f) << 5 |
                  (pic_param->pps_cb_qp_offset & 0x1f));
    OUT_BCS_BATCH(batch,
                  0 << 29 | /* must be 0 for encoder */
                  maxctubits); /* DW 6, max LCU bit size allowed for encoder  */
    OUT_BCS_BATCH(batch,
                  0 << 31 | /* frame bitrate max unit */
                  maxframebitrate); /* DW 7, frame bitrate max 0:13   */
    OUT_BCS_BATCH(batch,
                  0 << 31 | /* frame bitrate min unit */
                  minframebitrate); /* DW 8, frame bitrate min 0:13   */
    OUT_BCS_BATCH(batch,
                  maxdeltaframebitrate << 16 | /* frame bitrate max delta ,help to select deltaQP of slice*/
                  mindeltaframebitrate); /* DW 9,(0,14) frame bitrate min delta ,help to select deltaQP of slice*/
    OUT_BCS_BATCH(batch, 0x07050402);   /* DW 10, frame delta qp max */
    OUT_BCS_BATCH(batch, 0x0d0b0908);
    OUT_BCS_BATCH(batch, 0);    /* DW 12, frame delta qp min */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0x04030200);   /* DW 14, frame delta qp max range  */
    OUT_BCS_BATCH(batch, 0x100c0806);   /* DW 15 */
    OUT_BCS_BATCH(batch, 0x04030200);   /* DW 16, frame delta qp min range  */
    OUT_BCS_BATCH(batch, 0x100c0806);
    OUT_BCS_BATCH(batch,
                  0 << 30 |
                  minframesize);    /* DW 18, min frame size units */

    ADVANCE_BCS_BATCH(batch);
}
  582.  
  583.  
/* Emit HCP_INSERT_PAK_OBJECT with an inline payload: raw bitstream bits
 * (e.g. packed NAL headers or trailing data) copied verbatim into the
 * coded output.
 *
 * lenght_in_dws [sic]   payload length in DWs
 * data_bits_in_last_dw  valid bits in the final DW; 0 is treated as 32
 * skip_emul_byte_count  leading bytes exempt from emulation prevention
 * emulation_flag        enable start-code emulation byte insertion
 * is_last_header / is_end_of_slice  PAK framing flags
 * batch                 optional batch override; NULL uses the default
 */
static void
gen9_hcpe_hevc_insert_object(VADriverContextP ctx, struct intel_encoder_context *encoder_context,
                             unsigned int *insert_data, int lenght_in_dws, int data_bits_in_last_dw,
                             int skip_emul_byte_count, int is_last_header, int is_end_of_slice, int emulation_flag,
                             struct intel_batchbuffer *batch)
{
    if (batch == NULL)
        batch = encoder_context->base.batch;

    if (data_bits_in_last_dw == 0)
        data_bits_in_last_dw = 32;

    BEGIN_BCS_BATCH(batch, lenght_in_dws + 2);

    OUT_BCS_BATCH(batch, HCP_INSERT_PAK_OBJECT | (lenght_in_dws + 2 - 2));
    OUT_BCS_BATCH(batch,
                  (0 << 31) |   /* inline payload */
                  (0 << 16) |   /* always start at offset 0 */
                  (0 << 15) |   /* HeaderLengthExcludeFrmSize */
                  (data_bits_in_last_dw << 8) |
                  (skip_emul_byte_count << 4) |
                  (!!emulation_flag << 3) |
                  ((!!is_last_header) << 2) |
                  ((!!is_end_of_slice) << 1) |
                  (0 << 0));    /* Reserved */
    intel_batchbuffer_data(batch, insert_data, lenght_in_dws * 4);

    ADVANCE_BCS_BATCH(batch);
}
  613. /*
// To do (future):
  615. static uint8_t
  616. intel_get_ref_idx_state_1(VAPictureHEVC *va_pic, unsigned int frame_store_id)
  617. {
  618.     unsigned int is_long_term =
  619.         !!(va_pic->flags & VA_PICTURE_HEVC_LONG_TERM_REFERENCE);
  620.     unsigned int is_top_field =
  621.         !!!(va_pic->flags & VA_PICTURE_HEVC_BOTTOM_FIELD);
  622.     unsigned int is_bottom_field =
  623.         !!(va_pic->flags & VA_PICTURE_HEVC_BOTTOM_FIELD);
  624.  
  625.     return ((is_long_term                         << 6) |
  626.             ((is_top_field ^ is_bottom_field ^ 1) << 5) |
  627.             (frame_store_id                       << 1) |
  628.             ((is_top_field ^ 1) & is_bottom_field));
  629. }
  630. */
/* Emit HCP_REF_IDX_STATE (18 DWs) for one reference list (list 0 or 1):
 * per-entry DPB frame-store index, long/short-term flag, and the POC
 * delta to the current picture.  Unused entries are written as zero. */
static void
gen9_hcpe_ref_idx_state_1(struct intel_batchbuffer *batch,
                          int list,
                          struct intel_encoder_context *encoder_context,
                          struct encode_state *encode_state)
{
    int i;
    VAEncPictureParameterBufferHEVC *pic_param = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
    VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
    uint8_t num_ref_minus1 = (list ? slice_param->num_ref_idx_l1_active_minus1 : slice_param->num_ref_idx_l0_active_minus1);
    VAPictureHEVC *ref_list = (list ? slice_param->ref_pic_list1 : slice_param->ref_pic_list0);
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    struct object_surface *obj_surface;
    int frame_index;

    /* sanity-check only: ref_idx_l0 is validated but not used below */
    int ref_idx_l0 = (vme_context->ref_index_in_mb[list] & 0xff);

    if (ref_idx_l0 > 3) {
        WARN_ONCE("ref_idx_l0 is out of range\n");
        ref_idx_l0 = 0;
    }

    /* locate the VME's chosen reference surface in the DPB to obtain
     * its frame-store index */
    obj_surface = vme_context->used_reference_objects[list];
    frame_index = -1;
    for (i = 0; i < 16; i++) {
        if (obj_surface &&
            obj_surface == encode_state->reference_objects[i]) {
            frame_index = i;
            break;
        }
    }
    if (frame_index == -1) {
        WARN_ONCE("RefPicList 0 or 1 is not found in DPB!\n");
    }

    /* NOTE(review): "frame_index == 1" looks suspicious — a check against
     * -1 (not found) seems more plausible here; confirm intent. */
    if(num_ref_minus1 == 0 && frame_index == 1 && list == 0){
        WARN_ONCE("Input ref list is Wrong !\n");
    }

    BEGIN_BCS_BATCH(batch, 18);

    OUT_BCS_BATCH(batch, HCP_REF_IDX_STATE | (18 - 2));
    OUT_BCS_BATCH(batch,
                  num_ref_minus1 << 1 |
                  list);

    for (i = 0; i < 16; i++) {
        if (i < MIN((num_ref_minus1 + 1), 15)) {
            VAPictureHEVC *ref_pic = &ref_list[i];
            VAPictureHEVC *curr_pic = &pic_param->decoded_curr_pic;

            /* POC delta is clamped to signed 8 bits; this driver's CLAMP
             * takes (min, max, value) */
            OUT_BCS_BATCH(batch,
                          1 << 15 |         /* bottom_field_flag 0 */
                          0 << 14 |         /* field_pic_flag 0 */
                          !!(ref_pic->flags & VA_PICTURE_HEVC_LONG_TERM_REFERENCE) << 13 |  /* short term is 1 */
                          0 << 12 | /* disable WP */
                          0 << 11 | /* disable WP */
                          frame_index << 8 |
                          (CLAMP(-128, 127, curr_pic->pic_order_cnt - ref_pic->pic_order_cnt) & 0xff));
        } else {
            OUT_BCS_BATCH(batch, 0);
        }
    }

    ADVANCE_BCS_BATCH(batch);
}
  697.  
  698. void
  699. intel_hcpe_hevc_ref_idx_state(VADriverContextP ctx,
  700.                               struct encode_state *encode_state,
  701.                               struct intel_encoder_context *encoder_context
  702.                              )
  703. {
  704.     struct intel_batchbuffer *batch = encoder_context->base.batch;
  705.     VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
  706.  
  707.     if (slice_param->slice_type == HEVC_SLICE_I)
  708.         return;
  709.  
  710.     gen9_hcpe_ref_idx_state_1(batch, 0, encoder_context, encode_state);
  711.  
  712.     if (slice_param->slice_type == HEVC_SLICE_P)
  713.         return;
  714.  
  715.     gen9_hcpe_ref_idx_state_1(batch, 1, encoder_context, encode_state);
  716. }
  717.  
/*
 * Emit the HCP_SLICE_STATE command (9 DWs) describing the current slice:
 * its CTB position/extent, QP and deblocking offsets, prediction flags,
 * and the PAK bitstream-insertion controls.
 *
 * Only multi-slice layouts that start at the beginning of a CTB row are
 * supported (asserted below).
 */
static void
gen9_hcpe_hevc_slice_state(VADriverContextP ctx,
                           VAEncPictureParameterBufferHEVC *pic_param,
                           VAEncSliceParameterBufferHEVC *slice_param,
                           struct encode_state *encode_state,
                           struct intel_encoder_context *encoder_context,
                           struct intel_batchbuffer *batch)
{
    VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
    int slice_type = slice_param->slice_type;

    /* Derive CTB geometry from the SPS (sizes in luma samples). */
    int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
    int log2_ctb_size = pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
    int ctb_size = 1 << log2_ctb_size;
    int width_in_ctb = (pSequenceParameter->pic_width_in_luma_samples + ctb_size - 1) / ctb_size;
    int height_in_ctb = (pSequenceParameter->pic_height_in_luma_samples + ctb_size - 1) / ctb_size;
    /* Slice is last in the picture when it ends on the final CTB. */
    int last_slice = (((slice_param->slice_segment_address + slice_param->num_ctu_in_slice) == (width_in_ctb * height_in_ctb)) ? 1 : 0);

    int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;

    /* Convert linear CTB addresses to (x, y) CTB coordinates. */
    slice_hor_pos = slice_param->slice_segment_address % width_in_ctb;
    slice_ver_pos = slice_param->slice_segment_address / width_in_ctb;

    next_slice_hor_pos = (slice_param->slice_segment_address + slice_param->num_ctu_in_slice) % width_in_ctb;
    next_slice_ver_pos = (slice_param->slice_segment_address + slice_param->num_ctu_in_slice) / width_in_ctb;

    /* only support multi slice begin from row start address */
    assert((slice_param->slice_segment_address % width_in_ctb) == 0);

    /* For the last slice the "next slice" position wraps: a whole-frame
     * slice points one row past the bottom, otherwise it is zeroed. */
    if (last_slice == 1) {
        if (slice_param->slice_segment_address == 0) {
            next_slice_hor_pos = 0;
            next_slice_ver_pos = height_in_ctb;
        } else {
            next_slice_hor_pos = 0;
            next_slice_ver_pos = 0;
        }
    }

    BEGIN_BCS_BATCH(batch, 9);

    OUT_BCS_BATCH(batch, HCP_SLICE_STATE | (9 - 2));

    /* DW1/DW2: current and next slice start positions in CTB units. */
    OUT_BCS_BATCH(batch,
                  slice_ver_pos << 16 |
                  slice_hor_pos);
    OUT_BCS_BATCH(batch,
                  next_slice_ver_pos << 16 |
                  next_slice_hor_pos);
    /* DW3: chroma QP offsets (5-bit two's complement), slice QP,
     * TMVP/dependent-slice flags, last-slice marker and slice type. */
    OUT_BCS_BATCH(batch,
                  (slice_param->slice_cr_qp_offset & 0x1f) << 17 |
                  (slice_param->slice_cb_qp_offset & 0x1f) << 12 |
                  (pic_param->pic_init_qp + slice_param->slice_qp_delta) << 6 |
                  slice_param->slice_fields.bits.slice_temporal_mvp_enabled_flag << 5 |
                  slice_param->slice_fields.bits.dependent_slice_segment_flag << 4 |
                  last_slice << 2 |
                  slice_type);
    /* DW4: merge candidates, CABAC init, weighted-prediction denominators,
     * SAO/deblocking controls (offsets are 4-bit two's complement). */
    OUT_BCS_BATCH(batch,
                  0 << 26 |
                  (slice_param->max_num_merge_cand - 1)  << 23 |
                  slice_param->slice_fields.bits.cabac_init_flag << 22 |
                  slice_param->luma_log2_weight_denom << 19 |
                  (slice_param->luma_log2_weight_denom + slice_param->delta_chroma_log2_weight_denom) << 16 |
                  slice_param->slice_fields.bits.collocated_from_l0_flag << 15 |
                  (slice_type != HEVC_SLICE_B) << 14 |
                  slice_param->slice_fields.bits.mvd_l1_zero_flag << 13 |
                  slice_param->slice_fields.bits.slice_sao_luma_flag << 12 |
                  slice_param->slice_fields.bits.slice_sao_chroma_flag << 11 |
                  slice_param->slice_fields.bits.slice_loop_filter_across_slices_enabled_flag << 10 |
                  (slice_param->slice_beta_offset_div2 & 0xf) << 5 |
                  (slice_param->slice_tc_offset_div2 & 0xf) << 1 |
                  slice_param->slice_fields.bits.slice_deblocking_filter_disabled_flag);
    OUT_BCS_BATCH(batch, 0); /* DW 5 ,ignore for encoder.*/
    /* DW6: NOTE(review): constants 4/4 at bits 26/20 — encoder rounding
     * controls per HW programming, values taken as-is; confirm vs PRM. */
    OUT_BCS_BATCH(batch,
                  4 << 26 |
                  4 << 20 |
                  0);
    /* DW7: PAK bitstream output controls. */
    OUT_BCS_BATCH(batch,
                  1 << 10 |  /* header insertion enable */
                  1 << 9  |  /* slice data enable */
                  1 << 8  |  /* tail insertion enable, must at end of frame, not slice */
                  1 << 2  |  /* RBSP or EBSP, EmulationByteSliceInsertEnable */
                  1 << 1  |  /* cabacZeroWordInsertionEnable */
                  0);        /* Ignored for decoding */
    OUT_BCS_BATCH(batch, 0); /* PAK-BSE data start offset */

    ADVANCE_BCS_BATCH(batch);
}
  806.  
  807. /* HEVC pipe line related */
  808. static void gen9_hcpe_hevc_pipeline_picture_programing(VADriverContextP ctx,
  809.         struct encode_state *encode_state,
  810.         struct intel_encoder_context *encoder_context)
  811. {
  812.     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
  813.  
  814.     mfc_context->pipe_mode_select(ctx, HCP_CODEC_HEVC, encoder_context);
  815.     mfc_context->set_surface_state(ctx, encode_state, encoder_context);
  816.     gen9_hcpe_pipe_buf_addr_state(ctx, encode_state, encoder_context);
  817.     mfc_context->ind_obj_base_addr_state(ctx, encoder_context);
  818.  
  819.     mfc_context->qm_state(ctx, encoder_context);
  820.     mfc_context->fqm_state(ctx, encoder_context);
  821.     mfc_context->pic_state(ctx, encode_state, encoder_context);
  822.     intel_hcpe_hevc_ref_idx_state(ctx, encode_state, encoder_context);
  823. }
  824.  
  825. static void gen9_hcpe_init(VADriverContextP ctx,
  826.                            struct encode_state *encode_state,
  827.                            struct intel_encoder_context *encoder_context)
  828. {
  829.     /* to do */
  830.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  831.     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
  832.     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
  833.     VAEncSliceParameterBufferHEVC *slice_param = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
  834.     dri_bo *bo;
  835.     int i, size = 0;
  836.     int slice_batchbuffer_size;
  837.     int slice_type = slice_param->slice_type;
  838.     int is_inter = (slice_type != HEVC_SLICE_I);
  839.  
  840.     int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
  841.     int log2_ctb_size = pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
  842.     int ctb_size = 1 << log2_ctb_size;
  843.     int cu_size  = 1 << log2_cu_size;
  844.  
  845.     int width_in_ctb  = ALIGN(pSequenceParameter->pic_width_in_luma_samples , ctb_size) / ctb_size;
  846.     int height_in_ctb = ALIGN(pSequenceParameter->pic_height_in_luma_samples, ctb_size) / ctb_size;
  847.     int width_in_cu  = ALIGN(pSequenceParameter->pic_width_in_luma_samples , cu_size) / cu_size;
  848.     int height_in_cu = ALIGN(pSequenceParameter->pic_height_in_luma_samples, cu_size) / cu_size;
  849.     int width_in_mb  = ALIGN(pSequenceParameter->pic_width_in_luma_samples , 16) / 16;
  850.     int height_in_mb = ALIGN(pSequenceParameter->pic_height_in_luma_samples, 16) / 16;
  851.  
  852.     int num_cu_record = 64;
  853.  
  854.     if (log2_ctb_size == 5) num_cu_record = 16;
  855.     else if (log2_ctb_size == 4) num_cu_record = 4;
  856.     else if (log2_ctb_size == 6) num_cu_record = 64;
  857.  
  858.     /* frame size in samples, cu,ctu, mb */
  859.     mfc_context->pic_size.picture_width_in_samples = pSequenceParameter->pic_width_in_luma_samples;
  860.     mfc_context->pic_size.picture_height_in_samples = pSequenceParameter->pic_height_in_luma_samples;
  861.     mfc_context->pic_size.ctb_size = ctb_size;
  862.     mfc_context->pic_size.picture_width_in_ctbs = width_in_ctb;
  863.     mfc_context->pic_size.picture_height_in_ctbs = height_in_ctb;
  864.     mfc_context->pic_size.min_cb_size = cu_size;
  865.     mfc_context->pic_size.picture_width_in_min_cb_minus1 = width_in_cu - 1;
  866.     mfc_context->pic_size.picture_height_in_min_cb_minus1 = height_in_cu - 1;
  867.     mfc_context->pic_size.picture_width_in_mbs = width_in_mb;
  868.     mfc_context->pic_size.picture_height_in_mbs = height_in_mb;
  869.  
  870.     slice_batchbuffer_size = 64 * width_in_ctb * width_in_ctb + 4096 +
  871.                              (SLICE_HEADER + SLICE_TAIL) * encode_state->num_slice_params_ext;
  872.  
  873.     /*Encode common setup for HCP*/
  874.     /*deblocking */
  875.     dri_bo_unreference(mfc_context->deblocking_filter_line_buffer.bo);
  876.     mfc_context->deblocking_filter_line_buffer.bo = NULL;
  877.  
  878.     dri_bo_unreference(mfc_context->deblocking_filter_tile_line_buffer.bo);
  879.     mfc_context->deblocking_filter_tile_line_buffer.bo = NULL;
  880.  
  881.     dri_bo_unreference(mfc_context->deblocking_filter_tile_column_buffer.bo);
  882.     mfc_context->deblocking_filter_tile_column_buffer.bo = NULL;
  883.  
  884.     /* input source */
  885.     dri_bo_unreference(mfc_context->uncompressed_picture_source.bo);
  886.     mfc_context->uncompressed_picture_source.bo = NULL;
  887.  
  888.     /* metadata */
  889.     dri_bo_unreference(mfc_context->metadata_line_buffer.bo);
  890.     mfc_context->metadata_line_buffer.bo = NULL;
  891.  
  892.     dri_bo_unreference(mfc_context->metadata_tile_line_buffer.bo);
  893.     mfc_context->metadata_tile_line_buffer.bo = NULL;
  894.  
  895.     dri_bo_unreference(mfc_context->metadata_tile_column_buffer.bo);
  896.     mfc_context->metadata_tile_column_buffer.bo = NULL;
  897.  
  898.     /* sao */
  899.     dri_bo_unreference(mfc_context->sao_line_buffer.bo);
  900.     mfc_context->sao_line_buffer.bo = NULL;
  901.  
  902.     dri_bo_unreference(mfc_context->sao_tile_line_buffer.bo);
  903.     mfc_context->sao_tile_line_buffer.bo = NULL;
  904.  
  905.     dri_bo_unreference(mfc_context->sao_tile_column_buffer.bo);
  906.     mfc_context->sao_tile_column_buffer.bo = NULL;
  907.  
  908.     /* mv temporal buffer */
  909.     for (i = 0; i < NUM_HCP_CURRENT_COLLOCATED_MV_TEMPORAL_BUFFERS; i++) {
  910.         if (mfc_context->current_collocated_mv_temporal_buffer[i].bo != NULL)
  911.             dri_bo_unreference(mfc_context->current_collocated_mv_temporal_buffer[i].bo);
  912.         mfc_context->current_collocated_mv_temporal_buffer[i].bo = NULL;
  913.     }
  914.  
  915.     /* reference */
  916.     for (i = 0; i < MAX_HCP_REFERENCE_SURFACES; i++) {
  917.         if (mfc_context->reference_surfaces[i].bo != NULL)
  918.             dri_bo_unreference(mfc_context->reference_surfaces[i].bo);
  919.         mfc_context->reference_surfaces[i].bo = NULL;
  920.     }
  921.  
  922.     /* indirect data CU recording */
  923.     dri_bo_unreference(mfc_context->hcp_indirect_cu_object.bo);
  924.     mfc_context->hcp_indirect_cu_object.bo = NULL;
  925.  
  926.     dri_bo_unreference(mfc_context->hcp_indirect_pak_bse_object.bo);
  927.     mfc_context->hcp_indirect_pak_bse_object.bo = NULL;
  928.  
  929.     /* Current internal buffer for HCP */
  930.  
  931.     size = ALIGN(pSequenceParameter->pic_width_in_luma_samples, 32) >> 3;
  932.     size <<= 6;
  933.     ALLOC_ENCODER_BUFFER((&mfc_context->deblocking_filter_line_buffer), "line buffer", size);
  934.     ALLOC_ENCODER_BUFFER((&mfc_context->deblocking_filter_tile_line_buffer), "tile line buffer", size);
  935.  
  936.     size = ALIGN(pSequenceParameter->pic_height_in_luma_samples + 6 * width_in_ctb, 32) >> 3;
  937.     size <<= 6;
  938.     ALLOC_ENCODER_BUFFER((&mfc_context->deblocking_filter_tile_column_buffer), "tile column buffer", size);
  939.  
  940.     if (is_inter) {
  941.         size = (((pSequenceParameter->pic_width_in_luma_samples + 15) >> 4) * 188 + 9 * width_in_ctb + 1023) >> 9;
  942.         size <<= 6;
  943.         ALLOC_ENCODER_BUFFER((&mfc_context->metadata_line_buffer), "metadata line buffer", size);
  944.  
  945.         size = (((pSequenceParameter->pic_width_in_luma_samples + 15) >> 4) * 172 + 9 * width_in_ctb + 1023) >> 9;
  946.         size <<= 6;
  947.         ALLOC_ENCODER_BUFFER((&mfc_context->metadata_tile_line_buffer), "metadata tile line buffer", size);
  948.  
  949.         size = (((pSequenceParameter->pic_height_in_luma_samples + 15) >> 4) * 176 + 89 * width_in_ctb + 1023) >> 9;
  950.         size <<= 6;
  951.         ALLOC_ENCODER_BUFFER((&mfc_context->metadata_tile_column_buffer), "metadata tile column buffer", size);
  952.     } else {
  953.         size = (pSequenceParameter->pic_width_in_luma_samples + 8 * width_in_ctb + 1023) >> 9;
  954.         size <<= 6;
  955.         ALLOC_ENCODER_BUFFER((&mfc_context->metadata_line_buffer), "metadata line buffer", size);
  956.  
  957.         size = (pSequenceParameter->pic_width_in_luma_samples + 16 * width_in_ctb + 1023) >> 9;
  958.         size <<= 6;
  959.         ALLOC_ENCODER_BUFFER((&mfc_context->metadata_tile_line_buffer), "metadata tile line buffer", size);
  960.  
  961.         size = (pSequenceParameter->pic_height_in_luma_samples + 8 * height_in_ctb + 1023) >> 9;
  962.         size <<= 6;
  963.         ALLOC_ENCODER_BUFFER((&mfc_context->metadata_tile_column_buffer), "metadata tile column buffer", size);
  964.     }
  965.  
  966.     size = ALIGN(((pSequenceParameter->pic_width_in_luma_samples >> 1) + 3 * width_in_ctb), 16) >> 3;
  967.     size <<= 6;
  968.     ALLOC_ENCODER_BUFFER((&mfc_context->sao_line_buffer), "sao line buffer", size);
  969.  
  970.     size = ALIGN(((pSequenceParameter->pic_width_in_luma_samples >> 1) + 6 * width_in_ctb), 16) >> 3;
  971.     size <<= 6;
  972.     ALLOC_ENCODER_BUFFER((&mfc_context->sao_tile_line_buffer), "sao tile line buffer", size);
  973.  
  974.     size = ALIGN(((pSequenceParameter->pic_height_in_luma_samples >> 1) + 6 * height_in_ctb), 16) >> 3;
  975.     size <<= 6;
  976.     ALLOC_ENCODER_BUFFER((&mfc_context->sao_tile_column_buffer), "sao tile column buffer", size);
  977.  
  978.     /////////////////////
  979.     dri_bo_unreference(mfc_context->hcp_indirect_cu_object.bo);
  980.     bo = dri_bo_alloc(i965->intel.bufmgr,
  981.                       "Indirect data CU Buffer",
  982.                       width_in_ctb * height_in_ctb * num_cu_record * 16 * 4,
  983.                       0x1000);
  984.     assert(bo);
  985.     mfc_context->hcp_indirect_cu_object.bo = bo;
  986.  
  987.     /* to do pak bse object buffer */
  988.     /* to do current collocated mv temporal buffer */
  989.  
  990.     dri_bo_unreference(mfc_context->hcp_batchbuffer_surface.bo);
  991.     mfc_context->hcp_batchbuffer_surface.bo = NULL;
  992.  
  993.     dri_bo_unreference(mfc_context->aux_batchbuffer_surface.bo);
  994.     mfc_context->aux_batchbuffer_surface.bo = NULL;
  995.  
  996.     if (mfc_context->aux_batchbuffer)
  997.         intel_batchbuffer_free(mfc_context->aux_batchbuffer);
  998.  
  999.     mfc_context->aux_batchbuffer = intel_batchbuffer_new(&i965->intel, I915_EXEC_BSD, slice_batchbuffer_size);
  1000.     mfc_context->aux_batchbuffer_surface.bo = mfc_context->aux_batchbuffer->buffer;
  1001.     dri_bo_reference(mfc_context->aux_batchbuffer_surface.bo);
  1002.     mfc_context->aux_batchbuffer_surface.pitch = 16;
  1003.     mfc_context->aux_batchbuffer_surface.num_blocks = mfc_context->aux_batchbuffer->size / 16;
  1004.     mfc_context->aux_batchbuffer_surface.size_block = 16;
  1005. }
  1006.  
  1007. static VAStatus gen9_hcpe_run(VADriverContextP ctx,
  1008.                               struct encode_state *encode_state,
  1009.                               struct intel_encoder_context *encoder_context)
  1010. {
  1011.     struct intel_batchbuffer *batch = encoder_context->base.batch;
  1012.  
  1013.     intel_batchbuffer_flush(batch);     //run the pipeline
  1014.  
  1015.     return VA_STATUS_SUCCESS;
  1016. }
  1017.  
  1018.  
  1019. static VAStatus
  1020. gen9_hcpe_stop(VADriverContextP ctx,
  1021.                struct encode_state *encode_state,
  1022.                struct intel_encoder_context *encoder_context,
  1023.                int *encoded_bits_size)
  1024. {
  1025.     VAStatus vaStatus = VA_STATUS_ERROR_UNKNOWN;
  1026.     VAEncPictureParameterBufferHEVC *pPicParameter = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
  1027.     VACodedBufferSegment *coded_buffer_segment;
  1028.  
  1029.     vaStatus = i965_MapBuffer(ctx, pPicParameter->coded_buf, (void **)&coded_buffer_segment);
  1030.     assert(vaStatus == VA_STATUS_SUCCESS);
  1031.     *encoded_bits_size = coded_buffer_segment->size * 8;
  1032.     i965_UnmapBuffer(ctx, pPicParameter->coded_buf);
  1033.  
  1034.     return VA_STATUS_SUCCESS;
  1035. }
  1036.  
  1037.  
  1038. int intel_hevc_find_skipemulcnt(unsigned char *buf, int bits_length)
  1039. {
  1040.     /* to do */
  1041.     int i, found;
  1042.     int leading_zero_cnt, byte_length, zero_byte;
  1043.     int nal_unit_type;
  1044.     int skip_cnt = 0;
  1045.  
  1046. #define NAL_UNIT_TYPE_MASK 0x7e
  1047. #define HW_MAX_SKIP_LENGTH 15
  1048.  
  1049.     byte_length = ALIGN(bits_length, 32) >> 3;
  1050.  
  1051.  
  1052.     leading_zero_cnt = 0;
  1053.     found = 0;
  1054.     for (i = 0; i < byte_length - 4; i++) {
  1055.         if (((buf[i] == 0) && (buf[i + 1] == 0) && (buf[i + 2] == 1)) ||
  1056.             ((buf[i] == 0) && (buf[i + 1] == 0) && (buf[i + 2] == 0) && (buf[i + 3] == 1))) {
  1057.             found = 1;
  1058.             break;
  1059.         }
  1060.         leading_zero_cnt++;
  1061.     }
  1062.     if (!found) {
  1063.         /* warning message is complained. But anyway it will be inserted. */
  1064.         WARN_ONCE("Invalid packed header data. "
  1065.                   "Can't find the 000001 start_prefix code\n");
  1066.         return 0;
  1067.     }
  1068.     i = leading_zero_cnt;
  1069.  
  1070.     zero_byte = 0;
  1071.     if (!((buf[i] == 0) && (buf[i + 1] == 0) && (buf[i + 2] == 1)))
  1072.         zero_byte = 1;
  1073.  
  1074.     skip_cnt = leading_zero_cnt + zero_byte + 3;
  1075.  
  1076.     /* the unit header byte is accounted */
  1077.     nal_unit_type = (buf[skip_cnt]) & NAL_UNIT_TYPE_MASK;
  1078.     skip_cnt += 1;
  1079.     skip_cnt += 1;  /* two bytes length of nal headers in hevc */
  1080.  
  1081.     if (nal_unit_type == 14 || nal_unit_type == 20 || nal_unit_type == 21) {
  1082.         /* more unit header bytes are accounted for MVC/SVC */
  1083.         //skip_cnt += 3;
  1084.     }
  1085.     if (skip_cnt > HW_MAX_SKIP_LENGTH) {
  1086.         WARN_ONCE("Too many leading zeros are padded for packed data. "
  1087.                   "It is beyond the HW range.!!!\n");
  1088.     }
  1089.     return skip_cnt;
  1090. }
  1091.  
  1092. #ifdef HCP_SOFTWARE_SKYLAKE
  1093.  
  1094. static int
  1095. gen9_hcpe_hevc_pak_object(VADriverContextP ctx, int lcu_x, int lcu_y, int isLast_ctb,
  1096.                           struct intel_encoder_context *encoder_context,
  1097.                           int cu_count_in_lcu, unsigned int split_coding_unit_flag,
  1098.                           struct intel_batchbuffer *batch)
  1099. {
  1100.     int len_in_dwords = 3;
  1101.  
  1102.     if (batch == NULL)
  1103.         batch = encoder_context->base.batch;
  1104.  
  1105.     BEGIN_BCS_BATCH(batch, len_in_dwords);
  1106.  
  1107.     OUT_BCS_BATCH(batch, HCP_PAK_OBJECT | (len_in_dwords - 2));
  1108.     OUT_BCS_BATCH(batch,
  1109.                   (((isLast_ctb > 0) ? 1 : 0) << 31) |  /* last ctb?*/
  1110.                   ((cu_count_in_lcu - 1) << 24) |           /* No motion vector */
  1111.                   split_coding_unit_flag);
  1112.  
  1113.     OUT_BCS_BATCH(batch, (lcu_y << 16) | lcu_x);        /* LCU  for Y*/
  1114.  
  1115.     ADVANCE_BCS_BATCH(batch);
  1116.  
  1117.     return len_in_dwords;
  1118. }
  1119.  
/* Offsets (in DWs) and masks for decoding the AVC VME output message
 * that the HEVC PAK path reuses (1 CU == 1 MB == 16x16 here). */
#define     AVC_INTRA_RDO_OFFSET    4
#define     AVC_INTER_RDO_OFFSET    10
#define     AVC_INTER_MSG_OFFSET    8
#define     AVC_INTER_MV_OFFSET     48
#define     AVC_RDO_MASK            0xFFFF

/* Intra MB mode field in msg[0]: bits [5:4]. */
#define     AVC_INTRA_MODE_MASK     0x30
#define     AVC_INTRA_16X16         0x00
#define     AVC_INTRA_8X8           0x01
#define     AVC_INTRA_4X4           0x02

/* Inter MB partition field in msg[0]: bits [1:0]. */
#define     AVC_INTER_MODE_MASK     0x03
#define     AVC_INTER_8X8           0x03
#define     AVC_INTER_8X16          0x02
#define     AVC_INTER_16X8          0x01
#define     AVC_INTER_16X16         0x00
#define     AVC_SUBMB_SHAPE_MASK    0x00FF00

/* VME output message, write back message */
#define     AVC_INTER_SUBMB_PRE_MODE_MASK       0x00ff0000
/* NOTE(review): duplicate of the identical AVC_SUBMB_SHAPE_MASK define
 * above — harmless (same value) but one copy could be dropped. */
#define     AVC_SUBMB_SHAPE_MASK    0x00FF00
  1141.  
  1142. /* here 1 MB = 1CU = 16x16 */
/*
 * Fill one 16-DW CU record in the indirect CU buffer for an INTRA CU,
 * translating the AVC VME output message (msg) into HEVC CU fields.
 * Here 1 MB == 1 CU == 16x16, so the MB address doubles as the CU
 * address. AVC intra prediction modes are mapped to the nearest HEVC
 * intra angular/planar modes via lookup tables.
 *
 * qp                : CU QP to record
 * msg               : VME write-back message for this MB
 * ctb_x/ctb_y       : CTB coordinates of the enclosing CTB
 * mb_x/mb_y         : MB coordinates (currently unused below)
 * num_cu_record     : CU records per CTB in the indirect buffer
 * cu_index/index    : CU slot within the CTB / sub-block selector
 */
static void
gen9_hcpe_hevc_fill_indirect_cu_intra(VADriverContextP ctx,
                                      struct encode_state *encode_state,
                                      struct intel_encoder_context *encoder_context,
                                      int qp, unsigned int *msg,
                                      int ctb_x, int ctb_y,
                                      int mb_x, int mb_y,
                                      int ctb_width_in_mb, int width_in_ctb, int num_cu_record, int slice_type,int cu_index,int index)
{
    /* here cu == mb, so we use mb address as the cu address */
    /* to fill the indirect cu by the vme out */
    /* AVC intra mode -> HEVC intra mode lookup tables. */
    static int intra_mode_8x8_avc2hevc[9] = {26, 10, 1, 34, 18, 24, 13, 28, 8};
    static int intra_mode_16x16_avc2hevc[4] = {26, 10, 1, 34};
    struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
    unsigned char * cu_record_ptr = NULL;
    unsigned int * cu_msg = NULL;
    /* Byte offset of this CU's 16-DW (64-byte) record. */
    int ctb_address = (ctb_y * width_in_ctb + ctb_x) * num_cu_record;
    int mb_address_in_ctb = 0;
    int cu_address = (ctb_address + mb_address_in_ctb + cu_index) * 16 * 4;
    int zero = 0;
    int is_inter = 0;
    int intraMbMode = 0;
    int cu_part_mode = 0;
    int intraMode[4];
    int inerpred_idc = 0;
    int intra_chroma_mode = 5;
    int cu_size = 1;
    int tu_size = 0x55;
    int tu_count = 4;

    /* Intra CU: mark all four prediction units as "no inter prediction". */
    if (!is_inter) inerpred_idc = 0xff;

    intraMbMode = (msg[0] & AVC_INTRA_MODE_MASK) >> 4;


    /* NOTE(review): the 8x8/4x4 paths index the 9-entry table with a
     * 4-bit nibble from msg[1]; values 9..15 would read out of bounds.
     * Presumably the VME HW only emits modes 0..8 — confirm. */
    if (intraMbMode == AVC_INTRA_16X16) {
        cu_part_mode = 0; //2Nx2N
        intra_chroma_mode = 5;
        cu_size = 1;
        tu_size = 0x55;
        tu_count = 4;
        intraMode[0] = intra_mode_16x16_avc2hevc[msg[1] & 0xf];
        intraMode[1] = intra_mode_16x16_avc2hevc[msg[1] & 0xf];
        intraMode[2] = intra_mode_16x16_avc2hevc[msg[1] & 0xf];
        intraMode[3] = intra_mode_16x16_avc2hevc[msg[1] & 0xf];
    } else if (intraMbMode == AVC_INTRA_8X8) {
        cu_part_mode = 0; //2Nx2N
        intra_chroma_mode = 5;
        cu_size = 0;
        tu_size = 0;
        tu_count = 4;
        intraMode[0] = intra_mode_8x8_avc2hevc[msg[1] >> (index << 2) & 0xf];
        intraMode[1] = intra_mode_8x8_avc2hevc[msg[1] >> (index << 2) & 0xf];
        intraMode[2] = intra_mode_8x8_avc2hevc[msg[1] >> (index << 2) & 0xf];
        intraMode[3] = intra_mode_8x8_avc2hevc[msg[1] >> (index << 2) & 0xf];

    } else { // for 4x4 to use 8x8 replace
        cu_part_mode = 3; //NxN
        intra_chroma_mode = 0;
        cu_size = 0;
        tu_size = 0;
        tu_count = 4;
        intraMode[0] = intra_mode_8x8_avc2hevc[msg[1] >> ((index << 4) + 0) & 0xf];
        intraMode[1] = intra_mode_8x8_avc2hevc[msg[1] >> ((index << 4) + 4) & 0xf];
        intraMode[2] = intra_mode_8x8_avc2hevc[msg[1] >> ((index << 4) + 8) & 0xf];
        intraMode[3] = intra_mode_8x8_avc2hevc[msg[1] >> ((index << 4) + 12) & 0xf];

    }

    /* Assumes the indirect CU bo is already mapped (bo->virtual valid). */
    cu_record_ptr = (unsigned char *)mfc_context->hcp_indirect_cu_object.bo->virtual;
    /* get the mb info from the vme out */
    cu_msg = (unsigned int *)(cu_record_ptr + cu_address);

    /* DW0: prediction/partition/QP/size description of the CU. */
    cu_msg[0] = (inerpred_idc << 24 |   /* interpred_idc[3:0][1:0] */
                 zero << 23 |   /* reserved */
                 qp << 16 | /* CU_qp */
                 zero << 11 |   /* reserved */
                 intra_chroma_mode << 8 |   /* intra_chroma_mode */
                 zero << 7 |    /* IPCM_enable , reserved for SKL*/
                 cu_part_mode << 4 |    /* cu_part_mode */
                 zero << 3 |    /* cu_transquant_bypass_flag */
                 is_inter << 2 |    /* cu_pred_mode :intra 1,inter 1*/
                 cu_size          /* cu_size */
                );
    /* DW1: the four luma intra prediction modes, one per 8-bit lane. */
    cu_msg[1] = (zero << 30 |   /* reserved  */
                 intraMode[3] << 24 |   /* intra_mode */
                 zero << 22 |   /* reserved  */
                 intraMode[2] << 16 |   /* intra_mode */
                 zero << 14 |   /* reserved  */
                 intraMode[1] << 8 |    /* intra_mode */
                 zero << 6 |    /* reserved  */
                 intraMode[0]           /* intra_mode */
                );
    /* l0: 4 MV (x,y); l1; 4 MV (x,y) */
    /* DW2-DW9: all motion vectors zero — this is an intra CU. */
    cu_msg[2] = (zero << 16 |   /* mvx_l0[1]  */
                 zero           /* mvx_l0[0] */
                );
    cu_msg[3] = (zero << 16 |   /* mvx_l0[3]  */
                 zero           /* mvx_l0[2] */
                );
    cu_msg[4] = (zero << 16 |   /* mvy_l0[1]  */
                 zero           /* mvy_l0[0] */
                );
    cu_msg[5] = (zero << 16 |   /* mvy_l0[3]  */
                 zero           /* mvy_l0[2] */
                );

    cu_msg[6] = (zero << 16 |   /* mvx_l1[1]  */
                 zero           /* mvx_l1[0] */
                );
    cu_msg[7] = (zero << 16 |   /* mvx_l1[3]  */
                 zero           /* mvx_l1[2] */
                );
    cu_msg[8] = (zero << 16 |   /* mvy_l1[1]  */
                 zero           /* mvy_l1[0] */
                );
    cu_msg[9] = (zero << 16 |   /* mvy_l1[3]  */
                 zero           /* mvy_l1[2] */
                );

    /* DW10: reference indices, all zero for intra. */
    cu_msg[10] = (zero << 28 |  /* ref_idx_l1[3]  */
                  zero << 24 |  /* ref_idx_l1[2] */
                  zero << 20 |  /* ref_idx_l1[1]  */
                  zero << 16 |  /* ref_idx_l1[0] */
                  zero << 12 |  /* ref_idx_l0[3]  */
                  zero << 8 |   /* ref_idx_l0[2] */
                  zero << 4 |   /* ref_idx_l0[1]  */
                  zero          /* ref_idx_l0[0] */
                 );

    /* DW11-DW13: transform unit layout and skip flags. */
    cu_msg[11] = tu_size; /* tu_size 00000000 00000000 00000000 10101010  or 0x0*/
    cu_msg[12] = ((tu_count - 1) << 28 | /* tu count - 1 */
                  zero << 16 |  /* reserved  */
                  zero          /* tu_xform_Yskip[15:0] */
                 );
    cu_msg[13] = (zero << 16 |  /* tu_xform_Vskip[15:0]  */
                  zero          /* tu_xform_Uskip[15:0] */
                 );
    /* DW14-DW15: reserved. */
    cu_msg[14] = zero ;
    cu_msg[15] = zero ;
}
  1284.  
/* here 1 MB = 1CU = 16x16 */
/*
 * Fill one 16-dword indirect CU record for an inter-predicted CU from the
 * VME output message of the corresponding macroblock.
 *
 * The record is written into mfc_context->hcp_indirect_cu_object (the bo
 * must already be mapped by the caller) at the byte offset computed from
 * (ctb_y * width_in_ctb + ctb_x) * num_cu_record + cu_index, 16 dwords per
 * record.  The VME motion vectors (AVC sub-block layout) are repacked into
 * the four-MV-per-list layout expected by the HCP PAK.
 *
 * @param qp         CU quantization parameter written into the record
 * @param msg        VME output message for this MB (already advanced to the
 *                   inter portion by the caller)
 * @param cu_index   CU slot within the CTB's record array
 * @param index      8x8 sub-block index (only used for AVC_INTER_8X8)
 *
 * NOTE(review): encode_state, mb_x, mb_y, ctb_width_in_mb and slice_type are
 * unused here; presumably kept so the signature mirrors the intra variant —
 * confirm before removing.
 */
static void
gen9_hcpe_hevc_fill_indirect_cu_inter(VADriverContextP ctx,
                                      struct encode_state *encode_state,
                                      struct intel_encoder_context *encoder_context,
                                      int qp, unsigned int *msg,
                                      int ctb_x, int ctb_y,
                                      int mb_x, int mb_y,
                                      int ctb_width_in_mb, int width_in_ctb, int num_cu_record, int slice_type, int cu_index,int index)
{
    /* here cu == mb, so we use mb address as the cu address */
    /* to fill the indirect cu by the vme out */
    struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    unsigned char * cu_record_ptr = NULL;
    unsigned int * cu_msg = NULL;
    int ctb_address = (ctb_y * width_in_ctb + ctb_x) * num_cu_record;
    int mb_address_in_ctb = 0;  /* always 0: cu_index alone selects the slot within the CTB */
    int cu_address = (ctb_address + mb_address_in_ctb + cu_index) * 16 * 4; /* byte offset: 16 dwords/record */
    int zero = 0;
    int cu_part_mode = 0;       /* partition mode field; set per inter_mode below */
    int submb_pre_mode = 0;     /* per-partition prediction direction bits from VME msg[1] */
    int is_inter = 1;           /* this helper only emits inter CUs */
    int cu_size = 1;            /* 1 for a 16x16 CU, 0 for the 8x8 case */
    int tu_size = 0x55;         /* packed TU sizes: 0x55 for 16x16 CU, 0x0 for 8x8 */
    int tu_count = 4;
    int inter_mode = 0;

    unsigned int *mv_ptr;
    {
        inter_mode = (msg[0] & AVC_INTER_MODE_MASK);
        submb_pre_mode = (msg[1] & AVC_INTER_SUBMB_PRE_MODE_MASK) >> 16;
#define MSG_MV_OFFSET   4
        mv_ptr = msg + MSG_MV_OFFSET;
        /* MV of VME output is based on 16 sub-blocks. So it is necessary
        * to convert them to be compatible with the format of AVC_PAK
        * command.
        */
        /* 0/2/4/6/8... : l0, 1/3/5/7...: l1 ; now it only support 16x16,16x8,8x16,8x8*/

        if (inter_mode == AVC_INTER_16X16) {
            /* a single MV pair is replicated into all four partition slots */
            mv_ptr[4] = mv_ptr[0];
            mv_ptr[5] = mv_ptr[1];
            mv_ptr[2] = mv_ptr[0];
            mv_ptr[3] = mv_ptr[1];
            mv_ptr[6] = mv_ptr[0];
            mv_ptr[7] = mv_ptr[1];
            cu_part_mode = 0;
            cu_size = 1;
            tu_size = 0x55;
            tu_count = 4;
        } else if (inter_mode == AVC_INTER_8X16) {
            /* left half keeps MV[0], right half takes the MV at sub-block 8 */
            mv_ptr[4] = mv_ptr[0];
            mv_ptr[5] = mv_ptr[1];
            mv_ptr[2] = mv_ptr[8];
            mv_ptr[3] = mv_ptr[9];
            mv_ptr[6] = mv_ptr[8];
            mv_ptr[7] = mv_ptr[9];
            cu_part_mode = 1;
            cu_size = 1;
            tu_size = 0x55;
            tu_count = 4;
        } else if (inter_mode == AVC_INTER_16X8) {
            /* top half keeps MV[0], bottom half takes MVs at sub-blocks 16/24 */
            mv_ptr[2] = mv_ptr[0];
            mv_ptr[3] = mv_ptr[1];
            mv_ptr[4] = mv_ptr[16];
            mv_ptr[5] = mv_ptr[17];
            mv_ptr[6] = mv_ptr[24];
            mv_ptr[7] = mv_ptr[25];
            cu_part_mode = 2;
            cu_size = 1;
            tu_size = 0x55;
            tu_count = 4;
        }else if(inter_mode == AVC_INTER_8X8) {
            /* one 8x8 CU per call: pick the MV pair of sub-block `index`
             * and replicate it into all four slots of this record */
            mv_ptr[0] = mv_ptr[index * 8 + 0 ];
            mv_ptr[1] = mv_ptr[index * 8 + 1 ];
            mv_ptr[2] = mv_ptr[index * 8 + 0 ];
            mv_ptr[3] = mv_ptr[index * 8 + 1 ];
            mv_ptr[4] = mv_ptr[index * 8 + 0 ];
            mv_ptr[5] = mv_ptr[index * 8 + 1 ];
            mv_ptr[6] = mv_ptr[index * 8 + 0 ];
            mv_ptr[7] = mv_ptr[index * 8 + 1 ];
            cu_part_mode = 0;
            cu_size = 0;
            tu_size = 0x0;
            tu_count = 4;

        }else
        {
            /* unrecognized mode: fall back to the 16x16 replication */
            mv_ptr[4] = mv_ptr[0];
            mv_ptr[5] = mv_ptr[1];
            mv_ptr[2] = mv_ptr[0];
            mv_ptr[3] = mv_ptr[1];
            mv_ptr[6] = mv_ptr[0];
            mv_ptr[7] = mv_ptr[1];
            cu_part_mode = 0;
            cu_size = 1;
            tu_size = 0x55;
            tu_count = 4;

        }
    }

    cu_record_ptr = (unsigned char *)mfc_context->hcp_indirect_cu_object.bo->virtual;
    /* get the mb info from the vme out */
    cu_msg = (unsigned int *)(cu_record_ptr + cu_address);

    cu_msg[0] = (submb_pre_mode << 24 | /* interpred_idc[3:0][1:0] */
                 zero << 23 |   /* reserved */
                 qp << 16 | /* CU_qp */
                 zero << 11 |   /* reserved */
                 5 << 8 |   /* intra_chroma_mode */
                 zero << 7 |    /* IPCM_enable , reserved for SKL*/
                 cu_part_mode << 4 |    /* cu_part_mode */
                 zero << 3 |    /* cu_transquant_bypass_flag */
                 is_inter << 2 |    /* cu_pred_mode: 1 = inter (always set here) */
                 cu_size          /* cu_size */
                );
    cu_msg[1] = (zero << 30 |   /* reserved  */
                 zero << 24 |   /* intra_mode */
                 zero << 22 |   /* reserved  */
                 zero << 16 |   /* intra_mode */
                 zero << 14 |   /* reserved  */
                 zero << 8 |    /* intra_mode */
                 zero << 6 |    /* reserved  */
                 zero           /* intra_mode */
                );
    /* l0: 4 MV (x,y); l1; 4 MV (x,y) */
    cu_msg[2] = ((mv_ptr[2] & 0xffff) << 16 |   /* mvx_l0[1]  */
                 (mv_ptr[0] & 0xffff)           /* mvx_l0[0] */
                );
    cu_msg[3] = ((mv_ptr[6] & 0xffff) << 16 |   /* mvx_l0[3]  */
                 (mv_ptr[4] & 0xffff)           /* mvx_l0[2] */
                );
    cu_msg[4] = ((mv_ptr[2] & 0xffff0000) |         /* mvy_l0[1]  */
                 (mv_ptr[0] & 0xffff0000) >> 16     /* mvy_l0[0] */
                );
    cu_msg[5] = ((mv_ptr[6] & 0xffff0000) |         /* mvy_l0[3]  */
                 (mv_ptr[4] & 0xffff0000) >> 16     /* mvy_l0[2] */
                );

    cu_msg[6] = ((mv_ptr[3] & 0xffff) << 16 |   /* mvx_l1[1]  */
                 (mv_ptr[1] & 0xffff)           /* mvx_l1[0] */
                );
    cu_msg[7] = ((mv_ptr[7] & 0xffff) << 16 |   /* mvx_l1[3]  */
                 (mv_ptr[5] & 0xffff)           /* mvx_l1[2] */
                );
    cu_msg[8] = ((mv_ptr[3] & 0xffff0000) |         /* mvy_l1[1]  */
                 (mv_ptr[1] & 0xffff0000) >> 16     /* mvy_l1[0] */
                );
    cu_msg[9] = ((mv_ptr[7] & 0xffff0000) |         /* mvy_l1[3]  */
                 (mv_ptr[5] & 0xffff0000) >> 16     /* mvy_l1[2] */
                );

    /* ref_index_in_mb packs one 8-bit ref index per partition; only the low
     * 4 bits of each byte are forwarded to the record */
    cu_msg[10] = (((vme_context->ref_index_in_mb[1] >> 24) & 0xf) << 28 |   /* ref_idx_l1[3]  */
                  ((vme_context->ref_index_in_mb[1] >> 16) & 0xf) << 24 |   /* ref_idx_l1[2] */
                  ((vme_context->ref_index_in_mb[1] >> 8) & 0xf) << 20 |    /* ref_idx_l1[1]  */
                  ((vme_context->ref_index_in_mb[1] >> 0) & 0xf) << 16 |    /* ref_idx_l1[0] */
                  ((vme_context->ref_index_in_mb[0] >> 24) & 0xf) << 12 |   /* ref_idx_l0[3]  */
                  ((vme_context->ref_index_in_mb[0] >> 16) & 0xf) << 8  |   /* ref_idx_l0[2] */
                  ((vme_context->ref_index_in_mb[0] >> 8) & 0xf) << 4 |     /* ref_idx_l0[1]  */
                  ((vme_context->ref_index_in_mb[0] >> 0) & 0xf)            /* ref_idx_l0[0] */
                 );

    cu_msg[11] = tu_size; /* tu_size 00000000 00000000 00000000 10101010  or 0x0*/
    cu_msg[12] = ((tu_count - 1) << 28 | /* tu count - 1 */
                  zero << 16 |  /* reserved  */
                  zero          /* tu_xform_Yskip[15:0] */
                 );
    cu_msg[13] = (zero << 16 |  /* tu_xform_Vskip[15:0]  */
                  zero          /* tu_xform_Uskip[15:0] */
                 );
    cu_msg[14] = zero ;
    cu_msg[15] = zero ;
}
  1460.  
/*
 * Pre-packed split_coding_unit_flag patterns handed to the HEVC PAK object.
 * NOTE(review): bit 20 presumably marks the CTB-level split and bits 16..19
 * the splits of the four 32x32 children — confirm against the HCP
 * PAK_OBJECT field layout.  The 8x8 pattern is identical to the 32x32 one
 * (only bit 20 set); the per-16x16 split bits are OR-ed in at runtime.
 */
#define HEVC_SPLIT_CU_FLAG_64_64 ((0x1<<20)|(0xf<<16)|(0x0<<12)|(0x0<<8)|(0x0<<4)|(0x0))
#define HEVC_SPLIT_CU_FLAG_32_32 ((0x1<<20)|(0x0<<16)|(0x0<<12)|(0x0<<8)|(0x0<<4)|(0x0))
#define HEVC_SPLIT_CU_FLAG_16_16 ((0x0<<20)|(0x0<<16)|(0x0<<12)|(0x0<<8)|(0x0<<4)|(0x0))
#define HEVC_SPLIT_CU_FLAG_8_8   ((0x1<<20)|(0x0<<16)|(0x0<<12)|(0x0<<8)|(0x0<<4)|(0x0))
  1465.  
  1466.  
  1467. void
  1468. intel_hevc_slice_insert_packed_data(VADriverContextP ctx,
  1469.                                     struct encode_state *encode_state,
  1470.                                     struct intel_encoder_context *encoder_context,
  1471.                                     int slice_index,
  1472.                                     struct intel_batchbuffer *slice_batch)
  1473. {
  1474.     int count, i, start_index;
  1475.     unsigned int length_in_bits;
  1476.     VAEncPackedHeaderParameterBuffer *param = NULL;
  1477.     unsigned int *header_data = NULL;
  1478.     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
  1479.     int slice_header_index;
  1480.  
  1481.     if (encode_state->slice_header_index[slice_index] == 0)
  1482.         slice_header_index = -1;
  1483.     else
  1484.         slice_header_index = (encode_state->slice_header_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
  1485.  
  1486.     count = encode_state->slice_rawdata_count[slice_index];
  1487.     start_index = (encode_state->slice_rawdata_index[slice_index] & SLICE_PACKED_DATA_INDEX_MASK);
  1488.  
  1489.     for (i = 0; i < count; i++) {
  1490.         unsigned int skip_emul_byte_cnt;
  1491.  
  1492.         header_data = (unsigned int *)encode_state->packed_header_data_ext[start_index + i]->buffer;
  1493.  
  1494.         param = (VAEncPackedHeaderParameterBuffer *)
  1495.                 (encode_state->packed_header_params_ext[start_index + i]->buffer);
  1496.  
  1497.         /* skip the slice header packed data type as it is lastly inserted */
  1498.         if (param->type == VAEncPackedHeaderSlice)
  1499.             continue;
  1500.  
  1501.         length_in_bits = param->bit_length;
  1502.  
  1503.         skip_emul_byte_cnt = intel_hevc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
  1504.  
  1505.         /* as the slice header is still required, the last header flag is set to
  1506.          * zero.
  1507.          */
  1508.         mfc_context->insert_object(ctx,
  1509.                                    encoder_context,
  1510.                                    header_data,
  1511.                                    ALIGN(length_in_bits, 32) >> 5,
  1512.                                    length_in_bits & 0x1f,
  1513.                                    skip_emul_byte_cnt,
  1514.                                    0,
  1515.                                    0,
  1516.                                    !param->has_emulation_bytes,
  1517.                                    slice_batch);
  1518.     }
  1519.  
  1520.     if (slice_header_index == -1) {
  1521.         unsigned char *slice_header = NULL;
  1522.         int slice_header_length_in_bits = 0;
  1523.         VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
  1524.         VAEncPictureParameterBufferHEVC *pPicParameter = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
  1525.         VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[slice_index]->buffer;
  1526.  
  1527.         /* For the Normal HEVC */
  1528.         slice_header_length_in_bits = build_hevc_slice_header(pSequenceParameter,
  1529.                                       pPicParameter,
  1530.                                       pSliceParameter,
  1531.                                       &slice_header,
  1532.                                       0);
  1533.         mfc_context->insert_object(ctx, encoder_context,
  1534.                                    (unsigned int *)slice_header,
  1535.                                    ALIGN(slice_header_length_in_bits, 32) >> 5,
  1536.                                    slice_header_length_in_bits & 0x1f,
  1537.                                    5,  /* first 6 bytes are start code + nal unit type */
  1538.                                    1, 0, 1, slice_batch);
  1539.         free(slice_header);
  1540.     } else {
  1541.         unsigned int skip_emul_byte_cnt;
  1542.  
  1543.         header_data = (unsigned int *)encode_state->packed_header_data_ext[slice_header_index]->buffer;
  1544.  
  1545.         param = (VAEncPackedHeaderParameterBuffer *)
  1546.                 (encode_state->packed_header_params_ext[slice_header_index]->buffer);
  1547.         length_in_bits = param->bit_length;
  1548.  
  1549.         /* as the slice header is the last header data for one slice,
  1550.          * the last header flag is set to one.
  1551.          */
  1552.         skip_emul_byte_cnt = intel_hevc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
  1553.  
  1554.         mfc_context->insert_object(ctx,
  1555.                                    encoder_context,
  1556.                                    header_data,
  1557.                                    ALIGN(length_in_bits, 32) >> 5,
  1558.                                    length_in_bits & 0x1f,
  1559.                                    skip_emul_byte_cnt,
  1560.                                    1,
  1561.                                    0,
  1562.                                    !param->has_emulation_bytes,
  1563.                                    slice_batch);
  1564.     }
  1565.  
  1566.     return;
  1567. }
  1568.  
/*
 * Program one HEVC slice into slice_batch.
 *
 * Steps: emit the HCP slice state (plus the per-frame headers for the first
 * slice and the packed header data), then walk every CTB of the slice,
 * convert the per-MB VME output (intra/inter chosen by RDO cost) into
 * indirect CU records, emit one PAK object per CTB, and finally append the
 * tail/padding data (longer for the last slice of the frame).
 *
 * Under CBR the slice QP comes from the bitrate-control context instead of
 * pic_init_qp + slice_qp_delta.
 */
static void
gen9_hcpe_hevc_pipeline_slice_programing(VADriverContextP ctx,
        struct encode_state *encode_state,
        struct intel_encoder_context *encoder_context,
        int slice_index,
        struct intel_batchbuffer *slice_batch)
{
    struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
    struct gen6_vme_context *vme_context = encoder_context->vme_context;
    VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
    VAEncPictureParameterBufferHEVC *pPicParameter = (VAEncPictureParameterBufferHEVC *)encode_state->pic_param_ext->buffer;
    VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[slice_index]->buffer;
    int qp_slice = pPicParameter->pic_init_qp + pSliceParameter->slice_qp_delta;
    unsigned int rate_control_mode = encoder_context->rate_control_mode;
    //unsigned char *slice_header = NULL;         // for future use
    //int slice_header_length_in_bits = 0;
    unsigned int tail_data[] = { 0x0, 0x0 };
    int slice_type = pSliceParameter->slice_type;

    int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
    int log2_ctb_size = pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
    int ctb_size = 1 << log2_ctb_size;
    int width_in_ctb = (pSequenceParameter->pic_width_in_luma_samples + ctb_size - 1) / ctb_size;
    int height_in_ctb = (pSequenceParameter->pic_height_in_luma_samples + ctb_size - 1) / ctb_size;
    /* this slice reaches the end of the picture */
    int last_slice = (pSliceParameter->slice_segment_address + pSliceParameter->num_ctu_in_slice) == (width_in_ctb * height_in_ctb);
    int ctb_width_in_mb = (ctb_size + 15) / 16;
    int i_ctb, ctb_x, ctb_y;
    unsigned int split_coding_unit_flag = 0;
    int width_in_mbs = (pSequenceParameter->pic_width_in_luma_samples + 15) / 16;
    /* set when the picture size is not a multiple of the CTB size, so the
     * last CTB row/column only covers one MB */
    int row_pad_flag = (pSequenceParameter->pic_height_in_luma_samples % ctb_size)> 0 ? 1:0;
    int col_pad_flag = (pSequenceParameter->pic_width_in_luma_samples % ctb_size)> 0 ? 1:0;

    int is_intra = (slice_type == HEVC_SLICE_I);
    unsigned int *msg = NULL;
    unsigned char *msg_ptr = NULL;
    int macroblock_address = 0;
    int num_cu_record = 64;     /* CU record slots per CTB; remapped from log2_ctb_size below */
    int cu_count = 1;
    int tmp_mb_mode = 0;
    int mb_x = 0, mb_y = 0;
    int mb_addr = 0;
    int cu_index = 0;
    int inter_rdo, intra_rdo;
    int qp;

    /* 32x32 CTB -> 16 records, 16x16 -> 4, 64x64 -> 64 */
    if (log2_ctb_size == 5) num_cu_record = 16;
    else if (log2_ctb_size == 4) num_cu_record = 4;
    else if (log2_ctb_size == 6) num_cu_record = 64;

    qp = qp_slice;
    if (rate_control_mode == VA_RC_CBR) {
        /* CBR: take the QP computed by brc for this slice type; B slices
         * borrow the P QP right after an I frame or when ip_period == 1 */
        qp = mfc_context->bit_rate_control_context[slice_type].QpPrimeY;
        if(slice_type == HEVC_SLICE_B) {
            if(pSequenceParameter->ip_period == 1)
            {
                qp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;

            }else if(mfc_context->vui_hrd.i_frame_number % pSequenceParameter->ip_period == 1){
                qp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
            }
        }
        /* no app-provided packed slice header: reflect the BRC QP in the
         * slice_qp_delta used when building the header internally */
        if (encode_state->slice_header_index[slice_index] == 0) {
            pSliceParameter->slice_qp_delta = qp - pPicParameter->pic_init_qp;
        }
    }

    /* only support for 8-bit pixel bit-depth */
    assert(pSequenceParameter->seq_fields.bits.bit_depth_luma_minus8 == 0);
    assert(pSequenceParameter->seq_fields.bits.bit_depth_chroma_minus8 == 0);
    assert(pPicParameter->pic_init_qp >= 0 && pPicParameter->pic_init_qp < 52);
    assert(qp >= 0 && qp < 52);

    {
        /* slice-level state, then (first slice only) VPS/SPS/PPS/SEI
         * headers, then the slice's packed header data */
        gen9_hcpe_hevc_slice_state(ctx,
                                   pPicParameter,
                                   pSliceParameter,
                                   encode_state, encoder_context,
                                   slice_batch);

        if (slice_index == 0)
            intel_hcpe_hevc_pipeline_header_programing(ctx, encode_state, encoder_context, slice_batch);

        intel_hevc_slice_insert_packed_data(ctx, encode_state, encoder_context, slice_index, slice_batch);

        /*
        slice_header_length_in_bits = build_hevc_slice_header(pSequenceParameter, pPicParameter, pSliceParameter, &slice_header, slice_index);
        int skip_emul_byte_cnt = intel_hevc_find_skipemulcnt((unsigned char *)slice_header, slice_header_length_in_bits);

        mfc_context->insert_object(ctx, encoder_context,
                                   (unsigned int *)slice_header, ALIGN(slice_header_length_in_bits, 32) >> 5, slice_header_length_in_bits & 0x1f,
                                    skip_emul_byte_cnt,
                                    1, 0, 1, slice_batch);
        free(slice_header);
        */
    }



    /* NOTE(review): this initial value (which handles the 64x64 case) is
     * overwritten per-CTB inside the loop below, where the 64x64 case is
     * not reproduced — confirm whether 64x64 CTBs are actually reachable */
    split_coding_unit_flag = (ctb_width_in_mb == 4) ? HEVC_SPLIT_CU_FLAG_64_64 : ((ctb_width_in_mb == 2) ? HEVC_SPLIT_CU_FLAG_32_32 : HEVC_SPLIT_CU_FLAG_16_16);

    /* map VME output (read) and the indirect CU object (write);
     * NOTE(review): dri_bo_map return values are not checked */
    dri_bo_map(vme_context->vme_output.bo , 1);
    msg_ptr = (unsigned char *)vme_context->vme_output.bo->virtual;
    dri_bo_map(mfc_context->hcp_indirect_cu_object.bo , 1);

    for (i_ctb = pSliceParameter->slice_segment_address;i_ctb < pSliceParameter->slice_segment_address + pSliceParameter->num_ctu_in_slice; i_ctb++) {
        int last_ctb = (i_ctb == (pSliceParameter->slice_segment_address + pSliceParameter->num_ctu_in_slice - 1));
        int ctb_height_in_mb_internal = ctb_width_in_mb;
        int ctb_width_in_mb_internal = ctb_width_in_mb;
        ctb_x = i_ctb % width_in_ctb;
        ctb_y = i_ctb / width_in_ctb;
        /* clamp the MB walk on the padded last row/column */
        if(ctb_y == (height_in_ctb - 1) && row_pad_flag)  ctb_height_in_mb_internal = 1;
        if(ctb_x == (width_in_ctb - 1) && col_pad_flag)  ctb_width_in_mb_internal = 1;

        mb_x = 0;
        mb_y = 0;
        macroblock_address = ctb_y * width_in_mbs * ctb_width_in_mb + ctb_x * ctb_width_in_mb;
        split_coding_unit_flag = ((ctb_width_in_mb == 2) ? HEVC_SPLIT_CU_FLAG_32_32 : HEVC_SPLIT_CU_FLAG_16_16);
        cu_count = 1;
        cu_index = 0;
        mb_addr = 0;
        msg = NULL;
        for (mb_y = 0; mb_y < ctb_height_in_mb_internal; mb_y++)
        {
            mb_addr = macroblock_address + mb_y * width_in_mbs ;
            for (mb_x = 0; mb_x < ctb_width_in_mb_internal; mb_x++)
            {
                /* get the mb info from the vme out */
                msg = (unsigned int *)(msg_ptr + mb_addr * vme_context->vme_output.size_block);

                inter_rdo = msg[AVC_INTER_RDO_OFFSET] & AVC_RDO_MASK;
                intra_rdo = msg[AVC_INTRA_RDO_OFFSET] & AVC_RDO_MASK;
                /*fill to indirect cu */
                /*to do */
                /* choose intra vs inter per MB by comparing RDO costs */
                if (is_intra || intra_rdo < inter_rdo) {
                    /* fill intra cu */
                    tmp_mb_mode = (msg[0] & AVC_INTRA_MODE_MASK) >> 4;
                    if (tmp_mb_mode == AVC_INTRA_16X16) {
                        gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,0);
                    } else { // for 4x4 to use 8x8 replace
                        /* non-16x16 intra: emit four 8x8 CUs and mark this
                         * 16x16 block as split */
                        gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,0);
                        gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,1);
                        gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,2);
                        gen9_hcpe_hevc_fill_indirect_cu_intra(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,3);
                        if(ctb_width_in_mb == 2)
                            split_coding_unit_flag |= 0x1 << (mb_x + mb_y * ctb_width_in_mb + 16);
                        else if(ctb_width_in_mb == 1)
                            split_coding_unit_flag |= 0x1 << 20;
                    }
                } else {
                    msg += AVC_INTER_MSG_OFFSET;
                    /* fill inter cu */
                    tmp_mb_mode = msg[0] & AVC_INTER_MODE_MASK;
                    if (tmp_mb_mode == AVC_INTER_8X8){
                        /* 8x8 inter: four CUs, one per sub-block */
                        gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,0);
                        gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,1);
                        gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,2);
                        gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,3);
                        if(ctb_width_in_mb == 2)
                            split_coding_unit_flag |= 0x1 << (mb_x + mb_y * ctb_width_in_mb + 16);
                        else if(ctb_width_in_mb == 1)
                            split_coding_unit_flag |= 0x1 << 20;

                    }else if(tmp_mb_mode == AVC_INTER_16X16 ||
                        tmp_mb_mode == AVC_INTER_8X16 ||
                        tmp_mb_mode == AVC_INTER_16X8) {
                        gen9_hcpe_hevc_fill_indirect_cu_inter(ctx, encode_state, encoder_context, qp, msg, ctb_x, ctb_y, mb_x, mb_y, ctb_width_in_mb, width_in_ctb, num_cu_record, slice_type,cu_index++,0);
                    }
                }
                mb_addr++;
            }
        }

        cu_count = cu_index;
        // PAK object fill accordingly.
        gen9_hcpe_hevc_pak_object(ctx, ctb_x, ctb_y, last_ctb, encoder_context, cu_count, split_coding_unit_flag, slice_batch);
    }

    dri_bo_unmap(mfc_context->hcp_indirect_cu_object.bo);
    dri_bo_unmap(vme_context->vme_output.bo);

    /* tail/padding data; the last slice of the frame gets the longer form */
    if (last_slice) {
        mfc_context->insert_object(ctx, encoder_context,
                                   tail_data, 2, 8,
                                   2, 1, 1, 0, slice_batch);
    } else {
        mfc_context->insert_object(ctx, encoder_context,
                                   tail_data, 1, 8,
                                   1, 1, 1, 0, slice_batch);
    }
}
  1759.  
  1760. static dri_bo *
  1761. gen9_hcpe_hevc_software_batchbuffer(VADriverContextP ctx,
  1762.                                     struct encode_state *encode_state,
  1763.                                     struct intel_encoder_context *encoder_context)
  1764. {
  1765.     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
  1766.     struct intel_batchbuffer *batch;
  1767.     dri_bo *batch_bo;
  1768.     int i;
  1769.  
  1770.     batch = mfc_context->aux_batchbuffer;
  1771.     batch_bo = batch->buffer;
  1772.  
  1773.     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
  1774.         gen9_hcpe_hevc_pipeline_slice_programing(ctx, encode_state, encoder_context, i, batch);
  1775.     }
  1776.  
  1777.     intel_batchbuffer_align(batch, 8);
  1778.  
  1779.     BEGIN_BCS_BATCH(batch, 2);
  1780.     OUT_BCS_BATCH(batch, 0);
  1781.     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_END);
  1782.     ADVANCE_BCS_BATCH(batch);
  1783.  
  1784.     dri_bo_reference(batch_bo);
  1785.     intel_batchbuffer_free(batch);
  1786.     mfc_context->aux_batchbuffer = NULL;
  1787.  
  1788.     return batch_bo;
  1789. }
  1790.  
  1791. #else
  1792.  
  1793. #endif
  1794.  
  1795. static void
  1796. gen9_hcpe_hevc_pipeline_programing(VADriverContextP ctx,
  1797.                                    struct encode_state *encode_state,
  1798.                                    struct intel_encoder_context *encoder_context)
  1799. {
  1800.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1801.     struct intel_batchbuffer *batch = encoder_context->base.batch;
  1802.     dri_bo *slice_batch_bo;
  1803.  
  1804. #ifdef HCP_SOFTWARE_SKYLAKE
  1805.     slice_batch_bo = gen9_hcpe_hevc_software_batchbuffer(ctx, encode_state, encoder_context);
  1806. #else
  1807.     slice_batch_bo = gen9_hcpe_hevc_hardware_batchbuffer(ctx, encode_state, encoder_context);
  1808. #endif
  1809.  
  1810.     // begin programing
  1811.     if (i965->intel.has_bsd2)
  1812.         intel_batchbuffer_start_atomic_bcs_override(batch, 0x4000, BSD_RING0);
  1813.     else
  1814.         intel_batchbuffer_start_atomic_bcs(batch, 0x4000);
  1815.     intel_batchbuffer_emit_mi_flush(batch);
  1816.  
  1817.     // picture level programing
  1818.     gen9_hcpe_hevc_pipeline_picture_programing(ctx, encode_state, encoder_context);
  1819.  
  1820.     BEGIN_BCS_BATCH(batch, 3);
  1821.     OUT_BCS_BATCH(batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
  1822.     OUT_BCS_RELOC(batch,
  1823.                   slice_batch_bo,
  1824.                   I915_GEM_DOMAIN_COMMAND, 0,
  1825.                   0);
  1826.     OUT_BCS_BATCH(batch, 0);
  1827.     ADVANCE_BCS_BATCH(batch);
  1828.  
  1829.     // end programing
  1830.     intel_batchbuffer_end_atomic(batch);
  1831.  
  1832.     dri_bo_unreference(slice_batch_bo);
  1833. }
  1834.  
  1835. void intel_hcpe_hevc_pipeline_header_programing(VADriverContextP ctx,
  1836.         struct encode_state *encode_state,
  1837.         struct intel_encoder_context *encoder_context,
  1838.         struct intel_batchbuffer *slice_batch)
  1839. {
  1840.     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
  1841.     int idx = va_enc_packed_type_to_idx(VAEncPackedHeaderHEVC_VPS);
  1842.     unsigned int skip_emul_byte_cnt;
  1843.  
  1844.     if (encode_state->packed_header_data[idx]) {
  1845.         VAEncPackedHeaderParameterBuffer *param = NULL;
  1846.         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
  1847.         unsigned int length_in_bits;
  1848.  
  1849.         assert(encode_state->packed_header_param[idx]);
  1850.         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
  1851.         length_in_bits = param->bit_length;
  1852.  
  1853.         skip_emul_byte_cnt = intel_hevc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
  1854.         mfc_context->insert_object(ctx,
  1855.                                    encoder_context,
  1856.                                    header_data,
  1857.                                    ALIGN(length_in_bits, 32) >> 5,
  1858.                                    length_in_bits & 0x1f,
  1859.                                    skip_emul_byte_cnt,
  1860.                                    0,
  1861.                                    0,
  1862.                                    !param->has_emulation_bytes,
  1863.                                    slice_batch);
  1864.     }
  1865.  
  1866.     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderHEVC_VPS) + 1; // index to SPS
  1867.  
  1868.     if (encode_state->packed_header_data[idx]) {
  1869.         VAEncPackedHeaderParameterBuffer *param = NULL;
  1870.         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
  1871.         unsigned int length_in_bits;
  1872.  
  1873.         assert(encode_state->packed_header_param[idx]);
  1874.         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
  1875.         length_in_bits = param->bit_length;
  1876.  
  1877.         skip_emul_byte_cnt = intel_hevc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
  1878.         mfc_context->insert_object(ctx,
  1879.                                    encoder_context,
  1880.                                    header_data,
  1881.                                    ALIGN(length_in_bits, 32) >> 5,
  1882.                                    length_in_bits & 0x1f,
  1883.                                    skip_emul_byte_cnt,
  1884.                                    0,
  1885.                                    0,
  1886.                                    !param->has_emulation_bytes,
  1887.                                    slice_batch);
  1888.     }
  1889.  
  1890.     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderHEVC_PPS);
  1891.  
  1892.     if (encode_state->packed_header_data[idx]) {
  1893.         VAEncPackedHeaderParameterBuffer *param = NULL;
  1894.         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
  1895.         unsigned int length_in_bits;
  1896.  
  1897.         assert(encode_state->packed_header_param[idx]);
  1898.         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
  1899.         length_in_bits = param->bit_length;
  1900.  
  1901.         skip_emul_byte_cnt = intel_hevc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
  1902.  
  1903.         mfc_context->insert_object(ctx,
  1904.                                    encoder_context,
  1905.                                    header_data,
  1906.                                    ALIGN(length_in_bits, 32) >> 5,
  1907.                                    length_in_bits & 0x1f,
  1908.                                    skip_emul_byte_cnt,
  1909.                                    0,
  1910.                                    0,
  1911.                                    !param->has_emulation_bytes,
  1912.                                    slice_batch);
  1913.     }
  1914.  
  1915.     idx = va_enc_packed_type_to_idx(VAEncPackedHeaderHEVC_SEI);
  1916.  
  1917.     if (encode_state->packed_header_data[idx]) {
  1918.         VAEncPackedHeaderParameterBuffer *param = NULL;
  1919.         unsigned int *header_data = (unsigned int *)encode_state->packed_header_data[idx]->buffer;
  1920.         unsigned int length_in_bits;
  1921.  
  1922.         assert(encode_state->packed_header_param[idx]);
  1923.         param = (VAEncPackedHeaderParameterBuffer *)encode_state->packed_header_param[idx]->buffer;
  1924.         length_in_bits = param->bit_length;
  1925.  
  1926.         skip_emul_byte_cnt = intel_hevc_find_skipemulcnt((unsigned char *)header_data, length_in_bits);
  1927.         mfc_context->insert_object(ctx,
  1928.                                    encoder_context,
  1929.                                    header_data,
  1930.                                    ALIGN(length_in_bits, 32) >> 5,
  1931.                                    length_in_bits & 0x1f,
  1932.                                    skip_emul_byte_cnt,
  1933.                                    0,
  1934.                                    0,
  1935.                                    !param->has_emulation_bytes,
  1936.                                    slice_batch);
  1937.     }
  1938. }
  1939.  
/*
 * Prepare all input/output buffer objects for encoding one HEVC frame:
 * the reconstructed surface and its MV temporal buffer, the reference
 * surfaces (with their MV temporal buffers), the raw input YUV surface,
 * and the coded (output) buffer.  Always returns VA_STATUS_SUCCESS;
 * allocation failures are caught by assert() only.
 */
VAStatus intel_hcpe_hevc_prepare(VADriverContextP ctx,
                                 struct encode_state *encode_state,
                                 struct intel_encoder_context *encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
    struct object_surface *obj_surface;
    struct object_buffer *obj_buffer;
    GenHevcSurface *hevc_encoder_surface;
    dri_bo *bo;
    VAStatus vaStatus = VA_STATUS_SUCCESS;
    int i;
        struct i965_coded_buffer_segment *coded_buffer_segment;
    VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;

    /*Setup all the input&output object*/

    /* Setup current frame and current direct mv buffer*/
    obj_surface = encode_state->reconstructed_object;
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    /* Lazily attach a GenHevcSurface (holding the collocated MV temporal
     * buffer) to the reconstructed surface on first use. */
    if (obj_surface->private_data == NULL) {
        uint32_t size;

        /* Number of MV storage units depends on the CTB size configured in
         * pic_size; each unit occupies 64 bytes (hence the <<= 6 below). */
        if (mfc_context->pic_size.ctb_size == 16)
            size = ((pSequenceParameter->pic_width_in_luma_samples + 63) >> 6) *
                   ((pSequenceParameter->pic_height_in_luma_samples + 15) >> 4);
        else
            size = ((pSequenceParameter->pic_width_in_luma_samples + 31) >> 5) *
                   ((pSequenceParameter->pic_height_in_luma_samples + 31) >> 5);
        size <<= 6; /* in unit of 64bytes */

        /* NOTE(review): calloc args are (size, 1); the conventional order is
         * (nmemb, size) — same result, but worth normalizing some day. */
        hevc_encoder_surface = calloc(sizeof(GenHevcSurface), 1);

        assert(hevc_encoder_surface);
        hevc_encoder_surface->motion_vector_temporal_bo =
            dri_bo_alloc(i965->intel.bufmgr,
                         "motion vector temporal buffer",
                         size,
                         0x1000);
        assert(hevc_encoder_surface->motion_vector_temporal_bo);

        /* Ownership of hevc_encoder_surface passes to obj_surface; it is
         * released through gen_free_hevc_surface. */
        obj_surface->private_data = (void *)hevc_encoder_surface;
        obj_surface->free_private_data = (void *)gen_free_hevc_surface;
    }
    hevc_encoder_surface = (GenHevcSurface *) obj_surface->private_data;

    if (hevc_encoder_surface) {
        hevc_encoder_surface->base.frame_store_id = -1;
        /* Current frame's MV buffer goes in the last collocated-MV slot;
         * take an extra reference since mfc_context now also holds it. */
        mfc_context->current_collocated_mv_temporal_buffer[NUM_HCP_CURRENT_COLLOCATED_MV_TEMPORAL_BUFFERS - 1].bo = hevc_encoder_surface->motion_vector_temporal_bo;

        dri_bo_reference(hevc_encoder_surface->motion_vector_temporal_bo);
    }

    mfc_context->surface_state.width = obj_surface->orig_width;
    mfc_context->surface_state.height = obj_surface->orig_height;
    mfc_context->surface_state.w_pitch = obj_surface->width;
    mfc_context->surface_state.h_pitch = obj_surface->height;

    /* Setup reference frames and direct mv buffers*/
    for (i = 0; i < MAX_HCP_REFERENCE_SURFACES; i++) {
        obj_surface = encode_state->reference_objects[i];

        if (obj_surface && obj_surface->bo) {
            mfc_context->reference_surfaces[i].bo = obj_surface->bo;
            dri_bo_reference(obj_surface->bo);

            /* Check MV temporal buffer */
            if (obj_surface->private_data == NULL) {
                uint32_t size;

                /* Same sizing rule as for the reconstructed surface above. */
                if (mfc_context->pic_size.ctb_size == 16)
                    size = ((pSequenceParameter->pic_width_in_luma_samples + 63) >> 6) *
                           ((pSequenceParameter->pic_height_in_luma_samples + 15) >> 4);
                else
                    size = ((pSequenceParameter->pic_width_in_luma_samples + 31) >> 5) *
                           ((pSequenceParameter->pic_height_in_luma_samples + 31) >> 5);
                size <<= 6; /* in unit of 64bytes */

                hevc_encoder_surface = calloc(sizeof(GenHevcSurface), 1);

                /* NOTE(review): unlike the reconstructed-surface path above,
                 * a NULL calloc result is tolerated here (private_data is
                 * then simply left NULL-ish) instead of asserted. */
                if (hevc_encoder_surface) {
                    hevc_encoder_surface->motion_vector_temporal_bo =
                        dri_bo_alloc(i965->intel.bufmgr,
                                     "motion vector temporal buffer",
                                     size,
                                     0x1000);
                    assert(hevc_encoder_surface->motion_vector_temporal_bo);
                }

                obj_surface->private_data = (void *)hevc_encoder_surface;
                obj_surface->free_private_data = (void *)gen_free_hevc_surface;
            }

            hevc_encoder_surface = (GenHevcSurface *) obj_surface->private_data;

            if (hevc_encoder_surface) {
                hevc_encoder_surface->base.frame_store_id = -1;
                /* Setup MV temporal buffer */
                mfc_context->current_collocated_mv_temporal_buffer[i].bo = hevc_encoder_surface->motion_vector_temporal_bo;
                dri_bo_reference(hevc_encoder_surface->motion_vector_temporal_bo);
            }
        } else {
            /* Reference list is packed: stop at the first empty slot. */
            break;
        }
    }

    /* Raw input picture. */
    mfc_context->uncompressed_picture_source.bo = encode_state->input_yuv_object->bo;
    dri_bo_reference(mfc_context->uncompressed_picture_source.bo);

    /* Coded buffer: bitstream starts after the i965 codec header; keep one
     * 0x1000 page of slack at the end. */
    obj_buffer = encode_state->coded_buf_object;
    bo = obj_buffer->buffer_store->bo;
    mfc_context->hcp_indirect_pak_bse_object.bo = bo;
    mfc_context->hcp_indirect_pak_bse_object.offset = I965_CODEDBUFFER_HEADER_SIZE;
    mfc_context->hcp_indirect_pak_bse_object.end_offset = ALIGN(obj_buffer->size_element - 0x1000, 0x1000);
    dri_bo_reference(mfc_context->hcp_indirect_pak_bse_object.bo);

    /* Reset the coded-buffer segment header for this frame. */
    dri_bo_map(bo, 1);
    coded_buffer_segment = (struct i965_coded_buffer_segment *)(bo->virtual);
    coded_buffer_segment->mapped = 0;
    coded_buffer_segment->codec = encoder_context->codec;
    dri_bo_unmap(bo);

    return vaStatus;
}
  2065.  
  2066. /* HEVC BRC related */
  2067.  
  2068. static void
  2069. intel_hcpe_bit_rate_control_context_init(struct encode_state *encode_state,
  2070.         struct gen9_hcpe_context *mfc_context)
  2071. {
  2072.     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
  2073.     int ctb_size = 16;
  2074.     int width_in_mbs = (pSequenceParameter->pic_width_in_luma_samples + ctb_size - 1) / ctb_size;
  2075.     int height_in_mbs = (pSequenceParameter->pic_height_in_luma_samples + ctb_size - 1) / ctb_size;
  2076.  
  2077.     float fps =  pSequenceParameter->vui_time_scale * 0.5 / pSequenceParameter->vui_num_units_in_tick ;
  2078.     double bitrate = pSequenceParameter->bits_per_second * 1.0;
  2079.     int inter_mb_size = bitrate * 1.0 / (fps + 4.0) / width_in_mbs / height_in_mbs;
  2080.     int intra_mb_size = inter_mb_size * 5.0;
  2081.     int i;
  2082.  
  2083.     mfc_context->bit_rate_control_context[HEVC_SLICE_I].target_mb_size = intra_mb_size;
  2084.     mfc_context->bit_rate_control_context[HEVC_SLICE_I].target_frame_size = intra_mb_size * width_in_mbs * height_in_mbs;
  2085.     mfc_context->bit_rate_control_context[HEVC_SLICE_P].target_mb_size = inter_mb_size;
  2086.     mfc_context->bit_rate_control_context[HEVC_SLICE_P].target_frame_size = inter_mb_size * width_in_mbs * height_in_mbs;
  2087.     mfc_context->bit_rate_control_context[HEVC_SLICE_B].target_mb_size = inter_mb_size;
  2088.     mfc_context->bit_rate_control_context[HEVC_SLICE_B].target_frame_size = inter_mb_size * width_in_mbs * height_in_mbs;
  2089.  
  2090.     for (i = 0 ; i < 3; i++) {
  2091.         mfc_context->bit_rate_control_context[i].QpPrimeY = 26;
  2092.         mfc_context->bit_rate_control_context[i].MaxQpNegModifier = 6;
  2093.         mfc_context->bit_rate_control_context[i].MaxQpPosModifier = 6;
  2094.         mfc_context->bit_rate_control_context[i].GrowInit = 6;
  2095.         mfc_context->bit_rate_control_context[i].GrowResistance = 4;
  2096.         mfc_context->bit_rate_control_context[i].ShrinkInit = 6;
  2097.         mfc_context->bit_rate_control_context[i].ShrinkResistance = 4;
  2098.  
  2099.         mfc_context->bit_rate_control_context[i].Correct[0] = 8;
  2100.         mfc_context->bit_rate_control_context[i].Correct[1] = 4;
  2101.         mfc_context->bit_rate_control_context[i].Correct[2] = 2;
  2102.         mfc_context->bit_rate_control_context[i].Correct[3] = 2;
  2103.         mfc_context->bit_rate_control_context[i].Correct[4] = 4;
  2104.         mfc_context->bit_rate_control_context[i].Correct[5] = 8;
  2105.     }
  2106.  
  2107.     mfc_context->bit_rate_control_context[HEVC_SLICE_I].TargetSizeInWord = (intra_mb_size + 16) / 16;
  2108.     mfc_context->bit_rate_control_context[HEVC_SLICE_P].TargetSizeInWord = (inter_mb_size + 16) / 16;
  2109.     mfc_context->bit_rate_control_context[HEVC_SLICE_B].TargetSizeInWord = (inter_mb_size + 16) / 16;
  2110.  
  2111.     mfc_context->bit_rate_control_context[HEVC_SLICE_I].MaxSizeInWord = mfc_context->bit_rate_control_context[HEVC_SLICE_I].TargetSizeInWord * 1.5;
  2112.     mfc_context->bit_rate_control_context[HEVC_SLICE_P].MaxSizeInWord = mfc_context->bit_rate_control_context[HEVC_SLICE_P].TargetSizeInWord * 1.5;
  2113.     mfc_context->bit_rate_control_context[HEVC_SLICE_B].MaxSizeInWord = mfc_context->bit_rate_control_context[HEVC_SLICE_B].TargetSizeInWord * 1.5;
  2114. }
  2115.  
  2116. static void intel_hcpe_brc_init(struct encode_state *encode_state,
  2117.                                 struct intel_encoder_context* encoder_context)
  2118. {
  2119.     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
  2120.     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
  2121.     VAEncMiscParameterHRD* pParameterHRD = NULL;
  2122.     VAEncMiscParameterBuffer* pMiscParamHRD = NULL;
  2123.  
  2124.     double bitrate = pSequenceParameter->bits_per_second * 1.0;
  2125.     double framerate = (double)pSequenceParameter->vui_time_scale / (2 * (double)pSequenceParameter->vui_num_units_in_tick);
  2126.     int inum = 1, pnum = 0, bnum = 0; /* Gop structure: number of I, P, B frames in the Gop. */
  2127.     int intra_period = pSequenceParameter->intra_period;
  2128.     int ip_period = pSequenceParameter->ip_period;
  2129.     double qp1_size = 0.1 * 8 * 3 * pSequenceParameter->pic_width_in_luma_samples * pSequenceParameter->pic_height_in_luma_samples / 2;
  2130.     double qp51_size = 0.001 * 8 * 3 * pSequenceParameter->pic_width_in_luma_samples * pSequenceParameter->pic_height_in_luma_samples / 2;
  2131.     double bpf;
  2132.     int ratio_min = 1;
  2133.     int ratio_max = 32;
  2134.     int ratio = 8;
  2135.     double buffer_size = 0;
  2136.  
  2137.     if (!encode_state->misc_param[VAEncMiscParameterTypeHRD] || !encode_state->misc_param[VAEncMiscParameterTypeHRD]->buffer)
  2138.         return;
  2139.  
  2140.     pMiscParamHRD = (VAEncMiscParameterBuffer*)encode_state->misc_param[VAEncMiscParameterTypeHRD]->buffer;
  2141.     pParameterHRD = (VAEncMiscParameterHRD*)pMiscParamHRD->data;
  2142.  
  2143.     if (pSequenceParameter->ip_period) {
  2144.         pnum = (intra_period + ip_period - 1) / ip_period - 1;
  2145.         bnum = intra_period - inum - pnum;
  2146.     }
  2147.  
  2148.     mfc_context->brc.mode = encoder_context->rate_control_mode;
  2149.  
  2150.     mfc_context->brc.target_frame_size[HEVC_SLICE_I] = (int)((double)((bitrate * intra_period) / framerate) /
  2151.             (double)(inum + BRC_PWEIGHT * pnum + BRC_BWEIGHT * bnum));
  2152.     mfc_context->brc.target_frame_size[HEVC_SLICE_P] = BRC_PWEIGHT * mfc_context->brc.target_frame_size[HEVC_SLICE_I];
  2153.     mfc_context->brc.target_frame_size[HEVC_SLICE_B] = BRC_BWEIGHT * mfc_context->brc.target_frame_size[HEVC_SLICE_I];
  2154.  
  2155.     mfc_context->brc.gop_nums[HEVC_SLICE_I] = inum;
  2156.     mfc_context->brc.gop_nums[HEVC_SLICE_P] = pnum;
  2157.     mfc_context->brc.gop_nums[HEVC_SLICE_B] = bnum;
  2158.  
  2159.     bpf = mfc_context->brc.bits_per_frame = bitrate / framerate;
  2160.  
  2161.     if (!pParameterHRD || pParameterHRD->buffer_size <= 0)
  2162.     {
  2163.         mfc_context->hrd.buffer_size = bitrate * ratio;
  2164.         mfc_context->hrd.current_buffer_fullness =
  2165.             (double)(bitrate * ratio/2 < mfc_context->hrd.buffer_size) ?
  2166.             bitrate * ratio/2 : mfc_context->hrd.buffer_size / 2.;
  2167.     }else
  2168.     {
  2169.         buffer_size = (double)pParameterHRD->buffer_size ;
  2170.         if(buffer_size < bitrate * ratio_min)
  2171.         {
  2172.             buffer_size = bitrate * ratio_min;
  2173.         }else if (buffer_size > bitrate * ratio_max)
  2174.         {
  2175.             buffer_size = bitrate * ratio_max ;
  2176.         }
  2177.         mfc_context->hrd.buffer_size =buffer_size;
  2178.         if(pParameterHRD->initial_buffer_fullness > 0)
  2179.         {
  2180.             mfc_context->hrd.current_buffer_fullness =
  2181.                 (double)(pParameterHRD->initial_buffer_fullness < mfc_context->hrd.buffer_size) ?
  2182.                 pParameterHRD->initial_buffer_fullness : mfc_context->hrd.buffer_size / 2.;
  2183.         }else
  2184.         {
  2185.             mfc_context->hrd.current_buffer_fullness = mfc_context->hrd.buffer_size / 2.;
  2186.  
  2187.         }
  2188.     }
  2189.  
  2190.     mfc_context->hrd.target_buffer_fullness = (double)mfc_context->hrd.buffer_size / 2.;
  2191.     mfc_context->hrd.buffer_capacity = (double)mfc_context->hrd.buffer_size / qp1_size;
  2192.     mfc_context->hrd.violation_noted = 0;
  2193.  
  2194.     if ((bpf > qp51_size) && (bpf < qp1_size)) {
  2195.         mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY = 51 - 50 * (bpf - qp51_size) / (qp1_size - qp51_size);
  2196.     } else if (bpf >= qp1_size)
  2197.         mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY = 1;
  2198.     else if (bpf <= qp51_size)
  2199.         mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY = 51;
  2200.  
  2201.     mfc_context->bit_rate_control_context[HEVC_SLICE_I].QpPrimeY = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
  2202.     mfc_context->bit_rate_control_context[HEVC_SLICE_B].QpPrimeY = mfc_context->bit_rate_control_context[HEVC_SLICE_I].QpPrimeY;
  2203.  
  2204.     BRC_CLIP(mfc_context->bit_rate_control_context[HEVC_SLICE_I].QpPrimeY, 1, 51);
  2205.     BRC_CLIP(mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY, 1, 51);
  2206.     BRC_CLIP(mfc_context->bit_rate_control_context[HEVC_SLICE_B].QpPrimeY, 1, 51);
  2207. }
  2208.  
  2209. int intel_hcpe_update_hrd(struct encode_state *encode_state,
  2210.                           struct gen9_hcpe_context *mfc_context,
  2211.                           int frame_bits)
  2212. {
  2213.     double prev_bf = mfc_context->hrd.current_buffer_fullness;
  2214.  
  2215.     mfc_context->hrd.current_buffer_fullness -= frame_bits;
  2216.  
  2217.     if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness <= 0.) {
  2218.         mfc_context->hrd.current_buffer_fullness = prev_bf;
  2219.         return BRC_UNDERFLOW;
  2220.     }
  2221.  
  2222.     mfc_context->hrd.current_buffer_fullness += mfc_context->brc.bits_per_frame;
  2223.     if (mfc_context->hrd.buffer_size > 0 && mfc_context->hrd.current_buffer_fullness > mfc_context->hrd.buffer_size) {
  2224.         if (mfc_context->brc.mode == VA_RC_VBR)
  2225.             mfc_context->hrd.current_buffer_fullness = mfc_context->hrd.buffer_size;
  2226.         else {
  2227.             mfc_context->hrd.current_buffer_fullness = prev_bf;
  2228.             return BRC_OVERFLOW;
  2229.         }
  2230.     }
  2231.     return BRC_NO_HRD_VIOLATION;
  2232. }
  2233.  
/*
 * Post-frame BRC update: given the actual size of the just-encoded frame
 * (frame_bits), predict the QP for the next frame of the same slice type,
 * verify HRD compliance, and propagate QP corrections to the other slice
 * types.  Returns a gen6_brc_status code (BRC_NO_HRD_VIOLATION on success,
 * or an underflow/overflow status the caller uses to decide on re-encode
 * or bit stuffing).
 */
int intel_hcpe_brc_postpack(struct encode_state *encode_state,
                            struct gen9_hcpe_context *mfc_context,
                            int frame_bits)
{
    gen6_brc_status sts = BRC_NO_HRD_VIOLATION;
    VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
    VAEncSliceParameterBufferHEVC *pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[0]->buffer;
    int slicetype = pSliceParameter->slice_type;
    int qpi = mfc_context->bit_rate_control_context[HEVC_SLICE_I].QpPrimeY;
    int qpp = mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY;
    int qpb = mfc_context->bit_rate_control_context[HEVC_SLICE_B].QpPrimeY;
    int qp; // quantizer of previously encoded slice of current type
    int qpn; // predicted quantizer for next frame of current type in integer format
    double qpf; // predicted quantizer for next frame of current type in float format
    double delta_qp; // QP correction
    int target_frame_size, frame_size_next;
    /* Notes:
     *  x - how far we are from HRD buffer borders
     *  y - how far we are from target HRD buffer fullness
     */
    double x, y;
    double frame_size_alpha;

    /* Frames scheduled as B but sitting at the start of an IP period are
     * rate-controlled as P frames. */
    if(slicetype == HEVC_SLICE_B) {
        if(pSequenceParameter->ip_period == 1)
        {
            slicetype = HEVC_SLICE_P;
        }else if(mfc_context->vui_hrd.i_frame_number % pSequenceParameter->ip_period == 1){
            slicetype = HEVC_SLICE_P;
        }
    }

    qp = mfc_context->bit_rate_control_context[slicetype].QpPrimeY;

    /* Predict the next frame size: move from the actual size toward the
     * target, damped by the number of frames of this type in the GOP. */
    target_frame_size = mfc_context->brc.target_frame_size[slicetype];
    if (mfc_context->hrd.buffer_capacity < 5)
        frame_size_alpha = 0;
    else
        frame_size_alpha = (double)mfc_context->brc.gop_nums[slicetype];
    if (frame_size_alpha > 30) frame_size_alpha = 30;
    frame_size_next = target_frame_size + (double)(target_frame_size - frame_bits) /
                      (double)(frame_size_alpha + 1.);

    /* frame_size_next: avoiding negative number and too small value */
    if ((double)frame_size_next < (double)(target_frame_size * 0.25))
        frame_size_next = (int)((double)target_frame_size * 0.25);

    /* First-order model: bits scale roughly inversely with QP. */
    qpf = (double)qp * target_frame_size / frame_size_next;
    qpn = (int)(qpf + 0.5);

    if (qpn == qp) {
        /* setting qpn we round qpf making mistakes: now we are trying to compensate this */
        mfc_context->brc.qpf_rounding_accumulator += qpf - qpn;
        if (mfc_context->brc.qpf_rounding_accumulator > 1.0) {
            qpn++;
            mfc_context->brc.qpf_rounding_accumulator = 0.;
        } else if (mfc_context->brc.qpf_rounding_accumulator < -1.0) {
            qpn--;
            mfc_context->brc.qpf_rounding_accumulator = 0.;
        }
    }
    /* making sure that QP is not changing too fast */
    if ((qpn - qp) > BRC_QP_MAX_CHANGE) qpn = qp + BRC_QP_MAX_CHANGE;
    else if ((qpn - qp) < -BRC_QP_MAX_CHANGE) qpn = qp - BRC_QP_MAX_CHANGE;
    /* making sure that with QP predictions we did do not leave QPs range */
    BRC_CLIP(qpn, 1, 51);

    /* checking whether HRD compliance is still met */
    sts = intel_hcpe_update_hrd(encode_state, mfc_context, frame_bits);

    /* calculating QP delta as some function*/
    x = mfc_context->hrd.target_buffer_fullness - mfc_context->hrd.current_buffer_fullness;
    if (x > 0) {
        x /= mfc_context->hrd.target_buffer_fullness;
        y = mfc_context->hrd.current_buffer_fullness;
    } else {
        x /= (mfc_context->hrd.buffer_size - mfc_context->hrd.target_buffer_fullness);
        y = mfc_context->hrd.buffer_size - mfc_context->hrd.current_buffer_fullness;
    }
    if (y < 0.01) y = 0.01;
    if (x > 1) x = 1;
    else if (x < -1) x = -1;

    /* Smooth correction: larger when far from target fullness (sin term)
     * and when close to the buffer borders (exp term). */
    delta_qp = BRC_QP_MAX_CHANGE * exp(-1 / y) * sin(BRC_PI_0_5 * x);
    qpn = (int)(qpn + delta_qp + 0.5);

    /* making sure that with QP predictions we did do not leave QPs range */
    BRC_CLIP(qpn, 1, 51);

    if (sts == BRC_NO_HRD_VIOLATION) { // no HRD violation
        /* correcting QPs of slices of other types */
        if (slicetype == HEVC_SLICE_P) {
            if (abs(qpn + BRC_P_B_QP_DIFF - qpb) > 2)
                mfc_context->bit_rate_control_context[HEVC_SLICE_B].QpPrimeY += (qpn + BRC_P_B_QP_DIFF - qpb) >> 1;
            if (abs(qpn - BRC_I_P_QP_DIFF - qpi) > 2)
                mfc_context->bit_rate_control_context[HEVC_SLICE_I].QpPrimeY += (qpn - BRC_I_P_QP_DIFF - qpi) >> 1;
        } else if (slicetype == HEVC_SLICE_I) {
            if (abs(qpn + BRC_I_B_QP_DIFF - qpb) > 4)
                mfc_context->bit_rate_control_context[HEVC_SLICE_B].QpPrimeY += (qpn + BRC_I_B_QP_DIFF - qpb) >> 2;
            if (abs(qpn + BRC_I_P_QP_DIFF - qpp) > 2)
                mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY += (qpn + BRC_I_P_QP_DIFF - qpp) >> 2;
        } else { // HEVC_SLICE_B
            if (abs(qpn - BRC_P_B_QP_DIFF - qpp) > 2)
                mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY += (qpn - BRC_P_B_QP_DIFF - qpp) >> 1;
            if (abs(qpn - BRC_I_B_QP_DIFF - qpi) > 4)
                mfc_context->bit_rate_control_context[HEVC_SLICE_I].QpPrimeY += (qpn - BRC_I_B_QP_DIFF - qpi) >> 2;
        }
        BRC_CLIP(mfc_context->bit_rate_control_context[HEVC_SLICE_I].QpPrimeY, 1, 51);
        BRC_CLIP(mfc_context->bit_rate_control_context[HEVC_SLICE_P].QpPrimeY, 1, 51);
        BRC_CLIP(mfc_context->bit_rate_control_context[HEVC_SLICE_B].QpPrimeY, 1, 51);
    } else if (sts == BRC_UNDERFLOW) { // underflow
        /* Frame too large: force QP up at least one step. */
        if (qpn <= qp) qpn = qp + 1;
        if (qpn > 51) {
            qpn = 51;
            sts = BRC_UNDERFLOW_WITH_MAX_QP; //underflow with maxQP
        }
    } else if (sts == BRC_OVERFLOW) {
        /* Frame too small: force QP down at least one step. */
        if (qpn >= qp) qpn = qp - 1;
        if (qpn < 1) { // < 0 (?) overflow with minQP
            qpn = 1;
            sts = BRC_OVERFLOW_WITH_MIN_QP; // bit stuffing to be done
        }
    }

    mfc_context->bit_rate_control_context[slicetype].QpPrimeY = qpn;

    return sts;
}
  2362.  
  2363. static void intel_hcpe_hrd_context_init(struct encode_state *encode_state,
  2364.                                         struct intel_encoder_context *encoder_context)
  2365. {
  2366.     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
  2367.     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
  2368.     unsigned int rate_control_mode = encoder_context->rate_control_mode;
  2369.     int target_bit_rate = pSequenceParameter->bits_per_second;
  2370.  
  2371.     // current we only support CBR mode.
  2372.     if (rate_control_mode == VA_RC_CBR) {
  2373.         mfc_context->vui_hrd.i_bit_rate_value = target_bit_rate >> 10;
  2374.         mfc_context->vui_hrd.i_cpb_size_value = (target_bit_rate * 8) >> 10;
  2375.         mfc_context->vui_hrd.i_initial_cpb_removal_delay = mfc_context->vui_hrd.i_cpb_size_value * 0.5 * 1024 / target_bit_rate * 90000;
  2376.         mfc_context->vui_hrd.i_cpb_removal_delay = 2;
  2377.         mfc_context->vui_hrd.i_frame_number = 0;
  2378.  
  2379.         mfc_context->vui_hrd.i_initial_cpb_removal_delay_length = 24;
  2380.         mfc_context->vui_hrd.i_cpb_removal_delay_length = 24;
  2381.         mfc_context->vui_hrd.i_dpb_output_delay_length = 24;
  2382.     }
  2383.  
  2384. }
  2385.  
  2386. void
  2387. intel_hcpe_hrd_context_update(struct encode_state *encode_state,
  2388.                               struct gen9_hcpe_context *mfc_context)
  2389. {
  2390.     mfc_context->vui_hrd.i_frame_number++;
  2391. }
  2392.  
  2393. int intel_hcpe_interlace_check(VADriverContextP ctx,
  2394.                                struct encode_state *encode_state,
  2395.                                struct intel_encoder_context *encoder_context)
  2396. {
  2397.     VAEncSliceParameterBufferHEVC *pSliceParameter;
  2398.     VAEncSequenceParameterBufferHEVC *pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
  2399.     int log2_cu_size = pSequenceParameter->log2_min_luma_coding_block_size_minus3 + 3;
  2400.     int log2_ctb_size = pSequenceParameter->log2_diff_max_min_luma_coding_block_size + log2_cu_size;
  2401.     int ctb_size = 1 << log2_ctb_size;
  2402.     int width_in_ctb = (pSequenceParameter->pic_width_in_luma_samples + ctb_size - 1) / ctb_size;
  2403.     int height_in_ctb = (pSequenceParameter->pic_height_in_luma_samples + ctb_size - 1) / ctb_size;
  2404.     int i;
  2405.     int ctbCount = 0;
  2406.  
  2407.     for (i = 0; i < encode_state->num_slice_params_ext; i++) {
  2408.         pSliceParameter = (VAEncSliceParameterBufferHEVC *)encode_state->slice_params_ext[i]->buffer;
  2409.         ctbCount += pSliceParameter->num_ctu_in_slice;
  2410.     }
  2411.  
  2412.     if (ctbCount == (width_in_ctb * height_in_ctb))
  2413.         return 0;
  2414.  
  2415.     return 1;
  2416. }
  2417.  
  2418. /*
  2419.  * Check whether the parameters related with CBR are updated and decide whether
  2420.  * it needs to reinitialize the configuration related with CBR.
  2421.  * Currently it will check the following parameters:
  2422.  *      bits_per_second
  2423.  *      frame_rate
  2424.  *      gop_configuration(intra_period, ip_period, intra_idr_period)
  2425.  */
  2426. static bool intel_hcpe_brc_updated_check(struct encode_state *encode_state,
  2427.         struct intel_encoder_context *encoder_context)
  2428. {
  2429.     /* to do */
  2430.     unsigned int rate_control_mode = encoder_context->rate_control_mode;
  2431.     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
  2432.     double cur_fps, cur_bitrate;
  2433.     VAEncSequenceParameterBufferHEVC *pSequenceParameter;
  2434.  
  2435.  
  2436.     if (rate_control_mode != VA_RC_CBR) {
  2437.         return false;
  2438.     }
  2439.  
  2440.     pSequenceParameter = (VAEncSequenceParameterBufferHEVC *)encode_state->seq_param_ext->buffer;
  2441.  
  2442.     cur_bitrate = pSequenceParameter->bits_per_second;
  2443.     cur_fps = (double)pSequenceParameter->vui_time_scale /
  2444.               (2 * (double)pSequenceParameter->vui_num_units_in_tick);
  2445.  
  2446.     if ((cur_bitrate == mfc_context->brc.saved_bps) &&
  2447.         (cur_fps == mfc_context->brc.saved_fps) &&
  2448.         (pSequenceParameter->intra_period == mfc_context->brc.saved_intra_period) &&
  2449.         (pSequenceParameter->intra_idr_period == mfc_context->brc.saved_idr_period) &&
  2450.         (pSequenceParameter->intra_period == mfc_context->brc.saved_intra_period)) {
  2451.         /* the parameters related with CBR are not updaetd */
  2452.         return false;
  2453.     }
  2454.  
  2455.     mfc_context->brc.saved_ip_period = pSequenceParameter->ip_period;
  2456.     mfc_context->brc.saved_intra_period = pSequenceParameter->intra_period;
  2457.     mfc_context->brc.saved_idr_period = pSequenceParameter->intra_idr_period;
  2458.     mfc_context->brc.saved_fps = cur_fps;
  2459.     mfc_context->brc.saved_bps = cur_bitrate;
  2460.     return true;
  2461. }
  2462.  
  2463. void intel_hcpe_brc_prepare(struct encode_state *encode_state,
  2464.                             struct intel_encoder_context *encoder_context)
  2465. {
  2466.     unsigned int rate_control_mode = encoder_context->rate_control_mode;
  2467.     struct gen9_hcpe_context *mfc_context = encoder_context->mfc_context;
  2468.  
  2469.     if (rate_control_mode == VA_RC_CBR) {
  2470.         bool brc_updated;
  2471.         assert(encoder_context->codec != CODEC_MPEG2);
  2472.  
  2473.         brc_updated = intel_hcpe_brc_updated_check(encode_state, encoder_context);
  2474.  
  2475.         /*Programing bit rate control */
  2476.         if ((mfc_context->bit_rate_control_context[HEVC_SLICE_I].MaxSizeInWord == 0) ||
  2477.             brc_updated) {
  2478.             intel_hcpe_bit_rate_control_context_init(encode_state, mfc_context);
  2479.             intel_hcpe_brc_init(encode_state, encoder_context);
  2480.         }
  2481.  
  2482.         /*Programing HRD control */
  2483.         if ((mfc_context->vui_hrd.i_cpb_size_value == 0) || brc_updated)
  2484.             intel_hcpe_hrd_context_init(encode_state, encoder_context);
  2485.     }
  2486. }
  2487.  
  2488. /* HEVC interface API for encoder */
  2489.  
  2490. static VAStatus
  2491. gen9_hcpe_hevc_encode_picture(VADriverContextP ctx,
  2492.                               struct encode_state *encode_state,
  2493.                               struct intel_encoder_context *encoder_context)
  2494. {
  2495.     struct gen9_hcpe_context *hcpe_context = encoder_context->mfc_context;
  2496.     unsigned int rate_control_mode = encoder_context->rate_control_mode;
  2497.     int current_frame_bits_size;
  2498.     int sts;
  2499.  
  2500.     for (;;) {
  2501.         gen9_hcpe_init(ctx, encode_state, encoder_context);
  2502.         intel_hcpe_hevc_prepare(ctx, encode_state, encoder_context);
  2503.         /*Programing bcs pipeline*/
  2504.         gen9_hcpe_hevc_pipeline_programing(ctx, encode_state, encoder_context); //filling the pipeline
  2505.         gen9_hcpe_run(ctx, encode_state, encoder_context);
  2506.         if (rate_control_mode == VA_RC_CBR /*|| rate_control_mode == VA_RC_VBR*/) {
  2507.             gen9_hcpe_stop(ctx, encode_state, encoder_context, &current_frame_bits_size);
  2508.             sts = intel_hcpe_brc_postpack(encode_state, hcpe_context, current_frame_bits_size);
  2509.             if (sts == BRC_NO_HRD_VIOLATION) {
  2510.                 intel_hcpe_hrd_context_update(encode_state, hcpe_context);
  2511.                 break;
  2512.             } else if (sts == BRC_OVERFLOW_WITH_MIN_QP || sts == BRC_UNDERFLOW_WITH_MAX_QP) {
  2513.                 if (!hcpe_context->hrd.violation_noted) {
  2514.                     fprintf(stderr, "Unrepairable %s!\n", (sts == BRC_OVERFLOW_WITH_MIN_QP) ? "overflow" : "underflow");
  2515.                     hcpe_context->hrd.violation_noted = 1;
  2516.                 }
  2517.                 return VA_STATUS_SUCCESS;
  2518.             }
  2519.         } else {
  2520.             break;
  2521.         }
  2522.     }
  2523.  
  2524.     return VA_STATUS_SUCCESS;
  2525. }
  2526.  
  2527. void
  2528. gen9_hcpe_context_destroy(void *context)
  2529. {
  2530.     struct gen9_hcpe_context *hcpe_context = context;
  2531.     int i;
  2532.  
  2533.     dri_bo_unreference(hcpe_context->deblocking_filter_line_buffer.bo);
  2534.     hcpe_context->deblocking_filter_line_buffer.bo = NULL;
  2535.  
  2536.     dri_bo_unreference(hcpe_context->deblocking_filter_tile_line_buffer.bo);
  2537.     hcpe_context->deblocking_filter_tile_line_buffer.bo = NULL;
  2538.  
  2539.     dri_bo_unreference(hcpe_context->deblocking_filter_tile_column_buffer.bo);
  2540.     hcpe_context->deblocking_filter_tile_column_buffer.bo = NULL;
  2541.  
  2542.     dri_bo_unreference(hcpe_context->uncompressed_picture_source.bo);
  2543.     hcpe_context->uncompressed_picture_source.bo = NULL;
  2544.  
  2545.     dri_bo_unreference(hcpe_context->metadata_line_buffer.bo);
  2546.     hcpe_context->metadata_line_buffer.bo = NULL;
  2547.  
  2548.     dri_bo_unreference(hcpe_context->metadata_tile_line_buffer.bo);
  2549.     hcpe_context->metadata_tile_line_buffer.bo = NULL;
  2550.  
  2551.     dri_bo_unreference(hcpe_context->metadata_tile_column_buffer.bo);
  2552.     hcpe_context->metadata_tile_column_buffer.bo = NULL;
  2553.  
  2554.     dri_bo_unreference(hcpe_context->sao_line_buffer.bo);
  2555.     hcpe_context->sao_line_buffer.bo = NULL;
  2556.  
  2557.     dri_bo_unreference(hcpe_context->sao_tile_line_buffer.bo);
  2558.     hcpe_context->sao_tile_line_buffer.bo = NULL;
  2559.  
  2560.     dri_bo_unreference(hcpe_context->sao_tile_column_buffer.bo);
  2561.     hcpe_context->sao_tile_column_buffer.bo = NULL;
  2562.  
  2563.     /* mv temporal buffer */
  2564.     for (i = 0; i < NUM_HCP_CURRENT_COLLOCATED_MV_TEMPORAL_BUFFERS; i++) {
  2565.         if (hcpe_context->current_collocated_mv_temporal_buffer[i].bo != NULL)
  2566.             dri_bo_unreference(hcpe_context->current_collocated_mv_temporal_buffer[i].bo);
  2567.         hcpe_context->current_collocated_mv_temporal_buffer[i].bo = NULL;
  2568.     }
  2569.  
  2570.     for (i = 0; i < MAX_HCP_REFERENCE_SURFACES; i++) {
  2571.         dri_bo_unreference(hcpe_context->reference_surfaces[i].bo);
  2572.         hcpe_context->reference_surfaces[i].bo = NULL;
  2573.     }
  2574.  
  2575.     dri_bo_unreference(hcpe_context->hcp_indirect_cu_object.bo);
  2576.     hcpe_context->hcp_indirect_cu_object.bo = NULL;
  2577.  
  2578.     dri_bo_unreference(hcpe_context->hcp_indirect_pak_bse_object.bo);
  2579.     hcpe_context->hcp_indirect_pak_bse_object.bo = NULL;
  2580.  
  2581.     dri_bo_unreference(hcpe_context->hcp_batchbuffer_surface.bo);
  2582.     hcpe_context->hcp_batchbuffer_surface.bo = NULL;
  2583.  
  2584.     dri_bo_unreference(hcpe_context->aux_batchbuffer_surface.bo);
  2585.     hcpe_context->aux_batchbuffer_surface.bo = NULL;
  2586.  
  2587.     if (hcpe_context->aux_batchbuffer)
  2588.         intel_batchbuffer_free(hcpe_context->aux_batchbuffer);
  2589.  
  2590.     hcpe_context->aux_batchbuffer = NULL;
  2591.  
  2592.     free(hcpe_context);
  2593. }
  2594.  
  2595. VAStatus gen9_hcpe_pipeline(VADriverContextP ctx,
  2596.                             VAProfile profile,
  2597.                             struct encode_state *encode_state,
  2598.                             struct intel_encoder_context *encoder_context)
  2599. {
  2600.     VAStatus vaStatus;
  2601.  
  2602.     switch (profile) {
  2603.     case VAProfileHEVCMain:
  2604.         vaStatus = gen9_hcpe_hevc_encode_picture(ctx, encode_state, encoder_context);
  2605.         break;
  2606.  
  2607.     default:
  2608.         vaStatus = VA_STATUS_ERROR_UNSUPPORTED_PROFILE;
  2609.         break;
  2610.     }
  2611.  
  2612.     return vaStatus;
  2613. }
  2614.  
  2615. Bool gen9_hcpe_context_init(VADriverContextP ctx, struct intel_encoder_context *encoder_context)
  2616. {
  2617.     struct gen9_hcpe_context *hcpe_context = calloc(1, sizeof(struct gen9_hcpe_context));
  2618.  
  2619.     assert(hcpe_context);
  2620.     hcpe_context->pipe_mode_select = gen9_hcpe_pipe_mode_select;
  2621.     hcpe_context->set_surface_state = gen9_hcpe_surface_state;
  2622.     hcpe_context->ind_obj_base_addr_state = gen9_hcpe_ind_obj_base_addr_state;
  2623.     hcpe_context->pic_state = gen9_hcpe_hevc_pic_state;
  2624.     hcpe_context->qm_state = gen9_hcpe_hevc_qm_state;
  2625.     hcpe_context->fqm_state = gen9_hcpe_hevc_fqm_state;
  2626.     hcpe_context->insert_object = gen9_hcpe_hevc_insert_object;
  2627.     hcpe_context->buffer_suface_setup = gen8_gpe_buffer_suface_setup;
  2628.  
  2629.     encoder_context->mfc_context = hcpe_context;
  2630.     encoder_context->mfc_context_destroy = gen9_hcpe_context_destroy;
  2631.     encoder_context->mfc_pipeline = gen9_hcpe_pipeline;
  2632.     encoder_context->mfc_brc_prepare = intel_hcpe_brc_prepare;
  2633.  
  2634.     hevc_gen_default_iq_matrix_encoder(&hcpe_context->iq_matrix_hevc);
  2635.  
  2636.     return True;
  2637. }
  2638.