Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright © 2011 Intel Corporation
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the
  6.  * "Software"), to deal in the Software without restriction, including
  7.  * without limitation the rights to use, copy, modify, merge, publish,
  8.  * distribute, sub license, and/or sell copies of the Software, and to
  9.  * permit persons to whom the Software is furnished to do so, subject to
  10.  * the following conditions:
  11.  *
  12.  * The above copyright notice and this permission notice (including the
  13.  * next paragraph) shall be included in all copies or substantial portions
  14.  * of the Software.
  15.  *
  16.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  17.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  18.  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  19.  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
  20.  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  21.  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  22.  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23.  *
  24.  * Authors:
  25.  *    Xiang Haihao <haihao.xiang@intel.com>
  26.  *
  27.  */
  28.  
  29. #include "sysdeps.h"
  30.  
  31. #include <va/va_dec_jpeg.h>
  32.  
  33. #include "intel_batchbuffer.h"
  34. #include "intel_driver.h"
  35. #include "i965_defines.h"
  36. #include "i965_drv_video.h"
  37. #include "i965_decoder_utils.h"
  38.  
  39. #include "gen7_mfd.h"
  40. #include "intel_media.h"
  41.  
/* 8x8 zig-zag scan order: entry i is the raster-order index of the i-th
 * coefficient in zig-zag scan.  Presumably used to reorder quantization
 * matrices supplied in zig-zag order (e.g. by the JPEG path) -- the use
 * site is outside this chunk; confirm against the rest of the file. */
static const uint32_t zigzag_direct[64] = {
    0,   1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
  52.  
  53. static void
  54. gen7_mfd_init_avc_surface(VADriverContextP ctx,
  55.                           VAPictureParameterBufferH264 *pic_param,
  56.                           struct object_surface *obj_surface)
  57. {
  58.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  59.     GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
  60.     int width_in_mbs, height_in_mbs;
  61.  
  62.     obj_surface->free_private_data = gen_free_avc_surface;
  63.     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
  64.     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
  65.  
  66.     if (!gen7_avc_surface) {
  67.         gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
  68.         gen7_avc_surface->frame_store_id = -1;
  69.         assert((obj_surface->size & 0x3f) == 0);
  70.         obj_surface->private_data = gen7_avc_surface;
  71.     }
  72.  
  73.     gen7_avc_surface->dmv_bottom_flag = (pic_param->pic_fields.bits.field_pic_flag &&
  74.                                          !pic_param->seq_fields.bits.direct_8x8_inference_flag);
  75.  
  76.     if (gen7_avc_surface->dmv_top == NULL) {
  77.         gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
  78.                                                  "direct mv w/r buffer",
  79.                                                  width_in_mbs * (height_in_mbs + 1) * 64,
  80.                                                  0x1000);
  81.         assert(gen7_avc_surface->dmv_top);
  82.     }
  83.  
  84.     if (gen7_avc_surface->dmv_bottom_flag &&
  85.         gen7_avc_surface->dmv_bottom == NULL) {
  86.         gen7_avc_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr,
  87.                                                     "direct mv w/r buffer",
  88.                                                     width_in_mbs * (height_in_mbs + 1) * 64,
  89.                                                     0x1000);
  90.         assert(gen7_avc_surface->dmv_bottom);
  91.     }
  92. }
  93.  
/*
 * Emit MFX_PIPE_MODE_SELECT (5 DWs): configure the MFX engine for VLD
 * decode of the selected codec (MPEG2/AVC/VC1/JPEG), long-format
 * commands, stream-out disabled, and pre-/post-deblocking outputs
 * enabled according to the context state.
 */
static void
gen7_mfd_pipe_mode_select(VADriverContextP ctx,
                          struct decode_state *decode_state,
                          int standard_select,
                          struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    assert(standard_select == MFX_FORMAT_MPEG2 ||
           standard_select == MFX_FORMAT_AVC ||
           standard_select == MFX_FORMAT_VC1 ||
           standard_select == MFX_FORMAT_JPEG);

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  (MFX_LONG_MODE << 17) | /* Currently only support long format */
                  (MFD_MODE_VLD << 15) | /* VLD mode */
                  (0 << 10) | /* disable Stream-Out */
                  (gen7_mfd_context->post_deblocking_output.valid << 9)  | /* Post Deblocking Output */
                  (gen7_mfd_context->pre_deblocking_output.valid << 8)  | /* Pre Deblocking Output */
                  (0 << 5)  | /* not in stitch mode */
                  (MFX_CODEC_DECODE << 4)  | /* decoding mode */
                  (standard_select << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
                  (0 << 1)  |
                  (0 << 0));
    OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
    OUT_BCS_BATCH(batch, 0); /* reserved */
    ADVANCE_BCS_BATCH(batch);
}
  128.  
/*
 * Emit MFX_SURFACE_STATE (6 DWs) describing the destination surface:
 * dimensions, pitch, tiling (Y-major), chroma plane offsets, and the
 * surface format (monochrome for Y800, otherwise planar 4:2:0).
 */
static void
gen7_mfd_surface_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       int standard_select,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct object_surface *obj_surface = decode_state->render_object;
    unsigned int y_cb_offset;
    unsigned int y_cr_offset;
    unsigned int surface_format;

    assert(obj_surface);

    y_cb_offset = obj_surface->y_cb_offset;
    y_cr_offset = obj_surface->y_cr_offset;

    surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
        MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  ((obj_surface->orig_height - 1) << 18) |
                  ((obj_surface->orig_width - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (surface_format << 28) | /* 420 planar YUV surface */
                  ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
                  (0 << 22) | /* surface object control state, ignored */
                  ((obj_surface->width - 1) << 3) | /* pitch */
                  (0 << 2)  | /* must be 0 */
                  (1 << 1)  | /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for U(Cb), must be 0 */
                  (y_cb_offset << 0)); /* Y offset for U(Cb) */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for V(Cr), must be 0 */
                  (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
    ADVANCE_BCS_BATCH(batch);
}
  171.  
/*
 * Emit MFX_PIPE_BUF_ADDR_STATE (24 DWs): pre/post-deblocking outputs,
 * intra and deblocking row-store scratch buffers, and the 16 reference
 * picture base addresses (DW 7..22).  Invalid/absent buffers are
 * emitted as 0.
 */
static void
gen7_mfd_pipe_buf_addr_state(VADriverContextP ctx,
                             struct decode_state *decode_state,
                             int standard_select,
                             struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int i;

    BEGIN_BCS_BATCH(batch, 24);
    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (24 - 2));
    if (gen7_mfd_context->pre_deblocking_output.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    if (gen7_mfd_context->post_deblocking_output.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */

    if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    /* DW 7..22: one address per reference surface slot; read-only for
     * the GPU, so no write domain on the relocation. */
    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
        struct object_surface *obj_surface;

        if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
            gen7_mfd_context->reference_surface[i].obj_surface &&
            gen7_mfd_context->reference_surface[i].obj_surface->bo) {
            obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;

            OUT_BCS_RELOC(batch, obj_surface->bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0);
        } else {
            OUT_BCS_BATCH(batch, 0);
        }
    }

    OUT_BCS_BATCH(batch, 0);   /* ignore DW23 for decoding */
    ADVANCE_BCS_BATCH(batch);
}
  234.  
/*
 * Emit MFX_IND_OBJ_BASE_ADDR_STATE (11 DWs): point the indirect
 * bitstream object at the slice data buffer.  Only the bitstream base
 * address matters in VLD mode; the remaining entries are zeroed.
 */
static void
gen7_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
                                 dri_bo *slice_data_bo,
                                 int standard_select,
                                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 11);
    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
    OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
    OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  257.  
/*
 * Emit MFX_BSP_BUF_BASE_ADDR_STATE (4 DWs): BSD/MPC row store,
 * MPR row store, and bitplane read buffer addresses.  Invalid buffers
 * are emitted as 0.  The bitplane buffer is GPU read-only (no write
 * domain on its relocation).
 */
static void
gen7_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
                                 struct decode_state *decode_state,
                                 int standard_select,
                                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 4);
    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));

    if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    if (gen7_mfd_context->bitplane_read_buffer.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
  292.  
  293. static void
  294. gen7_mfd_qm_state(VADriverContextP ctx,
  295.                   int qm_type,
  296.                   unsigned char *qm,
  297.                   int qm_length,
  298.                   struct gen7_mfd_context *gen7_mfd_context)
  299. {
  300.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  301.     unsigned int qm_buffer[16];
  302.  
  303.     assert(qm_length <= 16 * 4);
  304.     memcpy(qm_buffer, qm, qm_length);
  305.  
  306.     BEGIN_BCS_BATCH(batch, 18);
  307.     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
  308.     OUT_BCS_BATCH(batch, qm_type << 0);
  309.     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
  310.     ADVANCE_BCS_BATCH(batch);
  311. }
  312.  
/*
 * Emit MFX_AVC_IMG_STATE (16 DWs): picture-level AVC decode parameters
 * derived from the VA picture parameter buffer -- frame/field structure,
 * dimensions in macroblocks, QP index offsets, prediction flags, and
 * sequence flags.  DW6..15 are reserved/zero on GEN7.
 */
static void
gen7_mfd_avc_img_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int img_struct;
    int mbaff_frame_flag;
    unsigned int width_in_mbs, height_in_mbs;
    VAPictureParameterBufferH264 *pic_param;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;

    /* img_struct: 0 = frame, 1 = top field, 3 = bottom field. */
    if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
        img_struct = 1;
    else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
        img_struct = 3;
    else
        img_struct = 0;

    /* A field picture structure must agree with field_pic_flag. */
    if ((img_struct & 0x1) == 0x1) {
        assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
    } else {
        assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
    }

    if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
        assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
        assert(pic_param->pic_fields.bits.field_pic_flag == 0);
    } else {
        assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
    }

    mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
                        !pic_param->pic_fields.bits.field_pic_flag);

    width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */

    /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
    assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
           pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
    assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */

    BEGIN_BCS_BATCH(batch, 16);
    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
    OUT_BCS_BATCH(batch,
                  (width_in_mbs * height_in_mbs - 1));
    OUT_BCS_BATCH(batch,
                  ((height_in_mbs - 1) << 16) |
                  ((width_in_mbs - 1) << 0));
    OUT_BCS_BATCH(batch,
                  ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
                  ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
                  (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
                  (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
                  (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
                  (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
                  (img_struct << 8));
    OUT_BCS_BATCH(batch,
                  (pic_param->seq_fields.bits.chroma_format_idc << 10) |
                  (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
                  ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
                  (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
                  (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
                  (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
                  (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
                  (mbaff_frame_flag << 1) |
                  (pic_param->pic_fields.bits.field_pic_flag << 0));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  396.  
  397. static void
  398. gen7_mfd_avc_qm_state(VADriverContextP ctx,
  399.                       struct decode_state *decode_state,
  400.                       struct gen7_mfd_context *gen7_mfd_context)
  401. {
  402.     VAIQMatrixBufferH264 *iq_matrix;
  403.     VAPictureParameterBufferH264 *pic_param;
  404.  
  405.     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
  406.         iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
  407.     else
  408.         iq_matrix = &gen7_mfd_context->iq_matrix.h264;
  409.  
  410.     assert(decode_state->pic_param && decode_state->pic_param->buffer);
  411.     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
  412.  
  413.     gen7_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
  414.     gen7_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
  415.  
  416.     if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
  417.         gen7_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
  418.         gen7_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
  419.     }
  420. }
  421.  
/*
 * Emit MFX_AVC_DIRECTMODE_STATE (69 DWs): direct-mode MV buffer
 * addresses for the 16 reference surfaces and the current picture
 * (two entries each: top and bottom; the top buffer is reused when no
 * separate bottom buffer exists), followed by the POC list -- top and
 * bottom field order counts per reference slot, then the current
 * picture's.
 */
static void
gen7_mfd_avc_directmode_state(VADriverContextP ctx,
                              struct decode_state *decode_state,
                              VAPictureParameterBufferH264 *pic_param,
                              VASliceParameterBufferH264 *slice_param,
                              struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct object_surface *obj_surface;
    GenAvcSurface *gen7_avc_surface;
    VAPictureH264 *va_pic;
    int i;

    BEGIN_BCS_BATCH(batch, 69);
    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));

    /* reference surfaces 0..15: GPU reads these MV buffers only. */
    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
        if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
            gen7_mfd_context->reference_surface[i].obj_surface &&
            gen7_mfd_context->reference_surface[i].obj_surface->private_data) {

            obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
            gen7_avc_surface = obj_surface->private_data;
            OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0);

            if (gen7_avc_surface->dmv_bottom_flag == 1)
                OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_bottom,
                              I915_GEM_DOMAIN_INSTRUCTION, 0,
                              0);
            else
                OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
                              I915_GEM_DOMAIN_INSTRUCTION, 0,
                              0);
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    /* the current decoding frame/field: read/write MV buffers. */
    va_pic = &pic_param->CurrPic;
    obj_surface = decode_state->render_object;
    assert(obj_surface->bo && obj_surface->private_data);
    gen7_avc_surface = obj_surface->private_data;

    OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);

    if (gen7_avc_surface->dmv_bottom_flag == 1)
        OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_bottom,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);

    /* POC List: top/bottom field order counts per reference slot. */
    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
        obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;

        if (obj_surface) {
            const VAPictureH264 * const va_pic = avc_find_picture(
                obj_surface->base.id, pic_param->ReferenceFrames,
                ARRAY_ELEMS(pic_param->ReferenceFrames));

            assert(va_pic != NULL);
            OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
            OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    /* Finally, the current picture's POCs. */
    va_pic = &pic_param->CurrPic;
    OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
    OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);

    ADVANCE_BCS_BATCH(batch);
}
  507.  
/*
 * Emit a phantom slice before the first real slice, delegating to the
 * shared GEN6 helper with this context's batch.  Presumably covers
 * macroblocks preceding the first coded slice -- confirm against
 * gen6_mfd_avc_phantom_slice in i965_decoder_utils.
 */
static void
gen7_mfd_avc_phantom_slice_first(VADriverContextP ctx,
                                 VAPictureParameterBufferH264 *pic_param,
                                 VASliceParameterBufferH264 *next_slice_param,
                                 struct gen7_mfd_context *gen7_mfd_context)
{
    gen6_mfd_avc_phantom_slice(ctx, pic_param, next_slice_param, gen7_mfd_context->base.batch);
}
  516.  
/*
 * Emit MFX_AVC_SLICE_STATE (11 DWs): slice type, active reference
 * counts, weighted-prediction denominators, QP/deblocking parameters,
 * and the start position of this slice and the next (the "next" values
 * let the hardware detect the last macroblock of this slice; when
 * there is no next slice the end-of-picture position is used and the
 * last-slice flag is set).
 */
static void
gen7_mfd_avc_slice_state(VADriverContextP ctx,
                         VAPictureParameterBufferH264 *pic_param,
                         VASliceParameterBufferH264 *slice_param,
                         VASliceParameterBufferH264 *next_slice_param,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
    int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
    int num_ref_idx_l0, num_ref_idx_l1;
    int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
                         pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
    int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
    int slice_type;

    /* Collapse SI->I and SP->P: the hardware only knows I/P/B. */
    if (slice_param->slice_type == SLICE_TYPE_I ||
        slice_param->slice_type == SLICE_TYPE_SI) {
        slice_type = SLICE_TYPE_I;
    } else if (slice_param->slice_type == SLICE_TYPE_P ||
               slice_param->slice_type == SLICE_TYPE_SP) {
        slice_type = SLICE_TYPE_P;
    } else {
        assert(slice_param->slice_type == SLICE_TYPE_B);
        slice_type = SLICE_TYPE_B;
    }

    /* Active reference counts: I uses none, P uses list 0 only. */
    if (slice_type == SLICE_TYPE_I) {
        assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
        num_ref_idx_l0 = 0;
        num_ref_idx_l1 = 0;
    } else if (slice_type == SLICE_TYPE_P) {
        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
        num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_idx_l1 = 0;
    } else {
        num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
    }

    /* In MBAFF pictures each slice address counts MB pairs, so the MB
     * address doubles (<< 1). */
    first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
    slice_hor_pos = first_mb_in_slice % width_in_mbs;
    slice_ver_pos = first_mb_in_slice / width_in_mbs;

    if (next_slice_param) {
        first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture;
        next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs;
        next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
    } else {
        /* Last slice: "next" position is one row past the picture
         * (field pictures have half the frame height). */
        next_slice_hor_pos = 0;
        next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
    }

    BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, slice_type);
    OUT_BCS_BATCH(batch,
                  (num_ref_idx_l1 << 24) |
                  (num_ref_idx_l0 << 16) |
                  (slice_param->chroma_log2_weight_denom << 8) |
                  (slice_param->luma_log2_weight_denom << 0));
    OUT_BCS_BATCH(batch,
                  (slice_param->direct_spatial_mv_pred_flag << 29) |
                  (slice_param->disable_deblocking_filter_idc << 27) |
                  (slice_param->cabac_init_idc << 24) |
                  ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
                  ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
                  ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
    OUT_BCS_BATCH(batch,
                  (slice_ver_pos << 24) |
                  (slice_hor_pos << 16) |
                  (first_mb_in_slice << 0));
    OUT_BCS_BATCH(batch,
                  (next_slice_ver_pos << 16) |
                  (next_slice_hor_pos << 0));
    OUT_BCS_BATCH(batch,
                  (next_slice_param == NULL) << 19); /* last slice flag */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  602.  
/*
 * Emit the AVC reference-index state for this slice by delegating to
 * the shared GEN6 helper with this context's batch and reference
 * surface table.
 */
static inline void
gen7_mfd_avc_ref_idx_state(VADriverContextP ctx,
                           VAPictureParameterBufferH264 *pic_param,
                           VASliceParameterBufferH264 *slice_param,
                           struct gen7_mfd_context *gen7_mfd_context)
{
    gen6_send_avc_ref_idx_state(
        gen7_mfd_context->base.batch,
        slice_param,
        gen7_mfd_context->reference_surface
    );
}
  615.  
  616. static void
  617. gen7_mfd_avc_weightoffset_state(VADriverContextP ctx,
  618.                                 VAPictureParameterBufferH264 *pic_param,
  619.                                 VASliceParameterBufferH264 *slice_param,
  620.                                 struct gen7_mfd_context *gen7_mfd_context)
  621. {
  622.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  623.     int i, j, num_weight_offset_table = 0;
  624.     short weightoffsets[32 * 6];
  625.  
  626.     if ((slice_param->slice_type == SLICE_TYPE_P ||
  627.          slice_param->slice_type == SLICE_TYPE_SP) &&
  628.         (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
  629.         num_weight_offset_table = 1;
  630.     }
  631.    
  632.     if ((slice_param->slice_type == SLICE_TYPE_B) &&
  633.         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
  634.         num_weight_offset_table = 2;
  635.     }
  636.  
  637.     for (i = 0; i < num_weight_offset_table; i++) {
  638.         BEGIN_BCS_BATCH(batch, 98);
  639.         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
  640.         OUT_BCS_BATCH(batch, i);
  641.  
  642.         if (i == 0) {
  643.             for (j = 0; j < 32; j++) {
  644.                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
  645.                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
  646.                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
  647.                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
  648.                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
  649.                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
  650.             }
  651.         } else {
  652.             for (j = 0; j < 32; j++) {
  653.                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
  654.                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
  655.                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
  656.                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
  657.                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
  658.                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
  659.             }
  660.         }
  661.  
  662.         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
  663.         ADVANCE_BCS_BATCH(batch);
  664.     }
  665. }
  666.  
/*
 * Emit one MFD_AVC_BSD_OBJECT command for an H.264 slice.
 *
 * avc_get_first_mb_bit_offset() locates the bit position of the first
 * macroblock inside the slice data (i.e. past the slice header); it is
 * split below into a byte offset and a residual 0..7 bit offset.
 * next_slice_param == NULL marks the last slice of the picture.
 */
static void
gen7_mfd_avc_bsd_object(VADriverContextP ctx,
                        VAPictureParameterBufferH264 *pic_param,
                        VASliceParameterBufferH264 *slice_param,
                        dri_bo *slice_data_bo,
                        VASliceParameterBufferH264 *next_slice_param,
                        struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    unsigned int slice_data_bit_offset;

    slice_data_bit_offset = avc_get_first_mb_bit_offset(
        slice_data_bo,
        slice_param,
        pic_param->pic_fields.bits.entropy_coding_mode_flag
    );

    /* the input bitstream format on GEN7 differs from GEN6 */
    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
    /* DW1: bytes of slice data remaining after the header offset */
    OUT_BCS_BATCH(batch,
                  (slice_param->slice_data_size - slice_param->slice_data_offset));
    /* DW2: start of this slice within the indirect object buffer */
    OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
    OUT_BCS_BATCH(batch,
                  (0 << 31) |
                  (0 << 14) |
                  (0 << 12) |
                  (0 << 10) |
                  (0 << 8));
    OUT_BCS_BATCH(batch,
                  ((slice_data_bit_offset >> 3) << 16) | /* first MB byte offset */
                  (1 << 7)  |
                  (0 << 5)  |
                  (0 << 4)  |
                  ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
                  (slice_data_bit_offset & 0x7)); /* residual bit offset */
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  706.  
/*
 * One-time per-context initialization for H.264 decoding.
 * ctx is unused here; the parameter is kept for signature symmetry with
 * the other per-codec init hooks in this file.
 */
static inline void
gen7_mfd_avc_context_init(
    VADriverContextP         ctx,
    struct gen7_mfd_context *gen7_mfd_context
)
{
    /* Initialize flat scaling lists */
    avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
}
  716.  
  717. static void
  718. gen7_mfd_avc_decode_init(VADriverContextP ctx,
  719.                          struct decode_state *decode_state,
  720.                          struct gen7_mfd_context *gen7_mfd_context)
  721. {
  722.     VAPictureParameterBufferH264 *pic_param;
  723.     VASliceParameterBufferH264 *slice_param;
  724.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  725.     struct object_surface *obj_surface;
  726.     dri_bo *bo;
  727.     int i, j, enable_avc_ildb = 0;
  728.     unsigned int width_in_mbs, height_in_mbs;
  729.  
  730.     for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
  731.         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
  732.         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
  733.  
  734.         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
  735.             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
  736.             assert((slice_param->slice_type == SLICE_TYPE_I) ||
  737.                    (slice_param->slice_type == SLICE_TYPE_SI) ||
  738.                    (slice_param->slice_type == SLICE_TYPE_P) ||
  739.                    (slice_param->slice_type == SLICE_TYPE_SP) ||
  740.                    (slice_param->slice_type == SLICE_TYPE_B));
  741.  
  742.             if (slice_param->disable_deblocking_filter_idc != 1) {
  743.                 enable_avc_ildb = 1;
  744.                 break;
  745.             }
  746.  
  747.             slice_param++;
  748.         }
  749.     }
  750.  
  751.     assert(decode_state->pic_param && decode_state->pic_param->buffer);
  752.     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
  753.     intel_update_avc_frame_store_index(ctx, decode_state, pic_param,
  754.         gen7_mfd_context->reference_surface, &gen7_mfd_context->fs_ctx);
  755.     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
  756.     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
  757.     assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
  758.     assert(height_in_mbs > 0 && height_in_mbs <= 256);
  759.  
  760.     /* Current decoded picture */
  761.     obj_surface = decode_state->render_object;
  762.     if (pic_param->pic_fields.bits.reference_pic_flag)
  763.         obj_surface->flags |= SURFACE_REFERENCED;
  764.     else
  765.         obj_surface->flags &= ~SURFACE_REFERENCED;
  766.  
  767.     avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
  768.     gen7_mfd_init_avc_surface(ctx, pic_param, obj_surface);
  769.  
  770.     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
  771.     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
  772.     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
  773.     gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;
  774.  
  775.     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
  776.     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
  777.     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
  778.     gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;
  779.  
  780.     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
  781.     bo = dri_bo_alloc(i965->intel.bufmgr,
  782.                       "intra row store",
  783.                       width_in_mbs * 64,
  784.                       0x1000);
  785.     assert(bo);
  786.     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
  787.     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
  788.  
  789.     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
  790.     bo = dri_bo_alloc(i965->intel.bufmgr,
  791.                       "deblocking filter row store",
  792.                       width_in_mbs * 64 * 4,
  793.                       0x1000);
  794.     assert(bo);
  795.     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
  796.     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
  797.  
  798.     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
  799.     bo = dri_bo_alloc(i965->intel.bufmgr,
  800.                       "bsd mpc row store",
  801.                       width_in_mbs * 64 * 2,
  802.                       0x1000);
  803.     assert(bo);
  804.     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
  805.     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
  806.  
  807.     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
  808.     bo = dri_bo_alloc(i965->intel.bufmgr,
  809.                       "mpr row store",
  810.                       width_in_mbs * 64 * 2,
  811.                       0x1000);
  812.     assert(bo);
  813.     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
  814.     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
  815.  
  816.     gen7_mfd_context->bitplane_read_buffer.valid = 0;
  817. }
  818.  
  819. static void
  820. gen7_mfd_avc_decode_picture(VADriverContextP ctx,
  821.                             struct decode_state *decode_state,
  822.                             struct gen7_mfd_context *gen7_mfd_context)
  823. {
  824.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  825.     VAPictureParameterBufferH264 *pic_param;
  826.     VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
  827.     dri_bo *slice_data_bo;
  828.     int i, j;
  829.  
  830.     assert(decode_state->pic_param && decode_state->pic_param->buffer);
  831.     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
  832.     gen7_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);
  833.  
  834.     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
  835.     intel_batchbuffer_emit_mi_flush(batch);
  836.     gen7_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
  837.     gen7_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
  838.     gen7_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
  839.     gen7_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
  840.     gen7_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
  841.     gen7_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
  842.  
  843.     for (j = 0; j < decode_state->num_slice_params; j++) {
  844.         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
  845.         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
  846.         slice_data_bo = decode_state->slice_datas[j]->bo;
  847.         gen7_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);
  848.  
  849.         if (j == decode_state->num_slice_params - 1)
  850.             next_slice_group_param = NULL;
  851.         else
  852.             next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
  853.  
  854.         if (j == 0 && slice_param->first_mb_in_slice)
  855.             gen7_mfd_avc_phantom_slice_first(ctx, pic_param, slice_param, gen7_mfd_context);
  856.  
  857.         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
  858.             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
  859.             assert((slice_param->slice_type == SLICE_TYPE_I) ||
  860.                    (slice_param->slice_type == SLICE_TYPE_SI) ||
  861.                    (slice_param->slice_type == SLICE_TYPE_P) ||
  862.                    (slice_param->slice_type == SLICE_TYPE_SP) ||
  863.                    (slice_param->slice_type == SLICE_TYPE_B));
  864.  
  865.             if (i < decode_state->slice_params[j]->num_elements - 1)
  866.                 next_slice_param = slice_param + 1;
  867.             else
  868.                 next_slice_param = next_slice_group_param;
  869.  
  870.             gen7_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen7_mfd_context);
  871.             gen7_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
  872.             gen7_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
  873.             gen7_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
  874.             gen7_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
  875.             slice_param++;
  876.         }
  877.     }
  878.  
  879.     intel_batchbuffer_end_atomic(batch);
  880.     intel_batchbuffer_flush(batch);
  881. }
  882.  
  883. static void
  884. gen7_mfd_mpeg2_decode_init(VADriverContextP ctx,
  885.                            struct decode_state *decode_state,
  886.                            struct gen7_mfd_context *gen7_mfd_context)
  887. {
  888.     VAPictureParameterBufferMPEG2 *pic_param;
  889.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  890.     struct object_surface *obj_surface;
  891.     dri_bo *bo;
  892.     unsigned int width_in_mbs;
  893.  
  894.     assert(decode_state->pic_param && decode_state->pic_param->buffer);
  895.     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
  896.     width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
  897.  
  898.     mpeg2_set_reference_surfaces(
  899.         ctx,
  900.         gen7_mfd_context->reference_surface,
  901.         decode_state,
  902.         pic_param
  903.     );
  904.  
  905.     /* Current decoded picture */
  906.     obj_surface = decode_state->render_object;
  907.     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
  908.  
  909.     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
  910.     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
  911.     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
  912.     gen7_mfd_context->pre_deblocking_output.valid = 1;
  913.  
  914.     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
  915.     bo = dri_bo_alloc(i965->intel.bufmgr,
  916.                       "bsd mpc row store",
  917.                       width_in_mbs * 96,
  918.                       0x1000);
  919.     assert(bo);
  920.     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
  921.     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
  922.  
  923.     gen7_mfd_context->post_deblocking_output.valid = 0;
  924.     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
  925.     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
  926.     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
  927.     gen7_mfd_context->bitplane_read_buffer.valid = 0;
  928. }
  929.  
/*
 * Emit the MFX_MPEG2_PIC_STATE command (13 dwords).
 *
 * DW1 packs the four f_code nibbles plus the picture coding extension
 * flags, DW2 carries the picture coding type and DW3 the frame size in
 * macroblocks; DW4..DW12 are left zero.
 */
static void
gen7_mfd_mpeg2_pic_state(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data * const i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferMPEG2 *pic_param;
    unsigned int slice_concealment_disable_bit = 0;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;

    if (IS_HASWELL(i965->intel.device_info)) {
        /* XXX: disable concealment for now */
        slice_concealment_disable_bit = 1;
    }

    BEGIN_BCS_BATCH(batch, 13);
    OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
    OUT_BCS_BATCH(batch,
                  (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
                  ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
                  ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
                  ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
                  pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
                  pic_param->picture_coding_extension.bits.picture_structure << 12 |
                  pic_param->picture_coding_extension.bits.top_field_first << 11 |
                  pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
                  pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
                  pic_param->picture_coding_extension.bits.q_scale_type << 8 |
                  pic_param->picture_coding_extension.bits.intra_vlc_format << 7 |
                  pic_param->picture_coding_extension.bits.alternate_scan << 6);
    OUT_BCS_BATCH(batch,
                  pic_param->picture_coding_type << 9);
    OUT_BCS_BATCH(batch,
                  (slice_concealment_disable_bit << 31) |
                  ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 | /* height in MBs - 1 */
                  ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1)); /* width in MBs - 1 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  980.  
  981. static void
  982. gen7_mfd_mpeg2_qm_state(VADriverContextP ctx,
  983.                         struct decode_state *decode_state,
  984.                         struct gen7_mfd_context *gen7_mfd_context)
  985. {
  986.     VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
  987.     int i, j;
  988.  
  989.     /* Update internal QM state */
  990.     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
  991.         VAIQMatrixBufferMPEG2 * const iq_matrix =
  992.             (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
  993.  
  994.         if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
  995.             iq_matrix->load_intra_quantiser_matrix) {
  996.             gen_iq_matrix->load_intra_quantiser_matrix =
  997.                 iq_matrix->load_intra_quantiser_matrix;
  998.             if (iq_matrix->load_intra_quantiser_matrix) {
  999.                 for (j = 0; j < 64; j++)
  1000.                     gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
  1001.                         iq_matrix->intra_quantiser_matrix[j];
  1002.             }
  1003.         }
  1004.  
  1005.         if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
  1006.             iq_matrix->load_non_intra_quantiser_matrix) {
  1007.             gen_iq_matrix->load_non_intra_quantiser_matrix =
  1008.                 iq_matrix->load_non_intra_quantiser_matrix;
  1009.             if (iq_matrix->load_non_intra_quantiser_matrix) {
  1010.                 for (j = 0; j < 64; j++)
  1011.                     gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
  1012.                         iq_matrix->non_intra_quantiser_matrix[j];
  1013.             }
  1014.         }
  1015.     }
  1016.  
  1017.     /* Commit QM state to HW */
  1018.     for (i = 0; i < 2; i++) {
  1019.         unsigned char *qm = NULL;
  1020.         int qm_type;
  1021.  
  1022.         if (i == 0) {
  1023.             if (gen_iq_matrix->load_intra_quantiser_matrix) {
  1024.                 qm = gen_iq_matrix->intra_quantiser_matrix;
  1025.                 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
  1026.             }
  1027.         } else {
  1028.             if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
  1029.                 qm = gen_iq_matrix->non_intra_quantiser_matrix;
  1030.                 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
  1031.             }
  1032.         }
  1033.  
  1034.         if (!qm)
  1035.             continue;
  1036.  
  1037.         gen7_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
  1038.     }
  1039. }
  1040.  
  1041. uint32_t mpeg2_get_slice_data_length(dri_bo *slice_data_bo, VASliceParameterBufferMPEG2 *slice_param)
  1042. {
  1043.     uint8_t *buf;
  1044.     uint32_t buf_offset = slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3);
  1045.     uint32_t buf_size = slice_param->slice_data_size - (slice_param->macroblock_offset >> 3);
  1046.     uint32_t i;
  1047.  
  1048.     dri_bo_map(slice_data_bo, 0);
  1049.     buf = (uint8_t *)slice_data_bo->virtual + buf_offset;
  1050.  
  1051.     for (i = 3; i < buf_size; i++) {
  1052.         if (buf[i - 3] &&
  1053.             !buf[i - 2] &&
  1054.             !buf[i - 1] &&
  1055.             !buf[i]) {
  1056.             dri_bo_unmap(slice_data_bo);
  1057.             return i - 3 + 1;
  1058.         }
  1059.     }
  1060.  
  1061.     dri_bo_unmap(slice_data_bo);
  1062.     return buf_size;
  1063. }
  1064.  
/*
 * Emit one MFD_MPEG2_BSD_OBJECT command for an MPEG-2 slice.
 *
 * The macroblock count is the distance between this slice's start
 * (hpos0/vpos0) and the next slice's start, or the bottom of the
 * (field) picture when this is the last slice.  When the slice
 * vertical position workaround is active for field pictures, incoming
 * vertical positions are halved (see wa_mpeg2_slice_vertical_position).
 */
static void
gen7_mfd_mpeg2_bsd_object(VADriverContextP ctx,
                          VAPictureParameterBufferMPEG2 *pic_param,
                          VASliceParameterBufferMPEG2 *slice_param,
                          dri_bo *slice_data_bo,
                          VASliceParameterBufferMPEG2 *next_slice_param,
                          struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data * const i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
    int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;

    if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
        pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
        is_field_pic = 1;
    is_field_pic_wa = is_field_pic &&
        gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;

    /* Start position of this slice, in macroblock units. */
    vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
    hpos0 = slice_param->slice_horizontal_position;

    if (next_slice_param == NULL) {
        /* Last slice: extends to the bottom of the (field) picture. */
        vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
        hpos1 = 0;
    } else {
        vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
        hpos1 = next_slice_param->slice_horizontal_position;
    }

    mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  mpeg2_get_slice_data_length(slice_data_bo, slice_param));
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
    OUT_BCS_BATCH(batch,
                  hpos0 << 24 |
                  vpos0 << 16 |
                  mb_count << 8 |
                  (next_slice_param == NULL) << 5 | /* set on the last slice */
                  (next_slice_param == NULL) << 3 |
                  (slice_param->macroblock_offset & 0x7)); /* sub-byte bit offset */
    OUT_BCS_BATCH(batch,
                  (slice_param->quantiser_scale_code << 24) |
                  (IS_HASWELL(i965->intel.device_info) ? (vpos1 << 8 | hpos1) : 0));
    ADVANCE_BCS_BATCH(batch);
}
  1115.  
  1116. static void
  1117. gen7_mfd_mpeg2_decode_picture(VADriverContextP ctx,
  1118.                               struct decode_state *decode_state,
  1119.                               struct gen7_mfd_context *gen7_mfd_context)
  1120. {
  1121.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  1122.     VAPictureParameterBufferMPEG2 *pic_param;
  1123.     VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
  1124.     dri_bo *slice_data_bo;
  1125.     int i, j;
  1126.  
  1127.     assert(decode_state->pic_param && decode_state->pic_param->buffer);
  1128.     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
  1129.  
  1130.     gen7_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
  1131.     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
  1132.     intel_batchbuffer_emit_mi_flush(batch);
  1133.     gen7_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
  1134.     gen7_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
  1135.     gen7_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
  1136.     gen7_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
  1137.     gen7_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
  1138.     gen7_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);
  1139.  
  1140.     if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
  1141.         gen7_mfd_context->wa_mpeg2_slice_vertical_position =
  1142.             mpeg2_wa_slice_vertical_position(decode_state, pic_param);
  1143.  
  1144.     for (j = 0; j < decode_state->num_slice_params; j++) {
  1145.         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
  1146.         slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
  1147.         slice_data_bo = decode_state->slice_datas[j]->bo;
  1148.         gen7_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);
  1149.  
  1150.         if (j == decode_state->num_slice_params - 1)
  1151.             next_slice_group_param = NULL;
  1152.         else
  1153.             next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;
  1154.  
  1155.         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
  1156.             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
  1157.  
  1158.             if (i < decode_state->slice_params[j]->num_elements - 1)
  1159.                 next_slice_param = slice_param + 1;
  1160.             else
  1161.                 next_slice_param = next_slice_group_param;
  1162.  
  1163.             gen7_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
  1164.             slice_param++;
  1165.         }
  1166.     }
  1167.  
  1168.     intel_batchbuffer_end_atomic(batch);
  1169.     intel_batchbuffer_flush(batch);
  1170. }
  1171.  
/* Map VA-API VC-1 picture type values to the GEN7 hardware encoding;
 * the last entry also maps to a P picture. */
static const int va_to_gen7_vc1_pic_type[5] = {
    GEN7_VC1_I_PICTURE,
    GEN7_VC1_P_PICTURE,
    GEN7_VC1_B_PICTURE,
    GEN7_VC1_BI_PICTURE,
    GEN7_VC1_P_PICTURE,
};

/* Map VA-API VC-1 motion vector modes to the hardware MV mode field. */
static const int va_to_gen7_vc1_mv[4] = {
    1, /* 1-MV */
    2, /* 1-MV half-pel */
    3, /* 1-MV half-pel bilinear */
    0, /* Mixed MV */
};

/* Scale-factor table used for VC-1 B pictures (consumed elsewhere in
 * this file). */
static const int b_picture_scale_factor[21] = {
    128, 85,  170, 64,  192,
    51,  102, 153, 204, 43,
    215, 37,  74,  111, 148,
    185, 222, 32,  96,  160,
    224,
};

/* Map VA-API conditional overlap values to the hardware encoding. */
static const int va_to_gen7_vc1_condover[3] = {
    0,
    2,
    3
};

/* Map VA-API VC-1 profile values to the GEN7 profile encoding. */
static const int va_to_gen7_vc1_profile[4] = {
    GEN7_VC1_SIMPLE_PROFILE,
    GEN7_VC1_MAIN_PROFILE,
    GEN7_VC1_RESERVED_PROFILE,
    GEN7_VC1_ADVANCED_PROFILE
};
  1207.  
  1208. static void
  1209. gen7_mfd_free_vc1_surface(void **data)
  1210. {
  1211.     struct gen7_vc1_surface *gen7_vc1_surface = *data;
  1212.  
  1213.     if (!gen7_vc1_surface)
  1214.         return;
  1215.  
  1216.     dri_bo_unreference(gen7_vc1_surface->dmv);
  1217.     free(gen7_vc1_surface);
  1218.     *data = NULL;
  1219. }
  1220.  
  1221. static void
  1222. gen7_mfd_init_vc1_surface(VADriverContextP ctx,
  1223.                           VAPictureParameterBufferVC1 *pic_param,
  1224.                           struct object_surface *obj_surface)
  1225. {
  1226.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1227.     struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
  1228.     int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
  1229.     int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
  1230.  
  1231.     obj_surface->free_private_data = gen7_mfd_free_vc1_surface;
  1232.  
  1233.     if (!gen7_vc1_surface) {
  1234.         gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
  1235.         assert((obj_surface->size & 0x3f) == 0);
  1236.         obj_surface->private_data = gen7_vc1_surface;
  1237.     }
  1238.  
  1239.     gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
  1240.  
  1241.     if (gen7_vc1_surface->dmv == NULL) {
  1242.         gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
  1243.                                              "direct mv w/r buffer",
  1244.                                              width_in_mbs * height_in_mbs * 64,
  1245.                                              0x1000);
  1246.     }
  1247. }
  1248.  
  1249. static void
  1250. gen7_mfd_vc1_decode_init(VADriverContextP ctx,
  1251.                          struct decode_state *decode_state,
  1252.                          struct gen7_mfd_context *gen7_mfd_context)
  1253. {
  1254.     VAPictureParameterBufferVC1 *pic_param;
  1255.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1256.     struct object_surface *obj_surface;
  1257.     dri_bo *bo;
  1258.     int width_in_mbs;
  1259.     int picture_type;
  1260.  
  1261.     assert(decode_state->pic_param && decode_state->pic_param->buffer);
  1262.     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
  1263.     width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
  1264.     picture_type = pic_param->picture_fields.bits.picture_type;
  1265.  
  1266.     intel_update_vc1_frame_store_index(ctx,
  1267.                                        decode_state,
  1268.                                        pic_param,
  1269.                                        gen7_mfd_context->reference_surface);
  1270.  
  1271.     /* Current decoded picture */
  1272.     obj_surface = decode_state->render_object;
  1273.     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
  1274.     gen7_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
  1275.  
  1276.     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
  1277.     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
  1278.     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
  1279.     gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;
  1280.  
  1281.     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
  1282.     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
  1283.     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
  1284.     gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;
  1285.  
  1286.     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
  1287.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1288.                       "intra row store",
  1289.                       width_in_mbs * 64,
  1290.                       0x1000);
  1291.     assert(bo);
  1292.     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
  1293.     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
  1294.  
  1295.     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
  1296.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1297.                       "deblocking filter row store",
  1298.                       width_in_mbs * 7 * 64,
  1299.                       0x1000);
  1300.     assert(bo);
  1301.     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
  1302.     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
  1303.  
  1304.     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
  1305.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1306.                       "bsd mpc row store",
  1307.                       width_in_mbs * 96,
  1308.                       0x1000);
  1309.     assert(bo);
  1310.     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
  1311.     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
  1312.  
  1313.     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
  1314.  
  1315.     gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
  1316.     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
  1317.    
  1318.     if (gen7_mfd_context->bitplane_read_buffer.valid) {
  1319.         int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
  1320.         int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
  1321.         int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
  1322.         int src_w, src_h;
  1323.         uint8_t *src = NULL, *dst = NULL;
  1324.  
  1325.         assert(decode_state->bit_plane->buffer);
  1326.         src = decode_state->bit_plane->buffer;
  1327.  
  1328.         bo = dri_bo_alloc(i965->intel.bufmgr,
  1329.                           "VC-1 Bitplane",
  1330.                           bitplane_width * height_in_mbs,
  1331.                           0x1000);
  1332.         assert(bo);
  1333.         gen7_mfd_context->bitplane_read_buffer.bo = bo;
  1334.  
  1335.         dri_bo_map(bo, True);
  1336.         assert(bo->virtual);
  1337.         dst = bo->virtual;
  1338.  
  1339.         for (src_h = 0; src_h < height_in_mbs; src_h++) {
  1340.             for(src_w = 0; src_w < width_in_mbs; src_w++) {
  1341.                 int src_index, dst_index;
  1342.                 int src_shift;
  1343.                 uint8_t src_value;
  1344.  
  1345.                 src_index = (src_h * width_in_mbs + src_w) / 2;
  1346.                 src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
  1347.                 src_value = ((src[src_index] >> src_shift) & 0xf);
  1348.  
  1349.                 if (picture_type == GEN7_VC1_SKIPPED_PICTURE){
  1350.                     src_value |= 0x2;
  1351.                 }
  1352.  
  1353.                 dst_index = src_w / 2;
  1354.                 dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
  1355.             }
  1356.  
  1357.             if (src_w & 1)
  1358.                 dst[src_w / 2] >>= 4;
  1359.  
  1360.             dst += bitplane_width;
  1361.         }
  1362.  
  1363.         dri_bo_unmap(bo);
  1364.     } else
  1365.         gen7_mfd_context->bitplane_read_buffer.bo = NULL;
  1366. }
  1367.  
/*
 * Emit MFD_VC1_LONG_PIC_STATE for the current picture.
 *
 * Derives all per-picture hardware controls from the VA-API VC-1 picture
 * parameter buffer: alternate-quantizer (DQUANT) configuration, the unified
 * motion-vector mode, B-picture scale factor, frame coding mode, overlap
 * smoothing, bitplane presence flags and transform coding sets, then packs
 * them into the 6-dword command.
 */
static void
gen7_mfd_vc1_pic_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferVC1 *pic_param;
    struct object_surface *obj_surface;
    int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
    int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
    int unified_mv_mode;
    int ref_field_pic_polarity = 0;
    int scale_factor = 0;
    int trans_ac_y = 0;
    int dmv_surface_valid = 0;
    int brfd = 0;
    int fcm = 0;
    int picture_type;
    int profile;
    int overlap;
    int interpolation_mode = 0;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;

    profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
    dquant = pic_param->pic_quantizer_fields.bits.dquant;
    dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
    dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
    dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
    dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
    dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
    alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;

    /*
     * Translate the VC-1 DQUANT syntax into the hardware's alternate
     * quantizer configuration and the per-edge mask it applies to.
     */
    if (dquant == 0) {
        /* No alternate quantizer. */
        alt_pquant_config = 0;
        alt_pquant_edge_mask = 0;
    } else if (dquant == 2) {
        /* DQUANT == 2: alternate quantizer applies to all four edges. */
        alt_pquant_config = 1;
        alt_pquant_edge_mask = 0xf;
    } else {
        assert(dquant == 1);
        if (dquantfrm == 0) {
            /* This frame does not use an alternate quantizer. */
            alt_pquant_config = 0;
            alt_pquant_edge_mask = 0;
            alt_pq = 0;
        } else {
            assert(dquantfrm == 1);
            alt_pquant_config = 1;

            switch (dqprofile) {
            case 3:
                /* DQPROFILE "all macroblocks": binary or full per-MB delta. */
                if (dqbilevel == 0) {
                    alt_pquant_config = 2;
                    alt_pquant_edge_mask = 0;
                } else {
                    assert(dqbilevel == 1);
                    alt_pquant_config = 3;
                    alt_pquant_edge_mask = 0;
                }
                break;

            case 0:
                /* All four picture edges. */
                alt_pquant_edge_mask = 0xf;
                break;

            case 1:
                /* Double-edge: DQDBEDGE selects which pair of edges. */
                if (dqdbedge == 3)
                    alt_pquant_edge_mask = 0x9;
                else
                    alt_pquant_edge_mask = (0x3 << dqdbedge);

                break;

            case 2:
                /* Single edge selected by DQSBEDGE. */
                alt_pquant_edge_mask = (0x1 << dqsbedge);
                break;

            default:
                assert(0);
            }
        }
    }

    /*
     * When intensity compensation is signalled, the effective MV mode is
     * carried in mv_mode2; otherwise in mv_mode itself.
     */
    if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
        assert(pic_param->mv_fields.bits.mv_mode2 < 4);
        unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
    } else {
        assert(pic_param->mv_fields.bits.mv_mode < 4);
        unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
    }

    if (pic_param->sequence_fields.bits.interlace == 1 &&
        pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
        /* FIXME: calculate reference field picture polarity */
        assert(0);
        ref_field_pic_polarity = 0;
    }

    /* B-fraction scale factor; values >= 21 fall outside the lookup table. */
    if (pic_param->b_picture_fraction < 21)
        scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];

    picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];

    /* Advanced-profile I pictures are programmed as BI on this hardware. */
    if (profile == GEN7_VC1_ADVANCED_PROFILE &&
        picture_type == GEN7_VC1_I_PICTURE)
        picture_type = GEN7_VC1_BI_PICTURE;

    if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
        trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
    else {
        trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
        /*
         * 8.3.6.2.1 Transform Type Selection
         * If variable-sized transform coding is not enabled,
         * then the 8x8 transform shall be used for all blocks.
         * it is also MFX_VC1_PIC_STATE requirement.
         */
        if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
            pic_param->transform_fields.bits.mb_level_transform_type_flag   = 1;
            pic_param->transform_fields.bits.frame_level_transform_type     = 0;
        }
    }


    /*
     * Direct-mode MVs for a B picture are read from the backward reference;
     * they are only valid if that reference was itself a P/B picture.
     */
    if (picture_type == GEN7_VC1_B_PICTURE) {
        struct gen7_vc1_surface *gen7_vc1_surface = NULL;

        obj_surface = decode_state->reference_objects[1];

        if (obj_surface)
            gen7_vc1_surface = obj_surface->private_data;

        if (!gen7_vc1_surface ||
            (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
             va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
            dmv_surface_valid = 0;
        else
            dmv_surface_valid = 1;
    }

    assert(pic_param->picture_fields.bits.frame_coding_mode < 3);

    /* Frame coding mode: 0/1 pass through; field-interlace encodes field order. */
    if (pic_param->picture_fields.bits.frame_coding_mode < 2)
        fcm = pic_param->picture_fields.bits.frame_coding_mode;
    else {
        if (pic_param->picture_fields.bits.top_field_first)
            fcm = 2;
        else
            fcm = 3;
    }

    if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
        /* Backward reference frame distance derived from the B scale factor. */
        brfd = pic_param->reference_fields.bits.reference_distance;
        brfd = (scale_factor * brfd) >> 8;
        brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;

        if (brfd < 0)
            brfd = 0;
    }

    /*
     * Overlap smoothing: simple/main profile enables it for PQUANT >= 9 on
     * non-B pictures; advanced profile additionally honours the conditional
     * overlap flag (CONDOVER) on I/BI pictures.
     */
    overlap = 0;
    if (profile != GEN7_VC1_ADVANCED_PROFILE){
        if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
            pic_param->picture_fields.bits.picture_type != GEN7_VC1_B_PICTURE) {
            overlap = 1;
        }
    }else {
        if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_P_PICTURE &&
             pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
              overlap = 1;
        }
        if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_I_PICTURE ||
            pic_param->picture_fields.bits.picture_type == GEN7_VC1_BI_PICTURE){
             if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
                overlap = 1;
             } else if (va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 2 ||
                        va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 3) {
                 overlap = 1;
             }
        }
    }

    assert(pic_param->conditional_overlap_flag < 3);
    assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */

    /* Sub-pel interpolation filter, again honouring mv_mode2 under IC. */
    if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
        (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
         pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
        interpolation_mode = 9; /* Half-pel bilinear */
    else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
             (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
              pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
        interpolation_mode = 1; /* Half-pel bicubic */
    else
        interpolation_mode = 0; /* Quarter-pel bicubic */

    /* Pack everything into the 6-dword MFD_VC1_LONG_PIC_STATE command. */
    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
    OUT_BCS_BATCH(batch,
                  (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
                  ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
    OUT_BCS_BATCH(batch,
                  ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
                  dmv_surface_valid << 15 |
                  (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
                  pic_param->rounding_control << 13 |
                  pic_param->sequence_fields.bits.syncmarker << 12 |
                  interpolation_mode << 8 |
                  0 << 7 | /* FIXME: scale up or down ??? */
                  pic_param->range_reduction_frame << 6 |
                  pic_param->entrypoint_fields.bits.loopfilter << 5 |
                  overlap << 4 |
                  !pic_param->picture_fields.bits.is_first_field << 3 |
                  (pic_param->sequence_fields.bits.profile == 3) << 0);
    OUT_BCS_BATCH(batch,
                  va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
                  picture_type << 26 |
                  fcm << 24 |
                  alt_pq << 16 |
                  pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
                  scale_factor << 0);
    OUT_BCS_BATCH(batch,
                  unified_mv_mode << 28 |
                  pic_param->mv_fields.bits.four_mv_switch << 27 |
                  pic_param->fast_uvmc_flag << 26 |
                  ref_field_pic_polarity << 25 |
                  pic_param->reference_fields.bits.num_reference_pictures << 24 |
                  pic_param->reference_fields.bits.reference_distance << 20 |
                  pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
                  pic_param->mv_fields.bits.extended_dmv_range << 10 |
                  pic_param->mv_fields.bits.extended_mv_range << 8 |
                  alt_pquant_edge_mask << 4 |
                  alt_pquant_config << 2 |
                  pic_param->pic_quantizer_fields.bits.half_qp << 1 |
                  pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
    OUT_BCS_BATCH(batch,
                  !!pic_param->bitplane_present.value << 31 |
                  !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
                  !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
                  !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
                  !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
                  !pic_param->bitplane_present.flags.bp_overflags << 26 |
                  !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
                  !pic_param->bitplane_present.flags.bp_field_tx << 24 |
                  pic_param->mv_fields.bits.mv_table << 20 |
                  pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
                  pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
                  pic_param->transform_fields.bits.frame_level_transform_type << 12 |
                  pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
                  pic_param->mb_mode_table << 8 |
                  trans_ac_y << 6 |
                  pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
                  pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
                  pic_param->cbp_table << 0);
    ADVANCE_BCS_BATCH(batch);
}
  1625.  
  1626. static void
  1627. gen7_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
  1628.                              struct decode_state *decode_state,
  1629.                              struct gen7_mfd_context *gen7_mfd_context)
  1630. {
  1631.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  1632.     VAPictureParameterBufferVC1 *pic_param;
  1633.     int intensitycomp_single;
  1634.  
  1635.     assert(decode_state->pic_param && decode_state->pic_param->buffer);
  1636.     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
  1637.  
  1638.     assert(decode_state->pic_param && decode_state->pic_param->buffer);
  1639.     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
  1640.     intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
  1641.  
  1642.     BEGIN_BCS_BATCH(batch, 6);
  1643.     OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
  1644.     OUT_BCS_BATCH(batch,
  1645.                   0 << 14 | /* FIXME: double ??? */
  1646.                   0 << 12 |
  1647.                   intensitycomp_single << 10 |
  1648.                   intensitycomp_single << 8 |
  1649.                   0 << 4 | /* FIXME: interlace mode */
  1650.                   0);
  1651.     OUT_BCS_BATCH(batch,
  1652.                   pic_param->luma_shift << 16 |
  1653.                   pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
  1654.     OUT_BCS_BATCH(batch, 0);
  1655.     OUT_BCS_BATCH(batch, 0);
  1656.     OUT_BCS_BATCH(batch, 0);
  1657.     ADVANCE_BCS_BATCH(batch);
  1658. }
  1659.  
  1660.  
  1661. static void
  1662. gen7_mfd_vc1_directmode_state(VADriverContextP ctx,
  1663.                               struct decode_state *decode_state,
  1664.                               struct gen7_mfd_context *gen7_mfd_context)
  1665. {
  1666.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  1667.     struct object_surface *obj_surface;
  1668.     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
  1669.  
  1670.     obj_surface = decode_state->render_object;
  1671.  
  1672.     if (obj_surface && obj_surface->private_data) {
  1673.         dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
  1674.     }
  1675.  
  1676.     obj_surface = decode_state->reference_objects[1];
  1677.  
  1678.     if (obj_surface && obj_surface->private_data) {
  1679.         dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
  1680.     }
  1681.  
  1682.     BEGIN_BCS_BATCH(batch, 3);
  1683.     OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (3 - 2));
  1684.  
  1685.     if (dmv_write_buffer)
  1686.         OUT_BCS_RELOC(batch, dmv_write_buffer,
  1687.                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
  1688.                       0);
  1689.     else
  1690.         OUT_BCS_BATCH(batch, 0);
  1691.  
  1692.     if (dmv_read_buffer)
  1693.         OUT_BCS_RELOC(batch, dmv_read_buffer,
  1694.                       I915_GEM_DOMAIN_INSTRUCTION, 0,
  1695.                       0);
  1696.     else
  1697.         OUT_BCS_BATCH(batch, 0);
  1698.                  
  1699.     ADVANCE_BCS_BATCH(batch);
  1700. }
  1701.  
  1702. static int
  1703. gen7_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
  1704. {
  1705.     int out_slice_data_bit_offset;
  1706.     int slice_header_size = in_slice_data_bit_offset / 8;
  1707.     int i, j;
  1708.  
  1709.     if (profile != 3)
  1710.         out_slice_data_bit_offset = in_slice_data_bit_offset;
  1711.     else {
  1712.         for (i = 0, j = 0; i < slice_header_size; i++, j++) {
  1713.             if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3 && buf[j + 3] < 4) {
  1714.                 i++, j += 2;
  1715.             }
  1716.         }
  1717.  
  1718.         out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
  1719.     }
  1720.  
  1721.     return out_slice_data_bit_offset;
  1722. }
  1723.  
  1724. static void
  1725. gen7_mfd_vc1_bsd_object(VADriverContextP ctx,
  1726.                         VAPictureParameterBufferVC1 *pic_param,
  1727.                         VASliceParameterBufferVC1 *slice_param,
  1728.                         VASliceParameterBufferVC1 *next_slice_param,
  1729.                         dri_bo *slice_data_bo,
  1730.                         struct gen7_mfd_context *gen7_mfd_context)
  1731. {
  1732.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  1733.     int next_slice_start_vert_pos;
  1734.     int macroblock_offset;
  1735.     uint8_t *slice_data = NULL;
  1736.  
  1737.     dri_bo_map(slice_data_bo, 0);
  1738.     slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
  1739.     macroblock_offset = gen7_mfd_vc1_get_macroblock_bit_offset(slice_data,
  1740.                                                                slice_param->macroblock_offset,
  1741.                                                                pic_param->sequence_fields.bits.profile);
  1742.     dri_bo_unmap(slice_data_bo);
  1743.  
  1744.     if (next_slice_param)
  1745.         next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
  1746.     else
  1747.         next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
  1748.  
  1749.     BEGIN_BCS_BATCH(batch, 5);
  1750.     OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
  1751.     OUT_BCS_BATCH(batch,
  1752.                   slice_param->slice_data_size - (macroblock_offset >> 3));
  1753.     OUT_BCS_BATCH(batch,
  1754.                   slice_param->slice_data_offset + (macroblock_offset >> 3));
  1755.     OUT_BCS_BATCH(batch,
  1756.                   slice_param->slice_vertical_position << 16 |
  1757.                   next_slice_start_vert_pos << 0);
  1758.     OUT_BCS_BATCH(batch,
  1759.                   (macroblock_offset & 0x7));
  1760.     ADVANCE_BCS_BATCH(batch);
  1761. }
  1762.  
/*
 * Top-level VC-1 picture decode: allocates/validates per-picture buffers,
 * then emits the full MFX command sequence (pipe mode, surface and buffer
 * addresses, picture state, prediction pipe, direct-mode state) followed by
 * one BSD object per slice, inside a single atomic batch.
 */
static void
gen7_mfd_vc1_decode_picture(VADriverContextP ctx,
                            struct decode_state *decode_state,
                            struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferVC1 *pic_param;
    VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;

    /* Per-picture setup, then the fixed state-command preamble. */
    gen7_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen7_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen7_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen7_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen7_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen7_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
    gen7_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
    gen7_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);

    /* Outer loop: slice parameter groups; inner loop: slices within a
     * group.  Each slice needs a peek at the following slice (possibly in
     * the next group) to compute its vertical extent. */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen7_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);

        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen7_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
  1815.  
  1816. static void
  1817. gen7_mfd_jpeg_decode_init(VADriverContextP ctx,
  1818.                           struct decode_state *decode_state,
  1819.                           struct gen7_mfd_context *gen7_mfd_context)
  1820. {
  1821.     struct object_surface *obj_surface;
  1822.     VAPictureParameterBufferJPEGBaseline *pic_param;
  1823.     int subsampling = SUBSAMPLE_YUV420;
  1824.     int fourcc = VA_FOURCC_IMC3;
  1825.  
  1826.     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
  1827.  
  1828.     if (pic_param->num_components == 1) {
  1829.         subsampling = SUBSAMPLE_YUV400;
  1830.         fourcc = VA_FOURCC_Y800;
  1831.     } else if (pic_param->num_components == 3) {
  1832.         int h1 = pic_param->components[0].h_sampling_factor;
  1833.         int h2 = pic_param->components[1].h_sampling_factor;
  1834.         int h3 = pic_param->components[2].h_sampling_factor;
  1835.         int v1 = pic_param->components[0].v_sampling_factor;
  1836.         int v2 = pic_param->components[1].v_sampling_factor;
  1837.         int v3 = pic_param->components[2].v_sampling_factor;
  1838.  
  1839.         if (h1 == 2 && h2 == 1 && h3 == 1 &&
  1840.             v1 == 2 && v2 == 1 && v3 == 1) {
  1841.             subsampling = SUBSAMPLE_YUV420;
  1842.             fourcc = VA_FOURCC_IMC3;
  1843.         } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
  1844.                    v1 == 1 && v2 == 1 && v3 == 1) {
  1845.             subsampling = SUBSAMPLE_YUV422H;
  1846.             fourcc = VA_FOURCC_422H;
  1847.         } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
  1848.                    v1 == 1 && v2 == 1 && v3 == 1) {
  1849.             subsampling = SUBSAMPLE_YUV444;
  1850.             fourcc = VA_FOURCC_444P;
  1851.         } else if (h1 == 4 && h2 == 1 && h3 == 1 &&
  1852.                    v1 == 1 && v2 == 1 && v3 == 1) {
  1853.             subsampling = SUBSAMPLE_YUV411;
  1854.             fourcc = VA_FOURCC_411P;
  1855.         } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
  1856.                    v1 == 2 && v2 == 1 && v3 == 1) {
  1857.             subsampling = SUBSAMPLE_YUV422V;
  1858.             fourcc = VA_FOURCC_422V;
  1859.         } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
  1860.                    v1 == 2 && v2 == 2 && v3 == 2) {
  1861.             subsampling = SUBSAMPLE_YUV422H;
  1862.             fourcc = VA_FOURCC_422H;
  1863.         } else if (h2 == 2 && h2 == 2 && h3 == 2 &&
  1864.                    v1 == 2 && v2 == 1 && v3 == 1) {
  1865.             subsampling = SUBSAMPLE_YUV422V;
  1866.             fourcc = VA_FOURCC_422V;
  1867.         } else
  1868.             assert(0);
  1869.     } else {
  1870.         assert(0);
  1871.     }
  1872.  
  1873.     /* Current decoded picture */
  1874.     obj_surface = decode_state->render_object;
  1875.     i965_check_alloc_surface_bo(ctx, obj_surface, 1, fourcc, subsampling);
  1876.  
  1877.     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
  1878.     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
  1879.     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
  1880.     gen7_mfd_context->pre_deblocking_output.valid = 1;
  1881.  
  1882.     gen7_mfd_context->post_deblocking_output.bo = NULL;
  1883.     gen7_mfd_context->post_deblocking_output.valid = 0;
  1884.  
  1885.     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
  1886.     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
  1887.  
  1888.     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
  1889.     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
  1890.  
  1891.     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
  1892.     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
  1893.  
  1894.     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
  1895.     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
  1896.  
  1897.     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
  1898.     gen7_mfd_context->bitplane_read_buffer.valid = 0;
  1899. }
  1900.  
/* Map VA-API rotation indices (0, 90, 180, 270 degrees in order) to the
 * MFX_JPEG_PIC_STATE rotation field encoding. */
static const int va_to_gen7_jpeg_rotation[4] = {
    GEN7_JPEG_ROTATION_0,
    GEN7_JPEG_ROTATION_90,
    GEN7_JPEG_ROTATION_180,
    GEN7_JPEG_ROTATION_270
};
  1907.  
  1908. static void
  1909. gen7_mfd_jpeg_pic_state(VADriverContextP ctx,
  1910.                         struct decode_state *decode_state,
  1911.                         struct gen7_mfd_context *gen7_mfd_context)
  1912. {
  1913.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  1914.     VAPictureParameterBufferJPEGBaseline *pic_param;
  1915.     int chroma_type = GEN7_YUV420;
  1916.     int frame_width_in_blks;
  1917.     int frame_height_in_blks;
  1918.  
  1919.     assert(decode_state->pic_param && decode_state->pic_param->buffer);
  1920.     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
  1921.  
  1922.     if (pic_param->num_components == 1)
  1923.         chroma_type = GEN7_YUV400;
  1924.     else if (pic_param->num_components == 3) {
  1925.         int h1 = pic_param->components[0].h_sampling_factor;
  1926.         int h2 = pic_param->components[1].h_sampling_factor;
  1927.         int h3 = pic_param->components[2].h_sampling_factor;
  1928.         int v1 = pic_param->components[0].v_sampling_factor;
  1929.         int v2 = pic_param->components[1].v_sampling_factor;
  1930.         int v3 = pic_param->components[2].v_sampling_factor;
  1931.  
  1932.         if (h1 == 2 && h2 == 1 && h3 == 1 &&
  1933.             v1 == 2 && v2 == 1 && v3 == 1)
  1934.             chroma_type = GEN7_YUV420;
  1935.         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
  1936.                  v1 == 1 && v2 == 1 && v3 == 1)
  1937.             chroma_type = GEN7_YUV422H_2Y;
  1938.         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
  1939.                  v1 == 1 && v2 == 1 && v3 == 1)
  1940.             chroma_type = GEN7_YUV444;
  1941.         else if (h1 == 4 && h2 == 1 && h3 == 1 &&
  1942.                  v1 == 1 && v2 == 1 && v3 == 1)
  1943.             chroma_type = GEN7_YUV411;
  1944.         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
  1945.                  v1 == 2 && v2 == 1 && v3 == 1)
  1946.             chroma_type = GEN7_YUV422V_2Y;
  1947.         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
  1948.                  v1 == 2 && v2 == 2 && v3 == 2)
  1949.             chroma_type = GEN7_YUV422H_4Y;
  1950.         else if (h2 == 2 && h2 == 2 && h3 == 2 &&
  1951.                  v1 == 2 && v2 == 1 && v3 == 1)
  1952.             chroma_type = GEN7_YUV422V_4Y;
  1953.         else
  1954.             assert(0);
  1955.     }
  1956.  
  1957.     if (chroma_type == GEN7_YUV400 ||
  1958.         chroma_type == GEN7_YUV444 ||
  1959.         chroma_type == GEN7_YUV422V_2Y) {
  1960.         frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
  1961.         frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
  1962.     } else if (chroma_type == GEN7_YUV411) {
  1963.         frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
  1964.         frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
  1965.     } else {
  1966.         frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
  1967.         frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
  1968.     }
  1969.  
  1970.     BEGIN_BCS_BATCH(batch, 3);
  1971.     OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
  1972.     OUT_BCS_BATCH(batch,
  1973.                   (va_to_gen7_jpeg_rotation[0] << 4) |    /* without rotation */
  1974.                   (chroma_type << 0));
  1975.     OUT_BCS_BATCH(batch,
  1976.                   ((frame_height_in_blks - 1) << 16) |   /* FrameHeightInBlks */
  1977.                   ((frame_width_in_blks - 1) << 0));    /* FrameWidthInBlks */
  1978.     ADVANCE_BCS_BATCH(batch);
  1979. }
  1980.  
/* Maps a VA-API JPEG Huffman table index (0, 1) to the table ID used by
 * the MFX_JPEG_HUFF_TABLE_STATE command (Y and UV respectively). */
static const int va_to_gen7_jpeg_hufftable[2] = {
    MFX_HUFFTABLE_ID_Y,
    MFX_HUFFTABLE_ID_UV
};
  1985.  
  1986. static void
  1987. gen7_mfd_jpeg_huff_table_state(VADriverContextP ctx,
  1988.                                struct decode_state *decode_state,
  1989.                                struct gen7_mfd_context *gen7_mfd_context,
  1990.                                int num_tables)
  1991. {
  1992.     VAHuffmanTableBufferJPEGBaseline *huffman_table;
  1993.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  1994.     int index;
  1995.  
  1996.     if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
  1997.         return;
  1998.  
  1999.     huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;
  2000.  
  2001.     for (index = 0; index < num_tables; index++) {
  2002.         int id = va_to_gen7_jpeg_hufftable[index];
  2003.         if (!huffman_table->load_huffman_table[index])
  2004.             continue;
  2005.         BEGIN_BCS_BATCH(batch, 53);
  2006.         OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
  2007.         OUT_BCS_BATCH(batch, id);
  2008.         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
  2009.         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
  2010.         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
  2011.         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
  2012.         ADVANCE_BCS_BATCH(batch);
  2013.     }
  2014. }
  2015.  
/* Maps the 1-based JPEG component ID (1..4) to the MFX quantizer-matrix
 * type for MFX_QM_STATE; index 0 is unused (-1). */
static const int va_to_gen7_jpeg_qm[5] = {
    -1,
    MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
    MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
    MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
    MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
};
  2023.  
  2024. static void
  2025. gen7_mfd_jpeg_qm_state(VADriverContextP ctx,
  2026.                        struct decode_state *decode_state,
  2027.                        struct gen7_mfd_context *gen7_mfd_context)
  2028. {
  2029.     VAPictureParameterBufferJPEGBaseline *pic_param;
  2030.     VAIQMatrixBufferJPEGBaseline *iq_matrix;
  2031.     int index;
  2032.  
  2033.     if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
  2034.         return;
  2035.  
  2036.     iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
  2037.     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
  2038.  
  2039.     assert(pic_param->num_components <= 3);
  2040.  
  2041.     for (index = 0; index < pic_param->num_components; index++) {
  2042.         int id = pic_param->components[index].component_id - pic_param->components[0].component_id + 1;
  2043.         int qm_type;
  2044.         unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
  2045.         unsigned char raster_qm[64];
  2046.         int j;
  2047.  
  2048.         if (id > 4 || id < 1)
  2049.             continue;
  2050.  
  2051.         if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
  2052.             continue;
  2053.  
  2054.         qm_type = va_to_gen7_jpeg_qm[id];
  2055.  
  2056.         for (j = 0; j < 64; j++)
  2057.             raster_qm[zigzag_direct[j]] = qm[j];
  2058.  
  2059.         gen7_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
  2060.     }
  2061. }
  2062.  
  2063. static void
  2064. gen7_mfd_jpeg_bsd_object(VADriverContextP ctx,
  2065.                          VAPictureParameterBufferJPEGBaseline *pic_param,
  2066.                          VASliceParameterBufferJPEGBaseline *slice_param,
  2067.                          VASliceParameterBufferJPEGBaseline *next_slice_param,
  2068.                          dri_bo *slice_data_bo,
  2069.                          struct gen7_mfd_context *gen7_mfd_context)
  2070. {
  2071.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  2072.     int scan_component_mask = 0;
  2073.     int i;
  2074.  
  2075.     assert(slice_param->num_components > 0);
  2076.     assert(slice_param->num_components < 4);
  2077.     assert(slice_param->num_components <= pic_param->num_components);
  2078.  
  2079.     for (i = 0; i < slice_param->num_components; i++) {
  2080.         switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
  2081.         case 1:
  2082.             scan_component_mask |= (1 << 0);
  2083.             break;
  2084.         case 2:
  2085.             scan_component_mask |= (1 << 1);
  2086.             break;
  2087.         case 3:
  2088.             scan_component_mask |= (1 << 2);
  2089.             break;
  2090.         default:
  2091.             assert(0);
  2092.             break;
  2093.         }
  2094.     }
  2095.  
  2096.     BEGIN_BCS_BATCH(batch, 6);
  2097.     OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
  2098.     OUT_BCS_BATCH(batch,
  2099.                   slice_param->slice_data_size);
  2100.     OUT_BCS_BATCH(batch,
  2101.                   slice_param->slice_data_offset);
  2102.     OUT_BCS_BATCH(batch,
  2103.                   slice_param->slice_horizontal_position << 16 |
  2104.                   slice_param->slice_vertical_position << 0);
  2105.     OUT_BCS_BATCH(batch,
  2106.                   ((slice_param->num_components != 1) << 30) |  /* interleaved */
  2107.                   (scan_component_mask << 27) |                 /* scan components */
  2108.                   (0 << 26) |   /* disable interrupt allowed */
  2109.                   (slice_param->num_mcus << 0));                /* MCU count */
  2110.     OUT_BCS_BATCH(batch,
  2111.                   (slice_param->restart_interval << 0));    /* RestartInterval */
  2112.     ADVANCE_BCS_BATCH(batch);
  2113. }
  2114.  
/* Workaround for JPEG decoding on Ivybridge: a tiny pre-encoded AVC
 * clip that is decoded before each real JPEG picture (see
 * gen7_mfd_jpeg_wa below). */

static struct {
    int width;                /* clip width in pixels */
    int height;               /* clip height in pixels */
    unsigned char data[32];   /* pre-encoded slice bitstream bytes */
    int data_size;            /* number of valid bytes in data[] */
    int data_bit_offset;      /* bit offset of the slice data within data[] */
    int qp;                   /* slice QP programmed in MFX_AVC_SLICE_STATE */
} gen7_jpeg_wa_clip = {
    16,
    16,
    {
        0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
        0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
    },
    14,
    40,
    28,
};
  2135.  
/*
 * (Re)creates the resources used by the Ivybridge JPEG workaround: a
 * 16x16 NV12 render-target surface for the dummy AVC decode, and a BO
 * holding the pre-encoded clip bitstream.
 */
static void
gen7_jpeg_wa_init(VADriverContextP ctx,
                  struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    VAStatus status;
    struct object_surface *obj_surface;

    /* Drop any workaround surface left over from a previous picture. */
    if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
        i965_DestroySurfaces(ctx,
                             &gen7_mfd_context->jpeg_wa_surface_id,
                             1);

    status = i965_CreateSurfaces(ctx,
                                 gen7_jpeg_wa_clip.width,
                                 gen7_jpeg_wa_clip.height,
                                 VA_RT_FORMAT_YUV420,
                                 1,
                                 &gen7_mfd_context->jpeg_wa_surface_id);
    assert(status == VA_STATUS_SUCCESS);

    obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
    assert(obj_surface);
    /* Make sure the surface is backed by an NV12 buffer object. */
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
    gen7_mfd_context->jpeg_wa_surface_object = obj_surface;

    /* Upload the clip bitstream once; the BO is reused across pictures
     * and released in gen7_mfd_context_destroy(). */
    if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
        gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
                                                               "JPEG WA data",
                                                               0x1000,
                                                               0x1000);
        dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
                       0,
                       gen7_jpeg_wa_clip.data_size,
                       gen7_jpeg_wa_clip.data);
    }
}
  2173.  
/*
 * MFX_PIPE_MODE_SELECT for the workaround decode.  Note that the pipe is
 * put into AVC (not JPEG) VLD decode mode, since the workaround decodes
 * a small AVC clip.
 */
static void
gen7_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
                              struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  (MFX_LONG_MODE << 17) | /* Currently only support long format */
                  (MFD_MODE_VLD << 15) | /* VLD mode */
                  (0 << 10) | /* disable Stream-Out */
                  (0 << 9)  | /* Post Deblocking Output */
                  (1 << 8)  | /* Pre Deblocking Output */
                  (0 << 5)  | /* not in stitch mode */
                  (MFX_CODEC_DECODE << 4)  | /* decoding mode */
                  (MFX_FORMAT_AVC << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
                  (0 << 1)  |
                  (0 << 0));
    OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
    OUT_BCS_BATCH(batch, 0); /* reserved */
    ADVANCE_BCS_BATCH(batch);
}
  2201.  
/*
 * MFX_SURFACE_STATE describing the 16x16 NV12 workaround surface created
 * in gen7_jpeg_wa_init().
 */
static void
gen7_jpeg_wa_surface_state(VADriverContextP ctx,
                           struct gen7_mfd_context *gen7_mfd_context)
{
    struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  ((obj_surface->orig_width - 1) << 18) |
                  ((obj_surface->orig_height - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
                  (1 << 27) | /* interleave chroma, set to 0 for JPEG */
                  (0 << 22) | /* surface object control state, ignored */
                  ((obj_surface->width - 1) << 3) | /* pitch */
                  (0 << 2)  | /* must be 0 */
                  (1 << 1)  | /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for U(Cb), must be 0 */
                  (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for V(Cr), must be 0 */
                  (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
    ADVANCE_BCS_BATCH(batch);
}
  2231.  
/*
 * MFX_PIPE_BUF_ADDR_STATE for the workaround decode: pre-deblocking
 * output goes to the workaround surface; a temporary intra row-store
 * buffer is allocated for the duration of this command and unreferenced
 * afterwards (the batch keeps it alive via the relocation).
 */
static void
gen7_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
                                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    dri_bo *intra_bo;
    int i;

    intra_bo = dri_bo_alloc(i965->intel.bufmgr,
                            "intra row store",
                            128 * 64,
                            0x1000);

    BEGIN_BCS_BATCH(batch, 24);
    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (24 - 2));
    OUT_BCS_RELOC(batch,
                  obj_surface->bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);

    OUT_BCS_BATCH(batch, 0); /* post deblocking */

    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */

    OUT_BCS_RELOC(batch,
                  intra_bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);

    OUT_BCS_BATCH(batch, 0);

    /* DW 7..22: reference picture addresses, none needed */
    for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
        OUT_BCS_BATCH(batch, 0);
    }

    OUT_BCS_BATCH(batch, 0);   /* ignore DW23 for decoding */
    ADVANCE_BCS_BATCH(batch);

    dri_bo_unreference(intra_bo);
}
  2276.  
/*
 * MFX_BSP_BUF_BASE_ADDR_STATE for the workaround decode.  The BSD/MPC
 * and MPR row-store buffers are allocated just for this command; the
 * batch relocations keep them alive, so they are unreferenced on exit.
 */
static void
gen7_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
                                     struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    dri_bo *bsd_mpc_bo, *mpr_bo;

    bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
                              "bsd mpc row store",
                              11520, /* 1.5 * 120 * 64 */
                              0x1000);

    mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
                          "mpr row store",
                          7680, /* 1.0 * 120 * 64 */
                          0x1000);

    BEGIN_BCS_BATCH(batch, 4);
    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));

    OUT_BCS_RELOC(batch,
                  bsd_mpc_bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);

    OUT_BCS_RELOC(batch,
                  mpr_bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);

    dri_bo_unreference(bsd_mpc_bo);
    dri_bo_unreference(mpr_bo);
}
  2314.  
/*
 * Intentionally a no-op: no MFX_QM_STATE commands are emitted for the
 * workaround decode.  Kept so the call sequence in gen7_mfd_jpeg_wa()
 * mirrors a full AVC decode.
 */
static void
gen7_jpeg_wa_avc_qm_state(VADriverContextP ctx,
                          struct gen7_mfd_context *gen7_mfd_context)
{

}
  2321.  
/*
 * MFX_AVC_IMG_STATE for the workaround clip: a single-macroblock
 * (1x1 MB) frame, CABAC, 4:2:0, no MBAFF.
 */
static void
gen7_jpeg_wa_avc_img_state(VADriverContextP ctx,
                           struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int img_struct = 0;          /* frame picture */
    int mbaff_frame_flag = 0;
    unsigned int width_in_mbs = 1, height_in_mbs = 1;

    BEGIN_BCS_BATCH(batch, 16);
    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
    OUT_BCS_BATCH(batch,
                  (width_in_mbs * height_in_mbs - 1));
    OUT_BCS_BATCH(batch,
                  ((height_in_mbs - 1) << 16) |
                  ((width_in_mbs - 1) << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 24) |
                  (0 << 16) |
                  (0 << 14) |
                  (0 << 13) |
                  (0 << 12) | /* differ from GEN6 */
                  (0 << 10) |
                  (img_struct << 8));
    OUT_BCS_BATCH(batch,
                  (1 << 10) | /* 4:2:0 */
                  (1 << 7) |  /* CABAC */
                  (0 << 6) |
                  (0 << 5) |
                  (0 << 4) |
                  (0 << 3) |
                  (1 << 2) |
                  (mbaff_frame_flag << 1) |
                  (0 << 0));
    /* DW 5..15: remaining image-state fields, all zero for this clip */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  2369.  
  2370. static void
  2371. gen7_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
  2372.                                   struct gen7_mfd_context *gen7_mfd_context)
  2373. {
  2374.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  2375.     int i;
  2376.  
  2377.     BEGIN_BCS_BATCH(batch, 69);
  2378.     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
  2379.  
  2380.     /* reference surfaces 0..15 */
  2381.     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
  2382.         OUT_BCS_BATCH(batch, 0); /* top */
  2383.         OUT_BCS_BATCH(batch, 0); /* bottom */
  2384.     }
  2385.  
  2386.     /* the current decoding frame/field */
  2387.     OUT_BCS_BATCH(batch, 0); /* top */
  2388.     OUT_BCS_BATCH(batch, 0); /* bottom */
  2389.  
  2390.     /* POC List */
  2391.     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
  2392.         OUT_BCS_BATCH(batch, 0);
  2393.         OUT_BCS_BATCH(batch, 0);
  2394.     }
  2395.  
  2396.     OUT_BCS_BATCH(batch, 0);
  2397.     OUT_BCS_BATCH(batch, 0);
  2398.  
  2399.     ADVANCE_BCS_BATCH(batch);
  2400. }
  2401.  
/*
 * MFX_IND_OBJ_BASE_ADDR_STATE pointing the BSD engine at the BO that
 * holds the pre-encoded workaround clip (uploaded in gen7_jpeg_wa_init).
 */
static void
gen7_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
                                     struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 11);
    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
    OUT_BCS_RELOC(batch,
                  gen7_mfd_context->jpeg_wa_slice_data_bo,
                  I915_GEM_DOMAIN_INSTRUCTION, 0,
                  0);
    OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  2425.  
/*
 * MFD_AVC_BSD_OBJECT for the workaround clip: decode data_size bytes
 * starting at offset 0 of the indirect object base, with the slice data
 * beginning at data_bit_offset (split into byte and bit parts below).
 */
static void
gen7_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
                            struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    /* the input bitstream format on GEN7 differs from GEN6 */
    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
    OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  (0 << 31) |
                  (0 << 14) |
                  (0 << 12) |
                  (0 << 10) |
                  (0 << 8));
    OUT_BCS_BATCH(batch,
                  ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) | /* byte part */
                  (0 << 5)  |
                  (0 << 4)  |
                  (1 << 3) | /* LastSlice Flag */
                  (gen7_jpeg_wa_clip.data_bit_offset & 0x7));        /* bit part */
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  2452.  
/*
 * MFX_AVC_SLICE_STATE for the workaround clip: a single I slice covering
 * the whole (1x1 MB) frame, deblocking disabled, QP taken from
 * gen7_jpeg_wa_clip, marked as the last slice.
 */
static void
gen7_jpeg_wa_avc_slice_state(VADriverContextP ctx,
                             struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
    int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;
    int first_mb_in_slice = 0;
    int slice_type = SLICE_TYPE_I;

    BEGIN_BCS_BATCH(batch, 11);
    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, slice_type);
    OUT_BCS_BATCH(batch,
                  (num_ref_idx_l1 << 24) |
                  (num_ref_idx_l0 << 16) |
                  (0 << 8) |
                  (0 << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 29) |
                  (1 << 27) |   /* disable Deblocking */
                  (0 << 24) |
                  (gen7_jpeg_wa_clip.qp << 16) |
                  (0 << 8) |
                  (0 << 0));
    OUT_BCS_BATCH(batch,
                  (slice_ver_pos << 24) |
                  (slice_hor_pos << 16) |
                  (first_mb_in_slice << 0));
    OUT_BCS_BATCH(batch,
                  (next_slice_ver_pos << 16) |
                  (next_slice_hor_pos << 0));
    OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  2492.  
/*
 * Ivybridge JPEG workaround: decode the tiny pre-canned AVC I-slice
 * before programming the real JPEG picture.  The helpers below emit a
 * minimal but complete AVC decode command sequence; they are called in
 * the order the MFX pipe expects, so do not reorder them.
 */
static void
gen7_mfd_jpeg_wa(VADriverContextP ctx,
                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    gen7_jpeg_wa_init(ctx, gen7_mfd_context);
    intel_batchbuffer_emit_mi_flush(batch);
    gen7_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
    gen7_jpeg_wa_surface_state(ctx, gen7_mfd_context);
    gen7_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
    gen7_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
    gen7_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
    gen7_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
    gen7_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);

    gen7_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
    gen7_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
    gen7_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
}
  2512.  
/*
 * Decodes one JPEG (Baseline DCT) picture.  After the Ivybridge
 * workaround decode, the picture-level state is programmed, then two
 * passes are made over the slice parameters: the first scans every scan
 * component to find the highest DC/AC table selector in use (so only the
 * needed Huffman tables are programmed), the second emits the actual
 * BSD objects that decode the scans.
 */
void
gen7_mfd_jpeg_decode_picture(VADriverContextP ctx,
                             struct decode_state *decode_state,
                             struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferJPEGBaseline *pic_param;
    VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j, max_selector = 0;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;

    /* Currently only support Baseline DCT */
    gen7_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    gen7_mfd_jpeg_wa(ctx, gen7_mfd_context);
    intel_batchbuffer_emit_mi_flush(batch);
    gen7_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
    gen7_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
    gen7_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
    gen7_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
    gen7_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);

    /* Pass 1: find the highest Huffman table selector referenced by any
     * scan component.
     * NOTE(review): this pass also emits IND_OBJ_BASE_ADDR_STATE and
     * computes next_slice_param without using them -- looks redundant
     * with pass 2 below; confirm against the command sequencing rules
     * before removing. */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen7_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);

        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            int component;

            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            for (component = 0; component < slice_param->num_components; component++) {
                if (max_selector < slice_param->components[component].dc_table_selector)
                    max_selector = slice_param->components[component].dc_table_selector;

                if (max_selector < slice_param->components[component].ac_table_selector)
                    max_selector = slice_param->components[component].ac_table_selector;
            }

            slice_param++;
        }
    }

    assert(max_selector < 2);
    gen7_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);

    /* Pass 2: program the slice data base address and emit one BSD
     * object per scan. */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen7_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);

        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen7_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
  2601.  
  2602. static VAStatus
  2603. gen7_mfd_decode_picture(VADriverContextP ctx,
  2604.                         VAProfile profile,
  2605.                         union codec_state *codec_state,
  2606.                         struct hw_context *hw_context)
  2607.  
  2608. {
  2609.     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
  2610.     struct decode_state *decode_state = &codec_state->decode;
  2611.     VAStatus vaStatus;
  2612.  
  2613.     assert(gen7_mfd_context);
  2614.  
  2615.     vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);
  2616.  
  2617.     if (vaStatus != VA_STATUS_SUCCESS)
  2618.         goto out;
  2619.  
  2620.     gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
  2621.  
  2622.     switch (profile) {
  2623.     case VAProfileMPEG2Simple:
  2624.     case VAProfileMPEG2Main:
  2625.         gen7_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
  2626.         break;
  2627.        
  2628.     case VAProfileH264ConstrainedBaseline:
  2629.     case VAProfileH264Main:
  2630.     case VAProfileH264High:
  2631.     case VAProfileH264StereoHigh:
  2632.         gen7_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
  2633.         break;
  2634.  
  2635.     case VAProfileVC1Simple:
  2636.     case VAProfileVC1Main:
  2637.     case VAProfileVC1Advanced:
  2638.         gen7_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
  2639.         break;
  2640.  
  2641.     case VAProfileJPEGBaseline:
  2642.         gen7_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
  2643.         break;
  2644.  
  2645.     default:
  2646.         assert(0);
  2647.         break;
  2648.     }
  2649.  
  2650.     vaStatus = VA_STATUS_SUCCESS;
  2651.  
  2652. out:
  2653.     return vaStatus;
  2654. }
  2655.  
/*
 * hw_context->destroy callback: releases all scratch buffer objects, the
 * JPEG workaround slice-data BO, the batchbuffer, and the context itself.
 *
 * NOTE(review): jpeg_wa_surface_id is not destroyed here (this callback
 * has no VADriverContextP to call i965_DestroySurfaces with) -- confirm
 * the workaround surface is released elsewhere.
 */
static void
gen7_mfd_context_destroy(void *hw_context)
{
    struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;

    dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.bo = NULL;

    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = NULL;

    dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
    gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;

    dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;

    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;

    dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
    gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;

    dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
    gen7_mfd_context->bitplane_read_buffer.bo = NULL;

    dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);

    intel_batchbuffer_free(gen7_mfd_context->base.batch);
    free(gen7_mfd_context);
}
  2687.  
  2688. static void gen7_mfd_mpeg2_context_init(VADriverContextP ctx,
  2689.                                     struct gen7_mfd_context *gen7_mfd_context)
  2690. {
  2691.     gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
  2692.     gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
  2693.     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
  2694.     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
  2695. }
  2696.  
  2697. struct hw_context *
  2698. gen7_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
  2699. {
  2700.     struct intel_driver_data *intel = intel_driver_data(ctx);
  2701.     struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
  2702.     int i;
  2703.  
  2704.     gen7_mfd_context->base.destroy = gen7_mfd_context_destroy;
  2705.     gen7_mfd_context->base.run = gen7_mfd_decode_picture;
  2706.     gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
  2707.  
  2708.     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
  2709.         gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
  2710.         gen7_mfd_context->reference_surface[i].frame_store_id = -1;
  2711.         gen7_mfd_context->reference_surface[i].obj_surface = NULL;
  2712.     }
  2713.  
  2714.     gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
  2715.     gen7_mfd_context->jpeg_wa_surface_object = NULL;
  2716.  
  2717.     switch (obj_config->profile) {
  2718.     case VAProfileMPEG2Simple:
  2719.     case VAProfileMPEG2Main:
  2720.         gen7_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
  2721.         break;
  2722.  
  2723.     case VAProfileH264ConstrainedBaseline:
  2724.     case VAProfileH264Main:
  2725.     case VAProfileH264High:
  2726.     case VAProfileH264StereoHigh:
  2727.         gen7_mfd_avc_context_init(ctx, gen7_mfd_context);
  2728.         break;
  2729.     default:
  2730.         break;
  2731.     }
  2732.     return (struct hw_context *)gen7_mfd_context;
  2733. }
  2734.