Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright © 2011 Intel Corporation
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the
  6.  * "Software"), to deal in the Software without restriction, including
  7.  * without limitation the rights to use, copy, modify, merge, publish,
  8.  * distribute, sub license, and/or sell copies of the Software, and to
  9.  * permit persons to whom the Software is furnished to do so, subject to
  10.  * the following conditions:
  11.  *
  12.  * The above copyright notice and this permission notice (including the
  13.  * next paragraph) shall be included in all copies or substantial portions
  14.  * of the Software.
  15.  *
  16.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  17.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  18.  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  19.  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
  20.  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  21.  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  22.  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23.  *
  24.  * Authors:
  25.  *    Xiang Haihao <haihao.xiang@intel.com>
  26.  *    Zhao  Yakui  <yakui.zhao@intel.com>
  27.  *
  28.  */
  29.  
  30. #include <stdio.h>
  31. #include <stdlib.h>
  32. #include <string.h>
  33. #include <assert.h>
  34. #include <math.h>
  35. #include <va/va_dec_jpeg.h>
  36. #include <va/va_dec_vp8.h>
  37.  
  38. #include "intel_batchbuffer.h"
  39. #include "intel_driver.h"
  40.  
  41. #include "i965_defines.h"
  42. #include "i965_drv_video.h"
  43. #include "i965_decoder_utils.h"
  44.  
  45. #include "gen7_mfd.h"
  46. #include "intel_media.h"
  47.  
/* Silicon stepping check: revision >= B0 selects the B+ stepping code paths. */
#define B0_STEP_REV             2
#define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
  50.  
/* Zig-zag scan order for an 8x8 coefficient block: entry i gives the
 * raster-order index of the i-th coefficient in scan order. Used to
 * reorder quantization matrices supplied in raster order. */
static const uint32_t zigzag_direct[64] = {
    0,   1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
  61.  
  62. static void
  63. gen8_mfd_init_avc_surface(VADriverContextP ctx,
  64.                           VAPictureParameterBufferH264 *pic_param,
  65.                           struct object_surface *obj_surface)
  66. {
  67.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  68.     GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
  69.     int width_in_mbs, height_in_mbs;
  70.  
  71.     obj_surface->free_private_data = gen_free_avc_surface;
  72.     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
  73.     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
  74.  
  75.     if (!gen7_avc_surface) {
  76.         gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
  77.  
  78.         if (!gen7_avc_surface)
  79.             return;
  80.  
  81.         gen7_avc_surface->base.frame_store_id = -1;
  82.         assert((obj_surface->size & 0x3f) == 0);
  83.         obj_surface->private_data = gen7_avc_surface;
  84.     }
  85.  
  86.     /* DMV buffers now relate to the whole frame, irrespective of
  87.        field coding modes */
  88.     if (gen7_avc_surface->dmv_top == NULL) {
  89.         gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
  90.                                                  "direct mv w/r buffer",
  91.                                                  width_in_mbs * height_in_mbs * 128,
  92.                                                  0x1000);
  93.         assert(gen7_avc_surface->dmv_top);
  94.     }
  95. }
  96.  
/*
 * Emit MFX_PIPE_MODE_SELECT: put the MFX engine into VLD decode mode for
 * the given codec. Error-handling/termination bits (DW2) are all left at
 * their non-terminating defaults.
 */
static void
gen8_mfd_pipe_mode_select(VADriverContextP ctx,
                          struct decode_state *decode_state,
                          int standard_select,
                          struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    assert(standard_select == MFX_FORMAT_MPEG2 ||
           standard_select == MFX_FORMAT_AVC ||
           standard_select == MFX_FORMAT_VC1 ||
           standard_select == MFX_FORMAT_JPEG ||
           standard_select == MFX_FORMAT_VP8);

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  (MFX_LONG_MODE << 17) | /* Currently only support long format */
                  (MFD_MODE_VLD << 15) | /* VLD mode */
                  (0 << 10) | /* disable Stream-Out */
                  (gen7_mfd_context->post_deblocking_output.valid << 9)  | /* Post Deblocking Output */
                  (gen7_mfd_context->pre_deblocking_output.valid << 8)  | /* Pre Deblocking Output */
                  (0 << 5)  | /* not in stitch mode */
                  (MFX_CODEC_DECODE << 4)  | /* decoding mode */
                  (standard_select << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
                  (0 << 1)  |
                  (0 << 0));
    OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
    OUT_BCS_BATCH(batch, 0); /* reserved */
    ADVANCE_BCS_BATCH(batch);
}
  132.  
/*
 * Emit MFX_SURFACE_STATE describing the destination (render) surface:
 * dimensions, pitch, tiling and the Cb/Cr plane offsets. JPEG is the
 * only codec decoded to non-interleaved chroma here.
 */
static void
gen8_mfd_surface_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       int standard_select,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct object_surface *obj_surface = decode_state->render_object;
    unsigned int y_cb_offset;
    unsigned int y_cr_offset;
    unsigned int surface_format;

    assert(obj_surface);

    y_cb_offset = obj_surface->y_cb_offset;
    y_cr_offset = obj_surface->y_cr_offset;

    /* Y800 (grayscale) surfaces have no chroma planes. */
    surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
        MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  ((obj_surface->orig_height - 1) << 18) |
                  ((obj_surface->orig_width - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (surface_format << 28) | /* 420 planar YUV surface */
                  ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
                  (0 << 22) | /* surface object control state, ignored */
                  ((obj_surface->width - 1) << 3) | /* pitch */
                  (0 << 2)  | /* must be 0 */
                  (1 << 1)  | /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for U(Cb), must be 0 */
                  (y_cb_offset << 0)); /* Y offset for U(Cb) */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for V(Cr), must be 0 */
                  (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
    ADVANCE_BCS_BATCH(batch);
}
  175.  
/*
 * Emit MFX_PIPE_BUF_ADDR_STATE (61 DWs): all surface/scratch buffer
 * addresses the MFX pipe needs. On gen8 each address slot is a 3-DW
 * group (relocation + upper 16 bits + MOCS); unused slots are zeroed.
 * The DW layout is fixed by hardware — do not reorder the writes.
 */
static void
gen8_mfd_pipe_buf_addr_state(VADriverContextP ctx,
                             struct decode_state *decode_state,
                             int standard_select,
                             struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int i;

    BEGIN_BCS_BATCH(batch, 61);
    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
    /* Pre-deblock 1-3 */
    if (gen7_mfd_context->pre_deblocking_output.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    /* Post-deblocking 4-6 */
    if (gen7_mfd_context->post_deblocking_output.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* uncompressed-video & stream out 7-12 */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* intra row-store scratch 13-15 */
    if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    /* deblocking-filter-row-store 16-18 */
    if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* Reference picture addresses, DW 19..50 (16 x 2 DWs). */
    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
        struct object_surface *obj_surface;

        if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
            gen7_mfd_context->reference_surface[i].obj_surface &&
            gen7_mfd_context->reference_surface[i].obj_surface->bo) {
            obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;

            OUT_BCS_RELOC(batch, obj_surface->bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0);
        } else {
            OUT_BCS_BATCH(batch, 0);
        }

        OUT_BCS_BATCH(batch, 0);
    }

    /* reference property 51 */
    OUT_BCS_BATCH(batch, 0);

    /* Macroblock status & ILDB 52-57 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the second Macroblock status 58-60 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
  273.  
/*
 * Emit MFX_IND_OBJ_BASE_ADDR_STATE (26 DWs): point the bitstream fetch
 * unit at the slice data buffer. All other indirect object bases (MV,
 * IT coefficient, IT deblock, PAK BSE) are unused for VLD decode and
 * are programmed as zero.
 */
static void
gen8_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
                                 dri_bo *slice_data_bo,
                                 int standard_select,
                                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 26);
    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
    /* MFX In BS 1-5 */
    OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    /* Upper bound 4-5 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* MFX indirect MV 6-10 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* MFX IT_COFF 11-15 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* MFX IT_DBLK 16-20 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* MFX PAK_BSE object for encoder 21-25 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
  322.  
/*
 * Emit MFX_BSP_BUF_BASE_ADDR_STATE (10 DWs): the BSD/MPC row store, MPR
 * row store, and bitplane read buffer addresses. Invalid (unallocated)
 * buffers are programmed as zero.
 */
static void
gen8_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
                                 struct decode_state *decode_state,
                                 int standard_select,
                                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 10);
    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));

    /* BSD/MPC row store scratch buffer 1-3 */
    if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    /* MPR Row Store Scratch buffer 4-6 */
    if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* Bitplane 7-9 */
    if (gen7_mfd_context->bitplane_read_buffer.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  365.  
  366. static void
  367. gen8_mfd_qm_state(VADriverContextP ctx,
  368.                   int qm_type,
  369.                   unsigned char *qm,
  370.                   int qm_length,
  371.                   struct gen7_mfd_context *gen7_mfd_context)
  372. {
  373.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  374.     unsigned int qm_buffer[16];
  375.  
  376.     assert(qm_length <= 16 * 4);
  377.     memcpy(qm_buffer, qm, qm_length);
  378.  
  379.     BEGIN_BCS_BATCH(batch, 18);
  380.     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
  381.     OUT_BCS_BATCH(batch, qm_type << 0);
  382.     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
  383.     ADVANCE_BCS_BATCH(batch);
  384. }
  385.  
/*
 * Emit MFX_AVC_IMG_STATE (17 DWs): per-picture AVC decode parameters
 * derived from the VA picture parameter buffer — picture size in MBs,
 * QP offsets, prediction/field flags, and the image structure (frame,
 * top field, or bottom field). Only 4:2:0 and monochrome are supported
 * by the MFX unit.
 */
static void
gen8_mfd_avc_img_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int img_struct;
    int mbaff_frame_flag;
    unsigned int width_in_mbs, height_in_mbs;
    VAPictureParameterBufferH264 *pic_param;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));

    /* img_struct: 0 = frame, 1 = top field, 3 = bottom field. */
    if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
        img_struct = 1;
    else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
        img_struct = 3;
    else
        img_struct = 0;

    /* A field picture must have field_pic_flag set, and vice versa. */
    if ((img_struct & 0x1) == 0x1) {
        assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
    } else {
        assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
    }

    if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
        assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
        assert(pic_param->pic_fields.bits.field_pic_flag == 0);
    } else {
        assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
    }

    /* MBAFF: adaptive frame/field coding within a frame picture. */
    mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
                        !pic_param->pic_fields.bits.field_pic_flag);

    width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */

    /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
    assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
           pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
    assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */

    BEGIN_BCS_BATCH(batch, 17);
    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2));
    OUT_BCS_BATCH(batch,
                  (width_in_mbs * height_in_mbs - 1));
    OUT_BCS_BATCH(batch,
                  ((height_in_mbs - 1) << 16) |
                  ((width_in_mbs - 1) << 0));
    OUT_BCS_BATCH(batch,
                  ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
                  ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
                  (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
                  (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
                  (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
                  (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
                  (img_struct << 8));
    OUT_BCS_BATCH(batch,
                  (pic_param->seq_fields.bits.chroma_format_idc << 10) |
                  (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
                  ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
                  (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
                  (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
                  (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
                  (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
                  (mbaff_frame_flag << 1) |
                  (pic_param->pic_fields.bits.field_pic_flag << 0));
    /* DW5..16: reserved / unused for decode. */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  471.  
  472. static void
  473. gen8_mfd_avc_qm_state(VADriverContextP ctx,
  474.                       struct decode_state *decode_state,
  475.                       struct gen7_mfd_context *gen7_mfd_context)
  476. {
  477.     VAIQMatrixBufferH264 *iq_matrix;
  478.     VAPictureParameterBufferH264 *pic_param;
  479.  
  480.     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
  481.         iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
  482.     else
  483.         iq_matrix = &gen7_mfd_context->iq_matrix.h264;
  484.  
  485.     assert(decode_state->pic_param && decode_state->pic_param->buffer);
  486.     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
  487.  
  488.     gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
  489.     gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
  490.  
  491.     if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
  492.         gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
  493.         gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
  494.     }
  495. }
  496.  
/* Thin wrapper: emit the AVC PICID state from the context's reference
 * surface list via the shared gen75 helper. */
static inline void
gen8_mfd_avc_picid_state(VADriverContextP ctx,
    struct decode_state *decode_state,
    struct gen7_mfd_context *gen7_mfd_context)
{
    gen75_send_avc_picid_state(gen7_mfd_context->base.batch,
        gen7_mfd_context->reference_surface);
}
  505.  
/*
 * Emit MFX_AVC_DIRECTMODE_STATE (71 DWs): the direct-MV (DMV) buffer
 * addresses for all reference surfaces and the current picture, followed
 * by the top/bottom picture order counts (POC) for each reference and
 * for the current picture. Needed for B-slice direct-mode prediction.
 *
 * NOTE(review): the POC loop declares a local `va_pic` that shadows the
 * outer one; the outer pointer is re-assigned afterwards, so behavior is
 * correct, but the shadowing is worth being aware of when editing.
 */
static void
gen8_mfd_avc_directmode_state(VADriverContextP ctx,
                              struct decode_state *decode_state,
                              VAPictureParameterBufferH264 *pic_param,
                              VASliceParameterBufferH264 *slice_param,
                              struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct object_surface *obj_surface;
    GenAvcSurface *gen7_avc_surface;
    VAPictureH264 *va_pic;
    int i;

    BEGIN_BCS_BATCH(batch, 71);
    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));

    /* reference surfaces 0..15: DMV buffer address (2 DWs each) */
    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
        if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
            gen7_mfd_context->reference_surface[i].obj_surface &&
            gen7_mfd_context->reference_surface[i].obj_surface->private_data) {

            obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
            gen7_avc_surface = obj_surface->private_data;

            OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0);
            OUT_BCS_BATCH(batch, 0);
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    OUT_BCS_BATCH(batch, 0);

    /* the current decoding frame/field */
    va_pic = &pic_param->CurrPic;
    obj_surface = decode_state->render_object;
    assert(obj_surface->bo && obj_surface->private_data);
    gen7_avc_surface = obj_surface->private_data;

    /* Current picture's DMV buffer is both read and written. */
    OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* POC List: top/bottom field order counts per reference surface */
    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
        obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;

        if (obj_surface) {
            const VAPictureH264 * const va_pic = avc_find_picture(
                obj_surface->base.id, pic_param->ReferenceFrames,
                ARRAY_ELEMS(pic_param->ReferenceFrames));

            assert(va_pic != NULL);
            OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
            OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    /* POC of the current picture closes the list. */
    va_pic = &pic_param->CurrPic;
    OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
    OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);

    ADVANCE_BCS_BATCH(batch);
}
  580.  
/* Thin wrapper: emit a phantom (error-concealment) slice before the first
 * real slice via the shared gen6 helper. */
static void
gen8_mfd_avc_phantom_slice_first(VADriverContextP ctx,
                                 VAPictureParameterBufferH264 *pic_param,
                                 VASliceParameterBufferH264 *next_slice_param,
                                 struct gen7_mfd_context *gen7_mfd_context)
{
    gen6_mfd_avc_phantom_slice(ctx, pic_param, next_slice_param, gen7_mfd_context->base.batch);
}
  589.  
/*
 * Emit MFX_AVC_SLICE_STATE (11 DWs) for one slice: slice type, active
 * reference counts, QP/deblocking parameters, and the start position of
 * this slice and the next (the hardware decodes up to the next slice's
 * first MB; for the last slice that is the end of the picture).
 */
static void
gen8_mfd_avc_slice_state(VADriverContextP ctx,
                         VAPictureParameterBufferH264 *pic_param,
                         VASliceParameterBufferH264 *slice_param,
                         VASliceParameterBufferH264 *next_slice_param,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
    int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
    int num_ref_idx_l0, num_ref_idx_l1;
    int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
                         pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
    int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
    int slice_type;

    /* Collapse SI->I and SP->P: hardware only knows I/P/B. */
    if (slice_param->slice_type == SLICE_TYPE_I ||
        slice_param->slice_type == SLICE_TYPE_SI) {
        slice_type = SLICE_TYPE_I;
    } else if (slice_param->slice_type == SLICE_TYPE_P ||
               slice_param->slice_type == SLICE_TYPE_SP) {
        slice_type = SLICE_TYPE_P;
    } else {
        assert(slice_param->slice_type == SLICE_TYPE_B);
        slice_type = SLICE_TYPE_B;
    }

    /* Active reference counts per list: I uses none, P uses L0 only. */
    if (slice_type == SLICE_TYPE_I) {
        assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
        num_ref_idx_l0 = 0;
        num_ref_idx_l1 = 0;
    } else if (slice_type == SLICE_TYPE_P) {
        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
        num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_idx_l1 = 0;
    } else {
        num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
    }

    first_mb_in_slice = slice_param->first_mb_in_slice;
    slice_hor_pos = first_mb_in_slice % width_in_mbs;
    slice_ver_pos = first_mb_in_slice / width_in_mbs;

    /* In MBAFF pictures, MB addresses count MB pairs, so the vertical
       position is doubled to get the MB row. */
    if (mbaff_picture)
        slice_ver_pos = slice_ver_pos << 1;
    if (next_slice_param) {
        first_mb_in_next_slice = next_slice_param->first_mb_in_slice;
        next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs;
        next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;

        if (mbaff_picture)
            next_slice_ver_pos = next_slice_ver_pos << 1;
    } else {
        /* Last slice: "next" position is the end of the picture
           (half height when decoding a single field). */
        next_slice_hor_pos = 0;
        next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
    }

    BEGIN_BCS_BATCH(batch, 11); /* 11 DWs: opcode + 10 payload DWs */
    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, slice_type);
    OUT_BCS_BATCH(batch,
                  (num_ref_idx_l1 << 24) |
                  (num_ref_idx_l0 << 16) |
                  (slice_param->chroma_log2_weight_denom << 8) |
                  (slice_param->luma_log2_weight_denom << 0));
    OUT_BCS_BATCH(batch,
                  (slice_param->direct_spatial_mv_pred_flag << 29) |
                  (slice_param->disable_deblocking_filter_idc << 27) |
                  (slice_param->cabac_init_idc << 24) |
                  ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
                  ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
                  ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
    OUT_BCS_BATCH(batch,
                  (slice_ver_pos << 24) |
                  (slice_hor_pos << 16) |
                  (first_mb_in_slice << 0));
    OUT_BCS_BATCH(batch,
                  (next_slice_ver_pos << 16) |
                  (next_slice_hor_pos << 0));
    OUT_BCS_BATCH(batch,
                  (next_slice_param == NULL) << 19); /* last slice flag */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  680.  
/* Thin wrapper: emit the AVC reference index remap state for this slice
 * via the shared gen6 helper. */
static inline void
gen8_mfd_avc_ref_idx_state(VADriverContextP ctx,
                           VAPictureParameterBufferH264 *pic_param,
                           VASliceParameterBufferH264 *slice_param,
                           struct gen7_mfd_context *gen7_mfd_context)
{
    gen6_send_avc_ref_idx_state(
        gen7_mfd_context->base.batch,
        slice_param,
        gen7_mfd_context->reference_surface
    );
}
  693.  
  694. static void
  695. gen8_mfd_avc_weightoffset_state(VADriverContextP ctx,
  696.                                 VAPictureParameterBufferH264 *pic_param,
  697.                                 VASliceParameterBufferH264 *slice_param,
  698.                                 struct gen7_mfd_context *gen7_mfd_context)
  699. {
  700.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  701.     int i, j, num_weight_offset_table = 0;
  702.     short weightoffsets[32 * 6];
  703.  
  704.     if ((slice_param->slice_type == SLICE_TYPE_P ||
  705.          slice_param->slice_type == SLICE_TYPE_SP) &&
  706.         (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
  707.         num_weight_offset_table = 1;
  708.     }
  709.    
  710.     if ((slice_param->slice_type == SLICE_TYPE_B) &&
  711.         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
  712.         num_weight_offset_table = 2;
  713.     }
  714.  
  715.     for (i = 0; i < num_weight_offset_table; i++) {
  716.         BEGIN_BCS_BATCH(batch, 98);
  717.         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
  718.         OUT_BCS_BATCH(batch, i);
  719.  
  720.         if (i == 0) {
  721.             for (j = 0; j < 32; j++) {
  722.                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
  723.                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
  724.                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
  725.                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
  726.                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
  727.                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
  728.             }
  729.         } else {
  730.             for (j = 0; j < 32; j++) {
  731.                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
  732.                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
  733.                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
  734.                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
  735.                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
  736.                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
  737.             }
  738.         }
  739.  
  740.         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
  741.         ADVANCE_BCS_BATCH(batch);
  742.     }
  743. }
  744.  
/*
 * Emit an MFD_AVC_BSD_OBJECT command for one slice: tells the bitstream
 * decoder where the slice data lives (size/offset within the indirect
 * object buffer) and the bit offset of the first macroblock, so the
 * hardware can skip the already-parsed slice header.
 */
static void
gen8_mfd_avc_bsd_object(VADriverContextP ctx,
                        VAPictureParameterBufferH264 *pic_param,
                        VASliceParameterBufferH264 *slice_param,
                        dri_bo *slice_data_bo,
                        VASliceParameterBufferH264 *next_slice_param,
                        struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    /* Bit position of the first MB in the slice data; depends on whether
     * CABAC or CAVLC entropy coding is in use. */
    int slice_data_bit_offset = avc_get_first_mb_bit_offset(slice_data_bo,
                                                            slice_param,
                                                            pic_param->pic_fields.bits.entropy_coding_mode_flag);

    /* the input bitstream format on GEN7 differs from GEN6 */
    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
    OUT_BCS_BATCH(batch,
                  (slice_param->slice_data_size));
    OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
    /* DW3: all control bits cleared (field meanings per the MFX BSpec;
     * not exercised by this driver path). */
    OUT_BCS_BATCH(batch,
                  (0 << 31) |
                  (0 << 14) |
                  (0 << 12) |
                  (0 << 10) |
                  (0 << 8));
    /* DW4: byte part of the first-MB offset in bits 16+, the residual
     * bit part (0-7) in the low bits. */
    OUT_BCS_BATCH(batch,
                  ((slice_data_bit_offset >> 3) << 16) |
                  (1 << 7)  |
                  (0 << 5)  |
                  (0 << 4)  |
                  ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
                  (slice_data_bit_offset & 0x7));
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  780.  
  781. static inline void
  782. gen8_mfd_avc_context_init(
  783.     VADriverContextP         ctx,
  784.     struct gen7_mfd_context *gen7_mfd_context
  785. )
  786. {
  787.     /* Initialize flat scaling lists */
  788.     avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
  789. }
  790.  
/*
 * Per-frame AVC decode setup: decide whether in-loop deblocking is
 * needed, bind the render target as the (pre- or post-deblocking)
 * output, and (re)allocate the row-store scratch buffers the MFX
 * engine requires.
 */
static void
gen8_mfd_avc_decode_init(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    VAPictureParameterBufferH264 *pic_param;
    VASliceParameterBufferH264 *slice_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    dri_bo *bo;
    int i, j, enable_avc_ildb = 0;
    unsigned int width_in_mbs, height_in_mbs;

    /* In-loop deblocking is needed as soon as any slice does not fully
     * disable the deblocking filter (idc != 1); stop scanning once found. */
    for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            if (slice_param->disable_deblocking_filter_idc != 1) {
                enable_avc_ildb = 1;
                break;
            }

            slice_param++;
        }
    }

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    /* Refresh the DPB frame-store slots from the VA reference list. */
    gen75_update_avc_frame_store_index(ctx, decode_state, pic_param,
        gen7_mfd_context->reference_surface);
    width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
    assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
    assert(height_in_mbs > 0 && height_in_mbs <= 256);

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    if (pic_param->pic_fields.bits.reference_pic_flag)
        obj_surface->flags |= SURFACE_REFERENCED;
    else
        obj_surface->flags &= ~SURFACE_REFERENCED;

    avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
    gen8_mfd_init_avc_surface(ctx, pic_param, obj_surface);

    /* Exactly one of the two outputs is valid, selected by whether the
     * hardware deblocker runs; both point at the render target's bo. */
    dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;

    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;

    /* Row-store scratch buffers scale with frame width in macroblocks;
     * the per-MB byte sizes below match the MFX engine's requirements. */
    dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "intra row store",
                      width_in_mbs * 64,
                      0x1000);
    assert(bo);
    gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "deblocking filter row store",
                      width_in_mbs * 64 * 4,
                      0x1000);
    assert(bo);
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 64 * 2,
                      0x1000);
    assert(bo);
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "mpr row store",
                      width_in_mbs * 64 * 2,
                      0x1000);
    assert(bo);
    gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;

    /* AVC has no bitplane data (VC-1 only). */
    gen7_mfd_context->bitplane_read_buffer.valid = 0;
}
  892.  
/*
 * Decode one AVC picture: emit the frame-level MFX state once, then for
 * each slice-parameter buffer emit the per-slice state and BSD objects.
 * The command order (pipe mode select, surface, buffer addresses, QM,
 * picid, image state, then slices) is required by the hardware.
 */
static void
gen8_mfd_avc_decode_picture(VADriverContextP ctx,
                            struct decode_state *decode_state,
                            struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferH264 *pic_param;
    VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    gen8_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);

    /* Frame-level state, emitted once per picture. */
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen8_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);

    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);

        /* First slice of the NEXT parameter buffer, so the last slice of
         * this buffer can still see its successor (NULL at end of frame). */
        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;

        /* If the first slice doesn't start at MB 0, emit a phantom slice
         * to cover the gap (error-concealment path). */
        if (j == 0 && slice_param->first_mb_in_slice)
            gen8_mfd_avc_phantom_slice_first(ctx, pic_param, slice_param, gen7_mfd_context);

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            /* Per-slice state followed by the BSD object that kicks off
             * decoding of the slice data. */
            gen8_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen7_mfd_context);
            gen8_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
            gen8_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
            gen8_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
            gen8_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
  957.  
/*
 * Per-frame MPEG-2 decode setup: bind reference surfaces, point the
 * pre-deblocking output at the render target (MPEG-2 has no in-loop
 * deblocking), and allocate the single row-store scratch buffer used.
 */
static void
gen8_mfd_mpeg2_decode_init(VADriverContextP ctx,
                           struct decode_state *decode_state,
                           struct gen7_mfd_context *gen7_mfd_context)
{
    VAPictureParameterBufferMPEG2 *pic_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    dri_bo *bo;
    unsigned int width_in_mbs;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
    width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;

    mpeg2_set_reference_surfaces(
        ctx,
        gen7_mfd_context->reference_surface,
        decode_state,
        pic_param
    );

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    /* MPEG-2 output always goes through the pre-deblocking path. */
    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = 1;

    /* BSD/MPC row store scales with frame width in macroblocks. */
    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 96,
                      0x1000);
    assert(bo);
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    /* None of the remaining auxiliary buffers are used for MPEG-2. */
    gen7_mfd_context->post_deblocking_output.valid = 0;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
    gen7_mfd_context->bitplane_read_buffer.valid = 0;
}
  1004.  
/*
 * Emit MFX_MPEG2_PIC_STATE: packs the f_codes, picture coding extension
 * flags, picture coding type and frame dimensions (in macroblocks) into
 * the 13-dword picture state command.
 */
static void
gen8_mfd_mpeg2_pic_state(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferMPEG2 *pic_param;
    unsigned int slice_concealment_disable_bit = 0;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;

    /* Concealment is always disabled on this path. */
    slice_concealment_disable_bit = 1;

    BEGIN_BCS_BATCH(batch, 13);
    OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
    /* DW1: VA packs the four f_code nibbles into one 16-bit field; unpack
     * them into the hardware's layout, followed by the coding-extension bits. */
    OUT_BCS_BATCH(batch,
                  (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
                  ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
                  ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
                  ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
                  pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
                  pic_param->picture_coding_extension.bits.picture_structure << 12 |
                  pic_param->picture_coding_extension.bits.top_field_first << 11 |
                  pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
                  pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
                  pic_param->picture_coding_extension.bits.q_scale_type << 8 |
                  pic_param->picture_coding_extension.bits.intra_vlc_format << 7 |
                  pic_param->picture_coding_extension.bits.alternate_scan << 6);
    OUT_BCS_BATCH(batch,
                  pic_param->picture_coding_type << 9);
    /* DW3: frame size in macroblocks, minus one in each dimension. */
    OUT_BCS_BATCH(batch,
                  (slice_concealment_disable_bit << 31) |
                  ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
                  ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  1051.  
  1052. static void
  1053. gen8_mfd_mpeg2_qm_state(VADriverContextP ctx,
  1054.                         struct decode_state *decode_state,
  1055.                         struct gen7_mfd_context *gen7_mfd_context)
  1056. {
  1057.     VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
  1058.     int i, j;
  1059.  
  1060.     /* Update internal QM state */
  1061.     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
  1062.         VAIQMatrixBufferMPEG2 * const iq_matrix =
  1063.             (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
  1064.  
  1065.         if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
  1066.             iq_matrix->load_intra_quantiser_matrix) {
  1067.             gen_iq_matrix->load_intra_quantiser_matrix =
  1068.                 iq_matrix->load_intra_quantiser_matrix;
  1069.             if (iq_matrix->load_intra_quantiser_matrix) {
  1070.                 for (j = 0; j < 64; j++)
  1071.                     gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
  1072.                         iq_matrix->intra_quantiser_matrix[j];
  1073.             }
  1074.         }
  1075.  
  1076.         if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
  1077.             iq_matrix->load_non_intra_quantiser_matrix) {
  1078.             gen_iq_matrix->load_non_intra_quantiser_matrix =
  1079.                 iq_matrix->load_non_intra_quantiser_matrix;
  1080.             if (iq_matrix->load_non_intra_quantiser_matrix) {
  1081.                 for (j = 0; j < 64; j++)
  1082.                     gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
  1083.                         iq_matrix->non_intra_quantiser_matrix[j];
  1084.             }
  1085.         }
  1086.     }
  1087.  
  1088.     /* Commit QM state to HW */
  1089.     for (i = 0; i < 2; i++) {
  1090.         unsigned char *qm = NULL;
  1091.         int qm_type;
  1092.  
  1093.         if (i == 0) {
  1094.             if (gen_iq_matrix->load_intra_quantiser_matrix) {
  1095.                 qm = gen_iq_matrix->intra_quantiser_matrix;
  1096.                 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
  1097.             }
  1098.         } else {
  1099.             if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
  1100.                 qm = gen_iq_matrix->non_intra_quantiser_matrix;
  1101.                 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
  1102.             }
  1103.         }
  1104.  
  1105.         if (!qm)
  1106.             continue;
  1107.  
  1108.         gen8_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
  1109.     }
  1110. }
  1111.  
/*
 * Emit MFD_MPEG2_BSD_OBJECT for one slice: computes the slice's start
 * and end macroblock positions (with a workaround that halves vertical
 * positions for buggy apps sending frame-relative rows in field
 * pictures) and points the decoder at the slice data.
 */
static void
gen8_mfd_mpeg2_bsd_object(VADriverContextP ctx,
                          VAPictureParameterBufferMPEG2 *pic_param,
                          VASliceParameterBufferMPEG2 *slice_param,
                          VASliceParameterBufferMPEG2 *next_slice_param,
                          struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
    int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;

    if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
        pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
        is_field_pic = 1;
    /* Workaround applies only when the context flagged frame-relative
     * slice_vertical_position values for a field picture. */
    is_field_pic_wa = is_field_pic &&
        gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;

    vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
    hpos0 = slice_param->slice_horizontal_position;

    if (next_slice_param == NULL) {
        /* Last slice: end position is the bottom of the (field) picture. */
        vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
        hpos1 = 0;
    } else {
        vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
        hpos1 = next_slice_param->slice_horizontal_position;
    }

    /* Number of macroblocks this slice covers, in raster order. */
    mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
    /* Size/offset skip past the already-parsed slice header bytes;
     * the sub-byte remainder goes into the low bits of DW3. */
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
    OUT_BCS_BATCH(batch,
                  hpos0 << 24 |
                  vpos0 << 16 |
                  mb_count << 8 |
                  (next_slice_param == NULL) << 5 |
                  (next_slice_param == NULL) << 3 |
                  (slice_param->macroblock_offset & 0x7));
    OUT_BCS_BATCH(batch,
                  (slice_param->quantiser_scale_code << 24) |
                  (vpos1 << 8 | hpos1));
    ADVANCE_BCS_BATCH(batch);
}
  1160.  
/*
 * Decode one MPEG-2 picture: emit the frame-level MFX state once, then
 * one BSD object per slice. Command order is mandated by the hardware.
 */
static void
gen8_mfd_mpeg2_decode_picture(VADriverContextP ctx,
                              struct decode_state *decode_state,
                              struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferMPEG2 *pic_param;
    VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;

    gen8_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen8_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);

    /* Lazily detect (once, < 0 means undetermined) whether the app sends
     * frame-relative slice rows for field pictures; see bsd_object. */
    if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
        gen7_mfd_context->wa_mpeg2_slice_vertical_position =
            mpeg2_wa_slice_vertical_position(decode_state, pic_param);

    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);

        /* First slice of the NEXT parameter buffer (NULL at end of frame),
         * so the last slice of this buffer can see its successor. */
        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen8_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
  1216.  
/* VA-API VC-1 picture_type -> GEN7 MFX picture type. Index 4 is
 * presumably VA's "skipped" picture, decoded as a P picture — TODO
 * confirm against va.h. */
static const int va_to_gen7_vc1_pic_type[5] = {
    GEN7_VC1_I_PICTURE,
    GEN7_VC1_P_PICTURE,
    GEN7_VC1_B_PICTURE,
    GEN7_VC1_BI_PICTURE,
    GEN7_VC1_P_PICTURE,
};

/* VA-API VC-1 mv_mode -> GEN7 MFX motion-vector mode. */
static const int va_to_gen7_vc1_mv[4] = {
    1, /* 1-MV */
    2, /* 1-MV half-pel */
    3, /* 1-MV half-pef bilinear */
    0, /* Mixed MV */
};

/* B-fraction scale factors; indexing scheme defined by the VC-1 spec's
 * BFRACTION table — see the caller for how the index is derived. */
static const int b_picture_scale_factor[21] = {
    128, 85,  170, 64,  192,
    51,  102, 153, 204, 43,
    215, 37,  74,  111, 148,
    185, 222, 32,  96,  160,
    224,
};

/* VA-API VC-1 conditional_overlap_flag -> GEN7 condover field. */
static const int va_to_gen7_vc1_condover[3] = {
    0,
    2,
    3
};

/* VA-API VC-1 profile -> GEN7 MFX profile code. */
static const int va_to_gen7_vc1_profile[4] = {
    GEN7_VC1_SIMPLE_PROFILE,
    GEN7_VC1_MAIN_PROFILE,
    GEN7_VC1_RESERVED_PROFILE,
    GEN7_VC1_ADVANCED_PROFILE
};
  1252.  
  1253. static void
  1254. gen8_mfd_free_vc1_surface(void **data)
  1255. {
  1256.     struct gen7_vc1_surface *gen7_vc1_surface = *data;
  1257.  
  1258.     if (!gen7_vc1_surface)
  1259.         return;
  1260.  
  1261.     dri_bo_unreference(gen7_vc1_surface->dmv);
  1262.     free(gen7_vc1_surface);
  1263.     *data = NULL;
  1264. }
  1265.  
  1266. static void
  1267. gen8_mfd_init_vc1_surface(VADriverContextP ctx,
  1268.                           VAPictureParameterBufferVC1 *pic_param,
  1269.                           struct object_surface *obj_surface)
  1270. {
  1271.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1272.     struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
  1273.     int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
  1274.     int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
  1275.  
  1276.     obj_surface->free_private_data = gen8_mfd_free_vc1_surface;
  1277.  
  1278.     if (!gen7_vc1_surface) {
  1279.         gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
  1280.  
  1281.         if (!gen7_vc1_surface)
  1282.             return;
  1283.  
  1284.         assert((obj_surface->size & 0x3f) == 0);
  1285.         obj_surface->private_data = gen7_vc1_surface;
  1286.     }
  1287.  
  1288.     gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
  1289.  
  1290.     if (gen7_vc1_surface->dmv == NULL) {
  1291.         gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
  1292.                                              "direct mv w/r buffer",
  1293.                                              width_in_mbs * height_in_mbs * 64,
  1294.                                              0x1000);
  1295.     }
  1296. }
  1297.  
  1298. static void
  1299. gen8_mfd_vc1_decode_init(VADriverContextP ctx,
  1300.                          struct decode_state *decode_state,
  1301.                          struct gen7_mfd_context *gen7_mfd_context)
  1302. {
  1303.     VAPictureParameterBufferVC1 *pic_param;
  1304.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1305.     struct object_surface *obj_surface;
  1306.     dri_bo *bo;
  1307.     int width_in_mbs;
  1308.     int picture_type;
  1309.  
  1310.     assert(decode_state->pic_param && decode_state->pic_param->buffer);
  1311.     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
  1312.     width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
  1313.     picture_type = pic_param->picture_fields.bits.picture_type;
  1314.  
  1315.     intel_update_vc1_frame_store_index(ctx,
  1316.                                        decode_state,
  1317.                                        pic_param,
  1318.                                        gen7_mfd_context->reference_surface);
  1319.  
  1320.     /* Current decoded picture */
  1321.     obj_surface = decode_state->render_object;
  1322.     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
  1323.     gen8_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
  1324.  
  1325.     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
  1326.     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
  1327.     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
  1328.     gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;
  1329.  
  1330.     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
  1331.     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
  1332.     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
  1333.     gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;
  1334.  
  1335.     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
  1336.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1337.                       "intra row store",
  1338.                       width_in_mbs * 64,
  1339.                       0x1000);
  1340.     assert(bo);
  1341.     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
  1342.     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
  1343.  
  1344.     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
  1345.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1346.                       "deblocking filter row store",
  1347.                       width_in_mbs * 7 * 64,
  1348.                       0x1000);
  1349.     assert(bo);
  1350.     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
  1351.     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
  1352.  
  1353.     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
  1354.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1355.                       "bsd mpc row store",
  1356.                       width_in_mbs * 96,
  1357.                       0x1000);
  1358.     assert(bo);
  1359.     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
  1360.     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
  1361.  
  1362.     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
  1363.  
  1364.     gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
  1365.     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
  1366.    
  1367.     if (gen7_mfd_context->bitplane_read_buffer.valid) {
  1368.         int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
  1369.         int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
  1370.         int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
  1371.         int src_w, src_h;
  1372.         uint8_t *src = NULL, *dst = NULL;
  1373.  
  1374.         assert(decode_state->bit_plane->buffer);
  1375.         src = decode_state->bit_plane->buffer;
  1376.  
  1377.         bo = dri_bo_alloc(i965->intel.bufmgr,
  1378.                           "VC-1 Bitplane",
  1379.                           bitplane_width * height_in_mbs,
  1380.                           0x1000);
  1381.         assert(bo);
  1382.         gen7_mfd_context->bitplane_read_buffer.bo = bo;
  1383.  
  1384.         dri_bo_map(bo, True);
  1385.         assert(bo->virtual);
  1386.         dst = bo->virtual;
  1387.  
  1388.         for (src_h = 0; src_h < height_in_mbs; src_h++) {
  1389.             for(src_w = 0; src_w < width_in_mbs; src_w++) {
  1390.                 int src_index, dst_index;
  1391.                 int src_shift;
  1392.                 uint8_t src_value;
  1393.  
  1394.                 src_index = (src_h * width_in_mbs + src_w) / 2;
  1395.                 src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
  1396.                 src_value = ((src[src_index] >> src_shift) & 0xf);
  1397.  
  1398.                 if (picture_type == GEN7_VC1_SKIPPED_PICTURE){
  1399.                     src_value |= 0x2;
  1400.                 }
  1401.  
  1402.                 dst_index = src_w / 2;
  1403.                 dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
  1404.             }
  1405.  
  1406.             if (src_w & 1)
  1407.                 dst[src_w / 2] >>= 4;
  1408.  
  1409.             dst += bitplane_width;
  1410.         }
  1411.  
  1412.         dri_bo_unmap(bo);
  1413.     } else
  1414.         gen7_mfd_context->bitplane_read_buffer.bo = NULL;
  1415. }
  1416.  
/*
 * Emit MFD_VC1_LONG_PIC_STATE for the current VC-1 picture.
 *
 * Derives all hardware picture-state fields (alternate pquant config,
 * unified MV mode, BFRACTION scale factor, frame coding mode, B-frame
 * reference distance, overlap smoothing, interpolation mode) from the
 * VA picture parameter buffer and packs them into a 6-dword command.
 *
 * NOTE(review): may modify pic_param->transform_fields in place (see the
 * 8.3.6.2.1 comment below) — the caller's buffer is not treated as const.
 */
static void
gen8_mfd_vc1_pic_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferVC1 *pic_param;
    struct object_surface *obj_surface;
    int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
    int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
    int unified_mv_mode;
    int ref_field_pic_polarity = 0;
    int scale_factor = 0;
    int trans_ac_y = 0;
    int dmv_surface_valid = 0;
    int brfd = 0;
    int fcm = 0;
    int picture_type;
    int profile;
    int overlap;
    int interpolation_mode = 0;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;

    profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
    dquant = pic_param->pic_quantizer_fields.bits.dquant;
    dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
    dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
    dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
    dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
    dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
    alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;

    /*
     * Translate the VC-1 DQUANT syntax into the hardware's alternate
     * pquant configuration and the 4-bit per-edge mask:
     *   dquant == 0: no alternate quantizer
     *   dquant == 2: alternate quantizer on all four edges
     *   dquant == 1: per-frame selection, refined by dqprofile/dq*edge
     */
    if (dquant == 0) {
        alt_pquant_config = 0;
        alt_pquant_edge_mask = 0;
    } else if (dquant == 2) {
        alt_pquant_config = 1;
        alt_pquant_edge_mask = 0xf;
    } else {
        assert(dquant == 1);
        if (dquantfrm == 0) {
            alt_pquant_config = 0;
            alt_pquant_edge_mask = 0;
            alt_pq = 0;
        } else {
            assert(dquantfrm == 1);
            alt_pquant_config = 1;

            switch (dqprofile) {
            case 3: /* all macroblocks: binary level chooses config 2 vs 3 */
                if (dqbilevel == 0) {
                    alt_pquant_config = 2;
                    alt_pquant_edge_mask = 0;
                } else {
                    assert(dqbilevel == 1);
                    alt_pquant_config = 3;
                    alt_pquant_edge_mask = 0;
                }
                break;

            case 0: /* all four edges */
                alt_pquant_edge_mask = 0xf;
                break;

            case 1: /* double edge: dqdbedge selects the adjacent edge pair */
                if (dqdbedge == 3)
                    alt_pquant_edge_mask = 0x9;
                else
                    alt_pquant_edge_mask = (0x3 << dqdbedge);

                break;

            case 2: /* single edge selected by dqsbedge */
                alt_pquant_edge_mask = (0x1 << dqsbedge);
                break;

            default:
                assert(0);
            }
        }
    }

    /* With intensity compensation the real MV mode lives in mv_mode2. */
    if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
        assert(pic_param->mv_fields.bits.mv_mode2 < 4);
        unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
    } else {
        assert(pic_param->mv_fields.bits.mv_mode < 4);
        unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
    }

    if (pic_param->sequence_fields.bits.interlace == 1 &&
        pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
        /* FIXME: calculate reference field picture polarity */
        assert(0);
        ref_field_pic_polarity = 0;
    }

    /* BFRACTION lookup: b_picture_fraction indexes the scale-factor table. */
    if (pic_param->b_picture_fraction < 21)
        scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];

    picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];

    /* Advanced-profile I pictures are programmed as BI for this command. */
    if (profile == GEN7_VC1_ADVANCED_PROFILE &&
        picture_type == GEN7_VC1_I_PICTURE)
        picture_type = GEN7_VC1_BI_PICTURE;

    if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
        trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
    else {
        trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;

        /*
         * 8.3.6.2.1 Transform Type Selection
         * If variable-sized transform coding is not enabled,
         * then the 8x8 transform shall be used for all blocks.
         * it is also MFX_VC1_PIC_STATE requirement.
         */
        if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
            pic_param->transform_fields.bits.mb_level_transform_type_flag   = 1;
            pic_param->transform_fields.bits.frame_level_transform_type     = 0;
        }
    }

    /*
     * The direct-MV read surface is only valid for B pictures whose
     * backward (future) reference is itself a P/B picture — an I/BI
     * anchor carries no motion vectors to read.
     */
    if (picture_type == GEN7_VC1_B_PICTURE) {
        struct gen7_vc1_surface *gen7_vc1_surface = NULL;

        obj_surface = decode_state->reference_objects[1];

        if (obj_surface)
            gen7_vc1_surface = obj_surface->private_data;

        if (!gen7_vc1_surface ||
            (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
             va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
            dmv_surface_valid = 0;
        else
            dmv_surface_valid = 1;
    }

    assert(pic_param->picture_fields.bits.frame_coding_mode < 3);

    /* Frame coding mode: 0/1 pass through; interlaced-field (2) becomes
     * 2 or 3 depending on field order. */
    if (pic_param->picture_fields.bits.frame_coding_mode < 2)
        fcm = pic_param->picture_fields.bits.frame_coding_mode;
    else {
        if (pic_param->picture_fields.bits.top_field_first)
            fcm = 2;
        else
            fcm = 3;
    }

    /* B-frame reference distance, scaled by the BFRACTION factor. */
    if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
        brfd = pic_param->reference_fields.bits.reference_distance;
        brfd = (scale_factor * brfd) >> 8;
        brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;

        if (brfd < 0)
            brfd = 0;
    }

    /*
     * Overlap smoothing: the sequence-level flag is further gated by
     * profile, picture type, quantizer scale and (for advanced profile
     * I/BI pictures) the conditional-overlap syntax element.
     */
    overlap = pic_param->sequence_fields.bits.overlap;

    if (overlap) {
        overlap = 0;
        if (profile != GEN7_VC1_ADVANCED_PROFILE){
            if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
                pic_param->picture_fields.bits.picture_type != GEN7_VC1_B_PICTURE) {
                overlap = 1;
            }
        }else {
            if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_P_PICTURE &&
                pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
                overlap = 1;
            }
            if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_I_PICTURE ||
                pic_param->picture_fields.bits.picture_type == GEN7_VC1_BI_PICTURE){
                if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
                    overlap = 1;
                } else if (va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 2 ||
                           va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 3) {
                    overlap = 1;
                }
            }
        }
    }

    assert(pic_param->conditional_overlap_flag < 3);
    assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */

    /* Sub-pel interpolation filter, again honouring mv_mode2 under
     * intensity compensation. */
    if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
        (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
         pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
        interpolation_mode = 9; /* Half-pel bilinear */
    else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
             (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
              pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
        interpolation_mode = 1; /* Half-pel bicubic */
    else
        interpolation_mode = 0; /* Quarter-pel bicubic */

    /* Pack everything into the 6-dword MFD_VC1_LONG_PIC_STATE command. */
    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
    OUT_BCS_BATCH(batch,
                  (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
                  ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
    OUT_BCS_BATCH(batch,
                  ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
                  dmv_surface_valid << 15 |
                  (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
                  pic_param->rounding_control << 13 |
                  pic_param->sequence_fields.bits.syncmarker << 12 |
                  interpolation_mode << 8 |
                  0 << 7 | /* FIXME: scale up or down ??? */
                  pic_param->range_reduction_frame << 6 |
                  pic_param->entrypoint_fields.bits.loopfilter << 5 |
                  overlap << 4 |
                  !pic_param->picture_fields.bits.is_first_field << 3 |
                  (pic_param->sequence_fields.bits.profile == 3) << 0);
    OUT_BCS_BATCH(batch,
                  va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
                  picture_type << 26 |
                  fcm << 24 |
                  alt_pq << 16 |
                  pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
                  scale_factor << 0);
    OUT_BCS_BATCH(batch,
                  unified_mv_mode << 28 |
                  pic_param->mv_fields.bits.four_mv_switch << 27 |
                  pic_param->fast_uvmc_flag << 26 |
                  ref_field_pic_polarity << 25 |
                  pic_param->reference_fields.bits.num_reference_pictures << 24 |
                  pic_param->reference_fields.bits.reference_distance << 20 |
                  pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
                  pic_param->mv_fields.bits.extended_dmv_range << 10 |
                  pic_param->mv_fields.bits.extended_mv_range << 8 |
                  alt_pquant_edge_mask << 4 |
                  alt_pquant_config << 2 |
                  pic_param->pic_quantizer_fields.bits.half_qp << 1 |
                  pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
    OUT_BCS_BATCH(batch,
                  !!pic_param->bitplane_present.value << 31 |
                  !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
                  !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
                  !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
                  !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
                  !pic_param->bitplane_present.flags.bp_overflags << 26 |
                  !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
                  !pic_param->bitplane_present.flags.bp_field_tx << 24 |
                  pic_param->mv_fields.bits.mv_table << 20 |
                  pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
                  pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
                  pic_param->transform_fields.bits.frame_level_transform_type << 12 |
                  pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
                  pic_param->mb_mode_table << 8 |
                  trans_ac_y << 6 |
                  pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
                  pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
                  pic_param->cbp_table << 0);
    ADVANCE_BCS_BATCH(batch);
}
  1678.  
  1679. static void
  1680. gen8_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
  1681.                              struct decode_state *decode_state,
  1682.                              struct gen7_mfd_context *gen7_mfd_context)
  1683. {
  1684.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  1685.     VAPictureParameterBufferVC1 *pic_param;
  1686.     int intensitycomp_single;
  1687.  
  1688.     assert(decode_state->pic_param && decode_state->pic_param->buffer);
  1689.     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
  1690.     intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
  1691.  
  1692.     BEGIN_BCS_BATCH(batch, 6);
  1693.     OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
  1694.     OUT_BCS_BATCH(batch,
  1695.                   0 << 14 | /* FIXME: double ??? */
  1696.                   0 << 12 |
  1697.                   intensitycomp_single << 10 |
  1698.                   intensitycomp_single << 8 |
  1699.                   0 << 4 | /* FIXME: interlace mode */
  1700.                   0);
  1701.     OUT_BCS_BATCH(batch,
  1702.                   pic_param->luma_shift << 16 |
  1703.                   pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
  1704.     OUT_BCS_BATCH(batch, 0);
  1705.     OUT_BCS_BATCH(batch, 0);
  1706.     OUT_BCS_BATCH(batch, 0);
  1707.     ADVANCE_BCS_BATCH(batch);
  1708. }
  1709.  
  1710. static void
  1711. gen8_mfd_vc1_directmode_state(VADriverContextP ctx,
  1712.                               struct decode_state *decode_state,
  1713.                               struct gen7_mfd_context *gen7_mfd_context)
  1714. {
  1715.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  1716.     struct object_surface *obj_surface;
  1717.     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
  1718.  
  1719.     obj_surface = decode_state->render_object;
  1720.  
  1721.     if (obj_surface && obj_surface->private_data) {
  1722.         dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
  1723.     }
  1724.  
  1725.     obj_surface = decode_state->reference_objects[1];
  1726.  
  1727.     if (obj_surface && obj_surface->private_data) {
  1728.         dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
  1729.     }
  1730.  
  1731.     BEGIN_BCS_BATCH(batch, 7);
  1732.     OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2));
  1733.  
  1734.     if (dmv_write_buffer)
  1735.         OUT_BCS_RELOC(batch, dmv_write_buffer,
  1736.                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
  1737.                       0);
  1738.     else
  1739.         OUT_BCS_BATCH(batch, 0);
  1740.  
  1741.     OUT_BCS_BATCH(batch, 0);
  1742.     OUT_BCS_BATCH(batch, 0);
  1743.  
  1744.     if (dmv_read_buffer)
  1745.         OUT_BCS_RELOC(batch, dmv_read_buffer,
  1746.                       I915_GEM_DOMAIN_INSTRUCTION, 0,
  1747.                       0);
  1748.     else
  1749.         OUT_BCS_BATCH(batch, 0);
  1750.    
  1751.     OUT_BCS_BATCH(batch, 0);
  1752.     OUT_BCS_BATCH(batch, 0);
  1753.                  
  1754.     ADVANCE_BCS_BATCH(batch);
  1755. }
  1756.  
  1757. static int
  1758. gen8_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
  1759. {
  1760.     int out_slice_data_bit_offset;
  1761.     int slice_header_size = in_slice_data_bit_offset / 8;
  1762.     int i, j;
  1763.  
  1764.     if (profile != 3)
  1765.         out_slice_data_bit_offset = in_slice_data_bit_offset;
  1766.     else {
  1767.         for (i = 0, j = 0; i < slice_header_size; i++, j++) {
  1768.             if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3 && buf[j + 3] < 4) {
  1769.                 i++, j += 2;
  1770.             }
  1771.         }
  1772.  
  1773.         out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
  1774.     }
  1775.  
  1776.     return out_slice_data_bit_offset;
  1777. }
  1778.  
/*
 * Emit MFD_VC1_BSD_OBJECT (5 dwords) for one VC-1 slice.
 *
 * Maps the slice data to convert the unescaped macroblock bit offset
 * into a raw-stream offset (emulation-prevention bytes re-inserted),
 * then programs the slice's data size/offset, its vertical extent
 * (current row .. first row of the next slice, or picture height for
 * the last slice), and the residual bit offset within the first byte.
 */
static void
gen8_mfd_vc1_bsd_object(VADriverContextP ctx,
                        VAPictureParameterBufferVC1 *pic_param,
                        VASliceParameterBufferVC1 *slice_param,
                        VASliceParameterBufferVC1 *next_slice_param,
                        dri_bo *slice_data_bo,
                        struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int next_slice_start_vert_pos;
    int macroblock_offset;
    uint8_t *slice_data = NULL;

    /* Map the bo read-only just long enough to scan the slice header. */
    dri_bo_map(slice_data_bo, 0);
    /* NOTE: arithmetic on ->virtual (void *) relies on the GCC extension. */
    slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
    macroblock_offset = gen8_mfd_vc1_get_macroblock_bit_offset(slice_data,
                                                               slice_param->macroblock_offset,
                                                               pic_param->sequence_fields.bits.profile);
    dri_bo_unmap(slice_data_bo);

    /* The last slice extends to the bottom macroblock row of the picture. */
    if (next_slice_param)
        next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
    else
        next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_size - (macroblock_offset >> 3));
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_offset + (macroblock_offset >> 3));
    OUT_BCS_BATCH(batch,
                  slice_param->slice_vertical_position << 16 |
                  next_slice_start_vert_pos << 0);
    OUT_BCS_BATCH(batch,
                  (macroblock_offset & 0x7)); /* bit offset within the first byte */
    ADVANCE_BCS_BATCH(batch);
}
  1817.  
/*
 * Decode one complete VC-1 picture.
 *
 * Initializes the per-picture decoder state, emits the common MFX
 * pipeline/state commands followed by the VC-1 specific picture,
 * prediction-pipe and direct-mode states, then walks every slice
 * parameter buffer (outer loop) and each slice element within it
 * (inner loop), emitting one BSD object per slice.  Each slice is
 * handed the following slice's parameters so the hardware knows
 * where the next slice begins (NULL for the very last slice).
 */
static void
gen8_mfd_vc1_decode_picture(VADriverContextP ctx,
                            struct decode_state *decode_state,
                            struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferVC1 *pic_param;
    VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;

    gen8_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen8_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);

    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);

        /* First slice of the next group (if any) is the "next slice" for
         * the last element of this group. */
        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen8_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
  1870.  
  1871. static void
  1872. gen8_mfd_jpeg_decode_init(VADriverContextP ctx,
  1873.                           struct decode_state *decode_state,
  1874.                           struct gen7_mfd_context *gen7_mfd_context)
  1875. {
  1876.     struct object_surface *obj_surface;
  1877.     VAPictureParameterBufferJPEGBaseline *pic_param;
  1878.     int subsampling = SUBSAMPLE_YUV420;
  1879.     int fourcc = VA_FOURCC_IMC3;
  1880.  
  1881.     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
  1882.  
  1883.     if (pic_param->num_components == 1)
  1884.         subsampling = SUBSAMPLE_YUV400;
  1885.     else if (pic_param->num_components == 3) {
  1886.         int h1 = pic_param->components[0].h_sampling_factor;
  1887.         int h2 = pic_param->components[1].h_sampling_factor;
  1888.         int h3 = pic_param->components[2].h_sampling_factor;
  1889.         int v1 = pic_param->components[0].v_sampling_factor;
  1890.         int v2 = pic_param->components[1].v_sampling_factor;
  1891.         int v3 = pic_param->components[2].v_sampling_factor;
  1892.  
  1893.         if (h1 == 2 && h2 == 1 && h3 == 1 &&
  1894.             v1 == 2 && v2 == 1 && v3 == 1) {
  1895.             subsampling = SUBSAMPLE_YUV420;
  1896.             fourcc = VA_FOURCC_IMC3;
  1897.         } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
  1898.                    v1 == 1 && v2 == 1 && v3 == 1) {
  1899.             subsampling = SUBSAMPLE_YUV422H;
  1900.             fourcc = VA_FOURCC_422H;
  1901.         } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
  1902.                    v1 == 1 && v2 == 1 && v3 == 1) {
  1903.             subsampling = SUBSAMPLE_YUV444;
  1904.             fourcc = VA_FOURCC_444P;
  1905.         } else if (h1 == 4 && h2 == 1 && h3 == 1 &&
  1906.                    v1 == 1 && v2 == 1 && v3 == 1) {
  1907.             subsampling = SUBSAMPLE_YUV411;
  1908.             fourcc = VA_FOURCC_411P;
  1909.         } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
  1910.                    v1 == 2 && v2 == 1 && v3 == 1) {
  1911.             subsampling = SUBSAMPLE_YUV422V;
  1912.             fourcc = VA_FOURCC_422V;
  1913.         } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
  1914.                    v1 == 2 && v2 == 2 && v3 == 2) {
  1915.             subsampling = SUBSAMPLE_YUV422H;
  1916.             fourcc = VA_FOURCC_422H;
  1917.         } else if (h1 == 2 && h2 == 2 && h3 == 2 &&
  1918.                    v1 == 2 && v2 == 1 && v3 == 1) {
  1919.             subsampling = SUBSAMPLE_YUV422V;
  1920.             fourcc = VA_FOURCC_422V;
  1921.         } else
  1922.             assert(0);
  1923.     }
  1924.     else {
  1925.         assert(0);
  1926.     }
  1927.  
  1928.     /* Current decoded picture */
  1929.     obj_surface = decode_state->render_object;
  1930.     i965_check_alloc_surface_bo(ctx, obj_surface, 1, fourcc, subsampling);
  1931.  
  1932.     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
  1933.     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
  1934.     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
  1935.     gen7_mfd_context->pre_deblocking_output.valid = 1;
  1936.  
  1937.     gen7_mfd_context->post_deblocking_output.bo = NULL;
  1938.     gen7_mfd_context->post_deblocking_output.valid = 0;
  1939.  
  1940.     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
  1941.     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
  1942.  
  1943.     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
  1944.     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
  1945.  
  1946.     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
  1947.     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
  1948.  
  1949.     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
  1950.     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
  1951.  
  1952.     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
  1953.     gen7_mfd_context->bitplane_read_buffer.valid = 0;
  1954. }
  1955.  
/* Map VA rotation indices 0..3 to the GEN7 JPEG rotation codes
 * (0, 90, 180, 270 degrees). */
static const int va_to_gen7_jpeg_rotation[4] = {
    GEN7_JPEG_ROTATION_0,
    GEN7_JPEG_ROTATION_90,
    GEN7_JPEG_ROTATION_180,
    GEN7_JPEG_ROTATION_270
};
  1962.  
  1963. static void
  1964. gen8_mfd_jpeg_pic_state(VADriverContextP ctx,
  1965.                         struct decode_state *decode_state,
  1966.                         struct gen7_mfd_context *gen7_mfd_context)
  1967. {
  1968.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  1969.     VAPictureParameterBufferJPEGBaseline *pic_param;
  1970.     int chroma_type = GEN7_YUV420;
  1971.     int frame_width_in_blks;
  1972.     int frame_height_in_blks;
  1973.  
  1974.     assert(decode_state->pic_param && decode_state->pic_param->buffer);
  1975.     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
  1976.  
  1977.     if (pic_param->num_components == 1)
  1978.         chroma_type = GEN7_YUV400;
  1979.     else if (pic_param->num_components == 3) {
  1980.         int h1 = pic_param->components[0].h_sampling_factor;
  1981.         int h2 = pic_param->components[1].h_sampling_factor;
  1982.         int h3 = pic_param->components[2].h_sampling_factor;
  1983.         int v1 = pic_param->components[0].v_sampling_factor;
  1984.         int v2 = pic_param->components[1].v_sampling_factor;
  1985.         int v3 = pic_param->components[2].v_sampling_factor;
  1986.  
  1987.         if (h1 == 2 && h2 == 1 && h3 == 1 &&
  1988.             v1 == 2 && v2 == 1 && v3 == 1)
  1989.             chroma_type = GEN7_YUV420;
  1990.         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
  1991.                  v1 == 1 && v2 == 1 && v3 == 1)
  1992.             chroma_type = GEN7_YUV422H_2Y;
  1993.         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
  1994.                  v1 == 1 && v2 == 1 && v3 == 1)
  1995.             chroma_type = GEN7_YUV444;
  1996.         else if (h1 == 4 && h2 == 1 && h3 == 1 &&
  1997.                  v1 == 1 && v2 == 1 && v3 == 1)
  1998.             chroma_type = GEN7_YUV411;
  1999.         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
  2000.                  v1 == 2 && v2 == 1 && v3 == 1)
  2001.             chroma_type = GEN7_YUV422V_2Y;
  2002.         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
  2003.                  v1 == 2 && v2 == 2 && v3 == 2)
  2004.             chroma_type = GEN7_YUV422H_4Y;
  2005.         else if (h2 == 2 && h2 == 2 && h3 == 2 &&
  2006.                  v1 == 2 && v2 == 1 && v3 == 1)
  2007.             chroma_type = GEN7_YUV422V_4Y;
  2008.         else
  2009.             assert(0);
  2010.     }
  2011.  
  2012.     if (chroma_type == GEN7_YUV400 ||
  2013.         chroma_type == GEN7_YUV444 ||
  2014.         chroma_type == GEN7_YUV422V_2Y) {
  2015.         frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
  2016.         frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
  2017.     } else if (chroma_type == GEN7_YUV411) {
  2018.         frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
  2019.         frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
  2020.     } else {
  2021.         frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
  2022.         frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
  2023.     }
  2024.  
  2025.     BEGIN_BCS_BATCH(batch, 3);
  2026.     OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
  2027.     OUT_BCS_BATCH(batch,
  2028.                   (va_to_gen7_jpeg_rotation[0] << 4) |    /* without rotation */
  2029.                   (chroma_type << 0));
  2030.     OUT_BCS_BATCH(batch,
  2031.                   ((frame_height_in_blks - 1) << 16) |   /* FrameHeightInBlks */
  2032.                   ((frame_width_in_blks - 1) << 0));    /* FrameWidthInBlks */
  2033.     ADVANCE_BCS_BATCH(batch);
  2034. }
  2035.  
/* Map a VA Huffman table slot (0 = luma, 1 = chroma) to the MFX
 * huffman table id programmed by MFX_JPEG_HUFF_TABLE_STATE. */
static const int va_to_gen7_jpeg_hufftable[2] = {
    MFX_HUFFTABLE_ID_Y,
    MFX_HUFFTABLE_ID_UV
};
  2040.  
/*
 * Emit MFX_JPEG_HUFF_TABLE_STATE for each Huffman table the application
 * loaded, up to num_tables (at most 2: luma and chroma slots, see
 * va_to_gen7_jpeg_hufftable).  Does nothing when no Huffman table
 * buffer was supplied.
 */
static void
gen8_mfd_jpeg_huff_table_state(VADriverContextP ctx,
                               struct decode_state *decode_state,
                               struct gen7_mfd_context *gen7_mfd_context,
                               int num_tables)
{
    VAHuffmanTableBufferJPEGBaseline *huffman_table;
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int index;

    if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
        return;

    huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;

    for (index = 0; index < num_tables; index++) {
        int id = va_to_gen7_jpeg_hufftable[index];
        /* Only program tables the application flagged as loaded. */
        if (!huffman_table->load_huffman_table[index])
            continue;
        /* Payload: 12 + 12 + 16 + 164 = 204 bytes = 51 dwords, plus the
         * 2 header dwords below = 53 dwords total. */
        BEGIN_BCS_BATCH(batch, 53);
        OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
        OUT_BCS_BATCH(batch, id);
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
        /* NOTE(review): ac_values is 162 bytes in the VA struct; copying
         * 164 presumably pulls in the 2 trailing pad bytes to keep the
         * packet dword-aligned -- confirm against va_dec_jpeg.h layout. */
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
        ADVANCE_BCS_BATCH(batch);
    }
}
  2070.  
/* Map a normalized JPEG component id (1..4) to the MFX quantization
 * matrix type; index 0 is unused since component ids are 1-based. */
static const int va_to_gen7_jpeg_qm[5] = {
    -1,
    MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
    MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
    MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
    MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
};
  2078.  
  2079. static void
  2080. gen8_mfd_jpeg_qm_state(VADriverContextP ctx,
  2081.                        struct decode_state *decode_state,
  2082.                        struct gen7_mfd_context *gen7_mfd_context)
  2083. {
  2084.     VAPictureParameterBufferJPEGBaseline *pic_param;
  2085.     VAIQMatrixBufferJPEGBaseline *iq_matrix;
  2086.     int index;
  2087.  
  2088.     if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
  2089.         return;
  2090.  
  2091.     iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
  2092.     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
  2093.  
  2094.     assert(pic_param->num_components <= 3);
  2095.  
  2096.     for (index = 0; index < pic_param->num_components; index++) {
  2097.         int id = pic_param->components[index].component_id - pic_param->components[0].component_id + 1;
  2098.         int qm_type;
  2099.         unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
  2100.         unsigned char raster_qm[64];
  2101.         int j;
  2102.  
  2103.         if (id > 4 || id < 1)
  2104.             continue;
  2105.  
  2106.         if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
  2107.             continue;
  2108.  
  2109.         qm_type = va_to_gen7_jpeg_qm[id];
  2110.  
  2111.         for (j = 0; j < 64; j++)
  2112.             raster_qm[zigzag_direct[j]] = qm[j];
  2113.  
  2114.         gen8_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
  2115.     }
  2116. }
  2117.  
  2118. static void
  2119. gen8_mfd_jpeg_bsd_object(VADriverContextP ctx,
  2120.                          VAPictureParameterBufferJPEGBaseline *pic_param,
  2121.                          VASliceParameterBufferJPEGBaseline *slice_param,
  2122.                          VASliceParameterBufferJPEGBaseline *next_slice_param,
  2123.                          dri_bo *slice_data_bo,
  2124.                          struct gen7_mfd_context *gen7_mfd_context)
  2125. {
  2126.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  2127.     int scan_component_mask = 0;
  2128.     int i;
  2129.  
  2130.     assert(slice_param->num_components > 0);
  2131.     assert(slice_param->num_components < 4);
  2132.     assert(slice_param->num_components <= pic_param->num_components);
  2133.  
  2134.     for (i = 0; i < slice_param->num_components; i++) {
  2135.         switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
  2136.         case 1:
  2137.             scan_component_mask |= (1 << 0);
  2138.             break;
  2139.         case 2:
  2140.             scan_component_mask |= (1 << 1);
  2141.             break;
  2142.         case 3:
  2143.             scan_component_mask |= (1 << 2);
  2144.             break;
  2145.         default:
  2146.             assert(0);
  2147.             break;
  2148.         }
  2149.     }
  2150.  
  2151.     BEGIN_BCS_BATCH(batch, 6);
  2152.     OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
  2153.     OUT_BCS_BATCH(batch,
  2154.                   slice_param->slice_data_size);
  2155.     OUT_BCS_BATCH(batch,
  2156.                   slice_param->slice_data_offset);
  2157.     OUT_BCS_BATCH(batch,
  2158.                   slice_param->slice_horizontal_position << 16 |
  2159.                   slice_param->slice_vertical_position << 0);
  2160.     OUT_BCS_BATCH(batch,
  2161.                   ((slice_param->num_components != 1) << 30) |  /* interleaved */
  2162.                   (scan_component_mask << 27) |                 /* scan components */
  2163.                   (0 << 26) |   /* disable interrupt allowed */
  2164.                   (slice_param->num_mcus << 0));                /* MCU count */
  2165.     OUT_BCS_BATCH(batch,
  2166.                   (slice_param->restart_interval << 0));    /* RestartInterval */
  2167.     ADVANCE_BCS_BATCH(batch);
  2168. }
  2169.  
  2170. /* Workaround for JPEG decoding on Ivybridge */
  2171. #ifdef JPEG_WA
  2172.  
/*
 * Hard-coded 16x16 AVC intra clip decoded before each real JPEG frame
 * as a hardware workaround (see gen8_mfd_jpeg_wa()).
 */
static struct {
    int width;                /* clip width in pixels */
    int height;               /* clip height in pixels */
    unsigned char data[32];   /* pre-encoded AVC slice bitstream */
    int data_size;            /* number of valid bytes in data[] */
    int data_bit_offset;      /* bit offset of the slice payload within data[] */
    int qp;                   /* slice quantization parameter */
} gen7_jpeg_wa_clip = {
    16,
    16,
    {
        0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
        0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
    },
    14,
    40,
    28,
};
  2191.  
/*
 * (Re)create the scratch surface and upload the bitstream buffer used
 * by the JPEG decoding workaround clip.
 */
static void
gen8_jpeg_wa_init(VADriverContextP ctx,
                  struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    VAStatus status;
    struct object_surface *obj_surface;

    /* Drop any WA surface left over from a previous picture. */
    if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
        i965_DestroySurfaces(ctx,
                             &gen7_mfd_context->jpeg_wa_surface_id,
                             1);

    /* A surface matching the hard-coded 16x16 clip is all that is needed. */
    status = i965_CreateSurfaces(ctx,
                                 gen7_jpeg_wa_clip.width,
                                 gen7_jpeg_wa_clip.height,
                                 VA_RT_FORMAT_YUV420,
                                 1,
                                 &gen7_mfd_context->jpeg_wa_surface_id);
    assert(status == VA_STATUS_SUCCESS);

    obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
    assert(obj_surface);
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
    gen7_mfd_context->jpeg_wa_surface_object = obj_surface;

    /* Upload the pre-encoded AVC clip once; the buffer is then reused
     * across pictures for the lifetime of the context. */
    if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
        gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
                                                               "JPEG WA data",
                                                               0x1000,
                                                               0x1000);
        dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
                       0,
                       gen7_jpeg_wa_clip.data_size,
                       gen7_jpeg_wa_clip.data);
    }
}
  2229.  
/*
 * Emit MFX_PIPE_MODE_SELECT configuring the MFX engine for AVC VLD
 * decode of the workaround clip (long format, pre-deblocking output,
 * stream-out disabled).
 */
static void
gen8_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
                              struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  (MFX_LONG_MODE << 17) | /* Currently only support long format */
                  (MFD_MODE_VLD << 15) | /* VLD mode */
                  (0 << 10) | /* disable Stream-Out */
                  (0 << 9)  | /* Post Deblocking Output */
                  (1 << 8)  | /* Pre Deblocking Output */
                  (0 << 5)  | /* not in stitch mode */
                  (MFX_CODEC_DECODE << 4)  | /* decoding mode */
                  (MFX_FORMAT_AVC << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
                  (0 << 1)  |
                  (0 << 0));
    OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
    OUT_BCS_BATCH(batch, 0); /* reserved */
    ADVANCE_BCS_BATCH(batch);
}
  2257.  
/*
 * Emit MFX_SURFACE_STATE describing the NV12 scratch surface the
 * workaround clip is decoded into.
 */
static void
gen8_jpeg_wa_surface_state(VADriverContextP ctx,
                           struct gen7_mfd_context *gen7_mfd_context)
{
    struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  ((obj_surface->orig_width - 1) << 18) |
                  ((obj_surface->orig_height - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
                  (1 << 27) | /* interleave chroma (NV12); 0 is only for JPEG surfaces, and this WA decodes AVC */
                  (0 << 22) | /* surface object control state, ignored */
                  ((obj_surface->width - 1) << 3) | /* pitch */
                  (0 << 2)  | /* must be 0 */
                  (1 << 1)  | /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for U(Cb), must be 0 */
                  (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for V(Cr), must be 0 */
                  (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
    ADVANCE_BCS_BATCH(batch);
}
  2287.  
  2288. static void
  2289. gen8_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
  2290.                                  struct gen7_mfd_context *gen7_mfd_context)
  2291. {
  2292.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2293.     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
  2294.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  2295.     dri_bo *intra_bo;
  2296.     int i;
  2297.  
  2298.     intra_bo = dri_bo_alloc(i965->intel.bufmgr,
  2299.                             "intra row store",
  2300.                             128 * 64,
  2301.                             0x1000);
  2302.  
  2303.     BEGIN_BCS_BATCH(batch, 61);
  2304.     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
  2305.     OUT_BCS_RELOC(batch,
  2306.                   obj_surface->bo,
  2307.                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
  2308.                   0);
  2309.         OUT_BCS_BATCH(batch, 0);
  2310.         OUT_BCS_BATCH(batch, 0);
  2311.    
  2312.  
  2313.     OUT_BCS_BATCH(batch, 0); /* post deblocking */
  2314.         OUT_BCS_BATCH(batch, 0);
  2315.         OUT_BCS_BATCH(batch, 0);
  2316.  
  2317.         /* uncompressed-video & stream out 7-12 */
  2318.     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
  2319.     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
  2320.         OUT_BCS_BATCH(batch, 0);
  2321.         OUT_BCS_BATCH(batch, 0);
  2322.         OUT_BCS_BATCH(batch, 0);
  2323.         OUT_BCS_BATCH(batch, 0);
  2324.  
  2325.         /* the DW 13-15 is for intra row store scratch */
  2326.     OUT_BCS_RELOC(batch,
  2327.                   intra_bo,
  2328.                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
  2329.                   0);
  2330.         OUT_BCS_BATCH(batch, 0);
  2331.         OUT_BCS_BATCH(batch, 0);
  2332.  
  2333.         /* the DW 16-18 is for deblocking filter */
  2334.     OUT_BCS_BATCH(batch, 0);
  2335.         OUT_BCS_BATCH(batch, 0);
  2336.         OUT_BCS_BATCH(batch, 0);
  2337.  
  2338.     /* DW 19..50 */
  2339.     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
  2340.         OUT_BCS_BATCH(batch, 0);
  2341.         OUT_BCS_BATCH(batch, 0);
  2342.     }
  2343.     OUT_BCS_BATCH(batch, 0);
  2344.  
  2345.         /* the DW52-54 is for mb status address */
  2346.     OUT_BCS_BATCH(batch, 0);
  2347.         OUT_BCS_BATCH(batch, 0);
  2348.         OUT_BCS_BATCH(batch, 0);
  2349.         /* the DW56-60 is for ILDB & second ILDB address */
  2350.     OUT_BCS_BATCH(batch, 0);
  2351.         OUT_BCS_BATCH(batch, 0);
  2352.         OUT_BCS_BATCH(batch, 0);
  2353.     OUT_BCS_BATCH(batch, 0);
  2354.         OUT_BCS_BATCH(batch, 0);
  2355.         OUT_BCS_BATCH(batch, 0);
  2356.  
  2357.     ADVANCE_BCS_BATCH(batch);
  2358.  
  2359.     dri_bo_unreference(intra_bo);
  2360. }
  2361.  
/*
 * Emit MFX_BSP_BUF_BASE_ADDR_STATE with temporary BSD/MPC and MPR
 * row-store buffers for the workaround clip; both buffers are released
 * right after being referenced by the batch.
 */
static void
gen8_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
                                     struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    dri_bo *bsd_mpc_bo, *mpr_bo;

    bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
                              "bsd mpc row store",
                              11520, /* 1.5 * 120 * 64 */
                              0x1000);

    mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
                          "mpr row store",
                          7680, /* 1. 0 * 120 * 64 */
                          0x1000);

    BEGIN_BCS_BATCH(batch, 10);
    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));

    /* BSD/MPC row-store base address */
    OUT_BCS_RELOC(batch,
                  bsd_mpc_bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* MPR row-store base address */
    OUT_BCS_RELOC(batch,
                  mpr_bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* Bitplane read buffer, unused */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);

    /* The relocations keep the buffers alive; drop our references. */
    dri_bo_unreference(bsd_mpc_bo);
    dri_bo_unreference(mpr_bo);
}
  2407.  
/*
 * Intentionally empty: no MFX_QM_STATE is emitted for the workaround
 * clip -- presumably it relies on the hardware default scaling lists.
 * NOTE(review): confirm no QM programming is required here.
 */
static void
gen8_jpeg_wa_avc_qm_state(VADriverContextP ctx,
                          struct gen7_mfd_context *gen7_mfd_context)
{

}
  2414.  
/*
 * Emit MFX_AVC_IMG_STATE for the workaround clip: a single-macroblock
 * (1x1 MB) progressive CABAC frame.
 */
static void
gen8_jpeg_wa_avc_img_state(VADriverContextP ctx,
                           struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int img_struct = 0;          /* frame picture */
    int mbaff_frame_flag = 0;    /* no MBAFF */
    unsigned int width_in_mbs = 1, height_in_mbs = 1;

    BEGIN_BCS_BATCH(batch, 16);
    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
    OUT_BCS_BATCH(batch,
                  width_in_mbs * height_in_mbs);
    OUT_BCS_BATCH(batch,
                  ((height_in_mbs - 1) << 16) |
                  ((width_in_mbs - 1) << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 24) |
                  (0 << 16) |
                  (0 << 14) |
                  (0 << 13) |
                  (0 << 12) | /* differ from GEN6 */
                  (0 << 10) |
                  (img_struct << 8));
    OUT_BCS_BATCH(batch,
                  (1 << 10) | /* 4:2:0 */
                  (1 << 7) |  /* CABAC */
                  (0 << 6) |
                  (0 << 5) |
                  (0 << 4) |
                  (0 << 3) |
                  (1 << 2) |
                  (mbaff_frame_flag << 1) |
                  (0 << 0));
    /* remaining dwords are unused for this trivial clip */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  2462.  
  2463. static void
  2464. gen8_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
  2465.                                   struct gen7_mfd_context *gen7_mfd_context)
  2466. {
  2467.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  2468.     int i;
  2469.  
  2470.     BEGIN_BCS_BATCH(batch, 71);
  2471.     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
  2472.  
  2473.     /* reference surfaces 0..15 */
  2474.     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
  2475.         OUT_BCS_BATCH(batch, 0); /* top */
  2476.         OUT_BCS_BATCH(batch, 0); /* bottom */
  2477.     }
  2478.        
  2479.         OUT_BCS_BATCH(batch, 0);
  2480.  
  2481.     /* the current decoding frame/field */
  2482.     OUT_BCS_BATCH(batch, 0); /* top */
  2483.     OUT_BCS_BATCH(batch, 0);
  2484.     OUT_BCS_BATCH(batch, 0);
  2485.  
  2486.     /* POC List */
  2487.     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
  2488.         OUT_BCS_BATCH(batch, 0);
  2489.         OUT_BCS_BATCH(batch, 0);
  2490.     }
  2491.  
  2492.     OUT_BCS_BATCH(batch, 0);
  2493.     OUT_BCS_BATCH(batch, 0);
  2494.  
  2495.     ADVANCE_BCS_BATCH(batch);
  2496. }
  2497.  
/*
 * Emit MFX_IND_OBJ_BASE_ADDR_STATE pointing the bitstream fetcher at
 * the pre-uploaded workaround clip data.
 */
static void
gen8_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
                                     struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 11);
    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
    OUT_BCS_RELOC(batch,
                  gen7_mfd_context->jpeg_wa_slice_data_bo,
                  I915_GEM_DOMAIN_INSTRUCTION, 0,
                  0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  2521.  
/*
 * Emit MFD_AVC_BSD_OBJECT describing the workaround clip's single
 * slice: its size and the byte/bit offset of the slice payload.
 */
static void
gen8_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
                            struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    /* the input bitstream format on GEN7 differs from GEN6 */
    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
    OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  (0 << 31) |
                  (0 << 14) |
                  (0 << 12) |
                  (0 << 10) |
                  (0 << 8));
    OUT_BCS_BATCH(batch,
                  ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) | /* byte offset of first MB */
                  (0 << 5)  |
                  (0 << 4)  |
                  (1 << 3) | /* LastSlice Flag */
                  (gen7_jpeg_wa_clip.data_bit_offset & 0x7)); /* remaining bit offset */
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  2548.  
/*
 * Emit MFX_AVC_SLICE_STATE for the workaround clip: one I slice
 * covering the whole (1x1 MB) picture, deblocking disabled, using the
 * clip's fixed QP.
 */
static void
gen8_jpeg_wa_avc_slice_state(VADriverContextP ctx,
                             struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
    int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;
    int first_mb_in_slice = 0;
    int slice_type = SLICE_TYPE_I;

    BEGIN_BCS_BATCH(batch, 11);
    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, slice_type);
    OUT_BCS_BATCH(batch,
                  (num_ref_idx_l1 << 24) |
                  (num_ref_idx_l0 << 16) |
                  (0 << 8) |
                  (0 << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 29) |
                  (1 << 27) |   /* disable Deblocking */
                  (0 << 24) |
                  (gen7_jpeg_wa_clip.qp << 16) |
                  (0 << 8) |
                  (0 << 0));
    OUT_BCS_BATCH(batch,
                  (slice_ver_pos << 24) |
                  (slice_hor_pos << 16) |
                  (first_mb_in_slice << 0));
    OUT_BCS_BATCH(batch,
                  (next_slice_ver_pos << 16) |
                  (next_slice_hor_pos << 0));
    OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  2588.  
/*
 * JPEG decoding workaround: decode a tiny hard-coded AVC clip before
 * the real JPEG frame by emitting the full AVC decode state sequence
 * into the same batch.
 */
static void
gen8_mfd_jpeg_wa(VADriverContextP ctx,
                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    gen8_jpeg_wa_init(ctx, gen7_mfd_context);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
    gen8_jpeg_wa_surface_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);

    gen8_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
}
  2608.  
  2609. #endif
  2610.  
  2611. void
  2612. gen8_mfd_jpeg_decode_picture(VADriverContextP ctx,
  2613.                              struct decode_state *decode_state,
  2614.                              struct gen7_mfd_context *gen7_mfd_context)
  2615. {
  2616.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  2617.     VAPictureParameterBufferJPEGBaseline *pic_param;
  2618.     VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
  2619.     dri_bo *slice_data_bo;
  2620.     int i, j, max_selector = 0;
  2621.  
  2622.     assert(decode_state->pic_param && decode_state->pic_param->buffer);
  2623.     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
  2624.  
  2625.     /* Currently only support Baseline DCT */
  2626.     gen8_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
  2627.     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
  2628. #ifdef JPEG_WA
  2629.     gen8_mfd_jpeg_wa(ctx, gen7_mfd_context);
  2630. #endif
  2631.     intel_batchbuffer_emit_mi_flush(batch);
  2632.     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
  2633.     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
  2634.     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
  2635.     gen8_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
  2636.     gen8_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);
  2637.  
  2638.     for (j = 0; j < decode_state->num_slice_params; j++) {
  2639.         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
  2640.         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
  2641.         slice_data_bo = decode_state->slice_datas[j]->bo;
  2642.         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
  2643.  
  2644.         if (j == decode_state->num_slice_params - 1)
  2645.             next_slice_group_param = NULL;
  2646.         else
  2647.             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
  2648.  
  2649.         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
  2650.             int component;
  2651.  
  2652.             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
  2653.  
  2654.             if (i < decode_state->slice_params[j]->num_elements - 1)
  2655.                 next_slice_param = slice_param + 1;
  2656.             else
  2657.                 next_slice_param = next_slice_group_param;
  2658.  
  2659.             for (component = 0; component < slice_param->num_components; component++) {
  2660.                 if (max_selector < slice_param->components[component].dc_table_selector)
  2661.                     max_selector = slice_param->components[component].dc_table_selector;
  2662.  
  2663.                 if (max_selector < slice_param->components[component].ac_table_selector)
  2664.                     max_selector = slice_param->components[component].ac_table_selector;
  2665.             }
  2666.  
  2667.             slice_param++;
  2668.         }
  2669.     }
  2670.  
  2671.     assert(max_selector < 2);
  2672.     gen8_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);
  2673.  
  2674.     for (j = 0; j < decode_state->num_slice_params; j++) {
  2675.         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
  2676.         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
  2677.         slice_data_bo = decode_state->slice_datas[j]->bo;
  2678.         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
  2679.  
  2680.         if (j == decode_state->num_slice_params - 1)
  2681.             next_slice_group_param = NULL;
  2682.         else
  2683.             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
  2684.  
  2685.         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
  2686.             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
  2687.  
  2688.             if (i < decode_state->slice_params[j]->num_elements - 1)
  2689.                 next_slice_param = slice_param + 1;
  2690.             else
  2691.                 next_slice_param = next_slice_group_param;
  2692.  
  2693.             gen8_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
  2694.             slice_param++;
  2695.         }
  2696.     }
  2697.  
  2698.     intel_batchbuffer_end_atomic(batch);
  2699.     intel_batchbuffer_flush(batch);
  2700. }
  2701.  
/* VP8 DC dequantization lookup table, indexed by the clipped
 * quantization index (0..127). */
static const int vp8_dc_qlookup[128] =
{
      4,   5,   6,   7,   8,   9,  10,  10,  11,  12,  13,  14,  15,  16,  17,  17,
     18,  19,  20,  20,  21,  21,  22,  22,  23,  23,  24,  25,  25,  26,  27,  28,
     29,  30,  31,  32,  33,  34,  35,  36,  37,  37,  38,  39,  40,  41,  42,  43,
     44,  45,  46,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,
     59,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,
     75,  76,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,
     91,  93,  95,  96,  98, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118,
    122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157,
};
  2713.  
/* VP8 AC-coefficient dequantization lookup table, indexed by the clipped
 * quantization index 0..127 (see vp8_clip_quantization_index()).  The
 * values follow the VP8 specification (RFC 6386, "Dequantization"). */
static const int vp8_ac_qlookup[128] =
{
      4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,  16,  17,  18,  19,
     20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,
     36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
     52,  53,  54,  55,  56,  57,  58,  60,  62,  64,  66,  68,  70,  72,  74,  76,
     78,  80,  82,  84,  86,  88,  90,  92,  94,  96,  98, 100, 102, 104, 106, 108,
    110, 112, 114, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152,
    155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209,
    213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284,
};
  2725.  
  2726. static inline unsigned int vp8_clip_quantization_index(int index)
  2727. {
  2728.     if(index > 127)
  2729.         return 127;
  2730.     else if(index <0)
  2731.         return 0;
  2732.  
  2733.     return index;
  2734. }
  2735.  
  2736. static void
  2737. gen8_mfd_vp8_decode_init(VADriverContextP ctx,
  2738.                           struct decode_state *decode_state,
  2739.                           struct gen7_mfd_context *gen7_mfd_context)
  2740. {
  2741.     struct object_surface *obj_surface;
  2742.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2743.     dri_bo *bo;
  2744.     VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
  2745.     int width_in_mbs = (pic_param->frame_width + 15) / 16;
  2746.     int height_in_mbs = (pic_param->frame_height + 15) / 16;
  2747.  
  2748.     assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
  2749.     assert(height_in_mbs > 0 && height_in_mbs <= 256);
  2750.  
  2751.     intel_update_vp8_frame_store_index(ctx,
  2752.                                        decode_state,
  2753.                                        pic_param,
  2754.                                        gen7_mfd_context->reference_surface);
  2755.  
  2756.     /* Current decoded picture */
  2757.     obj_surface = decode_state->render_object;
  2758.     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
  2759.  
  2760.     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
  2761.     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
  2762.     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
  2763.     gen7_mfd_context->post_deblocking_output.valid = !pic_param->pic_fields.bits.loop_filter_disable;
  2764.  
  2765.     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
  2766.     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
  2767.     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
  2768.     gen7_mfd_context->pre_deblocking_output.valid = pic_param->pic_fields.bits.loop_filter_disable;
  2769.  
  2770.     intel_ensure_vp8_segmentation_buffer(ctx,
  2771.         &gen7_mfd_context->segmentation_buffer, width_in_mbs, height_in_mbs);
  2772.  
  2773.     /* The same as AVC */
  2774.     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
  2775.     bo = dri_bo_alloc(i965->intel.bufmgr,
  2776.                       "intra row store",
  2777.                       width_in_mbs * 64,
  2778.                       0x1000);
  2779.     assert(bo);
  2780.     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
  2781.     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
  2782.  
  2783.     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
  2784.     bo = dri_bo_alloc(i965->intel.bufmgr,
  2785.                       "deblocking filter row store",
  2786.                       width_in_mbs * 64 * 4,
  2787.                       0x1000);
  2788.     assert(bo);
  2789.     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
  2790.     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
  2791.  
  2792.     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
  2793.     bo = dri_bo_alloc(i965->intel.bufmgr,
  2794.                       "bsd mpc row store",
  2795.                       width_in_mbs * 64 * 2,
  2796.                       0x1000);
  2797.     assert(bo);
  2798.     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
  2799.     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
  2800.  
  2801.     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
  2802.     bo = dri_bo_alloc(i965->intel.bufmgr,
  2803.                       "mpr row store",
  2804.                       width_in_mbs * 64 * 2,
  2805.                       0x1000);
  2806.     assert(bo);
  2807.     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
  2808.     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
  2809.  
  2810.     gen7_mfd_context->bitplane_read_buffer.valid = 0;
  2811. }
  2812.  
/* Emit the MFX_VP8_PIC_STATE command (38 dwords) carrying the frame-level
 * VP8 parameters: frame size, partition count, loop-filter configuration,
 * segmentation, per-segment quantizer values, the coefficient-probability
 * buffer, mode/MV probabilities and loop-filter deltas. */
static void
gen8_mfd_vp8_pic_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
    VAIQMatrixBufferVP8 *iq_matrix = (VAIQMatrixBufferVP8 *)decode_state->iq_matrix->buffer;
    VASliceParameterBufferVP8 *slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer; /* one slice per frame */
    dri_bo *probs_bo = decode_state->probability_data->bo;
    int i, j,log2num;
    unsigned int quantization_value[4][6];

    /* There is no safe way to error out if the segmentation buffer
       could not be allocated. So, instead of aborting, simply decode
       something even if the result may look totally inaccurate */
    const unsigned int enable_segmentation =
        pic_param->pic_fields.bits.segmentation_enabled &&
        gen7_mfd_context->segmentation_buffer.valid;

    /* num_of_partitions also counts the first (control) partition, so
       this is log2 of the token partition count — see the matching
       computation in gen8_mfd_vp8_bsd_object(). */
    log2num = (int)log2(slice_param->num_of_partitions - 1);

    BEGIN_BCS_BATCH(batch, 38);
    OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2));
    /* DW1: frame dimensions in macroblocks, minus one */
    OUT_BCS_BATCH(batch,
                  (ALIGN(pic_param->frame_height, 16) / 16 - 1) << 16 |
                  (ALIGN(pic_param->frame_width, 16) / 16 - 1) << 0);
    /* DW2: partition count, loop filter and segmentation control flags */
    OUT_BCS_BATCH(batch,
                  log2num << 24 |
                  pic_param->pic_fields.bits.sharpness_level << 16 |
                  pic_param->pic_fields.bits.sign_bias_alternate << 13 |
                  pic_param->pic_fields.bits.sign_bias_golden << 12 |
                  pic_param->pic_fields.bits.loop_filter_adj_enable << 11 |
                  pic_param->pic_fields.bits.mb_no_coeff_skip << 10 |
                  (enable_segmentation &&
                   pic_param->pic_fields.bits.update_mb_segmentation_map) << 9 |
                  pic_param->pic_fields.bits.segmentation_enabled << 8 |
                  (enable_segmentation &&
                   !pic_param->pic_fields.bits.update_mb_segmentation_map) << 7 |
                  (enable_segmentation &&
                   pic_param->pic_fields.bits.update_mb_segmentation_map) << 6 |
                  (pic_param->pic_fields.bits.key_frame == 0 ? 1 : 0) << 5 |    /* 0 indicate an intra frame in VP8 stream/spec($9.1)*/
                  pic_param->pic_fields.bits.filter_type << 4 |
                  (pic_param->pic_fields.bits.version == 3) << 1 | /* full pixel mode for version 3 */
                  !!pic_param->pic_fields.bits.version << 0); /* version 0: 6 tap */

    /* DW3: per-segment loop filter levels */
    OUT_BCS_BATCH(batch,
                  pic_param->loop_filter_level[3] << 24 |
                  pic_param->loop_filter_level[2] << 16 |
                  pic_param->loop_filter_level[1] <<  8 |
                  pic_param->loop_filter_level[0] <<  0);

    /* Quantizer Value for 4 segments, DW4-DW15: translate the stream's
       quantization indices into actual quantizer values via the
       DC/AC lookup tables, with the spec's Y2/UV adjustments. */
    for (i = 0; i < 4; i++) {
                quantization_value[i][0] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][0])];/*yac*/
                quantization_value[i][1] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][1])];/*ydc*/
                quantization_value[i][2] = 2*vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][2])];/*y2dc*/
                /* 101581>>16 is equivalent to 155/100 */
                quantization_value[i][3] = (101581*vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][3])]) >> 16;/*y2ac*/
                quantization_value[i][4] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][4])];/*uvdc*/
                quantization_value[i][5] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][5])];/*uvac*/

                /* Clamp per the VP8 spec: y2ac >= 8, uvdc <= 132 */
                quantization_value[i][3] = (quantization_value[i][3] > 8 ? quantization_value[i][3] : 8);
                quantization_value[i][4] = (quantization_value[i][4] < 132 ? quantization_value[i][4] : 132);

                OUT_BCS_BATCH(batch,
                      quantization_value[i][0] << 16 | /* Y1AC */
                      quantization_value[i][1] <<  0); /* Y1DC */
        OUT_BCS_BATCH(batch,
                      quantization_value[i][5] << 16 | /* UVAC */
                      quantization_value[i][4] <<  0); /* UVDC */
        OUT_BCS_BATCH(batch,
                      quantization_value[i][3] << 16 | /* Y2AC */
                      quantization_value[i][2] <<  0); /* Y2DC */
    }

    /* CoeffProbability table for non-key frame, DW16-DW18 */
    if (probs_bo) {
        OUT_BCS_RELOC(batch, probs_bo,
                      0, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    } else {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    /* DW19: segment-id tree probabilities */
    OUT_BCS_BATCH(batch,
                  pic_param->mb_segment_tree_probs[2] << 16 |
                  pic_param->mb_segment_tree_probs[1] <<  8 |
                  pic_param->mb_segment_tree_probs[0] <<  0);

    /* DW20: frame-level mode probabilities */
    OUT_BCS_BATCH(batch,
                  pic_param->prob_skip_false << 24 |
                  pic_param->prob_intra      << 16 |
                  pic_param->prob_last       <<  8 |
                  pic_param->prob_gf         <<  0);

    /* DW21: luma intra mode probabilities */
    OUT_BCS_BATCH(batch,
                  pic_param->y_mode_probs[3] << 24 |
                  pic_param->y_mode_probs[2] << 16 |
                  pic_param->y_mode_probs[1] <<  8 |
                  pic_param->y_mode_probs[0] <<  0);

    /* DW22: chroma intra mode probabilities */
    OUT_BCS_BATCH(batch,
                  pic_param->uv_mode_probs[2] << 16 |
                  pic_param->uv_mode_probs[1] <<  8 |
                  pic_param->uv_mode_probs[0] <<  0);

    /* MV update value, DW23-DW32: 19 probs per component, packed 4 per
       dword; the last byte of the final dword is zero-padded. */
    for (i = 0; i < 2; i++) {
        for (j = 0; j < 20; j += 4) {
            OUT_BCS_BATCH(batch,
                          (j + 3 == 19 ? 0 : pic_param->mv_probs[i][j + 3]) << 24 |
                          pic_param->mv_probs[i][j + 2] << 16 |
                          pic_param->mv_probs[i][j + 1] <<  8 |
                          pic_param->mv_probs[i][j + 0] <<  0);
        }
    }

    /* DW33: reference-frame loop-filter deltas (7-bit signed) */
    OUT_BCS_BATCH(batch,
                  (pic_param->loop_filter_deltas_ref_frame[3] & 0x7f) << 24 |
                  (pic_param->loop_filter_deltas_ref_frame[2] & 0x7f) << 16 |
                  (pic_param->loop_filter_deltas_ref_frame[1] & 0x7f) <<  8 |
                  (pic_param->loop_filter_deltas_ref_frame[0] & 0x7f) <<  0);

    /* DW34: mb-mode loop-filter deltas (7-bit signed) */
    OUT_BCS_BATCH(batch,
                  (pic_param->loop_filter_deltas_mode[3] & 0x7f) << 24 |
                  (pic_param->loop_filter_deltas_mode[2] & 0x7f) << 16 |
                  (pic_param->loop_filter_deltas_mode[1] & 0x7f) <<  8 |
                  (pic_param->loop_filter_deltas_mode[0] & 0x7f) <<  0);

    /* segmentation id stream base address, DW35-DW37 */
    if (enable_segmentation) {
        OUT_BCS_RELOC(batch, gen7_mfd_context->segmentation_buffer.bo,
                      0, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }
    else {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }
    ADVANCE_BCS_BATCH(batch);
}
  2962.  
/* Emit the MFD_VP8_BSD_OBJECT command (22 dwords) describing the VP8
 * bitstream layout: the bool-coder restart state for partition 0 and the
 * size/offset of each of the up to 8 token partitions. */
static void
gen8_mfd_vp8_bsd_object(VADriverContextP ctx,
                        VAPictureParameterBufferVP8 *pic_param,
                        VASliceParameterBufferVP8 *slice_param,
                        dri_bo *slice_data_bo,
                        struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int i, log2num;
    /* Byte offset where the hardware resumes partition 0:
       macroblock_offset is a bit offset, rounded up to whole bytes. */
    unsigned int offset = slice_param->slice_data_offset + ((slice_param->macroblock_offset + 7 ) >> 3);
    /* Bits of the current bool-coder byte already consumed;
       bool_coder_ctx.count is the number of bits still available (0..7). */
    unsigned int used_bits = 8-pic_param->bool_coder_ctx.count;
    unsigned int partition_size_0 = slice_param->partition_size[0];

    assert(pic_param->bool_coder_ctx.count >= 0 && pic_param->bool_coder_ctx.count <= 7);
    /* A fully consumed byte means decoding restarts at the next byte. */
    if (used_bits == 8) {
        used_bits = 0;
        offset += 1;
        partition_size_0 -= 1;
    }

    /* num_of_partitions = 1 control partition + 1..8 token partitions */
    assert(slice_param->num_of_partitions >= 2);
    assert(slice_param->num_of_partitions <= 9);

    log2num = (int)log2(slice_param->num_of_partitions - 1);

    BEGIN_BCS_BATCH(batch, 22);
    OUT_BCS_BATCH(batch, MFD_VP8_BSD_OBJECT | (22 - 2));
    OUT_BCS_BATCH(batch,
                  used_bits << 16 | /* Partition 0 CPBAC Entropy Count */
                  pic_param->bool_coder_ctx.range <<  8 | /* Partition 0 Count Entropy Range */
                  log2num << 4 |
                  (slice_param->macroblock_offset & 0x7));
    OUT_BCS_BATCH(batch,
                  pic_param->bool_coder_ctx.value << 24 | /* Partition 0 Count Entropy Value */
                  0);

    OUT_BCS_BATCH(batch, partition_size_0 + 1);
    OUT_BCS_BATCH(batch, offset);
    //partition sizes in bytes are present after the above first partition when there are more than one token partition
    offset += (partition_size_0 + 3 * (slice_param->num_of_partitions - 2));
    /* DW5-DW20: size/offset pairs for up to 8 token partitions;
       unused slots are zero-filled. */
    for (i = 1; i < 9; i++) {
        if (i < slice_param->num_of_partitions) {
            OUT_BCS_BATCH(batch, slice_param->partition_size[i] + 1);
            OUT_BCS_BATCH(batch, offset);
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }

        offset += slice_param->partition_size[i];
    }

    OUT_BCS_BATCH(batch, 0); /* concealment method */

    ADVANCE_BCS_BATCH(batch);
}
  3019.  
/* Decode one VP8 frame: validate the decode-state buffers, set up the
 * per-picture surfaces/scratch buffers, then emit the full MFX command
 * sequence for the frame and flush the batch. */
void
gen8_mfd_vp8_decode_picture(VADriverContextP ctx,
                            struct decode_state *decode_state,
                            struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferVP8 *pic_param;
    VASliceParameterBufferVP8 *slice_param;
    dri_bo *slice_data_bo;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;

    /* one slice per frame: exactly one slice parameter buffer with one
       element, one slice data buffer, and a probability data buffer. */
    if (decode_state->num_slice_params != 1 ||
        (!decode_state->slice_params ||
         !decode_state->slice_params[0] ||
         (decode_state->slice_params[0]->num_elements != 1 || decode_state->slice_params[0]->buffer == NULL)) ||
        (!decode_state->slice_datas ||
         !decode_state->slice_datas[0] ||
         !decode_state->slice_datas[0]->bo) ||
        !decode_state->probability_data) {
        WARN_ONCE("Wrong parameters for VP8 decoding\n");

        return;
    }

    slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer;
    slice_data_bo = decode_state->slice_datas[0]->bo;

    gen8_mfd_vp8_decode_init(ctx, decode_state, gen7_mfd_context);
    /* Emit the MFX pipeline setup and the frame commands atomically. */
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
    gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
    gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
    gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
    gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VP8, gen7_mfd_context);
    gen8_mfd_vp8_pic_state(ctx, decode_state, gen7_mfd_context);
    gen8_mfd_vp8_bsd_object(ctx, pic_param, slice_param, slice_data_bo, gen7_mfd_context);
    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
  3063.  
  3064. static VAStatus
  3065. gen8_mfd_decode_picture(VADriverContextP ctx,
  3066.                         VAProfile profile,
  3067.                         union codec_state *codec_state,
  3068.                         struct hw_context *hw_context)
  3069.  
  3070. {
  3071.     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
  3072.     struct decode_state *decode_state = &codec_state->decode;
  3073.     VAStatus vaStatus;
  3074.  
  3075.     assert(gen7_mfd_context);
  3076.  
  3077.     vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);
  3078.  
  3079.     if (vaStatus != VA_STATUS_SUCCESS)
  3080.         goto out;
  3081.  
  3082.     gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
  3083.  
  3084.     switch (profile) {
  3085.     case VAProfileMPEG2Simple:
  3086.     case VAProfileMPEG2Main:
  3087.         gen8_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
  3088.         break;
  3089.        
  3090.     case VAProfileH264ConstrainedBaseline:
  3091.     case VAProfileH264Main:
  3092.     case VAProfileH264High:
  3093.     case VAProfileH264StereoHigh:
  3094.     case VAProfileH264MultiviewHigh:
  3095.         gen8_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
  3096.         break;
  3097.  
  3098.     case VAProfileVC1Simple:
  3099.     case VAProfileVC1Main:
  3100.     case VAProfileVC1Advanced:
  3101.         gen8_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
  3102.         break;
  3103.  
  3104.     case VAProfileJPEGBaseline:
  3105.         gen8_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
  3106.         break;
  3107.  
  3108.     case VAProfileVP8Version0_3:
  3109.         gen8_mfd_vp8_decode_picture(ctx, decode_state, gen7_mfd_context);
  3110.         break;
  3111.  
  3112.     default:
  3113.         assert(0);
  3114.         break;
  3115.     }
  3116.  
  3117.     vaStatus = VA_STATUS_SUCCESS;
  3118.  
  3119. out:
  3120.     return vaStatus;
  3121. }
  3122.  
  3123. static void
  3124. gen8_mfd_context_destroy(void *hw_context)
  3125. {
  3126.     VADriverContextP ctx;
  3127.     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
  3128.  
  3129.     ctx = (VADriverContextP)(gen7_mfd_context->driver_context);
  3130.  
  3131.     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
  3132.     gen7_mfd_context->post_deblocking_output.bo = NULL;
  3133.  
  3134.     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
  3135.     gen7_mfd_context->pre_deblocking_output.bo = NULL;
  3136.  
  3137.     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
  3138.     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
  3139.  
  3140.     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
  3141.     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
  3142.  
  3143.     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
  3144.     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
  3145.  
  3146.     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
  3147.     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
  3148.  
  3149.     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
  3150.     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
  3151.  
  3152.     dri_bo_unreference(gen7_mfd_context->segmentation_buffer.bo);
  3153.     gen7_mfd_context->segmentation_buffer.bo = NULL;
  3154.  
  3155.     dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);
  3156.  
  3157.     if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE) {
  3158.         i965_DestroySurfaces(ctx,
  3159.                              &gen7_mfd_context->jpeg_wa_surface_id,
  3160.                              1);
  3161.         gen7_mfd_context->jpeg_wa_surface_object = NULL;
  3162.     }
  3163.  
  3164.     intel_batchbuffer_free(gen7_mfd_context->base.batch);
  3165.     free(gen7_mfd_context);
  3166. }
  3167.  
  3168. static void gen8_mfd_mpeg2_context_init(VADriverContextP ctx,
  3169.                                     struct gen7_mfd_context *gen7_mfd_context)
  3170. {
  3171.     gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
  3172.     gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
  3173.     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
  3174.     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
  3175. }
  3176.  
  3177. struct hw_context *
  3178. gen8_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
  3179. {
  3180.     struct intel_driver_data *intel = intel_driver_data(ctx);
  3181.     struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
  3182.     int i;
  3183.  
  3184.     if (!gen7_mfd_context)
  3185.         return NULL;
  3186.  
  3187.     gen7_mfd_context->base.destroy = gen8_mfd_context_destroy;
  3188.     gen7_mfd_context->base.run = gen8_mfd_decode_picture;
  3189.     gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
  3190.  
  3191.     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
  3192.         gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
  3193.         gen7_mfd_context->reference_surface[i].frame_store_id = -1;
  3194.     }
  3195.  
  3196.     gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
  3197.     gen7_mfd_context->segmentation_buffer.valid = 0;
  3198.  
  3199.     switch (obj_config->profile) {
  3200.     case VAProfileMPEG2Simple:
  3201.     case VAProfileMPEG2Main:
  3202.         gen8_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
  3203.         break;
  3204.  
  3205.     case VAProfileH264ConstrainedBaseline:
  3206.     case VAProfileH264Main:
  3207.     case VAProfileH264High:
  3208.     case VAProfileH264StereoHigh:
  3209.     case VAProfileH264MultiviewHigh:
  3210.         gen8_mfd_avc_context_init(ctx, gen7_mfd_context);
  3211.         break;
  3212.     default:
  3213.         break;
  3214.     }
  3215.  
  3216.     gen7_mfd_context->driver_context = ctx;
  3217.     return (struct hw_context *)gen7_mfd_context;
  3218. }
  3219.