Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright © 2011 Intel Corporation
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the
  6.  * "Software"), to deal in the Software without restriction, including
  7.  * without limitation the rights to use, copy, modify, merge, publish,
  8.  * distribute, sub license, and/or sell copies of the Software, and to
  9.  * permit persons to whom the Software is furnished to do so, subject to
  10.  * the following conditions:
  11.  *
  12.  * The above copyright notice and this permission notice (including the
  13.  * next paragraph) shall be included in all copies or substantial portions
  14.  * of the Software.
  15.  *
  16.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  17.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  18.  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  19.  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
  20.  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  21.  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  22.  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23.  *
  24.  * Authors:
  25.  *    Xiang Haihao <haihao.xiang@intel.com>
  26.  *    Zhao  Yakui  <yakui.zhao@intel.com>
  27.  *
  28.  */
  29.  
  30. #include <stdio.h>
  31. #include <stdlib.h>
  32. #include <string.h>
  33. #include <assert.h>
  34. #include <math.h>
  35. #include <va/va_dec_jpeg.h>
  36. #include <va/va_dec_vp8.h>
  37.  
  38. #include "intel_batchbuffer.h"
  39. #include "intel_driver.h"
  40.  
  41. #include "i965_defines.h"
  42. #include "i965_drv_video.h"
  43. #include "i965_decoder_utils.h"
  44.  
  45. #include "gen7_mfd.h"
  46. #include "intel_media.h"
  47.  
  48. #define B0_STEP_REV             2
  49. #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
  50.  
/* Standard 8x8 zig-zag scan order (as used by JPEG/MPEG): maps a
 * coefficient's position in scan order to its raster index in the
 * 8x8 block.  Used to reorder quantization matrices supplied in
 * raster order into the scan order the hardware expects. */
static const uint32_t zigzag_direct[64] = {
    0,   1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
  61.  
  62. static void
  63. gen8_mfd_init_avc_surface(VADriverContextP ctx,
  64.                           VAPictureParameterBufferH264 *pic_param,
  65.                           struct object_surface *obj_surface)
  66. {
  67.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  68.     GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
  69.     int width_in_mbs, height_in_mbs;
  70.  
  71.     obj_surface->free_private_data = gen_free_avc_surface;
  72.     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
  73.     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
  74.  
  75.     if (!gen7_avc_surface) {
  76.         gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
  77.         gen7_avc_surface->frame_store_id = -1;
  78.         assert((obj_surface->size & 0x3f) == 0);
  79.         obj_surface->private_data = gen7_avc_surface;
  80.     }
  81.  
  82.     /* DMV buffers now relate to the whole frame, irrespective of
  83.        field coding modes */
  84.     if (gen7_avc_surface->dmv_top == NULL) {
  85.         gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
  86.                                                  "direct mv w/r buffer",
  87.                                                  width_in_mbs * height_in_mbs * 128,
  88.                                                  0x1000);
  89.         assert(gen7_avc_surface->dmv_top);
  90.     }
  91. }
  92.  
/*
 * Emit MFX_PIPE_MODE_SELECT: configures the MFX engine for VLD decode
 * of the given codec (standard_select).  The DWord order below follows
 * the hardware command layout and must not be changed.
 */
static void
gen8_mfd_pipe_mode_select(VADriverContextP ctx,
                          struct decode_state *decode_state,
                          int standard_select,
                          struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    /* Only the codecs this decoder backend supports. */
    assert(standard_select == MFX_FORMAT_MPEG2 ||
           standard_select == MFX_FORMAT_AVC ||
           standard_select == MFX_FORMAT_VC1 ||
           standard_select == MFX_FORMAT_JPEG ||
           standard_select == MFX_FORMAT_VP8);

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
    /* DW1: decode mode flags; exactly one of pre-/post-deblocking output
     * is expected to be valid, depending on whether the codec's in-loop
     * deblocking is enabled. */
    OUT_BCS_BATCH(batch,
                  (MFX_LONG_MODE << 17) | /* Currently only support long format */
                  (MFD_MODE_VLD << 15) | /* VLD mode */
                  (0 << 10) | /* disable Stream-Out */
                  (gen7_mfd_context->post_deblocking_output.valid << 9)  | /* Post Deblocking Output */
                  (gen7_mfd_context->pre_deblocking_output.valid << 8)  | /* Pre Deblocking Output */
                  (0 << 5)  | /* not in stitch mode */
                  (MFX_CODEC_DECODE << 4)  | /* decoding mode */
                  (standard_select << 0));
    /* DW2: error-handling policy — never terminate on decode errors. */
    OUT_BCS_BATCH(batch,
                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
                  (0 << 1)  |
                  (0 << 0));
    OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
    OUT_BCS_BATCH(batch, 0); /* reserved */
    ADVANCE_BCS_BATCH(batch);
}
  128.  
  129. static void
  130. gen8_mfd_surface_state(VADriverContextP ctx,
  131.                        struct decode_state *decode_state,
  132.                        int standard_select,
  133.                        struct gen7_mfd_context *gen7_mfd_context)
  134. {
  135.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  136.     struct object_surface *obj_surface = decode_state->render_object;
  137.     unsigned int y_cb_offset;
  138.     unsigned int y_cr_offset;
  139.     unsigned int surface_format;
  140.  
  141.     assert(obj_surface);
  142.  
  143.     y_cb_offset = obj_surface->y_cb_offset;
  144.     y_cr_offset = obj_surface->y_cr_offset;
  145.  
  146.     surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
  147.         MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
  148.  
  149.     BEGIN_BCS_BATCH(batch, 6);
  150.     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
  151.     OUT_BCS_BATCH(batch, 0);
  152.     OUT_BCS_BATCH(batch,
  153.                   ((obj_surface->orig_height - 1) << 18) |
  154.                   ((obj_surface->orig_width - 1) << 4));
  155.     OUT_BCS_BATCH(batch,
  156.                   (surface_format << 28) | /* 420 planar YUV surface */
  157.                   ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
  158.                   (0 << 22) | /* surface object control state, ignored */
  159.                   ((obj_surface->width - 1) << 3) | /* pitch */
  160.                   (0 << 2)  | /* must be 0 */
  161.                   (1 << 1)  | /* must be tiled */
  162.                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
  163.     OUT_BCS_BATCH(batch,
  164.                   (0 << 16) | /* X offset for U(Cb), must be 0 */
  165.                   (y_cb_offset << 0)); /* Y offset for U(Cb) */
  166.     OUT_BCS_BATCH(batch,
  167.                   (0 << 16) | /* X offset for V(Cr), must be 0 */
  168.                   (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoro for JPEG */
  169.     ADVANCE_BCS_BATCH(batch);
  170. }
  171.  
/*
 * Emit MFX_PIPE_BUF_ADDR_STATE (61 DWords): programs every buffer
 * address the MFX engine may read or write — decoded picture outputs,
 * row-store scratch buffers and the 16 reference picture addresses.
 * Each address is 3 DWords (lo/hi/ignored) on gen8; invalid buffers
 * are programmed as zero.  The DWord order is hardware-defined.
 */
static void
gen8_mfd_pipe_buf_addr_state(VADriverContextP ctx,
                             struct decode_state *decode_state,
                             int standard_select,
                             struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int i;

    BEGIN_BCS_BATCH(batch, 61);
    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
        /* Pre-deblock 1-3 */
    if (gen7_mfd_context->pre_deblocking_output.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        /* Post-deblocking 4-6 */
    if (gen7_mfd_context->post_deblocking_output.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);

        /* uncompressed-video & stream out 7-12 */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);

        /* intra row-store scratch 13-15 */
    if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        /* deblocking-filter-row-store 16-18 */
    if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);

    /* DW 19..50: the 16 reference picture addresses (2 DWords each).
     * Unused slots are programmed as zero. */
    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
        struct object_surface *obj_surface;

        if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
            gen7_mfd_context->reference_surface[i].obj_surface &&
            gen7_mfd_context->reference_surface[i].obj_surface->bo) {
            obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;

            OUT_BCS_RELOC(batch, obj_surface->bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0);
        } else {
            OUT_BCS_BATCH(batch, 0);
        }

        OUT_BCS_BATCH(batch, 0);
    }

    /* reference property 51 */
    OUT_BCS_BATCH(batch, 0);

    /* Macroblock status & ILDB 52-57 (unused for decode) */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* the second Macroblock status 58-60 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
  269.  
/*
 * Emit MFX_IND_OBJ_BASE_ADDR_STATE: points the bitstream decoder at
 * the slice data buffer.  Only the indirect bitstream object base is
 * programmed for decode; the MV/IT-coefficient/IT-deblock/PAK-BSE
 * groups are encoder- or IT-mode-only and are left zero.
 */
static void
gen8_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
                                 dri_bo *slice_data_bo,
                                 int standard_select,
                                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 26);
    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
        /* MFX In BS 1-5 */
    OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
        /* Upper bound 4-5 */
    OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
    OUT_BCS_BATCH(batch, 0);

        /* MFX indirect MV 6-10 (unused for decode) */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

        /* MFX IT_COFF 11-15 (unused for decode) */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

        /* MFX IT_DBLK 16-20 (unused for decode) */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

        /* MFX PAK_BSE object for encoder 21-25 (unused for decode) */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
  318.  
/*
 * Emit MFX_BSP_BUF_BASE_ADDR_STATE: programs the BSD/MPC row-store,
 * MPR row-store and VC-1 bitplane buffers.  Invalid buffers are
 * written as zero.  Each entry occupies 3 DWords on gen8.
 */
static void
gen8_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
                                 struct decode_state *decode_state,
                                 int standard_select,
                                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 10);
    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));

    /* BSD/MPC row store scratch buffer 1-3 */
    if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
        else
                OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
        /* MPR Row Store Scratch buffer 4-6 */
    if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

        /* Bitplane 7-9 (read-only: VC-1 bitplane buffer) */
    if (gen7_mfd_context->bitplane_read_buffer.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  361.  
  362. static void
  363. gen8_mfd_qm_state(VADriverContextP ctx,
  364.                   int qm_type,
  365.                   unsigned char *qm,
  366.                   int qm_length,
  367.                   struct gen7_mfd_context *gen7_mfd_context)
  368. {
  369.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  370.     unsigned int qm_buffer[16];
  371.  
  372.     assert(qm_length <= 16 * 4);
  373.     memcpy(qm_buffer, qm, qm_length);
  374.  
  375.     BEGIN_BCS_BATCH(batch, 18);
  376.     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
  377.     OUT_BCS_BATCH(batch, qm_type << 0);
  378.     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
  379.     ADVANCE_BCS_BATCH(batch);
  380. }
  381.  
/*
 * Emit MFX_AVC_IMG_STATE (17 DWords): per-picture AVC decode
 * parameters derived from the VA-API picture parameter buffer —
 * frame geometry in macroblocks, QP offsets, field/frame structure,
 * and the sequence/picture coding flags.
 */
static void
gen8_mfd_avc_img_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int img_struct;
    int mbaff_frame_flag;
    unsigned int width_in_mbs, height_in_mbs;
    VAPictureParameterBufferH264 *pic_param;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));

    /* img_struct: 0 = frame, 1 = top field, 3 = bottom field */
    if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
        img_struct = 1;
    else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
        img_struct = 3;
    else
        img_struct = 0;

    /* Field pictures must agree with the field_pic_flag. */
    if ((img_struct & 0x1) == 0x1) {
        assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
    } else {
        assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
    }

    if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
        assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
        assert(pic_param->pic_fields.bits.field_pic_flag == 0);
    } else {
        assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
    }

    /* MBAFF: adaptive frame/field coding within a frame picture. */
    mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
                        !pic_param->pic_fields.bits.field_pic_flag);

    width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */

    /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
    assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
           pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
    assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */

    BEGIN_BCS_BATCH(batch, 17);
    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2));
    /* DW1: total macroblock count minus one */
    OUT_BCS_BATCH(batch,
                  (width_in_mbs * height_in_mbs - 1));
    /* DW2: frame size in macroblocks minus one */
    OUT_BCS_BATCH(batch,
                  ((height_in_mbs - 1) << 16) |
                  ((width_in_mbs - 1) << 0));
    /* DW3: chroma QP offsets, weighting flags and image structure */
    OUT_BCS_BATCH(batch,
                  ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
                  ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
                  (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
                  (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
                  (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
                  (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
                  (img_struct << 8));
    /* DW4: sequence/picture coding flags */
    OUT_BCS_BATCH(batch,
                  (pic_param->seq_fields.bits.chroma_format_idc << 10) |
                  (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
                  ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
                  (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
                  (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
                  (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
                  (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
                  (mbaff_frame_flag << 1) |
                  (pic_param->pic_fields.bits.field_pic_flag << 0));
    /* DW5..DW16: unused for decode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  467.  
  468. static void
  469. gen8_mfd_avc_qm_state(VADriverContextP ctx,
  470.                       struct decode_state *decode_state,
  471.                       struct gen7_mfd_context *gen7_mfd_context)
  472. {
  473.     VAIQMatrixBufferH264 *iq_matrix;
  474.     VAPictureParameterBufferH264 *pic_param;
  475.  
  476.     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
  477.         iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
  478.     else
  479.         iq_matrix = &gen7_mfd_context->iq_matrix.h264;
  480.  
  481.     assert(decode_state->pic_param && decode_state->pic_param->buffer);
  482.     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
  483.  
  484.     gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
  485.     gen8_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
  486.  
  487.     if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
  488.         gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
  489.         gen8_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
  490.     }
  491. }
  492.  
  493. static inline void
  494. gen8_mfd_avc_picid_state(VADriverContextP ctx,
  495.     struct decode_state *decode_state,
  496.     struct gen7_mfd_context *gen7_mfd_context)
  497. {
  498.     gen75_send_avc_picid_state(gen7_mfd_context->base.batch,
  499.         gen7_mfd_context->reference_surface);
  500. }
  501.  
/*
 * Emit MFX_AVC_DIRECTMODE_STATE (71 DWords): the direct-MV buffer
 * addresses for all 16 reference surfaces plus the current picture,
 * followed by the POC (picture order count) list used by B-slice
 * direct/temporal prediction.
 */
static void
gen8_mfd_avc_directmode_state(VADriverContextP ctx,
                              struct decode_state *decode_state,
                              VAPictureParameterBufferH264 *pic_param,
                              VASliceParameterBufferH264 *slice_param,
                              struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct object_surface *obj_surface;
    GenAvcSurface *gen7_avc_surface;
    VAPictureH264 *va_pic;
    int i;

    BEGIN_BCS_BATCH(batch, 71);
    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));

    /* reference surfaces 0..15: DMV top-buffer address (2 DWords each),
     * zero for empty slots */
    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
        if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
            gen7_mfd_context->reference_surface[i].obj_surface &&
            gen7_mfd_context->reference_surface[i].obj_surface->private_data) {

            obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
            gen7_avc_surface = obj_surface->private_data;

            OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0);
            OUT_BCS_BATCH(batch, 0);
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    OUT_BCS_BATCH(batch, 0);

    /* the current decoding frame/field: its DMV buffer is written by
     * the hardware, hence the read/write domain */
    va_pic = &pic_param->CurrPic;
    obj_surface = decode_state->render_object;
    assert(obj_surface->bo && obj_surface->private_data);
    gen7_avc_surface = obj_surface->private_data;

    OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* POC List: top/bottom field order counts per reference slot.
     * NOTE: the loop-local va_pic intentionally shadows the outer one. */
    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
        obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;

        if (obj_surface) {
            const VAPictureH264 * const va_pic = avc_find_picture(
                obj_surface->base.id, pic_param->ReferenceFrames,
                ARRAY_ELEMS(pic_param->ReferenceFrames));

            assert(va_pic != NULL);
            OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
            OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    /* ...and finally the POCs of the current picture */
    va_pic = &pic_param->CurrPic;
    OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
    OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);

    ADVANCE_BCS_BATCH(batch);
}
  576.  
  577. static void
  578. gen8_mfd_avc_phantom_slice_first(VADriverContextP ctx,
  579.                                  VAPictureParameterBufferH264 *pic_param,
  580.                                  VASliceParameterBufferH264 *next_slice_param,
  581.                                  struct gen7_mfd_context *gen7_mfd_context)
  582. {
  583.     gen6_mfd_avc_phantom_slice(ctx, pic_param, next_slice_param, gen7_mfd_context->base.batch);
  584. }
  585.  
/*
 * Emit MFX_AVC_SLICE_STATE for one slice: slice type, active reference
 * counts, deblocking/CABAC parameters, QP, and the start position of
 * this slice and of the next one (or the end of the picture for the
 * last slice).  next_slice_param == NULL marks the last slice.
 */
static void
gen8_mfd_avc_slice_state(VADriverContextP ctx,
                         VAPictureParameterBufferH264 *pic_param,
                         VASliceParameterBufferH264 *slice_param,
                         VASliceParameterBufferH264 *next_slice_param,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
    int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
    int num_ref_idx_l0, num_ref_idx_l1;
    int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
                         pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
    int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
    int slice_type;

    /* Collapse SI->I and SP->P: the hardware only knows I/P/B. */
    if (slice_param->slice_type == SLICE_TYPE_I ||
        slice_param->slice_type == SLICE_TYPE_SI) {
        slice_type = SLICE_TYPE_I;
    } else if (slice_param->slice_type == SLICE_TYPE_P ||
               slice_param->slice_type == SLICE_TYPE_SP) {
        slice_type = SLICE_TYPE_P;
    } else {
        assert(slice_param->slice_type == SLICE_TYPE_B);
        slice_type = SLICE_TYPE_B;
    }

    /* Active reference counts: I uses none, P uses L0 only, B uses both. */
    if (slice_type == SLICE_TYPE_I) {
        assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
        num_ref_idx_l0 = 0;
        num_ref_idx_l1 = 0;
    } else if (slice_type == SLICE_TYPE_P) {
        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
        num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_idx_l1 = 0;
    } else {
        num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
    }

    /* In MBAFF pictures the MB address counts macroblock pairs, so the
     * raw first_mb_in_slice must be doubled. */
    first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
    slice_hor_pos = first_mb_in_slice % width_in_mbs;
    slice_ver_pos = first_mb_in_slice / width_in_mbs;

    if (next_slice_param) {
        first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture;
        next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs;
        next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
    } else {
        /* Last slice: "next" position is the bottom of the picture
         * (half height for a field picture). */
        next_slice_hor_pos = 0;
        next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
    }

    BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, slice_type);
    /* DW2: reference counts and weight denominators */
    OUT_BCS_BATCH(batch,
                  (num_ref_idx_l1 << 24) |
                  (num_ref_idx_l0 << 16) |
                  (slice_param->chroma_log2_weight_denom << 8) |
                  (slice_param->luma_log2_weight_denom << 0));
    /* DW3: prediction/deblocking flags and slice QP */
    OUT_BCS_BATCH(batch,
                  (slice_param->direct_spatial_mv_pred_flag << 29) |
                  (slice_param->disable_deblocking_filter_idc << 27) |
                  (slice_param->cabac_init_idc << 24) |
                  ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
                  ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
                  ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
    /* DW4: start position of this slice */
    OUT_BCS_BATCH(batch,
                  (slice_ver_pos << 24) |
                  (slice_hor_pos << 16) |
                  (first_mb_in_slice << 0));
    /* DW5: start position of the next slice */
    OUT_BCS_BATCH(batch,
                  (next_slice_ver_pos << 16) |
                  (next_slice_hor_pos << 0));
    OUT_BCS_BATCH(batch,
                  (next_slice_param == NULL) << 19); /* last slice flag */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  671.  
  672. static inline void
  673. gen8_mfd_avc_ref_idx_state(VADriverContextP ctx,
  674.                            VAPictureParameterBufferH264 *pic_param,
  675.                            VASliceParameterBufferH264 *slice_param,
  676.                            struct gen7_mfd_context *gen7_mfd_context)
  677. {
  678.     gen6_send_avc_ref_idx_state(
  679.         gen7_mfd_context->base.batch,
  680.         slice_param,
  681.         gen7_mfd_context->reference_surface
  682.     );
  683. }
  684.  
  685. static void
  686. gen8_mfd_avc_weightoffset_state(VADriverContextP ctx,
  687.                                 VAPictureParameterBufferH264 *pic_param,
  688.                                 VASliceParameterBufferH264 *slice_param,
  689.                                 struct gen7_mfd_context *gen7_mfd_context)
  690. {
  691.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  692.     int i, j, num_weight_offset_table = 0;
  693.     short weightoffsets[32 * 6];
  694.  
  695.     if ((slice_param->slice_type == SLICE_TYPE_P ||
  696.          slice_param->slice_type == SLICE_TYPE_SP) &&
  697.         (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
  698.         num_weight_offset_table = 1;
  699.     }
  700.    
  701.     if ((slice_param->slice_type == SLICE_TYPE_B) &&
  702.         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
  703.         num_weight_offset_table = 2;
  704.     }
  705.  
  706.     for (i = 0; i < num_weight_offset_table; i++) {
  707.         BEGIN_BCS_BATCH(batch, 98);
  708.         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
  709.         OUT_BCS_BATCH(batch, i);
  710.  
  711.         if (i == 0) {
  712.             for (j = 0; j < 32; j++) {
  713.                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
  714.                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
  715.                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
  716.                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
  717.                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
  718.                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
  719.             }
  720.         } else {
  721.             for (j = 0; j < 32; j++) {
  722.                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
  723.                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
  724.                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
  725.                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
  726.                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
  727.                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
  728.             }
  729.         }
  730.  
  731.         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
  732.         ADVANCE_BCS_BATCH(batch);
  733.     }
  734. }
  735.  
/*
 * Emit an MFD_AVC_BSD_OBJECT command that triggers bitstream decoding of
 * one AVC slice whose raw data lives in slice_data_bo.  next_slice_param
 * is NULL for the final slice of the picture, which sets the LastSlice
 * flag in the command.
 */
static void
gen8_mfd_avc_bsd_object(VADriverContextP ctx,
                        VAPictureParameterBufferH264 *pic_param,
                        VASliceParameterBufferH264 *slice_param,
                        dri_bo *slice_data_bo,
                        VASliceParameterBufferH264 *next_slice_param,
                        struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    /* Bit offset of the first macroblock inside the slice data; the helper
     * accounts for CABAC vs. CAVLC entropy coding. */
    int slice_data_bit_offset = avc_get_first_mb_bit_offset(slice_data_bo,
                                                            slice_param,
                                                            pic_param->pic_fields.bits.entropy_coding_mode_flag);

    /* the input bitstream format on GEN7 differs from GEN6 */
    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
    OUT_BCS_BATCH(batch,
                  (slice_param->slice_data_size));
    OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
    /* DW3: all optional controls left at 0 — bit meanings per the Gen PRM
     * MFD_AVC_BSD_OBJECT description; not re-derived here. */
    OUT_BCS_BATCH(batch,
                  (0 << 31) |
                  (0 << 14) |
                  (0 << 12) |
                  (0 << 10) |
                  (0 << 8));
    /* DW4: byte part of the first-MB offset in bits 16+, the residual
     * bit part (0-7) in the low 3 bits. */
    OUT_BCS_BATCH(batch,
                  ((slice_data_bit_offset >> 3) << 16) |
                  (1 << 7)  |
                  (0 << 5)  |
                  (0 << 4)  |
                  ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
                  (slice_data_bit_offset & 0x7));
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  771.  
/*
 * One-time AVC context setup: seed the cached H.264 IQ matrix with the
 * default (flat) scaling lists so a decode without an explicit IQ matrix
 * buffer still has valid state.
 */
static inline void
gen8_mfd_avc_context_init(
    VADriverContextP         ctx,
    struct gen7_mfd_context *gen7_mfd_context
)
{
    /* Initialize flat scaling lists */
    avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
}
  781.  
/*
 * Per-picture AVC decode setup: decide whether the in-loop deblocking
 * filter (ILDB) is active, refresh the frame store, pin the render
 * target as the decode output, and (re)allocate the row-store scratch
 * buffers the MFX engine needs.
 *
 * Buffer ownership: each slot unreferences its previous bo before
 * taking a new reference, so repeated calls do not leak.
 */
static void
gen8_mfd_avc_decode_init(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    VAPictureParameterBufferH264 *pic_param;
    VASliceParameterBufferH264 *slice_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    dri_bo *bo;
    int i, j, enable_avc_ildb = 0;
    unsigned int width_in_mbs, height_in_mbs;

    /* Deblocking is needed unless every slice disables it outright
     * (disable_deblocking_filter_idc == 1); stop scanning at the first
     * slice that wants it. */
    for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            if (slice_param->disable_deblocking_filter_idc != 1) {
                enable_avc_ildb = 1;
                break;
            }

            slice_param++;
        }
    }

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    gen75_update_avc_frame_store_index(ctx, decode_state, pic_param,
        gen7_mfd_context->reference_surface);
    width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
    assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
    assert(height_in_mbs > 0 && height_in_mbs <= 256);

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    if (pic_param->pic_fields.bits.reference_pic_flag)
        obj_surface->flags |= SURFACE_REFERENCED;
    else
        obj_surface->flags &= ~SURFACE_REFERENCED;

    avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
    gen8_mfd_init_avc_surface(ctx, pic_param, obj_surface);

    /* The decoded frame is written either post- or pre-deblocking,
     * depending on whether ILDB is active; both slots point at the
     * render target's bo, only one is marked valid. */
    dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;

    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;

    /* Row-store scratch buffers; the per-macroblock-row sizes follow the
     * MFX hardware requirements (per PRM — not re-derived here). */
    dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "intra row store",
                      width_in_mbs * 64,
                      0x1000);
    assert(bo);
    gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "deblocking filter row store",
                      width_in_mbs * 64 * 4,
                      0x1000);
    assert(bo);
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 64 * 2,
                      0x1000);
    assert(bo);
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "mpr row store",
                      width_in_mbs * 64 * 2,
                      0x1000);
    assert(bo);
    gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;

    /* AVC carries no bitplane data (VC-1 only). */
    gen7_mfd_context->bitplane_read_buffer.valid = 0;
}
  883.  
  884. static void
  885. gen8_mfd_avc_decode_picture(VADriverContextP ctx,
  886.                             struct decode_state *decode_state,
  887.                             struct gen7_mfd_context *gen7_mfd_context)
  888. {
  889.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  890.     VAPictureParameterBufferH264 *pic_param;
  891.     VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
  892.     dri_bo *slice_data_bo;
  893.     int i, j;
  894.  
  895.     assert(decode_state->pic_param && decode_state->pic_param->buffer);
  896.     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
  897.     gen8_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);
  898.  
  899.     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
  900.     intel_batchbuffer_emit_mi_flush(batch);
  901.     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
  902.     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
  903.     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
  904.     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
  905.     gen8_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
  906.     gen8_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
  907.     gen8_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
  908.  
  909.     for (j = 0; j < decode_state->num_slice_params; j++) {
  910.         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
  911.         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
  912.         slice_data_bo = decode_state->slice_datas[j]->bo;
  913.         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);
  914.  
  915.         if (j == decode_state->num_slice_params - 1)
  916.             next_slice_group_param = NULL;
  917.         else
  918.             next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;
  919.  
  920.         if (j == 0 && slice_param->first_mb_in_slice)
  921.             gen8_mfd_avc_phantom_slice_first(ctx, pic_param, slice_param, gen7_mfd_context);
  922.  
  923.         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
  924.             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
  925.             assert((slice_param->slice_type == SLICE_TYPE_I) ||
  926.                    (slice_param->slice_type == SLICE_TYPE_SI) ||
  927.                    (slice_param->slice_type == SLICE_TYPE_P) ||
  928.                    (slice_param->slice_type == SLICE_TYPE_SP) ||
  929.                    (slice_param->slice_type == SLICE_TYPE_B));
  930.  
  931.             if (i < decode_state->slice_params[j]->num_elements - 1)
  932.                 next_slice_param = slice_param + 1;
  933.             else
  934.                 next_slice_param = next_slice_group_param;
  935.  
  936.             gen8_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen7_mfd_context);
  937.             gen8_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
  938.             gen8_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
  939.             gen8_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
  940.             gen8_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
  941.             slice_param++;
  942.         }
  943.     }
  944.  
  945.     intel_batchbuffer_end_atomic(batch);
  946.     intel_batchbuffer_flush(batch);
  947. }
  948.  
  949. static void
  950. gen8_mfd_mpeg2_decode_init(VADriverContextP ctx,
  951.                            struct decode_state *decode_state,
  952.                            struct gen7_mfd_context *gen7_mfd_context)
  953. {
  954.     VAPictureParameterBufferMPEG2 *pic_param;
  955.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  956.     struct object_surface *obj_surface;
  957.     dri_bo *bo;
  958.     unsigned int width_in_mbs;
  959.  
  960.     assert(decode_state->pic_param && decode_state->pic_param->buffer);
  961.     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
  962.     width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
  963.  
  964.     mpeg2_set_reference_surfaces(
  965.         ctx,
  966.         gen7_mfd_context->reference_surface,
  967.         decode_state,
  968.         pic_param
  969.     );
  970.  
  971.     /* Current decoded picture */
  972.     obj_surface = decode_state->render_object;
  973.     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
  974.  
  975.     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
  976.     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
  977.     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
  978.     gen7_mfd_context->pre_deblocking_output.valid = 1;
  979.  
  980.     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
  981.     bo = dri_bo_alloc(i965->intel.bufmgr,
  982.                       "bsd mpc row store",
  983.                       width_in_mbs * 96,
  984.                       0x1000);
  985.     assert(bo);
  986.     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
  987.     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
  988.  
  989.     gen7_mfd_context->post_deblocking_output.valid = 0;
  990.     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
  991.     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
  992.     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
  993.     gen7_mfd_context->bitplane_read_buffer.valid = 0;
  994. }
  995.  
/*
 * Emit the MFX_MPEG2_PIC_STATE command describing the current picture
 * (f_codes, coding extensions, picture type, and frame dimensions in
 * macroblocks).
 */
static void
gen8_mfd_mpeg2_pic_state(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferMPEG2 *pic_param;
    unsigned int slice_concealment_disable_bit = 0;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;

    /* Concealment is always disabled here (forced to 1 unconditionally —
     * presumably a hardware workaround; confirm against the PRM). */
    slice_concealment_disable_bit = 1;

    BEGIN_BCS_BATCH(batch, 13);
    OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
    /* DW1: unpack the four 4-bit f_codes from VA's packed 16-bit field
     * (f_code[0][0] in the top nibble) plus the coding-extension flags. */
    OUT_BCS_BATCH(batch,
                  (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
                  ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
                  ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
                  ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
                  pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
                  pic_param->picture_coding_extension.bits.picture_structure << 12 |
                  pic_param->picture_coding_extension.bits.top_field_first << 11 |
                  pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
                  pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
                  pic_param->picture_coding_extension.bits.q_scale_type << 8 |
                  pic_param->picture_coding_extension.bits.intra_vlc_format << 7 |
                  pic_param->picture_coding_extension.bits.alternate_scan << 6);
    OUT_BCS_BATCH(batch,
                  pic_param->picture_coding_type << 9);
    /* DW3: frame size in macroblocks, minus one in each dimension. */
    OUT_BCS_BATCH(batch,
                  (slice_concealment_disable_bit << 31) |
                  ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
                  ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  1042.  
  1043. static void
  1044. gen8_mfd_mpeg2_qm_state(VADriverContextP ctx,
  1045.                         struct decode_state *decode_state,
  1046.                         struct gen7_mfd_context *gen7_mfd_context)
  1047. {
  1048.     VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
  1049.     int i, j;
  1050.  
  1051.     /* Update internal QM state */
  1052.     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
  1053.         VAIQMatrixBufferMPEG2 * const iq_matrix =
  1054.             (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
  1055.  
  1056.         if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
  1057.             iq_matrix->load_intra_quantiser_matrix) {
  1058.             gen_iq_matrix->load_intra_quantiser_matrix =
  1059.                 iq_matrix->load_intra_quantiser_matrix;
  1060.             if (iq_matrix->load_intra_quantiser_matrix) {
  1061.                 for (j = 0; j < 64; j++)
  1062.                     gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
  1063.                         iq_matrix->intra_quantiser_matrix[j];
  1064.             }
  1065.         }
  1066.  
  1067.         if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
  1068.             iq_matrix->load_non_intra_quantiser_matrix) {
  1069.             gen_iq_matrix->load_non_intra_quantiser_matrix =
  1070.                 iq_matrix->load_non_intra_quantiser_matrix;
  1071.             if (iq_matrix->load_non_intra_quantiser_matrix) {
  1072.                 for (j = 0; j < 64; j++)
  1073.                     gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
  1074.                         iq_matrix->non_intra_quantiser_matrix[j];
  1075.             }
  1076.         }
  1077.     }
  1078.  
  1079.     /* Commit QM state to HW */
  1080.     for (i = 0; i < 2; i++) {
  1081.         unsigned char *qm = NULL;
  1082.         int qm_type;
  1083.  
  1084.         if (i == 0) {
  1085.             if (gen_iq_matrix->load_intra_quantiser_matrix) {
  1086.                 qm = gen_iq_matrix->intra_quantiser_matrix;
  1087.                 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
  1088.             }
  1089.         } else {
  1090.             if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
  1091.                 qm = gen_iq_matrix->non_intra_quantiser_matrix;
  1092.                 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
  1093.             }
  1094.         }
  1095.  
  1096.         if (!qm)
  1097.             continue;
  1098.  
  1099.         gen8_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
  1100.     }
  1101. }
  1102.  
/*
 * Emit an MFD_MPEG2_BSD_OBJECT command decoding one MPEG-2 slice.
 * The slice's macroblock span is computed from its own start position
 * and the start of the following slice (or the bottom of the picture
 * for the last slice, signalled by next_slice_param == NULL).
 */
static void
gen8_mfd_mpeg2_bsd_object(VADriverContextP ctx,
                          VAPictureParameterBufferMPEG2 *pic_param,
                          VASliceParameterBufferMPEG2 *slice_param,
                          VASliceParameterBufferMPEG2 *next_slice_param,
                          struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
    int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;

    if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
        pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
        is_field_pic = 1;
    /* Workaround: some applications report slice_vertical_position in
     * frame coordinates for field pictures; when the context detected
     * that (wa > 0), halve the reported positions. */
    is_field_pic_wa = is_field_pic &&
        gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;

    /* Start position of this slice (in macroblocks). */
    vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
    hpos0 = slice_param->slice_horizontal_position;

    /* End position: start of the next slice, or the picture's bottom row
     * (halved for field pictures) when this is the last slice. */
    if (next_slice_param == NULL) {
        vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
        hpos1 = 0;
    } else {
        vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
        hpos1 = next_slice_param->slice_horizontal_position;
    }

    /* Number of macroblocks covered by this slice, in raster order. */
    mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
    /* Slice data size/offset adjusted to skip the whole bytes of
     * macroblock_offset; the residual bits go into DW3. */
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
    OUT_BCS_BATCH(batch,
                  hpos0 << 24 |
                  vpos0 << 16 |
                  mb_count << 8 |
                  (next_slice_param == NULL) << 5 |
                  (next_slice_param == NULL) << 3 |
                  (slice_param->macroblock_offset & 0x7));
    OUT_BCS_BATCH(batch,
                  (slice_param->quantiser_scale_code << 24) |
                  (vpos1 << 8 | hpos1));
    ADVANCE_BCS_BATCH(batch);
}
  1151.  
  1152. static void
  1153. gen8_mfd_mpeg2_decode_picture(VADriverContextP ctx,
  1154.                               struct decode_state *decode_state,
  1155.                               struct gen7_mfd_context *gen7_mfd_context)
  1156. {
  1157.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  1158.     VAPictureParameterBufferMPEG2 *pic_param;
  1159.     VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
  1160.     dri_bo *slice_data_bo;
  1161.     int i, j;
  1162.  
  1163.     assert(decode_state->pic_param && decode_state->pic_param->buffer);
  1164.     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
  1165.  
  1166.     gen8_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
  1167.     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
  1168.     intel_batchbuffer_emit_mi_flush(batch);
  1169.     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
  1170.     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
  1171.     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
  1172.     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
  1173.     gen8_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
  1174.     gen8_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);
  1175.  
  1176.     if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
  1177.         gen7_mfd_context->wa_mpeg2_slice_vertical_position =
  1178.             mpeg2_wa_slice_vertical_position(decode_state, pic_param);
  1179.  
  1180.     for (j = 0; j < decode_state->num_slice_params; j++) {
  1181.         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
  1182.         slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
  1183.         slice_data_bo = decode_state->slice_datas[j]->bo;
  1184.         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);
  1185.  
  1186.         if (j == decode_state->num_slice_params - 1)
  1187.             next_slice_group_param = NULL;
  1188.         else
  1189.             next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;
  1190.  
  1191.         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
  1192.             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
  1193.  
  1194.             if (i < decode_state->slice_params[j]->num_elements - 1)
  1195.                 next_slice_param = slice_param + 1;
  1196.             else
  1197.                 next_slice_param = next_slice_group_param;
  1198.  
  1199.             gen8_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
  1200.             slice_param++;
  1201.         }
  1202.     }
  1203.  
  1204.     intel_batchbuffer_end_atomic(batch);
  1205.     intel_batchbuffer_flush(batch);
  1206. }
  1207.  
/* Map VA VC-1 picture_type (index) to the GEN7 MFX picture-type encoding.
 * Index 4 maps to P — presumably the skipped-picture case; confirm against
 * the VA-API VC-1 picture parameter definition. */
static const int va_to_gen7_vc1_pic_type[5] = {
    GEN7_VC1_I_PICTURE,
    GEN7_VC1_P_PICTURE,
    GEN7_VC1_B_PICTURE,
    GEN7_VC1_BI_PICTURE,
    GEN7_VC1_P_PICTURE,
};

/* Map VA VC-1 mv_mode (index) to the GEN7 motion-vector mode encoding. */
static const int va_to_gen7_vc1_mv[4] = {
    1, /* 1-MV */
    2, /* 1-MV half-pel */
    3, /* 1-MV half-pef bilinear */
    0, /* Mixed MV */
};

/* Scale factors used for B-picture direct-mode motion vector derivation,
 * indexed by the (BFRACTION-derived) index computed by the caller. */
static const int b_picture_scale_factor[21] = {
    128, 85,  170, 64,  192,
    51,  102, 153, 204, 43,
    215, 37,  74,  111, 148,
    185, 222, 32,  96,  160,
    224,
};

/* Map VA VC-1 conditional_overlap_flag (index) to the GEN7 CONDOVER
 * field encoding. */
static const int va_to_gen7_vc1_condover[3] = {
    0,
    2,
    3
};

/* Map VA VC-1 profile (index) to the GEN7 profile encoding; index 2 is
 * reserved in the VC-1 spec. */
static const int va_to_gen7_vc1_profile[4] = {
    GEN7_VC1_SIMPLE_PROFILE,
    GEN7_VC1_MAIN_PROFILE,
    GEN7_VC1_RESERVED_PROFILE,
    GEN7_VC1_ADVANCED_PROFILE
};
  1243.  
  1244. static void
  1245. gen8_mfd_free_vc1_surface(void **data)
  1246. {
  1247.     struct gen7_vc1_surface *gen7_vc1_surface = *data;
  1248.  
  1249.     if (!gen7_vc1_surface)
  1250.         return;
  1251.  
  1252.     dri_bo_unreference(gen7_vc1_surface->dmv);
  1253.     free(gen7_vc1_surface);
  1254.     *data = NULL;
  1255. }
  1256.  
  1257. static void
  1258. gen8_mfd_init_vc1_surface(VADriverContextP ctx,
  1259.                           VAPictureParameterBufferVC1 *pic_param,
  1260.                           struct object_surface *obj_surface)
  1261. {
  1262.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1263.     struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
  1264.     int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
  1265.     int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
  1266.  
  1267.     obj_surface->free_private_data = gen8_mfd_free_vc1_surface;
  1268.  
  1269.     if (!gen7_vc1_surface) {
  1270.         gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
  1271.         assert((obj_surface->size & 0x3f) == 0);
  1272.         obj_surface->private_data = gen7_vc1_surface;
  1273.     }
  1274.  
  1275.     gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
  1276.  
  1277.     if (gen7_vc1_surface->dmv == NULL) {
  1278.         gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
  1279.                                              "direct mv w/r buffer",
  1280.                                              width_in_mbs * height_in_mbs * 64,
  1281.                                              0x1000);
  1282.     }
  1283. }
  1284.  
  1285. static void
  1286. gen8_mfd_vc1_decode_init(VADriverContextP ctx,
  1287.                          struct decode_state *decode_state,
  1288.                          struct gen7_mfd_context *gen7_mfd_context)
  1289. {
  1290.     VAPictureParameterBufferVC1 *pic_param;
  1291.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1292.     struct object_surface *obj_surface;
  1293.     dri_bo *bo;
  1294.     int width_in_mbs;
  1295.     int picture_type;
  1296.  
  1297.     assert(decode_state->pic_param && decode_state->pic_param->buffer);
  1298.     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
  1299.     width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
  1300.     picture_type = pic_param->picture_fields.bits.picture_type;
  1301.  
  1302.     intel_update_vc1_frame_store_index(ctx,
  1303.                                        decode_state,
  1304.                                        pic_param,
  1305.                                        gen7_mfd_context->reference_surface);
  1306.  
  1307.     /* Current decoded picture */
  1308.     obj_surface = decode_state->render_object;
  1309.     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
  1310.     gen8_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
  1311.  
  1312.     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
  1313.     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
  1314.     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
  1315.     gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;
  1316.  
  1317.     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
  1318.     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
  1319.     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
  1320.     gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;
  1321.  
  1322.     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
  1323.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1324.                       "intra row store",
  1325.                       width_in_mbs * 64,
  1326.                       0x1000);
  1327.     assert(bo);
  1328.     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
  1329.     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
  1330.  
  1331.     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
  1332.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1333.                       "deblocking filter row store",
  1334.                       width_in_mbs * 7 * 64,
  1335.                       0x1000);
  1336.     assert(bo);
  1337.     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
  1338.     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
  1339.  
  1340.     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
  1341.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1342.                       "bsd mpc row store",
  1343.                       width_in_mbs * 96,
  1344.                       0x1000);
  1345.     assert(bo);
  1346.     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
  1347.     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
  1348.  
  1349.     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
  1350.  
  1351.     gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
  1352.     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
  1353.    
  1354.     if (gen7_mfd_context->bitplane_read_buffer.valid) {
  1355.         int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
  1356.         int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
  1357.         int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
  1358.         int src_w, src_h;
  1359.         uint8_t *src = NULL, *dst = NULL;
  1360.  
  1361.         assert(decode_state->bit_plane->buffer);
  1362.         src = decode_state->bit_plane->buffer;
  1363.  
  1364.         bo = dri_bo_alloc(i965->intel.bufmgr,
  1365.                           "VC-1 Bitplane",
  1366.                           bitplane_width * height_in_mbs,
  1367.                           0x1000);
  1368.         assert(bo);
  1369.         gen7_mfd_context->bitplane_read_buffer.bo = bo;
  1370.  
  1371.         dri_bo_map(bo, True);
  1372.         assert(bo->virtual);
  1373.         dst = bo->virtual;
  1374.  
  1375.         for (src_h = 0; src_h < height_in_mbs; src_h++) {
  1376.             for(src_w = 0; src_w < width_in_mbs; src_w++) {
  1377.                 int src_index, dst_index;
  1378.                 int src_shift;
  1379.                 uint8_t src_value;
  1380.  
  1381.                 src_index = (src_h * width_in_mbs + src_w) / 2;
  1382.                 src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
  1383.                 src_value = ((src[src_index] >> src_shift) & 0xf);
  1384.  
  1385.                 if (picture_type == GEN7_VC1_SKIPPED_PICTURE){
  1386.                     src_value |= 0x2;
  1387.                 }
  1388.  
  1389.                 dst_index = src_w / 2;
  1390.                 dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
  1391.             }
  1392.  
  1393.             if (src_w & 1)
  1394.                 dst[src_w / 2] >>= 4;
  1395.  
  1396.             dst += bitplane_width;
  1397.         }
  1398.  
  1399.         dri_bo_unmap(bo);
  1400.     } else
  1401.         gen7_mfd_context->bitplane_read_buffer.bo = NULL;
  1402. }
  1403.  
/*
 * Emit the MFD_VC1_LONG_PIC_STATE command (6 dwords) for the current
 * VC-1 picture.
 *
 * All per-picture hardware fields -- alternate quantizer layout, unified
 * MV mode, B-picture scale factor, frame coding mode, overlap smoothing,
 * sub-pel interpolation mode, bitplane flags -- are derived from the
 * VA-API VC-1 picture parameter buffer before the batch is written.
 */
static void
gen8_mfd_vc1_pic_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferVC1 *pic_param;
    struct object_surface *obj_surface;
    int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
    int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
    int unified_mv_mode;
    int ref_field_pic_polarity = 0;
    int scale_factor = 0;
    int trans_ac_y = 0;
    int dmv_surface_valid = 0;
    int brfd = 0;
    int fcm = 0;
    int picture_type;
    int profile;
    int overlap;
    int interpolation_mode = 0;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;

    profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
    dquant = pic_param->pic_quantizer_fields.bits.dquant;
    dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
    dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
    dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
    dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
    dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
    alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;

    /*
     * Translate the VC-1 VOPDQUANT syntax (DQUANT/DQUANTFRM/DQPROFILE/...)
     * into the hardware's alternate-quantizer configuration and the mask
     * of picture edges that use the alternate quantizer.
     */
    if (dquant == 0) {
        /* Single quantizer for the whole picture. */
        alt_pquant_config = 0;
        alt_pquant_edge_mask = 0;
    } else if (dquant == 2) {
        /* Alternate quantizer on all four picture edges. */
        alt_pquant_config = 1;
        alt_pquant_edge_mask = 0xf;
    } else {
        assert(dquant == 1);
        if (dquantfrm == 0) {
            /* This frame does not use an alternate quantizer. */
            alt_pquant_config = 0;
            alt_pquant_edge_mask = 0;
            alt_pq = 0;
        } else {
            assert(dquantfrm == 1);
            alt_pquant_config = 1;

            switch (dqprofile) {
            case 3:
                /* Per-macroblock quantizer selection. */
                if (dqbilevel == 0) {
                    alt_pquant_config = 2;
                    alt_pquant_edge_mask = 0;
                } else {
                    assert(dqbilevel == 1);
                    alt_pquant_config = 3;
                    alt_pquant_edge_mask = 0;
                }
                break;

            case 0:
                /* All four edges. */
                alt_pquant_edge_mask = 0xf;
                break;

            case 1:
                /* Double edge: DQDBEDGE selects the edge pair. */
                if (dqdbedge == 3)
                    alt_pquant_edge_mask = 0x9;
                else
                    alt_pquant_edge_mask = (0x3 << dqdbedge);

                break;

            case 2:
                /* Single edge selected by DQSBEDGE. */
                alt_pquant_edge_mask = (0x1 << dqsbedge);
                break;

            default:
                assert(0);
            }
        }
    }

    /*
     * With intensity compensation the effective MV mode is carried in
     * mv_mode2; otherwise mv_mode itself is used.
     */
    if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
        assert(pic_param->mv_fields.bits.mv_mode2 < 4);
        unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
    } else {
        assert(pic_param->mv_fields.bits.mv_mode < 4);
        unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
    }

    if (pic_param->sequence_fields.bits.interlace == 1 &&
        pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
        /* FIXME: calculate reference field picture polarity */
        assert(0);
        ref_field_pic_polarity = 0;
    }

    /* B-fraction -> scale factor lookup; table covers indices 0..20. */
    if (pic_param->b_picture_fraction < 21)
        scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];

    picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];

    /* Advanced-profile I pictures are programmed as BI pictures. */
    if (profile == GEN7_VC1_ADVANCED_PROFILE &&
        picture_type == GEN7_VC1_I_PICTURE)
        picture_type = GEN7_VC1_BI_PICTURE;

    if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
        trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
    else {
        trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;

        /*
         * 8.3.6.2.1 Transform Type Selection
         * If variable-sized transform coding is not enabled,
         * then the 8x8 transform shall be used for all blocks.
         * it is also MFX_VC1_PIC_STATE requirement.
         */
        if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
            pic_param->transform_fields.bits.mb_level_transform_type_flag   = 1;
            pic_param->transform_fields.bits.frame_level_transform_type     = 0;
        }
    }

    /*
     * Direct-mode MVs for a B picture are read from the backward (future)
     * reference; they are only valid when that reference is a P picture.
     */
    if (picture_type == GEN7_VC1_B_PICTURE) {
        struct gen7_vc1_surface *gen7_vc1_surface = NULL;

        obj_surface = decode_state->reference_objects[1];

        if (obj_surface)
            gen7_vc1_surface = obj_surface->private_data;

        if (!gen7_vc1_surface ||
            (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
             va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
            dmv_surface_valid = 0;
        else
            dmv_surface_valid = 1;
    }

    assert(pic_param->picture_fields.bits.frame_coding_mode < 3);

    /* Frame coding mode: 0/1 pass through; field-interlace (2) is split
     * into top-field-first (2) vs bottom-field-first (3). */
    if (pic_param->picture_fields.bits.frame_coding_mode < 2)
        fcm = pic_param->picture_fields.bits.frame_coding_mode;
    else {
        if (pic_param->picture_fields.bits.top_field_first)
            fcm = 2;
        else
            fcm = 3;
    }

    /* NOTE(review): brfd is derived here but never written into the
     * command below -- confirm whether it is needed on this gen. */
    if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
        brfd = pic_param->reference_fields.bits.reference_distance;
        brfd = (scale_factor * brfd) >> 8;
        brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;

        if (brfd < 0)
            brfd = 0;
    }

    /*
     * Overlap smoothing: simple/main profile enables it for PQUANT >= 9 on
     * non-B pictures; advanced profile additionally honours CONDOVER for
     * I/BI pictures.
     */
    overlap = 0;
    if (profile != GEN7_VC1_ADVANCED_PROFILE){
        if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
            pic_param->picture_fields.bits.picture_type != GEN7_VC1_B_PICTURE) {
            overlap = 1;
        }
    }else {
        if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_P_PICTURE &&
             pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
              overlap = 1;
        }
        if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_I_PICTURE ||
            pic_param->picture_fields.bits.picture_type == GEN7_VC1_BI_PICTURE){
             if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
                overlap = 1;
             } else if (va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 2 ||
                        va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 3) {
                 overlap = 1;
             }
        }
    }

    assert(pic_param->conditional_overlap_flag < 3);
    assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */

    /* Sub-pel interpolation filter selection from the (unified) MV mode. */
    if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
        (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
         pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
        interpolation_mode = 9; /* Half-pel bilinear */
    else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
             (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
              pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
        interpolation_mode = 1; /* Half-pel bicubic */
    else
        interpolation_mode = 0; /* Quarter-pel bicubic */

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
    /* DW1: picture size in macroblocks, minus one in each dimension. */
    OUT_BCS_BATCH(batch,
                  (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
                  ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
    /* DW2: bitplane pitch, DMV validity and assorted sequence/entrypoint flags. */
    OUT_BCS_BATCH(batch,
                  ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
                  dmv_surface_valid << 15 |
                  (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
                  pic_param->rounding_control << 13 |
                  pic_param->sequence_fields.bits.syncmarker << 12 |
                  interpolation_mode << 8 |
                  0 << 7 | /* FIXME: scale up or down ??? */
                  pic_param->range_reduction_frame << 6 |
                  pic_param->entrypoint_fields.bits.loopfilter << 5 |
                  overlap << 4 |
                  !pic_param->picture_fields.bits.is_first_field << 3 |
                  (pic_param->sequence_fields.bits.profile == 3) << 0);
    /* DW3: picture type, frame coding mode and quantizer parameters. */
    OUT_BCS_BATCH(batch,
                  va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
                  picture_type << 26 |
                  fcm << 24 |
                  alt_pq << 16 |
                  pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
                  scale_factor << 0);
    /* DW4: MV mode, reference fields and alternate-quantizer layout. */
    OUT_BCS_BATCH(batch,
                  unified_mv_mode << 28 |
                  pic_param->mv_fields.bits.four_mv_switch << 27 |
                  pic_param->fast_uvmc_flag << 26 |
                  ref_field_pic_polarity << 25 |
                  pic_param->reference_fields.bits.num_reference_pictures << 24 |
                  pic_param->reference_fields.bits.reference_distance << 20 |
                  pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
                  pic_param->mv_fields.bits.extended_dmv_range << 10 |
                  pic_param->mv_fields.bits.extended_mv_range << 8 |
                  alt_pquant_edge_mask << 4 |
                  alt_pquant_config << 2 |
                  pic_param->pic_quantizer_fields.bits.half_qp << 1 |
                  pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
    /* DW5: bitplane presence (note: raw-mode flags are inverted) and
     * the VLC table selectors. */
    OUT_BCS_BATCH(batch,
                  !!pic_param->bitplane_present.value << 31 |
                  !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
                  !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
                  !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
                  !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
                  !pic_param->bitplane_present.flags.bp_overflags << 26 |
                  !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
                  !pic_param->bitplane_present.flags.bp_field_tx << 24 |
                  pic_param->mv_fields.bits.mv_table << 20 |
                  pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
                  pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
                  pic_param->transform_fields.bits.frame_level_transform_type << 12 |
                  pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
                  pic_param->mb_mode_table << 8 |
                  trans_ac_y << 6 |
                  pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
                  pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
                  pic_param->cbp_table << 0);
    ADVANCE_BCS_BATCH(batch);
}
  1661.  
  1662. static void
  1663. gen8_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
  1664.                              struct decode_state *decode_state,
  1665.                              struct gen7_mfd_context *gen7_mfd_context)
  1666. {
  1667.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  1668.     VAPictureParameterBufferVC1 *pic_param;
  1669.     int intensitycomp_single;
  1670.  
  1671.     assert(decode_state->pic_param && decode_state->pic_param->buffer);
  1672.     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
  1673.  
  1674.     assert(decode_state->pic_param && decode_state->pic_param->buffer);
  1675.     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
  1676.     intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
  1677.  
  1678.     BEGIN_BCS_BATCH(batch, 6);
  1679.     OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
  1680.     OUT_BCS_BATCH(batch,
  1681.                   0 << 14 | /* FIXME: double ??? */
  1682.                   0 << 12 |
  1683.                   intensitycomp_single << 10 |
  1684.                   intensitycomp_single << 8 |
  1685.                   0 << 4 | /* FIXME: interlace mode */
  1686.                   0);
  1687.     OUT_BCS_BATCH(batch,
  1688.                   pic_param->luma_shift << 16 |
  1689.                   pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
  1690.     OUT_BCS_BATCH(batch, 0);
  1691.     OUT_BCS_BATCH(batch, 0);
  1692.     OUT_BCS_BATCH(batch, 0);
  1693.     ADVANCE_BCS_BATCH(batch);
  1694. }
  1695.  
  1696. static void
  1697. gen8_mfd_vc1_directmode_state(VADriverContextP ctx,
  1698.                               struct decode_state *decode_state,
  1699.                               struct gen7_mfd_context *gen7_mfd_context)
  1700. {
  1701.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  1702.     struct object_surface *obj_surface;
  1703.     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
  1704.  
  1705.     obj_surface = decode_state->render_object;
  1706.  
  1707.     if (obj_surface && obj_surface->private_data) {
  1708.         dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
  1709.     }
  1710.  
  1711.     obj_surface = decode_state->reference_objects[1];
  1712.  
  1713.     if (obj_surface && obj_surface->private_data) {
  1714.         dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
  1715.     }
  1716.  
  1717.     BEGIN_BCS_BATCH(batch, 7);
  1718.     OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2));
  1719.  
  1720.     if (dmv_write_buffer)
  1721.         OUT_BCS_RELOC(batch, dmv_write_buffer,
  1722.                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
  1723.                       0);
  1724.     else
  1725.         OUT_BCS_BATCH(batch, 0);
  1726.  
  1727.     OUT_BCS_BATCH(batch, 0);
  1728.     OUT_BCS_BATCH(batch, 0);
  1729.  
  1730.     if (dmv_read_buffer)
  1731.         OUT_BCS_RELOC(batch, dmv_read_buffer,
  1732.                       I915_GEM_DOMAIN_INSTRUCTION, 0,
  1733.                       0);
  1734.     else
  1735.         OUT_BCS_BATCH(batch, 0);
  1736.    
  1737.     OUT_BCS_BATCH(batch, 0);
  1738.     OUT_BCS_BATCH(batch, 0);
  1739.                  
  1740.     ADVANCE_BCS_BATCH(batch);
  1741. }
  1742.  
  1743. static int
  1744. gen8_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
  1745. {
  1746.     int out_slice_data_bit_offset;
  1747.     int slice_header_size = in_slice_data_bit_offset / 8;
  1748.     int i, j;
  1749.  
  1750.     if (profile != 3)
  1751.         out_slice_data_bit_offset = in_slice_data_bit_offset;
  1752.     else {
  1753.         for (i = 0, j = 0; i < slice_header_size; i++, j++) {
  1754.             if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3 && buf[j + 3] < 4) {
  1755.                 i++, j += 2;
  1756.             }
  1757.         }
  1758.  
  1759.         out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
  1760.     }
  1761.  
  1762.     return out_slice_data_bit_offset;
  1763. }
  1764.  
  1765. static void
  1766. gen8_mfd_vc1_bsd_object(VADriverContextP ctx,
  1767.                         VAPictureParameterBufferVC1 *pic_param,
  1768.                         VASliceParameterBufferVC1 *slice_param,
  1769.                         VASliceParameterBufferVC1 *next_slice_param,
  1770.                         dri_bo *slice_data_bo,
  1771.                         struct gen7_mfd_context *gen7_mfd_context)
  1772. {
  1773.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  1774.     int next_slice_start_vert_pos;
  1775.     int macroblock_offset;
  1776.     uint8_t *slice_data = NULL;
  1777.  
  1778.     dri_bo_map(slice_data_bo, 0);
  1779.     slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
  1780.     macroblock_offset = gen8_mfd_vc1_get_macroblock_bit_offset(slice_data,
  1781.                                                                slice_param->macroblock_offset,
  1782.                                                                pic_param->sequence_fields.bits.profile);
  1783.     dri_bo_unmap(slice_data_bo);
  1784.  
  1785.     if (next_slice_param)
  1786.         next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
  1787.     else
  1788.         next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
  1789.  
  1790.     BEGIN_BCS_BATCH(batch, 5);
  1791.     OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
  1792.     OUT_BCS_BATCH(batch,
  1793.                   slice_param->slice_data_size - (macroblock_offset >> 3));
  1794.     OUT_BCS_BATCH(batch,
  1795.                   slice_param->slice_data_offset + (macroblock_offset >> 3));
  1796.     OUT_BCS_BATCH(batch,
  1797.                   slice_param->slice_vertical_position << 16 |
  1798.                   next_slice_start_vert_pos << 0);
  1799.     OUT_BCS_BATCH(batch,
  1800.                   (macroblock_offset & 0x7));
  1801.     ADVANCE_BCS_BATCH(batch);
  1802. }
  1803.  
  1804. static void
  1805. gen8_mfd_vc1_decode_picture(VADriverContextP ctx,
  1806.                             struct decode_state *decode_state,
  1807.                             struct gen7_mfd_context *gen7_mfd_context)
  1808. {
  1809.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  1810.     VAPictureParameterBufferVC1 *pic_param;
  1811.     VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
  1812.     dri_bo *slice_data_bo;
  1813.     int i, j;
  1814.  
  1815.     assert(decode_state->pic_param && decode_state->pic_param->buffer);
  1816.     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
  1817.  
  1818.     gen8_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
  1819.     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
  1820.     intel_batchbuffer_emit_mi_flush(batch);
  1821.     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
  1822.     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
  1823.     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
  1824.     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
  1825.     gen8_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
  1826.     gen8_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
  1827.     gen8_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);
  1828.  
  1829.     for (j = 0; j < decode_state->num_slice_params; j++) {
  1830.         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
  1831.         slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
  1832.         slice_data_bo = decode_state->slice_datas[j]->bo;
  1833.         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);
  1834.  
  1835.         if (j == decode_state->num_slice_params - 1)
  1836.             next_slice_group_param = NULL;
  1837.         else
  1838.             next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;
  1839.  
  1840.         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
  1841.             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
  1842.  
  1843.             if (i < decode_state->slice_params[j]->num_elements - 1)
  1844.                 next_slice_param = slice_param + 1;
  1845.             else
  1846.                 next_slice_param = next_slice_group_param;
  1847.  
  1848.             gen8_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
  1849.             slice_param++;
  1850.         }
  1851.     }
  1852.  
  1853.     intel_batchbuffer_end_atomic(batch);
  1854.     intel_batchbuffer_flush(batch);
  1855. }
  1856.  
  1857. static void
  1858. gen8_mfd_jpeg_decode_init(VADriverContextP ctx,
  1859.                           struct decode_state *decode_state,
  1860.                           struct gen7_mfd_context *gen7_mfd_context)
  1861. {
  1862.     struct object_surface *obj_surface;
  1863.     VAPictureParameterBufferJPEGBaseline *pic_param;
  1864.     int subsampling = SUBSAMPLE_YUV420;
  1865.     int fourcc = VA_FOURCC_IMC3;
  1866.  
  1867.     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
  1868.  
  1869.     if (pic_param->num_components == 1)
  1870.         subsampling = SUBSAMPLE_YUV400;
  1871.     else if (pic_param->num_components == 3) {
  1872.         int h1 = pic_param->components[0].h_sampling_factor;
  1873.         int h2 = pic_param->components[1].h_sampling_factor;
  1874.         int h3 = pic_param->components[2].h_sampling_factor;
  1875.         int v1 = pic_param->components[0].v_sampling_factor;
  1876.         int v2 = pic_param->components[1].v_sampling_factor;
  1877.         int v3 = pic_param->components[2].v_sampling_factor;
  1878.  
  1879.         if (h1 == 2 && h2 == 1 && h3 == 1 &&
  1880.             v1 == 2 && v2 == 1 && v3 == 1) {
  1881.             subsampling = SUBSAMPLE_YUV420;
  1882.             fourcc = VA_FOURCC_IMC3;
  1883.         } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
  1884.                    v1 == 1 && v2 == 1 && v3 == 1) {
  1885.             subsampling = SUBSAMPLE_YUV422H;
  1886.             fourcc = VA_FOURCC_422H;
  1887.         } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
  1888.                    v1 == 1 && v2 == 1 && v3 == 1) {
  1889.             subsampling = SUBSAMPLE_YUV444;
  1890.             fourcc = VA_FOURCC_444P;
  1891.         } else if (h1 == 4 && h2 == 1 && h3 == 1 &&
  1892.                    v1 == 1 && v2 == 1 && v3 == 1) {
  1893.             subsampling = SUBSAMPLE_YUV411;
  1894.             fourcc = VA_FOURCC_411P;
  1895.         } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
  1896.                    v1 == 2 && v2 == 1 && v3 == 1) {
  1897.             subsampling = SUBSAMPLE_YUV422V;
  1898.             fourcc = VA_FOURCC_422V;
  1899.         } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
  1900.                    v1 == 2 && v2 == 2 && v3 == 2) {
  1901.             subsampling = SUBSAMPLE_YUV422H;
  1902.             fourcc = VA_FOURCC_422H;
  1903.         } else if (h2 == 2 && h2 == 2 && h3 == 2 &&
  1904.                    v1 == 2 && v2 == 1 && v3 == 1) {
  1905.             subsampling = SUBSAMPLE_YUV422V;
  1906.             fourcc = VA_FOURCC_422V;
  1907.         } else
  1908.             assert(0);
  1909.     }
  1910.     else {
  1911.         assert(0);
  1912.     }
  1913.  
  1914.     /* Current decoded picture */
  1915.     obj_surface = decode_state->render_object;
  1916.     i965_check_alloc_surface_bo(ctx, obj_surface, 1, fourcc, subsampling);
  1917.  
  1918.     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
  1919.     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
  1920.     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
  1921.     gen7_mfd_context->pre_deblocking_output.valid = 1;
  1922.  
  1923.     gen7_mfd_context->post_deblocking_output.bo = NULL;
  1924.     gen7_mfd_context->post_deblocking_output.valid = 0;
  1925.  
  1926.     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
  1927.     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
  1928.  
  1929.     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
  1930.     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
  1931.  
  1932.     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
  1933.     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
  1934.  
  1935.     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
  1936.     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
  1937.  
  1938.     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
  1939.     gen7_mfd_context->bitplane_read_buffer.valid = 0;
  1940. }
  1941.  
/* Lookup from a rotation index to the GEN7 MFX JPEG rotation encoding.
 * Presumably indexed by the VA-API rotation value; only entry 0 (no
 * rotation) is used by the pic-state emitter in this file. */
static const int va_to_gen7_jpeg_rotation[4] = {
    GEN7_JPEG_ROTATION_0,
    GEN7_JPEG_ROTATION_90,
    GEN7_JPEG_ROTATION_180,
    GEN7_JPEG_ROTATION_270
};
  1948.  
  1949. static void
  1950. gen8_mfd_jpeg_pic_state(VADriverContextP ctx,
  1951.                         struct decode_state *decode_state,
  1952.                         struct gen7_mfd_context *gen7_mfd_context)
  1953. {
  1954.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  1955.     VAPictureParameterBufferJPEGBaseline *pic_param;
  1956.     int chroma_type = GEN7_YUV420;
  1957.     int frame_width_in_blks;
  1958.     int frame_height_in_blks;
  1959.  
  1960.     assert(decode_state->pic_param && decode_state->pic_param->buffer);
  1961.     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
  1962.  
  1963.     if (pic_param->num_components == 1)
  1964.         chroma_type = GEN7_YUV400;
  1965.     else if (pic_param->num_components == 3) {
  1966.         int h1 = pic_param->components[0].h_sampling_factor;
  1967.         int h2 = pic_param->components[1].h_sampling_factor;
  1968.         int h3 = pic_param->components[2].h_sampling_factor;
  1969.         int v1 = pic_param->components[0].v_sampling_factor;
  1970.         int v2 = pic_param->components[1].v_sampling_factor;
  1971.         int v3 = pic_param->components[2].v_sampling_factor;
  1972.  
  1973.         if (h1 == 2 && h2 == 1 && h3 == 1 &&
  1974.             v1 == 2 && v2 == 1 && v3 == 1)
  1975.             chroma_type = GEN7_YUV420;
  1976.         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
  1977.                  v1 == 1 && v2 == 1 && v3 == 1)
  1978.             chroma_type = GEN7_YUV422H_2Y;
  1979.         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
  1980.                  v1 == 1 && v2 == 1 && v3 == 1)
  1981.             chroma_type = GEN7_YUV444;
  1982.         else if (h1 == 4 && h2 == 1 && h3 == 1 &&
  1983.                  v1 == 1 && v2 == 1 && v3 == 1)
  1984.             chroma_type = GEN7_YUV411;
  1985.         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
  1986.                  v1 == 2 && v2 == 1 && v3 == 1)
  1987.             chroma_type = GEN7_YUV422V_2Y;
  1988.         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
  1989.                  v1 == 2 && v2 == 2 && v3 == 2)
  1990.             chroma_type = GEN7_YUV422H_4Y;
  1991.         else if (h2 == 2 && h2 == 2 && h3 == 2 &&
  1992.                  v1 == 2 && v2 == 1 && v3 == 1)
  1993.             chroma_type = GEN7_YUV422V_4Y;
  1994.         else
  1995.             assert(0);
  1996.     }
  1997.  
  1998.     if (chroma_type == GEN7_YUV400 ||
  1999.         chroma_type == GEN7_YUV444 ||
  2000.         chroma_type == GEN7_YUV422V_2Y) {
  2001.         frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
  2002.         frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
  2003.     } else if (chroma_type == GEN7_YUV411) {
  2004.         frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
  2005.         frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
  2006.     } else {
  2007.         frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
  2008.         frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
  2009.     }
  2010.  
  2011.     BEGIN_BCS_BATCH(batch, 3);
  2012.     OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
  2013.     OUT_BCS_BATCH(batch,
  2014.                   (va_to_gen7_jpeg_rotation[0] << 4) |    /* without rotation */
  2015.                   (chroma_type << 0));
  2016.     OUT_BCS_BATCH(batch,
  2017.                   ((frame_height_in_blks - 1) << 16) |   /* FrameHeightInBlks */
  2018.                   ((frame_width_in_blks - 1) << 0));    /* FrameWidthInBlks */
  2019.     ADVANCE_BCS_BATCH(batch);
  2020. }
  2021.  
/* Map a VA-API Huffman table index (0 = luma/Y, 1 = chroma/UV) to the
 * hardware table id used by MFX_JPEG_HUFF_TABLE_STATE. */
static const int va_to_gen7_jpeg_hufftable[2] = {
    MFX_HUFFTABLE_ID_Y,
    MFX_HUFFTABLE_ID_UV
};
  2026.  
  2027. static void
  2028. gen8_mfd_jpeg_huff_table_state(VADriverContextP ctx,
  2029.                                struct decode_state *decode_state,
  2030.                                struct gen7_mfd_context *gen7_mfd_context,
  2031.                                int num_tables)
  2032. {
  2033.     VAHuffmanTableBufferJPEGBaseline *huffman_table;
  2034.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  2035.     int index;
  2036.  
  2037.     if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
  2038.         return;
  2039.  
  2040.     huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;
  2041.  
  2042.     for (index = 0; index < num_tables; index++) {
  2043.         int id = va_to_gen7_jpeg_hufftable[index];
  2044.         if (!huffman_table->load_huffman_table[index])
  2045.             continue;
  2046.         BEGIN_BCS_BATCH(batch, 53);
  2047.         OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
  2048.         OUT_BCS_BATCH(batch, id);
  2049.         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
  2050.         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
  2051.         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
  2052.         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
  2053.         ADVANCE_BCS_BATCH(batch);
  2054.     }
  2055. }
  2056.  
/* Map a normalized 1-based JPEG component id to the hardware quantizer
 * matrix id (1 = luma Y, 2 = chroma Cb, 3 = chroma Cr, 4 = alpha);
 * index 0 is unused. */
static const int va_to_gen7_jpeg_qm[5] = {
    -1,
    MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
    MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
    MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
    MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
};
  2064.  
  2065. static void
  2066. gen8_mfd_jpeg_qm_state(VADriverContextP ctx,
  2067.                        struct decode_state *decode_state,
  2068.                        struct gen7_mfd_context *gen7_mfd_context)
  2069. {
  2070.     VAPictureParameterBufferJPEGBaseline *pic_param;
  2071.     VAIQMatrixBufferJPEGBaseline *iq_matrix;
  2072.     int index;
  2073.  
  2074.     if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
  2075.         return;
  2076.  
  2077.     iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
  2078.     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
  2079.  
  2080.     assert(pic_param->num_components <= 3);
  2081.  
  2082.     for (index = 0; index < pic_param->num_components; index++) {
  2083.         int id = pic_param->components[index].component_id - pic_param->components[0].component_id + 1;
  2084.         int qm_type;
  2085.         unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
  2086.         unsigned char raster_qm[64];
  2087.         int j;
  2088.  
  2089.         if (id > 4 || id < 1)
  2090.             continue;
  2091.  
  2092.         if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
  2093.             continue;
  2094.  
  2095.         qm_type = va_to_gen7_jpeg_qm[id];
  2096.  
  2097.         for (j = 0; j < 64; j++)
  2098.             raster_qm[zigzag_direct[j]] = qm[j];
  2099.  
  2100.         gen8_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
  2101.     }
  2102. }
  2103.  
  2104. static void
  2105. gen8_mfd_jpeg_bsd_object(VADriverContextP ctx,
  2106.                          VAPictureParameterBufferJPEGBaseline *pic_param,
  2107.                          VASliceParameterBufferJPEGBaseline *slice_param,
  2108.                          VASliceParameterBufferJPEGBaseline *next_slice_param,
  2109.                          dri_bo *slice_data_bo,
  2110.                          struct gen7_mfd_context *gen7_mfd_context)
  2111. {
  2112.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  2113.     int scan_component_mask = 0;
  2114.     int i;
  2115.  
  2116.     assert(slice_param->num_components > 0);
  2117.     assert(slice_param->num_components < 4);
  2118.     assert(slice_param->num_components <= pic_param->num_components);
  2119.  
  2120.     for (i = 0; i < slice_param->num_components; i++) {
  2121.         switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
  2122.         case 1:
  2123.             scan_component_mask |= (1 << 0);
  2124.             break;
  2125.         case 2:
  2126.             scan_component_mask |= (1 << 1);
  2127.             break;
  2128.         case 3:
  2129.             scan_component_mask |= (1 << 2);
  2130.             break;
  2131.         default:
  2132.             assert(0);
  2133.             break;
  2134.         }
  2135.     }
  2136.  
  2137.     BEGIN_BCS_BATCH(batch, 6);
  2138.     OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
  2139.     OUT_BCS_BATCH(batch,
  2140.                   slice_param->slice_data_size);
  2141.     OUT_BCS_BATCH(batch,
  2142.                   slice_param->slice_data_offset);
  2143.     OUT_BCS_BATCH(batch,
  2144.                   slice_param->slice_horizontal_position << 16 |
  2145.                   slice_param->slice_vertical_position << 0);
  2146.     OUT_BCS_BATCH(batch,
  2147.                   ((slice_param->num_components != 1) << 30) |  /* interleaved */
  2148.                   (scan_component_mask << 27) |                 /* scan components */
  2149.                   (0 << 26) |   /* disable interrupt allowed */
  2150.                   (slice_param->num_mcus << 0));                /* MCU count */
  2151.     OUT_BCS_BATCH(batch,
  2152.                   (slice_param->restart_interval << 0));    /* RestartInterval */
  2153.     ADVANCE_BCS_BATCH(batch);
  2154. }
  2155.  
  2156. /* Workaround for JPEG decoding on Ivybridge */
  2157. #ifdef JPEG_WA
  2158.  
/* Hard-coded 16x16 AVC intra clip that is decoded before the real JPEG
 * frame as a hardware workaround (see gen8_mfd_jpeg_wa()). */
static struct {
    int width;                  /* clip width in pixels */
    int height;                 /* clip height in pixels */
    unsigned char data[32];     /* clip bitstream, uploaded to a bo in gen8_jpeg_wa_init() */
    int data_size;              /* number of valid bytes in data[] */
    int data_bit_offset;        /* bit offset of the slice data within the stream */
    int qp;                     /* quantization parameter for the WA slice state */
} gen7_jpeg_wa_clip = {
    16,
    16,
    {
        0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
        0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
    },
    14,
    40,
    28,
};
  2177.  
/*
 * Prepare the resources for the JPEG hardware workaround: a private
 * 16x16 NV12 surface the dummy clip is decoded into, and a 4KB buffer
 * holding the clip bitstream (uploaded once and reused afterwards).
 */
static void
gen8_jpeg_wa_init(VADriverContextP ctx,
                  struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    VAStatus status;
    struct object_surface *obj_surface;

    /* Drop any workaround surface left over from a previous decode. */
    if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
        i965_DestroySurfaces(ctx,
                             &gen7_mfd_context->jpeg_wa_surface_id,
                             1);

    status = i965_CreateSurfaces(ctx,
                                 gen7_jpeg_wa_clip.width,
                                 gen7_jpeg_wa_clip.height,
                                 VA_RT_FORMAT_YUV420,
                                 1,
                                 &gen7_mfd_context->jpeg_wa_surface_id);
    assert(status == VA_STATUS_SUCCESS);  /* status only checked here */

    obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
    assert(obj_surface);
    /* Make sure the surface has a backing bo in NV12 layout. */
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
    gen7_mfd_context->jpeg_wa_surface_object = obj_surface;

    /* The clip bitstream never changes, so upload it only once. */
    if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
        gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
                                                               "JPEG WA data",
                                                               0x1000,
                                                               0x1000);
        dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
                       0,
                       gen7_jpeg_wa_clip.data_size,
                       gen7_jpeg_wa_clip.data);
    }
}
  2215.  
/*
 * Emit MFX_PIPE_MODE_SELECT for the workaround decode: long-format AVC
 * VLD decoding with stream-out disabled and pre-deblocking output.
 */
static void
gen8_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
                              struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  (MFX_LONG_MODE << 17) | /* Currently only support long format */
                  (MFD_MODE_VLD << 15) | /* VLD mode */
                  (0 << 10) | /* disable Stream-Out */
                  (0 << 9)  | /* Post Deblocking Output */
                  (1 << 8)  | /* Pre Deblocking Output */
                  (0 << 5)  | /* not in stitch mode */
                  (MFX_CODEC_DECODE << 4)  | /* decoding mode */
                  (MFX_FORMAT_AVC << 0));   /* the WA clip is an AVC stream */
    OUT_BCS_BATCH(batch,
                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
                  (0 << 1)  |
                  (0 << 0));
    OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
    OUT_BCS_BATCH(batch, 0); /* reserved */
    ADVANCE_BCS_BATCH(batch);
}
  2243.  
/*
 * Emit MFX_SURFACE_STATE describing the private NV12 workaround surface
 * created in gen8_jpeg_wa_init().
 */
static void
gen8_jpeg_wa_surface_state(VADriverContextP ctx,
                           struct gen7_mfd_context *gen7_mfd_context)
{
    struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  ((obj_surface->orig_width - 1) << 18) |
                  ((obj_surface->orig_height - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
                  (1 << 27) | /* interleave chroma, set to 0 for JPEG */
                  (0 << 22) | /* surface object control state, ignored */
                  ((obj_surface->width - 1) << 3) | /* pitch */
                  (0 << 2)  | /* must be 0 */
                  (1 << 1)  | /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for U(Cb), must be 0 */
                  (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for V(Cr), must be 0 */
                  (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
    ADVANCE_BCS_BATCH(batch);
}
  2273.  
  2274. static void
  2275. gen8_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
  2276.                                  struct gen7_mfd_context *gen7_mfd_context)
  2277. {
  2278.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2279.     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
  2280.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  2281.     dri_bo *intra_bo;
  2282.     int i;
  2283.  
  2284.     intra_bo = dri_bo_alloc(i965->intel.bufmgr,
  2285.                             "intra row store",
  2286.                             128 * 64,
  2287.                             0x1000);
  2288.  
  2289.     BEGIN_BCS_BATCH(batch, 61);
  2290.     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
  2291.     OUT_BCS_RELOC(batch,
  2292.                   obj_surface->bo,
  2293.                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
  2294.                   0);
  2295.         OUT_BCS_BATCH(batch, 0);
  2296.         OUT_BCS_BATCH(batch, 0);
  2297.    
  2298.  
  2299.     OUT_BCS_BATCH(batch, 0); /* post deblocking */
  2300.         OUT_BCS_BATCH(batch, 0);
  2301.         OUT_BCS_BATCH(batch, 0);
  2302.  
  2303.         /* uncompressed-video & stream out 7-12 */
  2304.     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
  2305.     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
  2306.         OUT_BCS_BATCH(batch, 0);
  2307.         OUT_BCS_BATCH(batch, 0);
  2308.         OUT_BCS_BATCH(batch, 0);
  2309.         OUT_BCS_BATCH(batch, 0);
  2310.  
  2311.         /* the DW 13-15 is for intra row store scratch */
  2312.     OUT_BCS_RELOC(batch,
  2313.                   intra_bo,
  2314.                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
  2315.                   0);
  2316.         OUT_BCS_BATCH(batch, 0);
  2317.         OUT_BCS_BATCH(batch, 0);
  2318.  
  2319.         /* the DW 16-18 is for deblocking filter */
  2320.     OUT_BCS_BATCH(batch, 0);
  2321.         OUT_BCS_BATCH(batch, 0);
  2322.         OUT_BCS_BATCH(batch, 0);
  2323.  
  2324.     /* DW 19..50 */
  2325.     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
  2326.         OUT_BCS_BATCH(batch, 0);
  2327.         OUT_BCS_BATCH(batch, 0);
  2328.     }
  2329.     OUT_BCS_BATCH(batch, 0);
  2330.  
  2331.         /* the DW52-54 is for mb status address */
  2332.     OUT_BCS_BATCH(batch, 0);
  2333.         OUT_BCS_BATCH(batch, 0);
  2334.         OUT_BCS_BATCH(batch, 0);
  2335.         /* the DW56-60 is for ILDB & second ILDB address */
  2336.     OUT_BCS_BATCH(batch, 0);
  2337.         OUT_BCS_BATCH(batch, 0);
  2338.         OUT_BCS_BATCH(batch, 0);
  2339.     OUT_BCS_BATCH(batch, 0);
  2340.         OUT_BCS_BATCH(batch, 0);
  2341.         OUT_BCS_BATCH(batch, 0);
  2342.  
  2343.     ADVANCE_BCS_BATCH(batch);
  2344.  
  2345.     dri_bo_unreference(intra_bo);
  2346. }
  2347.  
/*
 * Emit MFX_BSP_BUF_BASE_ADDR_STATE for the workaround decode, backed by
 * two temporary row-store buffers that are released right after the
 * commands are queued.
 */
static void
gen8_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
                                     struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    dri_bo *bsd_mpc_bo, *mpr_bo;

    bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
                              "bsd mpc row store",
                              11520, /* 1.5 * 120 * 64 */
                              0x1000);

    mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
                          "mpr row store",
                          7680, /* 1.0 * 120 * 64 */
                          0x1000);

    BEGIN_BCS_BATCH(batch, 10);
    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));

    /* BSD/MPC row-store buffer address */
    OUT_BCS_RELOC(batch,
                  bsd_mpc_bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* MPR row-store buffer address */
    OUT_BCS_RELOC(batch,
                  mpr_bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);

    dri_bo_unreference(bsd_mpc_bo);
    dri_bo_unreference(mpr_bo);
}
  2393.  
/* Intentionally empty: no AVC quantizer-matrix commands are emitted for
 * the workaround decode. */
static void
gen8_jpeg_wa_avc_qm_state(VADriverContextP ctx,
                          struct gen7_mfd_context *gen7_mfd_context)
{

}
  2400.  
/*
 * Emit MFX_AVC_IMG_STATE for the workaround clip: a single-macroblock
 * (1x1 MB) frame picture (img_struct = 0), CABAC-coded, 4:2:0.
 */
static void
gen8_jpeg_wa_avc_img_state(VADriverContextP ctx,
                           struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int img_struct = 0;
    int mbaff_frame_flag = 0;
    unsigned int width_in_mbs = 1, height_in_mbs = 1;

    BEGIN_BCS_BATCH(batch, 16);
    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
    OUT_BCS_BATCH(batch,
                  width_in_mbs * height_in_mbs);
    OUT_BCS_BATCH(batch,
                  ((height_in_mbs - 1) << 16) |
                  ((width_in_mbs - 1) << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 24) |
                  (0 << 16) |
                  (0 << 14) |
                  (0 << 13) |
                  (0 << 12) | /* differ from GEN6 */
                  (0 << 10) |
                  (img_struct << 8));
    OUT_BCS_BATCH(batch,
                  (1 << 10) | /* 4:2:0 */
                  (1 << 7) |  /* CABAC */
                  (0 << 6) |
                  (0 << 5) |
                  (0 << 4) |
                  (0 << 3) |
                  (1 << 2) |
                  (mbaff_frame_flag << 1) |
                  (0 << 0));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  2448.  
  2449. static void
  2450. gen8_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
  2451.                                   struct gen7_mfd_context *gen7_mfd_context)
  2452. {
  2453.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  2454.     int i;
  2455.  
  2456.     BEGIN_BCS_BATCH(batch, 71);
  2457.     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
  2458.  
  2459.     /* reference surfaces 0..15 */
  2460.     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
  2461.         OUT_BCS_BATCH(batch, 0); /* top */
  2462.         OUT_BCS_BATCH(batch, 0); /* bottom */
  2463.     }
  2464.        
  2465.         OUT_BCS_BATCH(batch, 0);
  2466.  
  2467.     /* the current decoding frame/field */
  2468.     OUT_BCS_BATCH(batch, 0); /* top */
  2469.     OUT_BCS_BATCH(batch, 0);
  2470.     OUT_BCS_BATCH(batch, 0);
  2471.  
  2472.     /* POC List */
  2473.     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
  2474.         OUT_BCS_BATCH(batch, 0);
  2475.         OUT_BCS_BATCH(batch, 0);
  2476.     }
  2477.  
  2478.     OUT_BCS_BATCH(batch, 0);
  2479.     OUT_BCS_BATCH(batch, 0);
  2480.  
  2481.     ADVANCE_BCS_BATCH(batch);
  2482. }
  2483.  
/*
 * Emit MFX_IND_OBJ_BASE_ADDR_STATE pointing the bitstream base at the
 * pre-uploaded workaround clip buffer (see gen8_jpeg_wa_init()).
 */
static void
gen8_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
                                     struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 11);
    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
    OUT_BCS_RELOC(batch,
                  gen7_mfd_context->jpeg_wa_slice_data_bo,
                  I915_GEM_DOMAIN_INSTRUCTION, 0,
                  0);
    OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  2507.  
/*
 * Emit MFD_AVC_BSD_OBJECT covering the entire workaround clip as a
 * single (last) slice.
 */
static void
gen8_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
                            struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    /* the input bitstream format on GEN7 differs from GEN6 */
    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
    OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  (0 << 31) |
                  (0 << 14) |
                  (0 << 12) |
                  (0 << 10) |
                  (0 << 8));
    OUT_BCS_BATCH(batch,
                  ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) | /* byte part of the offset */
                  (0 << 5)  |
                  (0 << 4)  |
                  (1 << 3) | /* LastSlice Flag */
                  (gen7_jpeg_wa_clip.data_bit_offset & 0x7)); /* remaining bit part */
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  2534.  
/*
 * Emit MFX_AVC_SLICE_STATE for the single intra slice of the workaround
 * clip: deblocking disabled, empty reference lists, marked as the last
 * slice of the picture.
 */
static void
gen8_jpeg_wa_avc_slice_state(VADriverContextP ctx,
                             struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
    int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;
    int first_mb_in_slice = 0;
    int slice_type = SLICE_TYPE_I;

    BEGIN_BCS_BATCH(batch, 11);
    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, slice_type);
    OUT_BCS_BATCH(batch,
                  (num_ref_idx_l1 << 24) |
                  (num_ref_idx_l0 << 16) |
                  (0 << 8) |
                  (0 << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 29) |
                  (1 << 27) |   /* disable Deblocking */
                  (0 << 24) |
                  (gen7_jpeg_wa_clip.qp << 16) |
                  (0 << 8) |
                  (0 << 0));
    OUT_BCS_BATCH(batch,
                  (slice_ver_pos << 24) |
                  (slice_hor_pos << 16) |
                  (first_mb_in_slice << 0));
    OUT_BCS_BATCH(batch,
                  (next_slice_ver_pos << 16) |
                  (next_slice_hor_pos << 0));
    OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  2574.  
/*
 * JPEG hardware workaround: decode a tiny hard-coded AVC intra clip
 * into a private surface before the real JPEG frame.  Emits the full
 * MFX AVC state and BSD command sequence into the current batch; the
 * emission order below is required by the hardware command protocol.
 */
static void
gen8_mfd_jpeg_wa(VADriverContextP ctx,
                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    gen8_jpeg_wa_init(ctx, gen7_mfd_context);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
    gen8_jpeg_wa_surface_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);

    gen8_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
    gen8_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
}
  2594.  
  2595. #endif
  2596.  
  2597. void
  2598. gen8_mfd_jpeg_decode_picture(VADriverContextP ctx,
  2599.                              struct decode_state *decode_state,
  2600.                              struct gen7_mfd_context *gen7_mfd_context)
  2601. {
  2602.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  2603.     VAPictureParameterBufferJPEGBaseline *pic_param;
  2604.     VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
  2605.     dri_bo *slice_data_bo;
  2606.     int i, j, max_selector = 0;
  2607.  
  2608.     assert(decode_state->pic_param && decode_state->pic_param->buffer);
  2609.     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
  2610.  
  2611.     /* Currently only support Baseline DCT */
  2612.     gen8_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
  2613.     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
  2614. #ifdef JPEG_WA
  2615.     gen8_mfd_jpeg_wa(ctx, gen7_mfd_context);
  2616. #endif
  2617.     intel_batchbuffer_emit_mi_flush(batch);
  2618.     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
  2619.     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
  2620.     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
  2621.     gen8_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
  2622.     gen8_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);
  2623.  
  2624.     for (j = 0; j < decode_state->num_slice_params; j++) {
  2625.         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
  2626.         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
  2627.         slice_data_bo = decode_state->slice_datas[j]->bo;
  2628.         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
  2629.  
  2630.         if (j == decode_state->num_slice_params - 1)
  2631.             next_slice_group_param = NULL;
  2632.         else
  2633.             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
  2634.  
  2635.         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
  2636.             int component;
  2637.  
  2638.             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
  2639.  
  2640.             if (i < decode_state->slice_params[j]->num_elements - 1)
  2641.                 next_slice_param = slice_param + 1;
  2642.             else
  2643.                 next_slice_param = next_slice_group_param;
  2644.  
  2645.             for (component = 0; component < slice_param->num_components; component++) {
  2646.                 if (max_selector < slice_param->components[component].dc_table_selector)
  2647.                     max_selector = slice_param->components[component].dc_table_selector;
  2648.  
  2649.                 if (max_selector < slice_param->components[component].ac_table_selector)
  2650.                     max_selector = slice_param->components[component].ac_table_selector;
  2651.             }
  2652.  
  2653.             slice_param++;
  2654.         }
  2655.     }
  2656.  
  2657.     assert(max_selector < 2);
  2658.     gen8_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);
  2659.  
  2660.     for (j = 0; j < decode_state->num_slice_params; j++) {
  2661.         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
  2662.         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
  2663.         slice_data_bo = decode_state->slice_datas[j]->bo;
  2664.         gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
  2665.  
  2666.         if (j == decode_state->num_slice_params - 1)
  2667.             next_slice_group_param = NULL;
  2668.         else
  2669.             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
  2670.  
  2671.         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
  2672.             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
  2673.  
  2674.             if (i < decode_state->slice_params[j]->num_elements - 1)
  2675.                 next_slice_param = slice_param + 1;
  2676.             else
  2677.                 next_slice_param = next_slice_group_param;
  2678.  
  2679.             gen8_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
  2680.             slice_param++;
  2681.         }
  2682.     }
  2683.  
  2684.     intel_batchbuffer_end_atomic(batch);
  2685.     intel_batchbuffer_flush(batch);
  2686. }
  2687.  
/* VP8 DC dequantization lookup table, 128 entries; indexed by a
 * quantization index clipped to [0, 127] (see
 * vp8_clip_quantization_index()). */
static const int vp8_dc_qlookup[128] =
{
      4,   5,   6,   7,   8,   9,  10,  10,  11,  12,  13,  14,  15,  16,  17,  17,
     18,  19,  20,  20,  21,  21,  22,  22,  23,  23,  24,  25,  25,  26,  27,  28,
     29,  30,  31,  32,  33,  34,  35,  36,  37,  37,  38,  39,  40,  41,  42,  43,
     44,  45,  46,  46,  47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,
     59,  60,  61,  62,  63,  64,  65,  66,  67,  68,  69,  70,  71,  72,  73,  74,
     75,  76,  76,  77,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,
     91,  93,  95,  96,  98, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118,
    122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157,
};
  2699.  
/* VP8 AC dequantization lookup table: maps a clipped quantization index
 * (0..127) to the AC quantizer value for the Y1/Y2/UV AC coefficients
 * (RFC 6386 $14.1, ac_qlookup). */
static const int vp8_ac_qlookup[128] =
{
      4,   5,   6,   7,   8,   9,  10,  11,  12,  13,  14,  15,  16,  17,  18,  19,
     20,  21,  22,  23,  24,  25,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,
     36,  37,  38,  39,  40,  41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,
     52,  53,  54,  55,  56,  57,  58,  60,  62,  64,  66,  68,  70,  72,  74,  76,
     78,  80,  82,  84,  86,  88,  90,  92,  94,  96,  98, 100, 102, 104, 106, 108,
    110, 112, 114, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152,
    155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209,
    213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284,
};
  2711.  
  2712. static inline unsigned int vp8_clip_quantization_index(int index)
  2713. {
  2714.     if(index > 127)
  2715.         return 127;
  2716.     else if(index <0)
  2717.         return 0;
  2718.  
  2719.     return index;
  2720. }
  2721.  
/*
 * Per-frame setup for VP8 decoding: refreshes the reference frame-store
 * bindings, binds the render target as the decode output and (re)allocates
 * the row-store scratch buffers the MFX pipeline requires.  Must run before
 * any state commands are emitted for this frame.
 */
static void
gen8_mfd_vp8_decode_init(VADriverContextP ctx,
                          struct decode_state *decode_state,
                          struct gen7_mfd_context *gen7_mfd_context)
{
    struct object_surface *obj_surface;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    dri_bo *bo;
    VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
    int width_in_mbs = (pic_param->frame_width + 15) / 16;
    int height_in_mbs = (pic_param->frame_height + 15) / 16;

    assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
    assert(height_in_mbs > 0 && height_in_mbs <= 256);

    intel_update_vp8_frame_store_index(ctx,
                                       decode_state,
                                       pic_param,
                                       gen7_mfd_context->reference_surface);

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);

    /* Both outputs point at the render target; exactly one is marked valid,
     * chosen by whether the in-loop deblocking filter is enabled for this
     * frame.  Note the unreference-before-overwrite ordering: it drops the
     * previous frame's reference without leaking it. */
    dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.valid = !pic_param->pic_fields.bits.loop_filter_disable;

    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = pic_param->pic_fields.bits.loop_filter_disable;

    /* Segment-id map; may legitimately fail to allocate (see the comment in
     * gen8_mfd_vp8_pic_state), so its validity is re-checked there. */
    intel_ensure_vp8_segmentation_buffer(ctx,
        &gen7_mfd_context->segmentation_buffer, width_in_mbs, height_in_mbs);

    /* The same as AVC */
    dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "intra row store",
                      width_in_mbs * 64,
                      0x1000);
    assert(bo);
    gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "deblocking filter row store",
                      width_in_mbs * 64 * 4,
                      0x1000);
    assert(bo);
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 64 * 2,
                      0x1000);
    assert(bo);
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "mpr row store",
                      width_in_mbs * 64 * 2,
                      0x1000);
    assert(bo);
    gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;

    /* VP8 has no bitplane data (VC-1 only). */
    gen7_mfd_context->bitplane_read_buffer.valid = 0;
}
  2798.  
/*
 * Emit the 38-DW MFX_VP8_PIC_STATE command describing the current frame:
 * dimensions, loop-filter/segmentation flags, per-segment quantizer values,
 * coefficient-probability buffer, mode/MV probabilities and the
 * segmentation-id stream buffer.  The DW layout below must match the
 * hardware command exactly; do not reorder the OUT_BCS_BATCH calls.
 */
static void
gen8_mfd_vp8_pic_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferVP8 *pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
    VAIQMatrixBufferVP8 *iq_matrix = (VAIQMatrixBufferVP8 *)decode_state->iq_matrix->buffer;
    VASliceParameterBufferVP8 *slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer; /* one slice per frame */
    dri_bo *probs_bo = decode_state->probability_data->bo;
    int i, j,log2num;
    unsigned int quantization_value[4][6];

    /* There is no safe way to error out if the segmentation buffer
       could not be allocated. So, instead of aborting, simply decode
       something even if the result may look totally inacurate */
    const unsigned int enable_segmentation =
        pic_param->pic_fields.bits.segmentation_enabled &&
        gen7_mfd_context->segmentation_buffer.valid;

    /* NOTE(review): assumes num_of_partitions >= 2 (asserted in
     * gen8_mfd_vp8_bsd_object); for a single-partition stream log2(0)
     * would be evaluated here -- confirm upstream validation. */
    log2num = (int)log2(slice_param->num_of_partitions - 1);

    BEGIN_BCS_BATCH(batch, 38);
    OUT_BCS_BATCH(batch, MFX_VP8_PIC_STATE | (38 - 2));
    /* DW1: frame size in macroblocks, minus one */
    OUT_BCS_BATCH(batch,
                  (ALIGN(pic_param->frame_height, 16) / 16 - 1) << 16 |
                  (ALIGN(pic_param->frame_width, 16) / 16 - 1) << 0);
    /* DW2: partition count, filter and segmentation control flags */
    OUT_BCS_BATCH(batch,
                  log2num << 24 |
                  pic_param->pic_fields.bits.sharpness_level << 16 |
                  pic_param->pic_fields.bits.sign_bias_alternate << 13 |
                  pic_param->pic_fields.bits.sign_bias_golden << 12 |
                  pic_param->pic_fields.bits.loop_filter_adj_enable << 11 |
                  pic_param->pic_fields.bits.mb_no_coeff_skip << 10 |
                  pic_param->pic_fields.bits.update_mb_segmentation_map << 9 |
                  pic_param->pic_fields.bits.segmentation_enabled << 8 |
                  (enable_segmentation &&
                   !pic_param->pic_fields.bits.update_mb_segmentation_map) << 7 |
                  (enable_segmentation &&
                   pic_param->pic_fields.bits.update_mb_segmentation_map) << 6 |
                  (pic_param->pic_fields.bits.key_frame == 0 ? 1 : 0) << 5 |    /* 0 indicate an intra frame in VP8 stream/spec($9.1)*/
                  pic_param->pic_fields.bits.filter_type << 4 |
                  (pic_param->pic_fields.bits.version == 3) << 1 | /* full pixel mode for version 3 */
                  !!pic_param->pic_fields.bits.version << 0); /* version 0: 6 tap */

    /* DW3: per-segment loop filter levels */
    OUT_BCS_BATCH(batch,
                  pic_param->loop_filter_level[3] << 24 |
                  pic_param->loop_filter_level[2] << 16 |
                  pic_param->loop_filter_level[1] <<  8 |
                  pic_param->loop_filter_level[0] <<  0);

    /* Quantizer Value for 4 segmetns, DW4-DW15 */
    for (i = 0; i < 4; i++) {
                quantization_value[i][0] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][0])];/*yac*/
                quantization_value[i][1] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][1])];/*ydc*/
                quantization_value[i][2] = 2*vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][2])];/*y2dc*/
                /* 101581>>16 is equivalent to 155/100 */
                quantization_value[i][3] = (101581*vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][3])]) >> 16;/*y2ac*/
                quantization_value[i][4] = vp8_dc_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][4])];/*uvdc*/
                quantization_value[i][5] = vp8_ac_qlookup[vp8_clip_quantization_index(iq_matrix->quantization_index[i][5])];/*uvac*/

                /* Clamp per RFC 6386 $14.1: y2ac floor of 8, uvdc cap of 132 */
                quantization_value[i][3] = (quantization_value[i][3] > 8 ? quantization_value[i][3] : 8);
                quantization_value[i][4] = (quantization_value[i][4] < 132 ? quantization_value[i][4] : 132);

                OUT_BCS_BATCH(batch,
                      quantization_value[i][0] << 16 | /* Y1AC */
                      quantization_value[i][1] <<  0); /* Y1DC */
        OUT_BCS_BATCH(batch,
                      quantization_value[i][5] << 16 | /* UVAC */
                      quantization_value[i][4] <<  0); /* UVDC */
        OUT_BCS_BATCH(batch,
                      quantization_value[i][3] << 16 | /* Y2AC */
                      quantization_value[i][2] <<  0); /* Y2DC */
    }

    /* CoeffProbability table for non-key frame, DW16-DW18 */
    if (probs_bo) {
        OUT_BCS_RELOC(batch, probs_bo,
                      0, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    } else {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    /* DW19: segment tree probabilities */
    OUT_BCS_BATCH(batch,
                  pic_param->mb_segment_tree_probs[2] << 16 |
                  pic_param->mb_segment_tree_probs[1] <<  8 |
                  pic_param->mb_segment_tree_probs[0] <<  0);

    /* DW20: frame-level probabilities */
    OUT_BCS_BATCH(batch,
                  pic_param->prob_skip_false << 24 |
                  pic_param->prob_intra      << 16 |
                  pic_param->prob_last       <<  8 |
                  pic_param->prob_gf         <<  0);

    /* DW21-DW22: intra mode probabilities */
    OUT_BCS_BATCH(batch,
                  pic_param->y_mode_probs[3] << 24 |
                  pic_param->y_mode_probs[2] << 16 |
                  pic_param->y_mode_probs[1] <<  8 |
                  pic_param->y_mode_probs[0] <<  0);

    OUT_BCS_BATCH(batch,
                  pic_param->uv_mode_probs[2] << 16 |
                  pic_param->uv_mode_probs[1] <<  8 |
                  pic_param->uv_mode_probs[0] <<  0);

    /* MV update value, DW23-DW32 */
    for (i = 0; i < 2; i++) {
        for (j = 0; j < 20; j += 4) {
            OUT_BCS_BATCH(batch,
                          (j + 3 == 19 ? 0 : pic_param->mv_probs[i][j + 3]) << 24 |
                          pic_param->mv_probs[i][j + 2] << 16 |
                          pic_param->mv_probs[i][j + 1] <<  8 |
                          pic_param->mv_probs[i][j + 0] <<  0);
        }
    }

    /* DW33-DW34: loop filter deltas (signed 7-bit fields) */
    OUT_BCS_BATCH(batch,
                  (pic_param->loop_filter_deltas_ref_frame[3] & 0x7f) << 24 |
                  (pic_param->loop_filter_deltas_ref_frame[2] & 0x7f) << 16 |
                  (pic_param->loop_filter_deltas_ref_frame[1] & 0x7f) <<  8 |
                  (pic_param->loop_filter_deltas_ref_frame[0] & 0x7f) <<  0);

    OUT_BCS_BATCH(batch,
                  (pic_param->loop_filter_deltas_mode[3] & 0x7f) << 24 |
                  (pic_param->loop_filter_deltas_mode[2] & 0x7f) << 16 |
                  (pic_param->loop_filter_deltas_mode[1] & 0x7f) <<  8 |
                  (pic_param->loop_filter_deltas_mode[0] & 0x7f) <<  0);

    /* segmentation id stream base address, DW35-DW37 */
    if (enable_segmentation) {
        OUT_BCS_RELOC(batch, gen7_mfd_context->segmentation_buffer.bo,
                      0, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }
    else {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }
    ADVANCE_BCS_BATCH(batch);
}
  2947.  
  2948. static void
  2949. gen8_mfd_vp8_bsd_object(VADriverContextP ctx,
  2950.                         VAPictureParameterBufferVP8 *pic_param,
  2951.                         VASliceParameterBufferVP8 *slice_param,
  2952.                         dri_bo *slice_data_bo,
  2953.                         struct gen7_mfd_context *gen7_mfd_context)
  2954. {
  2955.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  2956.     int i, log2num;
  2957.     unsigned int offset = slice_param->slice_data_offset + ((slice_param->macroblock_offset + 7 ) >> 3);
  2958.     unsigned int used_bits = 8-pic_param->bool_coder_ctx.count;
  2959.     unsigned int partition_size_0 = slice_param->partition_size[0];
  2960.  
  2961.     assert(pic_param->bool_coder_ctx.count >= 0 && pic_param->bool_coder_ctx.count <= 7);
  2962.     if (used_bits == 8) {
  2963.         used_bits = 0;
  2964.         offset += 1;
  2965.         partition_size_0 -= 1;
  2966.     }
  2967.  
  2968.     assert(slice_param->num_of_partitions >= 2);
  2969.     assert(slice_param->num_of_partitions <= 9);
  2970.  
  2971.     log2num = (int)log2(slice_param->num_of_partitions - 1);
  2972.  
  2973.     BEGIN_BCS_BATCH(batch, 22);
  2974.     OUT_BCS_BATCH(batch, MFD_VP8_BSD_OBJECT | (22 - 2));
  2975.     OUT_BCS_BATCH(batch,
  2976.                   used_bits << 16 | /* Partition 0 CPBAC Entropy Count */
  2977.                   pic_param->bool_coder_ctx.range <<  8 | /* Partition 0 Count Entropy Range */
  2978.                   log2num << 4 |
  2979.                   (slice_param->macroblock_offset & 0x7));
  2980.     OUT_BCS_BATCH(batch,
  2981.                   pic_param->bool_coder_ctx.value << 24 | /* Partition 0 Count Entropy Value */
  2982.                   0);
  2983.  
  2984.     OUT_BCS_BATCH(batch, partition_size_0);
  2985.     OUT_BCS_BATCH(batch, offset);
  2986.     //partion sizes in bytes are present after the above first partition when there are more than one token partition
  2987.     offset += (partition_size_0 + 3 * (slice_param->num_of_partitions - 2));
  2988.     for (i = 1; i < 9; i++) {
  2989.         if (i < slice_param->num_of_partitions) {
  2990.             OUT_BCS_BATCH(batch, slice_param->partition_size[i]);
  2991.             OUT_BCS_BATCH(batch, offset);
  2992.         } else {
  2993.             OUT_BCS_BATCH(batch, 0);
  2994.             OUT_BCS_BATCH(batch, 0);
  2995.         }
  2996.  
  2997.         offset += slice_param->partition_size[i];
  2998.     }
  2999.  
  3000.     OUT_BCS_BATCH(batch,
  3001.                   1 << 31 | /* concealment method */
  3002.                   0);
  3003.  
  3004.     ADVANCE_BCS_BATCH(batch);
  3005. }
  3006.  
  3007. void
  3008. gen8_mfd_vp8_decode_picture(VADriverContextP ctx,
  3009.                             struct decode_state *decode_state,
  3010.                             struct gen7_mfd_context *gen7_mfd_context)
  3011. {
  3012.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  3013.     VAPictureParameterBufferVP8 *pic_param;
  3014.     VASliceParameterBufferVP8 *slice_param;
  3015.     dri_bo *slice_data_bo;
  3016.  
  3017.     assert(decode_state->pic_param && decode_state->pic_param->buffer);
  3018.     pic_param = (VAPictureParameterBufferVP8 *)decode_state->pic_param->buffer;
  3019.  
  3020.     /* one slice per frame */
  3021.     if (decode_state->num_slice_params != 1 ||
  3022.         (!decode_state->slice_params ||
  3023.          !decode_state->slice_params[0] ||
  3024.          (decode_state->slice_params[0]->num_elements != 1 || decode_state->slice_params[0]->buffer == NULL)) ||
  3025.         (!decode_state->slice_datas ||
  3026.          !decode_state->slice_datas[0] ||
  3027.          !decode_state->slice_datas[0]->bo) ||
  3028.         !decode_state->probability_data) {
  3029.         WARN_ONCE("Wrong parameters for VP8 decoding\n");
  3030.  
  3031.         return;
  3032.     }
  3033.  
  3034.     slice_param = (VASliceParameterBufferVP8 *)decode_state->slice_params[0]->buffer;
  3035.     slice_data_bo = decode_state->slice_datas[0]->bo;
  3036.  
  3037.     gen8_mfd_vp8_decode_init(ctx, decode_state, gen7_mfd_context);
  3038.     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
  3039.     intel_batchbuffer_emit_mi_flush(batch);
  3040.     gen8_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
  3041.     gen8_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
  3042.     gen8_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
  3043.     gen8_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VP8, gen7_mfd_context);
  3044.     gen8_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VP8, gen7_mfd_context);
  3045.     gen8_mfd_vp8_pic_state(ctx, decode_state, gen7_mfd_context);
  3046.     gen8_mfd_vp8_bsd_object(ctx, pic_param, slice_param, slice_data_bo, gen7_mfd_context);
  3047.     intel_batchbuffer_end_atomic(batch);
  3048.     intel_batchbuffer_flush(batch);
  3049. }
  3050.  
  3051. static VAStatus
  3052. gen8_mfd_decode_picture(VADriverContextP ctx,
  3053.                         VAProfile profile,
  3054.                         union codec_state *codec_state,
  3055.                         struct hw_context *hw_context)
  3056.  
  3057. {
  3058.     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
  3059.     struct decode_state *decode_state = &codec_state->decode;
  3060.     VAStatus vaStatus;
  3061.  
  3062.     assert(gen7_mfd_context);
  3063.  
  3064.     vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);
  3065.  
  3066.     if (vaStatus != VA_STATUS_SUCCESS)
  3067.         goto out;
  3068.  
  3069.     gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
  3070.  
  3071.     switch (profile) {
  3072.     case VAProfileMPEG2Simple:
  3073.     case VAProfileMPEG2Main:
  3074.         gen8_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
  3075.         break;
  3076.        
  3077.     case VAProfileH264ConstrainedBaseline:
  3078.     case VAProfileH264Main:
  3079.     case VAProfileH264High:
  3080.     case VAProfileH264StereoHigh:
  3081.     case VAProfileH264MultiviewHigh:
  3082.         gen8_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
  3083.         break;
  3084.  
  3085.     case VAProfileVC1Simple:
  3086.     case VAProfileVC1Main:
  3087.     case VAProfileVC1Advanced:
  3088.         gen8_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
  3089.         break;
  3090.  
  3091.     case VAProfileJPEGBaseline:
  3092.         gen8_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
  3093.         break;
  3094.  
  3095.     case VAProfileVP8Version0_3:
  3096.         gen8_mfd_vp8_decode_picture(ctx, decode_state, gen7_mfd_context);
  3097.         break;
  3098.  
  3099.     default:
  3100.         assert(0);
  3101.         break;
  3102.     }
  3103.  
  3104.     vaStatus = VA_STATUS_SUCCESS;
  3105.  
  3106. out:
  3107.     return vaStatus;
  3108. }
  3109.  
  3110. static void
  3111. gen8_mfd_context_destroy(void *hw_context)
  3112. {
  3113.     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
  3114.  
  3115.     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
  3116.     gen7_mfd_context->post_deblocking_output.bo = NULL;
  3117.  
  3118.     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
  3119.     gen7_mfd_context->pre_deblocking_output.bo = NULL;
  3120.  
  3121.     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
  3122.     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
  3123.  
  3124.     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
  3125.     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
  3126.  
  3127.     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
  3128.     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
  3129.  
  3130.     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
  3131.     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
  3132.  
  3133.     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
  3134.     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
  3135.  
  3136.     dri_bo_unreference(gen7_mfd_context->segmentation_buffer.bo);
  3137.     gen7_mfd_context->segmentation_buffer.bo = NULL;
  3138.  
  3139.     dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);
  3140.  
  3141.     intel_batchbuffer_free(gen7_mfd_context->base.batch);
  3142.     free(gen7_mfd_context);
  3143. }
  3144.  
/* Invalidate the cached MPEG-2 IQ matrix load flags so the first picture
 * forces a fresh upload of all four quantiser matrices. */
static void gen8_mfd_mpeg2_context_init(VADriverContextP ctx,
                                    struct gen7_mfd_context *gen7_mfd_context)
{
    gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
    gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
    gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
    gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
}
  3153.  
  3154. struct hw_context *
  3155. gen8_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
  3156. {
  3157.     struct intel_driver_data *intel = intel_driver_data(ctx);
  3158.     struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
  3159.     int i;
  3160.  
  3161.     gen7_mfd_context->base.destroy = gen8_mfd_context_destroy;
  3162.     gen7_mfd_context->base.run = gen8_mfd_decode_picture;
  3163.     gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
  3164.  
  3165.     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
  3166.         gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
  3167.         gen7_mfd_context->reference_surface[i].frame_store_id = -1;
  3168.     }
  3169.  
  3170.     gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
  3171.     gen7_mfd_context->segmentation_buffer.valid = 0;
  3172.  
  3173.     switch (obj_config->profile) {
  3174.     case VAProfileMPEG2Simple:
  3175.     case VAProfileMPEG2Main:
  3176.         gen8_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
  3177.         break;
  3178.  
  3179.     case VAProfileH264ConstrainedBaseline:
  3180.     case VAProfileH264Main:
  3181.     case VAProfileH264High:
  3182.     case VAProfileH264StereoHigh:
  3183.     case VAProfileH264MultiviewHigh:
  3184.         gen8_mfd_avc_context_init(ctx, gen7_mfd_context);
  3185.         break;
  3186.     default:
  3187.         break;
  3188.     }
  3189.     return (struct hw_context *)gen7_mfd_context;
  3190. }
  3191.