Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | Download | RSS feed

  1. /*
  2.  * Copyright © 2011 Intel Corporation
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the
  6.  * "Software"), to deal in the Software without restriction, including
  7.  * without limitation the rights to use, copy, modify, merge, publish,
  8.  * distribute, sub license, and/or sell copies of the Software, and to
  9.  * permit persons to whom the Software is furnished to do so, subject to
  10.  * the following conditions:
  11.  *
  12.  * The above copyright notice and this permission notice (including the
  13.  * next paragraph) shall be included in all copies or substantial portions
  14.  * of the Software.
  15.  *
  16.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  17.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  18.  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  19.  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
  20.  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  21.  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  22.  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23.  *
  24.  * Authors:
  25.  *    Xiang Haihao <haihao.xiang@intel.com>
  26.  *
  27.  */
  28.  
  29. #ifndef HAVE_GEN_AVC_SURFACE
  30. #define HAVE_GEN_AVC_SURFACE 1
  31. #endif
  32.  
  33. #include "sysdeps.h"
  34. #include "intel_batchbuffer.h"
  35. #include "intel_driver.h"
  36. #include "i965_defines.h"
  37. #include "i965_drv_video.h"
  38. #include "i965_decoder_utils.h"
  39.  
  40. #include "gen7_mfd.h"
  41.  
/* Standard zig-zag scan order for an 8x8 coefficient block: entry k is
 * the raster-order index of the k-th coefficient in scan order. Used to
 * reorder quantization-matrix data between scan and raster layouts
 * (consumers are further down in this file, outside this excerpt). */
static const uint32_t zigzag_direct[64] = {
    0,   1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
  52.  
  53. static void
  54. gen7_mfd_avc_frame_store_index(VADriverContextP ctx,
  55.                                VAPictureParameterBufferH264 *pic_param,
  56.                                struct gen7_mfd_context *gen7_mfd_context)
  57. {
  58.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  59.     int i, j;
  60.  
  61.     assert(ARRAY_ELEMS(gen7_mfd_context->reference_surface) == ARRAY_ELEMS(pic_param->ReferenceFrames));
  62.  
  63.     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
  64.         int found = 0;
  65.  
  66.         if (gen7_mfd_context->reference_surface[i].surface_id == VA_INVALID_ID)
  67.             continue;
  68.  
  69.         for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
  70.             VAPictureH264 *ref_pic = &pic_param->ReferenceFrames[j];
  71.             if (ref_pic->flags & VA_PICTURE_H264_INVALID)
  72.                 continue;
  73.  
  74.             if (gen7_mfd_context->reference_surface[i].surface_id == ref_pic->picture_id) {
  75.                 found = 1;
  76.                 break;
  77.             }
  78.         }
  79.  
  80.         if (!found) {
  81.             struct object_surface *obj_surface = SURFACE(gen7_mfd_context->reference_surface[i].surface_id);
  82.             obj_surface->flags &= ~SURFACE_REFERENCED;
  83.  
  84.             if ((obj_surface->flags & SURFACE_ALL_MASK) == SURFACE_DISPLAYED) {
  85.                 dri_bo_unreference(obj_surface->bo);
  86.                 obj_surface->bo = NULL;
  87.                 obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
  88.             }
  89.  
  90.             if (obj_surface->free_private_data)
  91.                 obj_surface->free_private_data(&obj_surface->private_data);
  92.  
  93.             gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
  94.             gen7_mfd_context->reference_surface[i].frame_store_id = -1;
  95.         }
  96.     }
  97.  
  98.     for (i = 0; i < ARRAY_ELEMS(pic_param->ReferenceFrames); i++) {
  99.         VAPictureH264 *ref_pic = &pic_param->ReferenceFrames[i];
  100.         int found = 0;
  101.  
  102.         if (ref_pic->flags & VA_PICTURE_H264_INVALID)
  103.             continue;
  104.  
  105.         for (j = 0; j < ARRAY_ELEMS(gen7_mfd_context->reference_surface); j++) {
  106.             if (gen7_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID)
  107.                 continue;
  108.            
  109.             if (gen7_mfd_context->reference_surface[j].surface_id == ref_pic->picture_id) {
  110.                 found = 1;
  111.                 break;
  112.             }
  113.         }
  114.  
  115.         if (!found) {
  116.             int frame_idx;
  117.             struct object_surface *obj_surface = SURFACE(ref_pic->picture_id);
  118.            
  119.             assert(obj_surface);
  120.             i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
  121.  
  122.             for (frame_idx = 0; frame_idx < ARRAY_ELEMS(gen7_mfd_context->reference_surface); frame_idx++) {
  123.                 for (j = 0; j < ARRAY_ELEMS(gen7_mfd_context->reference_surface); j++) {
  124.                     if (gen7_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID)
  125.                         continue;
  126.  
  127.                     if (gen7_mfd_context->reference_surface[j].frame_store_id == frame_idx)
  128.                         break;
  129.                 }
  130.  
  131.                 if (j == ARRAY_ELEMS(gen7_mfd_context->reference_surface))
  132.                     break;
  133.             }
  134.  
  135.             assert(frame_idx < ARRAY_ELEMS(gen7_mfd_context->reference_surface));
  136.  
  137.             for (j = 0; j < ARRAY_ELEMS(gen7_mfd_context->reference_surface); j++) {
  138.                 if (gen7_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID) {
  139.                     gen7_mfd_context->reference_surface[j].surface_id = ref_pic->picture_id;
  140.                     gen7_mfd_context->reference_surface[j].frame_store_id = frame_idx;
  141.                     break;
  142.                 }
  143.             }
  144.         }
  145.     }
  146.  
  147.     /* sort */
  148.     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface) - 1; i++) {
  149.         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
  150.             gen7_mfd_context->reference_surface[i].frame_store_id == i)
  151.             continue;
  152.  
  153.         for (j = i + 1; j < ARRAY_ELEMS(gen7_mfd_context->reference_surface); j++) {
  154.             if (gen7_mfd_context->reference_surface[j].surface_id != VA_INVALID_ID &&
  155.                 gen7_mfd_context->reference_surface[j].frame_store_id == i) {
  156.                 VASurfaceID id = gen7_mfd_context->reference_surface[i].surface_id;
  157.                 int frame_idx = gen7_mfd_context->reference_surface[i].frame_store_id;
  158.  
  159.                 gen7_mfd_context->reference_surface[i].surface_id = gen7_mfd_context->reference_surface[j].surface_id;
  160.                 gen7_mfd_context->reference_surface[i].frame_store_id = gen7_mfd_context->reference_surface[j].frame_store_id;
  161.                 gen7_mfd_context->reference_surface[j].surface_id = id;
  162.                 gen7_mfd_context->reference_surface[j].frame_store_id = frame_idx;
  163.                 break;
  164.             }
  165.         }
  166.     }
  167. }
  168.  
  169. static void
  170. gen7_mfd_init_avc_surface(VADriverContextP ctx,
  171.                           VAPictureParameterBufferH264 *pic_param,
  172.                           struct object_surface *obj_surface)
  173. {
  174.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  175.     GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
  176.     int width_in_mbs, height_in_mbs;
  177.  
  178.     obj_surface->free_private_data = gen_free_avc_surface;
  179.     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
  180.     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
  181.  
  182.     if (!gen7_avc_surface) {
  183.         gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
  184.         assert((obj_surface->size & 0x3f) == 0);
  185.         obj_surface->private_data = gen7_avc_surface;
  186.     }
  187.  
  188.     gen7_avc_surface->dmv_bottom_flag = (pic_param->pic_fields.bits.field_pic_flag &&
  189.                                          !pic_param->seq_fields.bits.direct_8x8_inference_flag);
  190.  
  191.     if (gen7_avc_surface->dmv_top == NULL) {
  192.         gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
  193.                                                  "direct mv w/r buffer",
  194.                                                  width_in_mbs * (height_in_mbs + 1) * 64,
  195.                                                  0x1000);
  196.         assert(gen7_avc_surface->dmv_top);
  197.     }
  198.  
  199.     if (gen7_avc_surface->dmv_bottom_flag &&
  200.         gen7_avc_surface->dmv_bottom == NULL) {
  201.         gen7_avc_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr,
  202.                                                     "direct mv w/r buffer",
  203.                                                     width_in_mbs * (height_in_mbs + 1) * 64,
  204.                                                     0x1000);
  205.         assert(gen7_avc_surface->dmv_bottom);
  206.     }
  207. }
  208.  
/*
 * Emit MFX_PIPE_MODE_SELECT: configure the MFX engine for VLD decoding
 * of the chosen codec standard. Dword order and count are fixed by the
 * hardware command layout, so nothing here may be reordered.
 */
static void
gen7_mfd_pipe_mode_select(VADriverContextP ctx,
                          struct decode_state *decode_state,
                          int standard_select,
                          struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    /* Only these four codec formats are valid for this decoder. */
    assert(standard_select == MFX_FORMAT_MPEG2 ||
           standard_select == MFX_FORMAT_AVC ||
           standard_select == MFX_FORMAT_VC1 ||
           standard_select == MFX_FORMAT_JPEG);

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  (MFX_LONG_MODE << 17) | /* Currently only support long format */
                  (MFD_MODE_VLD << 15) | /* VLD mode */
                  (0 << 10) | /* disable Stream-Out */
                  (gen7_mfd_context->post_deblocking_output.valid << 9)  | /* Post Deblocking Output */
                  (gen7_mfd_context->pre_deblocking_output.valid << 8)  | /* Pre Deblocking Output */
                  (0 << 5)  | /* not in stitch mode */
                  (MFX_CODEC_DECODE << 4)  | /* decoding mode */
                  (standard_select << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
                  (0 << 1)  |
                  (0 << 0));
    OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
    OUT_BCS_BATCH(batch, 0); /* reserved */
    ADVANCE_BCS_BATCH(batch);
}
  243.  
/*
 * Emit MFX_SURFACE_STATE: describe the destination surface (dimensions,
 * pitch, tiling, and chroma plane offsets) for the current render
 * target. Assumes an NV12-style planar 4:2:0 layout with interleaved
 * chroma for non-JPEG codecs.
 */
static void
gen7_mfd_surface_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       int standard_select,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface = SURFACE(decode_state->current_render_target);
    unsigned int y_cb_offset;
    unsigned int y_cr_offset;

    assert(obj_surface);

    /* Row offsets (in lines) from the luma base to the chroma planes. */
    y_cb_offset = obj_surface->y_cb_offset;
    y_cr_offset = obj_surface->y_cr_offset;

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  ((obj_surface->orig_height - 1) << 18) |
                  ((obj_surface->orig_width - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
                  ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
                  (0 << 22) | /* surface object control state, ignored */
                  ((obj_surface->width - 1) << 3) | /* pitch */
                  (0 << 2)  | /* must be 0 */
                  (1 << 1)  | /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for U(Cb), must be 0 */
                  (y_cb_offset << 0)); /* Y offset for U(Cb) */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for V(Cr), must be 0 */
                  (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
    ADVANCE_BCS_BATCH(batch);
}
  283.  
/*
 * Emit MFX_PIPE_BUF_ADDR_STATE (24 dwords): program the pre/post
 * deblocking outputs, the row-store scratch buffers, and the 16
 * reference picture addresses (DW 7..22). Slots with no valid buffer
 * are programmed as 0.
 */
static void
gen7_mfd_pipe_buf_addr_state(VADriverContextP ctx,
                             struct decode_state *decode_state,
                             int standard_select,
                             struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    int i;

    BEGIN_BCS_BATCH(batch, 24);
    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (24 - 2));
    if (gen7_mfd_context->pre_deblocking_output.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    if (gen7_mfd_context->post_deblocking_output.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */

    if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    /* DW 7..22: one address per frame-store slot; reads only, so the
     * write domain for the relocations is 0. */
    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
        struct object_surface *obj_surface;

        if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
            obj_surface = SURFACE(gen7_mfd_context->reference_surface[i].surface_id);
            assert(obj_surface && obj_surface->bo);

            OUT_BCS_RELOC(batch, obj_surface->bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0);
        } else {
            OUT_BCS_BATCH(batch, 0);
        }
    }

    OUT_BCS_BATCH(batch, 0);   /* ignore DW23 for decoding */
    ADVANCE_BCS_BATCH(batch);
}
  346.  
/*
 * Emit MFX_IND_OBJ_BASE_ADDR_STATE: point the MFX engine at the slice
 * data (indirect bitstream object) buffer. In VLD mode only the
 * bitstream base/limit dwords matter; the remaining indirect object
 * addresses are unused and programmed to zero.
 */
static void
gen7_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
                                 dri_bo *slice_data_bo,
                                 int standard_select,
                                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 11);
    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
    OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
    OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  369.  
/*
 * Emit MFX_BSP_BUF_BASE_ADDR_STATE: program the BSD/MPC and MPR
 * row-store scratch buffers plus the bitplane read buffer (VC-1).
 * Invalid/absent buffers are programmed as 0.
 */
static void
gen7_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
                                 struct decode_state *decode_state,
                                 int standard_select,
                                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 4);
    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));

    if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    /* Bitplane buffer is read-only for the GPU: write domain is 0. */
    if (gen7_mfd_context->bitplane_read_buffer.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
  404.  
  405. static void
  406. gen7_mfd_qm_state(VADriverContextP ctx,
  407.                   int qm_type,
  408.                   unsigned char *qm,
  409.                   int qm_length,
  410.                   struct gen7_mfd_context *gen7_mfd_context)
  411. {
  412.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  413.     unsigned int qm_buffer[16];
  414.  
  415.     assert(qm_length <= 16 * 4);
  416.     memcpy(qm_buffer, qm, qm_length);
  417.  
  418.     BEGIN_BCS_BATCH(batch, 18);
  419.     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
  420.     OUT_BCS_BATCH(batch, qm_type << 0);
  421.     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
  422.     ADVANCE_BCS_BATCH(batch);
  423. }
  424.  
/*
 * Emit MFX_AVC_IMG_STATE (16 dwords): per-picture AVC decode
 * parameters derived from the VA picture parameter buffer — picture
 * size in MBs, QP index offsets, weighted prediction, image structure
 * (frame/top/bottom field), and the sequence/picture flag bits.
 */
static void
gen7_mfd_avc_img_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int img_struct;
    int mbaff_frame_flag;
    unsigned int width_in_mbs, height_in_mbs;
    VAPictureParameterBufferH264 *pic_param;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));

    /* Image structure encoding: 0 = frame, 1 = top field, 3 = bottom field. */
    if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
        img_struct = 1;
    else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
        img_struct = 3;
    else
        img_struct = 0;

    /* Field pictures must carry field_pic_flag, frames must not. */
    if ((img_struct & 0x1) == 0x1) {
        assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
    } else {
        assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
    }

    if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
        assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
        assert(pic_param->pic_fields.bits.field_pic_flag == 0);
    } else {
        assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
    }

    /* MBAFF applies only to frame pictures with MB-adaptive coding. */
    mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
                        !pic_param->pic_fields.bits.field_pic_flag);

    width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */

    /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
    assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
           pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
    assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */

    BEGIN_BCS_BATCH(batch, 16);
    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
    OUT_BCS_BATCH(batch,
                  width_in_mbs * height_in_mbs);
    OUT_BCS_BATCH(batch,
                  ((height_in_mbs - 1) << 16) |
                  ((width_in_mbs - 1) << 0));
    OUT_BCS_BATCH(batch,
                  ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
                  ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
                  (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
                  (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
                  (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
                  (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
                  (img_struct << 8));
    OUT_BCS_BATCH(batch,
                  (pic_param->seq_fields.bits.chroma_format_idc << 10) |
                  (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
                  ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
                  (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
                  (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
                  (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
                  (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
                  (mbaff_frame_flag << 1) |
                  (pic_param->pic_fields.bits.field_pic_flag << 0));
    /* DW5..15: unused for this configuration, programmed as zero. */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  509.  
  510. static void
  511. gen7_mfd_avc_qm_state(VADriverContextP ctx,
  512.                       struct decode_state *decode_state,
  513.                       struct gen7_mfd_context *gen7_mfd_context)
  514. {
  515.     VAIQMatrixBufferH264 *iq_matrix;
  516.     VAPictureParameterBufferH264 *pic_param;
  517.  
  518.     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
  519.         iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
  520.     else
  521.         iq_matrix = &gen7_mfd_context->iq_matrix.h264;
  522.  
  523.     assert(decode_state->pic_param && decode_state->pic_param->buffer);
  524.     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
  525.  
  526.     gen7_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
  527.     gen7_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
  528.  
  529.     if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
  530.         gen7_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
  531.         gen7_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
  532.     }
  533. }
  534.  
/*
 * Emit MFX_AVC_DIRECTMODE_STATE (69 dwords): the direct-mode MV buffer
 * addresses for each of the 16 reference frame-store slots and the
 * current picture, followed by the POC (Top/BottomFieldOrderCnt) list
 * for the references and the current picture. Slots without a valid
 * surface are programmed as zero.
 */
static void
gen7_mfd_avc_directmode_state(VADriverContextP ctx,
                              VAPictureParameterBufferH264 *pic_param,
                              VASliceParameterBufferH264 *slice_param,
                              struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct object_surface *obj_surface;
    GenAvcSurface *gen7_avc_surface;
    VAPictureH264 *va_pic;
    int i, j;

    BEGIN_BCS_BATCH(batch, 69);
    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));

    /* reference surfaces 0..15 */
    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
        if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
            obj_surface = SURFACE(gen7_mfd_context->reference_surface[i].surface_id);
            assert(obj_surface);
            gen7_avc_surface = obj_surface->private_data;

            if (gen7_avc_surface == NULL) {
                OUT_BCS_BATCH(batch, 0);
                OUT_BCS_BATCH(batch, 0);
            } else {
                OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
                              I915_GEM_DOMAIN_INSTRUCTION, 0,
                              0);

                /* Without a separate bottom-field DMV buffer, the top
                 * buffer serves for both entries. */
                if (gen7_avc_surface->dmv_bottom_flag == 1)
                    OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_bottom,
                                  I915_GEM_DOMAIN_INSTRUCTION, 0,
                                  0);
                else
                    OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
                                  I915_GEM_DOMAIN_INSTRUCTION, 0,
                                  0);
            }
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    /* the current decoding frame/field */
    va_pic = &pic_param->CurrPic;
    assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
    obj_surface = SURFACE(va_pic->picture_id);
    assert(obj_surface && obj_surface->bo && obj_surface->private_data);
    gen7_avc_surface = obj_surface->private_data;

    /* The current picture's DMV buffers are written by the GPU, hence
     * the non-zero write domain on these relocations. */
    OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);

    if (gen7_avc_surface->dmv_bottom_flag == 1)
        OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_bottom,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);

    /* POC List */
    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
        if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
            /* Find the VAPictureH264 matching this frame-store slot to
             * read its field order counts. */
            int found = 0;
            for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
                va_pic = &pic_param->ReferenceFrames[j];

                if (va_pic->flags & VA_PICTURE_H264_INVALID)
                    continue;

                if (va_pic->picture_id == gen7_mfd_context->reference_surface[i].surface_id) {
                    found = 1;
                    break;
                }
            }

            assert(found == 1);
            assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));

            OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
            OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    /* Finally the current picture's POCs. */
    va_pic = &pic_param->CurrPic;
    OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
    OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);

    ADVANCE_BCS_BATCH(batch);
}
  634.  
/*
 * Emit an MFX_AVC_SLICE_STATE command for one slice.
 *
 * Derives the normalized slice type, the active reference-list sizes,
 * and the macroblock coordinates of this slice and of the next slice
 * (next_slice_param == NULL marks the last slice of the picture, which
 * sets the last-slice flag and points "next" just past the bottom row).
 */
static void
gen7_mfd_avc_slice_state(VADriverContextP ctx,
                         VAPictureParameterBufferH264 *pic_param,
                         VASliceParameterBufferH264 *slice_param,
                         VASliceParameterBufferH264 *next_slice_param,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
    int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
    int num_ref_idx_l0, num_ref_idx_l1;
    int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
                         pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
    int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
    int slice_type;

    /* Normalize SI -> I and SP -> P; only I/P/B are sent to the hardware */
    if (slice_param->slice_type == SLICE_TYPE_I ||
        slice_param->slice_type == SLICE_TYPE_SI) {
        slice_type = SLICE_TYPE_I;
    } else if (slice_param->slice_type == SLICE_TYPE_P ||
               slice_param->slice_type == SLICE_TYPE_SP) {
        slice_type = SLICE_TYPE_P;
    } else {
        assert(slice_param->slice_type == SLICE_TYPE_B);
        slice_type = SLICE_TYPE_B;
    }

    /* Active reference counts per list; lists unused by the slice type
     * must be zero (asserted against the submitted parameters). */
    if (slice_type == SLICE_TYPE_I) {
        assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
        num_ref_idx_l0 = 0;
        num_ref_idx_l1 = 0;
    } else if (slice_type == SLICE_TYPE_P) {
        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
        num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_idx_l1 = 0;
    } else {
        num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
    }

    /* In MBAFF frames first_mb_in_slice addresses MB pairs, so the
     * shift doubles it to get a plain macroblock index. */
    first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
    slice_hor_pos = first_mb_in_slice % width_in_mbs;
    slice_ver_pos = first_mb_in_slice / width_in_mbs;

    if (next_slice_param) {
        first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture;
        next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs;
        next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
    } else {
        /* Last slice: "next" position is one row past the picture
         * (height is halved for field pictures). */
        next_slice_hor_pos = 0;
        next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
    }

    BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, slice_type);
    OUT_BCS_BATCH(batch,
                  (num_ref_idx_l1 << 24) |
                  (num_ref_idx_l0 << 16) |
                  (slice_param->chroma_log2_weight_denom << 8) |
                  (slice_param->luma_log2_weight_denom << 0));
    OUT_BCS_BATCH(batch,
                  (slice_param->direct_spatial_mv_pred_flag << 29) |
                  (slice_param->disable_deblocking_filter_idc << 27) |
                  (slice_param->cabac_init_idc << 24) |
                  ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
                  ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
                  ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
    OUT_BCS_BATCH(batch,
                  (slice_ver_pos << 24) |
                  (slice_hor_pos << 16) |
                  (first_mb_in_slice << 0));
    OUT_BCS_BATCH(batch,
                  (next_slice_ver_pos << 16) |
                  (next_slice_hor_pos << 0));
    OUT_BCS_BATCH(batch,
                  (next_slice_param == NULL) << 19); /* last slice flag */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  720.  
  721. static inline void
  722. gen7_mfd_avc_ref_idx_state(VADriverContextP ctx,
  723.                            VAPictureParameterBufferH264 *pic_param,
  724.                            VASliceParameterBufferH264 *slice_param,
  725.                            struct gen7_mfd_context *gen7_mfd_context)
  726. {
  727.     gen6_send_avc_ref_idx_state(
  728.         gen7_mfd_context->base.batch,
  729.         slice_param,
  730.         gen7_mfd_context->reference_surface
  731.     );
  732. }
  733.  
  734. static void
  735. gen7_mfd_avc_weightoffset_state(VADriverContextP ctx,
  736.                                 VAPictureParameterBufferH264 *pic_param,
  737.                                 VASliceParameterBufferH264 *slice_param,
  738.                                 struct gen7_mfd_context *gen7_mfd_context)
  739. {
  740.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  741.     int i, j, num_weight_offset_table = 0;
  742.     short weightoffsets[32 * 6];
  743.  
  744.     if ((slice_param->slice_type == SLICE_TYPE_P ||
  745.          slice_param->slice_type == SLICE_TYPE_SP) &&
  746.         (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
  747.         num_weight_offset_table = 1;
  748.     }
  749.    
  750.     if ((slice_param->slice_type == SLICE_TYPE_B) &&
  751.         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
  752.         num_weight_offset_table = 2;
  753.     }
  754.  
  755.     for (i = 0; i < num_weight_offset_table; i++) {
  756.         BEGIN_BCS_BATCH(batch, 98);
  757.         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
  758.         OUT_BCS_BATCH(batch, i);
  759.  
  760.         if (i == 0) {
  761.             for (j = 0; j < 32; j++) {
  762.                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
  763.                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
  764.                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
  765.                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
  766.                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
  767.                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
  768.             }
  769.         } else {
  770.             for (j = 0; j < 32; j++) {
  771.                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
  772.                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
  773.                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
  774.                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
  775.                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
  776.                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
  777.             }
  778.         }
  779.  
  780.         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
  781.         ADVANCE_BCS_BATCH(batch);
  782.     }
  783. }
  784.  
/*
 * Emit an MFD_AVC_BSD_OBJECT command that starts decoding of one
 * slice's bitstream data.
 *
 * avc_get_first_mb_bit_offset() returns the bit offset of the first
 * macroblock past the slice header, accounting for the entropy coding
 * mode (CAVLC vs CABAC); the byte part goes into DW4[31:16] and the
 * remaining 0-7 bits into DW4[2:0].
 */
static void
gen7_mfd_avc_bsd_object(VADriverContextP ctx,
                        VAPictureParameterBufferH264 *pic_param,
                        VASliceParameterBufferH264 *slice_param,
                        dri_bo *slice_data_bo,
                        VASliceParameterBufferH264 *next_slice_param,
                        struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    unsigned int slice_data_bit_offset;

    slice_data_bit_offset = avc_get_first_mb_bit_offset(
        slice_data_bo,
        slice_param,
        pic_param->pic_fields.bits.entropy_coding_mode_flag
    );

    /* the input bitstream format on GEN7 differs from GEN6 */
    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
    /* DW1: remaining slice data size past the offset */
    OUT_BCS_BATCH(batch,
                  (slice_param->slice_data_size - slice_param->slice_data_offset));
    /* DW2: byte offset of the slice within the indirect object buffer */
    OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
    OUT_BCS_BATCH(batch,
                  (0 << 31) |
                  (0 << 14) |
                  (0 << 12) |
                  (0 << 10) |
                  (0 << 8));
    OUT_BCS_BATCH(batch,
                  ((slice_data_bit_offset >> 3) << 16) |
                  (1 << 7)  |
                  (0 << 5)  |
                  (0 << 4)  |
                  ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
                  (slice_data_bit_offset & 0x7));
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  824.  
/*
 * One-time AVC context initialization: seed the context's cached H.264
 * IQ matrix with the default flat scaling lists, so decoding works even
 * before the application submits a VAIQMatrixBufferH264.
 */
static inline void
gen7_mfd_avc_context_init(
    VADriverContextP         ctx,
    struct gen7_mfd_context *gen7_mfd_context
)
{
    /* Initialize flat scaling lists */
    avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
}
  834.  
/*
 * Per-picture setup for AVC decoding.
 *
 * Scans the submitted slices to decide whether in-loop deblocking is
 * enabled anywhere (disable_deblocking_filter_idc != 1), fills the
 * frame-store index from the reference list, binds the current render
 * target as the decoder output, and (re)allocates the row-store
 * scratch buffers whose sizes scale with picture width.
 */
static void
gen7_mfd_avc_decode_init(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    VAPictureParameterBufferH264 *pic_param;
    VASliceParameterBufferH264 *slice_param;
    VAPictureH264 *va_pic;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    dri_bo *bo;
    int i, j, enable_avc_ildb = 0;
    unsigned int width_in_mbs, height_in_mbs;

    /* Stop scanning as soon as one slice enables deblocking */
    for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            if (slice_param->disable_deblocking_filter_idc != 1) {
                enable_avc_ildb = 1;
                break;
            }

            slice_param++;
        }
    }

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    gen7_mfd_avc_frame_store_index(ctx, pic_param, gen7_mfd_context);
    width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
    assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
    assert(height_in_mbs > 0 && height_in_mbs <= 256);

    /* Current decoded picture */
    va_pic = &pic_param->CurrPic;
    assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
    obj_surface = SURFACE(va_pic->picture_id);
    assert(obj_surface);
    /* Tag the surface as referenced only if the picture is a reference */
    obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
    obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0);
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
    gen7_mfd_init_avc_surface(ctx, pic_param, obj_surface);

    /* Both output slots point at the render target's bo; only one is
     * marked valid, depending on whether deblocking is enabled. */
    dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;

    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;

    /* Row-store scratch buffers: sizes are bytes-per-MB-column times
     * picture width in MBs. */
    dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "intra row store",
                      width_in_mbs * 64,
                      0x1000);
    assert(bo);
    gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "deblocking filter row store",
                      width_in_mbs * 64 * 4,
                      0x1000);
    assert(bo);
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 64 * 2,
                      0x1000);
    assert(bo);
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "mpr row store",
                      width_in_mbs * 64 * 2,
                      0x1000);
    assert(bo);
    gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;

    /* AVC uses no bitplane buffer (that is VC-1 only) */
    gen7_mfd_context->bitplane_read_buffer.valid = 0;
}
  936.  
/*
 * Top-level AVC decode entry for one picture.
 *
 * Runs per-picture init, then builds a single atomic BCS batch: common
 * MFX pipeline state first, followed, for every slice, by direct-mode /
 * ref-idx / weight-offset / slice state and one BSD object.  Each
 * slice's "next slice" pointer (NULL for the very last slice) drives
 * the last-slice flags inside those commands.
 */
static void
gen7_mfd_avc_decode_picture(VADriverContextP ctx,
                            struct decode_state *decode_state,
                            struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferH264 *pic_param;
    VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    gen7_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);

    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen7_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen7_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen7_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen7_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen7_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
    gen7_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);

    /* Outer loop: slice parameter buffers; inner loop: slices within
     * each buffer.  slice_param is advanced element by element. */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen7_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);

        /* First slice of the NEXT buffer, used when the current buffer
         * runs out of elements; NULL only for the final buffer. */
        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen7_mfd_avc_directmode_state(ctx, pic_param, slice_param, gen7_mfd_context);
            gen7_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
            gen7_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
            gen7_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
            gen7_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
  997.  
/*
 * Per-picture setup for MPEG-2 decoding.
 *
 * Fills the reference-surface table, binds the current render target
 * as the (pre-deblocking) output — MPEG-2 has no in-loop deblocking —
 * and (re)allocates the single BSD/MPC row-store scratch buffer.  All
 * other scratch/bitplane slots are marked invalid.
 */
static void
gen7_mfd_mpeg2_decode_init(VADriverContextP ctx,
                           struct decode_state *decode_state,
                           struct gen7_mfd_context *gen7_mfd_context)
{
    VAPictureParameterBufferMPEG2 *pic_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    dri_bo *bo;
    unsigned int width_in_mbs;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
    width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;

    mpeg2_set_reference_surfaces(
        ctx,
        gen7_mfd_context->reference_surface,
        decode_state,
        pic_param
    );

    /* Current decoded picture */
    obj_surface = SURFACE(decode_state->current_render_target);
    assert(obj_surface);
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);

    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = 1;

    /* 96 bytes of row-store per MB column */
    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 96,
                      0x1000);
    assert(bo);
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    /* Buffers not used by the MPEG-2 pipeline */
    gen7_mfd_context->post_deblocking_output.valid = 0;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
    gen7_mfd_context->bitplane_read_buffer.valid = 0;
}
  1045.  
/*
 * Emit MFX_MPEG2_PIC_STATE.
 *
 * pic_param->f_code packs the four 4-bit f_code values with [0][0] in
 * bits 15:12 down to [1][1] in bits 3:0; DW1 re-packs them into bits
 * 31:16 for the hardware, followed by the picture-coding-extension
 * flags.  DW3 carries the frame size in macroblocks, minus one.
 */
static void
gen7_mfd_mpeg2_pic_state(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferMPEG2 *pic_param;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;

    BEGIN_BCS_BATCH(batch, 13);
    OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
    OUT_BCS_BATCH(batch,
                  (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
                  ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
                  ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
                  ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
                  pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
                  pic_param->picture_coding_extension.bits.picture_structure << 12 |
                  pic_param->picture_coding_extension.bits.top_field_first << 11 |
                  pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
                  pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
                  pic_param->picture_coding_extension.bits.q_scale_type << 8 |
                  pic_param->picture_coding_extension.bits.intra_vlc_format << 7 |
                  pic_param->picture_coding_extension.bits.alternate_scan << 6);
    OUT_BCS_BATCH(batch,
                  pic_param->picture_coding_type << 9);
    OUT_BCS_BATCH(batch,
                  ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
                  ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  1088.  
  1089. static void
  1090. gen7_mfd_mpeg2_qm_state(VADriverContextP ctx,
  1091.                         struct decode_state *decode_state,
  1092.                         struct gen7_mfd_context *gen7_mfd_context)
  1093. {
  1094.     VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
  1095.     int i, j;
  1096.  
  1097.     /* Update internal QM state */
  1098.     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
  1099.         VAIQMatrixBufferMPEG2 * const iq_matrix =
  1100.             (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
  1101.  
  1102.         if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
  1103.             iq_matrix->load_intra_quantiser_matrix) {
  1104.             gen_iq_matrix->load_intra_quantiser_matrix =
  1105.                 iq_matrix->load_intra_quantiser_matrix;
  1106.             if (iq_matrix->load_intra_quantiser_matrix) {
  1107.                 for (j = 0; j < 64; j++)
  1108.                     gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
  1109.                         iq_matrix->intra_quantiser_matrix[j];
  1110.             }
  1111.         }
  1112.  
  1113.         if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
  1114.             iq_matrix->load_non_intra_quantiser_matrix) {
  1115.             gen_iq_matrix->load_non_intra_quantiser_matrix =
  1116.                 iq_matrix->load_non_intra_quantiser_matrix;
  1117.             if (iq_matrix->load_non_intra_quantiser_matrix) {
  1118.                 for (j = 0; j < 64; j++)
  1119.                     gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
  1120.                         iq_matrix->non_intra_quantiser_matrix[j];
  1121.             }
  1122.         }
  1123.     }
  1124.  
  1125.     /* Commit QM state to HW */
  1126.     for (i = 0; i < 2; i++) {
  1127.         unsigned char *qm = NULL;
  1128.         int qm_type;
  1129.  
  1130.         if (i == 0) {
  1131.             if (gen_iq_matrix->load_intra_quantiser_matrix) {
  1132.                 qm = gen_iq_matrix->intra_quantiser_matrix;
  1133.                 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
  1134.             }
  1135.         } else {
  1136.             if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
  1137.                 qm = gen_iq_matrix->non_intra_quantiser_matrix;
  1138.                 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
  1139.             }
  1140.         }
  1141.  
  1142.         if (!qm)
  1143.             continue;
  1144.  
  1145.         gen7_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
  1146.     }
  1147. }
  1148.  
/*
 * Emit an MFD_MPEG2_BSD_OBJECT for one slice.
 *
 * The macroblock count is the distance between this slice's start MB
 * and the next slice's start MB (or the end of the picture when
 * next_slice_param is NULL).  When the wa_mpeg2_slice_vertical_position
 * workaround is active (> 0) on a field picture, the submitted
 * slice_vertical_position values are halved before use.
 */
static void
gen7_mfd_mpeg2_bsd_object(VADriverContextP ctx,
                          VAPictureParameterBufferMPEG2 *pic_param,
                          VASliceParameterBufferMPEG2 *slice_param,
                          VASliceParameterBufferMPEG2 *next_slice_param,
                          struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
    int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;

    if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
        pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
        is_field_pic = 1;
    is_field_pic_wa = is_field_pic &&
        gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;

    /* Start position of this slice (workaround-adjusted) */
    vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
    hpos0 = slice_param->slice_horizontal_position;

    if (next_slice_param == NULL) {
        /* Last slice: end at the bottom of the (field) picture */
        vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
        hpos1 = 0;
    } else {
        vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
        hpos1 = next_slice_param->slice_horizontal_position;
    }

    mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
    /* macroblock_offset counts bits; its byte part shifts the data
     * window, its bit remainder goes into DW3[2:0] */
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
    OUT_BCS_BATCH(batch,
                  hpos0 << 24 |
                  vpos0 << 16 |
                  mb_count << 8 |
                  (next_slice_param == NULL) << 5 |
                  (next_slice_param == NULL) << 3 |
                  (slice_param->macroblock_offset & 0x7));
    OUT_BCS_BATCH(batch,
                  slice_param->quantiser_scale_code << 24);
    ADVANCE_BCS_BATCH(batch);
}
  1196.  
/*
 * Top-level MPEG-2 decode entry for one picture.
 *
 * Runs per-picture init, then builds a single atomic BCS batch with the
 * common MFX state followed by one BSD object per slice.  The slice
 * vertical-position workaround is computed lazily on the first picture
 * (a cached value < 0 means "not determined yet").
 */
static void
gen7_mfd_mpeg2_decode_picture(VADriverContextP ctx,
                              struct decode_state *decode_state,
                              struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferMPEG2 *pic_param;
    VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;

    gen7_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen7_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen7_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen7_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen7_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen7_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
    gen7_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);

    if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
        gen7_mfd_context->wa_mpeg2_slice_vertical_position =
            mpeg2_wa_slice_vertical_position(decode_state, pic_param);

    /* Outer loop: slice parameter buffers; inner loop: slices within
     * each buffer.  slice_param is advanced element by element. */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen7_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);

        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen7_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
  1252.  
/* Map VA-API VC-1 picture_type to the GEN7 MFX picture-type encoding.
 * NOTE(review): entry order assumed to follow the VA picture_type
 * values (I, P, B, BI, skipped), with a skipped picture decoded as P —
 * confirm against the VC-1 pic-state emission code. */
static const int va_to_gen7_vc1_pic_type[5] = {
    GEN7_VC1_I_PICTURE,
    GEN7_VC1_P_PICTURE,
    GEN7_VC1_B_PICTURE,
    GEN7_VC1_BI_PICTURE,
    GEN7_VC1_P_PICTURE,
};

/* Map VA-API VC-1 motion-vector mode to the hardware MV-mode field */
static const int va_to_gen7_vc1_mv[4] = {
    1, /* 1-MV */
    2, /* 1-MV half-pel */
    3, /* 1-MV half-pel bilinear */
    0, /* Mixed MV */
};

/* Scale factors for VC-1 B-picture processing.
 * NOTE(review): contents taken as-is; presumably the BFRACTION-derived
 * scale factors of the VC-1 spec — verify before editing. */
static const int b_picture_scale_factor[21] = {
    128, 85,  170, 64,  192,
    51,  102, 153, 204, 43,
    215, 37,  74,  111, 148,
    185, 222, 32,  96,  160,
    224,
};

/* Map VA-API conditional-overlap values to the hardware encoding */
static const int va_to_gen7_vc1_condover[3] = {
    0,
    2,
    3
};

/* Map VA-API VC-1 profile to the GEN7 profile encoding */
static const int va_to_gen7_vc1_profile[4] = {
    GEN7_VC1_SIMPLE_PROFILE,
    GEN7_VC1_MAIN_PROFILE,
    GEN7_VC1_RESERVED_PROFILE,
    GEN7_VC1_ADVANCED_PROFILE
};
  1288.  
  1289. static void
  1290. gen7_mfd_free_vc1_surface(void **data)
  1291. {
  1292.     struct gen7_vc1_surface *gen7_vc1_surface = *data;
  1293.  
  1294.     if (!gen7_vc1_surface)
  1295.         return;
  1296.  
  1297.     dri_bo_unreference(gen7_vc1_surface->dmv);
  1298.     free(gen7_vc1_surface);
  1299.     *data = NULL;
  1300. }
  1301.  
  1302. static void
  1303. gen7_mfd_init_vc1_surface(VADriverContextP ctx,
  1304.                           VAPictureParameterBufferVC1 *pic_param,
  1305.                           struct object_surface *obj_surface)
  1306. {
  1307.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1308.     struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
  1309.     int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
  1310.     int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
  1311.  
  1312.     obj_surface->free_private_data = gen7_mfd_free_vc1_surface;
  1313.  
  1314.     if (!gen7_vc1_surface) {
  1315.         gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
  1316.         assert((obj_surface->size & 0x3f) == 0);
  1317.         obj_surface->private_data = gen7_vc1_surface;
  1318.     }
  1319.  
  1320.     gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
  1321.  
  1322.     if (gen7_vc1_surface->dmv == NULL) {
  1323.         gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
  1324.                                              "direct mv w/r buffer",
  1325.                                              width_in_mbs * height_in_mbs * 64,
  1326.                                              0x1000);
  1327.     }
  1328. }
  1329.  
/*
 * Per-picture setup for VC-1 decode: resolves the forward/backward
 * reference surfaces, allocates the target surface buffer and its
 * private data, (re)allocates the row-store scratch buffers, and — when
 * bitplane data is present — repacks the VA bitplane buffer into the
 * hardware layout (one 4-bit entry per macroblock, two per byte).
 */
static void
gen7_mfd_vc1_decode_init(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    VAPictureParameterBufferVC1 *pic_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    int i;
    dri_bo *bo;
    int width_in_mbs;
    int picture_type;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
    width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
    picture_type = pic_param->picture_fields.bits.picture_type;

    /* reference picture */
    obj_surface = SURFACE(pic_param->forward_reference_picture);

    if (obj_surface && obj_surface->bo)
        gen7_mfd_context->reference_surface[0].surface_id = pic_param->forward_reference_picture;
    else
        gen7_mfd_context->reference_surface[0].surface_id = VA_INVALID_ID;

    obj_surface = SURFACE(pic_param->backward_reference_picture);

    if (obj_surface && obj_surface->bo)
        gen7_mfd_context->reference_surface[1].surface_id = pic_param->backward_reference_picture;
    else
        /* no backward reference: fall back to the forward reference id */
        gen7_mfd_context->reference_surface[1].surface_id = pic_param->forward_reference_picture;

    /* must do so !!! — fill the remaining reference slots by repeating
     * the forward/backward pair so no slot holds a stale id */
    for (i = 2; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++)
        gen7_mfd_context->reference_surface[i].surface_id = gen7_mfd_context->reference_surface[i % 2].surface_id;

    /* Current decoded picture */
    obj_surface = SURFACE(decode_state->current_render_target);
    assert(obj_surface);
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
    gen7_mfd_init_vc1_surface(ctx, pic_param, obj_surface);

    /* Exactly one of post-/pre-deblocking output is valid, depending on
     * whether the in-loop filter is enabled for this stream. */
    dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;

    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;

    /* Row-store scratch buffers are sized per macroblock row. */
    dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "intra row store",
                      width_in_mbs * 64,
                      0x1000);
    assert(bo);
    gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "deblocking filter row store",
                      width_in_mbs * 7 * 64,
                      0x1000);
    assert(bo);
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 96,
                      0x1000);
    assert(bo);
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    /* MPR row store is not used for VC-1 */
    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;

    gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
    dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);

    if (gen7_mfd_context->bitplane_read_buffer.valid) {
        int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16; /* shadows outer width_in_mbs (same value) */
        int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
        int bitplane_width = ALIGN(width_in_mbs, 2) / 2; /* dst bytes per MB row: 2 MBs per byte */
        int src_w, src_h;
        uint8_t *src = NULL, *dst = NULL;

        assert(decode_state->bit_plane->buffer);
        src = decode_state->bit_plane->buffer;

        bo = dri_bo_alloc(i965->intel.bufmgr,
                          "VC-1 Bitplane",
                          bitplane_width * height_in_mbs,
                          0x1000);
        assert(bo);
        gen7_mfd_context->bitplane_read_buffer.bo = bo;

        dri_bo_map(bo, True);
        assert(bo->virtual);
        dst = bo->virtual;

        /* Repack: the VA buffer holds one 4-bit entry per macroblock in
         * raster order (even-indexed MB in the high nibble); the hardware
         * buffer packs two horizontally adjacent MBs per byte, rows padded
         * to bitplane_width bytes. */
        for (src_h = 0; src_h < height_in_mbs; src_h++) {
            for(src_w = 0; src_w < width_in_mbs; src_w++) {
                int src_index, dst_index;
                int src_shift;
                uint8_t src_value;

                src_index = (src_h * width_in_mbs + src_w) / 2;
                src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
                src_value = ((src[src_index] >> src_shift) & 0xf);

                /* skipped pictures force the SKIPMB bit on every MB */
                if (picture_type == GEN7_VC1_SKIPPED_PICTURE){
                    src_value |= 0x2;
                }

                dst_index = src_w / 2;
                /* shift previous nibble down, insert new one on top */
                dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
            }

            /* odd width: the last byte got only one nibble — align it */
            if (src_w & 1)
                dst[src_w / 2] >>= 4;

            dst += bitplane_width;
        }

        dri_bo_unmap(bo);
    } else
        gen7_mfd_context->bitplane_read_buffer.bo = NULL;
}
  1464.  
/*
 * Derive all per-picture control fields from the VA picture parameters and
 * emit the 6-dword MFD_VC1_LONG_PIC_STATE command.  The derivations below
 * (ALTPQUANT, unified MV mode, B scale factor, transform codeset selection,
 * BRFD, overlap smoothing, interpolation mode) follow the VC-1 bitstream
 * semantics; inherited FIXMEs mark fields that are not fully implemented
 * (notably interlaced field pictures).
 */
static void
gen7_mfd_vc1_pic_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferVC1 *pic_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
    int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
    int unified_mv_mode;
    int ref_field_pic_polarity = 0;
    int scale_factor = 0;
    int trans_ac_y = 0;
    int dmv_surface_valid = 0;
    int brfd = 0;
    int fcm = 0;
    int picture_type;
    int profile;
    int overlap;
    int interpolation_mode = 0;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;

    profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
    dquant = pic_param->pic_quantizer_fields.bits.dquant;
    dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
    dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
    dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
    dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
    dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
    alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;

    /* Derive ALTPQUANT config/edge mask from the DQUANT syntax
     * (VC-1 VOPDQUANT): dquant==0 -> off, dquant==2 -> all four edges,
     * dquant==1 -> per-frame selection via dqprofile. */
    if (dquant == 0) {
        alt_pquant_config = 0;
        alt_pquant_edge_mask = 0;
    } else if (dquant == 2) {
        alt_pquant_config = 1;
        alt_pquant_edge_mask = 0xf;
    } else {
        assert(dquant == 1);
        if (dquantfrm == 0) {
            alt_pquant_config = 0;
            alt_pquant_edge_mask = 0;
            alt_pq = 0;
        } else {
            assert(dquantfrm == 1);
            alt_pquant_config = 1;

            switch (dqprofile) {
            case 3: /* all macroblocks: binary-level or MB-level PQUANT */
                if (dqbilevel == 0) {
                    alt_pquant_config = 2;
                    alt_pquant_edge_mask = 0;
                } else {
                    assert(dqbilevel == 1);
                    alt_pquant_config = 3;
                    alt_pquant_edge_mask = 0;
                }
                break;

            case 0: /* all four edges */
                alt_pquant_edge_mask = 0xf;
                break;

            case 1: /* double edge */
                if (dqdbedge == 3)
                    alt_pquant_edge_mask = 0x9;
                else
                    alt_pquant_edge_mask = (0x3 << dqdbedge);

                break;

            case 2: /* single edge */
                alt_pquant_edge_mask = (0x1 << dqsbedge);
                break;

            default:
                assert(0);
            }
        }
    }

    /* With intensity compensation the real MV mode is carried in mv_mode2 */
    if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
        assert(pic_param->mv_fields.bits.mv_mode2 < 4);
        unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
    } else {
        assert(pic_param->mv_fields.bits.mv_mode < 4);
        unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
    }

    if (pic_param->sequence_fields.bits.interlace == 1 &&
        pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
        /* FIXME: calculate reference field picture polarity */
        assert(0);
        ref_field_pic_polarity = 0;
    }

    /* B-frame temporal scale factor from the decoded BFRACTION index */
    if (pic_param->b_picture_fraction < 21)
        scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];

    picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];

    /* NOTE(review): advanced-profile I pictures are submitted as BI —
     * presumably a hardware requirement; confirm against the MFX PRM. */
    if (profile == GEN7_VC1_ADVANCED_PROFILE &&
        picture_type == GEN7_VC1_I_PICTURE)
        picture_type = GEN7_VC1_BI_PICTURE;

    if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
        trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
    else {
        trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
        /*
         * 8.3.6.2.1 Transform Type Selection
         * If variable-sized transform coding is not enabled,
         * then the 8x8 transform shall be used for all blocks.
         * it is also MFX_VC1_PIC_STATE requirement.
         */
        if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
            pic_param->transform_fields.bits.mb_level_transform_type_flag   = 1;
            pic_param->transform_fields.bits.frame_level_transform_type     = 0;
        }
    }


    /* Direct-mode MV surface is only usable when the backward reference
     * is a P/skipped picture (I/BI pictures carry no motion vectors). */
    if (picture_type == GEN7_VC1_B_PICTURE) {
        struct gen7_vc1_surface *gen7_vc1_surface = NULL;

        obj_surface = SURFACE(pic_param->backward_reference_picture);
        assert(obj_surface);
        gen7_vc1_surface = obj_surface->private_data;

        if (!gen7_vc1_surface ||
            (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
             va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
            dmv_surface_valid = 0;
        else
            dmv_surface_valid = 1;
    }

    assert(pic_param->picture_fields.bits.frame_coding_mode < 3);

    /* FCM: 0 = progressive, 1 = frame-interlace, 2/3 = field-interlace
     * with top/bottom field first respectively */
    if (pic_param->picture_fields.bits.frame_coding_mode < 2)
        fcm = pic_param->picture_fields.bits.frame_coding_mode;
    else {
        if (pic_param->picture_fields.bits.top_field_first)
            fcm = 2;
        else
            fcm = 3;
    }

    /* Backward reference frame distance, derived from the scale factor */
    if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
        brfd = pic_param->reference_fields.bits.reference_distance;
        brfd = (scale_factor * brfd) >> 8;
        brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;

        if (brfd < 0)
            brfd = 0;
    }

    /* Overlap smoothing: simple/main profile keys off PQUANT only;
     * advanced profile additionally honours CONDOVER for I/BI pictures. */
    overlap = 0;
    if (profile != GEN7_VC1_ADVANCED_PROFILE){
        if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
            pic_param->picture_fields.bits.picture_type != GEN7_VC1_B_PICTURE) {
            overlap = 1;
        }
    }else {
        if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_P_PICTURE &&
             pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
              overlap = 1;
        }
        if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_I_PICTURE ||
            pic_param->picture_fields.bits.picture_type == GEN7_VC1_BI_PICTURE){
             if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
                overlap = 1;
             } else if (va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 2 ||
                        va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 3) {
                 overlap = 1;
             }
        }
    }

    assert(pic_param->conditional_overlap_flag < 3);
    assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */

    /* Sub-pel interpolation filter selection from the (possibly
     * intensity-compensated) MV mode */
    if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
        (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
         pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
        interpolation_mode = 9; /* Half-pel bilinear */
    else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
             (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
              pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
        interpolation_mode = 1; /* Half-pel bicubic */
    else
        interpolation_mode = 0; /* Quarter-pel bicubic */

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
    /* DW1: picture dimensions in macroblocks, minus one */
    OUT_BCS_BATCH(batch,
                  (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
                  ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
    OUT_BCS_BATCH(batch,
                  ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
                  dmv_surface_valid << 15 |
                  (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
                  pic_param->rounding_control << 13 |
                  pic_param->sequence_fields.bits.syncmarker << 12 |
                  interpolation_mode << 8 |
                  0 << 7 | /* FIXME: scale up or down ??? */
                  pic_param->range_reduction_frame << 6 |
                  pic_param->entrypoint_fields.bits.loopfilter << 5 |
                  overlap << 4 |
                  !pic_param->picture_fields.bits.is_first_field << 3 |
                  (pic_param->sequence_fields.bits.profile == 3) << 0);
    OUT_BCS_BATCH(batch,
                  va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
                  picture_type << 26 |
                  fcm << 24 |
                  alt_pq << 16 |
                  pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
                  scale_factor << 0);
    OUT_BCS_BATCH(batch,
                  unified_mv_mode << 28 |
                  pic_param->mv_fields.bits.four_mv_switch << 27 |
                  pic_param->fast_uvmc_flag << 26 |
                  ref_field_pic_polarity << 25 |
                  pic_param->reference_fields.bits.num_reference_pictures << 24 |
                  pic_param->reference_fields.bits.reference_distance << 20 |
                  pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
                  pic_param->mv_fields.bits.extended_dmv_range << 10 |
                  pic_param->mv_fields.bits.extended_mv_range << 8 |
                  alt_pquant_edge_mask << 4 |
                  alt_pquant_config << 2 |
                  pic_param->pic_quantizer_fields.bits.half_qp << 1 |
                  pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
    /* DW5: bitplane presence (bits are active-low per-plane "raw mode"
     * flags) plus VLC table selectors */
    OUT_BCS_BATCH(batch,
                  !!pic_param->bitplane_present.value << 31 |
                  !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
                  !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
                  !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
                  !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
                  !pic_param->bitplane_present.flags.bp_overflags << 26 |
                  !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
                  !pic_param->bitplane_present.flags.bp_field_tx << 24 |
                  pic_param->mv_fields.bits.mv_table << 20 |
                  pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
                  pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
                  pic_param->transform_fields.bits.frame_level_transform_type << 12 |
                  pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
                  pic_param->mb_mode_table << 8 |
                  trans_ac_y << 6 |
                  pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
                  pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
                  pic_param->cbp_table << 0);
    ADVANCE_BCS_BATCH(batch);
}
  1722.  
  1723. static void
  1724. gen7_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
  1725.                              struct decode_state *decode_state,
  1726.                              struct gen7_mfd_context *gen7_mfd_context)
  1727. {
  1728.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  1729.     VAPictureParameterBufferVC1 *pic_param;
  1730.     int intensitycomp_single;
  1731.  
  1732.     assert(decode_state->pic_param && decode_state->pic_param->buffer);
  1733.     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
  1734.  
  1735.     assert(decode_state->pic_param && decode_state->pic_param->buffer);
  1736.     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
  1737.     intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
  1738.  
  1739.     BEGIN_BCS_BATCH(batch, 6);
  1740.     OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
  1741.     OUT_BCS_BATCH(batch,
  1742.                   0 << 14 | /* FIXME: double ??? */
  1743.                   0 << 12 |
  1744.                   intensitycomp_single << 10 |
  1745.                   intensitycomp_single << 8 |
  1746.                   0 << 4 | /* FIXME: interlace mode */
  1747.                   0);
  1748.     OUT_BCS_BATCH(batch,
  1749.                   pic_param->luma_shift << 16 |
  1750.                   pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
  1751.     OUT_BCS_BATCH(batch, 0);
  1752.     OUT_BCS_BATCH(batch, 0);
  1753.     OUT_BCS_BATCH(batch, 0);
  1754.     ADVANCE_BCS_BATCH(batch);
  1755. }
  1756.  
  1757.  
/*
 * Emit MFX_VC1_DIRECTMODE_STATE: the direct-MV write buffer comes from the
 * current render target's private data, the read buffer from the backward
 * reference picture; either relocation entry is zero when the buffer is
 * unavailable.
 */
static void
gen7_mfd_vc1_directmode_state(VADriverContextP ctx,
                              struct decode_state *decode_state,
                              struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferVC1 *pic_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;

    /* current picture writes its direct MVs */
    obj_surface = SURFACE(decode_state->current_render_target);

    if (obj_surface && obj_surface->private_data) {
        dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
    }

    /* B pictures read the backward reference's direct MVs */
    obj_surface = SURFACE(pic_param->backward_reference_picture);

    if (obj_surface && obj_surface->private_data) {
        dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
    }

    BEGIN_BCS_BATCH(batch, 3);
    OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (3 - 2));

    if (dmv_write_buffer)
        OUT_BCS_RELOC(batch, dmv_write_buffer,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    if (dmv_read_buffer)
        OUT_BCS_RELOC(batch, dmv_read_buffer,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
  1803.  
  1804. static int
  1805. gen7_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
  1806. {
  1807.     int out_slice_data_bit_offset;
  1808.     int slice_header_size = in_slice_data_bit_offset / 8;
  1809.     int i, j;
  1810.  
  1811.     if (profile != 3)
  1812.         out_slice_data_bit_offset = in_slice_data_bit_offset;
  1813.     else {
  1814.         for (i = 0, j = 0; i < slice_header_size; i++, j++) {
  1815.             if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3 && buf[j + 3] < 4) {
  1816.                 i++, j += 2;
  1817.             }
  1818.         }
  1819.  
  1820.         out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
  1821.     }
  1822.  
  1823.     return out_slice_data_bit_offset;
  1824. }
  1825.  
/*
 * Emit one MFD_VC1_BSD_OBJECT for a slice: maps the slice data to correct
 * the macroblock bit offset for emulation-prevention bytes, then programs
 * the slice byte range, vertical extent and residual bit offset.
 */
static void
gen7_mfd_vc1_bsd_object(VADriverContextP ctx,
                        VAPictureParameterBufferVC1 *pic_param,
                        VASliceParameterBufferVC1 *slice_param,
                        VASliceParameterBufferVC1 *next_slice_param,
                        dri_bo *slice_data_bo,
                        struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int next_slice_start_vert_pos;
    int macroblock_offset;
    uint8_t *slice_data = NULL;

    /* map read-only just long enough to rescan the slice header */
    dri_bo_map(slice_data_bo, 0);
    slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
    macroblock_offset = gen7_mfd_vc1_get_macroblock_bit_offset(slice_data,
                                                               slice_param->macroblock_offset,
                                                               pic_param->sequence_fields.bits.profile);
    dri_bo_unmap(slice_data_bo);

    /* last slice extends to the bottom of the picture */
    if (next_slice_param)
        next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
    else
        next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
    /* byte length / start offset exclude the slice header up to the
     * first macroblock; the sub-byte remainder goes in the last dword */
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_size - (macroblock_offset >> 3));
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_offset + (macroblock_offset >> 3));
    OUT_BCS_BATCH(batch,
                  slice_param->slice_vertical_position << 16 |
                  next_slice_start_vert_pos << 0);
    OUT_BCS_BATCH(batch,
                  (macroblock_offset & 0x7));
    ADVANCE_BCS_BATCH(batch);
}
  1864.  
/*
 * Top-level VC-1 picture decode: performs per-picture initialisation, then
 * emits the full MFX command sequence (pipe mode, surface, buffer
 * addresses, picture/prediction/direct-mode state) followed by one BSD
 * object per slice, and flushes the batch.
 */
static void
gen7_mfd_vc1_decode_picture(VADriverContextP ctx,
                            struct decode_state *decode_state,
                            struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferVC1 *pic_param;
    VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;

    gen7_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen7_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen7_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen7_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen7_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen7_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
    gen7_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
    gen7_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);

    /* outer loop: slice parameter groups; inner loop: slices within a
     * group.  The "next" slice is tracked so each BSD object knows where
     * the following slice begins. */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen7_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);

        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen7_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
  1917.  
  1918. #ifdef HAVE_VA_JPEG_DECODE
  1919. static void
  1920. gen7_mfd_jpeg_decode_init(VADriverContextP ctx,
  1921.                           struct decode_state *decode_state,
  1922.                           struct gen7_mfd_context *gen7_mfd_context)
  1923. {
  1924.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1925.     struct object_surface *obj_surface;
  1926.     VAPictureParameterBufferJPEGBaseline *pic_param;
  1927.     int subsampling = SUBSAMPLE_YUV420;
  1928.  
  1929.     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
  1930.  
  1931.     if (pic_param->num_components == 1)
  1932.         subsampling = SUBSAMPLE_YUV400;
  1933.     else if (pic_param->num_components == 3) {
  1934.         int h1 = pic_param->components[0].h_sampling_factor;
  1935.         int h2 = pic_param->components[1].h_sampling_factor;
  1936.         int h3 = pic_param->components[2].h_sampling_factor;
  1937.         int v1 = pic_param->components[0].v_sampling_factor;
  1938.         int v2 = pic_param->components[1].v_sampling_factor;
  1939.         int v3 = pic_param->components[2].v_sampling_factor;
  1940.  
  1941.         if (h1 == 2 && h2 == 1 && h3 == 1 &&
  1942.             v1 == 2 && v2 == 1 && v3 == 1)
  1943.             subsampling = SUBSAMPLE_YUV420;
  1944.         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
  1945.                  v1 == 1 && v2 == 1 && v3 == 1)
  1946.             subsampling = SUBSAMPLE_YUV422H;
  1947.         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
  1948.                  v1 == 1 && v2 == 1 && v3 == 1)
  1949.             subsampling = SUBSAMPLE_YUV444;
  1950.         else if (h1 == 4 && h2 == 1 && h3 == 1 &&
  1951.                  v1 == 1 && v2 == 1 && v3 == 1)
  1952.             subsampling = SUBSAMPLE_YUV411;
  1953.         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
  1954.                  v1 == 2 && v2 == 1 && v3 == 1)
  1955.             subsampling = SUBSAMPLE_YUV422V;
  1956.         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
  1957.                  v1 == 2 && v2 == 2 && v3 == 2)
  1958.             subsampling = SUBSAMPLE_YUV422H;
  1959.         else if (h2 == 2 && h2 == 2 && h3 == 2 &&
  1960.                  v1 == 2 && v2 == 1 && v3 == 1)
  1961.             subsampling = SUBSAMPLE_YUV422V;
  1962.         else
  1963.             assert(0);
  1964.     } else {
  1965.         assert(0);
  1966.     }
  1967.  
  1968.     /* Current decoded picture */
  1969.     obj_surface = SURFACE(decode_state->current_render_target);
  1970.     assert(obj_surface);
  1971.     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('I','M','C','1'), subsampling);
  1972.  
  1973.     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
  1974.     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
  1975.     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
  1976.     gen7_mfd_context->pre_deblocking_output.valid = 1;
  1977.  
  1978.     gen7_mfd_context->post_deblocking_output.bo = NULL;
  1979.     gen7_mfd_context->post_deblocking_output.valid = 0;
  1980.  
  1981.     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
  1982.     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
  1983.  
  1984.     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
  1985.     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
  1986.  
  1987.     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
  1988.     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
  1989.  
  1990.     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
  1991.     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
  1992.  
  1993.     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
  1994.     gen7_mfd_context->bitplane_read_buffer.valid = 0;
  1995. }
  1996.  
/* Maps a VA rotation index (0..3) to the GEN7 MFX_JPEG_PIC_STATE rotation
 * field encoding.  Only entry 0 (no rotation) is used in this file. */
static const int va_to_gen7_jpeg_rotation[4] = {
    GEN7_JPEG_ROTATION_0,
    GEN7_JPEG_ROTATION_90,
    GEN7_JPEG_ROTATION_180,
    GEN7_JPEG_ROTATION_270
};
  2003.  
  2004. static void
  2005. gen7_mfd_jpeg_pic_state(VADriverContextP ctx,
  2006.                         struct decode_state *decode_state,
  2007.                         struct gen7_mfd_context *gen7_mfd_context)
  2008. {
  2009.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  2010.     VAPictureParameterBufferJPEGBaseline *pic_param;
  2011.     int chroma_type = GEN7_YUV420;
  2012.     int frame_width_in_blks;
  2013.     int frame_height_in_blks;
  2014.  
  2015.     assert(decode_state->pic_param && decode_state->pic_param->buffer);
  2016.     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
  2017.  
  2018.     if (pic_param->num_components == 1)
  2019.         chroma_type = GEN7_YUV400;
  2020.     else if (pic_param->num_components == 3) {
  2021.         int h1 = pic_param->components[0].h_sampling_factor;
  2022.         int h2 = pic_param->components[1].h_sampling_factor;
  2023.         int h3 = pic_param->components[2].h_sampling_factor;
  2024.         int v1 = pic_param->components[0].v_sampling_factor;
  2025.         int v2 = pic_param->components[1].v_sampling_factor;
  2026.         int v3 = pic_param->components[2].v_sampling_factor;
  2027.  
  2028.         if (h1 == 2 && h2 == 1 && h3 == 1 &&
  2029.             v1 == 2 && v2 == 1 && v3 == 1)
  2030.             chroma_type = GEN7_YUV420;
  2031.         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
  2032.                  v1 == 1 && v2 == 1 && v3 == 1)
  2033.             chroma_type = GEN7_YUV422H_2Y;
  2034.         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
  2035.                  v1 == 1 && v2 == 1 && v3 == 1)
  2036.             chroma_type = GEN7_YUV444;
  2037.         else if (h1 == 4 && h2 == 1 && h3 == 1 &&
  2038.                  v1 == 1 && v2 == 1 && v3 == 1)
  2039.             chroma_type = GEN7_YUV411;
  2040.         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
  2041.                  v1 == 2 && v2 == 1 && v3 == 1)
  2042.             chroma_type = GEN7_YUV422V_2Y;
  2043.         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
  2044.                  v1 == 2 && v2 == 2 && v3 == 2)
  2045.             chroma_type = GEN7_YUV422H_4Y;
  2046.         else if (h2 == 2 && h2 == 2 && h3 == 2 &&
  2047.                  v1 == 2 && v2 == 1 && v3 == 1)
  2048.             chroma_type = GEN7_YUV422V_4Y;
  2049.         else
  2050.             assert(0);
  2051.     }
  2052.  
  2053.     if (chroma_type == GEN7_YUV400 ||
  2054.         chroma_type == GEN7_YUV444 ||
  2055.         chroma_type == GEN7_YUV422V_2Y) {
  2056.         frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
  2057.         frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
  2058.     } else if (chroma_type == GEN7_YUV411) {
  2059.         frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
  2060.         frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
  2061.     } else {
  2062.         frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
  2063.         frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
  2064.     }
  2065.  
  2066.     BEGIN_BCS_BATCH(batch, 3);
  2067.     OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
  2068.     OUT_BCS_BATCH(batch,
  2069.                   (va_to_gen7_jpeg_rotation[0] << 4) |    /* without rotation */
  2070.                   (chroma_type << 0));
  2071.     OUT_BCS_BATCH(batch,
  2072.                   ((frame_height_in_blks - 1) << 16) |   /* FrameHeightInBlks */
  2073.                   ((frame_width_in_blks - 1) << 0));    /* FrameWidthInBlks */
  2074.     ADVANCE_BCS_BATCH(batch);
  2075. }
  2076.  
/* Huffman table slot IDs for MFX_JPEG_HUFF_TABLE_STATE: index 0 selects
 * the luma (Y) table, index 1 the chroma (UV) table. */
static const int va_to_gen7_jpeg_hufftable[2] = {
    MFX_HUFFTABLE_ID_Y,
    MFX_HUFFTABLE_ID_UV
};
  2081.  
  2082. static void
  2083. gen7_mfd_jpeg_huff_table_state(VADriverContextP ctx,
  2084.                                struct decode_state *decode_state,
  2085.                                struct gen7_mfd_context *gen7_mfd_context,
  2086.                                int num_tables)
  2087. {
  2088.     VAHuffmanTableBufferJPEGBaseline *huffman_table;
  2089.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  2090.     int index;
  2091.  
  2092.     if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
  2093.         return;
  2094.  
  2095.     huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;
  2096.  
  2097.     for (index = 0; index < num_tables; index++) {
  2098.         int id = va_to_gen7_jpeg_hufftable[index];
  2099.         BEGIN_BCS_BATCH(batch, 53);
  2100.         OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
  2101.         OUT_BCS_BATCH(batch, id);
  2102.         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
  2103.         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
  2104.         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
  2105.         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
  2106.         ADVANCE_BCS_BATCH(batch);
  2107.     }
  2108. }
  2109.  
/* Maps a 1-based JPEG component index (1=Y, 2=Cb, 3=Cr, 4=alpha) to the
 * hardware quantizer-matrix type; slot 0 is an unused placeholder. */
static const int va_to_gen7_jpeg_qm[5] = {
    -1,
    MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
    MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
    MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
    MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
};
  2117.  
  2118. static void
  2119. gen7_mfd_jpeg_qm_state(VADriverContextP ctx,
  2120.                        struct decode_state *decode_state,
  2121.                        struct gen7_mfd_context *gen7_mfd_context)
  2122. {
  2123.     VAPictureParameterBufferJPEGBaseline *pic_param;
  2124.     VAIQMatrixBufferJPEGBaseline *iq_matrix;
  2125.     int index;
  2126.  
  2127.     if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
  2128.         return;
  2129.  
  2130.     iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
  2131.     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
  2132.  
  2133.     assert(pic_param->num_components <= 3);
  2134.  
  2135.     for (index = 0; index < pic_param->num_components; index++) {
  2136.         int qm_type = va_to_gen7_jpeg_qm[pic_param->components[index].component_id - pic_param->components[0].component_id + 1];
  2137.         unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
  2138.         unsigned char raster_qm[64];
  2139.         int j;
  2140.  
  2141.         if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
  2142.             continue;
  2143.  
  2144.         for (j = 0; j < 64; j++)
  2145.             raster_qm[zigzag_direct[j]] = qm[j];
  2146.  
  2147.         gen7_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
  2148.     }
  2149. }
  2150.  
  2151. static void
  2152. gen7_mfd_jpeg_bsd_object(VADriverContextP ctx,
  2153.                          VAPictureParameterBufferJPEGBaseline *pic_param,
  2154.                          VASliceParameterBufferJPEGBaseline *slice_param,
  2155.                          VASliceParameterBufferJPEGBaseline *next_slice_param,
  2156.                          dri_bo *slice_data_bo,
  2157.                          struct gen7_mfd_context *gen7_mfd_context)
  2158. {
  2159.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  2160.     int scan_component_mask = 0;
  2161.     int i;
  2162.  
  2163.     assert(slice_param->num_components > 0);
  2164.     assert(slice_param->num_components < 4);
  2165.     assert(slice_param->num_components <= pic_param->num_components);
  2166.  
  2167.     for (i = 0; i < slice_param->num_components; i++) {
  2168.         switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
  2169.         case 1:
  2170.             scan_component_mask |= (1 << 0);
  2171.             break;
  2172.         case 2:
  2173.             scan_component_mask |= (1 << 1);
  2174.             break;
  2175.         case 3:
  2176.             scan_component_mask |= (1 << 2);
  2177.             break;
  2178.         default:
  2179.             assert(0);
  2180.             break;
  2181.         }
  2182.     }
  2183.  
  2184.     BEGIN_BCS_BATCH(batch, 6);
  2185.     OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
  2186.     OUT_BCS_BATCH(batch,
  2187.                   slice_param->slice_data_size);
  2188.     OUT_BCS_BATCH(batch,
  2189.                   slice_param->slice_data_offset);
  2190.     OUT_BCS_BATCH(batch,
  2191.                   slice_param->slice_horizontal_position << 16 |
  2192.                   slice_param->slice_vertical_position << 0);
  2193.     OUT_BCS_BATCH(batch,
  2194.                   ((slice_param->num_components != 1) << 30) |  /* interleaved */
  2195.                   (scan_component_mask << 27) |                 /* scan components */
  2196.                   (0 << 26) |   /* disable interrupt allowed */
  2197.                   (slice_param->num_mcus << 0));                /* MCU count */
  2198.     OUT_BCS_BATCH(batch,
  2199.                   (slice_param->restart_interval << 0));    /* RestartInterval */
  2200.     ADVANCE_BCS_BATCH(batch);
  2201. }
  2202.  
  2203. /* Workaround for JPEG decoding on Ivybridge */
  2204.  
  2205. VAStatus
  2206. i965_DestroySurfaces(VADriverContextP ctx,
  2207.                      VASurfaceID *surface_list,
  2208.                      int num_surfaces);
  2209. VAStatus
  2210. i965_CreateSurfaces(VADriverContextP ctx,
  2211.                     int width,
  2212.                     int height,
  2213.                     int format,
  2214.                     int num_surfaces,
  2215.                     VASurfaceID *surfaces);
  2216.  
  2217. static struct {
  2218.     int width;
  2219.     int height;
  2220.     unsigned char data[32];
  2221.     int data_size;
  2222.     int data_bit_offset;
  2223.     int qp;
  2224. } gen7_jpeg_wa_clip = {
  2225.     16,
  2226.     16,
  2227.     {
  2228.         0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
  2229.         0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
  2230.     },
  2231.     14,
  2232.     40,
  2233.     28,
  2234. };
  2235.  
/*
 * (Re)create the scratch surface and, once, the slice-data bo used by
 * the dummy AVC decode that works around a JPEG decode issue on
 * Ivybridge.
 */
static void
gen7_jpeg_wa_init(VADriverContextP ctx,
                  struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    VAStatus status;
    struct object_surface *obj_surface;

    /* Drop the surface left over from a previous picture, if any */
    if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
        i965_DestroySurfaces(ctx,
                             &gen7_mfd_context->jpeg_wa_surface_id,
                             1);

    status = i965_CreateSurfaces(ctx,
                                 gen7_jpeg_wa_clip.width,
                                 gen7_jpeg_wa_clip.height,
                                 VA_RT_FORMAT_YUV420,
                                 1,
                                 &gen7_mfd_context->jpeg_wa_surface_id);
    assert(status == VA_STATUS_SUCCESS);

    obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
    assert(obj_surface);
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);

    /* Upload the canned AVC bitstream once; the bo is reused for every
     * subsequent picture decoded with this context. */
    if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
        gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
                                                               "JPEG WA data",
                                                               0x1000,
                                                               0x1000);
        dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
                       0,
                       gen7_jpeg_wa_clip.data_size,
                       gen7_jpeg_wa_clip.data);
    }
}
  2272.  
/*
 * Emit MFX_PIPE_MODE_SELECT configuring the MFX engine for the dummy
 * AVC-VLD decode of the workaround clip (note MFX_FORMAT_AVC, not JPEG).
 */
static void
gen7_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
                              struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  (MFX_LONG_MODE << 17) | /* Currently only support long format */
                  (MFD_MODE_VLD << 15) | /* VLD mode */
                  (0 << 10) | /* disable Stream-Out */
                  (0 << 9)  | /* Post Deblocking Output */
                  (1 << 8)  | /* Pre Deblocking Output */
                  (0 << 5)  | /* not in stitch mode */
                  (MFX_CODEC_DECODE << 4)  | /* decoding mode */
                  (MFX_FORMAT_AVC << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
                  (0 << 1)  |
                  (0 << 0));
    OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
    OUT_BCS_BATCH(batch, 0); /* reserved */
    ADVANCE_BCS_BATCH(batch);
}
  2300.  
/*
 * Emit MFX_SURFACE_STATE describing the NV12 scratch surface that the
 * workaround clip is decoded into.
 */
static void
gen7_jpeg_wa_surface_state(VADriverContextP ctx,
                           struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  ((obj_surface->orig_width - 1) << 18) |
                  ((obj_surface->orig_height - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
                  (1 << 27) | /* interleave chroma, set to 0 for JPEG */
                  (0 << 22) | /* surface object control state, ignored */
                  ((obj_surface->width - 1) << 3) | /* pitch */
                  (0 << 2)  | /* must be 0 */
                  (1 << 1)  | /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for U(Cb), must be 0 */
                  (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for V(Cr), must be 0 */
                  (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
    ADVANCE_BCS_BATCH(batch);
}
  2331.  
/*
 * Emit MFX_PIPE_BUF_ADDR_STATE for the workaround decode: points the
 * pre-deblocking output at the scratch surface and supplies a throwaway
 * intra row-store buffer.
 */
static void
gen7_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
                                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    dri_bo *intra_bo;
    int i;

    intra_bo = dri_bo_alloc(i965->intel.bufmgr,
                            "intra row store",
                            128 * 64,
                            0x1000);

    BEGIN_BCS_BATCH(batch, 24);
    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (24 - 2));
    /* pre-deblocking output: the scratch surface */
    OUT_BCS_RELOC(batch,
                  obj_surface->bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);

    OUT_BCS_BATCH(batch, 0); /* post deblocking */

    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */

    OUT_BCS_RELOC(batch,
                  intra_bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);

    OUT_BCS_BATCH(batch, 0);

    /* DW 7..22 */
    for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
        OUT_BCS_BATCH(batch, 0);
    }

    OUT_BCS_BATCH(batch, 0);   /* ignore DW23 for decoding */
    ADVANCE_BCS_BATCH(batch);

    /* The relocation emitted above presumably holds its own reference,
     * so the local one can be dropped here -- NOTE(review): confirm
     * OUT_BCS_RELOC reference semantics. */
    dri_bo_unreference(intra_bo);
}
  2376.  
/*
 * Emit MFX_BSP_BUF_BASE_ADDR_STATE with freshly allocated, throwaway
 * BSD/MPC and MPR row-store buffers for the workaround decode.
 */
static void
gen7_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
                                     struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    dri_bo *bsd_mpc_bo, *mpr_bo;

    bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
                              "bsd mpc row store",
                              11520, /* 1.5 * 120 * 64 */
                              0x1000);

    mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
                          "mpr row store",
                          7680, /* 1.0 * 120 * 64 */
                          0x1000);

    BEGIN_BCS_BATCH(batch, 4);
    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));

    OUT_BCS_RELOC(batch,
                  bsd_mpc_bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);

    OUT_BCS_RELOC(batch,
                  mpr_bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);

    /* Relocations presumably keep the bos alive until the batch retires;
     * the local references are dropped immediately. */
    dri_bo_unreference(bsd_mpc_bo);
    dri_bo_unreference(mpr_bo);
}
  2414.  
static void
gen7_jpeg_wa_avc_qm_state(VADriverContextP ctx,
                          struct gen7_mfd_context *gen7_mfd_context)
{
    /* Intentionally empty: no MFX_QM_STATE commands are emitted for the
     * workaround clip, so the hardware's current/default matrices apply. */
}
  2421.  
/*
 * Emit MFX_AVC_IMG_STATE for the workaround clip: a single-macroblock
 * (1x1 MB, i.e. 16x16) CABAC frame picture.
 */
static void
gen7_jpeg_wa_avc_img_state(VADriverContextP ctx,
                           struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int img_struct = 0;        /* frame picture */
    int mbaff_frame_flag = 0;  /* no MBAFF */
    unsigned int width_in_mbs = 1, height_in_mbs = 1;

    BEGIN_BCS_BATCH(batch, 16);
    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
    OUT_BCS_BATCH(batch,
                  width_in_mbs * height_in_mbs);
    OUT_BCS_BATCH(batch,
                  ((height_in_mbs - 1) << 16) |
                  ((width_in_mbs - 1) << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 24) |
                  (0 << 16) |
                  (0 << 14) |
                  (0 << 13) |
                  (0 << 12) | /* differ from GEN6 */
                  (0 << 10) |
                  (img_struct << 8));
    OUT_BCS_BATCH(batch,
                  (1 << 10) | /* 4:2:0 */
                  (1 << 7) |  /* CABAC */
                  (0 << 6) |
                  (0 << 5) |
                  (0 << 4) |
                  (0 << 3) |
                  (1 << 2) |
                  (mbaff_frame_flag << 1) |
                  (0 << 0));
    /* DW 5..15: unused for this minimal decode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  2469.  
  2470. static void
  2471. gen7_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
  2472.                                   struct gen7_mfd_context *gen7_mfd_context)
  2473. {
  2474.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  2475.     int i;
  2476.  
  2477.     BEGIN_BCS_BATCH(batch, 69);
  2478.     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
  2479.  
  2480.     /* reference surfaces 0..15 */
  2481.     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
  2482.         OUT_BCS_BATCH(batch, 0); /* top */
  2483.         OUT_BCS_BATCH(batch, 0); /* bottom */
  2484.     }
  2485.  
  2486.     /* the current decoding frame/field */
  2487.     OUT_BCS_BATCH(batch, 0); /* top */
  2488.     OUT_BCS_BATCH(batch, 0); /* bottom */
  2489.  
  2490.     /* POC List */
  2491.     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
  2492.         OUT_BCS_BATCH(batch, 0);
  2493.         OUT_BCS_BATCH(batch, 0);
  2494.     }
  2495.  
  2496.     OUT_BCS_BATCH(batch, 0);
  2497.     OUT_BCS_BATCH(batch, 0);
  2498.  
  2499.     ADVANCE_BCS_BATCH(batch);
  2500. }
  2501.  
/*
 * Emit MFX_IND_OBJ_BASE_ADDR_STATE pointing the bitstream fetch at the
 * bo holding the canned workaround clip data.
 */
static void
gen7_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
                                     struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 11);
    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
    OUT_BCS_RELOC(batch,
                  gen7_mfd_context->jpeg_wa_slice_data_bo,
                  I915_GEM_DOMAIN_INSTRUCTION, 0,
                  0);
    OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  2525.  
/*
 * Emit MFD_AVC_BSD_OBJECT kicking off the actual workaround decode: a
 * single slice covering the whole canned clip.
 */
static void
gen7_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
                            struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    /* the input bitstream format on GEN7 differs from GEN6 */
    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
    OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  (0 << 31) |
                  (0 << 14) |
                  (0 << 12) |
                  (0 << 10) |
                  (0 << 8));
    /* split the clip's bit offset into a byte offset and a bit remainder */
    OUT_BCS_BATCH(batch,
                  ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) |
                  (0 << 5)  |
                  (0 << 4)  |
                  (1 << 3) | /* LastSlice Flag */
                  (gen7_jpeg_wa_clip.data_bit_offset & 0x7));
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  2552.  
/*
 * Emit MFX_AVC_SLICE_STATE for the workaround clip: one I-slice covering
 * the single macroblock, with deblocking disabled and the canned QP.
 */
static void
gen7_jpeg_wa_avc_slice_state(VADriverContextP ctx,
                             struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
    int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;
    int first_mb_in_slice = 0;
    int slice_type = SLICE_TYPE_I;

    BEGIN_BCS_BATCH(batch, 11);
    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, slice_type);
    OUT_BCS_BATCH(batch,
                  (num_ref_idx_l1 << 24) |
                  (num_ref_idx_l0 << 16) |
                  (0 << 8) |
                  (0 << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 29) |
                  (1 << 27) |   /* disable Deblocking */
                  (0 << 24) |
                  (gen7_jpeg_wa_clip.qp << 16) |
                  (0 << 8) |
                  (0 << 0));
    OUT_BCS_BATCH(batch,
                  (slice_ver_pos << 24) |
                  (slice_hor_pos << 16) |
                  (first_mb_in_slice << 0));
    OUT_BCS_BATCH(batch,
                  (next_slice_ver_pos << 16) |
                  (next_slice_hor_pos << 0));
    OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  2592.  
/*
 * Ivybridge JPEG workaround: decode a tiny canned AVC clip into a scratch
 * surface before the real JPEG decode.  The command order below mirrors a
 * regular AVC decode submission and must not be rearranged.
 */
static void
gen7_mfd_jpeg_wa(VADriverContextP ctx,
                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    gen7_jpeg_wa_init(ctx, gen7_mfd_context);
    intel_batchbuffer_emit_mi_flush(batch);
    gen7_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
    gen7_jpeg_wa_surface_state(ctx, gen7_mfd_context);
    gen7_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
    gen7_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
    gen7_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
    gen7_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
    gen7_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);

    gen7_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
    gen7_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
    gen7_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
}
  2612.  
  2613. void
  2614. gen7_mfd_jpeg_decode_picture(VADriverContextP ctx,
  2615.                              struct decode_state *decode_state,
  2616.                              struct gen7_mfd_context *gen7_mfd_context)
  2617. {
  2618.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  2619.     VAPictureParameterBufferJPEGBaseline *pic_param;
  2620.     VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
  2621.     dri_bo *slice_data_bo;
  2622.     int i, j, max_selector = 0;
  2623.  
  2624.     assert(decode_state->pic_param && decode_state->pic_param->buffer);
  2625.     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
  2626.  
  2627.     /* Currently only support Baseline DCT */
  2628.     gen7_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
  2629.     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
  2630.     gen7_mfd_jpeg_wa(ctx, gen7_mfd_context);
  2631.     intel_batchbuffer_emit_mi_flush(batch);
  2632.     gen7_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
  2633.     gen7_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
  2634.     gen7_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
  2635.     gen7_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
  2636.     gen7_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);
  2637.  
  2638.     for (j = 0; j < decode_state->num_slice_params; j++) {
  2639.         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
  2640.         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
  2641.         slice_data_bo = decode_state->slice_datas[j]->bo;
  2642.         gen7_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
  2643.  
  2644.         if (j == decode_state->num_slice_params - 1)
  2645.             next_slice_group_param = NULL;
  2646.         else
  2647.             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
  2648.  
  2649.         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
  2650.             int component;
  2651.  
  2652.             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
  2653.  
  2654.             if (i < decode_state->slice_params[j]->num_elements - 1)
  2655.                 next_slice_param = slice_param + 1;
  2656.             else
  2657.                 next_slice_param = next_slice_group_param;
  2658.  
  2659.             for (component = 0; component < slice_param->num_components; component++) {
  2660.                 if (max_selector < slice_param->components[component].dc_table_selector)
  2661.                     max_selector = slice_param->components[component].dc_table_selector;
  2662.  
  2663.                 if (max_selector < slice_param->components[component].ac_table_selector)
  2664.                     max_selector = slice_param->components[component].ac_table_selector;
  2665.             }
  2666.  
  2667.             slice_param++;
  2668.         }
  2669.     }
  2670.  
  2671.     assert(max_selector < 2);
  2672.     gen7_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);
  2673.  
  2674.     for (j = 0; j < decode_state->num_slice_params; j++) {
  2675.         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
  2676.         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
  2677.         slice_data_bo = decode_state->slice_datas[j]->bo;
  2678.         gen7_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
  2679.  
  2680.         if (j == decode_state->num_slice_params - 1)
  2681.             next_slice_group_param = NULL;
  2682.         else
  2683.             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
  2684.  
  2685.         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
  2686.             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
  2687.  
  2688.             if (i < decode_state->slice_params[j]->num_elements - 1)
  2689.                 next_slice_param = slice_param + 1;
  2690.             else
  2691.                 next_slice_param = next_slice_group_param;
  2692.  
  2693.             gen7_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
  2694.             slice_param++;
  2695.         }
  2696.     }
  2697.  
  2698.     intel_batchbuffer_end_atomic(batch);
  2699.     intel_batchbuffer_flush(batch);
  2700. }
  2701. #endif
  2702.  
  2703. static void
  2704. gen7_mfd_decode_picture(VADriverContextP ctx,
  2705.                         VAProfile profile,
  2706.                         union codec_state *codec_state,
  2707.                         struct hw_context *hw_context)
  2708.  
  2709. {
  2710.     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
  2711.     struct decode_state *decode_state = &codec_state->decode;
  2712.  
  2713.     assert(gen7_mfd_context);
  2714.  
  2715.     gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
  2716.  
  2717.     switch (profile) {
  2718.     case VAProfileMPEG2Simple:
  2719.     case VAProfileMPEG2Main:
  2720.         gen7_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
  2721.         break;
  2722.        
  2723.     case VAProfileH264Baseline:
  2724.     case VAProfileH264Main:
  2725.     case VAProfileH264High:
  2726.         gen7_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
  2727.         break;
  2728.  
  2729.     case VAProfileVC1Simple:
  2730.     case VAProfileVC1Main:
  2731.     case VAProfileVC1Advanced:
  2732.         gen7_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
  2733.         break;
  2734.  
  2735. #ifdef HAVE_VA_JPEG_DECODE
  2736.     case VAProfileJPEGBaseline:
  2737.         gen7_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
  2738.         break;
  2739. #endif
  2740.  
  2741.     default:
  2742.         assert(0);
  2743.         break;
  2744.     }
  2745. }
  2746.  
  2747. static void
  2748. gen7_mfd_context_destroy(void *hw_context)
  2749. {
  2750.     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
  2751.  
  2752.     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
  2753.     gen7_mfd_context->post_deblocking_output.bo = NULL;
  2754.  
  2755.     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
  2756.     gen7_mfd_context->pre_deblocking_output.bo = NULL;
  2757.  
  2758.     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
  2759.     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
  2760.  
  2761.     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
  2762.     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
  2763.  
  2764.     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
  2765.     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
  2766.  
  2767.     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
  2768.     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
  2769.  
  2770.     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
  2771.     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
  2772.  
  2773.     dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);
  2774.  
  2775.     intel_batchbuffer_free(gen7_mfd_context->base.batch);
  2776.     free(gen7_mfd_context);
  2777. }
  2778.  
  2779. static void gen7_mfd_mpeg2_context_init(VADriverContextP ctx,
  2780.                                     struct gen7_mfd_context *gen7_mfd_context)
  2781. {
  2782.     gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
  2783.     gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
  2784.     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
  2785.     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
  2786. }
  2787.  
  2788. struct hw_context *
  2789. gen7_dec_hw_context_init(VADriverContextP ctx, VAProfile profile)
  2790. {
  2791.     struct intel_driver_data *intel = intel_driver_data(ctx);
  2792.     struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
  2793.     int i;
  2794.  
  2795.     gen7_mfd_context->base.destroy = gen7_mfd_context_destroy;
  2796.     gen7_mfd_context->base.run = gen7_mfd_decode_picture;
  2797.     gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
  2798.  
  2799.     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
  2800.         gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
  2801.         gen7_mfd_context->reference_surface[i].frame_store_id = -1;
  2802.     }
  2803.  
  2804.     gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
  2805.  
  2806.     switch (profile) {
  2807.     case VAProfileMPEG2Simple:
  2808.     case VAProfileMPEG2Main:
  2809.         gen7_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
  2810.         break;
  2811.  
  2812.     case VAProfileH264Baseline:
  2813.     case VAProfileH264Main:
  2814.     case VAProfileH264High:
  2815.         gen7_mfd_avc_context_init(ctx, gen7_mfd_context);
  2816.         break;
  2817.     default:
  2818.         break;
  2819.     }
  2820.     return (struct hw_context *)gen7_mfd_context;
  2821. }
  2822.