Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | Download | RSS feed

  1. /*
  2.  * Copyright © 2010 Intel Corporation
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the
  6.  * "Software"), to deal in the Software without restriction, including
  7.  * without limitation the rights to use, copy, modify, merge, publish,
  8.  * distribute, sub license, and/or sell copies of the Software, and to
  9.  * permit persons to whom the Software is furnished to do so, subject to
  10.  * the following conditions:
  11.  *
  12.  * The above copyright notice and this permission notice (including the
  13.  * next paragraph) shall be included in all copies or substantial portions
  14.  * of the Software.
  15.  *
  16.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  17.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  18.  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  19.  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
  20.  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  21.  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  22.  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23.  *
  24.  * Authors:
  25.  *    Xiang Haihao <haihao.xiang@intel.com>
  26.  *
  27.  */
  28.  
  29. #ifndef HAVE_GEN_AVC_SURFACE
  30. #define HAVE_GEN_AVC_SURFACE 1
  31. #endif
  32.  
  33. #include "sysdeps.h"
  34. #include "intel_batchbuffer.h"
  35. #include "intel_driver.h"
  36. #include "i965_defines.h"
  37. #include "i965_drv_video.h"
  38. #include "i965_decoder_utils.h"
  39.  
  40. #include "gen6_mfd.h"
  41.  
/* Zig-zag scan order for 8x8 transform blocks: entry k gives the
 * raster-order coefficient index of scan position k.  Used to reorder
 * quantization-matrix coefficients for the hardware. */
static const uint32_t zigzag_direct[64] = {
    0,   1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
  52.  
  53. static void
  54. gen6_mfd_avc_frame_store_index(VADriverContextP ctx,
  55.                                VAPictureParameterBufferH264 *pic_param,
  56.                                struct gen6_mfd_context *gen6_mfd_context)
  57. {
  58.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  59.     int i, j;
  60.  
  61.     assert(ARRAY_ELEMS(gen6_mfd_context->reference_surface) == ARRAY_ELEMS(pic_param->ReferenceFrames));
  62.  
  63.     for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
  64.         int found = 0;
  65.  
  66.         if (gen6_mfd_context->reference_surface[i].surface_id == VA_INVALID_ID)
  67.             continue;
  68.  
  69.         for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
  70.             VAPictureH264 *ref_pic = &pic_param->ReferenceFrames[j];
  71.             if (ref_pic->flags & VA_PICTURE_H264_INVALID)
  72.                 continue;
  73.  
  74.             if (gen6_mfd_context->reference_surface[i].surface_id == ref_pic->picture_id) {
  75.                 found = 1;
  76.                 break;
  77.             }
  78.         }
  79.  
  80.         if (!found) {
  81.             struct object_surface *obj_surface = SURFACE(gen6_mfd_context->reference_surface[i].surface_id);
  82.             obj_surface->flags &= ~SURFACE_REFERENCED;
  83.  
  84.             if ((obj_surface->flags & SURFACE_ALL_MASK) == SURFACE_DISPLAYED) {
  85.                 dri_bo_unreference(obj_surface->bo);
  86.                 obj_surface->bo = NULL;
  87.                 obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
  88.             }
  89.  
  90.             if (obj_surface->free_private_data)
  91.                 obj_surface->free_private_data(&obj_surface->private_data);
  92.  
  93.             gen6_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
  94.             gen6_mfd_context->reference_surface[i].frame_store_id = -1;
  95.         }
  96.     }
  97.  
  98.     for (i = 0; i < ARRAY_ELEMS(pic_param->ReferenceFrames); i++) {
  99.         VAPictureH264 *ref_pic = &pic_param->ReferenceFrames[i];
  100.         int found = 0;
  101.  
  102.         if (ref_pic->flags & VA_PICTURE_H264_INVALID)
  103.             continue;
  104.  
  105.         for (j = 0; j < ARRAY_ELEMS(gen6_mfd_context->reference_surface); j++) {
  106.             if (gen6_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID)
  107.                 continue;
  108.            
  109.             if (gen6_mfd_context->reference_surface[j].surface_id == ref_pic->picture_id) {
  110.                 found = 1;
  111.                 break;
  112.             }
  113.         }
  114.  
  115.         if (!found) {
  116.             int frame_idx;
  117.             struct object_surface *obj_surface = SURFACE(ref_pic->picture_id);
  118.            
  119.             assert(obj_surface);
  120.             i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);
  121.  
  122.             for (frame_idx = 0; frame_idx < ARRAY_ELEMS(gen6_mfd_context->reference_surface); frame_idx++) {
  123.                 for (j = 0; j < ARRAY_ELEMS(gen6_mfd_context->reference_surface); j++) {
  124.                     if (gen6_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID)
  125.                         continue;
  126.  
  127.                     if (gen6_mfd_context->reference_surface[j].frame_store_id == frame_idx)
  128.                         break;
  129.                 }
  130.  
  131.                 if (j == ARRAY_ELEMS(gen6_mfd_context->reference_surface))
  132.                     break;
  133.             }
  134.  
  135.             assert(frame_idx < ARRAY_ELEMS(gen6_mfd_context->reference_surface));
  136.  
  137.             for (j = 0; j < ARRAY_ELEMS(gen6_mfd_context->reference_surface); j++) {
  138.                 if (gen6_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID) {
  139.                     gen6_mfd_context->reference_surface[j].surface_id = ref_pic->picture_id;
  140.                     gen6_mfd_context->reference_surface[j].frame_store_id = frame_idx;
  141.                     break;
  142.                 }
  143.             }
  144.         }
  145.     }
  146.  
  147.     /* sort */
  148.     for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface) - 1; i++) {
  149.         if (gen6_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
  150.             gen6_mfd_context->reference_surface[i].frame_store_id == i)
  151.             continue;
  152.  
  153.         for (j = i + 1; j < ARRAY_ELEMS(gen6_mfd_context->reference_surface); j++) {
  154.             if (gen6_mfd_context->reference_surface[j].surface_id != VA_INVALID_ID &&
  155.                 gen6_mfd_context->reference_surface[j].frame_store_id == i) {
  156.                 VASurfaceID id = gen6_mfd_context->reference_surface[i].surface_id;
  157.                 int frame_idx = gen6_mfd_context->reference_surface[i].frame_store_id;
  158.  
  159.                 gen6_mfd_context->reference_surface[i].surface_id = gen6_mfd_context->reference_surface[j].surface_id;
  160.                 gen6_mfd_context->reference_surface[i].frame_store_id = gen6_mfd_context->reference_surface[j].frame_store_id;
  161.                 gen6_mfd_context->reference_surface[j].surface_id = id;
  162.                 gen6_mfd_context->reference_surface[j].frame_store_id = frame_idx;
  163.                 break;
  164.             }
  165.         }
  166.     }
  167. }
  168.  
  169. static void
  170. gen6_mfd_init_avc_surface(VADriverContextP ctx,
  171.                           VAPictureParameterBufferH264 *pic_param,
  172.                           struct object_surface *obj_surface)
  173. {
  174.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  175.     GenAvcSurface *gen6_avc_surface = obj_surface->private_data;
  176.     int height_in_mbs;
  177.  
  178.     obj_surface->free_private_data = gen_free_avc_surface;
  179.     height_in_mbs = ((pic_param->picture_height_in_mbs_minus1 + 1) & 0xff); /* frame height */
  180.  
  181.     if (!gen6_avc_surface) {
  182.         gen6_avc_surface = calloc(sizeof(GenAvcSurface), 1);
  183.         assert((obj_surface->size & 0x3f) == 0);
  184.         obj_surface->private_data = gen6_avc_surface;
  185.     }
  186.  
  187.     gen6_avc_surface->dmv_bottom_flag = (pic_param->pic_fields.bits.field_pic_flag &&
  188.                                          !pic_param->seq_fields.bits.direct_8x8_inference_flag);
  189.  
  190.     if (gen6_avc_surface->dmv_top == NULL) {
  191.         gen6_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
  192.                                                  "direct mv w/r buffer",
  193.                                                  128 * height_in_mbs * 64,      /* scalable with frame height */
  194.                                                  0x1000);
  195.     }
  196.  
  197.     if (gen6_avc_surface->dmv_bottom_flag &&
  198.         gen6_avc_surface->dmv_bottom == NULL) {
  199.         gen6_avc_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr,
  200.                                                     "direct mv w/r buffer",
  201.                                                     128 * height_in_mbs * 64,   /* scalable with frame height */
  202.                                                     0x1000);
  203.     }
  204. }
  205.  
/*
 * Emit MFX_PIPE_MODE_SELECT (4 DWORDs): put the MFX engine into VLD
 * decode mode for the selected codec standard and select whether output
 * goes through the pre- or post-deblocking path, based on the flags set
 * up earlier in gen6_mfd_context.
 */
static void
gen6_mfd_pipe_mode_select(VADriverContextP ctx,
                          struct decode_state *decode_state,
                          int standard_select,
                          struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;

    /* This path only handles the three codecs the gen6 MFX decoder supports. */
    assert(standard_select == MFX_FORMAT_MPEG2 ||
           standard_select == MFX_FORMAT_AVC ||
           standard_select == MFX_FORMAT_VC1);

    BEGIN_BCS_BATCH(batch, 4);
    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (4 - 2));
    OUT_BCS_BATCH(batch,
                  (MFD_MODE_VLD << 16) | /* VLD mode */
                  (0 << 10) | /* disable Stream-Out */
                  (gen6_mfd_context->post_deblocking_output.valid << 9)  | /* Post Deblocking Output */
                  (gen6_mfd_context->pre_deblocking_output.valid << 8)  | /* Pre Deblocking Output */
                  (0 << 7)  | /* disable TLB prefectch */
                  (0 << 5)  | /* not in stitch mode */
                  (MFX_CODEC_DECODE << 4)  | /* decoding mode */
                  (standard_select << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 20) | /* round flag in PB slice */
                  (0 << 19) | /* round flag in Intra8x8 */
                  (0 << 7)  | /* expand NOA bus flag */
                  (1 << 6)  | /* must be 1 */
                  (0 << 5)  | /* disable clock gating for NOA */
                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
                  (0 << 1)  | /* AVC long field motion vector */
                  (1 << 0));  /* always calculate AVC ILDB boundary strength */
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  243.  
/*
 * Emit MFX_SURFACE_STATE (6 DWORDs) describing the current render target:
 * an NV12 (planar 4:2:0, interleaved U/V) Y-tiled surface.  Width/height
 * come from the surface object; the chroma plane starts at y offset
 * obj_surface->height.
 */
static void
gen6_mfd_surface_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       int standard_select,
                       struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface = SURFACE(decode_state->current_render_target);
    assert(obj_surface);

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  ((obj_surface->orig_height - 1) << 19) |
                  ((obj_surface->orig_width - 1) << 6));
    OUT_BCS_BATCH(batch,
                  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
                  (1 << 27) | /* must be 1 for interleave U/V, hardware requirement */
                  (0 << 22) | /* surface object control state, FIXME??? */
                  ((obj_surface->width - 1) << 3) | /* pitch */
                  (0 << 2)  | /* must be 0 for interleave U/V */
                  (1 << 1)  | /* must be y-tiled */
                  (I965_TILEWALK_YMAJOR << 0));  /* tile walk, FIXME: must be 1 ??? */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* must be 0 for interleave U/V */
                  (obj_surface->height)); /* y offset for U(cb) */
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  275.  
/*
 * Emit MFX_PIPE_BUF_ADDR_STATE (24 DWORDs): relocations for the pre/post
 * deblocking outputs, the intra-row and deblocking-filter row-store
 * scratch buffers, and the 16 reference-picture BOs (DW 7..22).  Any
 * buffer marked invalid gets a zero DWORD instead of a relocation.
 */
static void
gen6_mfd_pipe_buf_addr_state(VADriverContextP ctx,
                             struct decode_state *decode_state,
                             int standard_select,
                             struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    int i;

    BEGIN_BCS_BATCH(batch, 24);
    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (24 - 2));
    if (gen6_mfd_context->pre_deblocking_output.valid)
        OUT_BCS_RELOC(batch, gen6_mfd_context->pre_deblocking_output.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    if (gen6_mfd_context->post_deblocking_output.valid)
        OUT_BCS_RELOC(batch, gen6_mfd_context->post_deblocking_output.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */

    if (gen6_mfd_context->intra_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen6_mfd_context->intra_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    if (gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    /* DW 7..22: one relocation per frame-store slot; empty slots emit 0. */
    for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
        struct object_surface *obj_surface;

        if (gen6_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
            obj_surface = SURFACE(gen6_mfd_context->reference_surface[i].surface_id);
            assert(obj_surface && obj_surface->bo);

            OUT_BCS_RELOC(batch, obj_surface->bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0);
        } else {
            OUT_BCS_BATCH(batch, 0);
        }
    }

    OUT_BCS_BATCH(batch, 0);   /* ignore DW23 for decoding */
    ADVANCE_BCS_BATCH(batch);
}
  338.  
/*
 * Emit MFX_IND_OBJ_BASE_ADDR_STATE (11 DWORDs): point the MFX indirect
 * bitstream object base at the slice data BO.  All other indirect-object
 * addresses are unused in VLD decode mode and are emitted as zero.
 */
static void
gen6_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
                                 dri_bo *slice_data_bo,
                                 int standard_select,
                                 struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 11);
    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
    OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  361.  
/*
 * Emit MFX_BSP_BUF_BASE_ADDR_STATE (4 DWORDs): relocations for the
 * BSD/MPC row-store scratch buffer, the MPR row-store scratch buffer,
 * and the bitplane read buffer (VC-1).  Invalid buffers emit zero.
 */
static void
gen6_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
                                 struct decode_state *decode_state,
                                 int standard_select,
                                 struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 4);
    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));

    if (gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    if (gen6_mfd_context->mpr_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen6_mfd_context->mpr_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    if (gen6_mfd_context->bitplane_read_buffer.valid)
        OUT_BCS_RELOC(batch, gen6_mfd_context->bitplane_read_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
  396.  
/*
 * Emit MFX_AVC_IMG_STATE (13 DWORDs) from the H.264 picture parameter
 * buffer: picture dimensions in macroblocks, chroma QP offsets, the
 * image structure (frame / top field / bottom field), MBAFF flag and the
 * various seq/pic coding-mode bits.  Asserts sanity-check that the VA
 * picture parameters are internally consistent and within the limits
 * the MFX unit supports (4:2:0 or monochrome only).
 */
static void
gen6_mfd_avc_img_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    int qm_present_flag;
    int img_struct;
    int mbaff_frame_flag;
    unsigned int width_in_mbs, height_in_mbs;
    VAPictureParameterBufferH264 *pic_param;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));

    /* User-supplied scaling matrices vs. the hardware's built-in ones. */
    if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
        qm_present_flag = 1;
    else
        qm_present_flag = 0; /* built-in QM matrices */

    /* img_struct: 0 = frame, 1 = top field, 3 = bottom field. */
    if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
        img_struct = 1;
    else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
        img_struct = 3;
    else
        img_struct = 0;

    /* Field pictures must be flagged as such, and vice versa. */
    if ((img_struct & 0x1) == 0x1) {
        assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
    } else {
        assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
    }

    if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
        assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
        assert(pic_param->pic_fields.bits.field_pic_flag == 0);
    } else {
        assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
    }

    mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
                        !pic_param->pic_fields.bits.field_pic_flag);

    width_in_mbs = ((pic_param->picture_width_in_mbs_minus1 + 1) & 0xff);
    height_in_mbs = ((pic_param->picture_height_in_mbs_minus1 + 1) & 0xff); /* frame height */
    assert(!((width_in_mbs * height_in_mbs) & 0x8000)); /* hardware requirement */

    /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
    assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
           pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
    assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */

    BEGIN_BCS_BATCH(batch, 13);
    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (13 - 2));
    OUT_BCS_BATCH(batch,
                  ((width_in_mbs * height_in_mbs) & 0x7fff));
    OUT_BCS_BATCH(batch,
                  (height_in_mbs << 16) |
                  (width_in_mbs << 0));
    OUT_BCS_BATCH(batch,
                  ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
                  ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
                  (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
                  (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
                  (1 << 12) | /* always 1, hardware requirement */
                  (qm_present_flag << 10) |
                  (img_struct << 8) |
                  (16 << 0));
    OUT_BCS_BATCH(batch,
                  (pic_param->seq_fields.bits.chroma_format_idc << 10) |
                  (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
                  ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
                  (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
                  (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
                  (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
                  (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
                  (mbaff_frame_flag << 1) |
                  (pic_param->pic_fields.bits.field_pic_flag << 0));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  486.  
/*
 * Emit MFX_AVC_QM_STATE with the application-supplied scaling matrices:
 * always the six 4x4 lists; additionally the two 8x8 lists when
 * transform_8x8_mode_flag is set.  No-op when no IQ matrix buffer was
 * provided (the hardware then keeps its built-in matrices, see
 * qm_present_flag in gen6_mfd_avc_img_state).
 */
static void
gen6_mfd_avc_qm_state(VADriverContextP ctx,
                      struct decode_state *decode_state,
                      struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    int cmd_len;
    VAIQMatrixBufferH264 *iq_matrix;
    VAPictureParameterBufferH264 *pic_param;

    if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
        return;

    iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;

    cmd_len = 2 + 6 * 4; /* always load six 4x4 scaling matrices */

    if (pic_param->pic_fields.bits.transform_8x8_mode_flag)
        cmd_len += 2 * 16; /* load two 8x8 scaling matrices */

    BEGIN_BCS_BATCH(batch, cmd_len);
    OUT_BCS_BATCH(batch, MFX_AVC_QM_STATE | (cmd_len - 2));

    if (pic_param->pic_fields.bits.transform_8x8_mode_flag)
        OUT_BCS_BATCH(batch,
                      (0x0  << 8) | /* don't use default built-in matrices */
                      (0xff << 0)); /* six 4x4 and two 8x8 scaling matrices */
    else
        OUT_BCS_BATCH(batch,
                      (0x0  << 8) | /* don't use default built-in matrices */
                      (0x3f << 0)); /* six 4x4 scaling matrices */

    intel_batchbuffer_data(batch, &iq_matrix->ScalingList4x4[0][0], 6 * 4 * 4);

    if (pic_param->pic_fields.bits.transform_8x8_mode_flag)
        intel_batchbuffer_data(batch, &iq_matrix->ScalingList8x8[0][0], 2 * 16 * 4);

    ADVANCE_BCS_BATCH(batch);
}
  529.  
/*
 * Emit MFX_AVC_DIRECTMODE_STATE (69 DWORDs) for B-slice direct-mode
 * prediction: a top/bottom direct-MV buffer relocation pair for each of
 * the 16 reference frame stores and for the current picture, followed by
 * the top/bottom POC pair for each reference and for the current picture.
 * When a surface has no bottom DMV buffer (dmv_bottom_flag == 0) the top
 * buffer is emitted for both entries; empty slots emit zero DWORDs.
 */
static void
gen6_mfd_avc_directmode_state(VADriverContextP ctx,
                              VAPictureParameterBufferH264 *pic_param,
                              VASliceParameterBufferH264 *slice_param,
                              struct gen6_mfd_context *gen6_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    struct object_surface *obj_surface;
    GenAvcSurface *gen6_avc_surface;
    VAPictureH264 *va_pic;
    int i, j;

    BEGIN_BCS_BATCH(batch, 69);
    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));

    /* reference surfaces 0..15 */
    for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
        if (gen6_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
            obj_surface = SURFACE(gen6_mfd_context->reference_surface[i].surface_id);
            assert(obj_surface);
            gen6_avc_surface = obj_surface->private_data;

            /* A reference without per-surface AVC state contributes no
             * DMV buffers. */
            if (gen6_avc_surface == NULL) {
                OUT_BCS_BATCH(batch, 0);
                OUT_BCS_BATCH(batch, 0);
            } else {
                OUT_BCS_RELOC(batch, gen6_avc_surface->dmv_top,
                              I915_GEM_DOMAIN_INSTRUCTION, 0,
                              0);

                if (gen6_avc_surface->dmv_bottom_flag == 1)
                    OUT_BCS_RELOC(batch, gen6_avc_surface->dmv_bottom,
                                  I915_GEM_DOMAIN_INSTRUCTION, 0,
                                  0);
                else
                    OUT_BCS_RELOC(batch, gen6_avc_surface->dmv_top,
                                  I915_GEM_DOMAIN_INSTRUCTION, 0,
                                  0);
            }
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    /* the current decoding frame/field: its DMV buffers are written by
     * the hardware, hence the write domain on the relocations. */
    va_pic = &pic_param->CurrPic;
    assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
    obj_surface = SURFACE(va_pic->picture_id);
    assert(obj_surface && obj_surface->bo && obj_surface->private_data);
    gen6_avc_surface = obj_surface->private_data;

    OUT_BCS_RELOC(batch, gen6_avc_surface->dmv_top,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);

    if (gen6_avc_surface->dmv_bottom_flag == 1)
        OUT_BCS_RELOC(batch, gen6_avc_surface->dmv_bottom,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_RELOC(batch, gen6_avc_surface->dmv_top,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);

    /* POC List: look up each frame-store surface in ReferenceFrames to
     * fetch its top/bottom field order counts. */
    for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
        if (gen6_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
            int found = 0;
            for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
                va_pic = &pic_param->ReferenceFrames[j];

                if (va_pic->flags & VA_PICTURE_H264_INVALID)
                    continue;

                if (va_pic->picture_id == gen6_mfd_context->reference_surface[i].surface_id) {
                    found = 1;
                    break;
                }
            }

            assert(found == 1);
            assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));

            OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
            OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    /* POC of the current picture closes out the command. */
    va_pic = &pic_param->CurrPic;
    OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
    OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);

    ADVANCE_BCS_BATCH(batch);
}
  629.  
/*
 * Emit MFX_AVC_SLICE_STATE for one H.264 slice.
 *
 * Maps the VA-API slice type onto the three hardware slice classes
 * (I/P/B), derives the active reference-list sizes and the weighted
 * prediction mode, and converts first_mb_in_slice of this slice and of
 * the following one into (horizontal, vertical) macroblock positions.
 * A NULL next_slice_param marks this as the last slice of the picture.
 */
static void
gen6_mfd_avc_slice_state(VADriverContextP ctx,
                         VAPictureParameterBufferH264 *pic_param,
                         VASliceParameterBufferH264 *slice_param,
                         VASliceParameterBufferH264 *next_slice_param,
                         struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
    int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
    int num_ref_idx_l0, num_ref_idx_l1;
    /* MBAFF only applies to frame pictures of an MBAFF-coded sequence */
    int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
                         pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
    int weighted_pred_idc = 0;
    int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
    unsigned int chroma_log2_weight_denom, luma_log2_weight_denom;
    int slice_type;

    /* Fold SI onto I and SP onto P: the hardware only knows I/P/B */
    if (slice_param->slice_type == SLICE_TYPE_I ||
        slice_param->slice_type == SLICE_TYPE_SI) {
        slice_type = SLICE_TYPE_I;
    } else if (slice_param->slice_type == SLICE_TYPE_P ||
               slice_param->slice_type == SLICE_TYPE_SP) {
        slice_type = SLICE_TYPE_P;
    } else {
        assert(slice_param->slice_type == SLICE_TYPE_B);
        slice_type = SLICE_TYPE_B;
    }

    luma_log2_weight_denom   = slice_param->luma_log2_weight_denom;
    chroma_log2_weight_denom = slice_param->chroma_log2_weight_denom;

    if (slice_type == SLICE_TYPE_I) {
        /* I slices use no reference lists */
        assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
        num_ref_idx_l0 = 0;
        num_ref_idx_l1 = 0;
    } else if (slice_type == SLICE_TYPE_P) {
        /* P slices use list 0 only; explicit weighting if the flag is set */
        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
        num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_idx_l1 = 0;
        weighted_pred_idc = (pic_param->pic_fields.bits.weighted_pred_flag == 1);
    } else {
        num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
        weighted_pred_idc = pic_param->pic_fields.bits.weighted_bipred_idc;

        if (weighted_pred_idc == 2) {
            /* 8.4.3 - Derivation process for prediction weights (8-279) */
            luma_log2_weight_denom   = 5;
            chroma_log2_weight_denom = 5;
        }
    }

    /* In MBAFF frames first_mb_in_slice addresses MB pairs, so double it */
    first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
    slice_hor_pos = first_mb_in_slice % width_in_mbs;
    slice_ver_pos = first_mb_in_slice / width_in_mbs;

    if (next_slice_param) {
        first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture;
        next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs;
        next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
    } else {
        /* Last slice: end position is the first MB past the bottom row */
        next_slice_hor_pos = 0;
        next_slice_ver_pos = height_in_mbs;
    }

    BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, slice_type);
    OUT_BCS_BATCH(batch,
                  (num_ref_idx_l1 << 24) |
                  (num_ref_idx_l0 << 16) |
                  (chroma_log2_weight_denom << 8) |
                  (luma_log2_weight_denom << 0));
    OUT_BCS_BATCH(batch,
                  (weighted_pred_idc << 30) |
                  (slice_param->direct_spatial_mv_pred_flag << 29) |
                  (slice_param->disable_deblocking_filter_idc << 27) |
                  (slice_param->cabac_init_idc << 24) |
                  ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
                  ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
                  ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
    OUT_BCS_BATCH(batch,
                  (slice_ver_pos << 24) |
                  (slice_hor_pos << 16) |
                  (first_mb_in_slice << 0));
    OUT_BCS_BATCH(batch,
                  (next_slice_ver_pos << 16) |
                  (next_slice_hor_pos << 0));
    OUT_BCS_BATCH(batch,
                  (next_slice_param == NULL) << 19); /* last slice flag */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  729.  
  730. static void
  731. gen6_mfd_avc_phantom_slice_state(VADriverContextP ctx,
  732.                                  VAPictureParameterBufferH264 *pic_param,
  733.                                  struct gen6_mfd_context *gen6_mfd_context)
  734. {
  735.     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
  736.     int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
  737.     int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
  738.  
  739.     BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
  740.     OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
  741.     OUT_BCS_BATCH(batch, 0);
  742.     OUT_BCS_BATCH(batch, 0);
  743.     OUT_BCS_BATCH(batch, 0);
  744.     OUT_BCS_BATCH(batch,
  745.                   height_in_mbs << 24 |
  746.                   width_in_mbs * height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag));
  747.     OUT_BCS_BATCH(batch, 0);
  748.     OUT_BCS_BATCH(batch, 0);
  749.     OUT_BCS_BATCH(batch, 0);
  750.     OUT_BCS_BATCH(batch, 0);
  751.     OUT_BCS_BATCH(batch, 0);
  752.     OUT_BCS_BATCH(batch, 0);
  753.     ADVANCE_BCS_BATCH(batch);
  754. }
  755.  
  756. static inline void
  757. gen6_mfd_avc_ref_idx_state(VADriverContextP ctx,
  758.                            VAPictureParameterBufferH264 *pic_param,
  759.                            VASliceParameterBufferH264 *slice_param,
  760.                            struct gen6_mfd_context *gen6_mfd_context)
  761. {
  762.     gen6_send_avc_ref_idx_state(
  763.         gen6_mfd_context->base.batch,
  764.         slice_param,
  765.         gen6_mfd_context->reference_surface
  766.     );
  767. }
  768.  
  769. static void
  770. gen6_mfd_avc_weightoffset_state(VADriverContextP ctx,
  771.                                 VAPictureParameterBufferH264 *pic_param,
  772.                                 VASliceParameterBufferH264 *slice_param,
  773.                                 struct gen6_mfd_context *gen6_mfd_context)
  774. {
  775.     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
  776.     int i, j, num_weight_offset_table = 0;
  777.     short weightoffsets[32 * 6];
  778.  
  779.     if ((slice_param->slice_type == SLICE_TYPE_P ||
  780.          slice_param->slice_type == SLICE_TYPE_SP) &&
  781.         (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
  782.         num_weight_offset_table = 1;
  783.     }
  784.    
  785.     if ((slice_param->slice_type == SLICE_TYPE_B) &&
  786.         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
  787.         num_weight_offset_table = 2;
  788.     }
  789.  
  790.     for (i = 0; i < num_weight_offset_table; i++) {
  791.         BEGIN_BCS_BATCH(batch, 98);
  792.         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
  793.         OUT_BCS_BATCH(batch, i);
  794.  
  795.         if (i == 0) {
  796.             for (j = 0; j < 32; j++) {
  797.                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
  798.                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
  799.                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
  800.                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
  801.                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
  802.                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
  803.             }
  804.         } else {
  805.             for (j = 0; j < 32; j++) {
  806.                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
  807.                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
  808.                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
  809.                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
  810.                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
  811.                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
  812.             }
  813.         }
  814.  
  815.         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
  816.         ADVANCE_BCS_BATCH(batch);
  817.     }
  818. }
  819.  
/*
 * Emit MFD_AVC_BSD_OBJECT, which points the bitstream decoder at the
 * coded data of one slice and starts its decode.
 */
static void
gen6_mfd_avc_bsd_object(VADriverContextP ctx,
                        VAPictureParameterBufferH264 *pic_param,
                        VASliceParameterBufferH264 *slice_param,
                        dri_bo *slice_data_bo,
                        struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    unsigned int slice_data_bit_offset;

    /* Bit offset of the first macroblock within the slice data; the
     * helper accounts for the entropy coding mode (CAVLC vs. CABAC) */
    slice_data_bit_offset = avc_get_first_mb_bit_offset(
        slice_data_bo,
        slice_param,
        pic_param->pic_fields.bits.entropy_coding_mode_flag
    );

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
    /* DW1: byte length of the slice data remaining after the offset */
    OUT_BCS_BATCH(batch,
                  (slice_param->slice_data_size - slice_param->slice_data_offset));
    /* DW2: byte offset of the slice within the indirect object buffer */
    OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
    OUT_BCS_BATCH(batch,
                  (0 << 31) |
                  (0 << 14) |
                  (0 << 12) |
                  (0 << 10) |
                  (0 << 8));
    /* DW4: byte position of the first MB, plus the bit position within
     * that byte expressed as (7 - bit) */
    OUT_BCS_BATCH(batch,
                  ((slice_data_bit_offset >> 3) << 16) |
                  (1 << 7)  |
                  (1 << 6)  |
                  ((0x7 - (slice_data_bit_offset & 0x7)) << 0));
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  855.  
  856. static void
  857. gen6_mfd_avc_phantom_slice_bsd_object(VADriverContextP ctx,
  858.                                       VAPictureParameterBufferH264 *pic_param,
  859.                                       struct gen6_mfd_context *gen6_mfd_context)
  860. {
  861.     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
  862.  
  863.     BEGIN_BCS_BATCH(batch, 6);
  864.     OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
  865.     OUT_BCS_BATCH(batch, 0);
  866.     OUT_BCS_BATCH(batch, 0);
  867.     OUT_BCS_BATCH(batch, 0);
  868.     OUT_BCS_BATCH(batch, 0);
  869.     OUT_BCS_BATCH(batch, 0);
  870.     ADVANCE_BCS_BATCH(batch);
  871. }
  872.  
/*
 * Emit the trailing "phantom" (zero-length) slice that terminates AVC
 * decoding of the current picture: a slice state describing the full
 * macroblock extent, followed by an empty BSD object.
 */
static void
gen6_mfd_avc_phantom_slice(VADriverContextP ctx,
                           VAPictureParameterBufferH264 *pic_param,
                           struct gen6_mfd_context *gen6_mfd_context)
{
    gen6_mfd_avc_phantom_slice_state(ctx, pic_param, gen6_mfd_context);
    gen6_mfd_avc_phantom_slice_bsd_object(ctx, pic_param, gen6_mfd_context);
}
  881.  
/*
 * Per-picture setup for AVC decoding.
 *
 * Scans all slices to decide whether the in-loop deblocking filter is
 * active, binds the frame-store indices and the current render target
 * surface, and (re)allocates the row-store scratch buffers sized from
 * the picture width.
 */
static void
gen6_mfd_avc_decode_init(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen6_mfd_context *gen6_mfd_context)
{
    VAPictureParameterBufferH264 *pic_param;
    VASliceParameterBufferH264 *slice_param;
    VAPictureH264 *va_pic;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    dri_bo *bo;
    int i, j, enable_avc_ildb = 0;
    int width_in_mbs;

    /* Deblocking is needed unless every slice disables the filter
     * (disable_deblocking_filter_idc == 1); stop scanning at the first
     * slice that keeps it enabled */
    for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            if (slice_param->disable_deblocking_filter_idc != 1) {
                enable_avc_ildb = 1;
                break;
            }

            slice_param++;
        }
    }

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    gen6_mfd_avc_frame_store_index(ctx, pic_param, gen6_mfd_context);
    /* NOTE(review): width is masked to 8 bits, presumably the hardware
     * field width — confirm against the MFX command spec */
    width_in_mbs = ((pic_param->picture_width_in_mbs_minus1 + 1) & 0xff);

    /* Current decoded picture */
    va_pic = &pic_param->CurrPic;
    assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
    obj_surface = SURFACE(va_pic->picture_id);
    assert(obj_surface);
    /* Re-derive the "referenced" bit from the picture parameters */
    obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
    obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0);
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);

    /* initial uv component for YUV400 case */
    if (pic_param->seq_fields.bits.chroma_format_idc == 0) {
         unsigned int uv_offset = obj_surface->width * obj_surface->height;
         unsigned int uv_size   = obj_surface->width * obj_surface->height / 2;

         /* Fill the chroma plane with the neutral value 0x80 */
         drm_intel_gem_bo_map_gtt(obj_surface->bo);
         memset(obj_surface->bo->virtual + uv_offset, 0x80, uv_size);
         drm_intel_gem_bo_unmap_gtt(obj_surface->bo);
    }

    gen6_mfd_init_avc_surface(ctx, pic_param, obj_surface);

    /* Exactly one of post-/pre-deblocking output is valid, depending on
     * whether the in-loop deblocking filter runs for this picture */
    dri_bo_unreference(gen6_mfd_context->post_deblocking_output.bo);
    gen6_mfd_context->post_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen6_mfd_context->post_deblocking_output.bo);
    gen6_mfd_context->post_deblocking_output.valid = enable_avc_ildb;

    dri_bo_unreference(gen6_mfd_context->pre_deblocking_output.bo);
    gen6_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen6_mfd_context->pre_deblocking_output.bo);
    gen6_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;

    /* Row-store scratch buffers, all sized from the picture width */
    dri_bo_unreference(gen6_mfd_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "intra row store",
                      width_in_mbs * 64,
                      0x1000);
    assert(bo);
    gen6_mfd_context->intra_row_store_scratch_buffer.bo = bo;
    gen6_mfd_context->intra_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "deblocking filter row store",
                      width_in_mbs * 64 * 4,
                      0x1000);
    assert(bo);
    gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
    gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 96,
                      0x1000);
    assert(bo);
    gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen6_mfd_context->mpr_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "mpr row store",
                      width_in_mbs * 64,
                      0x1000);
    assert(bo);
    gen6_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
    gen6_mfd_context->mpr_row_store_scratch_buffer.valid = 1;

    /* AVC does not use the bitplane buffer (VC-1 only) */
    gen6_mfd_context->bitplane_read_buffer.valid = 0;
}
  991.  
/*
 * Decode one complete AVC picture: emit the per-picture pipeline state,
 * then per-slice commands for every slice in every slice-parameter
 * buffer, and finish with the phantom slice that terminates the frame.
 */
static void
gen6_mfd_avc_decode_picture(VADriverContextP ctx,
                            struct decode_state *decode_state,
                            struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    VAPictureParameterBufferH264 *pic_param;
    VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    gen6_mfd_avc_decode_init(ctx, decode_state, gen6_mfd_context);

    /* Per-picture pipeline state */
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen6_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen6_mfd_context);
    gen6_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen6_mfd_context);
    gen6_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen6_mfd_context);
    gen6_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen6_mfd_context);
    gen6_mfd_avc_img_state(ctx, decode_state, gen6_mfd_context);
    gen6_mfd_avc_qm_state(ctx, decode_state, gen6_mfd_context);

    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen6_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen6_mfd_context);

        /* First slice of the next parameter buffer, used to delimit the
         * last slice of this buffer; NULL for the final buffer */
        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            /* Per-slice command sequence */
            gen6_mfd_avc_directmode_state(ctx, pic_param, slice_param, gen6_mfd_context);
            gen6_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen6_mfd_context);
            gen6_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen6_mfd_context);
            gen6_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen6_mfd_context);
            gen6_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, gen6_mfd_context);
            slice_param++;
        }
    }

    gen6_mfd_avc_phantom_slice(ctx, pic_param, gen6_mfd_context);
    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
  1053.  
/*
 * Per-picture setup for MPEG-2 decoding: bind the reference surfaces
 * and the current render target, and allocate the BSD/MPC row-store
 * scratch buffer.  Only the pre-deblocking output path is used.
 */
static void
gen6_mfd_mpeg2_decode_init(VADriverContextP ctx,
                           struct decode_state *decode_state,
                           struct gen6_mfd_context *gen6_mfd_context)
{
    VAPictureParameterBufferMPEG2 *pic_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    dri_bo *bo;
    unsigned int width_in_mbs;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
    width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;

    mpeg2_set_reference_surfaces(
        ctx,
        gen6_mfd_context->reference_surface,
        decode_state,
        pic_param
    );

    /* Current decoded picture */
    obj_surface = SURFACE(decode_state->current_render_target);
    assert(obj_surface);
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);

    dri_bo_unreference(gen6_mfd_context->pre_deblocking_output.bo);
    gen6_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen6_mfd_context->pre_deblocking_output.bo);
    gen6_mfd_context->pre_deblocking_output.valid = 1;

    dri_bo_unreference(gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 96,
                      0x1000);
    assert(bo);
    gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    /* The remaining scratch/output buffers are AVC- or VC-1-specific */
    gen6_mfd_context->post_deblocking_output.valid = 0;
    gen6_mfd_context->intra_row_store_scratch_buffer.valid = 0;
    gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
    gen6_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
    gen6_mfd_context->bitplane_read_buffer.valid = 0;
}
  1101.  
/*
 * Emit MFX_MPEG2_PIC_STATE: f_codes, picture coding extension flags,
 * the derived top-field-first bit, and the picture size in macroblocks.
 */
static void
gen6_mfd_mpeg2_pic_state(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    VAPictureParameterBufferMPEG2 *pic_param;
    unsigned int tff, pic_structure;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;

    /* For field pictures top_field_first is not meaningful: derive the
     * bit from which field this is and whether it comes first */
    pic_structure = pic_param->picture_coding_extension.bits.picture_structure;
    if (pic_structure == MPEG_FRAME)
        tff = pic_param->picture_coding_extension.bits.top_field_first;
    else
        tff = !(pic_param->picture_coding_extension.bits.is_first_field ^
                (pic_structure & MPEG_TOP_FIELD));

    BEGIN_BCS_BATCH(batch, 4);
    OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (4 - 2));
    OUT_BCS_BATCH(batch,
                  (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
                  ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
                  ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
                  ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
                  pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
                  pic_param->picture_coding_extension.bits.picture_structure << 12 |
                  tff << 11 |
                  pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
                  pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
                  pic_param->picture_coding_extension.bits.q_scale_type << 8 |
                  pic_param->picture_coding_extension.bits.intra_vlc_format << 7 |
                  pic_param->picture_coding_extension.bits.alternate_scan << 6);
    OUT_BCS_BATCH(batch,
                  pic_param->picture_coding_type << 9);
    /* Picture size rounded up to whole macroblocks */
    OUT_BCS_BATCH(batch,
                  (ALIGN(pic_param->vertical_size, 16) / 16) << 16 |
                  (ALIGN(pic_param->horizontal_size, 16) / 16));
    ADVANCE_BCS_BATCH(batch);
}
  1143.  
  1144. static void
  1145. gen6_mfd_mpeg2_qm_state(VADriverContextP ctx,
  1146.                         struct decode_state *decode_state,
  1147.                         struct gen6_mfd_context *gen6_mfd_context)
  1148. {
  1149.     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
  1150.     VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen6_mfd_context->iq_matrix.mpeg2;
  1151.     int i, j;
  1152.  
  1153.     /* Update internal QM state */
  1154.     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
  1155.         VAIQMatrixBufferMPEG2 * const iq_matrix =
  1156.             (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
  1157.  
  1158.         gen_iq_matrix->load_intra_quantiser_matrix =
  1159.             iq_matrix->load_intra_quantiser_matrix;
  1160.         if (iq_matrix->load_intra_quantiser_matrix) {
  1161.             for (j = 0; j < 64; j++)
  1162.                 gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
  1163.                     iq_matrix->intra_quantiser_matrix[j];
  1164.         }
  1165.  
  1166.         gen_iq_matrix->load_non_intra_quantiser_matrix =
  1167.             iq_matrix->load_non_intra_quantiser_matrix;
  1168.         if (iq_matrix->load_non_intra_quantiser_matrix) {
  1169.             for (j = 0; j < 64; j++)
  1170.                 gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
  1171.                     iq_matrix->non_intra_quantiser_matrix[j];
  1172.         }
  1173.     }
  1174.  
  1175.     /* Commit QM state to HW */
  1176.     for (i = 0; i < 2; i++) {
  1177.         unsigned char *qm = NULL;
  1178.  
  1179.         if (i == 0) {
  1180.             if (gen_iq_matrix->load_intra_quantiser_matrix)
  1181.                 qm = gen_iq_matrix->intra_quantiser_matrix;
  1182.         } else {
  1183.             if (gen_iq_matrix->load_non_intra_quantiser_matrix)
  1184.                 qm = gen_iq_matrix->non_intra_quantiser_matrix;
  1185.         }
  1186.  
  1187.         if (!qm)
  1188.             continue;
  1189.  
  1190.         BEGIN_BCS_BATCH(batch, 18);
  1191.         OUT_BCS_BATCH(batch, MFX_MPEG2_QM_STATE | (18 - 2));
  1192.         OUT_BCS_BATCH(batch, i);
  1193.         intel_batchbuffer_data(batch, qm, 64);
  1194.         ADVANCE_BCS_BATCH(batch);
  1195.     }
  1196. }
  1197.  
/*
 * Emit MFD_MPEG2_BSD_OBJECT for one MPEG-2 slice.  The macroblock span
 * is computed from the distance between this slice's start position and
 * the next slice's (or the end of the picture for the last slice).
 */
static void
gen6_mfd_mpeg2_bsd_object(VADriverContextP ctx,
                          VAPictureParameterBufferMPEG2 *pic_param,
                          VASliceParameterBufferMPEG2 *slice_param,
                          VASliceParameterBufferMPEG2 *next_slice_param,
                          struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
    int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;

    if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
        pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
        is_field_pic = 1;
    /* NOTE(review): when the workaround flag is set, the app's vertical
     * positions for field pictures are halved — see
     * mpeg2_wa_slice_vertical_position() for how the flag is derived */
    is_field_pic_wa = is_field_pic &&
        gen6_mfd_context->wa_mpeg2_slice_vertical_position > 0;

    vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
    hpos0 = slice_param->slice_horizontal_position;

    if (next_slice_param == NULL) {
        /* Last slice runs to the bottom of the (field) picture */
        vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
        hpos1 = 0;
    } else {
        vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
        hpos1 = next_slice_param->slice_horizontal_position;
    }

    /* Macroblocks between the two start positions, in raster order */
    mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
    /* DW1/DW2: data length and offset, adjusted past the whole bytes of
     * macroblock_offset; the residual bits go in DW3 */
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
    OUT_BCS_BATCH(batch,
                  hpos0 << 24 |
                  vpos0 << 16 |
                  mb_count << 8 |
                  (next_slice_param == NULL) << 5 |
                  (next_slice_param == NULL) << 3 |
                  (slice_param->macroblock_offset & 0x7));
    OUT_BCS_BATCH(batch,
                  slice_param->quantiser_scale_code << 24);
    ADVANCE_BCS_BATCH(batch);
}
  1245.  
/*
 * Decode one complete MPEG-2 picture: emit per-picture pipeline state,
 * then a BSD object per slice across all slice-parameter buffers.
 */
static void
gen6_mfd_mpeg2_decode_picture(VADriverContextP ctx,
                              struct decode_state *decode_state,
                              struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    VAPictureParameterBufferMPEG2 *pic_param;
    VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;

    gen6_mfd_mpeg2_decode_init(ctx, decode_state, gen6_mfd_context);
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen6_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen6_mfd_context);
    gen6_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen6_mfd_context);
    gen6_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen6_mfd_context);
    gen6_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen6_mfd_context);
    gen6_mfd_mpeg2_pic_state(ctx, decode_state, gen6_mfd_context);
    gen6_mfd_mpeg2_qm_state(ctx, decode_state, gen6_mfd_context);

    /* Lazily determine the slice-vertical-position workaround once
     * (< 0 means "not yet decided") */
    if (gen6_mfd_context->wa_mpeg2_slice_vertical_position < 0)
        gen6_mfd_context->wa_mpeg2_slice_vertical_position =
            mpeg2_wa_slice_vertical_position(decode_state, pic_param);

    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen6_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen6_mfd_context);

        /* First slice of the next buffer delimits this buffer's last
         * slice; NULL for the final buffer */
        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen6_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen6_mfd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
  1301.  
/* Maps VAPictureParameterBufferVC1.picture_fields.bits.picture_type to the
 * hardware picture-type encoding.  Index 4 (presumably the skipped-picture
 * type — confirm against the libva VC-1 header) is decoded as a P picture. */
static const int va_to_gen6_vc1_pic_type[5] = {
    GEN6_VC1_I_PICTURE,
    GEN6_VC1_P_PICTURE,
    GEN6_VC1_B_PICTURE,
    GEN6_VC1_BI_PICTURE,
    GEN6_VC1_P_PICTURE,
};

/* Maps the VAMvMode* enumeration (or mv_mode2 when intensity compensation
 * is active) to the unified MV mode field of MFX_VC1_PIC_STATE. */
static const int va_to_gen6_vc1_mv[4] = {
    1, /* 1-MV */
    2, /* 1-MV half-pel */
    3, /* 1-MV half-pel bilinear */
    0, /* Mixed MV */
};

/* Scale factors indexed by pic_param->b_picture_fraction; used both for the
 * direct-mode scale factor and for deriving BRFD on B pictures. */
static const int b_picture_scale_factor[21] = {
    128, 85,  170, 64,  192,
    51,  102, 153, 204, 43,
    215, 37,  74,  111, 148,
    185, 222, 32,  96,  160,
    224,
};

/* Maps pic_param->conditional_overlap_flag (0..2) to the CONDOVER field. */
static const int va_to_gen6_vc1_condover[3] = {
    0,
    2,
    3
};

/* Maps sequence_fields.bits.profile (0..3) to the hardware profile code. */
static const int va_to_gen6_vc1_profile[4] = {
    GEN6_VC1_SIMPLE_PROFILE,
    GEN6_VC1_MAIN_PROFILE,
    GEN6_VC1_RESERVED_PROFILE,
    GEN6_VC1_ADVANCED_PROFILE
};
  1337.  
  1338. static void
  1339. gen6_mfd_free_vc1_surface(void **data)
  1340. {
  1341.     struct gen6_vc1_surface *gen6_vc1_surface = *data;
  1342.  
  1343.     if (!gen6_vc1_surface)
  1344.         return;
  1345.  
  1346.     dri_bo_unreference(gen6_vc1_surface->dmv);
  1347.     free(gen6_vc1_surface);
  1348.     *data = NULL;
  1349. }
  1350.  
  1351. static void
  1352. gen6_mfd_init_vc1_surface(VADriverContextP ctx,
  1353.                           VAPictureParameterBufferVC1 *pic_param,
  1354.                           struct object_surface *obj_surface)
  1355. {
  1356.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1357.     struct gen6_vc1_surface *gen6_vc1_surface = obj_surface->private_data;
  1358.     int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
  1359.  
  1360.     obj_surface->free_private_data = gen6_mfd_free_vc1_surface;
  1361.  
  1362.     if (!gen6_vc1_surface) {
  1363.         gen6_vc1_surface = calloc(sizeof(struct gen6_vc1_surface), 1);
  1364.         assert((obj_surface->size & 0x3f) == 0);
  1365.         obj_surface->private_data = gen6_vc1_surface;
  1366.     }
  1367.  
  1368.     gen6_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
  1369.  
  1370.     if (gen6_vc1_surface->dmv == NULL) {
  1371.         gen6_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
  1372.                                              "direct mv w/r buffer",
  1373.                                              128 * height_in_mbs * 64,  /* scalable with frame height */
  1374.                                              0x1000);
  1375.     }
  1376. }
  1377.  
  1378. static void
  1379. gen6_mfd_vc1_decode_init(VADriverContextP ctx,
  1380.                          struct decode_state *decode_state,
  1381.                          struct gen6_mfd_context *gen6_mfd_context)
  1382. {
  1383.     VAPictureParameterBufferVC1 *pic_param;
  1384.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1385.     struct object_surface *obj_surface;
  1386.     int i;
  1387.     dri_bo *bo;
  1388.     int width_in_mbs;
  1389.     int picture_type;
  1390.  
  1391.     assert(decode_state->pic_param && decode_state->pic_param->buffer);
  1392.     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
  1393.     width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
  1394.     picture_type = pic_param->picture_fields.bits.picture_type;
  1395.  
  1396.     /* reference picture */
  1397.     obj_surface = SURFACE(pic_param->forward_reference_picture);
  1398.  
  1399.     if (obj_surface && obj_surface->bo)
  1400.         gen6_mfd_context->reference_surface[0].surface_id = pic_param->forward_reference_picture;
  1401.     else
  1402.         gen6_mfd_context->reference_surface[0].surface_id = VA_INVALID_ID;
  1403.  
  1404.     obj_surface = SURFACE(pic_param->backward_reference_picture);
  1405.  
  1406.     if (obj_surface && obj_surface->bo)
  1407.         gen6_mfd_context->reference_surface[1].surface_id = pic_param->backward_reference_picture;
  1408.     else
  1409.         gen6_mfd_context->reference_surface[1].surface_id = pic_param->forward_reference_picture;
  1410.  
  1411.     /* must do so !!! */
  1412.     for (i = 2; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++)
  1413.         gen6_mfd_context->reference_surface[i].surface_id = gen6_mfd_context->reference_surface[i % 2].surface_id;
  1414.  
  1415.     /* Current decoded picture */
  1416.     obj_surface = SURFACE(decode_state->current_render_target);
  1417.     assert(obj_surface);
  1418.     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
  1419.     gen6_mfd_init_vc1_surface(ctx, pic_param, obj_surface);
  1420.  
  1421.     dri_bo_unreference(gen6_mfd_context->post_deblocking_output.bo);
  1422.     gen6_mfd_context->post_deblocking_output.bo = obj_surface->bo;
  1423.     dri_bo_reference(gen6_mfd_context->post_deblocking_output.bo);
  1424.     gen6_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;
  1425.  
  1426.     dri_bo_unreference(gen6_mfd_context->pre_deblocking_output.bo);
  1427.     gen6_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
  1428.     dri_bo_reference(gen6_mfd_context->pre_deblocking_output.bo);
  1429.     gen6_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;
  1430.  
  1431.     dri_bo_unreference(gen6_mfd_context->intra_row_store_scratch_buffer.bo);
  1432.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1433.                       "intra row store",
  1434.                       width_in_mbs * 64,
  1435.                       0x1000);
  1436.     assert(bo);
  1437.     gen6_mfd_context->intra_row_store_scratch_buffer.bo = bo;
  1438.     gen6_mfd_context->intra_row_store_scratch_buffer.valid = 1;
  1439.  
  1440.     dri_bo_unreference(gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
  1441.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1442.                       "deblocking filter row store",
  1443.                       width_in_mbs * 7 * 64,
  1444.                       0x1000);
  1445.     assert(bo);
  1446.     gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
  1447.     gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
  1448.  
  1449.     dri_bo_unreference(gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
  1450.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1451.                       "bsd mpc row store",
  1452.                       width_in_mbs * 96,
  1453.                       0x1000);
  1454.     assert(bo);
  1455.     gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
  1456.     gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
  1457.  
  1458.     gen6_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
  1459.  
  1460.     gen6_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
  1461.     dri_bo_unreference(gen6_mfd_context->bitplane_read_buffer.bo);
  1462.    
  1463.     if (gen6_mfd_context->bitplane_read_buffer.valid) {
  1464.         int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
  1465.         int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
  1466.         int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
  1467.         int src_w, src_h;
  1468.         uint8_t *src = NULL, *dst = NULL;
  1469.  
  1470.         assert(decode_state->bit_plane->buffer);
  1471.         src = decode_state->bit_plane->buffer;
  1472.  
  1473.         bo = dri_bo_alloc(i965->intel.bufmgr,
  1474.                           "VC-1 Bitplane",
  1475.                           bitplane_width * height_in_mbs,
  1476.                           0x1000);
  1477.         assert(bo);
  1478.         gen6_mfd_context->bitplane_read_buffer.bo = bo;
  1479.  
  1480.         dri_bo_map(bo, True);
  1481.         assert(bo->virtual);
  1482.         dst = bo->virtual;
  1483.  
  1484.         for (src_h = 0; src_h < height_in_mbs; src_h++) {
  1485.             for(src_w = 0; src_w < width_in_mbs; src_w++) {
  1486.                 int src_index, dst_index;
  1487.                 int src_shift;
  1488.                 uint8_t src_value;
  1489.  
  1490.                 src_index = (src_h * width_in_mbs + src_w) / 2;
  1491.                 src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
  1492.                 src_value = ((src[src_index] >> src_shift) & 0xf);
  1493.  
  1494.                 if (picture_type == GEN6_VC1_SKIPPED_PICTURE){
  1495.                     src_value |= 0x2;
  1496.                 }
  1497.  
  1498.                 dst_index = src_w / 2;
  1499.                 dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
  1500.             }
  1501.  
  1502.             if (src_w & 1)
  1503.                 dst[src_w / 2] >>= 4;
  1504.  
  1505.             dst += bitplane_width;
  1506.         }
  1507.  
  1508.         dri_bo_unmap(bo);
  1509.     } else
  1510.         gen6_mfd_context->bitplane_read_buffer.bo = NULL;
  1511. }
  1512.  
/*
 * Derive all picture-level control fields from the VA VC-1 picture parameters
 * and emit the 6-dword MFX_VC1_PIC_STATE command.  The bitfield layout of the
 * emitted dwords follows the SNB MFX command definition (see the batch
 * emission at the bottom).  NOTE(review): this function intentionally mutates
 * pic_param->transform_fields for the VSTRANSFORM==0 case — see below.
 */
static void
gen6_mfd_vc1_pic_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    VAPictureParameterBufferVC1 *pic_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
    int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
    int unified_mv_mode;
    int ref_field_pic_polarity = 0;
    int scale_factor = 0;
    int trans_ac_y = 0;
    int dmv_surface_valid = 0;
    int brfd = 0;
    int fcm = 0;
    int picture_type;
    int profile;
    int overlap;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;

    profile = va_to_gen6_vc1_profile[pic_param->sequence_fields.bits.profile];
    dquant = pic_param->pic_quantizer_fields.bits.dquant;
    dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
    dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
    dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
    dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
    dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
    alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;

    /* Translate the VC-1 DQUANT syntax (dquant/dquantfrm/dqprofile/...) into
     * the hardware's alternate-PQUANT config + edge-mask pair. */
    if (dquant == 0) {
        alt_pquant_config = 0;
        alt_pquant_edge_mask = 0;
    } else if (dquant == 2) {
        /* DQUANT==2: all four edges use the alternate quantizer. */
        alt_pquant_config = 1;
        alt_pquant_edge_mask = 0xf;
    } else {
        assert(dquant == 1);
        if (dquantfrm == 0) {
            alt_pquant_config = 0;
            alt_pquant_edge_mask = 0;
            alt_pq = 0;
        } else {
            assert(dquantfrm == 1);
            alt_pquant_config = 1;

            switch (dqprofile) {
            case 3:
                /* "All macroblocks": binary-level selects between the two
                 * per-MB quantizer signalling modes. */
                if (dqbilevel == 0) {
                    alt_pquant_config = 2;
                    alt_pquant_edge_mask = 0;
                } else {
                    assert(dqbilevel == 1);
                    alt_pquant_config = 3;
                    alt_pquant_edge_mask = 0;
                }
                break;

            case 0:
                /* All four edges. */
                alt_pquant_edge_mask = 0xf;
                break;

            case 1:
                /* Double edge: dqdbedge selects which adjacent pair. */
                if (dqdbedge == 3)
                    alt_pquant_edge_mask = 0x9;
                else
                    alt_pquant_edge_mask = (0x3 << dqdbedge);

                break;

            case 2:
                /* Single edge. */
                alt_pquant_edge_mask = (0x1 << dqsbedge);
                break;

            default:
                assert(0);
            }
        }
    }

    /* With intensity compensation, the effective MV mode comes from mv_mode2. */
    if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
        assert(pic_param->mv_fields.bits.mv_mode2 < 4);
        unified_mv_mode = va_to_gen6_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
    } else {
        assert(pic_param->mv_fields.bits.mv_mode < 4);
        unified_mv_mode = va_to_gen6_vc1_mv[pic_param->mv_fields.bits.mv_mode];
    }

    if (pic_param->sequence_fields.bits.interlace == 1 &&
        pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
        /* FIXME: calculate reference field picture polarity */
        assert(0);
        ref_field_pic_polarity = 0;
    }

    if (pic_param->b_picture_fraction < 21)
        scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];

    picture_type = va_to_gen6_vc1_pic_type[pic_param->picture_fields.bits.picture_type];

    /* Advanced-profile I pictures are programmed as BI for this hardware
     * state — NOTE(review): presumably a hardware requirement; confirm
     * against the SNB MFX PRM. */
    if (profile == GEN6_VC1_ADVANCED_PROFILE &&
        picture_type == GEN6_VC1_I_PICTURE)
        picture_type = GEN6_VC1_BI_PICTURE;

    if (picture_type == GEN6_VC1_I_PICTURE || picture_type == GEN6_VC1_BI_PICTURE) /* I picture */
        trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
    else {
        trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
        /*
         * 8.3.6.2.1 Transform Type Selection
         * If variable-sized transform coding is not enabled,
         * then the 8x8 transform shall be used for all blocks.
         * it is also MFX_VC1_PIC_STATE requirement.
         */
        if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
            pic_param->transform_fields.bits.mb_level_transform_type_flag   = 1;
            pic_param->transform_fields.bits.frame_level_transform_type     = 0;
        }
    }

    /* Direct-mode MVs can only be read from the backward reference when that
     * reference itself carried motion (i.e. it was not an I/BI picture). */
    if (picture_type == GEN6_VC1_B_PICTURE) {
        struct gen6_vc1_surface *gen6_vc1_surface = NULL;

        obj_surface = SURFACE(pic_param->backward_reference_picture);
        assert(obj_surface);
        gen6_vc1_surface = obj_surface->private_data;

        if (!gen6_vc1_surface ||
            (va_to_gen6_vc1_pic_type[gen6_vc1_surface->picture_type] == GEN6_VC1_I_PICTURE ||
             va_to_gen6_vc1_pic_type[gen6_vc1_surface->picture_type] == GEN6_VC1_BI_PICTURE))
            dmv_surface_valid = 0;
        else
            dmv_surface_valid = 1;
    }

    assert(pic_param->picture_fields.bits.frame_coding_mode < 3);

    /* FCM: 0/1 pass through; field-interlace picks 2 or 3 by field order. */
    if (pic_param->picture_fields.bits.frame_coding_mode < 2)
        fcm = pic_param->picture_fields.bits.frame_coding_mode;
    else {
        if (pic_param->picture_fields.bits.top_field_first)
            fcm = 2;
        else
            fcm = 3;
    }

    /* BRFD (B-frame reference distance), derived from the scale factor. */
    if (pic_param->picture_fields.bits.picture_type == GEN6_VC1_B_PICTURE) { /* B picture */
        brfd = pic_param->reference_fields.bits.reference_distance;
        brfd = (scale_factor * brfd) >> 8;
        brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;

        if (brfd < 0)
            brfd = 0;
    }

    /* Overlap smoothing: simple/main profiles key purely off PQUANT>=9; the
     * advanced profile additionally honours CONDOVER on I/BI pictures. */
    overlap = 0;
    if (profile != GEN6_VC1_ADVANCED_PROFILE){
        if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
            pic_param->picture_fields.bits.picture_type != GEN6_VC1_B_PICTURE) {
            overlap = 1;
        }
    }else {
        if (pic_param->picture_fields.bits.picture_type == GEN6_VC1_P_PICTURE &&
             pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
              overlap = 1;
        }
        if (pic_param->picture_fields.bits.picture_type == GEN6_VC1_I_PICTURE ||
            pic_param->picture_fields.bits.picture_type == GEN6_VC1_BI_PICTURE){
             if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
                overlap = 1;
             } else if (va_to_gen6_vc1_condover[pic_param->conditional_overlap_flag] == 2 ||
                        va_to_gen6_vc1_condover[pic_param->conditional_overlap_flag] == 3) {
                 overlap = 1;
             }
        }
    }

    assert(pic_param->conditional_overlap_flag < 3);
    assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFX_VC1_PIC_STATE | (6 - 2));
    /* DW1: frame size in macroblocks (height | width). */
    OUT_BCS_BATCH(batch,
                  (ALIGN(pic_param->coded_height, 16) / 16) << 16 |
                  (ALIGN(pic_param->coded_width, 16) / 16));
    /* DW2: quantizer / loop-filter / picture-type control. */
    OUT_BCS_BATCH(batch,
                  pic_param->sequence_fields.bits.syncmarker << 31 |
                  1 << 29 | /* concealment */
                  alt_pq << 24 |
                  pic_param->entrypoint_fields.bits.loopfilter << 23 |
                  overlap << 22 |
                  (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 21 | /* implicit quantizer */
                  pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 16 |
                  alt_pquant_edge_mask << 12 |
                  alt_pquant_config << 10 |
                  pic_param->pic_quantizer_fields.bits.half_qp << 9 |
                  pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 8 |
                  va_to_gen6_vc1_condover[pic_param->conditional_overlap_flag] << 6 |
                  !pic_param->picture_fields.bits.is_first_field << 5 |
                  picture_type << 2 |
                  fcm << 0);
    /* DW3: bitplane presence (inverted per-flag: 1 = decode from bitstream)
     * and motion-vector configuration. */
    OUT_BCS_BATCH(batch,
                  !!pic_param->bitplane_present.value << 23 |
                  !pic_param->bitplane_present.flags.bp_forward_mb << 22 |
                  !pic_param->bitplane_present.flags.bp_mv_type_mb << 21 |
                  !pic_param->bitplane_present.flags.bp_skip_mb << 20 |
                  !pic_param->bitplane_present.flags.bp_direct_mb << 19 |
                  !pic_param->bitplane_present.flags.bp_overflags << 18 |
                  !pic_param->bitplane_present.flags.bp_ac_pred << 17 |
                  !pic_param->bitplane_present.flags.bp_field_tx << 16 |
                  pic_param->mv_fields.bits.extended_dmv_range << 14 |
                  pic_param->mv_fields.bits.extended_mv_range << 12 |
                  pic_param->mv_fields.bits.four_mv_switch << 11 |
                  pic_param->fast_uvmc_flag << 10 |
                  unified_mv_mode << 8 |
                  ref_field_pic_polarity << 6 |
                  pic_param->reference_fields.bits.num_reference_pictures << 5 |
                  pic_param->reference_fields.bits.reference_distance << 0);
    /* DW4: VLC table selections and transform controls. */
    OUT_BCS_BATCH(batch,
                  scale_factor << 24 |
                  pic_param->mv_fields.bits.mv_table << 20 |
                  pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
                  pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
                  pic_param->transform_fields.bits.frame_level_transform_type << 12 |
                  pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
                  pic_param->mb_mode_table << 8 |
                  trans_ac_y << 6 |
                  pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
                  pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
                  pic_param->cbp_table << 0);
    /* DW5: direct-MV surface validity, BRFD, and half-width in MB pairs. */
    OUT_BCS_BATCH(batch,
                  dmv_surface_valid << 13 |
                  brfd << 8 |
                  ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1));
    ADVANCE_BCS_BATCH(batch);
}
  1753.  
  1754. static void
  1755. gen6_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
  1756.                              struct decode_state *decode_state,
  1757.                              struct gen6_mfd_context *gen6_mfd_context)
  1758. {
  1759.     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
  1760.     VAPictureParameterBufferVC1 *pic_param;
  1761.     int interpolation_mode = 0;
  1762.     int intensitycomp_single;
  1763.  
  1764.     assert(decode_state->pic_param && decode_state->pic_param->buffer);
  1765.     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
  1766.  
  1767.     if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
  1768.         (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
  1769.          pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
  1770.         interpolation_mode = 2; /* Half-pel bilinear */
  1771.     else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
  1772.              (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
  1773.               pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
  1774.         interpolation_mode = 0; /* Half-pel bicubic */
  1775.     else
  1776.         interpolation_mode = 1; /* Quarter-pel bicubic */
  1777.  
  1778.     assert(decode_state->pic_param && decode_state->pic_param->buffer);
  1779.     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
  1780.     intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
  1781.  
  1782.     BEGIN_BCS_BATCH(batch, 7);
  1783.     OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (7 - 2));
  1784.     OUT_BCS_BATCH(batch,
  1785.                   0 << 8 | /* FIXME: interlace mode */
  1786.                   pic_param->rounding_control << 4 |
  1787.                   va_to_gen6_vc1_profile[pic_param->sequence_fields.bits.profile] << 2);
  1788.     OUT_BCS_BATCH(batch,
  1789.                   pic_param->luma_shift << 16 |
  1790.                   pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
  1791.     OUT_BCS_BATCH(batch, 0);
  1792.     OUT_BCS_BATCH(batch, 0);
  1793.     OUT_BCS_BATCH(batch, 0);
  1794.     OUT_BCS_BATCH(batch,
  1795.                   interpolation_mode << 19 |
  1796.                   pic_param->fast_uvmc_flag << 18 |
  1797.                   0 << 17 | /* FIXME: scale up or down ??? */
  1798.                   pic_param->range_reduction_frame << 16 |
  1799.                   0 << 6 | /* FIXME: double ??? */
  1800.                   0 << 4 |
  1801.                   intensitycomp_single << 2 |
  1802.                   intensitycomp_single << 0);
  1803.     ADVANCE_BCS_BATCH(batch);
  1804. }
  1805.  
  1806.  
  1807. static void
  1808. gen6_mfd_vc1_directmode_state(VADriverContextP ctx,
  1809.                               struct decode_state *decode_state,
  1810.                               struct gen6_mfd_context *gen6_mfd_context)
  1811. {
  1812.     struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
  1813.     VAPictureParameterBufferVC1 *pic_param;
  1814.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1815.     struct object_surface *obj_surface;
  1816.     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
  1817.  
  1818.     assert(decode_state->pic_param && decode_state->pic_param->buffer);
  1819.     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
  1820.  
  1821.     obj_surface = SURFACE(decode_state->current_render_target);
  1822.  
  1823.     if (obj_surface && obj_surface->private_data) {
  1824.         dmv_write_buffer = ((struct gen6_vc1_surface *)(obj_surface->private_data))->dmv;
  1825.     }
  1826.  
  1827.     obj_surface = SURFACE(pic_param->backward_reference_picture);
  1828.  
  1829.     if (obj_surface && obj_surface->private_data) {
  1830.         dmv_read_buffer = ((struct gen6_vc1_surface *)(obj_surface->private_data))->dmv;
  1831.     }
  1832.  
  1833.     BEGIN_BCS_BATCH(batch, 3);
  1834.     OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (3 - 2));
  1835.  
  1836.     if (dmv_write_buffer)
  1837.         OUT_BCS_RELOC(batch, dmv_write_buffer,
  1838.                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
  1839.                       0);
  1840.     else
  1841.         OUT_BCS_BATCH(batch, 0);
  1842.  
  1843.     if (dmv_read_buffer)
  1844.         OUT_BCS_RELOC(batch, dmv_read_buffer,
  1845.                       I915_GEM_DOMAIN_INSTRUCTION, 0,
  1846.                       0);
  1847.     else
  1848.         OUT_BCS_BATCH(batch, 0);
  1849.                  
  1850.     ADVANCE_BCS_BATCH(batch);
  1851. }
  1852.  
  1853. static int
  1854. gen6_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
  1855. {
  1856.     int out_slice_data_bit_offset;
  1857.     int slice_header_size = in_slice_data_bit_offset / 8;
  1858.     int i, j;
  1859.  
  1860.     if (profile != 3)
  1861.         out_slice_data_bit_offset = in_slice_data_bit_offset;
  1862.     else {
  1863.         for (i = 0, j = 0; i < slice_header_size; i++, j++) {
  1864.             if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3 && buf[j + 3] < 4) {
  1865.                 i++, j += 2;
  1866.             }
  1867.         }
  1868.  
  1869.         out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
  1870.     }
  1871.  
  1872.     return out_slice_data_bit_offset;
  1873. }
  1874.  
/*
 * Emit one MFD_VC1_BSD_OBJECT command for a single slice: computes the true
 * bit offset of the first macroblock inside the raw slice data (accounting
 * for advanced-profile emulation-prevention bytes) and programs the slice
 * byte size/offset plus the vertical extent [start, next_start).
 */
static void
gen6_mfd_vc1_bsd_object(VADriverContextP ctx,
                        VAPictureParameterBufferVC1 *pic_param,
                        VASliceParameterBufferVC1 *slice_param,
                        VASliceParameterBufferVC1 *next_slice_param,
                        dri_bo *slice_data_bo,
                        struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    int next_slice_start_vert_pos;
    int macroblock_offset;
    uint8_t *slice_data = NULL;

    /* Map the slice bo just long enough to scan its header on the CPU.
     * (Note: arithmetic on the void* ->virtual is a GNU extension.) */
    dri_bo_map(slice_data_bo, 0);
    slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
    macroblock_offset = gen6_mfd_vc1_get_macroblock_bit_offset(slice_data,
                                                               slice_param->macroblock_offset,
                                                               pic_param->sequence_fields.bits.profile);
    dri_bo_unmap(slice_data_bo);

    /* The last slice of the picture extends to the bottom MB row. */
    if (next_slice_param)
        next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
    else
        next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;

    BEGIN_BCS_BATCH(batch, 4);
    OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (4 - 2));
    /* Remaining byte length after the macroblock-aligned header bytes. */
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_size - (macroblock_offset >> 3));
    /* Byte offset of the first macroblock within the indirect object. */
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_offset + (macroblock_offset >> 3));
    /* Vertical range plus the sub-byte bit offset of the first MB. */
    OUT_BCS_BATCH(batch,
                  slice_param->slice_vertical_position << 24 |
                  next_slice_start_vert_pos << 16 |
                  (macroblock_offset & 0x7));
    ADVANCE_BCS_BATCH(batch);
}
  1912.  
/*
 * Top-level VC-1 picture decode: performs per-picture init, emits the fixed
 * MFX state commands inside one atomic BCS batch, then walks every slice
 * parameter buffer/element pair, emitting a BSD object per slice.  Each slice
 * receives a pointer to the following slice's parameters (or NULL at the end
 * of the picture) so the BSD object can compute its vertical extent.
 */
static void
gen6_mfd_vc1_decode_picture(VADriverContextP ctx,
                            struct decode_state *decode_state,
                            struct gen6_mfd_context *gen6_mfd_context)
{
    struct intel_batchbuffer *batch = gen6_mfd_context->base.batch;
    VAPictureParameterBufferVC1 *pic_param;
    VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;

    gen6_mfd_vc1_decode_init(ctx, decode_state, gen6_mfd_context);
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen6_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen6_mfd_context);
    gen6_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen6_mfd_context);
    gen6_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen6_mfd_context);
    gen6_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen6_mfd_context);
    gen6_mfd_vc1_pic_state(ctx, decode_state, gen6_mfd_context);
    gen6_mfd_vc1_pred_pipe_state(ctx, decode_state, gen6_mfd_context);
    gen6_mfd_vc1_directmode_state(ctx, decode_state, gen6_mfd_context);

    /* Outer loop: slice parameter buffers ("slice groups"). */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen6_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen6_mfd_context);

        /* First slice of the NEXT group, used as lookahead for the last
         * element of this group; NULL for the final group. */
        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;

        /* Inner loop: individual slices within the buffer. */
        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen6_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen6_mfd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
  1965.  
  1966. static void
  1967. gen6_mfd_decode_picture(VADriverContextP ctx,
  1968.                         VAProfile profile,
  1969.                         union codec_state *codec_state,
  1970.                         struct hw_context *hw_context)
  1971.  
  1972. {
  1973.     struct gen6_mfd_context *gen6_mfd_context = (struct gen6_mfd_context *)hw_context;
  1974.     struct decode_state *decode_state = &codec_state->decode;
  1975.  
  1976.     assert(gen6_mfd_context);
  1977.  
  1978.     switch (profile) {
  1979.     case VAProfileMPEG2Simple:
  1980.     case VAProfileMPEG2Main:
  1981.         gen6_mfd_mpeg2_decode_picture(ctx, decode_state, gen6_mfd_context);
  1982.         break;
  1983.        
  1984.     case VAProfileH264Baseline:
  1985.     case VAProfileH264Main:
  1986.     case VAProfileH264High:
  1987.         gen6_mfd_avc_decode_picture(ctx, decode_state, gen6_mfd_context);
  1988.         break;
  1989.  
  1990.     case VAProfileVC1Simple:
  1991.     case VAProfileVC1Main:
  1992.     case VAProfileVC1Advanced:
  1993.         gen6_mfd_vc1_decode_picture(ctx, decode_state, gen6_mfd_context);
  1994.         break;
  1995.  
  1996.     default:
  1997.         assert(0);
  1998.         break;
  1999.     }
  2000. }
  2001.  
  2002. static void
  2003. gen6_mfd_context_destroy(void *hw_context)
  2004. {
  2005.     struct gen6_mfd_context *gen6_mfd_context = (struct gen6_mfd_context *)hw_context;
  2006.  
  2007.     dri_bo_unreference(gen6_mfd_context->post_deblocking_output.bo);
  2008.     gen6_mfd_context->post_deblocking_output.bo = NULL;
  2009.  
  2010.     dri_bo_unreference(gen6_mfd_context->pre_deblocking_output.bo);
  2011.     gen6_mfd_context->pre_deblocking_output.bo = NULL;
  2012.  
  2013.     dri_bo_unreference(gen6_mfd_context->intra_row_store_scratch_buffer.bo);
  2014.     gen6_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
  2015.  
  2016.     dri_bo_unreference(gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
  2017.     gen6_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
  2018.  
  2019.     dri_bo_unreference(gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
  2020.     gen6_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
  2021.  
  2022.     dri_bo_unreference(gen6_mfd_context->mpr_row_store_scratch_buffer.bo);
  2023.     gen6_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
  2024.  
  2025.     dri_bo_unreference(gen6_mfd_context->bitplane_read_buffer.bo);
  2026.     gen6_mfd_context->bitplane_read_buffer.bo = NULL;
  2027.  
  2028.     intel_batchbuffer_free(gen6_mfd_context->base.batch);
  2029.     free(gen6_mfd_context);
  2030. }
  2031.  
  2032. struct hw_context *
  2033. gen6_dec_hw_context_init(VADriverContextP ctx, VAProfile profile)
  2034. {
  2035.     struct intel_driver_data *intel = intel_driver_data(ctx);
  2036.     struct gen6_mfd_context *gen6_mfd_context = calloc(1, sizeof(struct gen6_mfd_context));
  2037.     int i;
  2038.  
  2039.     gen6_mfd_context->base.destroy = gen6_mfd_context_destroy;
  2040.     gen6_mfd_context->base.run = gen6_mfd_decode_picture;
  2041.     gen6_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
  2042.  
  2043.     for (i = 0; i < ARRAY_ELEMS(gen6_mfd_context->reference_surface); i++) {
  2044.         gen6_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
  2045.         gen6_mfd_context->reference_surface[i].frame_store_id = -1;
  2046.     }
  2047.  
  2048.     gen6_mfd_context->wa_mpeg2_slice_vertical_position = -1;
  2049.    
  2050.     return (struct hw_context *)gen6_mfd_context;
  2051. }
  2052.