Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright © 2011 Intel Corporation
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the
  6.  * "Software"), to deal in the Software without restriction, including
  7.  * without limitation the rights to use, copy, modify, merge, publish,
  8.  * distribute, sub license, and/or sell copies of the Software, and to
  9.  * permit persons to whom the Software is furnished to do so, subject to
  10.  * the following conditions:
  11.  *
  12.  * The above copyright notice and this permission notice (including the
  13.  * next paragraph) shall be included in all copies or substantial portions
  14.  * of the Software.
  15.  *
  16.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  17.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  18.  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  19.  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
  20.  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  21.  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  22.  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23.  *
  24.  * Authors:
  25.  *    Xiang Haihao <haihao.xiang@intel.com>
  26.  *    Zhao Yakui  <yakui.zhao@intel.com>
  27.  *
  28.  */
  29. #include "sysdeps.h"
  30.  
  31. #include <va/va_dec_jpeg.h>
  32.  
  33. #include "intel_batchbuffer.h"
  34. #include "intel_driver.h"
  35. #include "i965_defines.h"
  36. #include "i965_drv_video.h"
  37. #include "i965_decoder_utils.h"
  38. #include "gen7_mfd.h"
  39. #include "intel_media.h"
  40.  
/* Haswell revisions at or above B0 use the wider "bplus" MFX command
 * layouts; this gate selects between the two state-emission paths below. */
#define B0_STEP_REV             2
#define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
  43.  
/* JPEG zig-zag scan order: entry i gives the raster (row-major) position
 * within the 8x8 block of the i-th coefficient in scan order; used to
 * de-zigzag quantization tables before loading them into the hardware. */
static const uint32_t zigzag_direct[64] = {
    0,   1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
  54.  
  55. static void
  56. gen75_mfd_init_avc_surface(VADriverContextP ctx,
  57.                           VAPictureParameterBufferH264 *pic_param,
  58.                           struct object_surface *obj_surface)
  59. {
  60.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  61.     GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
  62.     int width_in_mbs, height_in_mbs;
  63.  
  64.     obj_surface->free_private_data = gen_free_avc_surface;
  65.     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
  66.     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
  67.  
  68.     if (!gen7_avc_surface) {
  69.         gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
  70.         assert(gen7_avc_surface);
  71.         gen7_avc_surface->base.frame_store_id = -1;
  72.         assert((obj_surface->size & 0x3f) == 0);
  73.         obj_surface->private_data = gen7_avc_surface;
  74.     }
  75.  
  76.     gen7_avc_surface->dmv_bottom_flag = (pic_param->pic_fields.bits.field_pic_flag &&
  77.                                          !pic_param->seq_fields.bits.direct_8x8_inference_flag);
  78.  
  79.     if (gen7_avc_surface->dmv_top == NULL) {
  80.         gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
  81.                                                  "direct mv w/r buffer",
  82.                                                  width_in_mbs * height_in_mbs * 128,
  83.                                                  0x1000);
  84.         assert(gen7_avc_surface->dmv_top);
  85.     }
  86.  
  87.     if (gen7_avc_surface->dmv_bottom_flag &&
  88.         gen7_avc_surface->dmv_bottom == NULL) {
  89.         gen7_avc_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr,
  90.                                                     "direct mv w/r buffer",
  91.                                                     width_in_mbs * height_in_mbs * 128,                                                    
  92.                                                     0x1000);
  93.         assert(gen7_avc_surface->dmv_bottom);
  94.     }
  95. }
  96.  
  97. static void
  98. gen75_mfd_pipe_mode_select(VADriverContextP ctx,
  99.                           struct decode_state *decode_state,
  100.                           int standard_select,
  101.                           struct gen7_mfd_context *gen7_mfd_context)
  102. {
  103.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  104.  
  105.     assert(standard_select == MFX_FORMAT_MPEG2 ||
  106.            standard_select == MFX_FORMAT_AVC ||
  107.            standard_select == MFX_FORMAT_VC1 ||
  108.            standard_select == MFX_FORMAT_JPEG);
  109.  
  110.     BEGIN_BCS_BATCH(batch, 5);
  111.     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
  112.     OUT_BCS_BATCH(batch,
  113.                   (MFX_LONG_MODE << 17) | /* Currently only support long format */
  114.                   (MFD_MODE_VLD << 15) | /* VLD mode */
  115.                   (0 << 10) | /* disable Stream-Out */
  116.                   (gen7_mfd_context->post_deblocking_output.valid << 9)  | /* Post Deblocking Output */
  117.                   (gen7_mfd_context->pre_deblocking_output.valid << 8)  | /* Pre Deblocking Output */
  118.                   (0 << 5)  | /* not in stitch mode */
  119.                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
  120.                   (standard_select << 0));
  121.     OUT_BCS_BATCH(batch,
  122.                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
  123.                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
  124.                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
  125.                   (0 << 1)  |
  126.                   (0 << 0));
  127.     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
  128.     OUT_BCS_BATCH(batch, 0); /* reserved */
  129.     ADVANCE_BCS_BATCH(batch);
  130. }
  131.  
  132. static void
  133. gen75_mfd_surface_state(VADriverContextP ctx,
  134.                        struct decode_state *decode_state,
  135.                        int standard_select,
  136.                        struct gen7_mfd_context *gen7_mfd_context)
  137. {
  138.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  139.     struct object_surface *obj_surface = decode_state->render_object;
  140.     unsigned int y_cb_offset;
  141.     unsigned int y_cr_offset;
  142.     unsigned int surface_format;
  143.  
  144.     assert(obj_surface);
  145.  
  146.     y_cb_offset = obj_surface->y_cb_offset;
  147.     y_cr_offset = obj_surface->y_cr_offset;
  148.  
  149.     surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
  150.         MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;
  151.  
  152.     BEGIN_BCS_BATCH(batch, 6);
  153.     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
  154.     OUT_BCS_BATCH(batch, 0);
  155.     OUT_BCS_BATCH(batch,
  156.                   ((obj_surface->orig_height - 1) << 18) |
  157.                   ((obj_surface->orig_width - 1) << 4));
  158.     OUT_BCS_BATCH(batch,
  159.                   (surface_format << 28) | /* 420 planar YUV surface */
  160.                   ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
  161.                   (0 << 22) | /* surface object control state, ignored */
  162.                   ((obj_surface->width - 1) << 3) | /* pitch */
  163.                   (0 << 2)  | /* must be 0 */
  164.                   (1 << 1)  | /* must be tiled */
  165.                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
  166.     OUT_BCS_BATCH(batch,
  167.                   (0 << 16) | /* X offset for U(Cb), must be 0 */
  168.                   (y_cb_offset << 0)); /* Y offset for U(Cb) */
  169.     OUT_BCS_BATCH(batch,
  170.                   (0 << 16) | /* X offset for V(Cr), must be 0 */
  171.                   (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoro for JPEG */
  172.     ADVANCE_BCS_BATCH(batch);
  173. }
  174.  
  175. static void
  176. gen75_mfd_pipe_buf_addr_state_bplus(VADriverContextP ctx,
  177.                              struct decode_state *decode_state,
  178.                              int standard_select,
  179.                              struct gen7_mfd_context *gen7_mfd_context)
  180. {
  181.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  182.     int i;
  183.  
  184.     BEGIN_BCS_BATCH(batch, 61);
  185.     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
  186.         /* Pre-deblock 1-3 */
  187.     if (gen7_mfd_context->pre_deblocking_output.valid)
  188.         OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
  189.                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
  190.                       0);
  191.     else
  192.         OUT_BCS_BATCH(batch, 0);
  193.  
  194.         OUT_BCS_BATCH(batch, 0);
  195.         OUT_BCS_BATCH(batch, 0);
  196.         /* Post-debloing 4-6 */
  197.     if (gen7_mfd_context->post_deblocking_output.valid)
  198.         OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
  199.                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
  200.                       0);
  201.     else
  202.         OUT_BCS_BATCH(batch, 0);
  203.  
  204.         OUT_BCS_BATCH(batch, 0);
  205.         OUT_BCS_BATCH(batch, 0);
  206.  
  207.         /* uncompressed-video & stream out 7-12 */
  208.     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
  209.     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
  210.         OUT_BCS_BATCH(batch, 0);
  211.         OUT_BCS_BATCH(batch, 0);
  212.         OUT_BCS_BATCH(batch, 0);
  213.         OUT_BCS_BATCH(batch, 0);
  214.  
  215.         /* intra row-store scratch 13-15 */
  216.     if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
  217.         OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
  218.                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
  219.                       0);
  220.     else
  221.         OUT_BCS_BATCH(batch, 0);
  222.  
  223.         OUT_BCS_BATCH(batch, 0);
  224.         OUT_BCS_BATCH(batch, 0);
  225.         /* deblocking-filter-row-store 16-18 */
  226.     if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
  227.         OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
  228.                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
  229.                       0);
  230.     else
  231.         OUT_BCS_BATCH(batch, 0);
  232.         OUT_BCS_BATCH(batch, 0);
  233.         OUT_BCS_BATCH(batch, 0);
  234.  
  235.     /* DW 19..50 */
  236.     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
  237.         struct object_surface *obj_surface;
  238.  
  239.         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
  240.             gen7_mfd_context->reference_surface[i].obj_surface &&
  241.             gen7_mfd_context->reference_surface[i].obj_surface->bo) {
  242.             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
  243.  
  244.             OUT_BCS_RELOC(batch, obj_surface->bo,
  245.                           I915_GEM_DOMAIN_INSTRUCTION, 0,
  246.                           0);
  247.         } else {
  248.             OUT_BCS_BATCH(batch, 0);
  249.         }
  250.             OUT_BCS_BATCH(batch, 0);
  251.     }
  252.         /* reference property 51 */
  253.     OUT_BCS_BATCH(batch, 0);  
  254.        
  255.         /* Macroblock status & ILDB 52-57 */
  256.         OUT_BCS_BATCH(batch, 0);
  257.         OUT_BCS_BATCH(batch, 0);
  258.         OUT_BCS_BATCH(batch, 0);
  259.         OUT_BCS_BATCH(batch, 0);
  260.         OUT_BCS_BATCH(batch, 0);
  261.         OUT_BCS_BATCH(batch, 0);
  262.  
  263.         /* the second Macroblock status 58-60 */       
  264.         OUT_BCS_BATCH(batch, 0);
  265.         OUT_BCS_BATCH(batch, 0);
  266.         OUT_BCS_BATCH(batch, 0);
  267.     ADVANCE_BCS_BATCH(batch);
  268. }
  269.  
  270. static void
  271. gen75_mfd_pipe_buf_addr_state(VADriverContextP ctx,
  272.                              struct decode_state *decode_state,
  273.                              int standard_select,
  274.                              struct gen7_mfd_context *gen7_mfd_context)
  275. {
  276.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  277.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  278.     int i;
  279.  
  280.     if (IS_STEPPING_BPLUS(i965)) {
  281.         gen75_mfd_pipe_buf_addr_state_bplus(ctx, decode_state,
  282.                                             standard_select, gen7_mfd_context);
  283.         return;
  284.     }
  285.  
  286.     BEGIN_BCS_BATCH(batch, 25);
  287.     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (25 - 2));
  288.     if (gen7_mfd_context->pre_deblocking_output.valid)
  289.         OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
  290.                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
  291.                       0);
  292.     else
  293.         OUT_BCS_BATCH(batch, 0);
  294.  
  295.     if (gen7_mfd_context->post_deblocking_output.valid)
  296.         OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
  297.                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
  298.                       0);
  299.     else
  300.         OUT_BCS_BATCH(batch, 0);
  301.  
  302.     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
  303.     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
  304.  
  305.     if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
  306.         OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
  307.                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
  308.                       0);
  309.     else
  310.         OUT_BCS_BATCH(batch, 0);
  311.  
  312.     if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
  313.         OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
  314.                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
  315.                       0);
  316.     else
  317.         OUT_BCS_BATCH(batch, 0);
  318.  
  319.     /* DW 7..22 */
  320.     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
  321.         struct object_surface *obj_surface;
  322.  
  323.         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
  324.             gen7_mfd_context->reference_surface[i].obj_surface &&
  325.             gen7_mfd_context->reference_surface[i].obj_surface->bo) {
  326.             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
  327.  
  328.             OUT_BCS_RELOC(batch, obj_surface->bo,
  329.                           I915_GEM_DOMAIN_INSTRUCTION, 0,
  330.                           0);
  331.         } else {
  332.             OUT_BCS_BATCH(batch, 0);
  333.         }
  334.     }
  335.  
  336.     OUT_BCS_BATCH(batch, 0);   /* ignore DW23 for decoding */
  337.     OUT_BCS_BATCH(batch, 0);   /* ignore DW24 for decoding */
  338.     ADVANCE_BCS_BATCH(batch);
  339. }
  340.  
  341. static void
  342. gen75_mfd_ind_obj_base_addr_state_bplus(VADriverContextP ctx,
  343.                                  dri_bo *slice_data_bo,
  344.                                  int standard_select,
  345.                                  struct gen7_mfd_context *gen7_mfd_context)
  346. {
  347.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  348.  
  349.     BEGIN_BCS_BATCH(batch, 26);
  350.     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
  351.         /* MFX In BS 1-5 */
  352.     OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
  353.     OUT_BCS_BATCH(batch, 0);
  354.     OUT_BCS_BATCH(batch, 0);
  355.         /* Upper bound 4-5 */  
  356.     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
  357.     OUT_BCS_BATCH(batch, 0);
  358.  
  359.         /* MFX indirect MV 6-10 */
  360.     OUT_BCS_BATCH(batch, 0);
  361.     OUT_BCS_BATCH(batch, 0);
  362.     OUT_BCS_BATCH(batch, 0);
  363.     OUT_BCS_BATCH(batch, 0);
  364.     OUT_BCS_BATCH(batch, 0);
  365.        
  366.         /* MFX IT_COFF 11-15 */
  367.     OUT_BCS_BATCH(batch, 0);
  368.     OUT_BCS_BATCH(batch, 0);
  369.     OUT_BCS_BATCH(batch, 0);
  370.     OUT_BCS_BATCH(batch, 0);
  371.     OUT_BCS_BATCH(batch, 0);
  372.  
  373.         /* MFX IT_DBLK 16-20 */
  374.     OUT_BCS_BATCH(batch, 0);
  375.     OUT_BCS_BATCH(batch, 0);
  376.     OUT_BCS_BATCH(batch, 0);
  377.     OUT_BCS_BATCH(batch, 0);
  378.     OUT_BCS_BATCH(batch, 0);
  379.  
  380.         /* MFX PAK_BSE object for encoder 21-25 */
  381.     OUT_BCS_BATCH(batch, 0);
  382.     OUT_BCS_BATCH(batch, 0);
  383.     OUT_BCS_BATCH(batch, 0);
  384.     OUT_BCS_BATCH(batch, 0);
  385.     OUT_BCS_BATCH(batch, 0);
  386.  
  387.     ADVANCE_BCS_BATCH(batch);
  388. }
  389.  
  390. static void
  391. gen75_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
  392.                                  dri_bo *slice_data_bo,
  393.                                  int standard_select,
  394.                                  struct gen7_mfd_context *gen7_mfd_context)
  395. {
  396.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  397.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  398.  
  399.     if (IS_STEPPING_BPLUS(i965)) {
  400.         gen75_mfd_ind_obj_base_addr_state_bplus(ctx, slice_data_bo,
  401.                                                 standard_select, gen7_mfd_context);
  402.         return;
  403.     }
  404.  
  405.     BEGIN_BCS_BATCH(batch, 11);
  406.     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
  407.     OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
  408.     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
  409.     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
  410.     OUT_BCS_BATCH(batch, 0);
  411.     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
  412.     OUT_BCS_BATCH(batch, 0);
  413.     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
  414.     OUT_BCS_BATCH(batch, 0);
  415.     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
  416.     OUT_BCS_BATCH(batch, 0);
  417.     ADVANCE_BCS_BATCH(batch);
  418. }
  419.  
  420. static void
  421. gen75_mfd_bsp_buf_base_addr_state_bplus(VADriverContextP ctx,
  422.                                  struct decode_state *decode_state,
  423.                                  int standard_select,
  424.                                  struct gen7_mfd_context *gen7_mfd_context)
  425. {
  426.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  427.  
  428.     BEGIN_BCS_BATCH(batch, 10);
  429.     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
  430.  
  431.     if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
  432.         OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
  433.                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
  434.                       0);
  435.         else
  436.                 OUT_BCS_BATCH(batch, 0);
  437.                
  438.     OUT_BCS_BATCH(batch, 0);
  439.     OUT_BCS_BATCH(batch, 0);
  440.         /* MPR Row Store Scratch buffer 4-6 */
  441.     if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
  442.         OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
  443.                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
  444.                       0);
  445.     else
  446.             OUT_BCS_BATCH(batch, 0);
  447.     OUT_BCS_BATCH(batch, 0);
  448.     OUT_BCS_BATCH(batch, 0);
  449.  
  450.         /* Bitplane 7-9 */
  451.     if (gen7_mfd_context->bitplane_read_buffer.valid)
  452.         OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
  453.                       I915_GEM_DOMAIN_INSTRUCTION, 0,
  454.                       0);
  455.     else
  456.         OUT_BCS_BATCH(batch, 0);
  457.     OUT_BCS_BATCH(batch, 0);
  458.     OUT_BCS_BATCH(batch, 0);
  459.  
  460.     ADVANCE_BCS_BATCH(batch);
  461. }
  462.  
  463. static void
  464. gen75_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
  465.                                  struct decode_state *decode_state,
  466.                                  int standard_select,
  467.                                  struct gen7_mfd_context *gen7_mfd_context)
  468. {
  469.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  470.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  471.  
  472.     if (IS_STEPPING_BPLUS(i965)) {
  473.         gen75_mfd_bsp_buf_base_addr_state_bplus(ctx, decode_state,
  474.                                                 standard_select, gen7_mfd_context);
  475.         return;
  476.     }
  477.  
  478.     BEGIN_BCS_BATCH(batch, 4);
  479.     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
  480.  
  481.     if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
  482.         OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
  483.                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
  484.                       0);
  485.     else
  486.         OUT_BCS_BATCH(batch, 0);
  487.  
  488.     if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
  489.         OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
  490.                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
  491.                       0);
  492.     else
  493.         OUT_BCS_BATCH(batch, 0);
  494.  
  495.     if (gen7_mfd_context->bitplane_read_buffer.valid)
  496.         OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
  497.                       I915_GEM_DOMAIN_INSTRUCTION, 0,
  498.                       0);
  499.     else
  500.         OUT_BCS_BATCH(batch, 0);
  501.  
  502.     ADVANCE_BCS_BATCH(batch);
  503. }
  504.  
  505. static void
  506. gen75_mfd_qm_state(VADriverContextP ctx,
  507.                   int qm_type,
  508.                   unsigned char *qm,
  509.                   int qm_length,
  510.                   struct gen7_mfd_context *gen7_mfd_context)
  511. {
  512.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  513.     unsigned int qm_buffer[16];
  514.  
  515.     assert(qm_length <= 16 * 4);
  516.     memcpy(qm_buffer, qm, qm_length);
  517.  
  518.     BEGIN_BCS_BATCH(batch, 18);
  519.     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
  520.     OUT_BCS_BATCH(batch, qm_type << 0);
  521.     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
  522.     ADVANCE_BCS_BATCH(batch);
  523. }
  524.  
/*
 * Emit MFX_AVC_IMG_STATE: per-picture AVC decode parameters packed into the
 * 17-dword image-state command (frame size in MBs, QP offsets, field/frame
 * structure, and the sequence/picture flag bits).
 */
static void
gen75_mfd_avc_img_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int img_struct;
    int mbaff_frame_flag;
    unsigned int width_in_mbs, height_in_mbs;
    VAPictureParameterBufferH264 *pic_param;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;

    assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));

    /* img_struct: 0 = frame, 1 = top field, 3 = bottom field */
    if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
        img_struct = 1;
    else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
        img_struct = 3;
    else
        img_struct = 0;

    /* field_pic_flag must agree with the picture structure derived above */
    if ((img_struct & 0x1) == 0x1) {
        assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
    } else {
        assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
    }

    if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
        assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
        assert(pic_param->pic_fields.bits.field_pic_flag == 0);
    } else {
        assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
    }

    /* MBAFF applies only to frame pictures of an MBAFF sequence */
    mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
                        !pic_param->pic_fields.bits.field_pic_flag);

    width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */

    /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
    assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
           pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
    assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */

    BEGIN_BCS_BATCH(batch, 17);
    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2));
    /* DW1: total macroblock count minus one */
    OUT_BCS_BATCH(batch,
                  (width_in_mbs * height_in_mbs - 1));
    /* DW2: frame dimensions in macroblocks */
    OUT_BCS_BATCH(batch,
                  ((height_in_mbs - 1) << 16) |
                  ((width_in_mbs - 1) << 0));
    /* DW3: chroma QP offsets, weighted-prediction controls, picture structure */
    OUT_BCS_BATCH(batch,
                  ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
                  ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
                  (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
                  (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
                  (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
                  (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
                  (img_struct << 8));
    /* DW4: sequence/picture flag bits */
    OUT_BCS_BATCH(batch,
                  (pic_param->seq_fields.bits.chroma_format_idc << 10) |
                  (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
                  ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
                  (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
                  (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
                  (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
                  (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
                  (mbaff_frame_flag << 1) |
                  (pic_param->pic_fields.bits.field_pic_flag << 0));
    /* DW5..16: reserved / unused for long-format decode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  611.  
  612. static void
  613. gen75_mfd_avc_qm_state(VADriverContextP ctx,
  614.                       struct decode_state *decode_state,
  615.                       struct gen7_mfd_context *gen7_mfd_context)
  616. {
  617.     VAIQMatrixBufferH264 *iq_matrix;
  618.     VAPictureParameterBufferH264 *pic_param;
  619.  
  620.     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
  621.         iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
  622.     else
  623.         iq_matrix = &gen7_mfd_context->iq_matrix.h264;
  624.  
  625.     assert(decode_state->pic_param && decode_state->pic_param->buffer);
  626.     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
  627.  
  628.     gen75_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
  629.     gen75_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
  630.  
  631.     if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
  632.         gen75_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
  633.         gen75_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
  634.     }
  635. }
  636.  
  637. static inline void
  638. gen75_mfd_avc_picid_state(VADriverContextP ctx,
  639.                       struct decode_state *decode_state,
  640.                       struct gen7_mfd_context *gen7_mfd_context)
  641. {
  642.     gen75_send_avc_picid_state(gen7_mfd_context->base.batch,
  643.         gen7_mfd_context->reference_surface);
  644. }
  645.  
/*
 * Emit MFX_AVC_DIRECTMODE_STATE (71-dword B+ layout): direct-MV buffer
 * addresses for all reference surfaces and the current picture, followed by
 * the POC (picture order count) list the hardware uses for direct-mode
 * motion vector derivation.
 */
static void
gen75_mfd_avc_directmode_state_bplus(VADriverContextP ctx,
                              struct decode_state *decode_state,
                              VAPictureParameterBufferH264 *pic_param,
                              VASliceParameterBufferH264 *slice_param,
                              struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct object_surface *obj_surface;
    GenAvcSurface *gen7_avc_surface;
    VAPictureH264 *va_pic;
    int i;

    BEGIN_BCS_BATCH(batch, 71);
    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));

    /* reference surfaces 0..15 */
    /* Two dwords per reference: DMV top-buffer address (or zero when the
     * slot has no surface with attached AVC private data) plus a zero dword. */
    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
        if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
            gen7_mfd_context->reference_surface[i].obj_surface &&
            gen7_mfd_context->reference_surface[i].obj_surface->private_data) {

            obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
            gen7_avc_surface = obj_surface->private_data;
            OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0);
            OUT_BCS_BATCH(batch, 0);
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    OUT_BCS_BATCH(batch, 0);

    /* the current decoding frame/field */
    /* NOTE(review): this va_pic assignment is unused until it is reassigned
     * below before the final POC dwords. */
    va_pic = &pic_param->CurrPic;
    obj_surface = decode_state->render_object;
    assert(obj_surface->bo && obj_surface->private_data);
    gen7_avc_surface = obj_surface->private_data;

    /* Current picture's DMV buffer is both read and written by the engine. */
    OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* POC List */
    /* Top/bottom field order counts for each reference slot, looked up in
     * the picture parameter's ReferenceFrames by surface id. */
    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
        obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;

        if (obj_surface) {
            const VAPictureH264 * const va_pic = avc_find_picture(
                obj_surface->base.id, pic_param->ReferenceFrames,
                ARRAY_ELEMS(pic_param->ReferenceFrames));

            assert(va_pic != NULL);
            OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
            OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    /* Final pair: POCs of the current picture itself. */
    va_pic = &pic_param->CurrPic;
    OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
    OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);

    ADVANCE_BCS_BATCH(batch);
}
  719.  
  720. static void
  721. gen75_mfd_avc_directmode_state(VADriverContextP ctx,
  722.                               struct decode_state *decode_state,
  723.                               VAPictureParameterBufferH264 *pic_param,
  724.                               VASliceParameterBufferH264 *slice_param,
  725.                               struct gen7_mfd_context *gen7_mfd_context)
  726. {
  727.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  728.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  729.     struct object_surface *obj_surface;
  730.     GenAvcSurface *gen7_avc_surface;
  731.     VAPictureH264 *va_pic;
  732.     int i;
  733.  
  734.     if (IS_STEPPING_BPLUS(i965)) {
  735.         gen75_mfd_avc_directmode_state_bplus(ctx, decode_state, pic_param, slice_param,
  736.                                              gen7_mfd_context);
  737.  
  738.         return;
  739.     }
  740.  
  741.     BEGIN_BCS_BATCH(batch, 69);
  742.     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
  743.  
  744.     /* reference surfaces 0..15 */
  745.     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
  746.         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
  747.             gen7_mfd_context->reference_surface[i].obj_surface &&
  748.             gen7_mfd_context->reference_surface[i].obj_surface->private_data) {
  749.  
  750.             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
  751.             gen7_avc_surface = obj_surface->private_data;
  752.  
  753.             OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
  754.                           I915_GEM_DOMAIN_INSTRUCTION, 0,
  755.                           0);
  756.  
  757.             if (gen7_avc_surface->dmv_bottom_flag == 1)
  758.                 OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_bottom,
  759.                               I915_GEM_DOMAIN_INSTRUCTION, 0,
  760.                               0);
  761.             else
  762.                 OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
  763.                               I915_GEM_DOMAIN_INSTRUCTION, 0,
  764.                               0);
  765.         } else {
  766.             OUT_BCS_BATCH(batch, 0);
  767.             OUT_BCS_BATCH(batch, 0);
  768.         }
  769.     }
  770.  
  771.     /* the current decoding frame/field */
  772.     va_pic = &pic_param->CurrPic;
  773.     obj_surface = decode_state->render_object;
  774.     assert(obj_surface->bo && obj_surface->private_data);
  775.     gen7_avc_surface = obj_surface->private_data;
  776.  
  777.     OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
  778.                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
  779.                   0);
  780.  
  781.     if (gen7_avc_surface->dmv_bottom_flag == 1)
  782.         OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_bottom,
  783.                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
  784.                       0);
  785.     else
  786.         OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
  787.                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
  788.                       0);
  789.  
  790.     /* POC List */
  791.     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
  792.         obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
  793.  
  794.         if (obj_surface) {
  795.             const VAPictureH264 * const va_pic = avc_find_picture(
  796.                 obj_surface->base.id, pic_param->ReferenceFrames,
  797.                 ARRAY_ELEMS(pic_param->ReferenceFrames));
  798.  
  799.             assert(va_pic != NULL);
  800.             OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
  801.             OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
  802.         } else {
  803.             OUT_BCS_BATCH(batch, 0);
  804.             OUT_BCS_BATCH(batch, 0);
  805.         }
  806.     }
  807.  
  808.     va_pic = &pic_param->CurrPic;
  809.     OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
  810.     OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
  811.  
  812.     ADVANCE_BCS_BATCH(batch);
  813. }
  814.  
  815. static void
  816. gen75_mfd_avc_phantom_slice_first(VADriverContextP ctx,
  817.                                  VAPictureParameterBufferH264 *pic_param,
  818.                                  VASliceParameterBufferH264 *next_slice_param,
  819.                                  struct gen7_mfd_context *gen7_mfd_context)
  820. {
  821.     gen6_mfd_avc_phantom_slice(ctx, pic_param, next_slice_param, gen7_mfd_context->base.batch);
  822. }
  823.  
/*
 * Emit MFX_AVC_SLICE_STATE for one slice.
 *
 * Computes the normalized slice type, the active reference counts for
 * lists L0/L1, and the macroblock coordinates of this slice and of the
 * next one (the hardware uses the "next" position and the last-slice
 * flag to delimit the slice).
 */
static void
gen75_mfd_avc_slice_state(VADriverContextP ctx,
                         VAPictureParameterBufferH264 *pic_param,
                         VASliceParameterBufferH264 *slice_param,
                         VASliceParameterBufferH264 *next_slice_param,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
    int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
    int num_ref_idx_l0, num_ref_idx_l1;
    int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
                         pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
    int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
    int slice_type;

    /* Fold SI into I and SP into P: only I/P/B are programmed. */
    if (slice_param->slice_type == SLICE_TYPE_I ||
        slice_param->slice_type == SLICE_TYPE_SI) {
        slice_type = SLICE_TYPE_I;
    } else if (slice_param->slice_type == SLICE_TYPE_P ||
               slice_param->slice_type == SLICE_TYPE_SP) {
        slice_type = SLICE_TYPE_P;
    } else {
        assert(slice_param->slice_type == SLICE_TYPE_B);
        slice_type = SLICE_TYPE_B;
    }

    /* Active reference counts: none for I, L0 only for P, both for B. */
    if (slice_type == SLICE_TYPE_I) {
        assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
        num_ref_idx_l0 = 0;
        num_ref_idx_l1 = 0;
    } else if (slice_type == SLICE_TYPE_P) {
        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
        num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_idx_l1 = 0;
    } else {
        num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
    }

    /* Convert the linear first-MB address to (x, y) MB coordinates. */
    first_mb_in_slice = slice_param->first_mb_in_slice;
    slice_hor_pos = first_mb_in_slice % width_in_mbs;
    slice_ver_pos = first_mb_in_slice / width_in_mbs;

    /* In MBAFF pictures each MB-pair row counts as two rows. */
    if (mbaff_picture)
        slice_ver_pos = slice_ver_pos << 1;

    if (next_slice_param) {
        first_mb_in_next_slice = next_slice_param->first_mb_in_slice;
        next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs;
        next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;

        if (mbaff_picture)
            next_slice_ver_pos = next_slice_ver_pos << 1;
    } else {
        /* Last slice: "next" position is one row past the end of the
         * picture (half the height for field pictures). */
        next_slice_hor_pos = 0;
        next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
    }

    BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, slice_type);
    /* DW2: ref counts and weight denominators. */
    OUT_BCS_BATCH(batch,
                  (num_ref_idx_l1 << 24) |
                  (num_ref_idx_l0 << 16) |
                  (slice_param->chroma_log2_weight_denom << 8) |
                  (slice_param->luma_log2_weight_denom << 0));
    /* DW3: prediction/deblocking controls and slice QP. */
    OUT_BCS_BATCH(batch,
                  (slice_param->direct_spatial_mv_pred_flag << 29) |
                  (slice_param->disable_deblocking_filter_idc << 27) |
                  (slice_param->cabac_init_idc << 24) |
                  ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
                  ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
                  ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
    /* DW4: start position of this slice. */
    OUT_BCS_BATCH(batch,
                  (slice_ver_pos << 24) |
                  (slice_hor_pos << 16) |
                  (first_mb_in_slice << 0));
    /* DW5: start position of the next slice. */
    OUT_BCS_BATCH(batch,
                  (next_slice_ver_pos << 16) |
                  (next_slice_hor_pos << 0));
    OUT_BCS_BATCH(batch,
                  (next_slice_param == NULL) << 19); /* last slice flag */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  915.  
  916. static inline void
  917. gen75_mfd_avc_ref_idx_state(VADriverContextP ctx,
  918.                            VAPictureParameterBufferH264 *pic_param,
  919.                            VASliceParameterBufferH264 *slice_param,
  920.                            struct gen7_mfd_context *gen7_mfd_context)
  921. {
  922.     gen6_send_avc_ref_idx_state(
  923.         gen7_mfd_context->base.batch,
  924.         slice_param,
  925.         gen7_mfd_context->reference_surface
  926.     );
  927. }
  928.  
  929. static void
  930. gen75_mfd_avc_weightoffset_state(VADriverContextP ctx,
  931.                                 VAPictureParameterBufferH264 *pic_param,
  932.                                 VASliceParameterBufferH264 *slice_param,
  933.                                 struct gen7_mfd_context *gen7_mfd_context)
  934. {
  935.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  936.     int i, j, num_weight_offset_table = 0;
  937.     short weightoffsets[32 * 6];
  938.  
  939.     if ((slice_param->slice_type == SLICE_TYPE_P ||
  940.          slice_param->slice_type == SLICE_TYPE_SP) &&
  941.         (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
  942.         num_weight_offset_table = 1;
  943.     }
  944.    
  945.     if ((slice_param->slice_type == SLICE_TYPE_B) &&
  946.         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
  947.         num_weight_offset_table = 2;
  948.     }
  949.  
  950.     for (i = 0; i < num_weight_offset_table; i++) {
  951.         BEGIN_BCS_BATCH(batch, 98);
  952.         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
  953.         OUT_BCS_BATCH(batch, i);
  954.  
  955.         if (i == 0) {
  956.             for (j = 0; j < 32; j++) {
  957.                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
  958.                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
  959.                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
  960.                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
  961.                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
  962.                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
  963.             }
  964.         } else {
  965.             for (j = 0; j < 32; j++) {
  966.                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
  967.                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
  968.                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
  969.                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
  970.                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
  971.                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
  972.             }
  973.         }
  974.  
  975.         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
  976.         ADVANCE_BCS_BATCH(batch);
  977.     }
  978. }
  979.  
/*
 * Emit MFD_AVC_BSD_OBJECT to kick off bitstream decoding of one slice.
 *
 * slice_data_bit_offset is the bit position of the first macroblock in
 * the slice data, as computed by avc_get_first_mb_bit_offset() from the
 * slice parameters and the entropy coding mode (CAVLC vs. CABAC).
 */
static void
gen75_mfd_avc_bsd_object(VADriverContextP ctx,
                        VAPictureParameterBufferH264 *pic_param,
                        VASliceParameterBufferH264 *slice_param,
                        dri_bo *slice_data_bo,
                        VASliceParameterBufferH264 *next_slice_param,
                        struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int slice_data_bit_offset = avc_get_first_mb_bit_offset(slice_data_bo,
                                                            slice_param,
                                                            pic_param->pic_fields.bits.entropy_coding_mode_flag);

    /* the input bitstream format on GEN7 differs from GEN6 */
    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
    /* DW1: number of slice data bytes, excluding the leading offset. */
    OUT_BCS_BATCH(batch,
                  (slice_param->slice_data_size - slice_param->slice_data_offset));
    /* DW2: start offset of the slice within the indirect object buffer. */
    OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
    OUT_BCS_BATCH(batch,
                  (0 << 31) |
                  (0 << 14) |
                  (0 << 12) |
                  (0 << 10) |
                  (0 << 8));
    /* DW4: byte part of the first-MB offset in bits 31:16 and its bit
     * part in bits 2:0; bit 3 marks the frame's last slice. */
    OUT_BCS_BATCH(batch,
                  ((slice_data_bit_offset >> 3) << 16) |
                  (1 << 7)  |
                  (0 << 5)  |
                  (0 << 4)  |
                  ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
                  (slice_data_bit_offset & 0x7));
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  1015.  
  1016. static inline void
  1017. gen75_mfd_avc_context_init(
  1018.     VADriverContextP         ctx,
  1019.     struct gen7_mfd_context *gen7_mfd_context
  1020. )
  1021. {
  1022.     /* Initialize flat scaling lists */
  1023.     avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
  1024. }
  1025.  
/*
 * Per-frame initialization for AVC decoding.
 *
 * Scans the slice parameters to decide whether the in-loop deblocking
 * filter is active anywhere in the frame, updates the reference frame
 * store, prepares the render surface (buffer object + per-surface AVC
 * private data) and (re)allocates the row-store scratch buffers sized
 * by the picture width.
 */
static void
gen75_mfd_avc_decode_init(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    VAPictureParameterBufferH264 *pic_param;
    VASliceParameterBufferH264 *slice_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    dri_bo *bo;
    int i, j, enable_avc_ildb = 0;
    unsigned int width_in_mbs, height_in_mbs;

    /* Deblocking is needed as soon as one slice does not fully disable
     * it (disable_deblocking_filter_idc != 1). */
    for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            if (slice_param->disable_deblocking_filter_idc != 1) {
                enable_avc_ildb = 1;
                break;
            }

            slice_param++;
        }
    }

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    gen75_update_avc_frame_store_index(ctx, decode_state, pic_param,
        gen7_mfd_context->reference_surface);
    width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
    assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
    assert(height_in_mbs > 0 && height_in_mbs <= 256);

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    if (pic_param->pic_fields.bits.reference_pic_flag)
        obj_surface->flags |= SURFACE_REFERENCED;
    else
        obj_surface->flags &= ~SURFACE_REFERENCED;

    avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
    gen75_mfd_init_avc_surface(ctx, pic_param, obj_surface);

    /* Both output stages point at the render surface; exactly one of
     * them is marked valid depending on whether deblocking runs. */
    dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;

    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;

    /* Row-store scratch buffers, all sized by the picture width. */
    dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "intra row store",
                      width_in_mbs * 64,
                      0x1000);
    assert(bo);
    gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "deblocking filter row store",
                      width_in_mbs * 64 * 4,
                      0x1000);
    assert(bo);
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 64 * 2,
                      0x1000);
    assert(bo);
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "mpr row store",
                      width_in_mbs * 64 * 2,
                      0x1000);
    assert(bo);
    gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;

    /* No bitplane data for AVC. */
    gen7_mfd_context->bitplane_read_buffer.valid = 0;
}
  1127.  
/*
 * Top-level AVC decode entry point: runs per-frame initialization, then
 * emits the frame-level MFX state followed by the per-slice command
 * sequence for every slice of the picture, inside one atomic batch.
 */
static void
gen75_mfd_avc_decode_picture(VADriverContextP ctx,
                            struct decode_state *decode_state,
                            struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferH264 *pic_param;
    VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    gen75_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);

    /* Frame-level state. */
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen75_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen75_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen75_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen75_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen75_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
    gen75_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
    gen75_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);

    /* Outer loop: slice parameter buffers; inner loop: slices within
     * one buffer. */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);

        /* Look ahead to the next buffer so the last slice here can
         * chain to it (NULL means last slice of the frame). */
        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;

        /* If the first slice does not start at MB 0, cover the gap
         * with a phantom slice. */
        if (j == 0 && slice_param->first_mb_in_slice)
            gen75_mfd_avc_phantom_slice_first(ctx, pic_param, slice_param, gen7_mfd_context);

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            /* Per-slice command sequence; order matters. */
            gen75_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen7_mfd_context);
            gen75_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
            gen75_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
            gen75_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
            gen75_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
  1192.  
  1193. static void
  1194. gen75_mfd_mpeg2_decode_init(VADriverContextP ctx,
  1195.                            struct decode_state *decode_state,
  1196.                            struct gen7_mfd_context *gen7_mfd_context)
  1197. {
  1198.     VAPictureParameterBufferMPEG2 *pic_param;
  1199.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1200.     struct object_surface *obj_surface;
  1201.     dri_bo *bo;
  1202.     unsigned int width_in_mbs;
  1203.  
  1204.     assert(decode_state->pic_param && decode_state->pic_param->buffer);
  1205.     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
  1206.     width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
  1207.  
  1208.     mpeg2_set_reference_surfaces(
  1209.         ctx,
  1210.         gen7_mfd_context->reference_surface,
  1211.         decode_state,
  1212.         pic_param
  1213.     );
  1214.  
  1215.     /* Current decoded picture */
  1216.     obj_surface = decode_state->render_object;
  1217.     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
  1218.  
  1219.     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
  1220.     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
  1221.     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
  1222.     gen7_mfd_context->pre_deblocking_output.valid = 1;
  1223.  
  1224.     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
  1225.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1226.                       "bsd mpc row store",
  1227.                       width_in_mbs * 96,
  1228.                       0x1000);
  1229.     assert(bo);
  1230.     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
  1231.     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
  1232.  
  1233.     gen7_mfd_context->post_deblocking_output.valid = 0;
  1234.     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
  1235.     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
  1236.     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
  1237.     gen7_mfd_context->bitplane_read_buffer.valid = 0;
  1238. }
  1239.  
  1240. static void
  1241. gen75_mfd_mpeg2_pic_state(VADriverContextP ctx,
  1242.                          struct decode_state *decode_state,
  1243.                          struct gen7_mfd_context *gen7_mfd_context)
  1244. {
  1245.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  1246.     VAPictureParameterBufferMPEG2 *pic_param;
  1247.     unsigned int slice_concealment_disable_bit = 0;
  1248.  
  1249.     assert(decode_state->pic_param && decode_state->pic_param->buffer);
  1250.     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
  1251.  
  1252.     slice_concealment_disable_bit = 1;
  1253.  
  1254.     BEGIN_BCS_BATCH(batch, 13);
  1255.     OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
  1256.     OUT_BCS_BATCH(batch,
  1257.                   (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
  1258.                   ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
  1259.                   ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
  1260.                   ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
  1261.                   pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
  1262.                   pic_param->picture_coding_extension.bits.picture_structure << 12 |
  1263.                   pic_param->picture_coding_extension.bits.top_field_first << 11 |
  1264.                   pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
  1265.                   pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
  1266.                   pic_param->picture_coding_extension.bits.q_scale_type << 8 |
  1267.                   pic_param->picture_coding_extension.bits.intra_vlc_format << 7 |
  1268.                   pic_param->picture_coding_extension.bits.alternate_scan << 6);
  1269.     OUT_BCS_BATCH(batch,
  1270.                   pic_param->picture_coding_type << 9);
  1271.     OUT_BCS_BATCH(batch,
  1272.                   (slice_concealment_disable_bit << 31) |
  1273.                   ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
  1274.                   ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
  1275.     OUT_BCS_BATCH(batch, 0);
  1276.     OUT_BCS_BATCH(batch, 0);
  1277.     OUT_BCS_BATCH(batch, 0);
  1278.     OUT_BCS_BATCH(batch, 0);
  1279.     OUT_BCS_BATCH(batch, 0);
  1280.     OUT_BCS_BATCH(batch, 0);
  1281.     OUT_BCS_BATCH(batch, 0);
  1282.     OUT_BCS_BATCH(batch, 0);
  1283.     OUT_BCS_BATCH(batch, 0);
  1284.     ADVANCE_BCS_BATCH(batch);
  1285. }
  1286.  
  1287. static void
  1288. gen75_mfd_mpeg2_qm_state(VADriverContextP ctx,
  1289.                         struct decode_state *decode_state,
  1290.                         struct gen7_mfd_context *gen7_mfd_context)
  1291. {
  1292.     VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
  1293.     int i, j;
  1294.  
  1295.     /* Update internal QM state */
  1296.     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
  1297.         VAIQMatrixBufferMPEG2 * const iq_matrix =
  1298.             (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
  1299.  
  1300.         if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
  1301.             iq_matrix->load_intra_quantiser_matrix) {
  1302.             gen_iq_matrix->load_intra_quantiser_matrix =
  1303.                 iq_matrix->load_intra_quantiser_matrix;
  1304.             if (iq_matrix->load_intra_quantiser_matrix) {
  1305.                 for (j = 0; j < 64; j++)
  1306.                     gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
  1307.                         iq_matrix->intra_quantiser_matrix[j];
  1308.             }
  1309.         }
  1310.  
  1311.         if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
  1312.             iq_matrix->load_non_intra_quantiser_matrix) {
  1313.             gen_iq_matrix->load_non_intra_quantiser_matrix =
  1314.                 iq_matrix->load_non_intra_quantiser_matrix;
  1315.             if (iq_matrix->load_non_intra_quantiser_matrix) {
  1316.                 for (j = 0; j < 64; j++)
  1317.                     gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
  1318.                         iq_matrix->non_intra_quantiser_matrix[j];
  1319.             }
  1320.         }
  1321.     }
  1322.  
  1323.     /* Commit QM state to HW */
  1324.     for (i = 0; i < 2; i++) {
  1325.         unsigned char *qm = NULL;
  1326.         int qm_type;
  1327.  
  1328.         if (i == 0) {
  1329.             if (gen_iq_matrix->load_intra_quantiser_matrix) {
  1330.                 qm = gen_iq_matrix->intra_quantiser_matrix;
  1331.                 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
  1332.             }
  1333.         } else {
  1334.             if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
  1335.                 qm = gen_iq_matrix->non_intra_quantiser_matrix;
  1336.                 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
  1337.             }
  1338.         }
  1339.  
  1340.         if (!qm)
  1341.             continue;
  1342.  
  1343.         gen75_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
  1344.     }
  1345. }
  1346.  
/*
 * Emit one MFD_MPEG2_BSD_OBJECT command describing a single MPEG-2 slice
 * for the bitstream decoder.
 *
 * The macroblock count of the slice is derived from the distance between
 * this slice's start position and the next slice's start position (or the
 * end of the picture when this is the last slice).
 */
static void
gen75_mfd_mpeg2_bsd_object(VADriverContextP ctx,
                          VAPictureParameterBufferMPEG2 *pic_param,
                          VASliceParameterBufferMPEG2 *slice_param,
                          VASliceParameterBufferMPEG2 *next_slice_param,
                          struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
    int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;

    if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
        pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
        is_field_pic = 1;
    /* Workaround: some streams report slice_vertical_position in frame
     * coordinates even for field pictures; wa_mpeg2_slice_vertical_position
     * (> 0) indicates the positions need to be halved. */
    is_field_pic_wa = is_field_pic &&
        gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;

    /* Start position of this slice, in macroblock units */
    vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
    hpos0 = slice_param->slice_horizontal_position;

    if (next_slice_param == NULL) {
        /* Last slice: run to the bottom of the (field) picture */
        vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
        hpos1 = 0;
    } else {
        vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
        hpos1 = next_slice_param->slice_horizontal_position;
    }

    /* Macroblocks covered by this slice, in raster-scan order */
    mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
    /* Byte length of slice data past the macroblock_offset whole bytes */
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
    /* Start MB position, MB count, last-slice flags, and the remaining
     * sub-byte bit offset into the first byte of slice data */
    OUT_BCS_BATCH(batch,
                  hpos0 << 24 |
                  vpos0 << 16 |
                  mb_count << 8 |
                  (next_slice_param == NULL) << 5 |
                  (next_slice_param == NULL) << 3 |
                  (slice_param->macroblock_offset & 0x7));
    OUT_BCS_BATCH(batch,
                  (slice_param->quantiser_scale_code << 24) |
                  (vpos1 << 8 | hpos1));
    ADVANCE_BCS_BATCH(batch);
}
  1395.  
/*
 * Decode one MPEG-2 picture: program the full MFX pipeline state, then emit
 * one BSD object per slice, and flush the batch.
 *
 * The state commands below must be emitted in this order (pipe mode select,
 * surface, buffer addresses, picture state, QM state) before any slice data.
 */
static void
gen75_mfd_mpeg2_decode_picture(VADriverContextP ctx,
                              struct decode_state *decode_state,
                              struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferMPEG2 *pic_param;
    VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;

    gen75_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen75_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen75_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen75_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen75_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
    gen75_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
    gen75_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);

    /* Lazily probe (once, < 0 means undetermined) whether this stream needs
     * the slice_vertical_position workaround for field pictures */
    if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
        gen7_mfd_context->wa_mpeg2_slice_vertical_position =
            mpeg2_wa_slice_vertical_position(decode_state, pic_param);

    /* Outer loop: slice parameter buffers; inner loop: slices per buffer.
     * Each BSD object needs a peek at the following slice to size itself,
     * so track the first slice of the next buffer (NULL at end of picture). */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);

        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen75_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
  1451.  
/* Map VA-API VC-1 picture_fields.picture_type (0=I, 1=P, 2=B, 3=BI, 4=skipped)
 * to the GEN7 MFX picture type encoding. NOTE(review): index 4 (skipped)
 * maps to P — presumably skipped pictures decode as P; confirm against PRM. */
static const int va_to_gen7_vc1_pic_type[5] = {
    GEN7_VC1_I_PICTURE,
    GEN7_VC1_P_PICTURE,
    GEN7_VC1_B_PICTURE,
    GEN7_VC1_BI_PICTURE,
    GEN7_VC1_P_PICTURE,
};
  1459.  
/* Map VA-API VC-1 MV mode (VAMvMode*, values 0..3) to the GEN7 unified
 * motion-vector mode field of MFD_VC1_LONG_PIC_STATE. */
static const int va_to_gen7_vc1_mv[4] = {
    1, /* 1-MV */
    2, /* 1-MV half-pel */
    3, /* 1-MV half-pel bilinear */
    0, /* Mixed MV */
};
  1466.  
/* B-picture temporal scale factors, indexed by pic_param->b_picture_fraction
 * (valid indices 0..20; see the `< 21` guard in gen75_mfd_vc1_pic_state).
 * Used to scale the reference distance when computing BRFD for B pictures. */
static const int b_picture_scale_factor[21] = {
    128, 85,  170, 64,  192,
    51,  102, 153, 204, 43,
    215, 37,  74,  111, 148,
    185, 222, 32,  96,  160,
    224,
};
  1474.  
/* Map VA-API VC-1 conditional_overlap_flag (0..2, asserted elsewhere) to the
 * GEN7 CONDOVER field encoding of MFD_VC1_LONG_PIC_STATE. */
static const int va_to_gen7_vc1_condover[3] = {
    0,
    2,
    3
};
  1480.  
/* Map VA-API VC-1 sequence_fields.profile (0..3) to the GEN7 profile
 * encoding (index 2 is reserved in the VC-1 specification). */
static const int va_to_gen7_vc1_profile[4] = {
    GEN7_VC1_SIMPLE_PROFILE,
    GEN7_VC1_MAIN_PROFILE,
    GEN7_VC1_RESERVED_PROFILE,
    GEN7_VC1_ADVANCED_PROFILE
};
  1487.  
  1488. static void
  1489. gen75_mfd_free_vc1_surface(void **data)
  1490. {
  1491.     struct gen7_vc1_surface *gen7_vc1_surface = *data;
  1492.  
  1493.     if (!gen7_vc1_surface)
  1494.         return;
  1495.  
  1496.     dri_bo_unreference(gen7_vc1_surface->dmv);
  1497.     free(gen7_vc1_surface);
  1498.     *data = NULL;
  1499. }
  1500.  
  1501. static void
  1502. gen75_mfd_init_vc1_surface(VADriverContextP ctx,
  1503.                           VAPictureParameterBufferVC1 *pic_param,
  1504.                           struct object_surface *obj_surface)
  1505. {
  1506.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1507.     struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
  1508.     int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
  1509.     int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
  1510.  
  1511.     obj_surface->free_private_data = gen75_mfd_free_vc1_surface;
  1512.  
  1513.     if (!gen7_vc1_surface) {
  1514.         gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
  1515.         assert(gen7_vc1_surface);
  1516.         assert((obj_surface->size & 0x3f) == 0);
  1517.         obj_surface->private_data = gen7_vc1_surface;
  1518.     }
  1519.  
  1520.     gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
  1521.  
  1522.     if (gen7_vc1_surface->dmv == NULL) {
  1523.         gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
  1524.                                              "direct mv w/r buffer",
  1525.                                              width_in_mbs * height_in_mbs * 64,
  1526.                                              0x1000);
  1527.     }
  1528. }
  1529.  
/*
 * Per-picture VC-1 decode setup: refresh the frame-store index, make sure
 * the render surface has a backing BO and VC-1 private data, (re)allocate
 * the row-store scratch buffers, and repack the application's bitplane
 * buffer into the hardware layout.
 */
static void
gen75_mfd_vc1_decode_init(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    VAPictureParameterBufferVC1 *pic_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    dri_bo *bo;
    int width_in_mbs;
    int picture_type;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
    width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
    picture_type = pic_param->picture_fields.bits.picture_type;

    intel_update_vc1_frame_store_index(ctx,
                                       decode_state,
                                       pic_param,
                                       gen7_mfd_context->reference_surface);

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
    gen75_mfd_init_vc1_surface(ctx, pic_param, obj_surface);

    /* Exactly one of post-/pre-deblocking output is valid, depending on
     * whether the in-loop filter is enabled; both point at the render BO. */
    dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;

    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;

    /* Scratch buffers are sized per macroblock row; reallocated every
     * picture (the old BO is dropped first). */
    dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "intra row store",
                      width_in_mbs * 64,
                      0x1000);
    assert(bo);
    gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "deblocking filter row store",
                      width_in_mbs * 7 * 64,
                      0x1000);
    assert(bo);
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 96,
                      0x1000);
    assert(bo);
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    /* MPR row store is not used for VC-1 */
    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;

    gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
    dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
   
    if (gen7_mfd_context->bitplane_read_buffer.valid) {
        int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
        int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
        /* HW layout: one byte per two macroblocks of a row */
        int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
        int src_w, src_h;
        uint8_t *src = NULL, *dst = NULL;

        assert(decode_state->bit_plane->buffer);
        src = decode_state->bit_plane->buffer;

        bo = dri_bo_alloc(i965->intel.bufmgr,
                          "VC-1 Bitplane",
                          bitplane_width * height_in_mbs,
                          0x1000);
        assert(bo);
        gen7_mfd_context->bitplane_read_buffer.bo = bo;

        dri_bo_map(bo, True);
        assert(bo->virtual);
        dst = bo->virtual;

        /* Repack: the VA buffer holds one 4-bit value per macroblock in
         * raster order, even MB in the high nibble. The destination packs
         * each pair of columns into one byte with the nibble order swapped
         * (first MB of the pair ends up in the low nibble). */
        for (src_h = 0; src_h < height_in_mbs; src_h++) {
            for(src_w = 0; src_w < width_in_mbs; src_w++) {
                int src_index, dst_index;
                int src_shift;
                uint8_t src_value;

                src_index = (src_h * width_in_mbs + src_w) / 2;
                src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
                src_value = ((src[src_index] >> src_shift) & 0xf);

                /* NOTE(review): for skipped pictures bit 1 is forced on in
                 * every macroblock's bitplane nibble — confirm against the
                 * MFX bitplane format in the PRM. */
                if (picture_type == GEN7_VC1_SKIPPED_PICTURE){
                    src_value |= 0x2;
                }

                /* Shift the previous value to the low nibble, new value in
                 * the high nibble; after the pair both nibbles are valid. */
                dst_index = src_w / 2;
                dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
            }

            /* Odd width: the last byte only received one value; move it
             * down into the low nibble (src_w == width_in_mbs here). */
            if (src_w & 1)
                dst[src_w / 2] >>= 4;

            dst += bitplane_width;
        }

        dri_bo_unmap(bo);
    } else
        gen7_mfd_context->bitplane_read_buffer.bo = NULL;
}
  1648.  
/*
 * Translate the VA-API VC-1 picture parameters into a single
 * MFD_VC1_LONG_PIC_STATE command (6 dwords) and emit it.
 *
 * NOTE(review): this function writes back into pic_param (the application's
 * buffer) when forcing the transform-type fields below.
 */
static void
gen75_mfd_vc1_pic_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferVC1 *pic_param;
    struct object_surface *obj_surface;
    int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
    int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
    int unified_mv_mode;
    int ref_field_pic_polarity = 0;
    int scale_factor = 0;
    int trans_ac_y = 0;
    int dmv_surface_valid = 0;
    int brfd = 0;
    int fcm = 0;
    int picture_type;
    int profile;
    int overlap;
    int interpolation_mode = 0;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;

    profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
    dquant = pic_param->pic_quantizer_fields.bits.dquant;
    dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
    dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
    dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
    dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
    dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
    alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;

    /* Derive the alternate-PQUANT configuration and the edge mask from the
     * VC-1 DQUANT/DQUANTFRM/DQPROFILE syntax elements. */
    if (dquant == 0) {
        alt_pquant_config = 0;
        alt_pquant_edge_mask = 0;
    } else if (dquant == 2) {
        /* DQUANT == 2: ALTPQUANT applies to all four edges */
        alt_pquant_config = 1;
        alt_pquant_edge_mask = 0xf;
    } else {
        assert(dquant == 1);
        if (dquantfrm == 0) {
            alt_pquant_config = 0;
            alt_pquant_edge_mask = 0;
            alt_pq = 0;
        } else {
            assert(dquantfrm == 1);
            alt_pquant_config = 1;

            switch (dqprofile) {
            case 3: /* all macroblocks */
                if (dqbilevel == 0) {
                    alt_pquant_config = 2;
                    alt_pquant_edge_mask = 0;
                } else {
                    assert(dqbilevel == 1);
                    alt_pquant_config = 3;
                    alt_pquant_edge_mask = 0;
                }
                break;
               
            case 0: /* all four edges */
                alt_pquant_edge_mask = 0xf;
                break;

            case 1: /* double edge */
                if (dqdbedge == 3)
                    alt_pquant_edge_mask = 0x9;
                else
                    alt_pquant_edge_mask = (0x3 << dqdbedge);

                break;

            case 2: /* single edge */
                alt_pquant_edge_mask = (0x1 << dqsbedge);
                break;

            default:
                assert(0);
            }
        }
    }

    /* With intensity compensation the effective MV mode is mv_mode2 */
    if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
        assert(pic_param->mv_fields.bits.mv_mode2 < 4);
        unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
    } else {
        assert(pic_param->mv_fields.bits.mv_mode < 4);
        unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
    }

    if (pic_param->sequence_fields.bits.interlace == 1 &&
        pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
        /* FIXME: calculate reference field picture polarity */
        assert(0);
        ref_field_pic_polarity = 0;
    }

    /* BFRACTION-derived temporal scale factor (table valid for 0..20) */
    if (pic_param->b_picture_fraction < 21)
        scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];

    picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];
   
    /* NOTE(review): advanced-profile I pictures are programmed as BI —
     * presumably a hardware requirement; confirm against the PRM. */
    if (profile == GEN7_VC1_ADVANCED_PROFILE &&
        picture_type == GEN7_VC1_I_PICTURE)
        picture_type = GEN7_VC1_BI_PICTURE;

    if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
        trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
    else {
        trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;

        /*
         * 8.3.6.2.1 Transform Type Selection
         * If variable-sized transform coding is not enabled,
         * then the 8x8 transform shall be used for all blocks.
         * it is also MFX_VC1_PIC_STATE requirement.
         */
        if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
            pic_param->transform_fields.bits.mb_level_transform_type_flag   = 1;
            pic_param->transform_fields.bits.frame_level_transform_type     = 0;
        }
    }

    /* The direct-MV buffer of the backward reference is only usable when
     * that reference was itself a P picture (I/BI never wrote MVs). */
    if (picture_type == GEN7_VC1_B_PICTURE) {
        struct gen7_vc1_surface *gen7_vc1_surface = NULL;

        obj_surface = decode_state->reference_objects[1];

        if (obj_surface)
            gen7_vc1_surface = obj_surface->private_data;

        if (!gen7_vc1_surface ||
            (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
             va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
            dmv_surface_valid = 0;
        else
            dmv_surface_valid = 1;
    }

    assert(pic_param->picture_fields.bits.frame_coding_mode < 3);

    /* Frame coding mode: 0 = progressive, 1 = frame-interlace; field
     * pictures encode top/bottom-field-first as 2/3. */
    if (pic_param->picture_fields.bits.frame_coding_mode < 2)
        fcm = pic_param->picture_fields.bits.frame_coding_mode;
    else {
        if (pic_param->picture_fields.bits.top_field_first)
            fcm = 2;
        else
            fcm = 3;
    }

    /* Backward reference frame distance for B pictures, clamped at 0 */
    if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
        brfd = pic_param->reference_fields.bits.reference_distance;
        brfd = (scale_factor * brfd) >> 8;
        brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;

        if (brfd < 0)
            brfd = 0;
    }

    /* Overlap smoothing: the sequence-level flag is only honoured under the
     * per-profile conditions below (PQUANT threshold, picture type, and for
     * advanced-profile I/BI also CONDOVER). */
    overlap = pic_param->sequence_fields.bits.overlap;

    if (overlap) {
        overlap = 0;
        if (profile != GEN7_VC1_ADVANCED_PROFILE){
            if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
                pic_param->picture_fields.bits.picture_type != GEN7_VC1_B_PICTURE) {
                overlap = 1;
            }
        }else {
            if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_P_PICTURE &&
                pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
                overlap = 1;
            }
            if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_I_PICTURE ||
                pic_param->picture_fields.bits.picture_type == GEN7_VC1_BI_PICTURE){
                if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
                    overlap = 1;
                } else if (va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 2 ||
                           va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 3) {
                    overlap = 1;
                }
            }
        }
    }

    assert(pic_param->conditional_overlap_flag < 3);
    assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */

    /* Sub-pel interpolation filter selection from the (effective) MV mode */
    if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
        (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
         pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
        interpolation_mode = 9; /* Half-pel bilinear */
    else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
             (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
              pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
        interpolation_mode = 1; /* Half-pel bicubic */
    else
        interpolation_mode = 0; /* Quarter-pel bicubic */

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
    /* Picture dimensions in macroblocks, minus one */
    OUT_BCS_BATCH(batch,
                  (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
                  ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
    OUT_BCS_BATCH(batch,
                  ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
                  dmv_surface_valid << 15 |
                  (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
                  pic_param->rounding_control << 13 |
                  pic_param->sequence_fields.bits.syncmarker << 12 |
                  interpolation_mode << 8 |
                  0 << 7 | /* FIXME: scale up or down ??? */
                  pic_param->range_reduction_frame << 6 |
                  pic_param->entrypoint_fields.bits.loopfilter << 5 |
                  overlap << 4 |
                  !pic_param->picture_fields.bits.is_first_field << 3 |
                  (pic_param->sequence_fields.bits.profile == 3) << 0);
    OUT_BCS_BATCH(batch,
                  va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
                  picture_type << 26 |
                  fcm << 24 |
                  alt_pq << 16 |
                  pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
                  scale_factor << 0);
    OUT_BCS_BATCH(batch,
                  unified_mv_mode << 28 |
                  pic_param->mv_fields.bits.four_mv_switch << 27 |
                  pic_param->fast_uvmc_flag << 26 |
                  ref_field_pic_polarity << 25 |
                  pic_param->reference_fields.bits.num_reference_pictures << 24 |
                  pic_param->reference_fields.bits.reference_distance << 20 |
                  pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
                  pic_param->mv_fields.bits.extended_dmv_range << 10 |
                  pic_param->mv_fields.bits.extended_mv_range << 8 |
                  alt_pquant_edge_mask << 4 |
                  alt_pquant_config << 2 |
                  pic_param->pic_quantizer_fields.bits.half_qp << 1 |
                  pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
    /* Bitplane presence flags are inverted: the HW bit means "raw mode"
     * (value coded in the bitstream, not in the bitplane buffer). */
    OUT_BCS_BATCH(batch,
                  !!pic_param->bitplane_present.value << 31 |
                  !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
                  !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
                  !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
                  !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
                  !pic_param->bitplane_present.flags.bp_overflags << 26 |
                  !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
                  !pic_param->bitplane_present.flags.bp_field_tx << 24 |
                  pic_param->mv_fields.bits.mv_table << 20 |
                  pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
                  pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
                  pic_param->transform_fields.bits.frame_level_transform_type << 12 |
                  pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
                  pic_param->mb_mode_table << 8 |
                  trans_ac_y << 6 |
                  pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
                  pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
                  pic_param->cbp_table << 0);
    ADVANCE_BCS_BATCH(batch);
}
  1910.  
/*
 * Emit MFX_VC1_PRED_PIPE_STATE (6 dwords): intensity-compensation enables
 * and luma scale/shift for the prediction pipe. Interlaced/double-IC cases
 * are not handled yet (see FIXMEs).
 */
static void
gen75_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
                             struct decode_state *decode_state,
                             struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferVC1 *pic_param;
    int intensitycomp_single;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
    /* Single-field intensity compensation is signalled via the MV mode */
    intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch,
                  0 << 14 | /* FIXME: double ??? */
                  0 << 12 |
                  intensitycomp_single << 10 |
                  intensitycomp_single << 8 |
                  0 << 4 | /* FIXME: interlace mode */
                  0);
    OUT_BCS_BATCH(batch,
                  pic_param->luma_shift << 16 |
                  pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  1941.  
  1942. static void
  1943. gen75_mfd_vc1_directmode_state_bplus(VADriverContextP ctx,
  1944.                               struct decode_state *decode_state,
  1945.                               struct gen7_mfd_context *gen7_mfd_context)
  1946. {
  1947.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  1948.     struct object_surface *obj_surface;
  1949.     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
  1950.  
  1951.     obj_surface = decode_state->render_object;
  1952.  
  1953.     if (obj_surface && obj_surface->private_data) {
  1954.         dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
  1955.     }
  1956.  
  1957.     obj_surface = decode_state->reference_objects[1];
  1958.  
  1959.     if (obj_surface && obj_surface->private_data) {
  1960.         dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
  1961.     }
  1962.  
  1963.     BEGIN_BCS_BATCH(batch, 7);
  1964.     OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2));
  1965.  
  1966.     if (dmv_write_buffer)
  1967.         OUT_BCS_RELOC(batch, dmv_write_buffer,
  1968.                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
  1969.                       0);
  1970.     else
  1971.         OUT_BCS_BATCH(batch, 0);
  1972.  
  1973.         OUT_BCS_BATCH(batch, 0);
  1974.         OUT_BCS_BATCH(batch, 0);
  1975.  
  1976.     if (dmv_read_buffer)
  1977.         OUT_BCS_RELOC(batch, dmv_read_buffer,
  1978.                       I915_GEM_DOMAIN_INSTRUCTION, 0,
  1979.                       0);
  1980.     else
  1981.         OUT_BCS_BATCH(batch, 0);
  1982.         OUT_BCS_BATCH(batch, 0);
  1983.         OUT_BCS_BATCH(batch, 0);
  1984.                  
  1985.     ADVANCE_BCS_BATCH(batch);
  1986. }
  1987.  
/*
 * Emit MFX_VC1_DIRECTMODE_STATE: addresses of the direct-MV write buffer
 * (current picture) and read buffer (backward reference). On B+ stepping
 * hardware the 7-dword variant is used instead; this 3-dword layout is for
 * earlier steppings.
 */
static void
gen75_mfd_vc1_directmode_state(VADriverContextP ctx,
                              struct decode_state *decode_state,
                              struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;

    if (IS_STEPPING_BPLUS(i965)) {
        gen75_mfd_vc1_directmode_state_bplus(ctx, decode_state, gen7_mfd_context);
        return;
    }

    /* MV write target: the picture currently being decoded */
    obj_surface = decode_state->render_object;

    if (obj_surface && obj_surface->private_data) {
        dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
    }

    /* MV read source: the backward reference picture */
    obj_surface = decode_state->reference_objects[1];

    if (obj_surface && obj_surface->private_data) {
        dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
    }

    BEGIN_BCS_BATCH(batch, 3);
    OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (3 - 2));

    if (dmv_write_buffer)
        OUT_BCS_RELOC(batch, dmv_write_buffer,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    if (dmv_read_buffer)
        OUT_BCS_RELOC(batch, dmv_read_buffer,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);
                 
    ADVANCE_BCS_BATCH(batch);
}
  2034.  
  2035. static int
  2036. gen75_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
  2037. {
  2038.     int out_slice_data_bit_offset;
  2039.     int slice_header_size = in_slice_data_bit_offset / 8;
  2040.     int i, j;
  2041.  
  2042.     if (profile != 3)
  2043.         out_slice_data_bit_offset = in_slice_data_bit_offset;
  2044.     else {
  2045.         for (i = 0, j = 0; i < slice_header_size; i++, j++) {
  2046.             if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3 && buf[j + 3] < 4) {
  2047.                 i++, j += 2;
  2048.             }
  2049.         }
  2050.  
  2051.         out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
  2052.     }
  2053.  
  2054.     return out_slice_data_bit_offset;
  2055. }
  2056.  
  2057. static void
  2058. gen75_mfd_vc1_bsd_object(VADriverContextP ctx,
  2059.                         VAPictureParameterBufferVC1 *pic_param,
  2060.                         VASliceParameterBufferVC1 *slice_param,
  2061.                         VASliceParameterBufferVC1 *next_slice_param,
  2062.                         dri_bo *slice_data_bo,
  2063.                         struct gen7_mfd_context *gen7_mfd_context)
  2064. {
  2065.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  2066.     int next_slice_start_vert_pos;
  2067.     int macroblock_offset;
  2068.     uint8_t *slice_data = NULL;
  2069.  
  2070.     dri_bo_map(slice_data_bo, 0);
  2071.     slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
  2072.     macroblock_offset = gen75_mfd_vc1_get_macroblock_bit_offset(slice_data,
  2073.                                                                slice_param->macroblock_offset,
  2074.                                                                pic_param->sequence_fields.bits.profile);
  2075.     dri_bo_unmap(slice_data_bo);
  2076.  
  2077.     if (next_slice_param)
  2078.         next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
  2079.     else
  2080.         next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
  2081.  
  2082.     BEGIN_BCS_BATCH(batch, 5);
  2083.     OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
  2084.     OUT_BCS_BATCH(batch,
  2085.                   slice_param->slice_data_size - (macroblock_offset >> 3));
  2086.     OUT_BCS_BATCH(batch,
  2087.                   slice_param->slice_data_offset + (macroblock_offset >> 3));
  2088.     OUT_BCS_BATCH(batch,
  2089.                   slice_param->slice_vertical_position << 16 |
  2090.                   next_slice_start_vert_pos << 0);
  2091.     OUT_BCS_BATCH(batch,
  2092.                   (macroblock_offset & 0x7));
  2093.     ADVANCE_BCS_BATCH(batch);
  2094. }
  2095.  
/*
 * Top-level VC-1 picture decode: emits the full MFX command sequence for
 * one frame, then one BSD object per slice.  The state-command ordering
 * below (pipe mode -> surface -> buffer addresses -> pic/pred/directmode
 * state) is the order the hardware expects and must not be rearranged.
 */
static void
gen75_mfd_vc1_decode_picture(VADriverContextP ctx,
                            struct decode_state *decode_state,
                            struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferVC1 *pic_param;
    VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;

    /* Allocate/refresh per-frame buffers, then emit the frame-level state. */
    gen75_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen75_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen75_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen75_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen75_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen75_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
    gen75_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
    gen75_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);

    /* Outer loop: slice parameter buffers (groups); inner loop: the
     * individual slices within each group. */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);

        /* First slice of the NEXT group, so the last slice of this group
         * can compute its vertical extent; NULL after the final group. */
        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            /* Only whole-slice data buffers are supported. */
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen75_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
  2148.  
  2149. static void
  2150. gen75_mfd_jpeg_decode_init(VADriverContextP ctx,
  2151.                           struct decode_state *decode_state,
  2152.                           struct gen7_mfd_context *gen7_mfd_context)
  2153. {
  2154.     struct object_surface *obj_surface;
  2155.     VAPictureParameterBufferJPEGBaseline *pic_param;
  2156.     int subsampling = SUBSAMPLE_YUV420;
  2157.     int fourcc = VA_FOURCC_IMC3;
  2158.  
  2159.     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
  2160.  
  2161.     if (pic_param->num_components == 1) {
  2162.         subsampling = SUBSAMPLE_YUV400;
  2163.         fourcc = VA_FOURCC_Y800;
  2164.     } else if (pic_param->num_components == 3) {
  2165.         int h1 = pic_param->components[0].h_sampling_factor;
  2166.         int h2 = pic_param->components[1].h_sampling_factor;
  2167.         int h3 = pic_param->components[2].h_sampling_factor;
  2168.         int v1 = pic_param->components[0].v_sampling_factor;
  2169.         int v2 = pic_param->components[1].v_sampling_factor;
  2170.         int v3 = pic_param->components[2].v_sampling_factor;
  2171.  
  2172.         if (h1 == 2 && h2 == 1 && h3 == 1 &&
  2173.             v1 == 2 && v2 == 1 && v3 == 1) {
  2174.             subsampling = SUBSAMPLE_YUV420;
  2175.             fourcc = VA_FOURCC_IMC3;
  2176.         } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
  2177.                    v1 == 1 && v2 == 1 && v3 == 1) {
  2178.             subsampling = SUBSAMPLE_YUV422H;
  2179.             fourcc = VA_FOURCC_422H;
  2180.         } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
  2181.                    v1 == 1 && v2 == 1 && v3 == 1) {
  2182.             subsampling = SUBSAMPLE_YUV444;
  2183.             fourcc = VA_FOURCC_444P;
  2184.         } else if (h1 == 4 && h2 == 1 && h3 == 1 &&
  2185.                    v1 == 1 && v2 == 1 && v3 == 1) {
  2186.             subsampling = SUBSAMPLE_YUV411;
  2187.             fourcc = VA_FOURCC_411P;
  2188.         } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
  2189.                    v1 == 2 && v2 == 1 && v3 == 1) {
  2190.             subsampling = SUBSAMPLE_YUV422V;
  2191.             fourcc = VA_FOURCC_422V;
  2192.         } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
  2193.                    v1 == 2 && v2 == 2 && v3 == 2) {
  2194.             subsampling = SUBSAMPLE_YUV422H;
  2195.             fourcc = VA_FOURCC_422H;
  2196.         } else if (h1 == 2 && h2 == 2 && h3 == 2 &&
  2197.                    v1 == 2 && v2 == 1 && v3 == 1) {
  2198.             subsampling = SUBSAMPLE_YUV422V;
  2199.             fourcc = VA_FOURCC_422V;
  2200.         } else
  2201.             assert(0);
  2202.     } else {
  2203.         assert(0);
  2204.     }
  2205.  
  2206.     /* Current decoded picture */
  2207.     obj_surface = decode_state->render_object;
  2208.     i965_check_alloc_surface_bo(ctx, obj_surface, 1, fourcc, subsampling);
  2209.  
  2210.     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
  2211.     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
  2212.     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
  2213.     gen7_mfd_context->pre_deblocking_output.valid = 1;
  2214.  
  2215.     gen7_mfd_context->post_deblocking_output.bo = NULL;
  2216.     gen7_mfd_context->post_deblocking_output.valid = 0;
  2217.  
  2218.     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
  2219.     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
  2220.  
  2221.     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
  2222.     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
  2223.  
  2224.     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
  2225.     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
  2226.  
  2227.     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
  2228.     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
  2229.  
  2230.     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
  2231.     gen7_mfd_context->bitplane_read_buffer.valid = 0;
  2232. }
  2233.  
/* Rotation values for MFX_JPEG_PIC_STATE, indexed 0..3 for
 * 0/90/180/270 degrees (only index 0 is used below). */
static const int va_to_gen7_jpeg_rotation[4] = {
    GEN7_JPEG_ROTATION_0,
    GEN7_JPEG_ROTATION_90,
    GEN7_JPEG_ROTATION_180,
    GEN7_JPEG_ROTATION_270
};
  2240.  
  2241. static void
  2242. gen75_mfd_jpeg_pic_state(VADriverContextP ctx,
  2243.                         struct decode_state *decode_state,
  2244.                         struct gen7_mfd_context *gen7_mfd_context)
  2245. {
  2246.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  2247.     VAPictureParameterBufferJPEGBaseline *pic_param;
  2248.     int chroma_type = GEN7_YUV420;
  2249.     int frame_width_in_blks;
  2250.     int frame_height_in_blks;
  2251.  
  2252.     assert(decode_state->pic_param && decode_state->pic_param->buffer);
  2253.     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
  2254.  
  2255.     if (pic_param->num_components == 1)
  2256.         chroma_type = GEN7_YUV400;
  2257.     else if (pic_param->num_components == 3) {
  2258.         int h1 = pic_param->components[0].h_sampling_factor;
  2259.         int h2 = pic_param->components[1].h_sampling_factor;
  2260.         int h3 = pic_param->components[2].h_sampling_factor;
  2261.         int v1 = pic_param->components[0].v_sampling_factor;
  2262.         int v2 = pic_param->components[1].v_sampling_factor;
  2263.         int v3 = pic_param->components[2].v_sampling_factor;
  2264.  
  2265.         if (h1 == 2 && h2 == 1 && h3 == 1 &&
  2266.             v1 == 2 && v2 == 1 && v3 == 1)
  2267.             chroma_type = GEN7_YUV420;
  2268.         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
  2269.                  v1 == 1 && v2 == 1 && v3 == 1)
  2270.             chroma_type = GEN7_YUV422H_2Y;
  2271.         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
  2272.                  v1 == 1 && v2 == 1 && v3 == 1)
  2273.             chroma_type = GEN7_YUV444;
  2274.         else if (h1 == 4 && h2 == 1 && h3 == 1 &&
  2275.                  v1 == 1 && v2 == 1 && v3 == 1)
  2276.             chroma_type = GEN7_YUV411;
  2277.         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
  2278.                  v1 == 2 && v2 == 1 && v3 == 1)
  2279.             chroma_type = GEN7_YUV422V_2Y;
  2280.         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
  2281.                  v1 == 2 && v2 == 2 && v3 == 2)
  2282.             chroma_type = GEN7_YUV422H_4Y;
  2283.         else if (h2 == 2 && h2 == 2 && h3 == 2 &&
  2284.                  v1 == 2 && v2 == 1 && v3 == 1)
  2285.             chroma_type = GEN7_YUV422V_4Y;
  2286.         else
  2287.             assert(0);
  2288.     }
  2289.  
  2290.     if (chroma_type == GEN7_YUV400 ||
  2291.         chroma_type == GEN7_YUV444 ||
  2292.         chroma_type == GEN7_YUV422V_2Y) {
  2293.         frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
  2294.         frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
  2295.     } else if (chroma_type == GEN7_YUV411) {
  2296.         frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
  2297.         frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
  2298.     } else {
  2299.         frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
  2300.         frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
  2301.     }
  2302.  
  2303.     BEGIN_BCS_BATCH(batch, 3);
  2304.     OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
  2305.     OUT_BCS_BATCH(batch,
  2306.                   (va_to_gen7_jpeg_rotation[0] << 4) |    /* without rotation */
  2307.                   (chroma_type << 0));
  2308.     OUT_BCS_BATCH(batch,
  2309.                   ((frame_height_in_blks - 1) << 16) |   /* FrameHeightInBlks */
  2310.                   ((frame_width_in_blks - 1) << 0));    /* FrameWidthInBlks */
  2311.     ADVANCE_BCS_BATCH(batch);
  2312. }
  2313.  
/* Huffman table ids for MFX_JPEG_HUFF_TABLE_STATE: slot 0 is the
 * luma (Y) table, slot 1 the chroma (UV) table. */
static const int va_to_gen7_jpeg_hufftable[2] = {
    MFX_HUFFTABLE_ID_Y,
    MFX_HUFFTABLE_ID_UV
};
  2318.  
  2319. static void
  2320. gen75_mfd_jpeg_huff_table_state(VADriverContextP ctx,
  2321.                                struct decode_state *decode_state,
  2322.                                struct gen7_mfd_context *gen7_mfd_context,
  2323.                                int num_tables)
  2324. {
  2325.     VAHuffmanTableBufferJPEGBaseline *huffman_table;
  2326.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  2327.     int index;
  2328.  
  2329.     if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
  2330.         return;
  2331.  
  2332.     huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;
  2333.  
  2334.     for (index = 0; index < num_tables; index++) {
  2335.         int id = va_to_gen7_jpeg_hufftable[index];
  2336.  
  2337.         if (!huffman_table->load_huffman_table[index])
  2338.             continue;
  2339.  
  2340.         BEGIN_BCS_BATCH(batch, 53);
  2341.         OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
  2342.         OUT_BCS_BATCH(batch, id);
  2343.         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
  2344.         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
  2345.         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
  2346.         intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
  2347.         ADVANCE_BCS_BATCH(batch);
  2348.     }
  2349. }
  2350.  
/* Quantizer-matrix target for each 1-based component id
 * (1 = Y, 2 = Cb, 3 = Cr, 4 = alpha); index 0 is unused. */
static const int va_to_gen7_jpeg_qm[5] = {
    -1,
    MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
    MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
    MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
    MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
};
  2358.  
  2359. static void
  2360. gen75_mfd_jpeg_qm_state(VADriverContextP ctx,
  2361.                        struct decode_state *decode_state,
  2362.                        struct gen7_mfd_context *gen7_mfd_context)
  2363. {
  2364.     VAPictureParameterBufferJPEGBaseline *pic_param;
  2365.     VAIQMatrixBufferJPEGBaseline *iq_matrix;
  2366.     int index;
  2367.  
  2368.     if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
  2369.         return;
  2370.  
  2371.     iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
  2372.     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
  2373.  
  2374.     assert(pic_param->num_components <= 3);
  2375.  
  2376.     for (index = 0; index < pic_param->num_components; index++) {
  2377.         int id = pic_param->components[index].component_id - pic_param->components[0].component_id + 1;
  2378.         int qm_type;
  2379.         unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
  2380.         unsigned char raster_qm[64];
  2381.         int j;
  2382.  
  2383.         if (id > 4 || id < 1)
  2384.             continue;
  2385.  
  2386.         if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
  2387.             continue;
  2388.  
  2389.         qm_type = va_to_gen7_jpeg_qm[id];
  2390.  
  2391.         for (j = 0; j < 64; j++)
  2392.             raster_qm[zigzag_direct[j]] = qm[j];
  2393.  
  2394.         gen75_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
  2395.     }
  2396. }
  2397.  
  2398. static void
  2399. gen75_mfd_jpeg_bsd_object(VADriverContextP ctx,
  2400.                          VAPictureParameterBufferJPEGBaseline *pic_param,
  2401.                          VASliceParameterBufferJPEGBaseline *slice_param,
  2402.                          VASliceParameterBufferJPEGBaseline *next_slice_param,
  2403.                          dri_bo *slice_data_bo,
  2404.                          struct gen7_mfd_context *gen7_mfd_context)
  2405. {
  2406.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  2407.     int scan_component_mask = 0;
  2408.     int i;
  2409.  
  2410.     assert(slice_param->num_components > 0);
  2411.     assert(slice_param->num_components < 4);
  2412.     assert(slice_param->num_components <= pic_param->num_components);
  2413.  
  2414.     for (i = 0; i < slice_param->num_components; i++) {
  2415.         switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
  2416.         case 1:
  2417.             scan_component_mask |= (1 << 0);
  2418.             break;
  2419.         case 2:
  2420.             scan_component_mask |= (1 << 1);
  2421.             break;
  2422.         case 3:
  2423.             scan_component_mask |= (1 << 2);
  2424.             break;
  2425.         default:
  2426.             assert(0);
  2427.             break;
  2428.         }
  2429.     }
  2430.  
  2431.     BEGIN_BCS_BATCH(batch, 6);
  2432.     OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
  2433.     OUT_BCS_BATCH(batch,
  2434.                   slice_param->slice_data_size);
  2435.     OUT_BCS_BATCH(batch,
  2436.                   slice_param->slice_data_offset);
  2437.     OUT_BCS_BATCH(batch,
  2438.                   slice_param->slice_horizontal_position << 16 |
  2439.                   slice_param->slice_vertical_position << 0);
  2440.     OUT_BCS_BATCH(batch,
  2441.                   ((slice_param->num_components != 1) << 30) |  /* interleaved */
  2442.                   (scan_component_mask << 27) |                 /* scan components */
  2443.                   (0 << 26) |   /* disable interrupt allowed */
  2444.                   (slice_param->num_mcus << 0));                /* MCU count */
  2445.     OUT_BCS_BATCH(batch,
  2446.                   (slice_param->restart_interval << 0));    /* RestartInterval */
  2447.     ADVANCE_BCS_BATCH(batch);
  2448. }
  2449.  
  2450. /* Workaround for JPEG decoding on Ivybridge */
  2451.  
  2452. static struct {
  2453.     int width;
  2454.     int height;
  2455.     unsigned char data[32];
  2456.     int data_size;
  2457.     int data_bit_offset;
  2458.     int qp;
  2459. } gen7_jpeg_wa_clip = {
  2460.     16,
  2461.     16,
  2462.     {
  2463.         0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
  2464.         0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
  2465.     },
  2466.     14,
  2467.     40,
  2468.     28,
  2469. };
  2470.  
  2471. static void
  2472. gen75_jpeg_wa_init(VADriverContextP ctx,
  2473.                   struct gen7_mfd_context *gen7_mfd_context)
  2474. {
  2475.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2476.     VAStatus status;
  2477.     struct object_surface *obj_surface;
  2478.  
  2479.     if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
  2480.         i965_DestroySurfaces(ctx,
  2481.                              &gen7_mfd_context->jpeg_wa_surface_id,
  2482.                              1);
  2483.  
  2484.     status = i965_CreateSurfaces(ctx,
  2485.                                  gen7_jpeg_wa_clip.width,
  2486.                                  gen7_jpeg_wa_clip.height,
  2487.                                  VA_RT_FORMAT_YUV420,
  2488.                                  1,
  2489.                                  &gen7_mfd_context->jpeg_wa_surface_id);
  2490.     assert(status == VA_STATUS_SUCCESS);
  2491.  
  2492.     obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
  2493.     assert(obj_surface);
  2494.     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
  2495.     gen7_mfd_context->jpeg_wa_surface_object = obj_surface;
  2496.  
  2497.     if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
  2498.         gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
  2499.                                                                "JPEG WA data",
  2500.                                                                0x1000,
  2501.                                                                0x1000);
  2502.         dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
  2503.                        0,
  2504.                        gen7_jpeg_wa_clip.data_size,
  2505.                        gen7_jpeg_wa_clip.data);
  2506.     }
  2507. }
  2508.  
  2509. static void
  2510. gen75_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
  2511.                               struct gen7_mfd_context *gen7_mfd_context)
  2512. {
  2513.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  2514.  
  2515.     BEGIN_BCS_BATCH(batch, 5);
  2516.     OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
  2517.     OUT_BCS_BATCH(batch,
  2518.                   (MFX_LONG_MODE << 17) | /* Currently only support long format */
  2519.                   (MFD_MODE_VLD << 15) | /* VLD mode */
  2520.                   (0 << 10) | /* disable Stream-Out */
  2521.                   (0 << 9)  | /* Post Deblocking Output */
  2522.                   (1 << 8)  | /* Pre Deblocking Output */
  2523.                   (0 << 5)  | /* not in stitch mode */
  2524.                   (MFX_CODEC_DECODE << 4)  | /* decoding mode */
  2525.                   (MFX_FORMAT_AVC << 0));
  2526.     OUT_BCS_BATCH(batch,
  2527.                   (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
  2528.                   (0 << 3)  | /* terminate if AVC mbdata error occurs */
  2529.                   (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
  2530.                   (0 << 1)  |
  2531.                   (0 << 0));
  2532.     OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
  2533.     OUT_BCS_BATCH(batch, 0); /* reserved */
  2534.     ADVANCE_BCS_BATCH(batch);
  2535. }
  2536.  
  2537. static void
  2538. gen75_jpeg_wa_surface_state(VADriverContextP ctx,
  2539.                            struct gen7_mfd_context *gen7_mfd_context)
  2540. {
  2541.     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
  2542.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  2543.  
  2544.     BEGIN_BCS_BATCH(batch, 6);
  2545.     OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
  2546.     OUT_BCS_BATCH(batch, 0);
  2547.     OUT_BCS_BATCH(batch,
  2548.                   ((obj_surface->orig_width - 1) << 18) |
  2549.                   ((obj_surface->orig_height - 1) << 4));
  2550.     OUT_BCS_BATCH(batch,
  2551.                   (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
  2552.                   (1 << 27) | /* interleave chroma, set to 0 for JPEG */
  2553.                   (0 << 22) | /* surface object control state, ignored */
  2554.                   ((obj_surface->width - 1) << 3) | /* pitch */
  2555.                   (0 << 2)  | /* must be 0 */
  2556.                   (1 << 1)  | /* must be tiled */
  2557.                   (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
  2558.     OUT_BCS_BATCH(batch,
  2559.                   (0 << 16) | /* X offset for U(Cb), must be 0 */
  2560.                   (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
  2561.     OUT_BCS_BATCH(batch,
  2562.                   (0 << 16) | /* X offset for V(Cr), must be 0 */
  2563.                   (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoro for JPEG */
  2564.     ADVANCE_BCS_BATCH(batch);
  2565. }
  2566.  
  2567. static void
  2568. gen75_jpeg_wa_pipe_buf_addr_state_bplus(VADriverContextP ctx,
  2569.                                  struct gen7_mfd_context *gen7_mfd_context)
  2570. {
  2571.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2572.     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
  2573.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  2574.     dri_bo *intra_bo;
  2575.     int i;
  2576.  
  2577.     intra_bo = dri_bo_alloc(i965->intel.bufmgr,
  2578.                             "intra row store",
  2579.                             128 * 64,
  2580.                             0x1000);
  2581.  
  2582.     BEGIN_BCS_BATCH(batch, 61);
  2583.     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
  2584.     OUT_BCS_RELOC(batch,
  2585.                   obj_surface->bo,
  2586.                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
  2587.                   0);
  2588.         OUT_BCS_BATCH(batch, 0);
  2589.         OUT_BCS_BATCH(batch, 0);
  2590.    
  2591.  
  2592.     OUT_BCS_BATCH(batch, 0); /* post deblocking */
  2593.         OUT_BCS_BATCH(batch, 0);
  2594.         OUT_BCS_BATCH(batch, 0);
  2595.  
  2596.         /* uncompressed-video & stream out 7-12 */
  2597.     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
  2598.     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
  2599.         OUT_BCS_BATCH(batch, 0);
  2600.         OUT_BCS_BATCH(batch, 0);
  2601.         OUT_BCS_BATCH(batch, 0);
  2602.         OUT_BCS_BATCH(batch, 0);
  2603.  
  2604.         /* the DW 13-15 is for intra row store scratch */
  2605.     OUT_BCS_RELOC(batch,
  2606.                   intra_bo,
  2607.                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
  2608.                   0);
  2609.         OUT_BCS_BATCH(batch, 0);
  2610.         OUT_BCS_BATCH(batch, 0);
  2611.  
  2612.         /* the DW 16-18 is for deblocking filter */
  2613.     OUT_BCS_BATCH(batch, 0);
  2614.         OUT_BCS_BATCH(batch, 0);
  2615.         OUT_BCS_BATCH(batch, 0);
  2616.  
  2617.     /* DW 19..50 */
  2618.     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
  2619.         OUT_BCS_BATCH(batch, 0);
  2620.         OUT_BCS_BATCH(batch, 0);
  2621.     }
  2622.     OUT_BCS_BATCH(batch, 0);
  2623.  
  2624.         /* the DW52-54 is for mb status address */
  2625.     OUT_BCS_BATCH(batch, 0);
  2626.         OUT_BCS_BATCH(batch, 0);
  2627.         OUT_BCS_BATCH(batch, 0);
  2628.         /* the DW56-60 is for ILDB & second ILDB address */
  2629.     OUT_BCS_BATCH(batch, 0);
  2630.         OUT_BCS_BATCH(batch, 0);
  2631.         OUT_BCS_BATCH(batch, 0);
  2632.     OUT_BCS_BATCH(batch, 0);
  2633.         OUT_BCS_BATCH(batch, 0);
  2634.         OUT_BCS_BATCH(batch, 0);
  2635.  
  2636.     ADVANCE_BCS_BATCH(batch);
  2637.  
  2638.     dri_bo_unreference(intra_bo);
  2639. }
  2640.  
/*
 * Program MFX_PIPE_BUF_ADDR_STATE for the JPEG decode workaround.
 * Pre-B0 steppings use this 25-DWORD layout; B0+ parts are routed to the
 * longer *_bplus variant.  Only the decoded-picture address and a scratch
 * intra row store are real buffers; every other address is zero/ignored
 * for this synthetic decode.
 */
static void
gen75_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
                                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    dri_bo *intra_bo;
    int i;

    if (IS_STEPPING_BPLUS(i965)) {
        gen75_jpeg_wa_pipe_buf_addr_state_bplus(ctx, gen7_mfd_context);
        return;
    }

    /* Transient intra row store scratch; unreferenced below once the
     * batch relocation holds it. */
    intra_bo = dri_bo_alloc(i965->intel.bufmgr,
                            "intra row store",
                            128 * 64,
                            0x1000);

    BEGIN_BCS_BATCH(batch, 25);
    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (25 - 2));
    /* Pre-deblocking output: the workaround surface. */
    OUT_BCS_RELOC(batch,
                  obj_surface->bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);

    OUT_BCS_BATCH(batch, 0); /* post deblocking */

    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */

    OUT_BCS_RELOC(batch,
                  intra_bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);

    OUT_BCS_BATCH(batch, 0);

    /* DW 7..22: reference picture addresses; none used by the WA slice. */
    for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
        OUT_BCS_BATCH(batch, 0);
    }

    OUT_BCS_BATCH(batch, 0);   /* ignore DW23 for decoding */
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);

    dri_bo_unreference(intra_bo);
}
  2691.  
  2692. static void
  2693. gen75_jpeg_wa_bsp_buf_base_addr_state_bplus(VADriverContextP ctx,
  2694.                                      struct gen7_mfd_context *gen7_mfd_context)
  2695. {
  2696.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2697.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  2698.     dri_bo *bsd_mpc_bo, *mpr_bo;
  2699.  
  2700.     bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
  2701.                               "bsd mpc row store",
  2702.                               11520, /* 1.5 * 120 * 64 */
  2703.                               0x1000);
  2704.  
  2705.     mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
  2706.                           "mpr row store",
  2707.                           7680, /* 1. 0 * 120 * 64 */
  2708.                           0x1000);
  2709.  
  2710.     BEGIN_BCS_BATCH(batch, 10);
  2711.     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));
  2712.  
  2713.     OUT_BCS_RELOC(batch,
  2714.                   bsd_mpc_bo,
  2715.                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
  2716.                   0);
  2717.  
  2718.     OUT_BCS_BATCH(batch, 0);
  2719.     OUT_BCS_BATCH(batch, 0);
  2720.  
  2721.     OUT_BCS_RELOC(batch,
  2722.                   mpr_bo,
  2723.                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
  2724.                   0);
  2725.     OUT_BCS_BATCH(batch, 0);
  2726.     OUT_BCS_BATCH(batch, 0);
  2727.  
  2728.     OUT_BCS_BATCH(batch, 0);
  2729.     OUT_BCS_BATCH(batch, 0);
  2730.     OUT_BCS_BATCH(batch, 0);
  2731.  
  2732.     ADVANCE_BCS_BATCH(batch);
  2733.  
  2734.     dri_bo_unreference(bsd_mpc_bo);
  2735.     dri_bo_unreference(mpr_bo);
  2736. }
  2737.  
  2738. static void
  2739. gen75_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
  2740.                                      struct gen7_mfd_context *gen7_mfd_context)
  2741. {
  2742.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2743.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  2744.     dri_bo *bsd_mpc_bo, *mpr_bo;
  2745.  
  2746.     if (IS_STEPPING_BPLUS(i965)) {
  2747.         gen75_jpeg_wa_bsp_buf_base_addr_state_bplus(ctx, gen7_mfd_context);
  2748.         return;
  2749.     }
  2750.  
  2751.     bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
  2752.                               "bsd mpc row store",
  2753.                               11520, /* 1.5 * 120 * 64 */
  2754.                               0x1000);
  2755.  
  2756.     mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
  2757.                           "mpr row store",
  2758.                           7680, /* 1. 0 * 120 * 64 */
  2759.                           0x1000);
  2760.  
  2761.     BEGIN_BCS_BATCH(batch, 4);
  2762.     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
  2763.  
  2764.     OUT_BCS_RELOC(batch,
  2765.                   bsd_mpc_bo,
  2766.                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
  2767.                   0);
  2768.  
  2769.     OUT_BCS_RELOC(batch,
  2770.                   mpr_bo,
  2771.                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
  2772.                   0);
  2773.     OUT_BCS_BATCH(batch, 0);
  2774.  
  2775.     ADVANCE_BCS_BATCH(batch);
  2776.  
  2777.     dri_bo_unreference(bsd_mpc_bo);
  2778.     dri_bo_unreference(mpr_bo);
  2779. }
  2780.  
/*
 * Quantization-matrix state for the JPEG workaround.  Intentionally a
 * no-op: the synthetic workaround slice emits no MFX_QM_STATE commands.
 */
static void
gen75_jpeg_wa_avc_qm_state(VADriverContextP ctx,
                          struct gen7_mfd_context *gen7_mfd_context)
{

}
  2787.  
/*
 * MFX_AVC_IMG_STATE for the JPEG workaround: describes a minimal
 * 1x1-macroblock AVC frame (4:2:0, CABAC) for the synthetic decode.
 */
static void
gen75_jpeg_wa_avc_img_state(VADriverContextP ctx,
                           struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int img_struct = 0; /* presumably frame picture — confirm against MFX PRM */
    int mbaff_frame_flag = 0;
    unsigned int width_in_mbs = 1, height_in_mbs = 1;

    BEGIN_BCS_BATCH(batch, 16);
    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
    /* DW1: total macroblock count minus one. */
    OUT_BCS_BATCH(batch,
                  (width_in_mbs * height_in_mbs - 1));
    /* DW2: frame dimensions in macroblocks, each minus one. */
    OUT_BCS_BATCH(batch,
                  ((height_in_mbs - 1) << 16) |
                  ((width_in_mbs - 1) << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 24) |
                  (0 << 16) |
                  (0 << 14) |
                  (0 << 13) |
                  (0 << 12) | /* differ from GEN6 */
                  (0 << 10) |
                  (img_struct << 8));
    OUT_BCS_BATCH(batch,
                  (1 << 10) | /* 4:2:0 */
                  (1 << 7) |  /* CABAC */
                  (0 << 6) |
                  (0 << 5) |
                  (0 << 4) |
                  (0 << 3) |
                  (1 << 2) |
                  (mbaff_frame_flag << 1) |
                  (0 << 0));
    /* DW5..15: remaining image-state fields, all zero for this WA frame. */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  2835.  
  2836. static void
  2837. gen75_jpeg_wa_avc_directmode_state_bplus(VADriverContextP ctx,
  2838.                                   struct gen7_mfd_context *gen7_mfd_context)
  2839. {
  2840.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  2841.     int i;
  2842.  
  2843.     BEGIN_BCS_BATCH(batch, 71);
  2844.     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
  2845.  
  2846.     /* reference surfaces 0..15 */
  2847.     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
  2848.         OUT_BCS_BATCH(batch, 0); /* top */
  2849.         OUT_BCS_BATCH(batch, 0); /* bottom */
  2850.     }
  2851.        
  2852.         OUT_BCS_BATCH(batch, 0);
  2853.  
  2854.     /* the current decoding frame/field */
  2855.     OUT_BCS_BATCH(batch, 0); /* top */
  2856.     OUT_BCS_BATCH(batch, 0);
  2857.     OUT_BCS_BATCH(batch, 0);
  2858.  
  2859.     /* POC List */
  2860.     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
  2861.         OUT_BCS_BATCH(batch, 0);
  2862.         OUT_BCS_BATCH(batch, 0);
  2863.     }
  2864.  
  2865.     OUT_BCS_BATCH(batch, 0);
  2866.     OUT_BCS_BATCH(batch, 0);
  2867.  
  2868.     ADVANCE_BCS_BATCH(batch);
  2869. }
  2870.  
  2871. static void
  2872. gen75_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
  2873.                                   struct gen7_mfd_context *gen7_mfd_context)
  2874. {
  2875.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2876.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  2877.     int i;
  2878.  
  2879.     if (IS_STEPPING_BPLUS(i965)) {
  2880.         gen75_jpeg_wa_avc_directmode_state_bplus(ctx, gen7_mfd_context);
  2881.         return;
  2882.     }  
  2883.  
  2884.     BEGIN_BCS_BATCH(batch, 69);
  2885.     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
  2886.  
  2887.     /* reference surfaces 0..15 */
  2888.     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
  2889.         OUT_BCS_BATCH(batch, 0); /* top */
  2890.         OUT_BCS_BATCH(batch, 0); /* bottom */
  2891.     }
  2892.  
  2893.     /* the current decoding frame/field */
  2894.     OUT_BCS_BATCH(batch, 0); /* top */
  2895.     OUT_BCS_BATCH(batch, 0); /* bottom */
  2896.  
  2897.     /* POC List */
  2898.     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
  2899.         OUT_BCS_BATCH(batch, 0);
  2900.         OUT_BCS_BATCH(batch, 0);
  2901.     }
  2902.  
  2903.     OUT_BCS_BATCH(batch, 0);
  2904.     OUT_BCS_BATCH(batch, 0);
  2905.  
  2906.     ADVANCE_BCS_BATCH(batch);
  2907. }
  2908.  
  2909. static void
  2910. gen75_jpeg_wa_ind_obj_base_addr_state_bplus(VADriverContextP ctx,
  2911.                                      struct gen7_mfd_context *gen7_mfd_context)
  2912. {
  2913.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  2914.  
  2915.     BEGIN_BCS_BATCH(batch, 11);
  2916.     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
  2917.     OUT_BCS_RELOC(batch,
  2918.                   gen7_mfd_context->jpeg_wa_slice_data_bo,
  2919.                   I915_GEM_DOMAIN_INSTRUCTION, 0,
  2920.                   0);
  2921.     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
  2922.     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
  2923.     OUT_BCS_BATCH(batch, 0);
  2924.     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
  2925.     OUT_BCS_BATCH(batch, 0);
  2926.     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
  2927.     OUT_BCS_BATCH(batch, 0);
  2928.     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
  2929.     OUT_BCS_BATCH(batch, 0);
  2930.     ADVANCE_BCS_BATCH(batch);
  2931. }
  2932.  
  2933. static void
  2934. gen75_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
  2935.                                      struct gen7_mfd_context *gen7_mfd_context)
  2936. {
  2937.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2938.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  2939.  
  2940.     if (IS_STEPPING_BPLUS(i965)) {
  2941.         gen75_jpeg_wa_ind_obj_base_addr_state_bplus(ctx, gen7_mfd_context);
  2942.         return;
  2943.     }  
  2944.  
  2945.     BEGIN_BCS_BATCH(batch, 11);
  2946.     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
  2947.     OUT_BCS_RELOC(batch,
  2948.                   gen7_mfd_context->jpeg_wa_slice_data_bo,
  2949.                   I915_GEM_DOMAIN_INSTRUCTION, 0,
  2950.                   0);
  2951.     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
  2952.     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
  2953.     OUT_BCS_BATCH(batch, 0);
  2954.     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
  2955.     OUT_BCS_BATCH(batch, 0);
  2956.     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
  2957.     OUT_BCS_BATCH(batch, 0);
  2958.     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
  2959.     OUT_BCS_BATCH(batch, 0);
  2960.     ADVANCE_BCS_BATCH(batch);
  2961. }
  2962.  
/*
 * MFD_AVC_BSD_OBJECT for the JPEG workaround: submits the built-in
 * bitstream (gen7_jpeg_wa_clip) as a single, last slice.
 */
static void
gen75_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
                            struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    /* the input bitsteam format on GEN7 differs from GEN6 */
    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
    OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
    OUT_BCS_BATCH(batch, 0); /* slice data start offset within the indirect object */
    OUT_BCS_BATCH(batch,
                  (0 << 31) |
                  (0 << 14) |
                  (0 << 12) |
                  (0 << 10) |
                  (0 << 8));
    OUT_BCS_BATCH(batch,
                  ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) | /* byte part of offset */
                  (0 << 5)  |
                  (0 << 4)  |
                  (1 << 3) | /* LastSlice Flag */
                  (gen7_jpeg_wa_clip.data_bit_offset & 0x7)); /* residual bit part */
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  2989.  
/*
 * MFX_AVC_SLICE_STATE for the JPEG workaround: one intra (I) slice
 * covering the whole 1-MB picture, deblocking disabled, QP taken from
 * the built-in clip, marked as the last slice.
 */
static void
gen75_jpeg_wa_avc_slice_state(VADriverContextP ctx,
                             struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
    int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0; /* I slice: no reference lists */
    int first_mb_in_slice = 0;
    int slice_type = SLICE_TYPE_I;

    BEGIN_BCS_BATCH(batch, 11);
    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, slice_type);
    OUT_BCS_BATCH(batch,
                  (num_ref_idx_l1 << 24) |
                  (num_ref_idx_l0 << 16) |
                  (0 << 8) |
                  (0 << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 29) |
                  (1 << 27) |   /* disable Deblocking */
                  (0 << 24) |
                  (gen7_jpeg_wa_clip.qp << 16) |
                  (0 << 8) |
                  (0 << 0));
    /* Slice start position (MB coordinates). */
    OUT_BCS_BATCH(batch,
                  (slice_ver_pos << 24) |
                  (slice_hor_pos << 16) |
                  (first_mb_in_slice << 0));
    /* Position of the (nonexistent) next slice: one MB row below. */
    OUT_BCS_BATCH(batch,
                  (next_slice_ver_pos << 16) |
                  (next_slice_hor_pos << 0));
    OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  3029.  
/*
 * Emit the full GEN7.5 JPEG decode workaround: run a tiny built-in AVC
 * intra slice (gen7_jpeg_wa_clip) through the MFX pipeline before the
 * real JPEG decode.  The sequence mirrors a regular AVC decode; the
 * command order below is fixed and must not be rearranged.
 */
static void
gen75_mfd_jpeg_wa(VADriverContextP ctx,
                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    gen75_jpeg_wa_init(ctx, gen7_mfd_context);
    intel_batchbuffer_emit_mi_flush(batch);
    gen75_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
    gen75_jpeg_wa_surface_state(ctx, gen7_mfd_context);
    gen75_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
    gen75_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
    gen75_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
    gen75_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
    gen75_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);

    gen75_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
    gen75_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
    gen75_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
}
  3049.  
  3050. void
  3051. gen75_mfd_jpeg_decode_picture(VADriverContextP ctx,
  3052.                              struct decode_state *decode_state,
  3053.                              struct gen7_mfd_context *gen7_mfd_context)
  3054. {
  3055.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  3056.     VAPictureParameterBufferJPEGBaseline *pic_param;
  3057.     VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
  3058.     dri_bo *slice_data_bo;
  3059.     int i, j, max_selector = 0;
  3060.  
  3061.     assert(decode_state->pic_param && decode_state->pic_param->buffer);
  3062.     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
  3063.  
  3064.     /* Currently only support Baseline DCT */
  3065.     gen75_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
  3066.     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
  3067.     gen75_mfd_jpeg_wa(ctx, gen7_mfd_context);
  3068.     intel_batchbuffer_emit_mi_flush(batch);
  3069.     gen75_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
  3070.     gen75_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
  3071.     gen75_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
  3072.     gen75_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
  3073.     gen75_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);
  3074.  
  3075.     for (j = 0; j < decode_state->num_slice_params; j++) {
  3076.         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
  3077.         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
  3078.         slice_data_bo = decode_state->slice_datas[j]->bo;
  3079.         gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
  3080.  
  3081.         if (j == decode_state->num_slice_params - 1)
  3082.             next_slice_group_param = NULL;
  3083.         else
  3084.             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
  3085.  
  3086.         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
  3087.             int component;
  3088.  
  3089.             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
  3090.  
  3091.             if (i < decode_state->slice_params[j]->num_elements - 1)
  3092.                 next_slice_param = slice_param + 1;
  3093.             else
  3094.                 next_slice_param = next_slice_group_param;
  3095.  
  3096.             for (component = 0; component < slice_param->num_components; component++) {
  3097.                 if (max_selector < slice_param->components[component].dc_table_selector)
  3098.                     max_selector = slice_param->components[component].dc_table_selector;
  3099.  
  3100.                 if (max_selector < slice_param->components[component].ac_table_selector)
  3101.                     max_selector = slice_param->components[component].ac_table_selector;
  3102.             }
  3103.  
  3104.             slice_param++;
  3105.         }
  3106.     }
  3107.  
  3108.     assert(max_selector < 2);
  3109.     gen75_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);
  3110.  
  3111.     for (j = 0; j < decode_state->num_slice_params; j++) {
  3112.         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
  3113.         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
  3114.         slice_data_bo = decode_state->slice_datas[j]->bo;
  3115.         gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
  3116.  
  3117.         if (j == decode_state->num_slice_params - 1)
  3118.             next_slice_group_param = NULL;
  3119.         else
  3120.             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
  3121.  
  3122.         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
  3123.             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
  3124.  
  3125.             if (i < decode_state->slice_params[j]->num_elements - 1)
  3126.                 next_slice_param = slice_param + 1;
  3127.             else
  3128.                 next_slice_param = next_slice_group_param;
  3129.  
  3130.             gen75_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
  3131.             slice_param++;
  3132.         }
  3133.     }
  3134.  
  3135.     intel_batchbuffer_end_atomic(batch);
  3136.     intel_batchbuffer_flush(batch);
  3137. }
  3138.  
  3139. static VAStatus
  3140. gen75_mfd_decode_picture(VADriverContextP ctx,
  3141.                         VAProfile profile,
  3142.                         union codec_state *codec_state,
  3143.                         struct hw_context *hw_context)
  3144.  
  3145. {
  3146.     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
  3147.     struct decode_state *decode_state = &codec_state->decode;
  3148.     VAStatus vaStatus;
  3149.  
  3150.     assert(gen7_mfd_context);
  3151.  
  3152.     vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);
  3153.  
  3154.     if (vaStatus != VA_STATUS_SUCCESS)
  3155.         goto out;
  3156.  
  3157.     gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
  3158.  
  3159.     switch (profile) {
  3160.     case VAProfileMPEG2Simple:
  3161.     case VAProfileMPEG2Main:
  3162.         gen75_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
  3163.         break;
  3164.        
  3165.     case VAProfileH264ConstrainedBaseline:
  3166.     case VAProfileH264Main:
  3167.     case VAProfileH264High:
  3168.     case VAProfileH264StereoHigh:
  3169.     case VAProfileH264MultiviewHigh:
  3170.         gen75_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
  3171.         break;
  3172.  
  3173.     case VAProfileVC1Simple:
  3174.     case VAProfileVC1Main:
  3175.     case VAProfileVC1Advanced:
  3176.         gen75_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
  3177.         break;
  3178.  
  3179.     case VAProfileJPEGBaseline:
  3180.         gen75_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
  3181.         break;
  3182.  
  3183.     default:
  3184.         assert(0);
  3185.         break;
  3186.     }
  3187.  
  3188.     vaStatus = VA_STATUS_SUCCESS;
  3189.  
  3190. out:
  3191.     return vaStatus;
  3192. }
  3193.  
/*
 * hw_context destroy hook: release every buffer object owned by the MFD
 * context, tear down the JPEG workaround surface, free the batchbuffer
 * and finally the context itself.
 */
static void
gen75_mfd_context_destroy(void *hw_context)
{
    VADriverContextP ctx;
    struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;

    ctx = (VADriverContextP)(gen7_mfd_context->driver_context);

    dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.bo = NULL;

    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = NULL;

    dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
    gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;

    dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;

    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;

    dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
    gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;

    dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
    gen7_mfd_context->bitplane_read_buffer.bo = NULL;

    /* No need to NULL this field: the whole context is freed below. */
    dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);

    if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE) {
        i965_DestroySurfaces(ctx,
                             &gen7_mfd_context->jpeg_wa_surface_id,
                             1);
        gen7_mfd_context->jpeg_wa_surface_object = NULL;
    }

    intel_batchbuffer_free(gen7_mfd_context->base.batch);
    free(gen7_mfd_context);
}
  3235.  
  3236. static void gen75_mfd_mpeg2_context_init(VADriverContextP ctx,
  3237.                                     struct gen7_mfd_context *gen7_mfd_context)
  3238. {
  3239.     gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
  3240.     gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
  3241.     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
  3242.     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
  3243. }
  3244.  
  3245. struct hw_context *
  3246. gen75_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
  3247. {
  3248.     struct intel_driver_data *intel = intel_driver_data(ctx);
  3249.     struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
  3250.     int i;
  3251.  
  3252.     assert(gen7_mfd_context);
  3253.     gen7_mfd_context->base.destroy = gen75_mfd_context_destroy;
  3254.     gen7_mfd_context->base.run = gen75_mfd_decode_picture;
  3255.     gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
  3256.  
  3257.     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
  3258.         gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
  3259.         gen7_mfd_context->reference_surface[i].frame_store_id = -1;
  3260.         gen7_mfd_context->reference_surface[i].obj_surface = NULL;
  3261.     }
  3262.  
  3263.     gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
  3264.     gen7_mfd_context->jpeg_wa_surface_object = NULL;
  3265.  
  3266.     switch (obj_config->profile) {
  3267.     case VAProfileMPEG2Simple:
  3268.     case VAProfileMPEG2Main:
  3269.         gen75_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
  3270.         break;
  3271.  
  3272.     case VAProfileH264ConstrainedBaseline:
  3273.     case VAProfileH264Main:
  3274.     case VAProfileH264High:
  3275.     case VAProfileH264StereoHigh:
  3276.     case VAProfileH264MultiviewHigh:
  3277.         gen75_mfd_avc_context_init(ctx, gen7_mfd_context);
  3278.         break;
  3279.     default:
  3280.         break;
  3281.     }
  3282.  
  3283.     gen7_mfd_context->driver_context = ctx;
  3284.     return (struct hw_context *)gen7_mfd_context;
  3285. }
  3286.