Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /*
  2.  * Copyright © 2011 Intel Corporation
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the
  6.  * "Software"), to deal in the Software without restriction, including
  7.  * without limitation the rights to use, copy, modify, merge, publish,
  8.  * distribute, sub license, and/or sell copies of the Software, and to
  9.  * permit persons to whom the Software is furnished to do so, subject to
  10.  * the following conditions:
  11.  *
  12.  * The above copyright notice and this permission notice (including the
  13.  * next paragraph) shall be included in all copies or substantial portions
  14.  * of the Software.
  15.  *
  16.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  17.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  18.  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  19.  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
  20.  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  21.  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  22.  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23.  *
  24.  * Authors:
  25.  *    Xiang Haihao <haihao.xiang@intel.com>
  26.  *    Zhao Yakui  <yakui.zhao@intel.com>
  27.  *
  28.  */
#include "sysdeps.h"

#include <string.h>

#include <va/va_dec_jpeg.h>

#include "intel_batchbuffer.h"
#include "intel_driver.h"
#include "i965_defines.h"
#include "i965_drv_video.h"
#include "i965_decoder_utils.h"
#include "gen7_mfd.h"
#include "intel_media.h"
  40.  
  41. #define B0_STEP_REV             2
  42. #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
  43.  
/*
 * Zig-zag scan order for an 8x8 coefficient block: zigzag_direct[i] is the
 * raster-order index of the i-th coefficient along the zig-zag path.
 * NOTE(review): not referenced in this chunk — presumably used further down
 * (e.g. for JPEG/MPEG-2 quantization-matrix reordering); confirm at call site.
 */
static const uint32_t zigzag_direct[64] = {
    0,   1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
  54.  
  55. static void
  56. gen75_mfd_init_avc_surface(VADriverContextP ctx,
  57.                           VAPictureParameterBufferH264 *pic_param,
  58.                           struct object_surface *obj_surface)
  59. {
  60.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  61.     GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
  62.     int width_in_mbs, height_in_mbs;
  63.  
  64.     obj_surface->free_private_data = gen_free_avc_surface;
  65.     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
  66.     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
  67.  
  68.     if (!gen7_avc_surface) {
  69.         gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
  70.         gen7_avc_surface->frame_store_id = -1;
  71.         assert((obj_surface->size & 0x3f) == 0);
  72.         obj_surface->private_data = gen7_avc_surface;
  73.     }
  74.  
  75.     gen7_avc_surface->dmv_bottom_flag = (pic_param->pic_fields.bits.field_pic_flag &&
  76.                                          !pic_param->seq_fields.bits.direct_8x8_inference_flag);
  77.  
  78.     if (gen7_avc_surface->dmv_top == NULL) {
  79.         gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
  80.                                                  "direct mv w/r buffer",
  81.                                                  width_in_mbs * height_in_mbs * 128,
  82.                                                  0x1000);
  83.         assert(gen7_avc_surface->dmv_top);
  84.     }
  85.  
  86.     if (gen7_avc_surface->dmv_bottom_flag &&
  87.         gen7_avc_surface->dmv_bottom == NULL) {
  88.         gen7_avc_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr,
  89.                                                     "direct mv w/r buffer",
  90.                                                     width_in_mbs * height_in_mbs * 128,                                                    
  91.                                                     0x1000);
  92.         assert(gen7_avc_surface->dmv_bottom);
  93.     }
  94. }
  95.  
/*
 * Emit MFX_PIPE_MODE_SELECT (5 dwords): put the MFX engine into
 * long-format VLD decode mode for the selected codec standard.
 * Which of the pre-/post-deblocking outputs is enabled mirrors the
 * .valid flags set up by the per-codec picture initialization elsewhere.
 */
static void
gen75_mfd_pipe_mode_select(VADriverContextP ctx,
                          struct decode_state *decode_state,
                          int standard_select,
                          struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    /* Only the four codecs supported by this decoder are legal here. */
    assert(standard_select == MFX_FORMAT_MPEG2 ||
           standard_select == MFX_FORMAT_AVC ||
           standard_select == MFX_FORMAT_VC1 ||
           standard_select == MFX_FORMAT_JPEG);

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  (MFX_LONG_MODE << 17) | /* Currently only support long format */
                  (MFD_MODE_VLD << 15) | /* VLD mode */
                  (0 << 10) | /* disable Stream-Out */
                  (gen7_mfd_context->post_deblocking_output.valid << 9)  | /* Post Deblocking Output */
                  (gen7_mfd_context->pre_deblocking_output.valid << 8)  | /* Pre Deblocking Output */
                  (0 << 5)  | /* not in stitch mode */
                  (MFX_CODEC_DECODE << 4)  | /* decoding mode */
                  (standard_select << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
                  (0 << 1)  |
                  (0 << 0));
    OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
    OUT_BCS_BATCH(batch, 0); /* reserved */
    ADVANCE_BCS_BATCH(batch);
}
  130.  
/*
 * Emit MFX_SURFACE_STATE (6 dwords) describing the destination surface:
 * dimensions, pitch, tiling, and the Cb/Cr plane offsets.  A VA_FOURCC_Y800
 * surface is programmed as monochrome; anything else as planar 4:2:0.
 * JPEG is the one codec with non-interleaved chroma and a separate Cr offset.
 */
static void
gen75_mfd_surface_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       int standard_select,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct object_surface *obj_surface = decode_state->render_object;
    unsigned int y_cb_offset;
    unsigned int y_cr_offset;
    unsigned int surface_format;

    assert(obj_surface);

    y_cb_offset = obj_surface->y_cb_offset;
    y_cr_offset = obj_surface->y_cr_offset;

    surface_format = obj_surface->fourcc == VA_FOURCC_Y800 ?
        MFX_SURFACE_MONOCHROME : MFX_SURFACE_PLANAR_420_8;

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  ((obj_surface->orig_height - 1) << 18) |
                  ((obj_surface->orig_width - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (surface_format << 28) | /* 420 planar YUV surface */
                  ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
                  (0 << 22) | /* surface object control state, ignored */
                  ((obj_surface->width - 1) << 3) | /* pitch */
                  (0 << 2)  | /* must be 0 */
                  (1 << 1)  | /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for U(Cb), must be 0 */
                  (y_cb_offset << 0)); /* Y offset for U(Cb) */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for V(Cr), must be 0 */
                  (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
    ADVANCE_BCS_BATCH(batch);
}
  173.  
/*
 * Emit MFX_PIPE_BUF_ADDR_STATE in the 61-dword B+ stepping layout, where
 * each buffer address spans multiple dwords.  Buffers not set up by the
 * per-codec init (or not relevant to decode) are emitted as zero.
 * The dword order below is fixed by the hardware command layout.
 */
static void
gen75_mfd_pipe_buf_addr_state_bplus(VADriverContextP ctx,
                             struct decode_state *decode_state,
                             int standard_select,
                             struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int i;

    BEGIN_BCS_BATCH(batch, 61);
    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
        /* Pre-deblocking output, DW 1-3 */
    if (gen7_mfd_context->pre_deblocking_output.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        /* Post-deblocking output, DW 4-6 */
    if (gen7_mfd_context->post_deblocking_output.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);

        /* uncompressed-video & stream out, DW 7-12 */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);

        /* intra row-store scratch, DW 13-15 */
    if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        /* deblocking-filter-row-store, DW 16-18 */
    if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);

    /* Reference picture addresses, two dwords each: DW 19..50.
     * Slots without a usable reference surface are zeroed. */
    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
        struct object_surface *obj_surface;

        if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
            gen7_mfd_context->reference_surface[i].obj_surface &&
            gen7_mfd_context->reference_surface[i].obj_surface->bo) {
            obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;

            OUT_BCS_RELOC(batch, obj_surface->bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0);
        } else {
            OUT_BCS_BATCH(batch, 0);
        }
            OUT_BCS_BATCH(batch, 0);
    }
        /* reference property, DW 51 */
    OUT_BCS_BATCH(batch, 0);

        /* Macroblock status & ILDB, DW 52-57 */
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);

        /* the second Macroblock status, DW 58-60 */
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  268.  
/*
 * Emit MFX_PIPE_BUF_ADDR_STATE.  B+ stepping hardware uses a different,
 * longer command layout, so dispatch to the _bplus variant there; the
 * 25-dword pre-B layout follows.
 */
static void
gen75_mfd_pipe_buf_addr_state(VADriverContextP ctx,
                             struct decode_state *decode_state,
                             int standard_select,
                             struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    int i;

    if (IS_STEPPING_BPLUS(i965)) {
        gen75_mfd_pipe_buf_addr_state_bplus(ctx, decode_state,
                                            standard_select, gen7_mfd_context);
        return;
    }

    BEGIN_BCS_BATCH(batch, 25);
    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (25 - 2));
    /* Pre-deblocking output (DW 1) */
    if (gen7_mfd_context->pre_deblocking_output.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    /* Post-deblocking output (DW 2) */
    if (gen7_mfd_context->post_deblocking_output.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */

    /* Intra row-store scratch (DW 5) */
    if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    /* Deblocking-filter row-store scratch (DW 6) */
    if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    /* Reference picture addresses, one dword each: DW 7..22.
     * Slots without a usable reference surface are zeroed. */
    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
        struct object_surface *obj_surface;

        if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
            gen7_mfd_context->reference_surface[i].obj_surface &&
            gen7_mfd_context->reference_surface[i].obj_surface->bo) {
            obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;

            OUT_BCS_RELOC(batch, obj_surface->bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0);
        } else {
            OUT_BCS_BATCH(batch, 0);
        }
    }

    OUT_BCS_BATCH(batch, 0);   /* ignore DW23 for decoding */
    OUT_BCS_BATCH(batch, 0);   /* ignore DW24 for decoding */
    ADVANCE_BCS_BATCH(batch);
}
  339.  
/*
 * Emit MFX_IND_OBJ_BASE_ADDR_STATE in the 26-dword B+ stepping layout:
 * point the bitstream fetch at SLICE_DATA_BO.  The indirect MV, IT-COEFF,
 * IT-DBLK and PAK-BSE objects are unused for VLD decode and emitted as zero.
 */
static void
gen75_mfd_ind_obj_base_addr_state_bplus(VADriverContextP ctx,
                                 dri_bo *slice_data_bo,
                                 int standard_select,
                                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 26);
    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
        /* MFX In BS, DW 1-5 */
    OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
        /* Upper bound, DW 4-5 */
    OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
    OUT_BCS_BATCH(batch, 0);

        /* MFX indirect MV, DW 6-10 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

        /* MFX IT_COFF, DW 11-15 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

        /* MFX IT_DBLK, DW 16-20 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

        /* MFX PAK_BSE object for encoder, DW 21-25 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
  388.  
  389. static void
  390. gen75_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
  391.                                  dri_bo *slice_data_bo,
  392.                                  int standard_select,
  393.                                  struct gen7_mfd_context *gen7_mfd_context)
  394. {
  395.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  396.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  397.  
  398.     if (IS_STEPPING_BPLUS(i965)) {
  399.         gen75_mfd_ind_obj_base_addr_state_bplus(ctx, slice_data_bo,
  400.                                                 standard_select, gen7_mfd_context);
  401.         return;
  402.     }
  403.  
  404.     BEGIN_BCS_BATCH(batch, 11);
  405.     OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
  406.     OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
  407.     OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
  408.     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
  409.     OUT_BCS_BATCH(batch, 0);
  410.     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
  411.     OUT_BCS_BATCH(batch, 0);
  412.     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
  413.     OUT_BCS_BATCH(batch, 0);
  414.     OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
  415.     OUT_BCS_BATCH(batch, 0);
  416.     ADVANCE_BCS_BATCH(batch);
  417. }
  418.  
/*
 * Emit MFX_BSP_BUF_BASE_ADDR_STATE in the 10-dword B+ stepping layout:
 * BSD/MPC row-store scratch, MPR row-store scratch, and the VC-1
 * bitplane read buffer (read-only, hence write domain 0), each padded
 * to three dwords.  Invalid buffers are emitted as zero.
 */
static void
gen75_mfd_bsp_buf_base_addr_state_bplus(VADriverContextP ctx,
                                 struct decode_state *decode_state,
                                 int standard_select,
                                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 10);
    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));

    /* BSD/MPC row-store scratch, DW 1-3 */
    if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
        else
                OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
        /* MPR Row Store Scratch buffer, DW 4-6 */
    if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
            OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

        /* Bitplane read buffer (read-only), DW 7-9 */
    if (gen7_mfd_context->bitplane_read_buffer.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
  461.  
  462. static void
  463. gen75_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
  464.                                  struct decode_state *decode_state,
  465.                                  int standard_select,
  466.                                  struct gen7_mfd_context *gen7_mfd_context)
  467. {
  468.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  469.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  470.  
  471.     if (IS_STEPPING_BPLUS(i965)) {
  472.         gen75_mfd_bsp_buf_base_addr_state_bplus(ctx, decode_state,
  473.                                                 standard_select, gen7_mfd_context);
  474.         return;
  475.     }
  476.  
  477.     BEGIN_BCS_BATCH(batch, 4);
  478.     OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));
  479.  
  480.     if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
  481.         OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
  482.                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
  483.                       0);
  484.     else
  485.         OUT_BCS_BATCH(batch, 0);
  486.  
  487.     if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
  488.         OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
  489.                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
  490.                       0);
  491.     else
  492.         OUT_BCS_BATCH(batch, 0);
  493.  
  494.     if (gen7_mfd_context->bitplane_read_buffer.valid)
  495.         OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
  496.                       I915_GEM_DOMAIN_INSTRUCTION, 0,
  497.                       0);
  498.     else
  499.         OUT_BCS_BATCH(batch, 0);
  500.  
  501.     ADVANCE_BCS_BATCH(batch);
  502. }
  503.  
  504. static void
  505. gen75_mfd_qm_state(VADriverContextP ctx,
  506.                   int qm_type,
  507.                   unsigned char *qm,
  508.                   int qm_length,
  509.                   struct gen7_mfd_context *gen7_mfd_context)
  510. {
  511.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  512.     unsigned int qm_buffer[16];
  513.  
  514.     assert(qm_length <= 16 * 4);
  515.     memcpy(qm_buffer, qm, qm_length);
  516.  
  517.     BEGIN_BCS_BATCH(batch, 18);
  518.     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
  519.     OUT_BCS_BATCH(batch, qm_type << 0);
  520.     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
  521.     ADVANCE_BCS_BATCH(batch);
  522. }
  523.  
/*
 * Emit MFX_AVC_IMG_STATE (17 dwords) from the VA-API H.264 picture
 * parameters: frame geometry in macroblocks, chroma QP index offsets,
 * prediction/entropy flags, and the picture structure.  The asserts
 * mirror H.264 spec constraints and the MFX unit's 4:2:0-only limit.
 */
static void
gen75_mfd_avc_img_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int img_struct;
    int mbaff_frame_flag;
    unsigned int width_in_mbs, height_in_mbs;
    VAPictureParameterBufferH264 *pic_param;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;

    assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));

    /* img_struct: 0 = frame, 1 = top field, 3 = bottom field */
    if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
        img_struct = 1;
    else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
        img_struct = 3;
    else
        img_struct = 0;

    /* A field picture structure must agree with field_pic_flag. */
    if ((img_struct & 0x1) == 0x1) {
        assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
    } else {
        assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
    }

    if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
        assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
        assert(pic_param->pic_fields.bits.field_pic_flag == 0);
    } else {
        assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
    }

    /* MBAFF: adaptive frame/field coding within a frame picture. */
    mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
                        !pic_param->pic_fields.bits.field_pic_flag);

    width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */

    /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
    assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
           pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
    assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */

    BEGIN_BCS_BATCH(batch, 17);
    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (17 - 2));
    OUT_BCS_BATCH(batch,
                  (width_in_mbs * height_in_mbs - 1));
    OUT_BCS_BATCH(batch,
                  ((height_in_mbs - 1) << 16) |
                  ((width_in_mbs - 1) << 0));
    OUT_BCS_BATCH(batch,
                  ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
                  ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
                  (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
                  (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
                  (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
                  (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
                  (img_struct << 8));
    OUT_BCS_BATCH(batch,
                  (pic_param->seq_fields.bits.chroma_format_idc << 10) |
                  (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
                  ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
                  (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
                  (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
                  (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
                  (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
                  (mbaff_frame_flag << 1) |
                  (pic_param->pic_fields.bits.field_pic_flag << 0));
    /* DW 5-16: unused here, emitted as zero. */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  610.  
  611. static void
  612. gen75_mfd_avc_qm_state(VADriverContextP ctx,
  613.                       struct decode_state *decode_state,
  614.                       struct gen7_mfd_context *gen7_mfd_context)
  615. {
  616.     VAIQMatrixBufferH264 *iq_matrix;
  617.     VAPictureParameterBufferH264 *pic_param;
  618.  
  619.     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
  620.         iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
  621.     else
  622.         iq_matrix = &gen7_mfd_context->iq_matrix.h264;
  623.  
  624.     assert(decode_state->pic_param && decode_state->pic_param->buffer);
  625.     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
  626.  
  627.     gen75_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
  628.     gen75_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
  629.  
  630.     if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
  631.         gen75_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
  632.         gen75_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
  633.     }
  634. }
  635.  
  636. static inline void
  637. gen75_mfd_avc_picid_state(VADriverContextP ctx,
  638.                       struct decode_state *decode_state,
  639.                       struct gen7_mfd_context *gen7_mfd_context)
  640. {
  641.     gen75_send_avc_picid_state(gen7_mfd_context->base.batch,
  642.         gen7_mfd_context->reference_surface);
  643. }
  644.  
/*
 * Emit MFX_AVC_DIRECTMODE_STATE (71 dwords, B+ stepping layout):
 * direct-MV buffer addresses for the 16 reference frame stores and for
 * the current picture, followed by the POC list (Top/BottomFieldOrderCnt
 * per reference, then the current picture's).
 */
static void
gen75_mfd_avc_directmode_state_bplus(VADriverContextP ctx,
                              struct decode_state *decode_state,
                              VAPictureParameterBufferH264 *pic_param,
                              VASliceParameterBufferH264 *slice_param,
                              struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct object_surface *obj_surface;
    GenAvcSurface *gen7_avc_surface;
    VAPictureH264 *va_pic;
    int i;

    BEGIN_BCS_BATCH(batch, 71);
    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));

    /* reference surfaces 0..15: top-field direct-MV buffer (read-only),
     * two dwords per slot; empty slots are zeroed */
    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
        if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
            gen7_mfd_context->reference_surface[i].obj_surface &&
            gen7_mfd_context->reference_surface[i].obj_surface->private_data) {

            obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
            gen7_avc_surface = obj_surface->private_data;
            OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0);
            OUT_BCS_BATCH(batch, 0);
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    OUT_BCS_BATCH(batch, 0);

    /* the current decoding frame/field: MV buffer is read/write here */
    va_pic = &pic_param->CurrPic;
    obj_surface = decode_state->render_object;
    assert(obj_surface->bo && obj_surface->private_data);
    gen7_avc_surface = obj_surface->private_data;

    OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* POC List */
    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
        obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;

        if (obj_surface) {
            /* NOTE: intentionally shadows the outer va_pic for this loop */
            const VAPictureH264 * const va_pic = avc_find_picture(
                obj_surface->base.id, pic_param->ReferenceFrames,
                ARRAY_ELEMS(pic_param->ReferenceFrames));

            assert(va_pic != NULL);
            OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
            OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    /* finally the current picture's POCs */
    va_pic = &pic_param->CurrPic;
    OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
    OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);

    ADVANCE_BCS_BATCH(batch);
}
  718.  
  719. static void
  720. gen75_mfd_avc_directmode_state(VADriverContextP ctx,
  721.                               struct decode_state *decode_state,
  722.                               VAPictureParameterBufferH264 *pic_param,
  723.                               VASliceParameterBufferH264 *slice_param,
  724.                               struct gen7_mfd_context *gen7_mfd_context)
  725. {
  726.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  727.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  728.     struct object_surface *obj_surface;
  729.     GenAvcSurface *gen7_avc_surface;
  730.     VAPictureH264 *va_pic;
  731.     int i;
  732.  
  733.     if (IS_STEPPING_BPLUS(i965)) {
  734.         gen75_mfd_avc_directmode_state_bplus(ctx, decode_state, pic_param, slice_param,
  735.                                              gen7_mfd_context);
  736.  
  737.         return;
  738.     }
  739.  
  740.     BEGIN_BCS_BATCH(batch, 69);
  741.     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
  742.  
  743.     /* reference surfaces 0..15 */
  744.     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
  745.         if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
  746.             gen7_mfd_context->reference_surface[i].obj_surface &&
  747.             gen7_mfd_context->reference_surface[i].obj_surface->private_data) {
  748.  
  749.             obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
  750.             gen7_avc_surface = obj_surface->private_data;
  751.  
  752.             OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
  753.                           I915_GEM_DOMAIN_INSTRUCTION, 0,
  754.                           0);
  755.  
  756.             if (gen7_avc_surface->dmv_bottom_flag == 1)
  757.                 OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_bottom,
  758.                               I915_GEM_DOMAIN_INSTRUCTION, 0,
  759.                               0);
  760.             else
  761.                 OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
  762.                               I915_GEM_DOMAIN_INSTRUCTION, 0,
  763.                               0);
  764.         } else {
  765.             OUT_BCS_BATCH(batch, 0);
  766.             OUT_BCS_BATCH(batch, 0);
  767.         }
  768.     }
  769.  
  770.     /* the current decoding frame/field */
  771.     va_pic = &pic_param->CurrPic;
  772.     obj_surface = decode_state->render_object;
  773.     assert(obj_surface->bo && obj_surface->private_data);
  774.     gen7_avc_surface = obj_surface->private_data;
  775.  
  776.     OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
  777.                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
  778.                   0);
  779.  
  780.     if (gen7_avc_surface->dmv_bottom_flag == 1)
  781.         OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_bottom,
  782.                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
  783.                       0);
  784.     else
  785.         OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
  786.                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
  787.                       0);
  788.  
  789.     /* POC List */
  790.     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
  791.         obj_surface = gen7_mfd_context->reference_surface[i].obj_surface;
  792.  
  793.         if (obj_surface) {
  794.             const VAPictureH264 * const va_pic = avc_find_picture(
  795.                 obj_surface->base.id, pic_param->ReferenceFrames,
  796.                 ARRAY_ELEMS(pic_param->ReferenceFrames));
  797.  
  798.             assert(va_pic != NULL);
  799.             OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
  800.             OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
  801.         } else {
  802.             OUT_BCS_BATCH(batch, 0);
  803.             OUT_BCS_BATCH(batch, 0);
  804.         }
  805.     }
  806.  
  807.     va_pic = &pic_param->CurrPic;
  808.     OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
  809.     OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
  810.  
  811.     ADVANCE_BCS_BATCH(batch);
  812. }
  813.  
  814. static void
  815. gen75_mfd_avc_phantom_slice_first(VADriverContextP ctx,
  816.                                  VAPictureParameterBufferH264 *pic_param,
  817.                                  VASliceParameterBufferH264 *next_slice_param,
  818.                                  struct gen7_mfd_context *gen7_mfd_context)
  819. {
  820.     gen6_mfd_avc_phantom_slice(ctx, pic_param, next_slice_param, gen7_mfd_context->base.batch);
  821. }
  822.  
/*
 * Emit MFX_AVC_SLICE_STATE for one slice: HW slice type, active reference
 * counts, weighted-prediction denominators, deblocking/CABAC/QP controls,
 * and the macroblock positions of this slice and the next one (which the
 * HW uses to find the slice end).
 */
static void
gen75_mfd_avc_slice_state(VADriverContextP ctx,
                         VAPictureParameterBufferH264 *pic_param,
                         VASliceParameterBufferH264 *slice_param,
                         VASliceParameterBufferH264 *next_slice_param,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
    int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
    int num_ref_idx_l0, num_ref_idx_l1;
    /* MBAFF frame pictures double the vertical MB addressing (shift below) */
    int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
                         pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
    int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
    int slice_type;

    /* collapse SI->I and SP->P: the command takes only I/P/B */
    if (slice_param->slice_type == SLICE_TYPE_I ||
        slice_param->slice_type == SLICE_TYPE_SI) {
        slice_type = SLICE_TYPE_I;
    } else if (slice_param->slice_type == SLICE_TYPE_P ||
               slice_param->slice_type == SLICE_TYPE_SP) {
        slice_type = SLICE_TYPE_P;
    } else {
        assert(slice_param->slice_type == SLICE_TYPE_B);
        slice_type = SLICE_TYPE_B;
    }

    /* active reference counts: none for I, L0 only for P, both for B */
    if (slice_type == SLICE_TYPE_I) {
        assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
        num_ref_idx_l0 = 0;
        num_ref_idx_l1 = 0;
    } else if (slice_type == SLICE_TYPE_P) {
        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
        num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_idx_l1 = 0;
    } else {
        num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
    }

    first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
    slice_hor_pos = first_mb_in_slice % width_in_mbs;
    slice_ver_pos = first_mb_in_slice / width_in_mbs;

    if (next_slice_param) {
        first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture;
        next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs;
        next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
    } else {
        /* last slice: "next" position is the picture bottom
         * (halved vertically for field pictures) */
        next_slice_hor_pos = 0;
        next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
    }

    /* 11 dwords emitted below: header + 10 payload (FIXME note kept from
     * the original author: is it 10???) */
    BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, slice_type);
    OUT_BCS_BATCH(batch,
                  (num_ref_idx_l1 << 24) |
                  (num_ref_idx_l0 << 16) |
                  (slice_param->chroma_log2_weight_denom << 8) |
                  (slice_param->luma_log2_weight_denom << 0));
    OUT_BCS_BATCH(batch,
                  (slice_param->direct_spatial_mv_pred_flag << 29) |
                  (slice_param->disable_deblocking_filter_idc << 27) |
                  (slice_param->cabac_init_idc << 24) |
                  ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
                  ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
                  ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
    OUT_BCS_BATCH(batch,
                  (slice_ver_pos << 24) |
                  (slice_hor_pos << 16) |
                  (first_mb_in_slice << 0));
    OUT_BCS_BATCH(batch,
                  (next_slice_ver_pos << 16) |
                  (next_slice_hor_pos << 0));
    OUT_BCS_BATCH(batch,
                  (next_slice_param == NULL) << 19); /* last slice flag */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  908.  
  909. static inline void
  910. gen75_mfd_avc_ref_idx_state(VADriverContextP ctx,
  911.                            VAPictureParameterBufferH264 *pic_param,
  912.                            VASliceParameterBufferH264 *slice_param,
  913.                            struct gen7_mfd_context *gen7_mfd_context)
  914. {
  915.     gen6_send_avc_ref_idx_state(
  916.         gen7_mfd_context->base.batch,
  917.         slice_param,
  918.         gen7_mfd_context->reference_surface
  919.     );
  920. }
  921.  
  922. static void
  923. gen75_mfd_avc_weightoffset_state(VADriverContextP ctx,
  924.                                 VAPictureParameterBufferH264 *pic_param,
  925.                                 VASliceParameterBufferH264 *slice_param,
  926.                                 struct gen7_mfd_context *gen7_mfd_context)
  927. {
  928.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  929.     int i, j, num_weight_offset_table = 0;
  930.     short weightoffsets[32 * 6];
  931.  
  932.     if ((slice_param->slice_type == SLICE_TYPE_P ||
  933.          slice_param->slice_type == SLICE_TYPE_SP) &&
  934.         (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
  935.         num_weight_offset_table = 1;
  936.     }
  937.    
  938.     if ((slice_param->slice_type == SLICE_TYPE_B) &&
  939.         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
  940.         num_weight_offset_table = 2;
  941.     }
  942.  
  943.     for (i = 0; i < num_weight_offset_table; i++) {
  944.         BEGIN_BCS_BATCH(batch, 98);
  945.         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
  946.         OUT_BCS_BATCH(batch, i);
  947.  
  948.         if (i == 0) {
  949.             for (j = 0; j < 32; j++) {
  950.                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
  951.                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
  952.                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
  953.                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
  954.                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
  955.                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
  956.             }
  957.         } else {
  958.             for (j = 0; j < 32; j++) {
  959.                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
  960.                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
  961.                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
  962.                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
  963.                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
  964.                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
  965.             }
  966.         }
  967.  
  968.         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
  969.         ADVANCE_BCS_BATCH(batch);
  970.     }
  971. }
  972.  
/*
 * Emit MFD_AVC_BSD_OBJECT: points the bitstream decoder at one slice's
 * data in the indirect object buffer (size and offset) together with the
 * bit offset of the first macroblock past the slice header.
 */
static void
gen75_mfd_avc_bsd_object(VADriverContextP ctx,
                        VAPictureParameterBufferH264 *pic_param,
                        VASliceParameterBufferH264 *slice_param,
                        dri_bo *slice_data_bo,
                        VASliceParameterBufferH264 *next_slice_param,
                        struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    /* bit position of the first MB after the slice header; depends on
     * whether the slice is CAVLC or CABAC coded */
    int slice_data_bit_offset = avc_get_first_mb_bit_offset(slice_data_bo,
                                                            slice_param,
                                                            pic_param->pic_fields.bits.entropy_coding_mode_flag);

    /* the input bitstream format on GEN7 differs from GEN6 */
    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
    OUT_BCS_BATCH(batch,
                  (slice_param->slice_data_size - slice_param->slice_data_offset));
    OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
    OUT_BCS_BATCH(batch,
                  (0 << 31) |
                  (0 << 14) |
                  (0 << 12) |
                  (0 << 10) |
                  (0 << 8));
    /* byte part of the offset in bits 16+, bit remainder in bits 0..2 */
    OUT_BCS_BATCH(batch,
                  ((slice_data_bit_offset >> 3) << 16) |
                  (1 << 7)  |
                  (0 << 5)  |
                  (0 << 4)  |
                  ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
                  (slice_data_bit_offset & 0x7));
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  1008.  
  1009. static inline void
  1010. gen75_mfd_avc_context_init(
  1011.     VADriverContextP         ctx,
  1012.     struct gen7_mfd_context *gen7_mfd_context
  1013. )
  1014. {
  1015.     /* Initialize flat scaling lists */
  1016.     avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
  1017. }
  1018.  
  1019. static void
  1020. gen75_mfd_avc_decode_init(VADriverContextP ctx,
  1021.                          struct decode_state *decode_state,
  1022.                          struct gen7_mfd_context *gen7_mfd_context)
  1023. {
  1024.     VAPictureParameterBufferH264 *pic_param;
  1025.     VASliceParameterBufferH264 *slice_param;
  1026.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1027.     struct object_surface *obj_surface;
  1028.     dri_bo *bo;
  1029.     int i, j, enable_avc_ildb = 0;
  1030.     unsigned int width_in_mbs, height_in_mbs;
  1031.  
  1032.     for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
  1033.         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
  1034.         slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
  1035.  
  1036.         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
  1037.             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
  1038.             assert((slice_param->slice_type == SLICE_TYPE_I) ||
  1039.                    (slice_param->slice_type == SLICE_TYPE_SI) ||
  1040.                    (slice_param->slice_type == SLICE_TYPE_P) ||
  1041.                    (slice_param->slice_type == SLICE_TYPE_SP) ||
  1042.                    (slice_param->slice_type == SLICE_TYPE_B));
  1043.  
  1044.             if (slice_param->disable_deblocking_filter_idc != 1) {
  1045.                 enable_avc_ildb = 1;
  1046.                 break;
  1047.             }
  1048.  
  1049.             slice_param++;
  1050.         }
  1051.     }
  1052.  
  1053.     assert(decode_state->pic_param && decode_state->pic_param->buffer);
  1054.     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
  1055.     gen75_update_avc_frame_store_index(ctx, decode_state, pic_param,
  1056.         gen7_mfd_context->reference_surface);
  1057.     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
  1058.     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
  1059.     assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
  1060.     assert(height_in_mbs > 0 && height_in_mbs <= 256);
  1061.  
  1062.     /* Current decoded picture */
  1063.     obj_surface = decode_state->render_object;
  1064.     if (pic_param->pic_fields.bits.reference_pic_flag)
  1065.         obj_surface->flags |= SURFACE_REFERENCED;
  1066.     else
  1067.         obj_surface->flags &= ~SURFACE_REFERENCED;
  1068.  
  1069.     avc_ensure_surface_bo(ctx, decode_state, obj_surface, pic_param);
  1070.     gen75_mfd_init_avc_surface(ctx, pic_param, obj_surface);
  1071.  
  1072.     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
  1073.     gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
  1074.     dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
  1075.     gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;
  1076.  
  1077.     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
  1078.     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
  1079.     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
  1080.     gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;
  1081.  
  1082.     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
  1083.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1084.                       "intra row store",
  1085.                       width_in_mbs * 64,
  1086.                       0x1000);
  1087.     assert(bo);
  1088.     gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
  1089.     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;
  1090.  
  1091.     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
  1092.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1093.                       "deblocking filter row store",
  1094.                       width_in_mbs * 64 * 4,
  1095.                       0x1000);
  1096.     assert(bo);
  1097.     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
  1098.     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;
  1099.  
  1100.     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
  1101.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1102.                       "bsd mpc row store",
  1103.                       width_in_mbs * 64 * 2,
  1104.                       0x1000);
  1105.     assert(bo);
  1106.     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
  1107.     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
  1108.  
  1109.     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
  1110.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1111.                       "mpr row store",
  1112.                       width_in_mbs * 64 * 2,
  1113.                       0x1000);
  1114.     assert(bo);
  1115.     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
  1116.     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;
  1117.  
  1118.     gen7_mfd_context->bitplane_read_buffer.valid = 0;
  1119. }
  1120.  
/*
 * Top-level AVC decode entry point: runs per-frame init, then emits the
 * frame-level MFX state followed by per-slice state and BSD objects, all
 * inside one atomic BCS batch that is flushed at the end.
 */
static void
gen75_mfd_avc_decode_picture(VADriverContextP ctx,
                            struct decode_state *decode_state,
                            struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferH264 *pic_param;
    VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    gen75_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);

    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    /* frame-level state */
    gen75_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen75_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen75_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen75_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen75_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
    gen75_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);
    gen75_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);

    /* one slice-parameter buffer (j) may carry several slices (i) */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);

        /* first slice of the next parameter buffer, or NULL at the end */
        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;

        /* cover any macroblocks before the first real slice */
        if (j == 0 && slice_param->first_mb_in_slice)
            gen75_mfd_avc_phantom_slice_first(ctx, pic_param, slice_param, gen7_mfd_context);

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            /* look ahead so slice state can encode the next slice position */
            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            /* per-slice state, then the BSD object that kicks decoding */
            gen75_mfd_avc_directmode_state(ctx, decode_state, pic_param, slice_param, gen7_mfd_context);
            gen75_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
            gen75_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
            gen75_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
            gen75_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
  1185.  
  1186. static void
  1187. gen75_mfd_mpeg2_decode_init(VADriverContextP ctx,
  1188.                            struct decode_state *decode_state,
  1189.                            struct gen7_mfd_context *gen7_mfd_context)
  1190. {
  1191.     VAPictureParameterBufferMPEG2 *pic_param;
  1192.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1193.     struct object_surface *obj_surface;
  1194.     dri_bo *bo;
  1195.     unsigned int width_in_mbs;
  1196.  
  1197.     assert(decode_state->pic_param && decode_state->pic_param->buffer);
  1198.     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
  1199.     width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
  1200.  
  1201.     mpeg2_set_reference_surfaces(
  1202.         ctx,
  1203.         gen7_mfd_context->reference_surface,
  1204.         decode_state,
  1205.         pic_param
  1206.     );
  1207.  
  1208.     /* Current decoded picture */
  1209.     obj_surface = decode_state->render_object;
  1210.     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
  1211.  
  1212.     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
  1213.     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
  1214.     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
  1215.     gen7_mfd_context->pre_deblocking_output.valid = 1;
  1216.  
  1217.     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
  1218.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1219.                       "bsd mpc row store",
  1220.                       width_in_mbs * 96,
  1221.                       0x1000);
  1222.     assert(bo);
  1223.     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
  1224.     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
  1225.  
  1226.     gen7_mfd_context->post_deblocking_output.valid = 0;
  1227.     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
  1228.     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
  1229.     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
  1230.     gen7_mfd_context->bitplane_read_buffer.valid = 0;
  1231. }
  1232.  
/*
 * Emit MFX_MPEG2_PIC_STATE: motion-vector f_codes, picture coding
 * extension flags, the picture coding type, and the frame size in
 * macroblocks.
 */
static void
gen75_mfd_mpeg2_pic_state(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferMPEG2 *pic_param;
    unsigned int slice_concealment_disable_bit = 0;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;

    /* HW slice concealment is unconditionally disabled here */
    slice_concealment_disable_bit = 1;

    BEGIN_BCS_BATCH(batch, 13);
    OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
    /* f_code nibbles are packed [0][0]..[1][1] from the top of the VA
     * field, hence the reversed unpacking order below */
    OUT_BCS_BATCH(batch,
                  (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
                  ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
                  ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
                  ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
                  pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
                  pic_param->picture_coding_extension.bits.picture_structure << 12 |
                  pic_param->picture_coding_extension.bits.top_field_first << 11 |
                  pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
                  pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
                  pic_param->picture_coding_extension.bits.q_scale_type << 8 |
                  pic_param->picture_coding_extension.bits.intra_vlc_format << 7 |
                  pic_param->picture_coding_extension.bits.alternate_scan << 6);
    OUT_BCS_BATCH(batch,
                  pic_param->picture_coding_type << 9);
    /* picture size in macroblocks, minus one in each dimension */
    OUT_BCS_BATCH(batch,
                  (slice_concealment_disable_bit << 31) |
                  ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
                  ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  1279.  
/*
 * Update the context's cached MPEG-2 IQ matrices from the incoming
 * VAIQMatrixBufferMPEG2 (converting from zigzag to raster order), then
 * send any loaded matrices to the HW via MFX_QM_STATE.
 */
static void
gen75_mfd_mpeg2_qm_state(VADriverContextP ctx,
                        struct decode_state *decode_state,
                        struct gen7_mfd_context *gen7_mfd_context)
{
    VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
    int i, j;

    /* Update internal QM state */
    if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
        VAIQMatrixBufferMPEG2 * const iq_matrix =
            (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;

        /* load_* == -1 acts as a "never loaded" sentinel; otherwise the
         * cache is only overwritten when the app requests a new load */
        if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
            iq_matrix->load_intra_quantiser_matrix) {
            gen_iq_matrix->load_intra_quantiser_matrix =
                iq_matrix->load_intra_quantiser_matrix;
            if (iq_matrix->load_intra_quantiser_matrix) {
                /* input is in zigzag scan order; store raster-order */
                for (j = 0; j < 64; j++)
                    gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
                        iq_matrix->intra_quantiser_matrix[j];
            }
        }

        if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
            iq_matrix->load_non_intra_quantiser_matrix) {
            gen_iq_matrix->load_non_intra_quantiser_matrix =
                iq_matrix->load_non_intra_quantiser_matrix;
            if (iq_matrix->load_non_intra_quantiser_matrix) {
                for (j = 0; j < 64; j++)
                    gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
                        iq_matrix->non_intra_quantiser_matrix[j];
            }
        }
    }

    /* Commit QM state to HW */
    for (i = 0; i < 2; i++) {
        unsigned char *qm = NULL;
        int qm_type;

        /* i == 0: intra matrix; i == 1: non-intra matrix */
        if (i == 0) {
            if (gen_iq_matrix->load_intra_quantiser_matrix) {
                qm = gen_iq_matrix->intra_quantiser_matrix;
                qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
            }
        } else {
            if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
                qm = gen_iq_matrix->non_intra_quantiser_matrix;
                qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
            }
        }

        /* qm (and qm_type) stay unset when this matrix was never loaded */
        if (!qm)
            continue;

        gen75_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
    }
}
  1339.  
  1340. static void
  1341. gen75_mfd_mpeg2_bsd_object(VADriverContextP ctx,
  1342.                           VAPictureParameterBufferMPEG2 *pic_param,
  1343.                           VASliceParameterBufferMPEG2 *slice_param,
  1344.                           VASliceParameterBufferMPEG2 *next_slice_param,
  1345.                           struct gen7_mfd_context *gen7_mfd_context)
  1346. {
  1347.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  1348.     unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
  1349.     int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;
  1350.  
  1351.     if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
  1352.         pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
  1353.         is_field_pic = 1;
  1354.     is_field_pic_wa = is_field_pic &&
  1355.         gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;
  1356.  
  1357.     vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
  1358.     hpos0 = slice_param->slice_horizontal_position;
  1359.  
  1360.     if (next_slice_param == NULL) {
  1361.         vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
  1362.         hpos1 = 0;
  1363.     } else {
  1364.         vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
  1365.         hpos1 = next_slice_param->slice_horizontal_position;
  1366.     }
  1367.  
  1368.     mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);
  1369.  
  1370.     BEGIN_BCS_BATCH(batch, 5);
  1371.     OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
  1372.     OUT_BCS_BATCH(batch,
  1373.                   slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
  1374.     OUT_BCS_BATCH(batch,
  1375.                   slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
  1376.     OUT_BCS_BATCH(batch,
  1377.                   hpos0 << 24 |
  1378.                   vpos0 << 16 |
  1379.                   mb_count << 8 |
  1380.                   (next_slice_param == NULL) << 5 |
  1381.                   (next_slice_param == NULL) << 3 |
  1382.                   (slice_param->macroblock_offset & 0x7));
  1383.     OUT_BCS_BATCH(batch,
  1384.                   (slice_param->quantiser_scale_code << 24) |
  1385.                   (vpos1 << 8 | hpos1));
  1386.     ADVANCE_BCS_BATCH(batch);
  1387. }
  1388.  
  1389. static void
  1390. gen75_mfd_mpeg2_decode_picture(VADriverContextP ctx,
  1391.                               struct decode_state *decode_state,
  1392.                               struct gen7_mfd_context *gen7_mfd_context)
  1393. {
  1394.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  1395.     VAPictureParameterBufferMPEG2 *pic_param;
  1396.     VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
  1397.     dri_bo *slice_data_bo;
  1398.     int i, j;
  1399.  
  1400.     assert(decode_state->pic_param && decode_state->pic_param->buffer);
  1401.     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
  1402.  
  1403.     gen75_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
  1404.     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
  1405.     intel_batchbuffer_emit_mi_flush(batch);
  1406.     gen75_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
  1407.     gen75_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
  1408.     gen75_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
  1409.     gen75_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
  1410.     gen75_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
  1411.     gen75_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);
  1412.  
  1413.     if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
  1414.         gen7_mfd_context->wa_mpeg2_slice_vertical_position =
  1415.             mpeg2_wa_slice_vertical_position(decode_state, pic_param);
  1416.  
  1417.     for (j = 0; j < decode_state->num_slice_params; j++) {
  1418.         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
  1419.         slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
  1420.         slice_data_bo = decode_state->slice_datas[j]->bo;
  1421.         gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);
  1422.  
  1423.         if (j == decode_state->num_slice_params - 1)
  1424.             next_slice_group_param = NULL;
  1425.         else
  1426.             next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;
  1427.  
  1428.         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
  1429.             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
  1430.  
  1431.             if (i < decode_state->slice_params[j]->num_elements - 1)
  1432.                 next_slice_param = slice_param + 1;
  1433.             else
  1434.                 next_slice_param = next_slice_group_param;
  1435.  
  1436.             gen75_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
  1437.             slice_param++;
  1438.         }
  1439.     }
  1440.  
  1441.     intel_batchbuffer_end_atomic(batch);
  1442.     intel_batchbuffer_flush(batch);
  1443. }
  1444.  
/* Map VA-API VC-1 picture_type (I, P, B, BI, skipped) to the GEN7 MFX
 * encoding. A skipped picture is submitted to the hardware as a P picture. */
static const int va_to_gen7_vc1_pic_type[5] = {
    GEN7_VC1_I_PICTURE,
    GEN7_VC1_P_PICTURE,
    GEN7_VC1_B_PICTURE,
    GEN7_VC1_BI_PICTURE,
    GEN7_VC1_P_PICTURE,
};

/* Map VAMvMode* motion-vector modes to the GEN7 unified MV mode field. */
static const int va_to_gen7_vc1_mv[4] = {
    1, /* 1-MV */
    2, /* 1-MV half-pel */
    3, /* 1-MV half-pel bilinear */
    0, /* Mixed MV */
};

/* B-picture temporal scale factors, indexed by
 * VAPictureParameterBufferVC1.b_picture_fraction (the coded BFRACTION
 * value); used to derive the backward reference frame distance. */
static const int b_picture_scale_factor[21] = {
    128, 85,  170, 64,  192,
    51,  102, 153, 204, 43,
    215, 37,  74,  111, 148,
    185, 222, 32,  96,  160,
    224,
};

/* Map conditional_overlap_flag (0..2) to the GEN7 CONDOVER encoding. */
static const int va_to_gen7_vc1_condover[3] = {
    0,
    2,
    3
};

/* Map sequence_fields.bits.profile to the GEN7 VC-1 profile encoding. */
static const int va_to_gen7_vc1_profile[4] = {
    GEN7_VC1_SIMPLE_PROFILE,
    GEN7_VC1_MAIN_PROFILE,
    GEN7_VC1_RESERVED_PROFILE,
    GEN7_VC1_ADVANCED_PROFILE
};
  1480.  
  1481. static void
  1482. gen75_mfd_free_vc1_surface(void **data)
  1483. {
  1484.     struct gen7_vc1_surface *gen7_vc1_surface = *data;
  1485.  
  1486.     if (!gen7_vc1_surface)
  1487.         return;
  1488.  
  1489.     dri_bo_unreference(gen7_vc1_surface->dmv);
  1490.     free(gen7_vc1_surface);
  1491.     *data = NULL;
  1492. }
  1493.  
  1494. static void
  1495. gen75_mfd_init_vc1_surface(VADriverContextP ctx,
  1496.                           VAPictureParameterBufferVC1 *pic_param,
  1497.                           struct object_surface *obj_surface)
  1498. {
  1499.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1500.     struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
  1501.     int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
  1502.     int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
  1503.  
  1504.     obj_surface->free_private_data = gen75_mfd_free_vc1_surface;
  1505.  
  1506.     if (!gen7_vc1_surface) {
  1507.         gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
  1508.         assert((obj_surface->size & 0x3f) == 0);
  1509.         obj_surface->private_data = gen7_vc1_surface;
  1510.     }
  1511.  
  1512.     gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
  1513.  
  1514.     if (gen7_vc1_surface->dmv == NULL) {
  1515.         gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
  1516.                                              "direct mv w/r buffer",
  1517.                                              width_in_mbs * height_in_mbs * 64,
  1518.                                              0x1000);
  1519.     }
  1520. }
  1521.  
/*
 * Per-picture setup for VC-1 decoding: refreshes the reference frame-store
 * index, ensures the render surface and its direct-MV buffer exist, wires
 * up the pre-/post-deblocking outputs, (re)allocates the row-store scratch
 * buffers, and repacks the VA bitplane buffer into the hardware layout.
 */
static void
gen75_mfd_vc1_decode_init(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    VAPictureParameterBufferVC1 *pic_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    dri_bo *bo;
    int width_in_mbs;
    int picture_type;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
    width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
    picture_type = pic_param->picture_fields.bits.picture_type;

    intel_update_vc1_frame_store_index(ctx,
                                       decode_state,
                                       pic_param,
                                       gen7_mfd_context->reference_surface);

    /* Current decoded picture */
    obj_surface = decode_state->render_object;
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
    gen75_mfd_init_vc1_surface(ctx, pic_param, obj_surface);

    /* Both deblocking outputs alias the render surface BO; exactly one is
     * marked valid depending on whether the in-loop filter is enabled. */
    dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;

    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;

    /* Row-store scratch buffers are sized per macroblock column. */
    dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "intra row store",
                      width_in_mbs * 64,
                      0x1000);
    assert(bo);
    gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "deblocking filter row store",
                      width_in_mbs * 7 * 64,
                      0x1000);
    assert(bo);
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 96,
                      0x1000);
    assert(bo);
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    /* MPR row store is not used for VC-1 decode. */
    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;

    gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
    dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);

    if (gen7_mfd_context->bitplane_read_buffer.valid) {
        int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
        int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
        int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
        int src_w, src_h;
        uint8_t *src = NULL, *dst = NULL;

        assert(decode_state->bit_plane->buffer);
        src = decode_state->bit_plane->buffer;

        /* Destination layout: one row of bitplane_width bytes per MB row,
         * two macroblocks (4 bits each) packed per byte. */
        bo = dri_bo_alloc(i965->intel.bufmgr,
                          "VC-1 Bitplane",
                          bitplane_width * height_in_mbs,
                          0x1000);
        assert(bo);
        gen7_mfd_context->bitplane_read_buffer.bo = bo;

        dri_bo_map(bo, True); /* True: map for writing */
        assert(bo->virtual);
        dst = bo->virtual;

        for (src_h = 0; src_h < height_in_mbs; src_h++) {
            for(src_w = 0; src_w < width_in_mbs; src_w++) {
                int src_index, dst_index;
                int src_shift;
                uint8_t src_value;

                /* Source packs two MBs per byte; even MB index sits in
                 * the high nibble (shift 4), odd in the low nibble. */
                src_index = (src_h * width_in_mbs + src_w) / 2;
                src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
                src_value = ((src[src_index] >> src_shift) & 0xf);

                if (picture_type == GEN7_VC1_SKIPPED_PICTURE){
                    /* NOTE(review): presumably forces the SKIPMB bit for
                     * every macroblock of a skipped picture — confirm
                     * against the MFX bitplane format. */
                    src_value |= 0x2;
                }

                /* Shift the accumulating byte down and insert the new
                 * nibble at the top; after two MBs the earlier one has
                 * settled into the low nibble. */
                dst_index = src_w / 2;
                dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
            }

            /* Odd MB count per row: the final byte only received one
             * nibble, so shift it into its final (low-nibble-first)
             * position. */
            if (src_w & 1)
                dst[src_w / 2] >>= 4;

            dst += bitplane_width;
        }

        dri_bo_unmap(bo);
    } else
        gen7_mfd_context->bitplane_read_buffer.bo = NULL;
}
  1640.  
/*
 * Translate the VA-API VC-1 picture parameters into an
 * MFD_VC1_LONG_PIC_STATE command. This derives all the hardware-facing
 * values (ALTPQUANT config, unified MV mode, B-fraction scale factor,
 * frame coding mode, overlap smoothing, interpolation mode, ...) before
 * packing them into the 6-dword command.
 */
static void
gen75_mfd_vc1_pic_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferVC1 *pic_param;
    struct object_surface *obj_surface;
    int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
    int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
    int unified_mv_mode;
    int ref_field_pic_polarity = 0;
    int scale_factor = 0;
    int trans_ac_y = 0;
    int dmv_surface_valid = 0;
    int brfd = 0;
    int fcm = 0;
    int picture_type;
    int profile;
    int overlap;
    int interpolation_mode = 0;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;

    profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
    dquant = pic_param->pic_quantizer_fields.bits.dquant;
    dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
    dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
    dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
    dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
    dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
    alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;

    /* Derive the ALTPQUANT configuration and edge mask from the VC-1
     * DQUANT syntax elements. */
    if (dquant == 0) {
        alt_pquant_config = 0;
        alt_pquant_edge_mask = 0;
    } else if (dquant == 2) {
        /* DQUANT == 2: alternate quantizer on all four edges. */
        alt_pquant_config = 1;
        alt_pquant_edge_mask = 0xf;
    } else {
        assert(dquant == 1);
        if (dquantfrm == 0) {
            alt_pquant_config = 0;
            alt_pquant_edge_mask = 0;
            alt_pq = 0;
        } else {
            assert(dquantfrm == 1);
            alt_pquant_config = 1;

            switch (dqprofile) {
            case 3: /* all macroblocks; binary level selects the mode */
                if (dqbilevel == 0) {
                    alt_pquant_config = 2;
                    alt_pquant_edge_mask = 0;
                } else {
                    assert(dqbilevel == 1);
                    alt_pquant_config = 3;
                    alt_pquant_edge_mask = 0;
                }
                break;

            case 0: /* all four edges */
                alt_pquant_edge_mask = 0xf;
                break;

            case 1: /* double edge */
                if (dqdbedge == 3)
                    alt_pquant_edge_mask = 0x9;
                else
                    alt_pquant_edge_mask = (0x3 << dqdbedge);

                break;

            case 2: /* single edge */
                alt_pquant_edge_mask = (0x1 << dqsbedge);
                break;

            default:
                assert(0);
            }
        }
    }

    /* With intensity compensation the effective MV mode is mv_mode2. */
    if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
        assert(pic_param->mv_fields.bits.mv_mode2 < 4);
        unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
    } else {
        assert(pic_param->mv_fields.bits.mv_mode < 4);
        unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
    }

    if (pic_param->sequence_fields.bits.interlace == 1 &&
        pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
        /* FIXME: calculate reference field picture polarity */
        assert(0);
        ref_field_pic_polarity = 0;
    }

    /* BFRACTION-derived temporal scale factor for B pictures. */
    if (pic_param->b_picture_fraction < 21)
        scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];

    picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];

    /* Advanced-profile I pictures are programmed as BI pictures. */
    if (profile == GEN7_VC1_ADVANCED_PROFILE &&
        picture_type == GEN7_VC1_I_PICTURE)
        picture_type = GEN7_VC1_BI_PICTURE;

    if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
        trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
    else {
        trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;

        /*
         * 8.3.6.2.1 Transform Type Selection
         * If variable-sized transform coding is not enabled,
         * then the 8x8 transform shall be used for all blocks.
         * it is also MFX_VC1_PIC_STATE requirement.
         */
        if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
            pic_param->transform_fields.bits.mb_level_transform_type_flag   = 1;
            pic_param->transform_fields.bits.frame_level_transform_type     = 0;
        }
    }

    /* B pictures can only use direct-MV prediction when the backward
     * reference carries a direct-MV buffer from a P picture. */
    if (picture_type == GEN7_VC1_B_PICTURE) {
        struct gen7_vc1_surface *gen7_vc1_surface = NULL;

        obj_surface = decode_state->reference_objects[1];

        if (obj_surface)
            gen7_vc1_surface = obj_surface->private_data;

        if (!gen7_vc1_surface ||
            (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
             va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
            dmv_surface_valid = 0;
        else
            dmv_surface_valid = 1;
    }

    assert(pic_param->picture_fields.bits.frame_coding_mode < 3);

    /* FCM: 0/1 = progressive/frame-interlace; field-interlace is encoded
     * as 2 (top field first) or 3 (bottom field first). */
    if (pic_param->picture_fields.bits.frame_coding_mode < 2)
        fcm = pic_param->picture_fields.bits.frame_coding_mode;
    else {
        if (pic_param->picture_fields.bits.top_field_first)
            fcm = 2;
        else
            fcm = 3;
    }

    /* Backward reference frame distance for B pictures, derived from the
     * scale factor (clamped at zero). */
    if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
        brfd = pic_param->reference_fields.bits.reference_distance;
        brfd = (scale_factor * brfd) >> 8;
        brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;

        if (brfd < 0)
            brfd = 0;
    }

    /* Overlap smoothing: simple/main profile keys off PQUANT only; the
     * advanced profile additionally honours CONDOVER for I/BI pictures. */
    overlap = 0;
    if (profile != GEN7_VC1_ADVANCED_PROFILE){
        if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
            pic_param->picture_fields.bits.picture_type != GEN7_VC1_B_PICTURE) {
            overlap = 1;
        }
    }else {
        if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_P_PICTURE &&
             pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
              overlap = 1;
        }
        if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_I_PICTURE ||
            pic_param->picture_fields.bits.picture_type == GEN7_VC1_BI_PICTURE){
             if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
                overlap = 1;
             } else if (va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 2 ||
                        va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 3) {
                 overlap = 1;
             }
        }
    }

    assert(pic_param->conditional_overlap_flag < 3);
    assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */

    /* Sub-pel interpolation filter selection from the (effective) MV mode. */
    if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
        (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
         pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
        interpolation_mode = 9; /* Half-pel bilinear */
    else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
             (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
              pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
        interpolation_mode = 1; /* Half-pel bicubic */
    else
        interpolation_mode = 0; /* Quarter-pel bicubic */

    /* Pack everything into the 6-dword MFD_VC1_LONG_PIC_STATE command. */
    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
    OUT_BCS_BATCH(batch,
                  (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
                  ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
    OUT_BCS_BATCH(batch,
                  ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
                  dmv_surface_valid << 15 |
                  (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
                  pic_param->rounding_control << 13 |
                  pic_param->sequence_fields.bits.syncmarker << 12 |
                  interpolation_mode << 8 |
                  0 << 7 | /* FIXME: scale up or down ??? */
                  pic_param->range_reduction_frame << 6 |
                  pic_param->entrypoint_fields.bits.loopfilter << 5 |
                  overlap << 4 |
                  !pic_param->picture_fields.bits.is_first_field << 3 |
                  (pic_param->sequence_fields.bits.profile == 3) << 0);
    OUT_BCS_BATCH(batch,
                  va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
                  picture_type << 26 |
                  fcm << 24 |
                  alt_pq << 16 |
                  pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
                  scale_factor << 0);
    OUT_BCS_BATCH(batch,
                  unified_mv_mode << 28 |
                  pic_param->mv_fields.bits.four_mv_switch << 27 |
                  pic_param->fast_uvmc_flag << 26 |
                  ref_field_pic_polarity << 25 |
                  pic_param->reference_fields.bits.num_reference_pictures << 24 |
                  pic_param->reference_fields.bits.reference_distance << 20 |
                  pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
                  pic_param->mv_fields.bits.extended_dmv_range << 10 |
                  pic_param->mv_fields.bits.extended_mv_range << 8 |
                  alt_pquant_edge_mask << 4 |
                  alt_pquant_config << 2 |
                  pic_param->pic_quantizer_fields.bits.half_qp << 1 |
                  pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
    OUT_BCS_BATCH(batch,
                  !!pic_param->bitplane_present.value << 31 |
                  !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
                  !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
                  !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
                  !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
                  !pic_param->bitplane_present.flags.bp_overflags << 26 |
                  !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
                  !pic_param->bitplane_present.flags.bp_field_tx << 24 |
                  pic_param->mv_fields.bits.mv_table << 20 |
                  pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
                  pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
                  pic_param->transform_fields.bits.frame_level_transform_type << 12 |
                  pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
                  pic_param->mb_mode_table << 8 |
                  trans_ac_y << 6 |
                  pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
                  pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
                  pic_param->cbp_table << 0);
    ADVANCE_BCS_BATCH(batch);
}
  1898.  
  1899. static void
  1900. gen75_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
  1901.                              struct decode_state *decode_state,
  1902.                              struct gen7_mfd_context *gen7_mfd_context)
  1903. {
  1904.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  1905.     VAPictureParameterBufferVC1 *pic_param;
  1906.     int intensitycomp_single;
  1907.  
  1908.     assert(decode_state->pic_param && decode_state->pic_param->buffer);
  1909.     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
  1910.  
  1911.     assert(decode_state->pic_param && decode_state->pic_param->buffer);
  1912.     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
  1913.     intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
  1914.  
  1915.     BEGIN_BCS_BATCH(batch, 6);
  1916.     OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
  1917.     OUT_BCS_BATCH(batch,
  1918.                   0 << 14 | /* FIXME: double ??? */
  1919.                   0 << 12 |
  1920.                   intensitycomp_single << 10 |
  1921.                   intensitycomp_single << 8 |
  1922.                   0 << 4 | /* FIXME: interlace mode */
  1923.                   0);
  1924.     OUT_BCS_BATCH(batch,
  1925.                   pic_param->luma_shift << 16 |
  1926.                   pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
  1927.     OUT_BCS_BATCH(batch, 0);
  1928.     OUT_BCS_BATCH(batch, 0);
  1929.     OUT_BCS_BATCH(batch, 0);
  1930.     ADVANCE_BCS_BATCH(batch);
  1931. }
  1932.  
  1933. static void
  1934. gen75_mfd_vc1_directmode_state_bplus(VADriverContextP ctx,
  1935.                               struct decode_state *decode_state,
  1936.                               struct gen7_mfd_context *gen7_mfd_context)
  1937. {
  1938.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  1939.     struct object_surface *obj_surface;
  1940.     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
  1941.  
  1942.     obj_surface = decode_state->render_object;
  1943.  
  1944.     if (obj_surface && obj_surface->private_data) {
  1945.         dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
  1946.     }
  1947.  
  1948.     obj_surface = decode_state->reference_objects[1];
  1949.  
  1950.     if (obj_surface && obj_surface->private_data) {
  1951.         dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
  1952.     }
  1953.  
  1954.     BEGIN_BCS_BATCH(batch, 7);
  1955.     OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2));
  1956.  
  1957.     if (dmv_write_buffer)
  1958.         OUT_BCS_RELOC(batch, dmv_write_buffer,
  1959.                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
  1960.                       0);
  1961.     else
  1962.         OUT_BCS_BATCH(batch, 0);
  1963.  
  1964.         OUT_BCS_BATCH(batch, 0);
  1965.         OUT_BCS_BATCH(batch, 0);
  1966.  
  1967.     if (dmv_read_buffer)
  1968.         OUT_BCS_RELOC(batch, dmv_read_buffer,
  1969.                       I915_GEM_DOMAIN_INSTRUCTION, 0,
  1970.                       0);
  1971.     else
  1972.         OUT_BCS_BATCH(batch, 0);
  1973.         OUT_BCS_BATCH(batch, 0);
  1974.         OUT_BCS_BATCH(batch, 0);
  1975.                  
  1976.     ADVANCE_BCS_BATCH(batch);
  1977. }
  1978.  
  1979. static void
  1980. gen75_mfd_vc1_directmode_state(VADriverContextP ctx,
  1981.                               struct decode_state *decode_state,
  1982.                               struct gen7_mfd_context *gen7_mfd_context)
  1983. {
  1984.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  1985.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1986.     struct object_surface *obj_surface;
  1987.     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
  1988.  
  1989.     if (IS_STEPPING_BPLUS(i965)) {
  1990.         gen75_mfd_vc1_directmode_state_bplus(ctx, decode_state, gen7_mfd_context);
  1991.         return;
  1992.     }
  1993.  
  1994.     obj_surface = decode_state->render_object;
  1995.  
  1996.     if (obj_surface && obj_surface->private_data) {
  1997.         dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
  1998.     }
  1999.  
  2000.     obj_surface = decode_state->reference_objects[1];
  2001.  
  2002.     if (obj_surface && obj_surface->private_data) {
  2003.         dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
  2004.     }
  2005.  
  2006.     BEGIN_BCS_BATCH(batch, 3);
  2007.     OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (3 - 2));
  2008.  
  2009.     if (dmv_write_buffer)
  2010.         OUT_BCS_RELOC(batch, dmv_write_buffer,
  2011.                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
  2012.                       0);
  2013.     else
  2014.         OUT_BCS_BATCH(batch, 0);
  2015.  
  2016.     if (dmv_read_buffer)
  2017.         OUT_BCS_RELOC(batch, dmv_read_buffer,
  2018.                       I915_GEM_DOMAIN_INSTRUCTION, 0,
  2019.                       0);
  2020.     else
  2021.         OUT_BCS_BATCH(batch, 0);
  2022.                  
  2023.     ADVANCE_BCS_BATCH(batch);
  2024. }
  2025.  
  2026. static int
  2027. gen75_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
  2028. {
  2029.     int out_slice_data_bit_offset;
  2030.     int slice_header_size = in_slice_data_bit_offset / 8;
  2031.     int i, j;
  2032.  
  2033.     if (profile != 3)
  2034.         out_slice_data_bit_offset = in_slice_data_bit_offset;
  2035.     else {
  2036.         for (i = 0, j = 0; i < slice_header_size; i++, j++) {
  2037.             if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3 && buf[j + 3] < 4) {
  2038.                 i++, j += 2;
  2039.             }
  2040.         }
  2041.  
  2042.         out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
  2043.     }
  2044.  
  2045.     return out_slice_data_bit_offset;
  2046. }
  2047.  
/*
 * Emit one MFD_VC1_BSD_OBJECT command for a slice.  The slice data bo
 * is mapped read-only just long enough to rebase the macroblock bit
 * offset (advanced-profile emulation bytes shift it, see
 * gen75_mfd_vc1_get_macroblock_bit_offset).  The command then points
 * the BSD unit at the byte-aligned start of macroblock data and passes
 * the residual bit offset separately.
 */
static void
gen75_mfd_vc1_bsd_object(VADriverContextP ctx,
                        VAPictureParameterBufferVC1 *pic_param,
                        VASliceParameterBufferVC1 *slice_param,
                        VASliceParameterBufferVC1 *next_slice_param,
                        dri_bo *slice_data_bo,
                        struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int next_slice_start_vert_pos;
    int macroblock_offset;
    uint8_t *slice_data = NULL;

    /* NOTE(review): dri_bo_map's return value is not checked — confirm
     * failure here is impossible/acceptable in this driver. */
    dri_bo_map(slice_data_bo, 0);
    slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
    macroblock_offset = gen75_mfd_vc1_get_macroblock_bit_offset(slice_data,
                                                               slice_param->macroblock_offset,
                                                               pic_param->sequence_fields.bits.profile);
    dri_bo_unmap(slice_data_bo);

    /* The last slice extends to the bottom macroblock row of the picture. */
    if (next_slice_param)
        next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
    else
        next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
    /* Data length/offset are adjusted by the whole bytes consumed by the
     * slice header; the remaining 0..7 bits go into the last dword. */
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_size - (macroblock_offset >> 3));
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_offset + (macroblock_offset >> 3));
    OUT_BCS_BATCH(batch,
                  slice_param->slice_vertical_position << 16 |
                  next_slice_start_vert_pos << 0);
    OUT_BCS_BATCH(batch,
                  (macroblock_offset & 0x7));
    ADVANCE_BCS_BATCH(batch);
}
  2086.  
/*
 * Decode one VC-1 picture: program the per-picture MFX pipeline state,
 * then walk every slice parameter buffer and emit one BSD object per
 * slice.  The whole sequence is built atomically into the BCS batch.
 */
static void
gen75_mfd_vc1_decode_picture(VADriverContextP ctx,
                            struct decode_state *decode_state,
                            struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferVC1 *pic_param;
    VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;

    /* Per-picture fixed state (surfaces, buffer addresses, VC-1 state). */
    gen75_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen75_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen75_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen75_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen75_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen75_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
    gen75_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
    gen75_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);

    /* Outer loop: slice parameter buffers (groups); inner loop: slices. */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);

        /* First slice of the next group, used to bound the last slice here. */
        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen75_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
  2139.  
  2140. static void
  2141. gen75_mfd_jpeg_decode_init(VADriverContextP ctx,
  2142.                           struct decode_state *decode_state,
  2143.                           struct gen7_mfd_context *gen7_mfd_context)
  2144. {
  2145.     struct object_surface *obj_surface;
  2146.     VAPictureParameterBufferJPEGBaseline *pic_param;
  2147.     int subsampling = SUBSAMPLE_YUV420;
  2148.     int fourcc = VA_FOURCC_IMC3;
  2149.  
  2150.     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
  2151.  
  2152.     if (pic_param->num_components == 1) {
  2153.         subsampling = SUBSAMPLE_YUV400;
  2154.         fourcc = VA_FOURCC_Y800;
  2155.     } else if (pic_param->num_components == 3) {
  2156.         int h1 = pic_param->components[0].h_sampling_factor;
  2157.         int h2 = pic_param->components[1].h_sampling_factor;
  2158.         int h3 = pic_param->components[2].h_sampling_factor;
  2159.         int v1 = pic_param->components[0].v_sampling_factor;
  2160.         int v2 = pic_param->components[1].v_sampling_factor;
  2161.         int v3 = pic_param->components[2].v_sampling_factor;
  2162.  
  2163.         if (h1 == 2 && h2 == 1 && h3 == 1 &&
  2164.             v1 == 2 && v2 == 1 && v3 == 1) {
  2165.             subsampling = SUBSAMPLE_YUV420;
  2166.             fourcc = VA_FOURCC_IMC3;
  2167.         } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
  2168.                    v1 == 1 && v2 == 1 && v3 == 1) {
  2169.             subsampling = SUBSAMPLE_YUV422H;
  2170.             fourcc = VA_FOURCC_422H;
  2171.         } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
  2172.                    v1 == 1 && v2 == 1 && v3 == 1) {
  2173.             subsampling = SUBSAMPLE_YUV444;
  2174.             fourcc = VA_FOURCC_444P;
  2175.         } else if (h1 == 4 && h2 == 1 && h3 == 1 &&
  2176.                    v1 == 1 && v2 == 1 && v3 == 1) {
  2177.             subsampling = SUBSAMPLE_YUV411;
  2178.             fourcc = VA_FOURCC_411P;
  2179.         } else if (h1 == 1 && h2 == 1 && h3 == 1 &&
  2180.                    v1 == 2 && v2 == 1 && v3 == 1) {
  2181.             subsampling = SUBSAMPLE_YUV422V;
  2182.             fourcc = VA_FOURCC_422V;
  2183.         } else if (h1 == 2 && h2 == 1 && h3 == 1 &&
  2184.                    v1 == 2 && v2 == 2 && v3 == 2) {
  2185.             subsampling = SUBSAMPLE_YUV422H;
  2186.             fourcc = VA_FOURCC_422H;
  2187.         } else if (h2 == 2 && h2 == 2 && h3 == 2 &&
  2188.                    v1 == 2 && v2 == 1 && v3 == 1) {
  2189.             subsampling = SUBSAMPLE_YUV422V;
  2190.             fourcc = VA_FOURCC_422V;
  2191.         } else
  2192.             assert(0);
  2193.     } else {
  2194.         assert(0);
  2195.     }
  2196.  
  2197.     /* Current decoded picture */
  2198.     obj_surface = decode_state->render_object;
  2199.     i965_check_alloc_surface_bo(ctx, obj_surface, 1, fourcc, subsampling);
  2200.  
  2201.     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
  2202.     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
  2203.     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
  2204.     gen7_mfd_context->pre_deblocking_output.valid = 1;
  2205.  
  2206.     gen7_mfd_context->post_deblocking_output.bo = NULL;
  2207.     gen7_mfd_context->post_deblocking_output.valid = 0;
  2208.  
  2209.     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
  2210.     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
  2211.  
  2212.     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
  2213.     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
  2214.  
  2215.     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
  2216.     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
  2217.  
  2218.     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
  2219.     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
  2220.  
  2221.     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
  2222.     gen7_mfd_context->bitplane_read_buffer.valid = 0;
  2223. }
  2224.  
/* Map a rotation index (0/90/180/270 degrees) to the GEN7 JPEG rotation
 * field of MFX_JPEG_PIC_STATE.  Only entry 0 (no rotation) is used by
 * gen75_mfd_jpeg_pic_state in this file. */
static const int va_to_gen7_jpeg_rotation[4] = {
    GEN7_JPEG_ROTATION_0,
    GEN7_JPEG_ROTATION_90,
    GEN7_JPEG_ROTATION_180,
    GEN7_JPEG_ROTATION_270
};
  2231.  
  2232. static void
  2233. gen75_mfd_jpeg_pic_state(VADriverContextP ctx,
  2234.                         struct decode_state *decode_state,
  2235.                         struct gen7_mfd_context *gen7_mfd_context)
  2236. {
  2237.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  2238.     VAPictureParameterBufferJPEGBaseline *pic_param;
  2239.     int chroma_type = GEN7_YUV420;
  2240.     int frame_width_in_blks;
  2241.     int frame_height_in_blks;
  2242.  
  2243.     assert(decode_state->pic_param && decode_state->pic_param->buffer);
  2244.     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
  2245.  
  2246.     if (pic_param->num_components == 1)
  2247.         chroma_type = GEN7_YUV400;
  2248.     else if (pic_param->num_components == 3) {
  2249.         int h1 = pic_param->components[0].h_sampling_factor;
  2250.         int h2 = pic_param->components[1].h_sampling_factor;
  2251.         int h3 = pic_param->components[2].h_sampling_factor;
  2252.         int v1 = pic_param->components[0].v_sampling_factor;
  2253.         int v2 = pic_param->components[1].v_sampling_factor;
  2254.         int v3 = pic_param->components[2].v_sampling_factor;
  2255.  
  2256.         if (h1 == 2 && h2 == 1 && h3 == 1 &&
  2257.             v1 == 2 && v2 == 1 && v3 == 1)
  2258.             chroma_type = GEN7_YUV420;
  2259.         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
  2260.                  v1 == 1 && v2 == 1 && v3 == 1)
  2261.             chroma_type = GEN7_YUV422H_2Y;
  2262.         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
  2263.                  v1 == 1 && v2 == 1 && v3 == 1)
  2264.             chroma_type = GEN7_YUV444;
  2265.         else if (h1 == 4 && h2 == 1 && h3 == 1 &&
  2266.                  v1 == 1 && v2 == 1 && v3 == 1)
  2267.             chroma_type = GEN7_YUV411;
  2268.         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
  2269.                  v1 == 2 && v2 == 1 && v3 == 1)
  2270.             chroma_type = GEN7_YUV422V_2Y;
  2271.         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
  2272.                  v1 == 2 && v2 == 2 && v3 == 2)
  2273.             chroma_type = GEN7_YUV422H_4Y;
  2274.         else if (h2 == 2 && h2 == 2 && h3 == 2 &&
  2275.                  v1 == 2 && v2 == 1 && v3 == 1)
  2276.             chroma_type = GEN7_YUV422V_4Y;
  2277.         else
  2278.             assert(0);
  2279.     }
  2280.  
  2281.     if (chroma_type == GEN7_YUV400 ||
  2282.         chroma_type == GEN7_YUV444 ||
  2283.         chroma_type == GEN7_YUV422V_2Y) {
  2284.         frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
  2285.         frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
  2286.     } else if (chroma_type == GEN7_YUV411) {
  2287.         frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
  2288.         frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
  2289.     } else {
  2290.         frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
  2291.         frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
  2292.     }
  2293.  
  2294.     BEGIN_BCS_BATCH(batch, 3);
  2295.     OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
  2296.     OUT_BCS_BATCH(batch,
  2297.                   (va_to_gen7_jpeg_rotation[0] << 4) |    /* without rotation */
  2298.                   (chroma_type << 0));
  2299.     OUT_BCS_BATCH(batch,
  2300.                   ((frame_height_in_blks - 1) << 16) |   /* FrameHeightInBlks */
  2301.                   ((frame_width_in_blks - 1) << 0));    /* FrameWidthInBlks */
  2302.     ADVANCE_BCS_BATCH(batch);
  2303. }
  2304.  
/* MFX_JPEG_HUFF_TABLE_STATE table id by VA-API huffman table index:
 * index 0 carries the luma (Y) tables, index 1 the chroma (UV) tables. */
static const int va_to_gen7_jpeg_hufftable[2] = {
    MFX_HUFFTABLE_ID_Y,
    MFX_HUFFTABLE_ID_UV
};
  2309.  
/*
 * Upload the JPEG Huffman tables that the application flagged for
 * loading.  Each MFX_JPEG_HUFF_TABLE_STATE command carries one table
 * set: 12 DC code counts + 12 DC values + 16 AC code counts + 164 AC
 * bytes = 204 bytes = 51 dwords, plus 2 header dwords = 53.
 * NOTE(review): 164 = the struct's 162 ac_values plus 2 trailing bytes
 * for dword alignment — confirm against va_dec_jpeg.h layout.
 * NOTE(review): num_tables must be <= 2 or va_to_gen7_jpeg_hufftable
 * is indexed out of bounds; callers appear to guarantee this.
 */
static void
gen75_mfd_jpeg_huff_table_state(VADriverContextP ctx,
                               struct decode_state *decode_state,
                               struct gen7_mfd_context *gen7_mfd_context,
                               int num_tables)
{
    VAHuffmanTableBufferJPEGBaseline *huffman_table;
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int index;

    /* Nothing to do if no table buffer was submitted. */
    if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
        return;

    huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;

    for (index = 0; index < num_tables; index++) {
        int id = va_to_gen7_jpeg_hufftable[index];

        /* Skip tables the app did not mark as loaded. */
        if (!huffman_table->load_huffman_table[index])
            continue;

        BEGIN_BCS_BATCH(batch, 53);
        OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
        OUT_BCS_BATCH(batch, id);
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
        ADVANCE_BCS_BATCH(batch);
    }
}
  2341.  
/* MFX quantization-matrix type indexed by 1-based JPEG component id
 * (Y, Cb, Cr, alpha).  Entry 0 is a placeholder; ids outside 1..4 are
 * skipped by gen75_mfd_jpeg_qm_state. */
static const int va_to_gen7_jpeg_qm[5] = {
    -1,
    MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
    MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
    MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
    MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
};
  2349.  
  2350. static void
  2351. gen75_mfd_jpeg_qm_state(VADriverContextP ctx,
  2352.                        struct decode_state *decode_state,
  2353.                        struct gen7_mfd_context *gen7_mfd_context)
  2354. {
  2355.     VAPictureParameterBufferJPEGBaseline *pic_param;
  2356.     VAIQMatrixBufferJPEGBaseline *iq_matrix;
  2357.     int index;
  2358.  
  2359.     if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
  2360.         return;
  2361.  
  2362.     iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
  2363.     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
  2364.  
  2365.     assert(pic_param->num_components <= 3);
  2366.  
  2367.     for (index = 0; index < pic_param->num_components; index++) {
  2368.         int id = pic_param->components[index].component_id - pic_param->components[0].component_id + 1;
  2369.         int qm_type;
  2370.         unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
  2371.         unsigned char raster_qm[64];
  2372.         int j;
  2373.  
  2374.         if (id > 4 || id < 1)
  2375.             continue;
  2376.  
  2377.         if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
  2378.             continue;
  2379.  
  2380.         qm_type = va_to_gen7_jpeg_qm[id];
  2381.  
  2382.         for (j = 0; j < 64; j++)
  2383.             raster_qm[zigzag_direct[j]] = qm[j];
  2384.  
  2385.         gen75_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
  2386.     }
  2387. }
  2388.  
  2389. static void
  2390. gen75_mfd_jpeg_bsd_object(VADriverContextP ctx,
  2391.                          VAPictureParameterBufferJPEGBaseline *pic_param,
  2392.                          VASliceParameterBufferJPEGBaseline *slice_param,
  2393.                          VASliceParameterBufferJPEGBaseline *next_slice_param,
  2394.                          dri_bo *slice_data_bo,
  2395.                          struct gen7_mfd_context *gen7_mfd_context)
  2396. {
  2397.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  2398.     int scan_component_mask = 0;
  2399.     int i;
  2400.  
  2401.     assert(slice_param->num_components > 0);
  2402.     assert(slice_param->num_components < 4);
  2403.     assert(slice_param->num_components <= pic_param->num_components);
  2404.  
  2405.     for (i = 0; i < slice_param->num_components; i++) {
  2406.         switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
  2407.         case 1:
  2408.             scan_component_mask |= (1 << 0);
  2409.             break;
  2410.         case 2:
  2411.             scan_component_mask |= (1 << 1);
  2412.             break;
  2413.         case 3:
  2414.             scan_component_mask |= (1 << 2);
  2415.             break;
  2416.         default:
  2417.             assert(0);
  2418.             break;
  2419.         }
  2420.     }
  2421.  
  2422.     BEGIN_BCS_BATCH(batch, 6);
  2423.     OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
  2424.     OUT_BCS_BATCH(batch,
  2425.                   slice_param->slice_data_size);
  2426.     OUT_BCS_BATCH(batch,
  2427.                   slice_param->slice_data_offset);
  2428.     OUT_BCS_BATCH(batch,
  2429.                   slice_param->slice_horizontal_position << 16 |
  2430.                   slice_param->slice_vertical_position << 0);
  2431.     OUT_BCS_BATCH(batch,
  2432.                   ((slice_param->num_components != 1) << 30) |  /* interleaved */
  2433.                   (scan_component_mask << 27) |                 /* scan components */
  2434.                   (0 << 26) |   /* disable interrupt allowed */
  2435.                   (slice_param->num_mcus << 0));                /* MCU count */
  2436.     OUT_BCS_BATCH(batch,
  2437.                   (slice_param->restart_interval << 0));    /* RestartInterval */
  2438.     ADVANCE_BCS_BATCH(batch);
  2439. }
  2440.  
/* Workaround for JPEG decoding on Ivybridge:
 * before decoding a real JPEG picture, a tiny canned 16x16 clip is run
 * through the MFX pipeline in AVC mode (see the gen75_jpeg_wa_*
 * helpers below, which program MFX_FORMAT_AVC) to bring the hardware
 * into a known-good state. */

static struct {
    int width;                /* clip width in pixels */
    int height;               /* clip height in pixels */
    unsigned char data[32];   /* pre-encoded bitstream of the clip */
    int data_size;            /* bytes of 'data' actually used */
    int data_bit_offset;      /* bit offset of slice data within 'data' —
                               * consumed by WA slice emission (not shown here) */
    int qp;                   /* slice quantization parameter */
} gen7_jpeg_wa_clip = {
    16,
    16,
    {
        0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
        0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
    },
    14,
    40,
    28,
};
  2461.  
/*
 * Prepare the JPEG workaround: (re)create a 16x16 NV12 surface to act
 * as the render target of the dummy decode, and upload the canned clip
 * bitstream into a small bo (once per context).
 */
static void
gen75_jpeg_wa_init(VADriverContextP ctx,
                  struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    VAStatus status;
    struct object_surface *obj_surface;

    /* Drop a surface left over from a previous run. */
    if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
        i965_DestroySurfaces(ctx,
                             &gen7_mfd_context->jpeg_wa_surface_id,
                             1);

    status = i965_CreateSurfaces(ctx,
                                 gen7_jpeg_wa_clip.width,
                                 gen7_jpeg_wa_clip.height,
                                 VA_RT_FORMAT_YUV420,
                                 1,
                                 &gen7_mfd_context->jpeg_wa_surface_id);
    assert(status == VA_STATUS_SUCCESS);

    /* Resolve the new surface id to its object and make sure it has an
     * NV12 bo backing it. */
    obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
    assert(obj_surface);
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC_NV12, SUBSAMPLE_YUV420);
    gen7_mfd_context->jpeg_wa_surface_object = obj_surface;

    /* Upload the canned bitstream only once; the bo is reused afterwards. */
    if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
        gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
                                                               "JPEG WA data",
                                                               0x1000,
                                                               0x1000);
        dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
                       0,
                       gen7_jpeg_wa_clip.data_size,
                       gen7_jpeg_wa_clip.data);
    }
}
  2499.  
/*
 * MFX_PIPE_MODE_SELECT for the JPEG workaround: put the pipeline into
 * AVC VLD decode mode (the canned clip is decoded through the AVC
 * path), with only the pre-deblocking output enabled.
 */
static void
gen75_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
                              struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  (MFX_LONG_MODE << 17) | /* Currently only support long format */
                  (MFD_MODE_VLD << 15) | /* VLD mode */
                  (0 << 10) | /* disable Stream-Out */
                  (0 << 9)  | /* Post Deblocking Output */
                  (1 << 8)  | /* Pre Deblocking Output */
                  (0 << 5)  | /* not in stitch mode */
                  (MFX_CODEC_DECODE << 4)  | /* decoding mode */
                  (MFX_FORMAT_AVC << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
                  (0 << 1)  |
                  (0 << 0));
    OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
    OUT_BCS_BATCH(batch, 0); /* reserved */
    ADVANCE_BCS_BATCH(batch);
}
  2527.  
/*
 * MFX_SURFACE_STATE for the JPEG workaround surface: describe the
 * 16x16 NV12 (planar 4:2:0, interleaved-chroma, Y-tiled) target that
 * the dummy AVC decode writes into.
 */
static void
gen75_jpeg_wa_surface_state(VADriverContextP ctx,
                           struct gen7_mfd_context *gen7_mfd_context)
{
    struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  ((obj_surface->orig_width - 1) << 18) |
                  ((obj_surface->orig_height - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
                  (1 << 27) | /* interleave chroma, set to 0 for JPEG */
                  (0 << 22) | /* surface object control state, ignored */
                  ((obj_surface->width - 1) << 3) | /* pitch */
                  (0 << 2)  | /* must be 0 */
                  (1 << 1)  | /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for U(Cb), must be 0 */
                  (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for V(Cr), must be 0 */
                  (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zero for JPEG */
    ADVANCE_BCS_BATCH(batch);
}
  2557.  
  2558. static void
  2559. gen75_jpeg_wa_pipe_buf_addr_state_bplus(VADriverContextP ctx,
  2560.                                  struct gen7_mfd_context *gen7_mfd_context)
  2561. {
  2562.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2563.     struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
  2564.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  2565.     dri_bo *intra_bo;
  2566.     int i;
  2567.  
  2568.     intra_bo = dri_bo_alloc(i965->intel.bufmgr,
  2569.                             "intra row store",
  2570.                             128 * 64,
  2571.                             0x1000);
  2572.  
  2573.     BEGIN_BCS_BATCH(batch, 61);
  2574.     OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
  2575.     OUT_BCS_RELOC(batch,
  2576.                   obj_surface->bo,
  2577.                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
  2578.                   0);
  2579.         OUT_BCS_BATCH(batch, 0);
  2580.         OUT_BCS_BATCH(batch, 0);
  2581.    
  2582.  
  2583.     OUT_BCS_BATCH(batch, 0); /* post deblocking */
  2584.         OUT_BCS_BATCH(batch, 0);
  2585.         OUT_BCS_BATCH(batch, 0);
  2586.  
  2587.         /* uncompressed-video & stream out 7-12 */
  2588.     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
  2589.     OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
  2590.         OUT_BCS_BATCH(batch, 0);
  2591.         OUT_BCS_BATCH(batch, 0);
  2592.         OUT_BCS_BATCH(batch, 0);
  2593.         OUT_BCS_BATCH(batch, 0);
  2594.  
  2595.         /* the DW 13-15 is for intra row store scratch */
  2596.     OUT_BCS_RELOC(batch,
  2597.                   intra_bo,
  2598.                   I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
  2599.                   0);
  2600.         OUT_BCS_BATCH(batch, 0);
  2601.         OUT_BCS_BATCH(batch, 0);
  2602.  
  2603.         /* the DW 16-18 is for deblocking filter */
  2604.     OUT_BCS_BATCH(batch, 0);
  2605.         OUT_BCS_BATCH(batch, 0);
  2606.         OUT_BCS_BATCH(batch, 0);
  2607.  
  2608.     /* DW 19..50 */
  2609.     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
  2610.         OUT_BCS_BATCH(batch, 0);
  2611.         OUT_BCS_BATCH(batch, 0);
  2612.     }
  2613.     OUT_BCS_BATCH(batch, 0);
  2614.  
  2615.         /* the DW52-54 is for mb status address */
  2616.     OUT_BCS_BATCH(batch, 0);
  2617.         OUT_BCS_BATCH(batch, 0);
  2618.         OUT_BCS_BATCH(batch, 0);
  2619.         /* the DW56-60 is for ILDB & second ILDB address */
  2620.     OUT_BCS_BATCH(batch, 0);
  2621.         OUT_BCS_BATCH(batch, 0);
  2622.         OUT_BCS_BATCH(batch, 0);
  2623.     OUT_BCS_BATCH(batch, 0);
  2624.         OUT_BCS_BATCH(batch, 0);
  2625.         OUT_BCS_BATCH(batch, 0);
  2626.  
  2627.     ADVANCE_BCS_BATCH(batch);
  2628.  
  2629.     dri_bo_unreference(intra_bo);
  2630. }
  2631.  
/*
 * MFX_PIPE_BUF_ADDR_STATE for the JPEG workaround (25-dword pre-B
 * layout; B+ parts branch to the 61-dword variant).  Only the
 * pre-deblocking output and an intra row store scratch bo are real;
 * all other slots are zero.  The scratch bo is unreferenced at the
 * end — the kernel keeps it alive until the batch retires.
 */
static void
gen75_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
                                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface = gen7_mfd_context->jpeg_wa_surface_object;
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    dri_bo *intra_bo;
    int i;

    if (IS_STEPPING_BPLUS(i965)) {
        gen75_jpeg_wa_pipe_buf_addr_state_bplus(ctx, gen7_mfd_context);
        return;
    }

    intra_bo = dri_bo_alloc(i965->intel.bufmgr,
                            "intra row store",
                            128 * 64,
                            0x1000);

    BEGIN_BCS_BATCH(batch, 25);
    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (25 - 2));
    /* DW1: pre-deblocking output (the WA surface) */
    OUT_BCS_RELOC(batch,
                  obj_surface->bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);

    OUT_BCS_BATCH(batch, 0); /* post deblocking */

    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */

    /* DW5: intra row store scratch */
    OUT_BCS_RELOC(batch,
                  intra_bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);

    OUT_BCS_BATCH(batch, 0); /* deblocking filter row store (unused) */

    /* DW 7..22: reference picture addresses (none for the dummy decode) */
    for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
        OUT_BCS_BATCH(batch, 0);
    }

    OUT_BCS_BATCH(batch, 0);   /* ignore DW23 for decoding */
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);

    dri_bo_unreference(intra_bo);
}
  2682.  
/*
 * MFX_BSP_BUF_BASE_ADDR_STATE (B+ stepping, 10-dword layout) for the
 * JPEG workaround: allocate temporary BSD/MPC and MPR row store
 * scratch buffers, point the hardware at them, and drop the local
 * references — the kernel keeps the bos alive until the batch retires.
 */
static void
gen75_jpeg_wa_bsp_buf_base_addr_state_bplus(VADriverContextP ctx,
                                     struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    dri_bo *bsd_mpc_bo, *mpr_bo;

    bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
                              "bsd mpc row store",
                              11520, /* 1.5 * 120 * 64 */
                              0x1000);

    mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
                          "mpr row store",
                          7680, /* 1.0 * 120 * 64 */
                          0x1000);

    BEGIN_BCS_BATCH(batch, 10);
    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));

    /* DW 1..3: BSD/MPC row store scratch */
    OUT_BCS_RELOC(batch,
                  bsd_mpc_bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW 4..6: MPR row store scratch */
    OUT_BCS_RELOC(batch,
                  mpr_bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* DW 7..9: bitplane read buffer (unused here) */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);

    dri_bo_unreference(bsd_mpc_bo);
    dri_bo_unreference(mpr_bo);
}
  2728.  
static void
gen75_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
                                     struct gen7_mfd_context *gen7_mfd_context)
{
    /*
     * Emit MFX_BSP_BUF_BASE_ADDR_STATE for the JPEG decoding workaround.
     * B+ steppings use the longer 10-dword layout (handled by the _bplus
     * variant); the pre-B layout is only 4 dwords: header, BSD/MPC row
     * store, MPR row store, and one trailing zero dword.
     */
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    dri_bo *bsd_mpc_bo, *mpr_bo;

    if (IS_STEPPING_BPLUS(i965)) {
        gen75_jpeg_wa_bsp_buf_base_addr_state_bplus(ctx, gen7_mfd_context);
        return;
    }

    /* Scratch row stores for this command only; unreferenced after emission. */
    bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
                              "bsd mpc row store",
                              11520, /* 1.5 * 120 * 64 */
                              0x1000);

    mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
                          "mpr row store",
                          7680, /* 1. 0 * 120 * 64 */
                          0x1000);

    BEGIN_BCS_BATCH(batch, 4);
    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));

    OUT_BCS_RELOC(batch,
                  bsd_mpc_bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);

    OUT_BCS_RELOC(batch,
                  mpr_bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);

    dri_bo_unreference(bsd_mpc_bo);
    dri_bo_unreference(mpr_bo);
}
  2771.  
static void
gen75_jpeg_wa_avc_qm_state(VADriverContextP ctx,
                          struct gen7_mfd_context *gen7_mfd_context)
{
    /* Intentionally empty: the JPEG decoding workaround apparently needs no
     * explicit AVC QM programming.  Kept as a stub so the workaround
     * sequence reads as a complete MFX state setup. */
}
  2778.  
static void
gen75_jpeg_wa_avc_img_state(VADriverContextP ctx,
                           struct gen7_mfd_context *gen7_mfd_context)
{
    /*
     * Emit MFX_AVC_IMG_STATE describing the workaround clip: a 1x1-macroblock
     * picture, 4:2:0 chroma, CABAC entropy coding, MBAFF disabled.
     */
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int img_struct = 0;          /* picture structure field (DW3 bits 9:8) */
    int mbaff_frame_flag = 0;
    unsigned int width_in_mbs = 1, height_in_mbs = 1;

    BEGIN_BCS_BATCH(batch, 16);
    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
    OUT_BCS_BATCH(batch,
                  (width_in_mbs * height_in_mbs - 1)); /* total MB count - 1 */
    OUT_BCS_BATCH(batch,
                  ((height_in_mbs - 1) << 16) |
                  ((width_in_mbs - 1) << 0)); /* picture size in MBs */
    OUT_BCS_BATCH(batch,
                  (0 << 24) |
                  (0 << 16) |
                  (0 << 14) |
                  (0 << 13) |
                  (0 << 12) | /* differ from GEN6 */
                  (0 << 10) |
                  (img_struct << 8));
    OUT_BCS_BATCH(batch,
                  (1 << 10) | /* 4:2:0 */
                  (1 << 7) |  /* CABAC */
                  (0 << 6) |
                  (0 << 5) |
                  (0 << 4) |
                  (0 << 3) |
                  (1 << 2) |
                  (mbaff_frame_flag << 1) |
                  (0 << 0));
    /* DW5..DW15: remaining image-state fields left zero for the
     * workaround picture. */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  2826.  
  2827. static void
  2828. gen75_jpeg_wa_avc_directmode_state_bplus(VADriverContextP ctx,
  2829.                                   struct gen7_mfd_context *gen7_mfd_context)
  2830. {
  2831.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  2832.     int i;
  2833.  
  2834.     BEGIN_BCS_BATCH(batch, 71);
  2835.     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));
  2836.  
  2837.     /* reference surfaces 0..15 */
  2838.     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
  2839.         OUT_BCS_BATCH(batch, 0); /* top */
  2840.         OUT_BCS_BATCH(batch, 0); /* bottom */
  2841.     }
  2842.        
  2843.         OUT_BCS_BATCH(batch, 0);
  2844.  
  2845.     /* the current decoding frame/field */
  2846.     OUT_BCS_BATCH(batch, 0); /* top */
  2847.     OUT_BCS_BATCH(batch, 0);
  2848.     OUT_BCS_BATCH(batch, 0);
  2849.  
  2850.     /* POC List */
  2851.     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
  2852.         OUT_BCS_BATCH(batch, 0);
  2853.         OUT_BCS_BATCH(batch, 0);
  2854.     }
  2855.  
  2856.     OUT_BCS_BATCH(batch, 0);
  2857.     OUT_BCS_BATCH(batch, 0);
  2858.  
  2859.     ADVANCE_BCS_BATCH(batch);
  2860. }
  2861.  
  2862. static void
  2863. gen75_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
  2864.                                   struct gen7_mfd_context *gen7_mfd_context)
  2865. {
  2866.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2867.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  2868.     int i;
  2869.  
  2870.     if (IS_STEPPING_BPLUS(i965)) {
  2871.         gen75_jpeg_wa_avc_directmode_state_bplus(ctx, gen7_mfd_context);
  2872.         return;
  2873.     }  
  2874.  
  2875.     BEGIN_BCS_BATCH(batch, 69);
  2876.     OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));
  2877.  
  2878.     /* reference surfaces 0..15 */
  2879.     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
  2880.         OUT_BCS_BATCH(batch, 0); /* top */
  2881.         OUT_BCS_BATCH(batch, 0); /* bottom */
  2882.     }
  2883.  
  2884.     /* the current decoding frame/field */
  2885.     OUT_BCS_BATCH(batch, 0); /* top */
  2886.     OUT_BCS_BATCH(batch, 0); /* bottom */
  2887.  
  2888.     /* POC List */
  2889.     for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
  2890.         OUT_BCS_BATCH(batch, 0);
  2891.         OUT_BCS_BATCH(batch, 0);
  2892.     }
  2893.  
  2894.     OUT_BCS_BATCH(batch, 0);
  2895.     OUT_BCS_BATCH(batch, 0);
  2896.  
  2897.     ADVANCE_BCS_BATCH(batch);
  2898. }
  2899.  
static void
gen75_jpeg_wa_ind_obj_base_addr_state_bplus(VADriverContextP ctx,
                                     struct gen7_mfd_context *gen7_mfd_context)
{
    /*
     * Emit MFX_IND_OBJ_BASE_ADDR_STATE (B+ stepping, 11 dwords) pointing the
     * MFX bitstream fetch at the pre-baked JPEG-workaround slice data BO.
     */
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 11);
    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
    OUT_BCS_RELOC(batch,
                  gen7_mfd_context->jpeg_wa_slice_data_bo,
                  I915_GEM_DOMAIN_INSTRUCTION, 0,
                  0);
    OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  2923.  
static void
gen75_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
                                     struct gen7_mfd_context *gen7_mfd_context)
{
    /*
     * Emit MFX_IND_OBJ_BASE_ADDR_STATE for the JPEG decoding workaround.
     * The payload emitted below is currently identical to the B+ variant;
     * the split is kept to mirror the other stepping-dependent emitters
     * in this file.
     */
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    if (IS_STEPPING_BPLUS(i965)) {
        gen75_jpeg_wa_ind_obj_base_addr_state_bplus(ctx, gen7_mfd_context);
        return;
    }

    BEGIN_BCS_BATCH(batch, 11);
    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
    OUT_BCS_RELOC(batch,
                  gen7_mfd_context->jpeg_wa_slice_data_bo,
                  I915_GEM_DOMAIN_INSTRUCTION, 0,
                  0);
    OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  2953.  
static void
gen75_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
                            struct gen7_mfd_context *gen7_mfd_context)
{
    /*
     * Emit the MFD_AVC_BSD_OBJECT that kicks off decoding of the canned
     * workaround clip.  Data size and bit offset come from the clip
     * descriptor (gen7_jpeg_wa_clip); the slice is flagged as the last one.
     */
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    /* the input bitstream format on GEN7 differs from GEN6 */
    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
    OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  (0 << 31) |
                  (0 << 14) |
                  (0 << 12) |
                  (0 << 10) |
                  (0 << 8));
    OUT_BCS_BATCH(batch,
                  ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) | /* byte offset */
                  (0 << 5)  |
                  (0 << 4)  |
                  (1 << 3) | /* LastSlice Flag */
                  (gen7_jpeg_wa_clip.data_bit_offset & 0x7)); /* residual bit offset */
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  2980.  
static void
gen75_jpeg_wa_avc_slice_state(VADriverContextP ctx,
                             struct gen7_mfd_context *gen7_mfd_context)
{
    /*
     * Emit MFX_AVC_SLICE_STATE for the workaround clip: a single I slice
     * covering the whole (1x1 MB) picture, deblocking disabled, QP taken
     * from the canned clip descriptor.
     */
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
    int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;
    int first_mb_in_slice = 0;
    int slice_type = SLICE_TYPE_I;

    BEGIN_BCS_BATCH(batch, 11);
    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, slice_type);
    OUT_BCS_BATCH(batch,
                  (num_ref_idx_l1 << 24) |
                  (num_ref_idx_l0 << 16) |
                  (0 << 8) |
                  (0 << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 29) |
                  (1 << 27) |   /* disable Deblocking */
                  (0 << 24) |
                  (gen7_jpeg_wa_clip.qp << 16) |
                  (0 << 8) |
                  (0 << 0));
    /* DW4/DW5: current and next slice positions in MB coordinates */
    OUT_BCS_BATCH(batch,
                  (slice_ver_pos << 24) |
                  (slice_hor_pos << 16) |
                  (first_mb_in_slice << 0));
    OUT_BCS_BATCH(batch,
                  (next_slice_ver_pos << 16) |
                  (next_slice_hor_pos << 0));
    OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
    /* DW7..DW10: remaining fields left zero for the workaround slice */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  3020.  
static void
gen75_mfd_jpeg_wa(VADriverContextP ctx,
                 struct gen7_mfd_context *gen7_mfd_context)
{
    /*
     * JPEG decoding workaround: decode a tiny canned AVC clip through the
     * MFX pipe before the real JPEG decode.  The helpers below emit the
     * full MFX state setup (pipe mode, surface, buffer addresses, QM/IMG
     * state, indirect object base) followed by the direct-mode/slice state
     * and the BSD object that executes the clip.
     * NOTE(review): presumably this puts the MFX hardware into a known
     * state before switching it to JPEG mode; the exact erratum is not
     * documented here.
     */
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    gen75_jpeg_wa_init(ctx, gen7_mfd_context);
    intel_batchbuffer_emit_mi_flush(batch);
    gen75_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
    gen75_jpeg_wa_surface_state(ctx, gen7_mfd_context);
    gen75_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
    gen75_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
    gen75_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
    gen75_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
    gen75_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);

    gen75_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
    gen75_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
    gen75_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
}
  3040.  
  3041. void
  3042. gen75_mfd_jpeg_decode_picture(VADriverContextP ctx,
  3043.                              struct decode_state *decode_state,
  3044.                              struct gen7_mfd_context *gen7_mfd_context)
  3045. {
  3046.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  3047.     VAPictureParameterBufferJPEGBaseline *pic_param;
  3048.     VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
  3049.     dri_bo *slice_data_bo;
  3050.     int i, j, max_selector = 0;
  3051.  
  3052.     assert(decode_state->pic_param && decode_state->pic_param->buffer);
  3053.     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
  3054.  
  3055.     /* Currently only support Baseline DCT */
  3056.     gen75_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
  3057.     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
  3058.     gen75_mfd_jpeg_wa(ctx, gen7_mfd_context);
  3059.     intel_batchbuffer_emit_mi_flush(batch);
  3060.     gen75_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
  3061.     gen75_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
  3062.     gen75_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
  3063.     gen75_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
  3064.     gen75_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);
  3065.  
  3066.     for (j = 0; j < decode_state->num_slice_params; j++) {
  3067.         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
  3068.         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
  3069.         slice_data_bo = decode_state->slice_datas[j]->bo;
  3070.         gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
  3071.  
  3072.         if (j == decode_state->num_slice_params - 1)
  3073.             next_slice_group_param = NULL;
  3074.         else
  3075.             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
  3076.  
  3077.         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
  3078.             int component;
  3079.  
  3080.             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
  3081.  
  3082.             if (i < decode_state->slice_params[j]->num_elements - 1)
  3083.                 next_slice_param = slice_param + 1;
  3084.             else
  3085.                 next_slice_param = next_slice_group_param;
  3086.  
  3087.             for (component = 0; component < slice_param->num_components; component++) {
  3088.                 if (max_selector < slice_param->components[component].dc_table_selector)
  3089.                     max_selector = slice_param->components[component].dc_table_selector;
  3090.  
  3091.                 if (max_selector < slice_param->components[component].ac_table_selector)
  3092.                     max_selector = slice_param->components[component].ac_table_selector;
  3093.             }
  3094.  
  3095.             slice_param++;
  3096.         }
  3097.     }
  3098.  
  3099.     assert(max_selector < 2);
  3100.     gen75_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);
  3101.  
  3102.     for (j = 0; j < decode_state->num_slice_params; j++) {
  3103.         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
  3104.         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
  3105.         slice_data_bo = decode_state->slice_datas[j]->bo;
  3106.         gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
  3107.  
  3108.         if (j == decode_state->num_slice_params - 1)
  3109.             next_slice_group_param = NULL;
  3110.         else
  3111.             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
  3112.  
  3113.         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
  3114.             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
  3115.  
  3116.             if (i < decode_state->slice_params[j]->num_elements - 1)
  3117.                 next_slice_param = slice_param + 1;
  3118.             else
  3119.                 next_slice_param = next_slice_group_param;
  3120.  
  3121.             gen75_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
  3122.             slice_param++;
  3123.         }
  3124.     }
  3125.  
  3126.     intel_batchbuffer_end_atomic(batch);
  3127.     intel_batchbuffer_flush(batch);
  3128. }
  3129.  
  3130. static VAStatus
  3131. gen75_mfd_decode_picture(VADriverContextP ctx,
  3132.                         VAProfile profile,
  3133.                         union codec_state *codec_state,
  3134.                         struct hw_context *hw_context)
  3135.  
  3136. {
  3137.     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
  3138.     struct decode_state *decode_state = &codec_state->decode;
  3139.     VAStatus vaStatus;
  3140.  
  3141.     assert(gen7_mfd_context);
  3142.  
  3143.     vaStatus = intel_decoder_sanity_check_input(ctx, profile, decode_state);
  3144.  
  3145.     if (vaStatus != VA_STATUS_SUCCESS)
  3146.         goto out;
  3147.  
  3148.     gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
  3149.  
  3150.     switch (profile) {
  3151.     case VAProfileMPEG2Simple:
  3152.     case VAProfileMPEG2Main:
  3153.         gen75_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
  3154.         break;
  3155.        
  3156.     case VAProfileH264ConstrainedBaseline:
  3157.     case VAProfileH264Main:
  3158.     case VAProfileH264High:
  3159.     case VAProfileH264StereoHigh:
  3160.     case VAProfileH264MultiviewHigh:
  3161.         gen75_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
  3162.         break;
  3163.  
  3164.     case VAProfileVC1Simple:
  3165.     case VAProfileVC1Main:
  3166.     case VAProfileVC1Advanced:
  3167.         gen75_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
  3168.         break;
  3169.  
  3170.     case VAProfileJPEGBaseline:
  3171.         gen75_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
  3172.         break;
  3173.  
  3174.     default:
  3175.         assert(0);
  3176.         break;
  3177.     }
  3178.  
  3179.     vaStatus = VA_STATUS_SUCCESS;
  3180.  
  3181. out:
  3182.     return vaStatus;
  3183. }
  3184.  
static void
gen75_mfd_context_destroy(void *hw_context)
{
    /*
     * Release every GPU buffer object owned by the decoder context, then
     * the batchbuffer and the context itself.  Embedded BO pointers are
     * reset to NULL after unreferencing; dri_bo_unreference() tolerates
     * NULL (libdrm convention), so buffers that were never allocated are
     * safe to pass.
     */
    struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;

    dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.bo = NULL;

    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = NULL;

    dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
    gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;

    dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;

    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;

    dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
    gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;

    dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
    gen7_mfd_context->bitplane_read_buffer.bo = NULL;

    /* Not NULLed: the context is freed immediately below. */
    dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);

    intel_batchbuffer_free(gen7_mfd_context->base.batch);
    free(gen7_mfd_context);
}
  3216.  
  3217. static void gen75_mfd_mpeg2_context_init(VADriverContextP ctx,
  3218.                                     struct gen7_mfd_context *gen7_mfd_context)
  3219. {
  3220.     gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
  3221.     gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
  3222.     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
  3223.     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
  3224. }
  3225.  
  3226. struct hw_context *
  3227. gen75_dec_hw_context_init(VADriverContextP ctx, struct object_config *obj_config)
  3228. {
  3229.     struct intel_driver_data *intel = intel_driver_data(ctx);
  3230.     struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
  3231.     int i;
  3232.  
  3233.     gen7_mfd_context->base.destroy = gen75_mfd_context_destroy;
  3234.     gen7_mfd_context->base.run = gen75_mfd_decode_picture;
  3235.     gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
  3236.  
  3237.     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
  3238.         gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
  3239.         gen7_mfd_context->reference_surface[i].frame_store_id = -1;
  3240.         gen7_mfd_context->reference_surface[i].obj_surface = NULL;
  3241.     }
  3242.  
  3243.     gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
  3244.     gen7_mfd_context->jpeg_wa_surface_object = NULL;
  3245.  
  3246.     switch (obj_config->profile) {
  3247.     case VAProfileMPEG2Simple:
  3248.     case VAProfileMPEG2Main:
  3249.         gen75_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
  3250.         break;
  3251.  
  3252.     case VAProfileH264ConstrainedBaseline:
  3253.     case VAProfileH264Main:
  3254.     case VAProfileH264High:
  3255.     case VAProfileH264StereoHigh:
  3256.     case VAProfileH264MultiviewHigh:
  3257.         gen75_mfd_avc_context_init(ctx, gen7_mfd_context);
  3258.         break;
  3259.     default:
  3260.         break;
  3261.     }
  3262.     return (struct hw_context *)gen7_mfd_context;
  3263. }
  3264.