Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | Download | RSS feed

  1. /*
  2.  * Copyright © 2011 Intel Corporation
  3.  *
  4.  * Permission is hereby granted, free of charge, to any person obtaining a
  5.  * copy of this software and associated documentation files (the
  6.  * "Software"), to deal in the Software without restriction, including
  7.  * without limitation the rights to use, copy, modify, merge, publish,
  8.  * distribute, sub license, and/or sell copies of the Software, and to
  9.  * permit persons to whom the Software is furnished to do so, subject to
  10.  * the following conditions:
  11.  *
  12.  * The above copyright notice and this permission notice (including the
  13.  * next paragraph) shall be included in all copies or substantial portions
  14.  * of the Software.
  15.  *
  16.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  17.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  18.  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  19.  * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
  20.  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  21.  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  22.  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  23.  *
  24.  * Authors:
  25.  *    Xiang Haihao <haihao.xiang@intel.com>
  26.  *    Zhao Yakui <yakui.zhao@intel.com>
  27.  *
  28.  */
  29.  
  30. #ifndef HAVE_GEN_AVC_SURFACE
  31. #define HAVE_GEN_AVC_SURFACE 1
  32. #endif
  33.  
  34. #include <stdio.h>
  35. #include <stdlib.h>
  36. #include <string.h>
  37. #include <assert.h>
  38.  
  39. //#include "config.h"
  40. #include "intel_batchbuffer.h"
  41. #include "intel_driver.h"
  42.  
  43. #include "i965_defines.h"
  44. #include "i965_drv_video.h"
  45. #include "i965_decoder_utils.h"
  46.  
  47. #include "gen7_mfd.h"
  48.  
  49. #define B0_STEP_REV             2
  50. #define IS_STEPPING_BPLUS(i965) ((i965->intel.revision) >= B0_STEP_REV)
  51.  
/* Classic 8x8 zig-zag scan order: entry k is the raster-order index of
 * the k-th coefficient in zig-zag order.  Presumably used to reorder
 * caller-supplied quantization matrices before upload to the MFX unit —
 * the use site is outside this chunk; confirm against the QM-state code. */
static const uint32_t zigzag_direct[64] = {
    0,   1,  8, 16,  9,  2,  3, 10,
    17, 24, 32, 25, 18, 11,  4,  5,
    12, 19, 26, 33, 40, 48, 41, 34,
    27, 20, 13,  6,  7, 14, 21, 28,
    35, 42, 49, 56, 57, 50, 43, 36,
    29, 22, 15, 23, 30, 37, 44, 51,
    58, 59, 52, 45, 38, 31, 39, 46,
    53, 60, 61, 54, 47, 55, 62, 63
};
  62.  
/*
 * Keep gen7_mfd_context->reference_surface (the frame-store table) in
 * sync with pic_param->ReferenceFrames for the current picture:
 *   1) evict table entries whose surface is no longer referenced,
 *   2) insert newly referenced surfaces into free entries, assigning each
 *      the lowest frame-store id not already in use,
 *   3) sort the table so slot i holds frame_store_id i where possible
 *      (slot order is the order the reference-picture DWORDs are emitted
 *      in by gen75_mfd_pipe_buf_addr_state*()).
 */
static void
gen75_mfd_avc_frame_store_index(VADriverContextP ctx,
                               VAPictureParameterBufferH264 *pic_param,
                               struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    int i, j;

    assert(ARRAY_ELEMS(gen7_mfd_context->reference_surface) == ARRAY_ELEMS(pic_param->ReferenceFrames));

    /* Pass 1: evict frame-store entries not present in ReferenceFrames. */
    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
        int found = 0;

        if (gen7_mfd_context->reference_surface[i].surface_id == VA_INVALID_ID)
            continue;

        for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
            VAPictureH264 *ref_pic = &pic_param->ReferenceFrames[j];
            if (ref_pic->flags & VA_PICTURE_H264_INVALID)
                continue;

            if (gen7_mfd_context->reference_surface[i].surface_id == ref_pic->picture_id) {
                found = 1;
                break;
            }
        }

        if (!found) {
            /* NOTE(review): obj_surface is dereferenced without a NULL
             * check; relies on the table only ever holding valid ids. */
            struct object_surface *obj_surface = SURFACE(gen7_mfd_context->reference_surface[i].surface_id);
            obj_surface->flags &= ~SURFACE_REFERENCED;

            /* Displayed and no longer referenced: release the backing bo. */
            if ((obj_surface->flags & SURFACE_ALL_MASK) == SURFACE_DISPLAYED) {
                dri_bo_unreference(obj_surface->bo);
                obj_surface->bo = NULL;
                obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
            }

            if (obj_surface->free_private_data)
                obj_surface->free_private_data(&obj_surface->private_data);

            gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
            gen7_mfd_context->reference_surface[i].frame_store_id = -1;
        }
    }

    /* Pass 2: add referenced pictures that are not yet in the table. */
    for (i = 0; i < ARRAY_ELEMS(pic_param->ReferenceFrames); i++) {
        VAPictureH264 *ref_pic = &pic_param->ReferenceFrames[i];
        int found = 0;

        if (ref_pic->flags & VA_PICTURE_H264_INVALID)
            continue;

        for (j = 0; j < ARRAY_ELEMS(gen7_mfd_context->reference_surface); j++) {
            if (gen7_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID)
                continue;

            if (gen7_mfd_context->reference_surface[j].surface_id == ref_pic->picture_id) {
                found = 1;
                break;
            }
        }

        if (!found) {
            int frame_idx;
            struct object_surface *obj_surface = SURFACE(ref_pic->picture_id);

            assert(obj_surface);
            /* Ensure the reference surface has a backing NV12 bo. */
            i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);

            /* Find the smallest frame_idx not used by any live entry:
             * the inner scan breaks early if frame_idx is taken, so the
             * outer loop stops at the first free id. */
            for (frame_idx = 0; frame_idx < ARRAY_ELEMS(gen7_mfd_context->reference_surface); frame_idx++) {
                for (j = 0; j < ARRAY_ELEMS(gen7_mfd_context->reference_surface); j++) {
                    if (gen7_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID)
                        continue;

                    if (gen7_mfd_context->reference_surface[j].frame_store_id == frame_idx)
                        break;
                }

                if (j == ARRAY_ELEMS(gen7_mfd_context->reference_surface))
                    break;
            }

            assert(frame_idx < ARRAY_ELEMS(gen7_mfd_context->reference_surface));

            /* Record the picture in the first free table entry. */
            for (j = 0; j < ARRAY_ELEMS(gen7_mfd_context->reference_surface); j++) {
                if (gen7_mfd_context->reference_surface[j].surface_id == VA_INVALID_ID) {
                    gen7_mfd_context->reference_surface[j].surface_id = ref_pic->picture_id;
                    gen7_mfd_context->reference_surface[j].frame_store_id = frame_idx;
                    break;
                }
            }
        }
    }

    /* sort: swap the entry carrying frame_store_id == i into slot i. */
    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface) - 1; i++) {
        if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID &&
            gen7_mfd_context->reference_surface[i].frame_store_id == i)
            continue;

        for (j = i + 1; j < ARRAY_ELEMS(gen7_mfd_context->reference_surface); j++) {
            if (gen7_mfd_context->reference_surface[j].surface_id != VA_INVALID_ID &&
                gen7_mfd_context->reference_surface[j].frame_store_id == i) {
                VASurfaceID id = gen7_mfd_context->reference_surface[i].surface_id;
                int frame_idx = gen7_mfd_context->reference_surface[i].frame_store_id;

                gen7_mfd_context->reference_surface[i].surface_id = gen7_mfd_context->reference_surface[j].surface_id;
                gen7_mfd_context->reference_surface[i].frame_store_id = gen7_mfd_context->reference_surface[j].frame_store_id;
                gen7_mfd_context->reference_surface[j].surface_id = id;
                gen7_mfd_context->reference_surface[j].frame_store_id = frame_idx;
                break;
            }
        }
    }
}
  178.  
  179. static void
  180. gen75_mfd_init_avc_surface(VADriverContextP ctx,
  181.                           VAPictureParameterBufferH264 *pic_param,
  182.                           struct object_surface *obj_surface)
  183. {
  184.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  185.     GenAvcSurface *gen7_avc_surface = obj_surface->private_data;
  186.     int width_in_mbs, height_in_mbs;
  187.  
  188.     obj_surface->free_private_data = gen_free_avc_surface;
  189.     width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
  190.     height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */
  191.  
  192.     if (!gen7_avc_surface) {
  193.         gen7_avc_surface = calloc(sizeof(GenAvcSurface), 1);
  194.         assert((obj_surface->size & 0x3f) == 0);
  195.         obj_surface->private_data = gen7_avc_surface;
  196.     }
  197.  
  198.     gen7_avc_surface->dmv_bottom_flag = (pic_param->pic_fields.bits.field_pic_flag &&
  199.                                          !pic_param->seq_fields.bits.direct_8x8_inference_flag);
  200.  
  201.     if (gen7_avc_surface->dmv_top == NULL) {
  202.         gen7_avc_surface->dmv_top = dri_bo_alloc(i965->intel.bufmgr,
  203.                                                  "direct mv w/r buffer",
  204.                                                  width_in_mbs * height_in_mbs * 128,
  205.                                                  0x1000);
  206.         assert(gen7_avc_surface->dmv_top);
  207.     }
  208.  
  209.     if (gen7_avc_surface->dmv_bottom_flag &&
  210.         gen7_avc_surface->dmv_bottom == NULL) {
  211.         gen7_avc_surface->dmv_bottom = dri_bo_alloc(i965->intel.bufmgr,
  212.                                                     "direct mv w/r buffer",
  213.                                                     width_in_mbs * height_in_mbs * 128,                                                    
  214.                                                     0x1000);
  215.         assert(gen7_avc_surface->dmv_bottom);
  216.     }
  217. }
  218.  
/*
 * Emit MFX_PIPE_MODE_SELECT (5 DWORDs): long-format VLD decode for the
 * selected codec standard, stream-out disabled, with pre-/post-deblocking
 * writes enabled according to which output buffer is valid.  All
 * error-termination bits in DW2 are left clear (decode continues on
 * bitstream errors).
 */
static void
gen75_mfd_pipe_mode_select(VADriverContextP ctx,
                          struct decode_state *decode_state,
                          int standard_select,
                          struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    assert(standard_select == MFX_FORMAT_MPEG2 ||
           standard_select == MFX_FORMAT_AVC ||
           standard_select == MFX_FORMAT_VC1 ||
           standard_select == MFX_FORMAT_JPEG);

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  (MFX_LONG_MODE << 17) | /* Currently only support long format */
                  (MFD_MODE_VLD << 15) | /* VLD mode */
                  (0 << 10) | /* disable Stream-Out */
                  (gen7_mfd_context->post_deblocking_output.valid << 9)  | /* Post Deblocking Output */
                  (gen7_mfd_context->pre_deblocking_output.valid << 8)  | /* Pre Deblocking Output */
                  (0 << 5)  | /* not in stitch mode */
                  (MFX_CODEC_DECODE << 4)  | /* decoding mode */
                  (standard_select << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
                  (0 << 1)  |
                  (0 << 0));
    OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
    OUT_BCS_BATCH(batch, 0); /* reserved */
    ADVANCE_BCS_BATCH(batch);
}
  253.  
/*
 * Emit MFX_SURFACE_STATE (6 DWORDs) describing the current render target:
 * planar 4:2:0 8-bit layout, Y-major tiled, with the Cb/Cr plane row
 * offsets taken from the surface object.  Chroma interleave is set for
 * every codec except JPEG.
 */
static void
gen75_mfd_surface_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       int standard_select,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface = SURFACE(decode_state->current_render_target);
    unsigned int y_cb_offset;
    unsigned int y_cr_offset;

    assert(obj_surface);

    y_cb_offset = obj_surface->y_cb_offset;
    y_cr_offset = obj_surface->y_cr_offset;

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  ((obj_surface->orig_height - 1) << 18) |
                  ((obj_surface->orig_width - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
                  ((standard_select != MFX_FORMAT_JPEG) << 27) | /* interleave chroma, set to 0 for JPEG */
                  (0 << 22) | /* surface object control state, ignored */
                  ((obj_surface->width - 1) << 3) | /* pitch */
                  (0 << 2)  | /* must be 0 */
                  (1 << 1)  | /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for U(Cb), must be 0 */
                  (y_cb_offset << 0)); /* Y offset for U(Cb) */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for V(Cr), must be 0 */
                  (y_cr_offset << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoro for JPEG */
    ADVANCE_BCS_BATCH(batch);
}
  293.  
  294.  
/*
 * Emit MFX_PIPE_BUF_ADDR_STATE in the 61-DWORD layout used on B+ stepping
 * parts.  Each buffer entry spans 3 DWORDs here (address plus two extra
 * DWORDs written as 0 — presumably upper address bits / attributes;
 * confirm against the PRM), unlike the 25-DWORD pre-B layout emitted by
 * gen75_mfd_pipe_buf_addr_state().  Encoder-only and unused slots are
 * written as 0.
 */
static void
gen75_mfd_pipe_buf_addr_state_bplus(VADriverContextP ctx,
                             struct decode_state *decode_state,
                             int standard_select,
                             struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    int i;

    BEGIN_BCS_BATCH(batch, 61);
    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
        /* Pre-deblock 1-3 */
    if (gen7_mfd_context->pre_deblocking_output.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        /* Post-deblocking 4-6 */
    if (gen7_mfd_context->post_deblocking_output.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);

        /* uncompressed-video & stream out 7-12 */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);

        /* intra row-store scratch 13-15 */
    if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        /* deblocking-filter-row-store 16-18 */
    if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0); /* unconditional despite the indentation: DW17 */
        OUT_BCS_BATCH(batch, 0); /* unconditional despite the indentation: DW18 */

    /* DW 19..50: one address+pad pair per frame-store slot, in slot order
     * (see gen75_mfd_avc_frame_store_index). */
    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
        struct object_surface *obj_surface;

        if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
            obj_surface = SURFACE(gen7_mfd_context->reference_surface[i].surface_id);
            assert(obj_surface && obj_surface->bo);

            OUT_BCS_RELOC(batch, obj_surface->bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0);
        } else {
            OUT_BCS_BATCH(batch, 0);
        }
            OUT_BCS_BATCH(batch, 0);
    }
        /* reference property 51 */
    OUT_BCS_BATCH(batch, 0);  

        /* Macroblock status & ILDB 52-57 */
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);

        /* the second Macroblock status 58-60 */
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  389.  
/*
 * Emit MFX_PIPE_BUF_ADDR_STATE.  On B+ stepping parts this delegates to
 * the 61-DWORD variant; otherwise it emits the original 25-DWORD layout:
 * pre/post-deblocking outputs, intra and deblocking-filter row-store
 * scratch buffers, then one reference-picture address per frame-store
 * slot (DW 7..22).  Invalid slots are written as 0.
 */
static void
gen75_mfd_pipe_buf_addr_state(VADriverContextP ctx,
                             struct decode_state *decode_state,
                             int standard_select,
                             struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    int i;
    if (IS_STEPPING_BPLUS(i965)) {
        gen75_mfd_pipe_buf_addr_state_bplus(ctx, decode_state,
                        standard_select, gen7_mfd_context);
        return;
    }

    BEGIN_BCS_BATCH(batch, 25);
    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (25 - 2));
    if (gen7_mfd_context->pre_deblocking_output.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->pre_deblocking_output.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    if (gen7_mfd_context->post_deblocking_output.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->post_deblocking_output.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */

    if (gen7_mfd_context->intra_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->intra_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    if (gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    /* DW 7..22: reference pictures in frame-store slot order. */
    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
        struct object_surface *obj_surface;

        if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
            obj_surface = SURFACE(gen7_mfd_context->reference_surface[i].surface_id);
            assert(obj_surface && obj_surface->bo);

            OUT_BCS_RELOC(batch, obj_surface->bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0);
        } else {
            OUT_BCS_BATCH(batch, 0);
        }
    }

    OUT_BCS_BATCH(batch, 0);   /* ignore DW23 for decoding */
    OUT_BCS_BATCH(batch, 0);   /* ignore DW24 for decoding */
    ADVANCE_BCS_BATCH(batch);
}
  458.  
/*
 * Emit MFX_IND_OBJ_BASE_ADDR_STATE in the 26-DWORD B+ layout: programs
 * the indirect bitstream object base to slice_data_bo with a 2GB upper
 * bound.  The MV, IT-COEFF, IT-DBLK and PAK-BSE (encoder) sections are
 * unused for VLD decode and written as 0.
 */
static void
gen75_mfd_ind_obj_base_addr_state_bplus(VADriverContextP ctx,
                                 dri_bo *slice_data_bo,
                                 int standard_select,
                                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 26);
    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
        /* MFX In BS 1-5 */
    OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
        /* Upper bound 4-5 */
    OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
    OUT_BCS_BATCH(batch, 0);

        /* MFX indirect MV 6-10 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

        /* MFX IT_COFF 11-15 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

        /* MFX IT_DBLK 16-20 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

        /* MFX PAK_BSE object for encoder 21-25 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
  507.  
/*
 * Emit MFX_IND_OBJ_BASE_ADDR_STATE.  On B+ stepping parts this delegates
 * to the 26-DWORD variant; otherwise it emits the 11-DWORD layout: slice
 * bitstream base + 2GB upper bound, with the remaining object pairs left
 * 0 (ignored in VLD mode).
 */
static void
gen75_mfd_ind_obj_base_addr_state(VADriverContextP ctx,
                                 dri_bo *slice_data_bo,
                                 int standard_select,
                                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    if (IS_STEPPING_BPLUS(i965)) {
        gen75_mfd_ind_obj_base_addr_state_bplus(ctx, slice_data_bo,
                                standard_select, gen7_mfd_context);
        return;
    }

    BEGIN_BCS_BATCH(batch, 11);
    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
    OUT_BCS_RELOC(batch, slice_data_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0); /* MFX Indirect Bitstream Object Base Address */
    OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  537.  
  538.  
/*
 * Emit MFX_BSP_BUF_BASE_ADDR_STATE in the 10-DWORD B+ layout: BSD/MPC
 * row-store scratch (DW 1-3), MPR row-store scratch (DW 4-6), and the
 * VC-1 bitplane read buffer (DW 7-9).  Each address is followed by two
 * zero DWORDs; invalid buffers are written as 0.
 */
static void
gen75_mfd_bsp_buf_base_addr_state_bplus(VADriverContextP ctx,
                                 struct decode_state *decode_state,
                                 int standard_select,
                                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 10);
    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));

    if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
        else
                OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
        /* MPR Row Store Scratch buffer 4-6 */
    if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
            OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

        /* Bitplane 7-9 */
    if (gen7_mfd_context->bitplane_read_buffer.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
  581.  
/*
 * Emit MFX_BSP_BUF_BASE_ADDR_STATE.  On B+ stepping parts this delegates
 * to the 10-DWORD variant; otherwise it emits the 4-DWORD layout:
 * BSD/MPC row-store scratch, MPR row-store scratch, and the VC-1
 * bitplane read buffer.  Invalid buffers are written as 0.
 */
static void
gen75_mfd_bsp_buf_base_addr_state(VADriverContextP ctx,
                                 struct decode_state *decode_state,
                                 int standard_select,
                                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct i965_driver_data *i965 = i965_driver_data(ctx);

    if (IS_STEPPING_BPLUS(i965)) {
        gen75_mfd_bsp_buf_base_addr_state_bplus(ctx, decode_state,
                                standard_select, gen7_mfd_context);
        return;
     }

    BEGIN_BCS_BATCH(batch, 4);
    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));

    if (gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    if (gen7_mfd_context->mpr_row_store_scratch_buffer.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->mpr_row_store_scratch_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    if (gen7_mfd_context->bitplane_read_buffer.valid)
        OUT_BCS_RELOC(batch, gen7_mfd_context->bitplane_read_buffer.bo,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0);
    else
        OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
  623.  
  624. static void
  625. gen75_mfd_qm_state(VADriverContextP ctx,
  626.                   int qm_type,
  627.                   unsigned char *qm,
  628.                   int qm_length,
  629.                   struct gen7_mfd_context *gen7_mfd_context)
  630. {
  631.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  632.     unsigned int qm_buffer[16];
  633.  
  634.     assert(qm_length <= 16 * 4);
  635.     memcpy(qm_buffer, qm, qm_length);
  636.  
  637.     BEGIN_BCS_BATCH(batch, 18);
  638.     OUT_BCS_BATCH(batch, MFX_QM_STATE | (18 - 2));
  639.     OUT_BCS_BATCH(batch, qm_type << 0);
  640.     intel_batchbuffer_data(batch, qm_buffer, 16 * 4);
  641.     ADVANCE_BCS_BATCH(batch);
  642. }
  643.  
/*
 * Emit MFX_AVC_IMG_STATE (16 DWORDs) for the current picture: frame
 * dimensions in macroblocks, chroma QP offsets, picture structure
 * (frame / top field / bottom field), and the sequence/picture flags the
 * MFX unit needs (entropy mode, transform_8x8, MBAFF, field flag, ...).
 * Validates via asserts that the parameters describe a stream the MFX
 * unit supports (4:2:0 or monochrome only, consistent field flags).
 */
static void
gen75_mfd_avc_img_state(VADriverContextP ctx,
                       struct decode_state *decode_state,
                       struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int img_struct;
    int mbaff_frame_flag;
    unsigned int width_in_mbs, height_in_mbs;
    VAPictureParameterBufferH264 *pic_param;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    assert(!(pic_param->CurrPic.flags & VA_PICTURE_H264_INVALID));

    /* img_struct encoding: 0 = frame, 1 = top field, 3 = bottom field. */
    if (pic_param->CurrPic.flags & VA_PICTURE_H264_TOP_FIELD)
        img_struct = 1;
    else if (pic_param->CurrPic.flags & VA_PICTURE_H264_BOTTOM_FIELD)
        img_struct = 3;
    else
        img_struct = 0;

    /* Field pictures must carry field_pic_flag, frames must not. */
    if ((img_struct & 0x1) == 0x1) {
        assert(pic_param->pic_fields.bits.field_pic_flag == 0x1);
    } else {
        assert(pic_param->pic_fields.bits.field_pic_flag == 0x0);
    }

    if (pic_param->seq_fields.bits.frame_mbs_only_flag) { /* a frame containing only frame macroblocks */
        assert(pic_param->seq_fields.bits.mb_adaptive_frame_field_flag == 0);
        assert(pic_param->pic_fields.bits.field_pic_flag == 0);
    } else {
        assert(pic_param->seq_fields.bits.direct_8x8_inference_flag == 1); /* see H.264 spec */
    }

    /* MBAFF applies only to frame pictures of an MBAFF sequence. */
    mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
                        !pic_param->pic_fields.bits.field_pic_flag);

    width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1; /* frame height */

    /* MFX unit doesn't support 4:2:2 and 4:4:4 picture */
    assert(pic_param->seq_fields.bits.chroma_format_idc == 0 || /* monochrome picture */
           pic_param->seq_fields.bits.chroma_format_idc == 1);  /* 4:2:0 */
    assert(pic_param->seq_fields.bits.residual_colour_transform_flag == 0); /* only available for 4:4:4 */

    BEGIN_BCS_BATCH(batch, 16);
    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
    OUT_BCS_BATCH(batch,
                  width_in_mbs * height_in_mbs);
    OUT_BCS_BATCH(batch,
                  ((height_in_mbs - 1) << 16) |
                  ((width_in_mbs - 1) << 0));
    OUT_BCS_BATCH(batch,
                  ((pic_param->second_chroma_qp_index_offset & 0x1f) << 24) |
                  ((pic_param->chroma_qp_index_offset & 0x1f) << 16) |
                  (0 << 14) | /* Max-bit conformance Intra flag ??? FIXME */
                  (0 << 13) | /* Max Macroblock size conformance Inter flag ??? FIXME */
                  (pic_param->pic_fields.bits.weighted_pred_flag << 12) | /* differ from GEN6 */
                  (pic_param->pic_fields.bits.weighted_bipred_idc << 10) |
                  (img_struct << 8));
    OUT_BCS_BATCH(batch,
                  (pic_param->seq_fields.bits.chroma_format_idc << 10) |
                  (pic_param->pic_fields.bits.entropy_coding_mode_flag << 7) |
                  ((!pic_param->pic_fields.bits.reference_pic_flag) << 6) |
                  (pic_param->pic_fields.bits.constrained_intra_pred_flag << 5) |
                  (pic_param->seq_fields.bits.direct_8x8_inference_flag << 4) |
                  (pic_param->pic_fields.bits.transform_8x8_mode_flag << 3) |
                  (pic_param->seq_fields.bits.frame_mbs_only_flag << 2) |
                  (mbaff_frame_flag << 1) |
                  (pic_param->pic_fields.bits.field_pic_flag << 0));
    /* DW 5-15: unused for this packet variant, written as 0. */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  728.  
  729. static void
  730. gen75_mfd_avc_qm_state(VADriverContextP ctx,
  731.                       struct decode_state *decode_state,
  732.                       struct gen7_mfd_context *gen7_mfd_context)
  733. {
  734.     VAIQMatrixBufferH264 *iq_matrix;
  735.     VAPictureParameterBufferH264 *pic_param;
  736.  
  737.     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer)
  738.         iq_matrix = (VAIQMatrixBufferH264 *)decode_state->iq_matrix->buffer;
  739.     else
  740.         iq_matrix = &gen7_mfd_context->iq_matrix.h264;
  741.  
  742.     assert(decode_state->pic_param && decode_state->pic_param->buffer);
  743.     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
  744.  
  745.     gen75_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTRA_MATRIX, &iq_matrix->ScalingList4x4[0][0], 3 * 16, gen7_mfd_context);
  746.     gen75_mfd_qm_state(ctx, MFX_QM_AVC_4X4_INTER_MATRIX, &iq_matrix->ScalingList4x4[3][0], 3 * 16, gen7_mfd_context);
  747.  
  748.     if (pic_param->pic_fields.bits.transform_8x8_mode_flag) {
  749.         gen75_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTRA_MATRIX, &iq_matrix->ScalingList8x8[0][0], 64, gen7_mfd_context);
  750.         gen75_mfd_qm_state(ctx, MFX_QM_AVC_8x8_INTER_MATRIX, &iq_matrix->ScalingList8x8[1][0], 64, gen7_mfd_context);
  751.     }
  752. }
  753.  
  754. static void
  755. gen75_mfd_avc_picid_state(VADriverContextP ctx,
  756.                       struct decode_state *decode_state,
  757.                       struct gen7_mfd_context *gen7_mfd_context)
  758. {
  759.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  760.  
  761.     BEGIN_BCS_BATCH(batch, 10);
  762.     OUT_BCS_BATCH(batch, MFD_AVC_PICID_STATE | (10 - 2));
  763.     OUT_BCS_BATCH(batch, 1); // disable Picture ID Remapping
  764.     OUT_BCS_BATCH(batch, 0);
  765.     OUT_BCS_BATCH(batch, 0);
  766.     OUT_BCS_BATCH(batch, 0);
  767.     OUT_BCS_BATCH(batch, 0);
  768.     OUT_BCS_BATCH(batch, 0);
  769.     OUT_BCS_BATCH(batch, 0);
  770.     OUT_BCS_BATCH(batch, 0);
  771.     OUT_BCS_BATCH(batch, 0);
  772.     ADVANCE_BCS_BATCH(batch);
  773. }
  774.  
  775.  
/* Emit MFX_AVC_DIRECTMODE_STATE in the 71-DW layout used on B-stepping
 * and later hardware (selected by the caller via IS_STEPPING_BPLUS).
 * The command carries the direct-MV (DMV) buffer addresses for all 16
 * reference slots plus the current picture, followed by the POC list the
 * hardware needs for temporal direct-mode scaling.  In this layout only
 * the top DMV buffer is programmed per entry; the second DW of each
 * address pair is left zero. */
static void
gen75_mfd_avc_directmode_state_bplus(VADriverContextP ctx,
                              VAPictureParameterBufferH264 *pic_param,
                              VASliceParameterBufferH264 *slice_param,
                              struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct object_surface *obj_surface;
    GenAvcSurface *gen7_avc_surface;
    VAPictureH264 *va_pic;
    int i, j;

    BEGIN_BCS_BATCH(batch, 71);
    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));

    /* reference surfaces 0..15: one DW pair each.  Empty slots and
     * references without decoder-private data emit zeros. */
    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
        if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
            obj_surface = SURFACE(gen7_mfd_context->reference_surface[i].surface_id);
            assert(obj_surface);
            gen7_avc_surface = obj_surface->private_data;

            if (gen7_avc_surface == NULL) {
                OUT_BCS_BATCH(batch, 0);
                OUT_BCS_BATCH(batch, 0);
            } else {
                /* read-only reloc: hardware only reads reference DMV data */
                OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
                              I915_GEM_DOMAIN_INSTRUCTION, 0,
                              0);
                OUT_BCS_BATCH(batch, 0);
            }
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }
        /* one extra zero DW required by the 71-DW layout */
        OUT_BCS_BATCH(batch, 0);

    /* the current decoding frame/field */
    va_pic = &pic_param->CurrPic;
    assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
    obj_surface = SURFACE(va_pic->picture_id);
    assert(obj_surface && obj_surface->bo && obj_surface->private_data);
    gen7_avc_surface = obj_surface->private_data;

    /* read/write reloc: the decoder writes the current picture's DMV data */
    OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);

        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);

    /* POC List: top/bottom field order counts for each reference slot,
     * looked up by matching surface IDs against pic_param->ReferenceFrames */
    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
        if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
            int found = 0;
            for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
                va_pic = &pic_param->ReferenceFrames[j];
               
                if (va_pic->flags & VA_PICTURE_H264_INVALID)
                    continue;

                if (va_pic->picture_id == gen7_mfd_context->reference_surface[i].surface_id) {
                    found = 1;
                    break;
                }
            }

            assert(found == 1);
            assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
           
            OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
            OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    /* POCs of the current picture terminate the list */
    va_pic = &pic_param->CurrPic;
    OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
    OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);

    ADVANCE_BCS_BATCH(batch);
}
  862.  
/* Emit MFX_AVC_DIRECTMODE_STATE.  On B-stepping and later hardware this
 * delegates to the 71-DW variant above; otherwise it emits the older
 * 69-DW layout, where each entry carries a top/bottom DMV buffer pair
 * (the top buffer is reused for the bottom when no separate bottom DMV
 * buffer exists, per dmv_bottom_flag). */
static void
gen75_mfd_avc_directmode_state(VADriverContextP ctx,
                              VAPictureParameterBufferH264 *pic_param,
                              VASliceParameterBufferH264 *slice_param,
                              struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    struct object_surface *obj_surface;
    GenAvcSurface *gen7_avc_surface;
    VAPictureH264 *va_pic;
    int i, j;

    if (IS_STEPPING_BPLUS(i965)) {
        gen75_mfd_avc_directmode_state_bplus(ctx, pic_param, slice_param,
                gen7_mfd_context);
        return;
    }

    BEGIN_BCS_BATCH(batch, 69);
    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));

    /* reference surfaces 0..15: top/bottom DMV buffer pair per slot */
    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
        if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
            obj_surface = SURFACE(gen7_mfd_context->reference_surface[i].surface_id);
            assert(obj_surface);
            gen7_avc_surface = obj_surface->private_data;

            if (gen7_avc_surface == NULL) {
                OUT_BCS_BATCH(batch, 0);
                OUT_BCS_BATCH(batch, 0);
            } else {
                /* read-only relocs: references are only read by the decoder */
                OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
                              I915_GEM_DOMAIN_INSTRUCTION, 0,
                              0);

                if (gen7_avc_surface->dmv_bottom_flag == 1)
                    OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_bottom,
                                  I915_GEM_DOMAIN_INSTRUCTION, 0,
                                  0);
                else
                    /* no separate bottom buffer: point both at the top */
                    OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
                                  I915_GEM_DOMAIN_INSTRUCTION, 0,
                                  0);
            }
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    /* the current decoding frame/field */
    va_pic = &pic_param->CurrPic;
    assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
    obj_surface = SURFACE(va_pic->picture_id);
    assert(obj_surface && obj_surface->bo && obj_surface->private_data);
    gen7_avc_surface = obj_surface->private_data;

    /* read/write relocs: the decoder writes the current picture's DMV data */
    OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);

    if (gen7_avc_surface->dmv_bottom_flag == 1)
        OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_bottom,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);
    else
        OUT_BCS_RELOC(batch, gen7_avc_surface->dmv_top,
                      I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                      0);

    /* POC List: top/bottom field order counts per reference slot, found
     * by matching surface IDs against pic_param->ReferenceFrames */
    for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
        if (gen7_mfd_context->reference_surface[i].surface_id != VA_INVALID_ID) {
            int found = 0;
            for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
                va_pic = &pic_param->ReferenceFrames[j];
               
                if (va_pic->flags & VA_PICTURE_H264_INVALID)
                    continue;

                if (va_pic->picture_id == gen7_mfd_context->reference_surface[i].surface_id) {
                    found = 1;
                    break;
                }
            }

            assert(found == 1);
            assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
           
            OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
            OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);
        } else {
            OUT_BCS_BATCH(batch, 0);
            OUT_BCS_BATCH(batch, 0);
        }
    }

    /* POCs of the current picture terminate the list */
    va_pic = &pic_param->CurrPic;
    OUT_BCS_BATCH(batch, va_pic->TopFieldOrderCnt);
    OUT_BCS_BATCH(batch, va_pic->BottomFieldOrderCnt);

    ADVANCE_BCS_BATCH(batch);
}
  968.  
/* Emit MFX_AVC_SLICE_STATE for one slice: slice type, active reference
 * counts, weighted-prediction denominators, deblocking/CABAC/QP
 * parameters, and the macroblock positions of this slice and the next
 * (the hardware uses the next-slice position to find where this slice
 * ends; for the last slice it is the end of the picture). */
static void
gen75_mfd_avc_slice_state(VADriverContextP ctx,
                         VAPictureParameterBufferH264 *pic_param,
                         VASliceParameterBufferH264 *slice_param,
                         VASliceParameterBufferH264 *next_slice_param,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    int height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
    int slice_hor_pos, slice_ver_pos, next_slice_hor_pos, next_slice_ver_pos;
    int num_ref_idx_l0, num_ref_idx_l1;
    int mbaff_picture = (!pic_param->pic_fields.bits.field_pic_flag &&
                         pic_param->seq_fields.bits.mb_adaptive_frame_field_flag);
    int first_mb_in_slice = 0, first_mb_in_next_slice = 0;
    int slice_type;

    /* Collapse SI->I and SP->P: the hardware slice type field only
     * distinguishes I/P/B. */
    if (slice_param->slice_type == SLICE_TYPE_I ||
        slice_param->slice_type == SLICE_TYPE_SI) {
        slice_type = SLICE_TYPE_I;
    } else if (slice_param->slice_type == SLICE_TYPE_P ||
               slice_param->slice_type == SLICE_TYPE_SP) {
        slice_type = SLICE_TYPE_P;
    } else {
        assert(slice_param->slice_type == SLICE_TYPE_B);
        slice_type = SLICE_TYPE_B;
    }

    /* Active reference counts: I uses none, P uses list 0 only. */
    if (slice_type == SLICE_TYPE_I) {
        assert(slice_param->num_ref_idx_l0_active_minus1 == 0);
        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
        num_ref_idx_l0 = 0;
        num_ref_idx_l1 = 0;
    } else if (slice_type == SLICE_TYPE_P) {
        assert(slice_param->num_ref_idx_l1_active_minus1 == 0);
        num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_idx_l1 = 0;
    } else {
        num_ref_idx_l0 = slice_param->num_ref_idx_l0_active_minus1 + 1;
        num_ref_idx_l1 = slice_param->num_ref_idx_l1_active_minus1 + 1;
    }

    /* In MBAFF pictures first_mb_in_slice counts MB pairs, so double it
     * to get a plain macroblock address before deriving x/y. */
    first_mb_in_slice = slice_param->first_mb_in_slice << mbaff_picture;
    slice_hor_pos = first_mb_in_slice % width_in_mbs;
    slice_ver_pos = first_mb_in_slice / width_in_mbs;

    if (next_slice_param) {
        first_mb_in_next_slice = next_slice_param->first_mb_in_slice << mbaff_picture;
        next_slice_hor_pos = first_mb_in_next_slice % width_in_mbs;
        next_slice_ver_pos = first_mb_in_next_slice / width_in_mbs;
    } else {
        /* Last slice: "next" position is the picture end; a field picture
         * only covers half the frame height in MBs. */
        next_slice_hor_pos = 0;
        next_slice_ver_pos = height_in_mbs / (1 + !!pic_param->pic_fields.bits.field_pic_flag);
    }

    BEGIN_BCS_BATCH(batch, 11); /* FIXME: is it 10??? */
    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, slice_type);
    OUT_BCS_BATCH(batch,
                  (num_ref_idx_l1 << 24) |
                  (num_ref_idx_l0 << 16) |
                  (slice_param->chroma_log2_weight_denom << 8) |
                  (slice_param->luma_log2_weight_denom << 0));
    OUT_BCS_BATCH(batch,
                  (slice_param->direct_spatial_mv_pred_flag << 29) |
                  (slice_param->disable_deblocking_filter_idc << 27) |
                  (slice_param->cabac_init_idc << 24) |
                  ((pic_param->pic_init_qp_minus26 + 26 + slice_param->slice_qp_delta) << 16) |
                  ((slice_param->slice_beta_offset_div2 & 0xf) << 8) |
                  ((slice_param->slice_alpha_c0_offset_div2 & 0xf) << 0));
    OUT_BCS_BATCH(batch,
                  (slice_ver_pos << 24) |
                  (slice_hor_pos << 16) |
                  (first_mb_in_slice << 0));
    OUT_BCS_BATCH(batch,
                  (next_slice_ver_pos << 16) |
                  (next_slice_hor_pos << 0));
    OUT_BCS_BATCH(batch,
                  (next_slice_param == NULL) << 19); /* last slice flag */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  1054.  
  1055. static inline void
  1056. gen75_mfd_avc_ref_idx_state(VADriverContextP ctx,
  1057.                            VAPictureParameterBufferH264 *pic_param,
  1058.                            VASliceParameterBufferH264 *slice_param,
  1059.                            struct gen7_mfd_context *gen7_mfd_context)
  1060. {
  1061.     gen6_send_avc_ref_idx_state(
  1062.         gen7_mfd_context->base.batch,
  1063.         slice_param,
  1064.         gen7_mfd_context->reference_surface
  1065.     );
  1066. }
  1067.  
  1068. static void
  1069. gen75_mfd_avc_weightoffset_state(VADriverContextP ctx,
  1070.                                 VAPictureParameterBufferH264 *pic_param,
  1071.                                 VASliceParameterBufferH264 *slice_param,
  1072.                                 struct gen7_mfd_context *gen7_mfd_context)
  1073. {
  1074.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  1075.     int i, j, num_weight_offset_table = 0;
  1076.     short weightoffsets[32 * 6];
  1077.  
  1078.     if ((slice_param->slice_type == SLICE_TYPE_P ||
  1079.          slice_param->slice_type == SLICE_TYPE_SP) &&
  1080.         (pic_param->pic_fields.bits.weighted_pred_flag == 1)) {
  1081.         num_weight_offset_table = 1;
  1082.     }
  1083.    
  1084.     if ((slice_param->slice_type == SLICE_TYPE_B) &&
  1085.         (pic_param->pic_fields.bits.weighted_bipred_idc == 1)) {
  1086.         num_weight_offset_table = 2;
  1087.     }
  1088.  
  1089.     for (i = 0; i < num_weight_offset_table; i++) {
  1090.         BEGIN_BCS_BATCH(batch, 98);
  1091.         OUT_BCS_BATCH(batch, MFX_AVC_WEIGHTOFFSET_STATE | (98 - 2));
  1092.         OUT_BCS_BATCH(batch, i);
  1093.  
  1094.         if (i == 0) {
  1095.             for (j = 0; j < 32; j++) {
  1096.                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l0[j];
  1097.                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l0[j];
  1098.                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l0[j][0];
  1099.                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l0[j][0];
  1100.                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l0[j][1];
  1101.                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l0[j][1];
  1102.             }
  1103.         } else {
  1104.             for (j = 0; j < 32; j++) {
  1105.                 weightoffsets[j * 6 + 0] = slice_param->luma_weight_l1[j];
  1106.                 weightoffsets[j * 6 + 1] = slice_param->luma_offset_l1[j];
  1107.                 weightoffsets[j * 6 + 2] = slice_param->chroma_weight_l1[j][0];
  1108.                 weightoffsets[j * 6 + 3] = slice_param->chroma_offset_l1[j][0];
  1109.                 weightoffsets[j * 6 + 4] = slice_param->chroma_weight_l1[j][1];
  1110.                 weightoffsets[j * 6 + 5] = slice_param->chroma_offset_l1[j][1];
  1111.             }
  1112.         }
  1113.  
  1114.         intel_batchbuffer_data(batch, weightoffsets, sizeof(weightoffsets));
  1115.         ADVANCE_BCS_BATCH(batch);
  1116.     }
  1117. }
  1118.  
/* Emit MFD_AVC_BSD_OBJECT for one slice: the byte range of the slice
 * data within the indirect object buffer, plus the bit offset of the
 * first macroblock (past the slice header), split into a byte part and
 * a 0..7 remainder as the hardware expects. */
static void
gen75_mfd_avc_bsd_object(VADriverContextP ctx,
                        VAPictureParameterBufferH264 *pic_param,
                        VASliceParameterBufferH264 *slice_param,
                        dri_bo *slice_data_bo,
                        VASliceParameterBufferH264 *next_slice_param,
                        struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    unsigned int slice_data_bit_offset;

    /* Bit offset of the first MB in the slice data, accounting for
     * CAVLC vs CABAC parsing (helper in i965_decoder_utils). */
    slice_data_bit_offset = avc_get_first_mb_bit_offset(
        slice_data_bo,
        slice_param,
        pic_param->pic_fields.bits.entropy_coding_mode_flag
    );

    /* the input bitsteam format on GEN7 differs from GEN6 */
    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
    OUT_BCS_BATCH(batch,
                  (slice_param->slice_data_size - slice_param->slice_data_offset));
    OUT_BCS_BATCH(batch, slice_param->slice_data_offset);
    OUT_BCS_BATCH(batch,
                  (0 << 31) |
                  (0 << 14) |
                  (0 << 12) |
                  (0 << 10) |
                  (0 << 8));
    OUT_BCS_BATCH(batch,
                  ((slice_data_bit_offset >> 3) << 16) |
                  (1 << 7)  |
                  (0 << 5)  |
                  (0 << 4)  |
                  ((next_slice_param == NULL) << 3) | /* LastSlice Flag */
                  (slice_data_bit_offset & 0x7));
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  1158.  
/* One-time AVC setup for a decode context: seed the fallback IQ matrix
 * with flat scaling lists, used by gen75_mfd_avc_qm_state() when the
 * application does not submit a VAIQMatrixBufferH264. */
static inline void
gen75_mfd_avc_context_init(
    VADriverContextP         ctx,
    struct gen7_mfd_context *gen7_mfd_context
)
{
    /* Initialize flat scaling lists */
    avc_gen_default_iq_matrix(&gen7_mfd_context->iq_matrix.h264);
}
  1168.  
/* Per-picture AVC decode setup: decide post- vs pre-deblocking output,
 * bind the render target and its DMV storage, and (re)allocate the
 * row-store scratch buffers sized from the picture width. */
static void
gen75_mfd_avc_decode_init(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    VAPictureParameterBufferH264 *pic_param;
    VASliceParameterBufferH264 *slice_param;
    VAPictureH264 *va_pic;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    dri_bo *bo;
    int i, j, enable_avc_ildb = 0;
    unsigned int width_in_mbs, height_in_mbs;

    /* The in-loop deblocking filter is needed as soon as any slice does
     * not disable it (disable_deblocking_filter_idc != 1); stop scanning
     * once one such slice is found. */
    for (j = 0; j < decode_state->num_slice_params && enable_avc_ildb == 0; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            if (slice_param->disable_deblocking_filter_idc != 1) {
                enable_avc_ildb = 1;
                break;
            }

            slice_param++;
        }
    }

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    gen75_mfd_avc_frame_store_index(ctx, pic_param, gen7_mfd_context);
    width_in_mbs = pic_param->picture_width_in_mbs_minus1 + 1;
    height_in_mbs = pic_param->picture_height_in_mbs_minus1 + 1;
    assert(width_in_mbs > 0 && width_in_mbs <= 256); /* 4K */
    assert(height_in_mbs > 0 && height_in_mbs <= 256);

    /* Current decoded picture */
    va_pic = &pic_param->CurrPic;
    assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
    obj_surface = SURFACE(va_pic->picture_id);
    assert(obj_surface);
    /* Mark the surface referenced iff the picture is a reference picture */
    obj_surface->flags &= ~SURFACE_REF_DIS_MASK;
    obj_surface->flags |= (pic_param->pic_fields.bits.reference_pic_flag ? SURFACE_REFERENCED : 0);
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);

    /* initial uv component for YUV400 case */
    if (pic_param->seq_fields.bits.chroma_format_idc == 0) {
         /* Fill the NV12 chroma plane with 0x80 (neutral chroma) since a
          * monochrome stream never writes it. */
         unsigned int uv_offset = obj_surface->width * obj_surface->height;
         unsigned int uv_size   = obj_surface->width * obj_surface->height / 2;

         drm_intel_gem_bo_map_gtt(obj_surface->bo);
         memset(obj_surface->bo->virtual + uv_offset, 0x80, uv_size);
         drm_intel_gem_bo_unmap_gtt(obj_surface->bo);
    }

    gen75_mfd_init_avc_surface(ctx, pic_param, obj_surface);

    /* Exactly one of the two output paths is valid, depending on whether
     * the in-loop deblocking filter runs for this picture. */
    dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.valid = enable_avc_ildb;

    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = !enable_avc_ildb;

    /* Row-store scratch buffers, sized from the picture width in MBs */
    dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "intra row store",
                      width_in_mbs * 64,
                      0x1000);
    assert(bo);
    gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "deblocking filter row store",
                      width_in_mbs * 64 * 4,
                      0x1000);
    assert(bo);
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 64 * 2,
                      0x1000);
    assert(bo);
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "mpr row store",
                      width_in_mbs * 64 * 2,
                      0x1000);
    assert(bo);
    gen7_mfd_context->mpr_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 1;

    /* AVC has no bitplane buffer (unlike VC-1) */
    gen7_mfd_context->bitplane_read_buffer.valid = 0;
}
  1281.  
/* Top-level AVC decode entry point for one picture: initialize per-frame
 * state, then emit the full MFX command sequence — pipe/surface/buffer
 * setup once, followed by per-slice direct-mode, ref-idx, weight/offset,
 * slice-state and BSD-object commands — and flush the batch. */
static void
gen75_mfd_avc_decode_picture(VADriverContextP ctx,
                            struct decode_state *decode_state,
                            struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferH264 *pic_param;
    VASliceParameterBufferH264 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
    gen75_mfd_avc_decode_init(ctx, decode_state, gen7_mfd_context);

    /* Per-picture commands, emitted once */
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen75_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen75_mfd_surface_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen75_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen75_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_AVC, gen7_mfd_context);
    gen75_mfd_avc_qm_state(ctx, decode_state, gen7_mfd_context);
    gen75_mfd_avc_img_state(ctx, decode_state, gen7_mfd_context);
    gen75_mfd_avc_picid_state(ctx, decode_state, gen7_mfd_context);

    /* Outer loop: slice parameter buffers (groups); inner loop: the
     * individual slices within each buffer. */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_AVC, gen7_mfd_context);

        /* First slice of the next group, or NULL after the last group —
         * needed so the last slice of this group knows its successor. */
        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferH264 *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
            assert((slice_param->slice_type == SLICE_TYPE_I) ||
                   (slice_param->slice_type == SLICE_TYPE_SI) ||
                   (slice_param->slice_type == SLICE_TYPE_P) ||
                   (slice_param->slice_type == SLICE_TYPE_SP) ||
                   (slice_param->slice_type == SLICE_TYPE_B));

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen75_mfd_avc_directmode_state(ctx, pic_param, slice_param, gen7_mfd_context);
            gen75_mfd_avc_ref_idx_state(ctx, pic_param, slice_param, gen7_mfd_context);
            gen75_mfd_avc_weightoffset_state(ctx, pic_param, slice_param, gen7_mfd_context);
            gen75_mfd_avc_slice_state(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
            gen75_mfd_avc_bsd_object(ctx, pic_param, slice_param, slice_data_bo, next_slice_param, gen7_mfd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
  1343.  
  1344. static void
  1345. gen75_mfd_mpeg2_decode_init(VADriverContextP ctx,
  1346.                            struct decode_state *decode_state,
  1347.                            struct gen7_mfd_context *gen7_mfd_context)
  1348. {
  1349.     VAPictureParameterBufferMPEG2 *pic_param;
  1350.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1351.     struct object_surface *obj_surface;
  1352.     dri_bo *bo;
  1353.     unsigned int width_in_mbs;
  1354.  
  1355.     assert(decode_state->pic_param && decode_state->pic_param->buffer);
  1356.     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
  1357.     width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
  1358.  
  1359.     mpeg2_set_reference_surfaces(
  1360.         ctx,
  1361.         gen7_mfd_context->reference_surface,
  1362.         decode_state,
  1363.         pic_param
  1364.     );
  1365.  
  1366.     /* Current decoded picture */
  1367.     obj_surface = SURFACE(decode_state->current_render_target);
  1368.     assert(obj_surface);
  1369.     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
  1370.  
  1371.     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
  1372.     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
  1373.     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
  1374.     gen7_mfd_context->pre_deblocking_output.valid = 1;
  1375.  
  1376.     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
  1377.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1378.                       "bsd mpc row store",
  1379.                       width_in_mbs * 96,
  1380.                       0x1000);
  1381.     assert(bo);
  1382.     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
  1383.     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;
  1384.  
  1385.     gen7_mfd_context->post_deblocking_output.valid = 0;
  1386.     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
  1387.     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
  1388.     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
  1389.     gen7_mfd_context->bitplane_read_buffer.valid = 0;
  1390. }
  1391.  
/*
 * Emit the MFX_MPEG2_PIC_STATE command describing picture-level
 * parameters for the hardware decoder: the four f_codes, the
 * picture_coding_extension flags, the picture coding type and the
 * coded size in macroblocks.
 */
static void
gen75_mfd_mpeg2_pic_state(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferMPEG2 *pic_param;
    unsigned int slice_concealment_disable_bit = 0;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;

        /* XXX: disable concealment for now */
        slice_concealment_disable_bit = 1;

    BEGIN_BCS_BATCH(batch, 13);
    OUT_BCS_BATCH(batch, MFX_MPEG2_PIC_STATE | (13 - 2));
    /* DW1: four 4-bit f_codes plus the picture_coding_extension flags */
    OUT_BCS_BATCH(batch,
                  (pic_param->f_code & 0xf) << 28 | /* f_code[1][1] */
                  ((pic_param->f_code >> 4) & 0xf) << 24 | /* f_code[1][0] */
                  ((pic_param->f_code >> 8) & 0xf) << 20 | /* f_code[0][1] */
                  ((pic_param->f_code >> 12) & 0xf) << 16 | /* f_code[0][0] */
                  pic_param->picture_coding_extension.bits.intra_dc_precision << 14 |
                  pic_param->picture_coding_extension.bits.picture_structure << 12 |
                  pic_param->picture_coding_extension.bits.top_field_first << 11 |
                  pic_param->picture_coding_extension.bits.frame_pred_frame_dct << 10 |
                  pic_param->picture_coding_extension.bits.concealment_motion_vectors << 9 |
                  pic_param->picture_coding_extension.bits.q_scale_type << 8 |
                  pic_param->picture_coding_extension.bits.intra_vlc_format << 7 |
                  pic_param->picture_coding_extension.bits.alternate_scan << 6);
    /* DW2: picture coding type (I/P/B) */
    OUT_BCS_BATCH(batch,
                  pic_param->picture_coding_type << 9);
    /* DW3: slice-concealment control plus frame size in MBs, minus one */
    OUT_BCS_BATCH(batch,
                  (slice_concealment_disable_bit << 31) |
                  ((ALIGN(pic_param->vertical_size, 16) / 16) - 1) << 16 |
                  ((ALIGN(pic_param->horizontal_size, 16) / 16) - 1));
    /* DW4..DW12: unused by this driver, zeroed */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  1439.  
  1440. static void
  1441. gen75_mfd_mpeg2_qm_state(VADriverContextP ctx,
  1442.                         struct decode_state *decode_state,
  1443.                         struct gen7_mfd_context *gen7_mfd_context)
  1444. {
  1445.     VAIQMatrixBufferMPEG2 * const gen_iq_matrix = &gen7_mfd_context->iq_matrix.mpeg2;
  1446.     int i, j;
  1447.  
  1448.     /* Update internal QM state */
  1449.     if (decode_state->iq_matrix && decode_state->iq_matrix->buffer) {
  1450.         VAIQMatrixBufferMPEG2 * const iq_matrix =
  1451.             (VAIQMatrixBufferMPEG2 *)decode_state->iq_matrix->buffer;
  1452.  
  1453.         if (gen_iq_matrix->load_intra_quantiser_matrix == -1 ||
  1454.             iq_matrix->load_intra_quantiser_matrix) {
  1455.             gen_iq_matrix->load_intra_quantiser_matrix =
  1456.                 iq_matrix->load_intra_quantiser_matrix;
  1457.             if (iq_matrix->load_intra_quantiser_matrix) {
  1458.                 for (j = 0; j < 64; j++)
  1459.                     gen_iq_matrix->intra_quantiser_matrix[zigzag_direct[j]] =
  1460.                         iq_matrix->intra_quantiser_matrix[j];
  1461.             }
  1462.         }
  1463.  
  1464.         if (gen_iq_matrix->load_non_intra_quantiser_matrix == -1 ||
  1465.             iq_matrix->load_non_intra_quantiser_matrix) {
  1466.             gen_iq_matrix->load_non_intra_quantiser_matrix =
  1467.                 iq_matrix->load_non_intra_quantiser_matrix;
  1468.             if (iq_matrix->load_non_intra_quantiser_matrix) {
  1469.                 for (j = 0; j < 64; j++)
  1470.                     gen_iq_matrix->non_intra_quantiser_matrix[zigzag_direct[j]] =
  1471.                         iq_matrix->non_intra_quantiser_matrix[j];
  1472.             }
  1473.         }
  1474.     }
  1475.  
  1476.     /* Commit QM state to HW */
  1477.     for (i = 0; i < 2; i++) {
  1478.         unsigned char *qm = NULL;
  1479.         int qm_type;
  1480.  
  1481.         if (i == 0) {
  1482.             if (gen_iq_matrix->load_intra_quantiser_matrix) {
  1483.                 qm = gen_iq_matrix->intra_quantiser_matrix;
  1484.                 qm_type = MFX_QM_MPEG_INTRA_QUANTIZER_MATRIX;
  1485.             }
  1486.         } else {
  1487.             if (gen_iq_matrix->load_non_intra_quantiser_matrix) {
  1488.                 qm = gen_iq_matrix->non_intra_quantiser_matrix;
  1489.                 qm_type = MFX_QM_MPEG_NON_INTRA_QUANTIZER_MATRIX;
  1490.             }
  1491.         }
  1492.  
  1493.         if (!qm)
  1494.             continue;
  1495.  
  1496.         gen75_mfd_qm_state(ctx, qm_type, qm, 64, gen7_mfd_context);
  1497.     }
  1498. }
  1499.  
/*
 * Emit one MFD_MPEG2_BSD_OBJECT command for a single slice.
 *
 * The slice's macroblock count is derived as the distance from this
 * slice's start position to the next slice's start position, or to the
 * bottom of the (field) picture when this is the last slice.
 */
static void
gen75_mfd_mpeg2_bsd_object(VADriverContextP ctx,
                          VAPictureParameterBufferMPEG2 *pic_param,
                          VASliceParameterBufferMPEG2 *slice_param,
                          VASliceParameterBufferMPEG2 *next_slice_param,
                          struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    unsigned int width_in_mbs = ALIGN(pic_param->horizontal_size, 16) / 16;
    int mb_count, vpos0, hpos0, vpos1, hpos1, is_field_pic_wa, is_field_pic = 0;

    if (pic_param->picture_coding_extension.bits.picture_structure == MPEG_TOP_FIELD ||
        pic_param->picture_coding_extension.bits.picture_structure == MPEG_BOTTOM_FIELD)
        is_field_pic = 1;
    /* NOTE(review): when the workaround probe is positive for field
     * pictures, vertical positions are halved — the probe semantics
     * live in mpeg2_wa_slice_vertical_position(); confirm there. */
    is_field_pic_wa = is_field_pic &&
        gen7_mfd_context->wa_mpeg2_slice_vertical_position > 0;

    vpos0 = slice_param->slice_vertical_position / (1 + is_field_pic_wa);
    hpos0 = slice_param->slice_horizontal_position;

    if (next_slice_param == NULL) {
        /* last slice: runs to the bottom of the picture */
        vpos1 = ALIGN(pic_param->vertical_size, 16) / 16 / (1 + is_field_pic);
        hpos1 = 0;
    } else {
        vpos1 = next_slice_param->slice_vertical_position / (1 + is_field_pic_wa);
        hpos1 = next_slice_param->slice_horizontal_position;
    }

    mb_count = (vpos1 * width_in_mbs + hpos1) - (vpos0 * width_in_mbs + hpos0);

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFD_MPEG2_BSD_OBJECT | (5 - 2));
    /* macroblock_offset is in bits: bits 3+ select the starting byte of
     * the slice data; the low three bits are programmed separately in
     * the position dword below. */
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_size - (slice_param->macroblock_offset >> 3));
    OUT_BCS_BATCH(batch,
                  slice_param->slice_data_offset + (slice_param->macroblock_offset >> 3));
    /* start position, MB count, last-slice flags, sub-byte bit offset */
    OUT_BCS_BATCH(batch,
                  hpos0 << 24 |
                  vpos0 << 16 |
                  mb_count << 8 |
                  (next_slice_param == NULL) << 5 |
                  (next_slice_param == NULL) << 3 |
                  (slice_param->macroblock_offset & 0x7));
    /* quantiser scale code and the next slice's start position */
    OUT_BCS_BATCH(batch,
                  (slice_param->quantiser_scale_code << 24) |
                  (vpos1 << 8 | hpos1));
    ADVANCE_BCS_BATCH(batch);
}
  1548.  
  1549. static void
  1550. gen75_mfd_mpeg2_decode_picture(VADriverContextP ctx,
  1551.                               struct decode_state *decode_state,
  1552.                               struct gen7_mfd_context *gen7_mfd_context)
  1553. {
  1554.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  1555.     VAPictureParameterBufferMPEG2 *pic_param;
  1556.     VASliceParameterBufferMPEG2 *slice_param, *next_slice_param, *next_slice_group_param;
  1557.     dri_bo *slice_data_bo;
  1558.     int i, j;
  1559.  
  1560.     assert(decode_state->pic_param && decode_state->pic_param->buffer);
  1561.     pic_param = (VAPictureParameterBufferMPEG2 *)decode_state->pic_param->buffer;
  1562.  
  1563.     gen75_mfd_mpeg2_decode_init(ctx, decode_state, gen7_mfd_context);
  1564.     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
  1565.     intel_batchbuffer_emit_mi_flush(batch);
  1566.     gen75_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
  1567.     gen75_mfd_surface_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
  1568.     gen75_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
  1569.     gen75_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_MPEG2, gen7_mfd_context);
  1570.     gen75_mfd_mpeg2_pic_state(ctx, decode_state, gen7_mfd_context);
  1571.     gen75_mfd_mpeg2_qm_state(ctx, decode_state, gen7_mfd_context);
  1572.  
  1573.     if (gen7_mfd_context->wa_mpeg2_slice_vertical_position < 0)
  1574.         gen7_mfd_context->wa_mpeg2_slice_vertical_position =
  1575.             mpeg2_wa_slice_vertical_position(decode_state, pic_param);
  1576.  
  1577.     for (j = 0; j < decode_state->num_slice_params; j++) {
  1578.         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
  1579.         slice_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j]->buffer;
  1580.         slice_data_bo = decode_state->slice_datas[j]->bo;
  1581.         gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_MPEG2, gen7_mfd_context);
  1582.  
  1583.         if (j == decode_state->num_slice_params - 1)
  1584.             next_slice_group_param = NULL;
  1585.         else
  1586.             next_slice_group_param = (VASliceParameterBufferMPEG2 *)decode_state->slice_params[j + 1]->buffer;
  1587.  
  1588.         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
  1589.             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
  1590.  
  1591.             if (i < decode_state->slice_params[j]->num_elements - 1)
  1592.                 next_slice_param = slice_param + 1;
  1593.             else
  1594.                 next_slice_param = next_slice_group_param;
  1595.  
  1596.             gen75_mfd_mpeg2_bsd_object(ctx, pic_param, slice_param, next_slice_param, gen7_mfd_context);
  1597.             slice_param++;
  1598.         }
  1599.     }
  1600.  
  1601.     intel_batchbuffer_end_atomic(batch);
  1602.     intel_batchbuffer_flush(batch);
  1603. }
  1604.  
/* Map the VA VC-1 picture type (pic_param->picture_fields.bits.picture_type)
 * to the GEN7 MFX encoding; the last entry (VA type 4, presumably the
 * skipped-picture type — confirm against va.h) decodes as a P picture. */
static const int va_to_gen7_vc1_pic_type[5] = {
    GEN7_VC1_I_PICTURE,
    GEN7_VC1_P_PICTURE,
    GEN7_VC1_B_PICTURE,
    GEN7_VC1_BI_PICTURE,
    GEN7_VC1_P_PICTURE,
};
  1612.  
/* Map the VA motion-vector mode (mv_fields.bits.mv_mode / mv_mode2)
 * to the GEN7 unified MV mode field. */
static const int va_to_gen7_vc1_mv[4] = {
    1, /* 1-MV */
    2, /* 1-MV half-pel */
    3, /* 1-MV half-pef bilinear */
    0, /* Mixed MV */
};
  1619.  
/* B-picture scale factors, indexed by pic_param->b_picture_fraction
 * (valid indices 0..20; callers must bounds-check before lookup). */
static const int b_picture_scale_factor[21] = {
    128, 85,  170, 64,  192,
    51,  102, 153, 204, 43,
    215, 37,  74,  111, 148,
    185, 222, 32,  96,  160,
    224,
};
  1627.  
/* Map pic_param->conditional_overlap_flag (0..2) to the GEN7
 * conditional-overlap field value. */
static const int va_to_gen7_vc1_condover[3] = {
    0,
    2,
    3
};
  1633.  
/* Map pic_param->sequence_fields.bits.profile to the GEN7 VC-1
 * profile encoding. */
static const int va_to_gen7_vc1_profile[4] = {
    GEN7_VC1_SIMPLE_PROFILE,
    GEN7_VC1_MAIN_PROFILE,
    GEN7_VC1_RESERVED_PROFILE,
    GEN7_VC1_ADVANCED_PROFILE
};
  1640.  
  1641. static void
  1642. gen75_mfd_free_vc1_surface(void **data)
  1643. {
  1644.     struct gen7_vc1_surface *gen7_vc1_surface = *data;
  1645.  
  1646.     if (!gen7_vc1_surface)
  1647.         return;
  1648.  
  1649.     dri_bo_unreference(gen7_vc1_surface->dmv);
  1650.     free(gen7_vc1_surface);
  1651.     *data = NULL;
  1652. }
  1653.  
  1654. static void
  1655. gen75_mfd_init_vc1_surface(VADriverContextP ctx,
  1656.                           VAPictureParameterBufferVC1 *pic_param,
  1657.                           struct object_surface *obj_surface)
  1658. {
  1659.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1660.     struct gen7_vc1_surface *gen7_vc1_surface = obj_surface->private_data;
  1661.     int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
  1662.     int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
  1663.  
  1664.     obj_surface->free_private_data = gen75_mfd_free_vc1_surface;
  1665.  
  1666.     if (!gen7_vc1_surface) {
  1667.         gen7_vc1_surface = calloc(sizeof(struct gen7_vc1_surface), 1);
  1668.         assert((obj_surface->size & 0x3f) == 0);
  1669.         obj_surface->private_data = gen7_vc1_surface;
  1670.     }
  1671.  
  1672.     gen7_vc1_surface->picture_type = pic_param->picture_fields.bits.picture_type;
  1673.  
  1674.     if (gen7_vc1_surface->dmv == NULL) {
  1675.         gen7_vc1_surface->dmv = dri_bo_alloc(i965->intel.bufmgr,
  1676.                                              "direct mv w/r buffer",
  1677.                                              width_in_mbs * height_in_mbs * 64,
  1678.                                              0x1000);
  1679.     }
  1680. }
  1681.  
/*
 * Per-picture set-up for VC-1 decode: binds the forward/backward
 * reference surfaces, routes the decoded picture through the pre- or
 * post-deblocking output (depending on the loop filter), (re)allocates
 * the row-store scratch buffers and repacks the VA bitplane buffer
 * into the layout the hardware reads.
 */
static void
gen75_mfd_vc1_decode_init(VADriverContextP ctx,
                         struct decode_state *decode_state,
                         struct gen7_mfd_context *gen7_mfd_context)
{
    VAPictureParameterBufferVC1 *pic_param;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    int i;
    dri_bo *bo;
    int width_in_mbs;
    int picture_type;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
    width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
    picture_type = pic_param->picture_fields.bits.picture_type;

    /* reference picture */
    obj_surface = SURFACE(pic_param->forward_reference_picture);

    if (obj_surface && obj_surface->bo)
        gen7_mfd_context->reference_surface[0].surface_id = pic_param->forward_reference_picture;
    else
        gen7_mfd_context->reference_surface[0].surface_id = VA_INVALID_ID;

    obj_surface = SURFACE(pic_param->backward_reference_picture);

    if (obj_surface && obj_surface->bo)
        gen7_mfd_context->reference_surface[1].surface_id = pic_param->backward_reference_picture;
    else
        /* no usable backward reference: reuse the forward reference id */
        gen7_mfd_context->reference_surface[1].surface_id = pic_param->forward_reference_picture;

    /* must do so !!! */
    /* fill the remaining slots by alternating the two ids above */
    for (i = 2; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++)
        gen7_mfd_context->reference_surface[i].surface_id = gen7_mfd_context->reference_surface[i % 2].surface_id;

    /* Current decoded picture */
    obj_surface = SURFACE(decode_state->current_render_target);
    assert(obj_surface);
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N','V','1','2'), SUBSAMPLE_YUV420);
    gen75_mfd_init_vc1_surface(ctx, pic_param, obj_surface);

    /* Both outputs point at the same bo; exactly one is marked valid,
     * chosen by whether the in-loop filter is enabled. */
    dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->post_deblocking_output.bo);
    gen7_mfd_context->post_deblocking_output.valid = pic_param->entrypoint_fields.bits.loopfilter;

    dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
    dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
    gen7_mfd_context->pre_deblocking_output.valid = !pic_param->entrypoint_fields.bits.loopfilter;

    /* Row-store scratch buffers, sized per macroblock column. */
    dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "intra row store",
                      width_in_mbs * 64,
                      0x1000);
    assert(bo);
    gen7_mfd_context->intra_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->intra_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "deblocking filter row store",
                      width_in_mbs * 7 * 64,
                      0x1000);
    assert(bo);
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 1;

    dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "bsd mpc row store",
                      width_in_mbs * 96,
                      0x1000);
    assert(bo);
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = bo;
    gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 1;

    gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;

    gen7_mfd_context->bitplane_read_buffer.valid = !!pic_param->bitplane_present.value;
    dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);

    if (gen7_mfd_context->bitplane_read_buffer.valid) {
        int width_in_mbs = ALIGN(pic_param->coded_width, 16) / 16;
        int height_in_mbs = ALIGN(pic_param->coded_height, 16) / 16;
        /* destination rows pack two macroblock entries per byte */
        int bitplane_width = ALIGN(width_in_mbs, 2) / 2;
        int src_w, src_h;
        uint8_t *src = NULL, *dst = NULL;

        assert(decode_state->bit_plane->buffer);
        src = decode_state->bit_plane->buffer;

        bo = dri_bo_alloc(i965->intel.bufmgr,
                          "VC-1 Bitplane",
                          bitplane_width * height_in_mbs,
                          0x1000);
        assert(bo);
        gen7_mfd_context->bitplane_read_buffer.bo = bo;

        dri_bo_map(bo, True);
        assert(bo->virtual);
        dst = bo->virtual;

        /* Repack the VA bitplane (4 bits per macroblock, two per byte,
         * continuous across rows) into one bitplane_width-byte row per
         * macroblock row. */
        for (src_h = 0; src_h < height_in_mbs; src_h++) {
            for(src_w = 0; src_w < width_in_mbs; src_w++) {
                int src_index, dst_index;
                int src_shift;
                uint8_t src_value;

                /* even macroblock index -> high nibble of the source byte */
                src_index = (src_h * width_in_mbs + src_w) / 2;
                src_shift = !((src_h * width_in_mbs + src_w) & 1) * 4;
                src_value = ((src[src_index] >> src_shift) & 0xf);

                /* NOTE(review): bit 0x2 is forced for skipped pictures —
                 * presumably the per-MB skip flag; confirm against the PRM */
                if (picture_type == GEN7_VC1_SKIPPED_PICTURE){
                    src_value |= 0x2;
                }

                /* shift the new nibble in from the top; the previous
                 * macroblock's nibble drops into the low half */
                dst_index = src_w / 2;
                dst[dst_index] = ((dst[dst_index] >> 4) | (src_value << 4));
            }

            /* odd row width: park the final nibble in the low half */
            if (src_w & 1)
                dst[src_w / 2] >>= 4;

            dst += bitplane_width;
        }

        dri_bo_unmap(bo);
    } else
        gen7_mfd_context->bitplane_read_buffer.bo = NULL;
}
  1816.  
  1817. static void
  1818. gen75_mfd_vc1_pic_state(VADriverContextP ctx,
  1819.                        struct decode_state *decode_state,
  1820.                        struct gen7_mfd_context *gen7_mfd_context)
  1821. {
  1822.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  1823.     VAPictureParameterBufferVC1 *pic_param;
  1824.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1825.     struct object_surface *obj_surface;
  1826.     int alt_pquant_config = 0, alt_pquant_edge_mask = 0, alt_pq;
  1827.     int dquant, dquantfrm, dqprofile, dqdbedge, dqsbedge, dqbilevel;
  1828.     int unified_mv_mode;
  1829.     int ref_field_pic_polarity = 0;
  1830.     int scale_factor = 0;
  1831.     int trans_ac_y = 0;
  1832.     int dmv_surface_valid = 0;
  1833.     int brfd = 0;
  1834.     int fcm = 0;
  1835.     int picture_type;
  1836.     int profile;
  1837.     int overlap;
  1838.     int interpolation_mode = 0;
  1839.  
  1840.     assert(decode_state->pic_param && decode_state->pic_param->buffer);
  1841.     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
  1842.  
  1843.     profile = va_to_gen7_vc1_profile[pic_param->sequence_fields.bits.profile];
  1844.     dquant = pic_param->pic_quantizer_fields.bits.dquant;
  1845.     dquantfrm = pic_param->pic_quantizer_fields.bits.dq_frame;
  1846.     dqprofile = pic_param->pic_quantizer_fields.bits.dq_profile;
  1847.     dqdbedge = pic_param->pic_quantizer_fields.bits.dq_db_edge;
  1848.     dqsbedge = pic_param->pic_quantizer_fields.bits.dq_sb_edge;
  1849.     dqbilevel = pic_param->pic_quantizer_fields.bits.dq_binary_level;
  1850.     alt_pq = pic_param->pic_quantizer_fields.bits.alt_pic_quantizer;
  1851.  
  1852.     if (dquant == 0) {
  1853.         alt_pquant_config = 0;
  1854.         alt_pquant_edge_mask = 0;
  1855.     } else if (dquant == 2) {
  1856.         alt_pquant_config = 1;
  1857.         alt_pquant_edge_mask = 0xf;
  1858.     } else {
  1859.         assert(dquant == 1);
  1860.         if (dquantfrm == 0) {
  1861.             alt_pquant_config = 0;
  1862.             alt_pquant_edge_mask = 0;
  1863.             alt_pq = 0;
  1864.         } else {
  1865.             assert(dquantfrm == 1);
  1866.             alt_pquant_config = 1;
  1867.  
  1868.             switch (dqprofile) {
  1869.             case 3:
  1870.                 if (dqbilevel == 0) {
  1871.                     alt_pquant_config = 2;
  1872.                     alt_pquant_edge_mask = 0;
  1873.                 } else {
  1874.                     assert(dqbilevel == 1);
  1875.                     alt_pquant_config = 3;
  1876.                     alt_pquant_edge_mask = 0;
  1877.                 }
  1878.                 break;
  1879.                
  1880.             case 0:
  1881.                 alt_pquant_edge_mask = 0xf;
  1882.                 break;
  1883.  
  1884.             case 1:
  1885.                 if (dqdbedge == 3)
  1886.                     alt_pquant_edge_mask = 0x9;
  1887.                 else
  1888.                     alt_pquant_edge_mask = (0x3 << dqdbedge);
  1889.  
  1890.                 break;
  1891.  
  1892.             case 2:
  1893.                 alt_pquant_edge_mask = (0x1 << dqsbedge);
  1894.                 break;
  1895.  
  1896.             default:
  1897.                 assert(0);
  1898.             }
  1899.         }
  1900.     }
  1901.  
  1902.     if (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation) {
  1903.         assert(pic_param->mv_fields.bits.mv_mode2 < 4);
  1904.         unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode2];
  1905.     } else {
  1906.         assert(pic_param->mv_fields.bits.mv_mode < 4);
  1907.         unified_mv_mode = va_to_gen7_vc1_mv[pic_param->mv_fields.bits.mv_mode];
  1908.     }
  1909.  
  1910.     if (pic_param->sequence_fields.bits.interlace == 1 &&
  1911.         pic_param->picture_fields.bits.frame_coding_mode != 0) { /* frame-interlace or field-interlace */
  1912.         /* FIXME: calculate reference field picture polarity */
  1913.         assert(0);
  1914.         ref_field_pic_polarity = 0;
  1915.     }
  1916.  
  1917.     if (pic_param->b_picture_fraction < 21)
  1918.         scale_factor = b_picture_scale_factor[pic_param->b_picture_fraction];
  1919.  
  1920.     picture_type = va_to_gen7_vc1_pic_type[pic_param->picture_fields.bits.picture_type];
  1921.    
  1922.     if (profile == GEN7_VC1_ADVANCED_PROFILE &&
  1923.         picture_type == GEN7_VC1_I_PICTURE)
  1924.         picture_type = GEN7_VC1_BI_PICTURE;
  1925.  
  1926.     if (picture_type == GEN7_VC1_I_PICTURE || picture_type == GEN7_VC1_BI_PICTURE) /* I picture */
  1927.         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx2;
  1928.     else {
  1929.         trans_ac_y = pic_param->transform_fields.bits.transform_ac_codingset_idx1;
  1930.  
  1931.         /*
  1932.          * 8.3.6.2.1 Transform Type Selection
  1933.          * If variable-sized transform coding is not enabled,
  1934.          * then the 8x8 transform shall be used for all blocks.
  1935.          * it is also MFX_VC1_PIC_STATE requirement.
  1936.          */
  1937.         if (pic_param->transform_fields.bits.variable_sized_transform_flag == 0) {
  1938.             pic_param->transform_fields.bits.mb_level_transform_type_flag   = 1;
  1939.             pic_param->transform_fields.bits.frame_level_transform_type     = 0;
  1940.         }
  1941.     }
  1942.  
  1943.     if (picture_type == GEN7_VC1_B_PICTURE) {
  1944.         struct gen7_vc1_surface *gen7_vc1_surface = NULL;
  1945.  
  1946.         obj_surface = SURFACE(pic_param->backward_reference_picture);
  1947.         assert(obj_surface);
  1948.         gen7_vc1_surface = obj_surface->private_data;
  1949.  
  1950.         if (!gen7_vc1_surface ||
  1951.             (va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_I_PICTURE ||
  1952.              va_to_gen7_vc1_pic_type[gen7_vc1_surface->picture_type] == GEN7_VC1_BI_PICTURE))
  1953.             dmv_surface_valid = 0;
  1954.         else
  1955.             dmv_surface_valid = 1;
  1956.     }
  1957.  
  1958.     assert(pic_param->picture_fields.bits.frame_coding_mode < 3);
  1959.  
  1960.     if (pic_param->picture_fields.bits.frame_coding_mode < 2)
  1961.         fcm = pic_param->picture_fields.bits.frame_coding_mode;
  1962.     else {
  1963.         if (pic_param->picture_fields.bits.top_field_first)
  1964.             fcm = 2;
  1965.         else
  1966.             fcm = 3;
  1967.     }
  1968.  
  1969.     if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_B_PICTURE) { /* B picture */
  1970.         brfd = pic_param->reference_fields.bits.reference_distance;
  1971.         brfd = (scale_factor * brfd) >> 8;
  1972.         brfd = pic_param->reference_fields.bits.reference_distance - brfd - 1;
  1973.  
  1974.         if (brfd < 0)
  1975.             brfd = 0;
  1976.     }
  1977.  
  1978.     overlap = 0;
  1979.     if (profile != GEN7_VC1_ADVANCED_PROFILE){
  1980.         if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9 &&
  1981.             pic_param->picture_fields.bits.picture_type != GEN7_VC1_B_PICTURE) {
  1982.             overlap = 1;
  1983.         }
  1984.     }else {
  1985.         if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_P_PICTURE &&
  1986.              pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
  1987.               overlap = 1;
  1988.         }
  1989.         if (pic_param->picture_fields.bits.picture_type == GEN7_VC1_I_PICTURE ||
  1990.             pic_param->picture_fields.bits.picture_type == GEN7_VC1_BI_PICTURE){
  1991.              if (pic_param->pic_quantizer_fields.bits.pic_quantizer_scale >= 9){
  1992.                 overlap = 1;
  1993.              } else if (va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 2 ||
  1994.                         va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] == 3) {
  1995.                  overlap = 1;
  1996.              }
  1997.         }
  1998.     }
  1999.  
  2000.     assert(pic_param->conditional_overlap_flag < 3);
  2001.     assert(pic_param->mv_fields.bits.mv_table < 4); /* FIXME: interlace mode */
  2002.  
  2003.     if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPelBilinear ||
  2004.         (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
  2005.          pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPelBilinear))
  2006.         interpolation_mode = 9; /* Half-pel bilinear */
  2007.     else if (pic_param->mv_fields.bits.mv_mode == VAMvMode1MvHalfPel ||
  2008.              (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation &&
  2009.               pic_param->mv_fields.bits.mv_mode2 == VAMvMode1MvHalfPel))
  2010.         interpolation_mode = 1; /* Half-pel bicubic */
  2011.     else
  2012.         interpolation_mode = 0; /* Quarter-pel bicubic */
  2013.  
  2014.     BEGIN_BCS_BATCH(batch, 6);
  2015.     OUT_BCS_BATCH(batch, MFD_VC1_LONG_PIC_STATE | (6 - 2));
  2016.     OUT_BCS_BATCH(batch,
  2017.                   (((ALIGN(pic_param->coded_height, 16) / 16) - 1) << 16) |
  2018.                   ((ALIGN(pic_param->coded_width, 16) / 16) - 1));
  2019.     OUT_BCS_BATCH(batch,
  2020.                   ((ALIGN(pic_param->coded_width, 16) / 16 + 1) / 2 - 1) << 24 |
  2021.                   dmv_surface_valid << 15 |
  2022.                   (pic_param->pic_quantizer_fields.bits.quantizer == 0) << 14 | /* implicit quantizer */
  2023.                   pic_param->rounding_control << 13 |
  2024.                   pic_param->sequence_fields.bits.syncmarker << 12 |
  2025.                   interpolation_mode << 8 |
  2026.                   0 << 7 | /* FIXME: scale up or down ??? */
  2027.                   pic_param->range_reduction_frame << 6 |
  2028.                   pic_param->entrypoint_fields.bits.loopfilter << 5 |
  2029.                   overlap << 4 |
  2030.                   !pic_param->picture_fields.bits.is_first_field << 3 |
  2031.                   (pic_param->sequence_fields.bits.profile == 3) << 0);
  2032.     OUT_BCS_BATCH(batch,
  2033.                   va_to_gen7_vc1_condover[pic_param->conditional_overlap_flag] << 29 |
  2034.                   picture_type << 26 |
  2035.                   fcm << 24 |
  2036.                   alt_pq << 16 |
  2037.                   pic_param->pic_quantizer_fields.bits.pic_quantizer_scale << 8 |
  2038.                   scale_factor << 0);
  2039.     OUT_BCS_BATCH(batch,
  2040.                   unified_mv_mode << 28 |
  2041.                   pic_param->mv_fields.bits.four_mv_switch << 27 |
  2042.                   pic_param->fast_uvmc_flag << 26 |
  2043.                   ref_field_pic_polarity << 25 |
  2044.                   pic_param->reference_fields.bits.num_reference_pictures << 24 |
  2045.                   pic_param->reference_fields.bits.reference_distance << 20 |
  2046.                   pic_param->reference_fields.bits.reference_distance << 16 | /* FIXME: ??? */
  2047.                   pic_param->mv_fields.bits.extended_dmv_range << 10 |
  2048.                   pic_param->mv_fields.bits.extended_mv_range << 8 |
  2049.                   alt_pquant_edge_mask << 4 |
  2050.                   alt_pquant_config << 2 |
  2051.                   pic_param->pic_quantizer_fields.bits.half_qp << 1 |                  
  2052.                   pic_param->pic_quantizer_fields.bits.pic_quantizer_type << 0);
  2053.     OUT_BCS_BATCH(batch,
  2054.                   !!pic_param->bitplane_present.value << 31 |
  2055.                   !pic_param->bitplane_present.flags.bp_forward_mb << 30 |
  2056.                   !pic_param->bitplane_present.flags.bp_mv_type_mb << 29 |
  2057.                   !pic_param->bitplane_present.flags.bp_skip_mb << 28 |
  2058.                   !pic_param->bitplane_present.flags.bp_direct_mb << 27 |
  2059.                   !pic_param->bitplane_present.flags.bp_overflags << 26 |
  2060.                   !pic_param->bitplane_present.flags.bp_ac_pred << 25 |
  2061.                   !pic_param->bitplane_present.flags.bp_field_tx << 24 |
  2062.                   pic_param->mv_fields.bits.mv_table << 20 |
  2063.                   pic_param->mv_fields.bits.four_mv_block_pattern_table << 18 |
  2064.                   pic_param->mv_fields.bits.two_mv_block_pattern_table << 16 |
  2065.                   pic_param->transform_fields.bits.frame_level_transform_type << 12 |                  
  2066.                   pic_param->transform_fields.bits.mb_level_transform_type_flag << 11 |
  2067.                   pic_param->mb_mode_table << 8 |
  2068.                   trans_ac_y << 6 |
  2069.                   pic_param->transform_fields.bits.transform_ac_codingset_idx1 << 4 |
  2070.                   pic_param->transform_fields.bits.intra_transform_dc_table << 3 |
  2071.                   pic_param->cbp_table << 0);
  2072.     ADVANCE_BCS_BATCH(batch);
  2073. }
  2074.  
  2075. static void
  2076. gen75_mfd_vc1_pred_pipe_state(VADriverContextP ctx,
  2077.                              struct decode_state *decode_state,
  2078.                              struct gen7_mfd_context *gen7_mfd_context)
  2079. {
  2080.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  2081.     VAPictureParameterBufferVC1 *pic_param;
  2082.     int intensitycomp_single;
  2083.  
  2084.     assert(decode_state->pic_param && decode_state->pic_param->buffer);
  2085.     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
  2086.  
  2087.     assert(decode_state->pic_param && decode_state->pic_param->buffer);
  2088.     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
  2089.     intensitycomp_single = (pic_param->mv_fields.bits.mv_mode == VAMvModeIntensityCompensation);
  2090.  
  2091.     BEGIN_BCS_BATCH(batch, 6);
  2092.     OUT_BCS_BATCH(batch, MFX_VC1_PRED_PIPE_STATE | (6 - 2));
  2093.     OUT_BCS_BATCH(batch,
  2094.                   0 << 14 | /* FIXME: double ??? */
  2095.                   0 << 12 |
  2096.                   intensitycomp_single << 10 |
  2097.                   intensitycomp_single << 8 |
  2098.                   0 << 4 | /* FIXME: interlace mode */
  2099.                   0);
  2100.     OUT_BCS_BATCH(batch,
  2101.                   pic_param->luma_shift << 16 |
  2102.                   pic_param->luma_scale << 0); /* FIXME: Luma Scaling */
  2103.     OUT_BCS_BATCH(batch, 0);
  2104.     OUT_BCS_BATCH(batch, 0);
  2105.     OUT_BCS_BATCH(batch, 0);
  2106.     ADVANCE_BCS_BATCH(batch);
  2107. }
  2108.  
  2109. static void
  2110. gen75_mfd_vc1_directmode_state_bplus(VADriverContextP ctx,
  2111.                               struct decode_state *decode_state,
  2112.                               struct gen7_mfd_context *gen7_mfd_context)
  2113. {
  2114.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  2115.     VAPictureParameterBufferVC1 *pic_param;
  2116.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2117.     struct object_surface *obj_surface;
  2118.     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
  2119.  
  2120.     assert(decode_state->pic_param && decode_state->pic_param->buffer);
  2121.     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
  2122.  
  2123.     obj_surface = SURFACE(decode_state->current_render_target);
  2124.  
  2125.     if (obj_surface && obj_surface->private_data) {
  2126.         dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
  2127.     }
  2128.  
  2129.     obj_surface = SURFACE(pic_param->backward_reference_picture);
  2130.  
  2131.     if (obj_surface && obj_surface->private_data) {
  2132.         dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
  2133.     }
  2134.  
  2135.     BEGIN_BCS_BATCH(batch, 7);
  2136.     OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (7 - 2));
  2137.  
  2138.     if (dmv_write_buffer)
  2139.         OUT_BCS_RELOC(batch, dmv_write_buffer,
  2140.                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
  2141.                       0);
  2142.     else
  2143.         OUT_BCS_BATCH(batch, 0);
  2144.  
  2145.         OUT_BCS_BATCH(batch, 0);
  2146.         OUT_BCS_BATCH(batch, 0);
  2147.  
  2148.     if (dmv_read_buffer)
  2149.         OUT_BCS_RELOC(batch, dmv_read_buffer,
  2150.                       I915_GEM_DOMAIN_INSTRUCTION, 0,
  2151.                       0);
  2152.     else
  2153.         OUT_BCS_BATCH(batch, 0);
  2154.         OUT_BCS_BATCH(batch, 0);
  2155.         OUT_BCS_BATCH(batch, 0);
  2156.                  
  2157.     ADVANCE_BCS_BATCH(batch);
  2158. }
  2159.  
  2160. static void
  2161. gen75_mfd_vc1_directmode_state(VADriverContextP ctx,
  2162.                               struct decode_state *decode_state,
  2163.                               struct gen7_mfd_context *gen7_mfd_context)
  2164. {
  2165.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  2166.     VAPictureParameterBufferVC1 *pic_param;
  2167.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2168.     struct object_surface *obj_surface;
  2169.     dri_bo *dmv_read_buffer = NULL, *dmv_write_buffer = NULL;
  2170.  
  2171.     if (IS_STEPPING_BPLUS(i965)) {
  2172.         gen75_mfd_vc1_directmode_state_bplus(ctx, decode_state, gen7_mfd_context);
  2173.         return;
  2174.     }
  2175.     assert(decode_state->pic_param && decode_state->pic_param->buffer);
  2176.     pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;
  2177.  
  2178.     obj_surface = SURFACE(decode_state->current_render_target);
  2179.  
  2180.     if (obj_surface && obj_surface->private_data) {
  2181.         dmv_write_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
  2182.     }
  2183.  
  2184.     obj_surface = SURFACE(pic_param->backward_reference_picture);
  2185.  
  2186.     if (obj_surface && obj_surface->private_data) {
  2187.         dmv_read_buffer = ((struct gen7_vc1_surface *)(obj_surface->private_data))->dmv;
  2188.     }
  2189.  
  2190.     BEGIN_BCS_BATCH(batch, 3);
  2191.     OUT_BCS_BATCH(batch, MFX_VC1_DIRECTMODE_STATE | (3 - 2));
  2192.  
  2193.     if (dmv_write_buffer)
  2194.         OUT_BCS_RELOC(batch, dmv_write_buffer,
  2195.                       I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
  2196.                       0);
  2197.     else
  2198.         OUT_BCS_BATCH(batch, 0);
  2199.  
  2200.     if (dmv_read_buffer)
  2201.         OUT_BCS_RELOC(batch, dmv_read_buffer,
  2202.                       I915_GEM_DOMAIN_INSTRUCTION, 0,
  2203.                       0);
  2204.     else
  2205.         OUT_BCS_BATCH(batch, 0);
  2206.                  
  2207.     ADVANCE_BCS_BATCH(batch);
  2208. }
  2209.  
  2210. static int
  2211. gen75_mfd_vc1_get_macroblock_bit_offset(uint8_t *buf, int in_slice_data_bit_offset, int profile)
  2212. {
  2213.     int out_slice_data_bit_offset;
  2214.     int slice_header_size = in_slice_data_bit_offset / 8;
  2215.     int i, j;
  2216.  
  2217.     if (profile != 3)
  2218.         out_slice_data_bit_offset = in_slice_data_bit_offset;
  2219.     else {
  2220.         for (i = 0, j = 0; i < slice_header_size; i++, j++) {
  2221.             if (!buf[j] && !buf[j + 1] && buf[j + 2] == 3 && buf[j + 3] < 4) {
  2222.                 i++, j += 2;
  2223.             }
  2224.         }
  2225.  
  2226.         out_slice_data_bit_offset = 8 * j + in_slice_data_bit_offset % 8;
  2227.     }
  2228.  
  2229.     return out_slice_data_bit_offset;
  2230. }
  2231.  
  2232. static void
  2233. gen75_mfd_vc1_bsd_object(VADriverContextP ctx,
  2234.                         VAPictureParameterBufferVC1 *pic_param,
  2235.                         VASliceParameterBufferVC1 *slice_param,
  2236.                         VASliceParameterBufferVC1 *next_slice_param,
  2237.                         dri_bo *slice_data_bo,
  2238.                         struct gen7_mfd_context *gen7_mfd_context)
  2239. {
  2240.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  2241.     int next_slice_start_vert_pos;
  2242.     int macroblock_offset;
  2243.     uint8_t *slice_data = NULL;
  2244.  
  2245.     dri_bo_map(slice_data_bo, 0);
  2246.     slice_data = (uint8_t *)(slice_data_bo->virtual + slice_param->slice_data_offset);
  2247.     macroblock_offset = gen75_mfd_vc1_get_macroblock_bit_offset(slice_data,
  2248.                                                                slice_param->macroblock_offset,
  2249.                                                                pic_param->sequence_fields.bits.profile);
  2250.     dri_bo_unmap(slice_data_bo);
  2251.  
  2252.     if (next_slice_param)
  2253.         next_slice_start_vert_pos = next_slice_param->slice_vertical_position;
  2254.     else
  2255.         next_slice_start_vert_pos = ALIGN(pic_param->coded_height, 16) / 16;
  2256.  
  2257.     BEGIN_BCS_BATCH(batch, 5);
  2258.     OUT_BCS_BATCH(batch, MFD_VC1_BSD_OBJECT | (5 - 2));
  2259.     OUT_BCS_BATCH(batch,
  2260.                   slice_param->slice_data_size - (macroblock_offset >> 3));
  2261.     OUT_BCS_BATCH(batch,
  2262.                   slice_param->slice_data_offset + (macroblock_offset >> 3));
  2263.     OUT_BCS_BATCH(batch,
  2264.                   slice_param->slice_vertical_position << 16 |
  2265.                   next_slice_start_vert_pos << 0);
  2266.     OUT_BCS_BATCH(batch,
  2267.                   (macroblock_offset & 0x7));
  2268.     ADVANCE_BCS_BATCH(batch);
  2269. }
  2270.  
/*
 * Top-level VC-1 picture decode: allocates per-picture state, then emits the
 * full MFX command sequence for the picture followed by one BSD object per
 * slice. The command ordering (pipe mode select, surface, buffer addresses,
 * pic/pred/directmode state, then slices) is fixed — do not reorder.
 */
static void
gen75_mfd_vc1_decode_picture(VADriverContextP ctx,
                            struct decode_state *decode_state,
                            struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    VAPictureParameterBufferVC1 *pic_param;
    VASliceParameterBufferVC1 *slice_param, *next_slice_param, *next_slice_group_param;
    dri_bo *slice_data_bo;
    int i, j;

    assert(decode_state->pic_param && decode_state->pic_param->buffer);
    pic_param = (VAPictureParameterBufferVC1 *)decode_state->pic_param->buffer;

    gen75_mfd_vc1_decode_init(ctx, decode_state, gen7_mfd_context);
    intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen75_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen75_mfd_surface_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen75_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen75_mfd_bsp_buf_base_addr_state(ctx, decode_state, MFX_FORMAT_VC1, gen7_mfd_context);
    gen75_mfd_vc1_pic_state(ctx, decode_state, gen7_mfd_context);
    gen75_mfd_vc1_pred_pipe_state(ctx, decode_state, gen7_mfd_context);
    gen75_mfd_vc1_directmode_state(ctx, decode_state, gen7_mfd_context);

    /* Outer loop: slice parameter buffers (groups); inner loop: the slices
     * within each group. Each slice needs the following slice's vertical
     * position, looking across group boundaries when necessary. */
    for (j = 0; j < decode_state->num_slice_params; j++) {
        assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
        slice_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j]->buffer;
        slice_data_bo = decode_state->slice_datas[j]->bo;
        gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_VC1, gen7_mfd_context);

        if (j == decode_state->num_slice_params - 1)
            next_slice_group_param = NULL;
        else
            next_slice_group_param = (VASliceParameterBufferVC1 *)decode_state->slice_params[j + 1]->buffer;

        for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
            /* Only whole-slice data buffers are supported. */
            assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);

            if (i < decode_state->slice_params[j]->num_elements - 1)
                next_slice_param = slice_param + 1;
            else
                next_slice_param = next_slice_group_param;

            gen75_mfd_vc1_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
            slice_param++;
        }
    }

    intel_batchbuffer_end_atomic(batch);
    intel_batchbuffer_flush(batch);
}
  2323.  
  2324. static void
  2325. gen75_mfd_jpeg_decode_init(VADriverContextP ctx,
  2326.                           struct decode_state *decode_state,
  2327.                           struct gen7_mfd_context *gen7_mfd_context)
  2328. {
  2329.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2330.     struct object_surface *obj_surface;
  2331.     VAPictureParameterBufferJPEGBaseline *pic_param;
  2332.     int subsampling = SUBSAMPLE_YUV420;
  2333.  
  2334.     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
  2335.  
  2336.     if (pic_param->num_components == 1)
  2337.         subsampling = SUBSAMPLE_YUV400;
  2338.     else if (pic_param->num_components == 3) {
  2339.         int h1 = pic_param->components[0].h_sampling_factor;
  2340.         int h2 = pic_param->components[1].h_sampling_factor;
  2341.         int h3 = pic_param->components[2].h_sampling_factor;
  2342.         int v1 = pic_param->components[0].v_sampling_factor;
  2343.         int v2 = pic_param->components[1].v_sampling_factor;
  2344.         int v3 = pic_param->components[2].v_sampling_factor;
  2345.  
  2346.         if (h1 == 2 && h2 == 1 && h3 == 1 &&
  2347.             v1 == 2 && v2 == 1 && v3 == 1)
  2348.             subsampling = SUBSAMPLE_YUV420;
  2349.         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
  2350.                  v1 == 1 && v2 == 1 && v3 == 1)
  2351.             subsampling = SUBSAMPLE_YUV422H;
  2352.         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
  2353.                  v1 == 1 && v2 == 1 && v3 == 1)
  2354.             subsampling = SUBSAMPLE_YUV444;
  2355.         else if (h1 == 4 && h2 == 1 && h3 == 1 &&
  2356.                  v1 == 1 && v2 == 1 && v3 == 1)
  2357.             subsampling = SUBSAMPLE_YUV411;
  2358.         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
  2359.                  v1 == 2 && v2 == 1 && v3 == 1)
  2360.             subsampling = SUBSAMPLE_YUV422V;
  2361.         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
  2362.                  v1 == 2 && v2 == 2 && v3 == 2)
  2363.             subsampling = SUBSAMPLE_YUV422H;
  2364.         else if (h2 == 2 && h2 == 2 && h3 == 2 &&
  2365.                  v1 == 2 && v2 == 1 && v3 == 1)
  2366.             subsampling = SUBSAMPLE_YUV422V;
  2367.         else
  2368.             assert(0);
  2369.     } else {
  2370.         assert(0);
  2371.     }
  2372.  
  2373.     /* Current decoded picture */
  2374.     obj_surface = SURFACE(decode_state->current_render_target);
  2375.     assert(obj_surface);
  2376.     i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('I','M','C','1'), subsampling);
  2377.  
  2378.     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
  2379.     gen7_mfd_context->pre_deblocking_output.bo = obj_surface->bo;
  2380.     dri_bo_reference(gen7_mfd_context->pre_deblocking_output.bo);
  2381.     gen7_mfd_context->pre_deblocking_output.valid = 1;
  2382.  
  2383.     gen7_mfd_context->post_deblocking_output.bo = NULL;
  2384.     gen7_mfd_context->post_deblocking_output.valid = 0;
  2385.  
  2386.     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
  2387.     gen7_mfd_context->intra_row_store_scratch_buffer.valid = 0;
  2388.  
  2389.     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
  2390.     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.valid = 0;
  2391.  
  2392.     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
  2393.     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.valid = 0;
  2394.  
  2395.     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
  2396.     gen7_mfd_context->mpr_row_store_scratch_buffer.valid = 0;
  2397.  
  2398.     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
  2399.     gen7_mfd_context->bitplane_read_buffer.valid = 0;
  2400. }
  2401.  
/* Maps VA-API rotation indices (0/90/180/270 degrees) to the Gen7 JPEG
 * rotation field values. Indexed 0..3. */
static const int va_to_gen7_jpeg_rotation[4] = {
    GEN7_JPEG_ROTATION_0,
    GEN7_JPEG_ROTATION_90,
    GEN7_JPEG_ROTATION_180,
    GEN7_JPEG_ROTATION_270
};
  2408.  
  2409. static void
  2410. gen75_mfd_jpeg_pic_state(VADriverContextP ctx,
  2411.                         struct decode_state *decode_state,
  2412.                         struct gen7_mfd_context *gen7_mfd_context)
  2413. {
  2414.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  2415.     VAPictureParameterBufferJPEGBaseline *pic_param;
  2416.     int chroma_type = GEN7_YUV420;
  2417.     int frame_width_in_blks;
  2418.     int frame_height_in_blks;
  2419.  
  2420.     assert(decode_state->pic_param && decode_state->pic_param->buffer);
  2421.     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
  2422.  
  2423.     if (pic_param->num_components == 1)
  2424.         chroma_type = GEN7_YUV400;
  2425.     else if (pic_param->num_components == 3) {
  2426.         int h1 = pic_param->components[0].h_sampling_factor;
  2427.         int h2 = pic_param->components[1].h_sampling_factor;
  2428.         int h3 = pic_param->components[2].h_sampling_factor;
  2429.         int v1 = pic_param->components[0].v_sampling_factor;
  2430.         int v2 = pic_param->components[1].v_sampling_factor;
  2431.         int v3 = pic_param->components[2].v_sampling_factor;
  2432.  
  2433.         if (h1 == 2 && h2 == 1 && h3 == 1 &&
  2434.             v1 == 2 && v2 == 1 && v3 == 1)
  2435.             chroma_type = GEN7_YUV420;
  2436.         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
  2437.                  v1 == 1 && v2 == 1 && v3 == 1)
  2438.             chroma_type = GEN7_YUV422H_2Y;
  2439.         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
  2440.                  v1 == 1 && v2 == 1 && v3 == 1)
  2441.             chroma_type = GEN7_YUV444;
  2442.         else if (h1 == 4 && h2 == 1 && h3 == 1 &&
  2443.                  v1 == 1 && v2 == 1 && v3 == 1)
  2444.             chroma_type = GEN7_YUV411;
  2445.         else if (h1 == 1 && h2 == 1 && h3 == 1 &&
  2446.                  v1 == 2 && v2 == 1 && v3 == 1)
  2447.             chroma_type = GEN7_YUV422V_2Y;
  2448.         else if (h1 == 2 && h2 == 1 && h3 == 1 &&
  2449.                  v1 == 2 && v2 == 2 && v3 == 2)
  2450.             chroma_type = GEN7_YUV422H_4Y;
  2451.         else if (h2 == 2 && h2 == 2 && h3 == 2 &&
  2452.                  v1 == 2 && v2 == 1 && v3 == 1)
  2453.             chroma_type = GEN7_YUV422V_4Y;
  2454.         else
  2455.             assert(0);
  2456.     }
  2457.  
  2458.     if (chroma_type == GEN7_YUV400 ||
  2459.         chroma_type == GEN7_YUV444 ||
  2460.         chroma_type == GEN7_YUV422V_2Y) {
  2461.         frame_width_in_blks = ((pic_param->picture_width + 7) / 8);
  2462.         frame_height_in_blks = ((pic_param->picture_height + 7) / 8);
  2463.     } else if (chroma_type == GEN7_YUV411) {
  2464.         frame_width_in_blks = ((pic_param->picture_width + 31) / 32) * 4;
  2465.         frame_height_in_blks = ((pic_param->picture_height + 31) / 32) * 4;
  2466.     } else {
  2467.         frame_width_in_blks = ((pic_param->picture_width + 15) / 16) * 2;
  2468.         frame_height_in_blks = ((pic_param->picture_height + 15) / 16) * 2;
  2469.     }
  2470.  
  2471.     BEGIN_BCS_BATCH(batch, 3);
  2472.     OUT_BCS_BATCH(batch, MFX_JPEG_PIC_STATE | (3 - 2));
  2473.     OUT_BCS_BATCH(batch,
  2474.                   (va_to_gen7_jpeg_rotation[0] << 4) |    /* without rotation */
  2475.                   (chroma_type << 0));
  2476.     OUT_BCS_BATCH(batch,
  2477.                   ((frame_height_in_blks - 1) << 16) |   /* FrameHeightInBlks */
  2478.                   ((frame_width_in_blks - 1) << 0));    /* FrameWidthInBlks */
  2479.     ADVANCE_BCS_BATCH(batch);
  2480. }
  2481.  
/* Hardware Huffman table ids, indexed by VA-API table slot:
 * slot 0 = luma (Y), slot 1 = chroma (UV). */
static const int va_to_gen7_jpeg_hufftable[2] = {
    MFX_HUFFTABLE_ID_Y,
    MFX_HUFFTABLE_ID_UV
};
  2486.  
/*
 * Emit MFX_JPEG_HUFF_TABLE_STATE for each of the first num_tables Huffman
 * tables (Y, then UV). Each command is 53 dwords: 1 header + 1 table id +
 * (12 + 12 + 16 + 164) bytes of table data = 51 data dwords.
 */
static void
gen75_mfd_jpeg_huff_table_state(VADriverContextP ctx,
                               struct decode_state *decode_state,
                               struct gen7_mfd_context *gen7_mfd_context,
                               int num_tables)
{
    VAHuffmanTableBufferJPEGBaseline *huffman_table;
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int index;

    /* Nothing to program without an application-supplied Huffman table. */
    if (!decode_state->huffman_table || !decode_state->huffman_table->buffer)
        return;

    huffman_table = (VAHuffmanTableBufferJPEGBaseline *)decode_state->huffman_table->buffer;

    /* NOTE(review): va_to_gen7_jpeg_hufftable has only 2 entries, so
     * callers must pass num_tables <= 2 — confirm at call sites. */
    for (index = 0; index < num_tables; index++) {
        int id = va_to_gen7_jpeg_hufftable[index];
        BEGIN_BCS_BATCH(batch, 53);
        OUT_BCS_BATCH(batch, MFX_JPEG_HUFF_TABLE_STATE | (53 - 2));
        OUT_BCS_BATCH(batch, id);
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_dc_codes, 12);
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].dc_values, 12);
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].num_ac_codes, 16);
        /* 164 bytes — presumably the 162 ac_values plus 2 trailing pad
         * bytes of the VA-API struct for dword alignment; verify against
         * va_dec_jpeg.h before changing. */
        intel_batchbuffer_data(batch, huffman_table->huffman_table[index].ac_values, 164);
        ADVANCE_BCS_BATCH(batch);
    }
}
  2514.  
/* Maps a 1-based JPEG component index (component_id relative to the first
 * component, plus 1) to the hardware quantizer-matrix type. Index 0 is a
 * sentinel and must not be selected. */
static const int va_to_gen7_jpeg_qm[5] = {
    -1,
    MFX_QM_JPEG_LUMA_Y_QUANTIZER_MATRIX,
    MFX_QM_JPEG_CHROMA_CB_QUANTIZER_MATRIX,
    MFX_QM_JPEG_CHROMA_CR_QUANTIZER_MATRIX,
    MFX_QM_JPEG_ALPHA_QUANTIZER_MATRIX
};
  2522.  
  2523. static void
  2524. gen75_mfd_jpeg_qm_state(VADriverContextP ctx,
  2525.                        struct decode_state *decode_state,
  2526.                        struct gen7_mfd_context *gen7_mfd_context)
  2527. {
  2528.     VAPictureParameterBufferJPEGBaseline *pic_param;
  2529.     VAIQMatrixBufferJPEGBaseline *iq_matrix;
  2530.     int index;
  2531.  
  2532.     if (!decode_state->iq_matrix || !decode_state->iq_matrix->buffer)
  2533.         return;
  2534.  
  2535.     iq_matrix = (VAIQMatrixBufferJPEGBaseline *)decode_state->iq_matrix->buffer;
  2536.     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
  2537.  
  2538.     assert(pic_param->num_components <= 3);
  2539.  
  2540.     for (index = 0; index < pic_param->num_components; index++) {
  2541.         int qm_type = va_to_gen7_jpeg_qm[pic_param->components[index].component_id - pic_param->components[0].component_id + 1];
  2542.         unsigned char *qm = iq_matrix->quantiser_table[pic_param->components[index].quantiser_table_selector];
  2543.         unsigned char raster_qm[64];
  2544.         int j;
  2545.  
  2546.         if (!iq_matrix->load_quantiser_table[pic_param->components[index].quantiser_table_selector])
  2547.             continue;
  2548.  
  2549.         for (j = 0; j < 64; j++)
  2550.             raster_qm[zigzag_direct[j]] = qm[j];
  2551.  
  2552.         gen75_mfd_qm_state(ctx, qm_type, raster_qm, 64, gen7_mfd_context);
  2553.     }
  2554. }
  2555.  
  2556. static void
  2557. gen75_mfd_jpeg_bsd_object(VADriverContextP ctx,
  2558.                          VAPictureParameterBufferJPEGBaseline *pic_param,
  2559.                          VASliceParameterBufferJPEGBaseline *slice_param,
  2560.                          VASliceParameterBufferJPEGBaseline *next_slice_param,
  2561.                          dri_bo *slice_data_bo,
  2562.                          struct gen7_mfd_context *gen7_mfd_context)
  2563. {
  2564.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  2565.     int scan_component_mask = 0;
  2566.     int i;
  2567.  
  2568.     assert(slice_param->num_components > 0);
  2569.     assert(slice_param->num_components < 4);
  2570.     assert(slice_param->num_components <= pic_param->num_components);
  2571.  
  2572.     for (i = 0; i < slice_param->num_components; i++) {
  2573.         switch (slice_param->components[i].component_selector - pic_param->components[0].component_id + 1) {
  2574.         case 1:
  2575.             scan_component_mask |= (1 << 0);
  2576.             break;
  2577.         case 2:
  2578.             scan_component_mask |= (1 << 1);
  2579.             break;
  2580.         case 3:
  2581.             scan_component_mask |= (1 << 2);
  2582.             break;
  2583.         default:
  2584.             assert(0);
  2585.             break;
  2586.         }
  2587.     }
  2588.  
  2589.     BEGIN_BCS_BATCH(batch, 6);
  2590.     OUT_BCS_BATCH(batch, MFD_JPEG_BSD_OBJECT | (6 - 2));
  2591.     OUT_BCS_BATCH(batch,
  2592.                   slice_param->slice_data_size);
  2593.     OUT_BCS_BATCH(batch,
  2594.                   slice_param->slice_data_offset);
  2595.     OUT_BCS_BATCH(batch,
  2596.                   slice_param->slice_horizontal_position << 16 |
  2597.                   slice_param->slice_vertical_position << 0);
  2598.     OUT_BCS_BATCH(batch,
  2599.                   ((slice_param->num_components != 1) << 30) |  /* interleaved */
  2600.                   (scan_component_mask << 27) |                 /* scan components */
  2601.                   (0 << 26) |   /* disable interrupt allowed */
  2602.                   (slice_param->num_mcus << 0));                /* MCU count */
  2603.     OUT_BCS_BATCH(batch,
  2604.                   (slice_param->restart_interval << 0));    /* RestartInterval */
  2605.     ADVANCE_BCS_BATCH(batch);
  2606. }
  2607.  
  2608. /* Workaround for JPEG decoding on Ivybridge */
  2609.  
  2610. VAStatus
  2611. i965_DestroySurfaces(VADriverContextP ctx,
  2612.                      VASurfaceID *surface_list,
  2613.                      int num_surfaces);
  2614. VAStatus
  2615. i965_CreateSurfaces(VADriverContextP ctx,
  2616.                     int width,
  2617.                     int height,
  2618.                     int format,
  2619.                     int num_surfaces,
  2620.                     VASurfaceID *surfaces);
  2621.  
/* Tiny pre-encoded 16x16 clip decoded once as the Ivybridge JPEG
 * workaround (see gen75_jpeg_wa_init below). The data array holds the
 * pre-stripped slice payload uploaded into the WA slice-data BO. */
static struct {
    int width;            /* clip width in pixels */
    int height;           /* clip height in pixels */
    unsigned char data[32];  /* slice bitstream bytes */
    int data_size;        /* number of valid bytes in data[] */
    int data_bit_offset;  /* bit offset of the first macroblock in data[] */
    int qp;               /* quantization parameter for the WA slice */
} gen7_jpeg_wa_clip = {
    16,
    16,
    {
        0x65, 0xb8, 0x40, 0x32, 0x13, 0xfd, 0x06, 0x6c,
        0xfc, 0x0a, 0x50, 0x71, 0x5c, 0x00
    },
    14,
    40,
    28,
};
  2640.  
/*
 * Prepare the JPEG-workaround resources: (re)create the scratch VA surface
 * the workaround clip decodes into, back it with an NV12 BO, and lazily
 * allocate + fill the slice-data BO with the canned bitstream from
 * gen7_jpeg_wa_clip.
 */
static void
gen75_jpeg_wa_init(VADriverContextP ctx,
                  struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    VAStatus status;
    struct object_surface *obj_surface;

    /* Drop any surface left over from a previous picture. */
    if (gen7_mfd_context->jpeg_wa_surface_id != VA_INVALID_SURFACE)
        i965_DestroySurfaces(ctx,
                             &gen7_mfd_context->jpeg_wa_surface_id,
                             1);

    status = i965_CreateSurfaces(ctx,
                                 gen7_jpeg_wa_clip.width,
                                 gen7_jpeg_wa_clip.height,
                                 VA_RT_FORMAT_YUV420,
                                 1,
                                 &gen7_mfd_context->jpeg_wa_surface_id);
    assert(status == VA_STATUS_SUCCESS);

    obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
    assert(obj_surface);
    i965_check_alloc_surface_bo(ctx, obj_surface, 1, VA_FOURCC('N', 'V', '1', '2'), SUBSAMPLE_YUV420);

    /* The slice-data BO is allocated once and reused; the canned clip
     * payload is uploaded only on first allocation. */
    if (!gen7_mfd_context->jpeg_wa_slice_data_bo) {
        gen7_mfd_context->jpeg_wa_slice_data_bo = dri_bo_alloc(i965->intel.bufmgr,
                                                               "JPEG WA data",
                                                               0x1000,
                                                               0x1000);
        dri_bo_subdata(gen7_mfd_context->jpeg_wa_slice_data_bo,
                       0,
                       gen7_jpeg_wa_clip.data_size,
                       gen7_jpeg_wa_clip.data);
    }
}
  2677.  
/*
 * MFX_PIPE_MODE_SELECT for the JPEG workaround: put the BSD engine in
 * AVC long-format VLD decode mode with pre-deblocking output enabled
 * and stream-out disabled, matching the canned clip about to be fed in.
 */
static void
gen75_jpeg_wa_pipe_mode_select(VADriverContextP ctx,
                              struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 5);
    OUT_BCS_BATCH(batch, MFX_PIPE_MODE_SELECT | (5 - 2));
    OUT_BCS_BATCH(batch,
                  (MFX_LONG_MODE << 17) | /* Currently only support long format */
                  (MFD_MODE_VLD << 15) | /* VLD mode */
                  (0 << 10) | /* disable Stream-Out */
                  (0 << 9)  | /* Post Deblocking Output */
                  (1 << 8)  | /* Pre Deblocking Output */
                  (0 << 5)  | /* not in stitch mode */
                  (MFX_CODEC_DECODE << 4)  | /* decoding mode */
                  (MFX_FORMAT_AVC << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 4)  | /* terminate if AVC motion and POC table error occurs */
                  (0 << 3)  | /* terminate if AVC mbdata error occurs */
                  (0 << 2)  | /* terminate if AVC CABAC/CAVLC decode error occurs */
                  (0 << 1)  |
                  (0 << 0));
    OUT_BCS_BATCH(batch, 0); /* pic status/error report id */
    OUT_BCS_BATCH(batch, 0); /* reserved */
    ADVANCE_BCS_BATCH(batch);
}
  2705.  
/*
 * MFX_SURFACE_STATE for the JPEG workaround: describe the WA surface as
 * a tiled (Y-major) NV12 (planar 4:2:0, interleaved chroma) destination
 * using the surface object's real dimensions, pitch and Cb offset.
 */
static void
gen75_jpeg_wa_surface_state(VADriverContextP ctx,
                           struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFX_SURFACE_STATE | (6 - 2));
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  ((obj_surface->orig_width - 1) << 18) |
                  ((obj_surface->orig_height - 1) << 4));
    OUT_BCS_BATCH(batch,
                  (MFX_SURFACE_PLANAR_420_8 << 28) | /* 420 planar YUV surface */
                  (1 << 27) | /* interleave chroma, set to 0 for JPEG */
                  (0 << 22) | /* surface object control state, ignored */
                  ((obj_surface->width - 1) << 3) | /* pitch */
                  (0 << 2)  | /* must be 0 */
                  (1 << 1)  | /* must be tiled */
                  (I965_TILEWALK_YMAJOR << 0));  /* tile walk, must be 1 */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for U(Cb), must be 0 */
                  (obj_surface->y_cb_offset << 0)); /* Y offset for U(Cb) */
    OUT_BCS_BATCH(batch,
                  (0 << 16) | /* X offset for V(Cr), must be 0 */
                  (0 << 0)); /* Y offset for V(Cr), must be 0 for video codec, non-zoro for JPEG */
    ADVANCE_BCS_BATCH(batch);
}
  2736.  
  2737.  
/*
 * MFX_PIPE_BUF_ADDR_STATE for the JPEG workaround on B+ stepping
 * (61-dword layout).  Only two addresses are real: the WA surface bo
 * as the pre-deblocking destination and a temporary intra-row-store
 * scratch bo; every other address field is programmed as zero.
 */
static void
gen75_jpeg_wa_pipe_buf_addr_state_bplus(VADriverContextP ctx,
                                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    dri_bo *intra_bo;
    int i;

    /* Scratch bo for the intra row store; unreferenced at the end of this
     * function — the kernel keeps it alive while the batch references it. */
    intra_bo = dri_bo_alloc(i965->intel.bufmgr,
                            "intra row store",
                            128 * 64,
                            0x1000);

    BEGIN_BCS_BATCH(batch, 61);
    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (61 - 2));
    /* DW 1-3: pre-deblocking output — the WA surface */
    OUT_BCS_RELOC(batch,
                  obj_surface->bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);


    OUT_BCS_BATCH(batch, 0); /* post deblocking */
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);

        /* uncompressed-video & stream out 7-12 */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);

        /* the DW 13-15 is for intra row store scratch */
    OUT_BCS_RELOC(batch,
                  intra_bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);

        /* the DW 16-18 is for deblocking filter */
    OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);

    /* DW 19..50 */
    for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }
    OUT_BCS_BATCH(batch, 0);

        /* the DW52-54 is for mb status address */
    OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        /* the DW56-60 is for ILDB & second ILDB address */
    OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);

    /* Batch holds its own reference; release ours. */
    dri_bo_unreference(intra_bo);
}
  2811.  
/*
 * MFX_PIPE_BUF_ADDR_STATE for the JPEG workaround.  On B+ stepping this
 * delegates to the 61-dword bplus variant; otherwise it emits the older
 * 25-dword layout.  As in the bplus path, only the WA surface bo and a
 * throw-away intra-row-store scratch bo are programmed.
 */
static void
gen75_jpeg_wa_pipe_buf_addr_state(VADriverContextP ctx,
                                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface = SURFACE(gen7_mfd_context->jpeg_wa_surface_id);
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    dri_bo *intra_bo;
    int i;

    if (IS_STEPPING_BPLUS(i965)) {
        gen75_jpeg_wa_pipe_buf_addr_state_bplus(ctx, gen7_mfd_context);
        return;
    }
    /* Temporary intra row store scratch; released after the batch is built. */
    intra_bo = dri_bo_alloc(i965->intel.bufmgr,
                            "intra row store",
                            128 * 64,
                            0x1000);

    BEGIN_BCS_BATCH(batch, 25);
    OUT_BCS_BATCH(batch, MFX_PIPE_BUF_ADDR_STATE | (25 - 2));
    /* DW 1: pre-deblocking output — the WA surface */
    OUT_BCS_RELOC(batch,
                  obj_surface->bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);

    OUT_BCS_BATCH(batch, 0); /* post deblocking */

    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */
    OUT_BCS_BATCH(batch, 0); /* ignore for decoding */

    OUT_BCS_RELOC(batch,
                  intra_bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);

    OUT_BCS_BATCH(batch, 0); /* deblocking filter row store, unused here */

    /* DW 7..22 */
    for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
        OUT_BCS_BATCH(batch, 0);
    }

    OUT_BCS_BATCH(batch, 0);   /* ignore DW23 for decoding */
    OUT_BCS_BATCH(batch, 0);   /* ignore DW24 for decoding */
    ADVANCE_BCS_BATCH(batch);

    /* Batch holds its own reference; release ours. */
    dri_bo_unreference(intra_bo);
}
  2861.  
/*
 * MFX_BSP_BUF_BASE_ADDR_STATE for the JPEG workaround on B+ stepping
 * (10-dword layout): point the BSD/MPC and MPR row stores at two
 * freshly allocated scratch buffers that are released again once the
 * batch references them.
 */
static void
gen75_jpeg_wa_bsp_buf_base_addr_state_bplus(VADriverContextP ctx,
                                     struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    dri_bo *bsd_mpc_bo, *mpr_bo;

    bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
                              "bsd mpc row store",
                              11520, /* 1.5 * 120 * 64 */
                              0x1000);

    mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
                          "mpr row store",
                          7680, /* 1. 0 * 120 * 64 */
                          0x1000);

    BEGIN_BCS_BATCH(batch, 10);
    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (10 - 2));

    OUT_BCS_RELOC(batch,
                  bsd_mpc_bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    OUT_BCS_RELOC(batch,
                  mpr_bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* bitplane read buffer address, unused for this WA */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);

    /* Batch holds its own references; release ours. */
    dri_bo_unreference(bsd_mpc_bo);
    dri_bo_unreference(mpr_bo);
}
  2907.  
/*
 * MFX_BSP_BUF_BASE_ADDR_STATE for the JPEG workaround.  On B+ stepping
 * this delegates to the 10-dword bplus variant; otherwise it emits the
 * older 4-dword layout with the same two scratch row-store buffers.
 */
static void
gen75_jpeg_wa_bsp_buf_base_addr_state(VADriverContextP ctx,
                                     struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    dri_bo *bsd_mpc_bo, *mpr_bo;

    if (IS_STEPPING_BPLUS(i965)) {
        gen75_jpeg_wa_bsp_buf_base_addr_state_bplus(ctx, gen7_mfd_context);
        return;
    }

    bsd_mpc_bo = dri_bo_alloc(i965->intel.bufmgr,
                              "bsd mpc row store",
                              11520, /* 1.5 * 120 * 64 */
                              0x1000);

    mpr_bo = dri_bo_alloc(i965->intel.bufmgr,
                          "mpr row store",
                          7680, /* 1. 0 * 120 * 64 */
                          0x1000);

    BEGIN_BCS_BATCH(batch, 4);
    OUT_BCS_BATCH(batch, MFX_BSP_BUF_BASE_ADDR_STATE | (4 - 2));

    OUT_BCS_RELOC(batch,
                  bsd_mpc_bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);

    OUT_BCS_RELOC(batch,
                  mpr_bo,
                  I915_GEM_DOMAIN_INSTRUCTION, I915_GEM_DOMAIN_INSTRUCTION,
                  0);
    OUT_BCS_BATCH(batch, 0); /* bitplane read buffer, unused for this WA */

    ADVANCE_BCS_BATCH(batch);

    /* Batch holds its own references; release ours. */
    dri_bo_unreference(bsd_mpc_bo);
    dri_bo_unreference(mpr_bo);
}
  2950.  
  2951. static void
  2952. gen75_jpeg_wa_avc_qm_state(VADriverContextP ctx,
  2953.                           struct gen7_mfd_context *gen7_mfd_context)
  2954. {
  2955.  
  2956. }
  2957.  
/*
 * MFX_AVC_IMG_STATE for the JPEG workaround: a minimal 1x1-macroblock
 * frame (matching the 16x16 WA clip), 4:2:0 chroma, CABAC entropy
 * coding, no MBAFF.
 */
static void
gen75_jpeg_wa_avc_img_state(VADriverContextP ctx,
                           struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int img_struct = 0;         /* frame picture */
    int mbaff_frame_flag = 0;
    unsigned int width_in_mbs = 1, height_in_mbs = 1;   /* 16x16 clip */

    BEGIN_BCS_BATCH(batch, 16);
    OUT_BCS_BATCH(batch, MFX_AVC_IMG_STATE | (16 - 2));
    OUT_BCS_BATCH(batch,
                  width_in_mbs * height_in_mbs);
    OUT_BCS_BATCH(batch,
                  ((height_in_mbs - 1) << 16) |
                  ((width_in_mbs - 1) << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 24) |
                  (0 << 16) |
                  (0 << 14) |
                  (0 << 13) |
                  (0 << 12) | /* differ from GEN6 */
                  (0 << 10) |
                  (img_struct << 8));
    OUT_BCS_BATCH(batch,
                  (1 << 10) | /* 4:2:0 */
                  (1 << 7) |  /* CABAC */
                  (0 << 6) |
                  (0 << 5) |
                  (0 << 4) |
                  (0 << 3) |
                  (1 << 2) |
                  (mbaff_frame_flag << 1) |
                  (0 << 0));
    /* DW 5-15: unused for this minimal image state */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  3005.  
/*
 * MFX_AVC_DIRECTMODE_STATE for the JPEG workaround on B+ stepping
 * (71-dword layout).  The WA clip is a single intra frame, so every
 * reference surface address and POC entry is programmed as zero.
 */
static void
gen75_jpeg_wa_avc_directmode_state_bplus(VADriverContextP ctx,
                                  struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int i;

    BEGIN_BCS_BATCH(batch, 71);
    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (71 - 2));

    /* reference surfaces 0..15 */
    for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
        OUT_BCS_BATCH(batch, 0); /* top */
        OUT_BCS_BATCH(batch, 0); /* bottom */
    }

        OUT_BCS_BATCH(batch, 0);

    /* the current decoding frame/field */
    OUT_BCS_BATCH(batch, 0); /* top */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    /* POC List */
    for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
  3040.  
/*
 * MFX_AVC_DIRECTMODE_STATE for the JPEG workaround.  On B+ stepping this
 * delegates to the 71-dword bplus variant; otherwise it emits the older
 * 69-dword layout.  All reference/POC entries are zero (single intra
 * frame, no references).
 */
static void
gen75_jpeg_wa_avc_directmode_state(VADriverContextP ctx,
                                  struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int i;

    if (IS_STEPPING_BPLUS(i965)) {
        gen75_jpeg_wa_avc_directmode_state_bplus(ctx, gen7_mfd_context);
        return;
    }

    BEGIN_BCS_BATCH(batch, 69);
    OUT_BCS_BATCH(batch, MFX_AVC_DIRECTMODE_STATE | (69 - 2));

    /* reference surfaces 0..15 */
    for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
        OUT_BCS_BATCH(batch, 0); /* top */
        OUT_BCS_BATCH(batch, 0); /* bottom */
    }

    /* the current decoding frame/field */
    OUT_BCS_BATCH(batch, 0); /* top */
    OUT_BCS_BATCH(batch, 0); /* bottom */

    /* POC List */
    for (i = 0; i < MAX_GEN_REFERENCE_FRAMES; i++) {
        OUT_BCS_BATCH(batch, 0);
        OUT_BCS_BATCH(batch, 0);
    }

    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    ADVANCE_BCS_BATCH(batch);
}
  3078.  
/*
 * MFX_IND_OBJ_BASE_ADDR_STATE for the JPEG workaround on B+ stepping
 * (26-dword layout): point the indirect bitstream object at the canned
 * WA slice-data bo.  MV, IT-COFF, IT-DBLK and PAK-BSE addresses are all
 * zero (unused in VLD decode of this clip).
 */
static void
gen75_jpeg_wa_ind_obj_base_addr_state_bplus(VADriverContextP ctx,
                                     struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    BEGIN_BCS_BATCH(batch, 26);
    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (26 - 2));
    /* DW 1-3: MFX indirect bitstream object — the canned clip */
    OUT_BCS_RELOC(batch,
                  gen7_mfd_context->jpeg_wa_slice_data_bo,
                  I915_GEM_DOMAIN_INSTRUCTION, 0,
                  0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

    OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
    OUT_BCS_BATCH(batch, 0);

        /* MFX indirect MV 6-10 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

        /* MFX IT_COFF 11-15 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

        /* MFX IT_DBLK 16-20 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);

        /* MFX PAK_BSE object for encoder 21-25 */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  3126.  
/*
 * MFX_IND_OBJ_BASE_ADDR_STATE for the JPEG workaround.  On B+ stepping
 * this delegates to the 26-dword bplus variant; otherwise it emits the
 * older 11-dword layout pointing at the canned WA slice-data bo.
 */
static void
gen75_jpeg_wa_ind_obj_base_addr_state(VADriverContextP ctx,
                                     struct gen7_mfd_context *gen7_mfd_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    if (IS_STEPPING_BPLUS(i965)) {
        gen75_jpeg_wa_ind_obj_base_addr_state_bplus(ctx, gen7_mfd_context);
        return;
    }

    BEGIN_BCS_BATCH(batch, 11);
    OUT_BCS_BATCH(batch, MFX_IND_OBJ_BASE_ADDR_STATE | (11 - 2));
    OUT_BCS_RELOC(batch,
                  gen7_mfd_context->jpeg_wa_slice_data_bo,
                  I915_GEM_DOMAIN_INSTRUCTION, 0,
                  0);
    OUT_BCS_BATCH(batch, 0x80000000); /* must set, up to 2G */
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0); /* ignore for VLD mode */
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  3156.  
/*
 * MFD_AVC_BSD_OBJECT for the JPEG workaround: decode the whole canned
 * clip as a single, last slice.  Size and starting bit offset come
 * from gen7_jpeg_wa_clip (byte offset in DW4[31:16], residual bits in
 * DW4[2:0]).
 */
static void
gen75_jpeg_wa_avc_bsd_object(VADriverContextP ctx,
                            struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;

    /* the input bitsteam format on GEN7 differs from GEN6 */
    BEGIN_BCS_BATCH(batch, 6);
    OUT_BCS_BATCH(batch, MFD_AVC_BSD_OBJECT | (6 - 2));
    OUT_BCS_BATCH(batch, gen7_jpeg_wa_clip.data_size);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch,
                  (0 << 31) |
                  (0 << 14) |
                  (0 << 12) |
                  (0 << 10) |
                  (0 << 8));
    OUT_BCS_BATCH(batch,
                  ((gen7_jpeg_wa_clip.data_bit_offset >> 3) << 16) |
                  (0 << 5)  |
                  (0 << 4)  |
                  (1 << 3) | /* LastSlice Flag */
                  (gen7_jpeg_wa_clip.data_bit_offset & 0x7));
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  3183.  
/*
 * MFX_AVC_SLICE_STATE for the JPEG workaround: one I slice covering the
 * whole 1x1-MB picture, deblocking disabled, QP taken from the canned
 * clip, flagged as the last slice.
 */
static void
gen75_jpeg_wa_avc_slice_state(VADriverContextP ctx,
                             struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    int slice_hor_pos = 0, slice_ver_pos = 0, next_slice_hor_pos = 0, next_slice_ver_pos = 1;
    int num_ref_idx_l0 = 0, num_ref_idx_l1 = 0;
    int first_mb_in_slice = 0;
    int slice_type = SLICE_TYPE_I;

    BEGIN_BCS_BATCH(batch, 11);
    OUT_BCS_BATCH(batch, MFX_AVC_SLICE_STATE | (11 - 2));
    OUT_BCS_BATCH(batch, slice_type);
    OUT_BCS_BATCH(batch,
                  (num_ref_idx_l1 << 24) |
                  (num_ref_idx_l0 << 16) |
                  (0 << 8) |
                  (0 << 0));
    OUT_BCS_BATCH(batch,
                  (0 << 29) |
                  (1 << 27) |   /* disable Deblocking */
                  (0 << 24) |
                  (gen7_jpeg_wa_clip.qp << 16) |
                  (0 << 8) |
                  (0 << 0));
    OUT_BCS_BATCH(batch,
                  (slice_ver_pos << 24) |
                  (slice_hor_pos << 16) |
                  (first_mb_in_slice << 0));
    OUT_BCS_BATCH(batch,
                  (next_slice_ver_pos << 16) |
                  (next_slice_hor_pos << 0));
    OUT_BCS_BATCH(batch, (1 << 19)); /* last slice flag */
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    OUT_BCS_BATCH(batch, 0);
    ADVANCE_BCS_BATCH(batch);
}
  3223.  
/*
 * JPEG hardware workaround: decode a tiny canned AVC intra clip on the
 * BSD engine before the real JPEG decode.  The command sequence below
 * mirrors a regular AVC decode (mode select, surface, buffer addresses,
 * QM, image, indirect object, direct mode, slice, BSD object) and its
 * order must not be changed.
 */
static void
gen75_mfd_jpeg_wa(VADriverContextP ctx,
                 struct gen7_mfd_context *gen7_mfd_context)
{
    struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
    gen75_jpeg_wa_init(ctx, gen7_mfd_context);
    intel_batchbuffer_emit_mi_flush(batch);
    gen75_jpeg_wa_pipe_mode_select(ctx, gen7_mfd_context);
    gen75_jpeg_wa_surface_state(ctx, gen7_mfd_context);
    gen75_jpeg_wa_pipe_buf_addr_state(ctx, gen7_mfd_context);
    gen75_jpeg_wa_bsp_buf_base_addr_state(ctx, gen7_mfd_context);
    gen75_jpeg_wa_avc_qm_state(ctx, gen7_mfd_context);
    gen75_jpeg_wa_avc_img_state(ctx, gen7_mfd_context);
    gen75_jpeg_wa_ind_obj_base_addr_state(ctx, gen7_mfd_context);

    gen75_jpeg_wa_avc_directmode_state(ctx, gen7_mfd_context);
    gen75_jpeg_wa_avc_slice_state(ctx, gen7_mfd_context);
    gen75_jpeg_wa_avc_bsd_object(ctx, gen7_mfd_context);
}
  3243.  
  3244. void
  3245. gen75_mfd_jpeg_decode_picture(VADriverContextP ctx,
  3246.                              struct decode_state *decode_state,
  3247.                              struct gen7_mfd_context *gen7_mfd_context)
  3248. {
  3249.     struct intel_batchbuffer *batch = gen7_mfd_context->base.batch;
  3250.     VAPictureParameterBufferJPEGBaseline *pic_param;
  3251.     VASliceParameterBufferJPEGBaseline *slice_param, *next_slice_param, *next_slice_group_param;
  3252.     dri_bo *slice_data_bo;
  3253.     int i, j, max_selector = 0;
  3254.  
  3255.     assert(decode_state->pic_param && decode_state->pic_param->buffer);
  3256.     pic_param = (VAPictureParameterBufferJPEGBaseline *)decode_state->pic_param->buffer;
  3257.  
  3258.     /* Currently only support Baseline DCT */
  3259.     gen75_mfd_jpeg_decode_init(ctx, decode_state, gen7_mfd_context);
  3260.     intel_batchbuffer_start_atomic_bcs(batch, 0x1000);
  3261.     gen75_mfd_jpeg_wa(ctx, gen7_mfd_context);
  3262.     intel_batchbuffer_emit_mi_flush(batch);
  3263.     gen75_mfd_pipe_mode_select(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
  3264.     gen75_mfd_surface_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
  3265.     gen75_mfd_pipe_buf_addr_state(ctx, decode_state, MFX_FORMAT_JPEG, gen7_mfd_context);
  3266.     gen75_mfd_jpeg_pic_state(ctx, decode_state, gen7_mfd_context);
  3267.     gen75_mfd_jpeg_qm_state(ctx, decode_state, gen7_mfd_context);
  3268.  
  3269.     for (j = 0; j < decode_state->num_slice_params; j++) {
  3270.         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
  3271.         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
  3272.         slice_data_bo = decode_state->slice_datas[j]->bo;
  3273.         gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
  3274.  
  3275.         if (j == decode_state->num_slice_params - 1)
  3276.             next_slice_group_param = NULL;
  3277.         else
  3278.             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
  3279.  
  3280.         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
  3281.             int component;
  3282.  
  3283.             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
  3284.  
  3285.             if (i < decode_state->slice_params[j]->num_elements - 1)
  3286.                 next_slice_param = slice_param + 1;
  3287.             else
  3288.                 next_slice_param = next_slice_group_param;
  3289.  
  3290.             for (component = 0; component < slice_param->num_components; component++) {
  3291.                 if (max_selector < slice_param->components[component].dc_table_selector)
  3292.                     max_selector = slice_param->components[component].dc_table_selector;
  3293.  
  3294.                 if (max_selector < slice_param->components[component].ac_table_selector)
  3295.                     max_selector = slice_param->components[component].ac_table_selector;
  3296.             }
  3297.  
  3298.             slice_param++;
  3299.         }
  3300.     }
  3301.  
  3302.     assert(max_selector < 2);
  3303.     gen75_mfd_jpeg_huff_table_state(ctx, decode_state, gen7_mfd_context, max_selector + 1);
  3304.  
  3305.     for (j = 0; j < decode_state->num_slice_params; j++) {
  3306.         assert(decode_state->slice_params && decode_state->slice_params[j]->buffer);
  3307.         slice_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j]->buffer;
  3308.         slice_data_bo = decode_state->slice_datas[j]->bo;
  3309.         gen75_mfd_ind_obj_base_addr_state(ctx, slice_data_bo, MFX_FORMAT_JPEG, gen7_mfd_context);
  3310.  
  3311.         if (j == decode_state->num_slice_params - 1)
  3312.             next_slice_group_param = NULL;
  3313.         else
  3314.             next_slice_group_param = (VASliceParameterBufferJPEGBaseline *)decode_state->slice_params[j + 1]->buffer;
  3315.  
  3316.         for (i = 0; i < decode_state->slice_params[j]->num_elements; i++) {
  3317.             assert(slice_param->slice_data_flag == VA_SLICE_DATA_FLAG_ALL);
  3318.  
  3319.             if (i < decode_state->slice_params[j]->num_elements - 1)
  3320.                 next_slice_param = slice_param + 1;
  3321.             else
  3322.                 next_slice_param = next_slice_group_param;
  3323.  
  3324.             gen75_mfd_jpeg_bsd_object(ctx, pic_param, slice_param, next_slice_param, slice_data_bo, gen7_mfd_context);
  3325.             slice_param++;
  3326.         }
  3327.     }
  3328.  
  3329.     intel_batchbuffer_end_atomic(batch);
  3330.     intel_batchbuffer_flush(batch);
  3331. }
  3332.  
  3333. static void
  3334. gen75_mfd_decode_picture(VADriverContextP ctx,
  3335.                         VAProfile profile,
  3336.                         union codec_state *codec_state,
  3337.                         struct hw_context *hw_context)
  3338.  
  3339. {
  3340.     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
  3341.     struct decode_state *decode_state = &codec_state->decode;
  3342.  
  3343.     assert(gen7_mfd_context);
  3344.  
  3345.     gen7_mfd_context->wa_mpeg2_slice_vertical_position = -1;
  3346.  
  3347.     switch (profile) {
  3348.     case VAProfileMPEG2Simple:
  3349.     case VAProfileMPEG2Main:
  3350.         gen75_mfd_mpeg2_decode_picture(ctx, decode_state, gen7_mfd_context);
  3351.         break;
  3352.        
  3353.     case VAProfileH264Baseline:
  3354.     case VAProfileH264Main:
  3355.     case VAProfileH264High:
  3356.         gen75_mfd_avc_decode_picture(ctx, decode_state, gen7_mfd_context);
  3357.         break;
  3358.  
  3359.     case VAProfileVC1Simple:
  3360.     case VAProfileVC1Main:
  3361.     case VAProfileVC1Advanced:
  3362.         gen75_mfd_vc1_decode_picture(ctx, decode_state, gen7_mfd_context);
  3363.         break;
  3364.  
  3365.     case VAProfileJPEGBaseline:
  3366.         gen75_mfd_jpeg_decode_picture(ctx, decode_state, gen7_mfd_context);
  3367.         break;
  3368.  
  3369.     default:
  3370.         assert(0);
  3371.         break;
  3372.     }
  3373. }
  3374.  
  3375. static void
  3376. gen75_mfd_context_destroy(void *hw_context)
  3377. {
  3378.     struct gen7_mfd_context *gen7_mfd_context = (struct gen7_mfd_context *)hw_context;
  3379.  
  3380.     dri_bo_unreference(gen7_mfd_context->post_deblocking_output.bo);
  3381.     gen7_mfd_context->post_deblocking_output.bo = NULL;
  3382.  
  3383.     dri_bo_unreference(gen7_mfd_context->pre_deblocking_output.bo);
  3384.     gen7_mfd_context->pre_deblocking_output.bo = NULL;
  3385.  
  3386.     dri_bo_unreference(gen7_mfd_context->intra_row_store_scratch_buffer.bo);
  3387.     gen7_mfd_context->intra_row_store_scratch_buffer.bo = NULL;
  3388.  
  3389.     dri_bo_unreference(gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo);
  3390.     gen7_mfd_context->deblocking_filter_row_store_scratch_buffer.bo = NULL;
  3391.  
  3392.     dri_bo_unreference(gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo);
  3393.     gen7_mfd_context->bsd_mpc_row_store_scratch_buffer.bo = NULL;
  3394.  
  3395.     dri_bo_unreference(gen7_mfd_context->mpr_row_store_scratch_buffer.bo);
  3396.     gen7_mfd_context->mpr_row_store_scratch_buffer.bo = NULL;
  3397.  
  3398.     dri_bo_unreference(gen7_mfd_context->bitplane_read_buffer.bo);
  3399.     gen7_mfd_context->bitplane_read_buffer.bo = NULL;
  3400.  
  3401.     dri_bo_unreference(gen7_mfd_context->jpeg_wa_slice_data_bo);
  3402.  
  3403.     intel_batchbuffer_free(gen7_mfd_context->base.batch);
  3404.     free(gen7_mfd_context);
  3405. }
  3406.  
  3407. static void gen75_mfd_mpeg2_context_init(VADriverContextP ctx,
  3408.                                     struct gen7_mfd_context *gen7_mfd_context)
  3409. {
  3410.     gen7_mfd_context->iq_matrix.mpeg2.load_intra_quantiser_matrix = -1;
  3411.     gen7_mfd_context->iq_matrix.mpeg2.load_non_intra_quantiser_matrix = -1;
  3412.     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_intra_quantiser_matrix = -1;
  3413.     gen7_mfd_context->iq_matrix.mpeg2.load_chroma_non_intra_quantiser_matrix = -1;
  3414. }
  3415.  
  3416. struct hw_context *
  3417. gen75_dec_hw_context_init(VADriverContextP ctx, VAProfile profile)
  3418. {
  3419.     struct intel_driver_data *intel = intel_driver_data(ctx);
  3420.     struct gen7_mfd_context *gen7_mfd_context = calloc(1, sizeof(struct gen7_mfd_context));
  3421.     int i;
  3422.  
  3423.     gen7_mfd_context->base.destroy = gen75_mfd_context_destroy;
  3424.     gen7_mfd_context->base.run = gen75_mfd_decode_picture;
  3425.     gen7_mfd_context->base.batch = intel_batchbuffer_new(intel, I915_EXEC_RENDER, 0);
  3426.  
  3427.     for (i = 0; i < ARRAY_ELEMS(gen7_mfd_context->reference_surface); i++) {
  3428.         gen7_mfd_context->reference_surface[i].surface_id = VA_INVALID_ID;
  3429.         gen7_mfd_context->reference_surface[i].frame_store_id = -1;
  3430.     }
  3431.  
  3432.     gen7_mfd_context->jpeg_wa_surface_id = VA_INVALID_SURFACE;
  3433.  
  3434.     switch (profile) {
  3435.     case VAProfileMPEG2Simple:
  3436.     case VAProfileMPEG2Main:
  3437.         gen75_mfd_mpeg2_context_init(ctx, gen7_mfd_context);
  3438.         break;
  3439.  
  3440.     case VAProfileH264Baseline:
  3441.     case VAProfileH264Main:
  3442.     case VAProfileH264High:
  3443.         gen75_mfd_avc_context_init(ctx, gen7_mfd_context);
  3444.         break;
  3445.     default:
  3446.         break;
  3447.     }
  3448.     return (struct hw_context *)gen7_mfd_context;
  3449. }
  3450.