Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | Download | RSS feed

  1. #include <stdlib.h>
  2. #include <stdio.h>
  3. #include <string.h>
  4. #include <assert.h>
  5.  
  6.  
  7. #include "intel_batchbuffer.h"
  8. #include "intel_driver.h"
  9.  
  10. #include "i965_defines.h"
  11. #include "i965_drv_video.h"
  12. #include "i965_media.h"
  13. #include "i965_media_h264.h"
  14.  
  15. enum {
  16.     INTRA_16X16 = 0,
  17.     INTRA_8X8,
  18.     INTRA_4X4,
  19.     INTRA_PCM,
  20.     FRAMEMB_MOTION,
  21.     FIELDMB_MOTION,
  22.     MBAFF_MOTION,
  23. };
  24.  
  25. struct intra_kernel_header
  26. {
  27.     /* R1.0 */
  28.     unsigned char intra_4x4_luma_mode_0_offset;
  29.     unsigned char intra_4x4_luma_mode_1_offset;
  30.     unsigned char intra_4x4_luma_mode_2_offset;
  31.     unsigned char intra_4x4_luma_mode_3_offset;
  32.     /* R1.1 */
  33.     unsigned char intra_4x4_luma_mode_4_offset;
  34.     unsigned char intra_4x4_luma_mode_5_offset;
  35.     unsigned char intra_4x4_luma_mode_6_offset;
  36.     unsigned char intra_4x4_luma_mode_7_offset;
  37.     /* R1.2 */
  38.     unsigned char intra_4x4_luma_mode_8_offset;
  39.     unsigned char pad0;
  40.     unsigned short top_reference_offset;
  41.     /* R1.3 */
  42.     unsigned char intra_8x8_luma_mode_0_offset;
  43.     unsigned char intra_8x8_luma_mode_1_offset;
  44.     unsigned char intra_8x8_luma_mode_2_offset;
  45.     unsigned char intra_8x8_luma_mode_3_offset;
  46.     /* R1.4 */
  47.     unsigned char intra_8x8_luma_mode_4_offset;
  48.     unsigned char intra_8x8_luma_mode_5_offset;
  49.     unsigned char intra_8x8_luma_mode_6_offset;
  50.     unsigned char intra_8x8_luma_mode_7_offset;
  51.     /* R1.5 */
  52.     unsigned char intra_8x8_luma_mode_8_offset;
  53.     unsigned char pad1;
  54.     unsigned short const_reverse_data_transfer_intra_8x8;
  55.     /* R1.6 */
  56.     unsigned char intra_16x16_luma_mode_0_offset;
  57.     unsigned char intra_16x16_luma_mode_1_offset;
  58.     unsigned char intra_16x16_luma_mode_2_offset;
  59.     unsigned char intra_16x16_luma_mode_3_offset;
  60.     /* R1.7 */
  61.     unsigned char intra_chroma_mode_0_offset;
  62.     unsigned char intra_chroma_mode_1_offset;
  63.     unsigned char intra_chroma_mode_2_offset;
  64.     unsigned char intra_chroma_mode_3_offset;
  65.     /* R2.0 */
  66.     unsigned int const_intra_16x16_plane_0;
  67.     /* R2.1 */
  68.     unsigned int const_intra_16x16_chroma_plane_0;
  69.     /* R2.2 */
  70.     unsigned int const_intra_16x16_chroma_plane_1;
  71.     /* R2.3 */
  72.     unsigned int const_intra_16x16_plane_1;
  73.     /* R2.4 */
  74.     unsigned int left_shift_count_reverse_dw_ordering;
  75.     /* R2.5 */
  76.     unsigned int const_reverse_data_transfer_intra_4x4;
  77.     /* R2.6 */
  78.     unsigned int intra_4x4_pred_mode_offset;
  79. };
  80.  
  81. struct inter_kernel_header
  82. {
  83.     unsigned short weight_offset;
  84.     unsigned char weight_offset_flag;
  85.     unsigned char pad0;
  86. };
  87.  
  88. #include "shaders/h264/mc/export.inc"
  89. static unsigned long avc_mc_kernel_offset_gen4[] = {
  90.     INTRA_16x16_IP * INST_UNIT_GEN4,
  91.     INTRA_8x8_IP * INST_UNIT_GEN4,
  92.     INTRA_4x4_IP * INST_UNIT_GEN4,
  93.     INTRA_PCM_IP * INST_UNIT_GEN4,
  94.     FRAME_MB_IP * INST_UNIT_GEN4,
  95.     FIELD_MB_IP * INST_UNIT_GEN4,
  96.     MBAFF_MB_IP * INST_UNIT_GEN4
  97. };
  98.  
  99. struct intra_kernel_header intra_kernel_header_gen4 = {
  100.     0,
  101.     (INTRA_4X4_HORIZONTAL_IP - INTRA_4X4_VERTICAL_IP),
  102.     (INTRA_4X4_DC_IP - INTRA_4X4_VERTICAL_IP),
  103.     (INTRA_4X4_DIAG_DOWN_LEFT_IP - INTRA_4X4_VERTICAL_IP),
  104.  
  105.     (INTRA_4X4_DIAG_DOWN_RIGHT_IP - INTRA_4X4_VERTICAL_IP),
  106.     (INTRA_4X4_VERT_RIGHT_IP - INTRA_4X4_VERTICAL_IP),
  107.     (INTRA_4X4_HOR_DOWN_IP - INTRA_4X4_VERTICAL_IP),
  108.     (INTRA_4X4_VERT_LEFT_IP - INTRA_4X4_VERTICAL_IP),
  109.  
  110.     (INTRA_4X4_HOR_UP_IP - INTRA_4X4_VERTICAL_IP),
  111.     0,
  112.     0xFFFC,
  113.  
  114.     0,
  115.     (INTRA_8X8_HORIZONTAL_IP - INTRA_8X8_VERTICAL_IP),
  116.     (INTRA_8X8_DC_IP - INTRA_8X8_VERTICAL_IP),
  117.     (INTRA_8X8_DIAG_DOWN_LEFT_IP - INTRA_8X8_VERTICAL_IP),
  118.  
  119.     (INTRA_8X8_DIAG_DOWN_RIGHT_IP - INTRA_8X8_VERTICAL_IP),
  120.     (INTRA_8X8_VERT_RIGHT_IP - INTRA_8X8_VERTICAL_IP),
  121.     (INTRA_8X8_HOR_DOWN_IP - INTRA_8X8_VERTICAL_IP),
  122.     (INTRA_8X8_VERT_LEFT_IP - INTRA_8X8_VERTICAL_IP),
  123.  
  124.     (INTRA_8X8_HOR_UP_IP - INTRA_8X8_VERTICAL_IP),
  125.     0,
  126.     0x0001,
  127.  
  128.     0,
  129.     (INTRA_16x16_HORIZONTAL_IP - INTRA_16x16_VERTICAL_IP),
  130.     (INTRA_16x16_DC_IP - INTRA_16x16_VERTICAL_IP),
  131.     (INTRA_16x16_PLANE_IP - INTRA_16x16_VERTICAL_IP),
  132.  
  133.     0,
  134.     (INTRA_CHROMA_HORIZONTAL_IP - INTRA_CHROMA_DC_IP),
  135.     (INTRA_CHROMA_VERTICAL_IP - INTRA_CHROMA_DC_IP),
  136.     (INTRA_Chroma_PLANE_IP - INTRA_CHROMA_DC_IP),
  137.  
  138.     0xFCFBFAF9,
  139.  
  140.     0x00FFFEFD,
  141.  
  142.     0x04030201,
  143.  
  144.     0x08070605,
  145.  
  146.     0x18100800,
  147.  
  148.     0x00020406,
  149.  
  150.     (intra_Pred_4x4_Y_IP - ADD_ERROR_SB3_IP) * 0x1000000 +
  151.     (intra_Pred_4x4_Y_IP - ADD_ERROR_SB2_IP) * 0x10000 +
  152.     (intra_Pred_4x4_Y_IP - ADD_ERROR_SB1_IP) * 0x100 +
  153.     (intra_Pred_4x4_Y_IP - ADD_ERROR_SB0_IP)
  154. };
  155.  
  156. static const uint32_t h264_avc_combined_gen4[][4] = {
  157. #include "shaders/h264/mc/avc_mc.g4b"
  158. };
  159.  
  160. static const uint32_t h264_avc_null_gen4[][4] = {
  161. #include "shaders/h264/mc/null.g4b"
  162. };
  163.  
  164. static struct i965_kernel h264_avc_kernels_gen4[] = {
  165.     {
  166.         "AVC combined kernel",
  167.         H264_AVC_COMBINED,
  168.         h264_avc_combined_gen4,
  169.         sizeof(h264_avc_combined_gen4),
  170.         NULL
  171.     },
  172.  
  173.     {
  174.         "NULL kernel",
  175.         H264_AVC_NULL,
  176.         h264_avc_null_gen4,
  177.         sizeof(h264_avc_null_gen4),
  178.         NULL
  179.     }
  180. };
  181.  
  182. /* On Ironlake */
  183. #include "shaders/h264/mc/export.inc.gen5"
  184. static unsigned long avc_mc_kernel_offset_gen5[] = {
  185.     INTRA_16x16_IP_GEN5 * INST_UNIT_GEN5,
  186.     INTRA_8x8_IP_GEN5 * INST_UNIT_GEN5,
  187.     INTRA_4x4_IP_GEN5 * INST_UNIT_GEN5,
  188.     INTRA_PCM_IP_GEN5 * INST_UNIT_GEN5,
  189.     FRAME_MB_IP_GEN5 * INST_UNIT_GEN5,
  190.     FIELD_MB_IP_GEN5 * INST_UNIT_GEN5,
  191.     MBAFF_MB_IP_GEN5 * INST_UNIT_GEN5
  192. };
  193.  
  194. struct intra_kernel_header intra_kernel_header_gen5 = {
  195.     0,
  196.     (INTRA_4X4_HORIZONTAL_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
  197.     (INTRA_4X4_DC_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
  198.     (INTRA_4X4_DIAG_DOWN_LEFT_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
  199.  
  200.     (INTRA_4X4_DIAG_DOWN_RIGHT_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
  201.     (INTRA_4X4_VERT_RIGHT_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
  202.     (INTRA_4X4_HOR_DOWN_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
  203.     (INTRA_4X4_VERT_LEFT_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
  204.  
  205.     (INTRA_4X4_HOR_UP_IP_GEN5 - INTRA_4X4_VERTICAL_IP_GEN5),
  206.     0,
  207.     0xFFFC,
  208.  
  209.     0,
  210.     (INTRA_8X8_HORIZONTAL_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
  211.     (INTRA_8X8_DC_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
  212.     (INTRA_8X8_DIAG_DOWN_LEFT_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
  213.  
  214.     (INTRA_8X8_DIAG_DOWN_RIGHT_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
  215.     (INTRA_8X8_VERT_RIGHT_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
  216.     (INTRA_8X8_HOR_DOWN_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
  217.     (INTRA_8X8_VERT_LEFT_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
  218.  
  219.     (INTRA_8X8_HOR_UP_IP_GEN5 - INTRA_8X8_VERTICAL_IP_GEN5),
  220.     0,
  221.     0x0001,
  222.  
  223.     0,
  224.     (INTRA_16x16_HORIZONTAL_IP_GEN5 - INTRA_16x16_VERTICAL_IP_GEN5),
  225.     (INTRA_16x16_DC_IP_GEN5 - INTRA_16x16_VERTICAL_IP_GEN5),
  226.     (INTRA_16x16_PLANE_IP_GEN5 - INTRA_16x16_VERTICAL_IP_GEN5),
  227.  
  228.     0,
  229.     (INTRA_CHROMA_HORIZONTAL_IP_GEN5 - INTRA_CHROMA_DC_IP_GEN5),
  230.     (INTRA_CHROMA_VERTICAL_IP_GEN5 - INTRA_CHROMA_DC_IP_GEN5),
  231.     (INTRA_Chroma_PLANE_IP_GEN5 - INTRA_CHROMA_DC_IP_GEN5),
  232.  
  233.     0xFCFBFAF9,
  234.  
  235.     0x00FFFEFD,
  236.  
  237.     0x04030201,
  238.  
  239.     0x08070605,
  240.  
  241.     0x18100800,
  242.  
  243.     0x00020406,
  244.  
  245.     (intra_Pred_4x4_Y_IP_GEN5 - ADD_ERROR_SB3_IP_GEN5) * 0x1000000 +
  246.     (intra_Pred_4x4_Y_IP_GEN5 - ADD_ERROR_SB2_IP_GEN5) * 0x10000 +
  247.     (intra_Pred_4x4_Y_IP_GEN5 - ADD_ERROR_SB1_IP_GEN5) * 0x100 +
  248.     (intra_Pred_4x4_Y_IP_GEN5 - ADD_ERROR_SB0_IP_GEN5)
  249. };
  250.  
  251. static const uint32_t h264_avc_combined_gen5[][4] = {
  252. #include "shaders/h264/mc/avc_mc.g4b.gen5"
  253. };
  254.  
  255. static const uint32_t h264_avc_null_gen5[][4] = {
  256. #include "shaders/h264/mc/null.g4b.gen5"
  257. };
  258.  
  259. static struct i965_kernel h264_avc_kernels_gen5[] = {
  260.     {
  261.         "AVC combined kernel",
  262.         H264_AVC_COMBINED,
  263.         h264_avc_combined_gen5,
  264.         sizeof(h264_avc_combined_gen5),
  265.         NULL
  266.     },
  267.  
  268.     {
  269.         "NULL kernel",
  270.         H264_AVC_NULL,
  271.         h264_avc_null_gen5,
  272.         sizeof(h264_avc_null_gen5),
  273.         NULL
  274.     }
  275. };
  276.  
  277. #define NUM_AVC_MC_INTERFACES (sizeof(avc_mc_kernel_offset_gen4) / sizeof(avc_mc_kernel_offset_gen4[0]))
  278. static unsigned long *avc_mc_kernel_offset = NULL;
  279.  
  280. static struct intra_kernel_header *intra_kernel_header = NULL;
  281.  
  282. static void
  283. i965_media_h264_surface_state(VADriverContextP ctx,
  284.                               int index,
  285.                               struct object_surface *obj_surface,
  286.                               unsigned long offset,
  287.                               int w, int h, int pitch,
  288.                               Bool is_dst,
  289.                               int vert_line_stride,
  290.                               int vert_line_stride_ofs,
  291.                               int format,
  292.                               struct i965_media_context *media_context)
  293. {
  294.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  295.     struct i965_surface_state *ss;
  296.     dri_bo *bo;
  297.     uint32_t write_domain, read_domain;
  298.  
  299.     assert(obj_surface->bo);
  300.  
  301.     bo = dri_bo_alloc(i965->intel.bufmgr,
  302.                       "surface state",
  303.                       sizeof(struct i965_surface_state), 32);
  304.     assert(bo);
  305.     dri_bo_map(bo, 1);
  306.     assert(bo->virtual);
  307.     ss = bo->virtual;
  308.     memset(ss, 0, sizeof(*ss));
  309.     ss->ss0.surface_type = I965_SURFACE_2D;
  310.     ss->ss0.surface_format = format;
  311.     ss->ss0.vert_line_stride = vert_line_stride;
  312.     ss->ss0.vert_line_stride_ofs = vert_line_stride_ofs;
  313.     ss->ss1.base_addr = obj_surface->bo->offset + offset;
  314.     ss->ss2.width = w - 1;
  315.     ss->ss2.height = h - 1;
  316.     ss->ss3.pitch = pitch - 1;
  317.  
  318.     if (is_dst) {
  319.         write_domain = I915_GEM_DOMAIN_RENDER;
  320.         read_domain = I915_GEM_DOMAIN_RENDER;
  321.     } else {
  322.         write_domain = 0;
  323.         read_domain = I915_GEM_DOMAIN_SAMPLER;
  324.     }
  325.  
  326.     dri_bo_emit_reloc(bo,
  327.                       read_domain, write_domain,
  328.                       offset,
  329.                       offsetof(struct i965_surface_state, ss1),
  330.                       obj_surface->bo);
  331.     dri_bo_unmap(bo);
  332.  
  333.     assert(index < MAX_MEDIA_SURFACES);
  334.     media_context->surface_state[index].bo = bo;
  335. }
  336.  
  337. static void
  338. i965_media_h264_surfaces_setup(VADriverContextP ctx,
  339.                                struct decode_state *decode_state,
  340.                                struct i965_media_context *media_context)
  341. {
  342.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  343.     struct i965_h264_context *i965_h264_context;
  344.     struct object_surface *obj_surface;
  345.     VAPictureParameterBufferH264 *pic_param;
  346.     VAPictureH264 *va_pic;
  347.     int i, j, w, h;
  348.     int field_picture;
  349.  
  350.     assert(media_context->private_context);
  351.     i965_h264_context = (struct i965_h264_context *)media_context->private_context;
  352.  
  353.     assert(decode_state->pic_param && decode_state->pic_param->buffer);
  354.     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
  355.  
  356.     /* Target Picture */
  357.     va_pic = &pic_param->CurrPic;
  358.     assert(!(va_pic->flags & VA_PICTURE_H264_INVALID));
  359.     obj_surface = SURFACE(va_pic->picture_id);
  360.     assert(obj_surface);
  361.     w = obj_surface->width;
  362.     h = obj_surface->height;
  363.     field_picture = !!(va_pic->flags & (VA_PICTURE_H264_TOP_FIELD | VA_PICTURE_H264_BOTTOM_FIELD));
  364.     i965_media_h264_surface_state(ctx, 0, obj_surface,
  365.                                   0, w / 4, h / (1 + field_picture), w,
  366.                                   1,
  367.                                   field_picture,
  368.                                   !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD),
  369.                                   I965_SURFACEFORMAT_R8_SINT,   /* Y */
  370.                                   media_context);
  371.     i965_media_h264_surface_state(ctx, 1, obj_surface,
  372.                                   w * h, w / 4, h / 2 / (1 + field_picture), w,
  373.                                   1,
  374.                                   field_picture,
  375.                                   !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD),
  376.                                   I965_SURFACEFORMAT_R8G8_SINT, /* INTERLEAVED U/V */
  377.                                   media_context);
  378.  
  379.     /* Reference Pictures */
  380.     for (i = 0; i < ARRAY_ELEMS(i965_h264_context->fsid_list); i++) {
  381.         if (i965_h264_context->fsid_list[i].surface_id != VA_INVALID_ID) {
  382.             int found = 0;
  383.             for (j = 0; j < ARRAY_ELEMS(pic_param->ReferenceFrames); j++) {
  384.                 va_pic = &pic_param->ReferenceFrames[j];
  385.                
  386.                 if (va_pic->flags & VA_PICTURE_H264_INVALID)
  387.                     continue;
  388.  
  389.                 if (va_pic->picture_id == i965_h264_context->fsid_list[i].surface_id) {
  390.                     found = 1;
  391.                     break;
  392.                 }
  393.             }
  394.  
  395.             assert(found == 1);
  396.  
  397.             obj_surface = SURFACE(va_pic->picture_id);
  398.             assert(obj_surface);
  399.             w = obj_surface->width;
  400.             h = obj_surface->height;
  401.             field_picture = !!(va_pic->flags & (VA_PICTURE_H264_TOP_FIELD | VA_PICTURE_H264_BOTTOM_FIELD));
  402.             i965_media_h264_surface_state(ctx, 2 + i, obj_surface,
  403.                                           0, w / 4, h / (1 + field_picture), w,
  404.                                           0,
  405.                                           field_picture,
  406.                                           !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD),
  407.                                           I965_SURFACEFORMAT_R8_SINT,   /* Y */
  408.                                           media_context);
  409.             i965_media_h264_surface_state(ctx, 18 + i, obj_surface,
  410.                                           w * h, w / 4, h / 2 / (1 + field_picture), w,
  411.                                           0,
  412.                                           field_picture,
  413.                                           !!(va_pic->flags & VA_PICTURE_H264_BOTTOM_FIELD),
  414.                                           I965_SURFACEFORMAT_R8G8_SINT, /* INTERLEAVED U/V */
  415.                                           media_context);
  416.         }
  417.     }
  418. }
  419.  
  420. static void
  421. i965_media_h264_binding_table(VADriverContextP ctx, struct i965_media_context *media_context)
  422. {
  423.     int i;
  424.     unsigned int *binding_table;
  425.     dri_bo *bo = media_context->binding_table.bo;
  426.  
  427.     dri_bo_map(bo, 1);
  428.     assert(bo->virtual);
  429.     binding_table = bo->virtual;
  430.     memset(binding_table, 0, bo->size);
  431.  
  432.     for (i = 0; i < MAX_MEDIA_SURFACES; i++) {
  433.         if (media_context->surface_state[i].bo) {
  434.             binding_table[i] = media_context->surface_state[i].bo->offset;
  435.             dri_bo_emit_reloc(bo,
  436.                               I915_GEM_DOMAIN_INSTRUCTION, 0,
  437.                               0,
  438.                               i * sizeof(*binding_table),
  439.                               media_context->surface_state[i].bo);
  440.         }
  441.     }
  442.  
  443.     dri_bo_unmap(media_context->binding_table.bo);
  444. }
  445.  
  446. static void
  447. i965_media_h264_interface_descriptor_remap_table(VADriverContextP ctx, struct i965_media_context *media_context)
  448. {
  449.     struct i965_h264_context *i965_h264_context = (struct i965_h264_context *)media_context->private_context;
  450.     struct i965_interface_descriptor *desc;
  451.     int i;
  452.     dri_bo *bo;
  453.  
  454.     bo = media_context->idrt.bo;
  455.     dri_bo_map(bo, 1);
  456.     assert(bo->virtual);
  457.     desc = bo->virtual;
  458.  
  459.     for (i = 0; i < NUM_AVC_MC_INTERFACES; i++) {
  460.         int kernel_offset = avc_mc_kernel_offset[i];
  461.         memset(desc, 0, sizeof(*desc));
  462.         desc->desc0.grf_reg_blocks = 7;
  463.         desc->desc0.kernel_start_pointer = (i965_h264_context->avc_kernels[H264_AVC_COMBINED].bo->offset + kernel_offset) >> 6; /* reloc */
  464.         desc->desc1.const_urb_entry_read_offset = 0;
  465.         desc->desc1.const_urb_entry_read_len = 2;
  466.         desc->desc3.binding_table_entry_count = 0;
  467.         desc->desc3.binding_table_pointer =
  468.             media_context->binding_table.bo->offset >> 5; /*reloc */
  469.  
  470.         dri_bo_emit_reloc(bo,
  471.                           I915_GEM_DOMAIN_INSTRUCTION, 0,
  472.                           desc->desc0.grf_reg_blocks + kernel_offset,
  473.                           i * sizeof(*desc) + offsetof(struct i965_interface_descriptor, desc0),
  474.                           i965_h264_context->avc_kernels[H264_AVC_COMBINED].bo);
  475.  
  476.         dri_bo_emit_reloc(bo,
  477.                           I915_GEM_DOMAIN_INSTRUCTION, 0,
  478.                           desc->desc3.binding_table_entry_count,
  479.                           i * sizeof(*desc) + offsetof(struct i965_interface_descriptor, desc3),
  480.                           media_context->binding_table.bo);
  481.         desc++;
  482.     }
  483.  
  484.     dri_bo_unmap(bo);
  485. }
  486.  
  487. static void
  488. i965_media_h264_vfe_state(VADriverContextP ctx, struct i965_media_context *media_context)
  489. {
  490.     struct i965_vfe_state *vfe_state;
  491.     dri_bo *bo;
  492.  
  493.     bo = media_context->vfe_state.bo;
  494.     dri_bo_map(bo, 1);
  495.     assert(bo->virtual);
  496.     vfe_state = bo->virtual;
  497.     memset(vfe_state, 0, sizeof(*vfe_state));
  498.     vfe_state->vfe0.extend_vfe_state_present = 1;
  499.     vfe_state->vfe1.max_threads = media_context->urb.num_vfe_entries - 1;
  500.     vfe_state->vfe1.urb_entry_alloc_size = media_context->urb.size_vfe_entry - 1;
  501.     vfe_state->vfe1.num_urb_entries = media_context->urb.num_vfe_entries;
  502.     vfe_state->vfe1.vfe_mode = VFE_AVC_IT_MODE;
  503.     vfe_state->vfe1.children_present = 0;
  504.     vfe_state->vfe2.interface_descriptor_base =
  505.         media_context->idrt.bo->offset >> 4; /* reloc */
  506.     dri_bo_emit_reloc(bo,
  507.                       I915_GEM_DOMAIN_INSTRUCTION, 0,
  508.                       0,
  509.                       offsetof(struct i965_vfe_state, vfe2),
  510.                       media_context->idrt.bo);
  511.     dri_bo_unmap(bo);
  512. }
  513.  
  514. static void
  515. i965_media_h264_vfe_state_extension(VADriverContextP ctx,
  516.                                     struct decode_state *decode_state,
  517.                                     struct i965_media_context *media_context)
  518. {
  519.     struct i965_h264_context *i965_h264_context;
  520.     struct i965_vfe_state_ex *vfe_state_ex;
  521.     VAPictureParameterBufferH264 *pic_param;
  522.     int mbaff_frame_flag;
  523.  
  524.     assert(media_context->private_context);
  525.     i965_h264_context = (struct i965_h264_context *)media_context->private_context;
  526.  
  527.     assert(decode_state->pic_param && decode_state->pic_param->buffer);
  528.     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
  529.     mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
  530.                         !pic_param->pic_fields.bits.field_pic_flag);
  531.  
  532.     assert(media_context->extended_state.bo);
  533.     dri_bo_map(media_context->extended_state.bo, 1);
  534.     assert(media_context->extended_state.bo->virtual);
  535.     vfe_state_ex = media_context->extended_state.bo->virtual;
  536.     memset(vfe_state_ex, 0, sizeof(*vfe_state_ex));
  537.  
  538.     /*
  539.      * Indirect data buffer:
  540.      * --------------------------------------------------------
  541.      * | Motion Vectors | Weight/Offset data | Residual data |
  542.      * --------------------------------------------------------
  543.      * R4-R7: Motion Vectors
  544.      * R8-R9: Weight/Offset
  545.      * R10-R33: Residual data
  546.      */
  547.     vfe_state_ex->vfex1.avc.residual_data_fix_offset_flag = !!RESIDUAL_DATA_OFFSET;
  548.     vfe_state_ex->vfex1.avc.residual_data_offset = RESIDUAL_DATA_OFFSET;
  549.  
  550.     if (i965_h264_context->picture.i_flag) {
  551.         vfe_state_ex->vfex1.avc.sub_field_present_flag = PRESENT_NOMV; /* NoMV */
  552.         vfe_state_ex->vfex1.avc.weight_grf_offset = 0;
  553.         vfe_state_ex->vfex1.avc.residual_grf_offset = 0;
  554.     } else {
  555.         vfe_state_ex->vfex1.avc.sub_field_present_flag = PRESENT_MV_WO; /* Both MV and W/O */
  556.         vfe_state_ex->vfex1.avc.weight_grf_offset = 4;
  557.         vfe_state_ex->vfex1.avc.residual_grf_offset = 6;
  558.     }
  559.  
  560.     if (!pic_param->pic_fields.bits.field_pic_flag) {
  561.         if (mbaff_frame_flag) {
  562.             vfe_state_ex->remap_table0.remap_index_0 = INTRA_16X16;
  563.             vfe_state_ex->remap_table0.remap_index_1 = INTRA_8X8;
  564.             vfe_state_ex->remap_table0.remap_index_2 = INTRA_4X4;
  565.             vfe_state_ex->remap_table0.remap_index_3 = INTRA_PCM;
  566.             vfe_state_ex->remap_table0.remap_index_4 = MBAFF_MOTION;
  567.             vfe_state_ex->remap_table0.remap_index_5 = MBAFF_MOTION;
  568.             vfe_state_ex->remap_table0.remap_index_6 = MBAFF_MOTION;
  569.             vfe_state_ex->remap_table0.remap_index_7 = MBAFF_MOTION;
  570.  
  571.             vfe_state_ex->remap_table1.remap_index_8 = MBAFF_MOTION;
  572.             vfe_state_ex->remap_table1.remap_index_9 = MBAFF_MOTION;
  573.             vfe_state_ex->remap_table1.remap_index_10 = MBAFF_MOTION;
  574.             vfe_state_ex->remap_table1.remap_index_11 = MBAFF_MOTION;
  575.             vfe_state_ex->remap_table1.remap_index_12 = MBAFF_MOTION;
  576.             vfe_state_ex->remap_table1.remap_index_13 = MBAFF_MOTION;
  577.             vfe_state_ex->remap_table1.remap_index_14 = MBAFF_MOTION;
  578.             vfe_state_ex->remap_table1.remap_index_15 = MBAFF_MOTION;
  579.         } else {
  580.             vfe_state_ex->remap_table0.remap_index_0 = INTRA_16X16;
  581.             vfe_state_ex->remap_table0.remap_index_1 = INTRA_8X8;
  582.             vfe_state_ex->remap_table0.remap_index_2 = INTRA_4X4;
  583.             vfe_state_ex->remap_table0.remap_index_3 = INTRA_PCM;
  584.             vfe_state_ex->remap_table0.remap_index_4 = FRAMEMB_MOTION;
  585.             vfe_state_ex->remap_table0.remap_index_5 = FRAMEMB_MOTION;
  586.             vfe_state_ex->remap_table0.remap_index_6 = FRAMEMB_MOTION;
  587.             vfe_state_ex->remap_table0.remap_index_7 = FRAMEMB_MOTION;
  588.  
  589.             vfe_state_ex->remap_table1.remap_index_8 = FRAMEMB_MOTION;
  590.             vfe_state_ex->remap_table1.remap_index_9 = FRAMEMB_MOTION;
  591.             vfe_state_ex->remap_table1.remap_index_10 = FRAMEMB_MOTION;
  592.             vfe_state_ex->remap_table1.remap_index_11 = FRAMEMB_MOTION;
  593.             vfe_state_ex->remap_table1.remap_index_12 = FRAMEMB_MOTION;
  594.             vfe_state_ex->remap_table1.remap_index_13 = FRAMEMB_MOTION;
  595.             vfe_state_ex->remap_table1.remap_index_14 = FRAMEMB_MOTION;
  596.             vfe_state_ex->remap_table1.remap_index_15 = FRAMEMB_MOTION;
  597.         }
  598.     } else {
  599.         vfe_state_ex->remap_table0.remap_index_0 = INTRA_16X16;
  600.         vfe_state_ex->remap_table0.remap_index_1 = INTRA_8X8;
  601.         vfe_state_ex->remap_table0.remap_index_2 = INTRA_4X4;
  602.         vfe_state_ex->remap_table0.remap_index_3 = INTRA_PCM;
  603.         vfe_state_ex->remap_table0.remap_index_4 = FIELDMB_MOTION;
  604.         vfe_state_ex->remap_table0.remap_index_5 = FIELDMB_MOTION;
  605.         vfe_state_ex->remap_table0.remap_index_6 = FIELDMB_MOTION;
  606.         vfe_state_ex->remap_table0.remap_index_7 = FIELDMB_MOTION;
  607.  
  608.         vfe_state_ex->remap_table1.remap_index_8 = FIELDMB_MOTION;
  609.         vfe_state_ex->remap_table1.remap_index_9 = FIELDMB_MOTION;
  610.         vfe_state_ex->remap_table1.remap_index_10 = FIELDMB_MOTION;
  611.         vfe_state_ex->remap_table1.remap_index_11 = FIELDMB_MOTION;
  612.         vfe_state_ex->remap_table1.remap_index_12 = FIELDMB_MOTION;
  613.         vfe_state_ex->remap_table1.remap_index_13 = FIELDMB_MOTION;
  614.         vfe_state_ex->remap_table1.remap_index_14 = FIELDMB_MOTION;
  615.         vfe_state_ex->remap_table1.remap_index_15 = FIELDMB_MOTION;
  616.     }
  617.  
  618.     if (i965_h264_context->use_avc_hw_scoreboard) {
  619.         vfe_state_ex->scoreboard0.enable = 1;
  620.         vfe_state_ex->scoreboard0.type = SCOREBOARD_STALLING;
  621.         vfe_state_ex->scoreboard0.mask = 0xff;
  622.  
  623.         vfe_state_ex->scoreboard1.delta_x0 = -1;
  624.         vfe_state_ex->scoreboard1.delta_y0 = 0;
  625.         vfe_state_ex->scoreboard1.delta_x1 = 0;
  626.         vfe_state_ex->scoreboard1.delta_y1 = -1;
  627.         vfe_state_ex->scoreboard1.delta_x2 = 1;
  628.         vfe_state_ex->scoreboard1.delta_y2 = -1;
  629.         vfe_state_ex->scoreboard1.delta_x3 = -1;
  630.         vfe_state_ex->scoreboard1.delta_y3 = -1;
  631.  
  632.         vfe_state_ex->scoreboard2.delta_x4 = -1;
  633.         vfe_state_ex->scoreboard2.delta_y4 = 1;
  634.         vfe_state_ex->scoreboard2.delta_x5 = 0;
  635.         vfe_state_ex->scoreboard2.delta_y5 = -2;
  636.         vfe_state_ex->scoreboard2.delta_x6 = 1;
  637.         vfe_state_ex->scoreboard2.delta_y6 = -2;
  638.         vfe_state_ex->scoreboard2.delta_x7 = -1;
  639.         vfe_state_ex->scoreboard2.delta_y7 = -2;
  640.     }
  641.  
  642.     dri_bo_unmap(media_context->extended_state.bo);
  643. }
  644.  
  645. static void
  646. i965_media_h264_upload_constants(VADriverContextP ctx,
  647.                                  struct decode_state *decode_state,
  648.                                  struct i965_media_context *media_context)
  649. {
  650.     struct i965_h264_context *i965_h264_context;
  651.     unsigned char *constant_buffer;
  652.     VASliceParameterBufferH264 *slice_param;
  653.  
  654.     assert(media_context->private_context);
  655.     i965_h264_context = (struct i965_h264_context *)media_context->private_context;
  656.  
  657.     assert(decode_state->slice_params[0] && decode_state->slice_params[0]->buffer);
  658.     slice_param = (VASliceParameterBufferH264 *)decode_state->slice_params[0]->buffer;
  659.  
  660.     dri_bo_map(media_context->curbe.bo, 1);
  661.     assert(media_context->curbe.bo->virtual);
  662.     constant_buffer = media_context->curbe.bo->virtual;
  663.  
  664.     /* HW solution for W=128 */
  665.     if (i965_h264_context->use_hw_w128) {
  666.         memcpy(constant_buffer, intra_kernel_header, sizeof(*intra_kernel_header));
  667.     } else {
  668.         if (slice_param->slice_type == SLICE_TYPE_I ||
  669.             slice_param->slice_type == SLICE_TYPE_SI) {
  670.             memcpy(constant_buffer, intra_kernel_header, sizeof(*intra_kernel_header));
  671.         } else {
  672.             /* FIXME: Need to upload CURBE data to inter kernel interface
  673.              * to support weighted prediction work-around
  674.              */
  675.             *(short *)constant_buffer = i965_h264_context->weight128_offset0;
  676.             constant_buffer += 2;
  677.             *(char *)constant_buffer = i965_h264_context->weight128_offset0_flag;
  678.             constant_buffer++;
  679.             *constant_buffer = 0;
  680.         }
  681.     }
  682.  
  683.     dri_bo_unmap(media_context->curbe.bo);
  684. }
  685.  
  686. static void
  687. i965_media_h264_states_setup(VADriverContextP ctx,
  688.                              struct decode_state *decode_state,
  689.                              struct i965_media_context *media_context)
  690. {
  691.     struct i965_h264_context *i965_h264_context;
  692.  
  693.     assert(media_context->private_context);
  694.     i965_h264_context = (struct i965_h264_context *)media_context->private_context;
  695.  
  696.     i965_avc_bsd_pipeline(ctx, decode_state, i965_h264_context);
  697.  
  698.     if (i965_h264_context->use_avc_hw_scoreboard)
  699.         i965_avc_hw_scoreboard(ctx, decode_state, i965_h264_context);
  700.  
  701.     i965_media_h264_surfaces_setup(ctx, decode_state, media_context);
  702.     i965_media_h264_binding_table(ctx, media_context);
  703.     i965_media_h264_interface_descriptor_remap_table(ctx, media_context);
  704.     i965_media_h264_vfe_state_extension(ctx, decode_state, media_context);
  705.     i965_media_h264_vfe_state(ctx, media_context);
  706.     i965_media_h264_upload_constants(ctx, decode_state, media_context);
  707. }
  708.  
  709. static void
  710. i965_media_h264_objects(VADriverContextP ctx,
  711.                         struct decode_state *decode_state,
  712.                         struct i965_media_context *media_context)
  713. {
  714.     struct intel_batchbuffer *batch = media_context->base.batch;
  715.     struct i965_h264_context *i965_h264_context;
  716.     unsigned int *object_command;
  717.  
  718.     assert(media_context->private_context);
  719.     i965_h264_context = (struct i965_h264_context *)media_context->private_context;
  720.  
  721.     dri_bo_map(i965_h264_context->avc_it_command_mb_info.bo, True);
  722.     assert(i965_h264_context->avc_it_command_mb_info.bo->virtual);
  723.     object_command = i965_h264_context->avc_it_command_mb_info.bo->virtual;
  724.     memset(object_command, 0, i965_h264_context->avc_it_command_mb_info.mbs * i965_h264_context->use_avc_hw_scoreboard * MB_CMD_IN_BYTES);
  725.     object_command += i965_h264_context->avc_it_command_mb_info.mbs * (1 + i965_h264_context->use_avc_hw_scoreboard) * MB_CMD_IN_DWS;
  726.     *object_command++ = 0;
  727.     *object_command = MI_BATCH_BUFFER_END;
  728.     dri_bo_unmap(i965_h264_context->avc_it_command_mb_info.bo);
  729.  
  730.     BEGIN_BATCH(batch, 2);
  731.     OUT_BATCH(batch, MI_BATCH_BUFFER_START | (2 << 6));
  732.     OUT_RELOC(batch, i965_h264_context->avc_it_command_mb_info.bo,
  733.               I915_GEM_DOMAIN_COMMAND, 0,
  734.               0);
  735.     ADVANCE_BATCH(batch);
  736.  
  737.     /* Have to execute the batch buffer here becuase MI_BATCH_BUFFER_END
  738.      * will cause control to pass back to ring buffer
  739.      */
  740.     intel_batchbuffer_end_atomic(batch);
  741.     intel_batchbuffer_flush(batch);
  742.     intel_batchbuffer_start_atomic(batch, 0x1000);
  743.     i965_avc_ildb(ctx, decode_state, i965_h264_context);
  744. }
  745.  
  746. static void
  747. i965_media_h264_free_private_context(void **data)
  748. {
  749.     struct i965_h264_context *i965_h264_context = *data;
  750.     int i;
  751.  
  752.     if (i965_h264_context == NULL)
  753.         return;
  754.  
  755.     i965_avc_ildb_ternimate(&i965_h264_context->avc_ildb_context);
  756.     i965_avc_hw_scoreboard_ternimate(&i965_h264_context->avc_hw_scoreboard_context);
  757.     i965_avc_bsd_ternimate(&i965_h264_context->i965_avc_bsd_context);
  758.     dri_bo_unreference(i965_h264_context->avc_it_command_mb_info.bo);
  759.     dri_bo_unreference(i965_h264_context->avc_it_data.bo);
  760.     dri_bo_unreference(i965_h264_context->avc_ildb_data.bo);
  761.  
  762.     for (i = 0; i < NUM_H264_AVC_KERNELS; i++) {
  763.         struct i965_kernel *kernel = &i965_h264_context->avc_kernels[i];
  764.  
  765.         dri_bo_unreference(kernel->bo);
  766.         kernel->bo = NULL;
  767.     }
  768.  
  769.     free(i965_h264_context);
  770.     *data = NULL;
  771. }
  772.  
  773. void
  774. i965_media_h264_decode_init(VADriverContextP ctx,
  775.                             struct decode_state *decode_state,
  776.                             struct i965_media_context *media_context)
  777. {
  778.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  779.     struct i965_h264_context *i965_h264_context = media_context->private_context;
  780.     dri_bo *bo;
  781.     VAPictureParameterBufferH264 *pic_param;
  782.  
  783.     assert(decode_state->pic_param && decode_state->pic_param->buffer);
  784.     pic_param = (VAPictureParameterBufferH264 *)decode_state->pic_param->buffer;
  785.     i965_h264_context->picture.width_in_mbs = ((pic_param->picture_width_in_mbs_minus1 + 1) & 0xff);
  786.     i965_h264_context->picture.height_in_mbs = ((pic_param->picture_height_in_mbs_minus1 + 1) & 0xff) /
  787.         (1 + !!pic_param->pic_fields.bits.field_pic_flag); /* picture height */
  788.     i965_h264_context->picture.mbaff_frame_flag = (pic_param->seq_fields.bits.mb_adaptive_frame_field_flag &&
  789.                                                    !pic_param->pic_fields.bits.field_pic_flag);
  790.     i965_h264_context->avc_it_command_mb_info.mbs = (i965_h264_context->picture.width_in_mbs *
  791.                                                      i965_h264_context->picture.height_in_mbs);
  792.  
  793.     dri_bo_unreference(i965_h264_context->avc_it_command_mb_info.bo);
  794.     bo = dri_bo_alloc(i965->intel.bufmgr,
  795.                       "avc it command mb info",
  796.                       i965_h264_context->avc_it_command_mb_info.mbs * MB_CMD_IN_BYTES * (1 + i965_h264_context->use_avc_hw_scoreboard) + 8,
  797.                       0x1000);
  798.     assert(bo);
  799.     i965_h264_context->avc_it_command_mb_info.bo = bo;
  800.  
  801.     dri_bo_unreference(i965_h264_context->avc_it_data.bo);
  802.     bo = dri_bo_alloc(i965->intel.bufmgr,
  803.                       "avc it data",
  804.                       i965_h264_context->avc_it_command_mb_info.mbs *
  805.                       0x800 *
  806.                       (1 + !!pic_param->pic_fields.bits.field_pic_flag),
  807.                       0x1000);
  808.     assert(bo);
  809.     i965_h264_context->avc_it_data.bo = bo;
  810.     i965_h264_context->avc_it_data.write_offset = 0;
  811.     dri_bo_unreference(media_context->indirect_object.bo);
  812.     media_context->indirect_object.bo = bo;
  813.     dri_bo_reference(media_context->indirect_object.bo);
  814.     media_context->indirect_object.offset = i965_h264_context->avc_it_data.write_offset;
  815.  
  816.     dri_bo_unreference(i965_h264_context->avc_ildb_data.bo);
  817.     bo = dri_bo_alloc(i965->intel.bufmgr,
  818.                       "AVC-ILDB Data Buffer",
  819.                       i965_h264_context->avc_it_command_mb_info.mbs * 64 * 2,
  820.                       0x1000);
  821.     assert(bo);
  822.     i965_h264_context->avc_ildb_data.bo = bo;
  823.  
  824.     /* bsd pipeline */
  825.     i965_avc_bsd_decode_init(ctx, i965_h264_context);
  826.  
  827.     /* HW scoreboard */
  828.     if (i965_h264_context->use_avc_hw_scoreboard)
  829.         i965_avc_hw_scoreboard_decode_init(ctx, i965_h264_context);
  830.  
  831.     /* ILDB */
  832.     i965_avc_ildb_decode_init(ctx, i965_h264_context);
  833.  
  834.     /* for Media pipeline */
  835.     media_context->extended_state.enabled = 1;
  836.     dri_bo_unreference(media_context->extended_state.bo);
  837.     bo = dri_bo_alloc(i965->intel.bufmgr,
  838.                       "extened vfe state",
  839.                       sizeof(struct i965_vfe_state_ex), 32);
  840.     assert(bo);
  841.     media_context->extended_state.bo = bo;
  842. }
  843.  
  844. void
  845. i965_media_h264_dec_context_init(VADriverContextP ctx, struct i965_media_context *media_context)
  846. {
  847.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  848.     struct i965_h264_context *i965_h264_context;
  849.     int i;
  850.  
  851.     i965_h264_context = calloc(1, sizeof(struct i965_h264_context));
  852.  
  853.     /* kernel */
  854.     assert(NUM_H264_AVC_KERNELS == (sizeof(h264_avc_kernels_gen5) /
  855.                                     sizeof(h264_avc_kernels_gen5[0])));
  856.     assert(NUM_AVC_MC_INTERFACES == (sizeof(avc_mc_kernel_offset_gen5) /
  857.                                      sizeof(avc_mc_kernel_offset_gen5[0])));
  858.     if (IS_IRONLAKE(i965->intel.device_id)) {
  859.         memcpy(i965_h264_context->avc_kernels, h264_avc_kernels_gen5, sizeof(i965_h264_context->avc_kernels));
  860.         avc_mc_kernel_offset = avc_mc_kernel_offset_gen5;
  861.         intra_kernel_header = &intra_kernel_header_gen5;
  862.         i965_h264_context->use_avc_hw_scoreboard = 1;
  863.         i965_h264_context->use_hw_w128 = 1;
  864.     } else {
  865.         memcpy(i965_h264_context->avc_kernels, h264_avc_kernels_gen4, sizeof(i965_h264_context->avc_kernels));
  866.         avc_mc_kernel_offset = avc_mc_kernel_offset_gen4;
  867.         intra_kernel_header = &intra_kernel_header_gen4;
  868.         i965_h264_context->use_avc_hw_scoreboard = 0;
  869.         i965_h264_context->use_hw_w128 = 0;
  870.     }
  871.  
  872.     for (i = 0; i < NUM_H264_AVC_KERNELS; i++) {
  873.         struct i965_kernel *kernel = &i965_h264_context->avc_kernels[i];
  874.         kernel->bo = dri_bo_alloc(i965->intel.bufmgr,
  875.                                   kernel->name,
  876.                                   kernel->size, 0x1000);
  877.         assert(kernel->bo);
  878.         dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin);
  879.     }
  880.  
  881.     for (i = 0; i < 16; i++) {
  882.         i965_h264_context->fsid_list[i].surface_id = VA_INVALID_ID;
  883.         i965_h264_context->fsid_list[i].frame_store_id = -1;
  884.     }
  885.  
  886.     i965_h264_context->batch = media_context->base.batch;
  887.  
  888.     media_context->private_context = i965_h264_context;
  889.     media_context->free_private_context = i965_media_h264_free_private_context;
  890.  
  891.     /* URB */
  892.     if (IS_IRONLAKE(i965->intel.device_id)) {
  893.         media_context->urb.num_vfe_entries = 63;
  894.     } else {
  895.         media_context->urb.num_vfe_entries = 23;
  896.     }
  897.  
  898.     media_context->urb.size_vfe_entry = 16;
  899.  
  900.     media_context->urb.num_cs_entries = 1;
  901.     media_context->urb.size_cs_entry = 1;
  902.  
  903.     media_context->urb.vfe_start = 0;
  904.     media_context->urb.cs_start = media_context->urb.vfe_start +
  905.         media_context->urb.num_vfe_entries * media_context->urb.size_vfe_entry;
  906.     assert(media_context->urb.cs_start +
  907.            media_context->urb.num_cs_entries * media_context->urb.size_cs_entry <= URB_SIZE((&i965->intel)));
  908.  
  909.     /* hook functions */
  910.     media_context->media_states_setup = i965_media_h264_states_setup;
  911.     media_context->media_objects = i965_media_h264_objects;
  912. }
  913.