/*
 * Copyright © 2010-2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Zhao Yakui <yakui.zhao@intel.com>
 *    Xiang HaiHao <haihao.xiang@intel.com>
 *
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

#include "intel_batchbuffer.h"
#include "intel_driver.h"

#include "i965_defines.h"
#include "i965_drv_video.h"
#include "gen6_vme.h"
#include "i965_encoder.h"

#define SURFACE_STATE_PADDED_SIZE_0_GEN7        ALIGN(sizeof(struct gen7_surface_state), 32)
#define SURFACE_STATE_PADDED_SIZE_1_GEN7        ALIGN(sizeof(struct gen7_surface_state2), 32)
#define SURFACE_STATE_PADDED_SIZE_GEN7          MAX(SURFACE_STATE_PADDED_SIZE_0_GEN7, SURFACE_STATE_PADDED_SIZE_1_GEN7)

#define SURFACE_STATE_PADDED_SIZE_0_GEN6        ALIGN(sizeof(struct i965_surface_state), 32)
#define SURFACE_STATE_PADDED_SIZE_1_GEN6        ALIGN(sizeof(struct i965_surface_state2), 32)
#define SURFACE_STATE_PADDED_SIZE_GEN6          MAX(SURFACE_STATE_PADDED_SIZE_0_GEN6, SURFACE_STATE_PADDED_SIZE_1_GEN6)

#define SURFACE_STATE_PADDED_SIZE               MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7)
#define SURFACE_STATE_OFFSET(index)             (SURFACE_STATE_PADDED_SIZE * index)
#define BINDING_TABLE_OFFSET                    SURFACE_STATE_OFFSET(MAX_MEDIA_SURFACES_GEN6)
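
/*
 * Layout sketch (editorial note, not part of the original driver): all
 * padded surface states are packed at the start of one bo, followed by
 * the binding table, whose entry i simply records the offset of surface
 * state i:
 *
 *     SURFACE_STATE_OFFSET(i) = i * SURFACE_STATE_PADDED_SIZE
 *     BINDING_TABLE_OFFSET    = MAX_MEDIA_SURFACES_GEN6 * SURFACE_STATE_PADDED_SIZE
 *     table[i]                = SURFACE_STATE_OFFSET(i)
 *
 * which matches the "table[index] = SURFACE_STATE_OFFSET(index)" stores
 * in the surface-state helpers below.
 */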

#define VME_INTRA_SHADER        0
#define VME_INTER_SHADER        1

#define CURBE_ALLOCATION_SIZE   37              /* in 256-bit units */
#define CURBE_TOTAL_DATA_LENGTH (4 * 32)        /* in bytes; must be less than or equal to CURBE_ALLOCATION_SIZE * 32 */
#define CURBE_URB_ENTRY_LENGTH  4               /* in 256-bit units; must be less than or equal to CURBE_TOTAL_DATA_LENGTH / 32 */

#define VME_MSG_LENGTH          32
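
/*
 * Editorial sanity check on the sizing above (illustrative arithmetic):
 * CURBE_TOTAL_DATA_LENGTH = 4 * 32 = 128 bytes, i.e. four 256-bit rows,
 * which matches CURBE_URB_ENTRY_LENGTH (4) and is comfortably below
 * CURBE_ALLOCATION_SIZE * 32 = 37 * 32 = 1184 bytes.
 */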

static const uint32_t gen75_vme_intra_frame[][4] = {
#include "shaders/vme/intra_frame_haswell.g75b"
};

static const uint32_t gen75_vme_inter_frame[][4] = {
#include "shaders/vme/inter_frame_haswell.g75b"
};

static struct i965_kernel gen75_vme_kernels[] = {
    {
        "VME Intra Frame",
        VME_INTRA_SHADER,               /* index */
        gen75_vme_intra_frame,
        sizeof(gen75_vme_intra_frame),
        NULL
    },
    {
        "VME Inter Frame",
        VME_INTER_SHADER,
        gen75_vme_inter_frame,
        sizeof(gen75_vme_inter_frame),
        NULL
    }
};
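
/*
 * Usage note (editorial): gen75_vme_context_init() at the bottom of this
 * file copies this table into the per-context vme_kernels[] array and
 * uploads each kernel binary into its own bo with dri_bo_subdata(), so
 * the static table itself is never handed to the GPU directly.
 */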

/*
 * Surface state for Haswell
 */
static void
gen75_vme_set_common_surface_tiling(struct gen7_surface_state *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss0.tiled_surface = 0;
        ss->ss0.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
gen75_vme_set_source_surface_tiling(struct gen7_surface_state2 *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss2.tiled_surface = 0;
        ss->ss2.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss2.tiled_surface = 1;
        ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss2.tiled_surface = 1;
        ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

/* only used for VME source surface state */
static void
gen75_vme_source_surface_state(VADriverContextP ctx,
                               int index,
                               struct object_surface *obj_surface,
                               struct gen6_encoder_context *gen6_encoder_context)
{
    struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
    struct gen7_surface_state2 *ss;
    dri_bo *bo;
    int w, h, w_pitch, h_pitch;
    unsigned int tiling, swizzle;

    assert(obj_surface->bo);

    w = obj_surface->orig_width;
    h = obj_surface->orig_height;
    w_pitch = obj_surface->width;
    h_pitch = obj_surface->height;

    bo = vme_context->surface_state_binding_table.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);

    ss = (struct gen7_surface_state2 *)((char *)bo->virtual + SURFACE_STATE_OFFSET(index));
    memset(ss, 0, sizeof(*ss));

    ss->ss0.surface_base_address = obj_surface->bo->offset;

    ss->ss1.cbcr_pixel_offset_v_direction = 2;
    ss->ss1.width = w - 1;
    ss->ss1.height = h - 1;

    ss->ss2.surface_format = MFX_SURFACE_PLANAR_420_8;
    ss->ss2.interleave_chroma = 1;
    ss->ss2.pitch = w_pitch - 1;
    ss->ss2.half_pitch_for_chroma = 0;

    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
    gen75_vme_set_source_surface_tiling(ss, tiling);

    /* UV offset for interleave mode */
    ss->ss3.x_offset_for_cb = 0;
    ss->ss3.y_offset_for_cb = h_pitch;

    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, 0,
                      0,
                      SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state2, ss0),
                      obj_surface->bo);

    ((unsigned int *)((char *)bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(bo);
}

static void
gen75_vme_media_source_surface_state(VADriverContextP ctx,
                                     int index,
                                     struct object_surface *obj_surface,
                                     struct gen6_encoder_context *gen6_encoder_context)
{
    struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
    struct gen7_surface_state *ss;
    dri_bo *bo;
    int w, h, w_pitch;
    unsigned int tiling, swizzle;

    /* Y plane */
    w = obj_surface->orig_width;
    h = obj_surface->orig_height;
    w_pitch = obj_surface->width;

    bo = vme_context->surface_state_binding_table.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);

    ss = (struct gen7_surface_state *)((char *)bo->virtual + SURFACE_STATE_OFFSET(index));
    memset(ss, 0, sizeof(*ss));

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;

    ss->ss1.base_addr = obj_surface->bo->offset;

    ss->ss2.width = w / 4 - 1;
    ss->ss2.height = h - 1;

    ss->ss3.pitch = w_pitch - 1;

    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
    gen75_vme_set_common_surface_tiling(ss, tiling);

    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, 0,
                      0,
                      SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
                      obj_surface->bo);

    ((unsigned int *)((char *)bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(bo);
}

static VAStatus
gen75_vme_output_buffer_setup(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              int index,
                              struct gen6_encoder_context *gen6_encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
    struct gen7_surface_state *ss;
    dri_bo *bo;
    VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param->buffer;
    VAEncSliceParameterBuffer *pSliceParameter = (VAEncSliceParameterBuffer *)encode_state->slice_params[0]->buffer;
    int is_intra = pSliceParameter->slice_flags.bits.is_intra;
    int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
    int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
    int num_entries;

    if (is_intra) {
        vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 2;
    } else {
        vme_context->vme_output.size_block = INTRA_VME_OUTPUT_IN_BYTES * 24;
        /*
         * Inter MV: 32-byte Intra search + 16 IME info + 128 IME MV + 32 IME Ref
         * + 16 FBR Info + 128 FBR MV + 32 FBR Ref,
         * i.e. 16 * (2 + 2 * (1 + 8 + 2)) = 16 * 24 bytes per MB.
         */
    }
    vme_context->vme_output.num_blocks = width_in_mbs * height_in_mbs;
    vme_context->vme_output.pitch = 16;
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "VME output buffer",
                      vme_context->vme_output.num_blocks * vme_context->vme_output.size_block,
                      0x1000);
    assert(bo);
    vme_context->vme_output.bo = bo;

    bo = vme_context->surface_state_binding_table.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);

    ss = (struct gen7_surface_state *)((char *)bo->virtual + SURFACE_STATE_OFFSET(index));
    memset(ss, 0, sizeof(*ss));

    /* the pitch is always 16 bytes, so express the buffer as 16-byte entries */
    num_entries = vme_context->vme_output.num_blocks * vme_context->vme_output.size_block / 16;

    ss->ss0.surface_type = I965_SURFACE_BUFFER;

    ss->ss1.base_addr = vme_context->vme_output.bo->offset;

    ss->ss2.width = ((num_entries - 1) & 0x7f);
    ss->ss2.height = (((num_entries - 1) >> 7) & 0x3fff);
    ss->ss3.depth = (((num_entries - 1) >> 21) & 0x3f);

    ss->ss3.pitch = vme_context->vme_output.pitch - 1;

    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                      0,
                      SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
                      vme_context->vme_output.bo);

    ((unsigned int *)((char *)bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(bo);

    return VA_STATUS_SUCCESS;
}
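
/*
 * Worked example (editorial, for a hypothetical 1920x1088 stream and
 * assuming INTRA_VME_OUTPUT_IN_BYTES is 16): width_in_mbs = 120 and
 * height_in_mbs = 68, so an inter frame uses size_block = 16 * 24 = 384
 * bytes, num_blocks = 8160 and num_entries = 8160 * 384 / 16 = 195840.
 * num_entries - 1 is then split across the surface-state fields as
 * bits 6:0 (width), 20:7 (height) and 26:21 (depth), the usual gen
 * encoding for linear buffer surfaces.
 */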

static VAStatus
gen75_vme_surface_setup(VADriverContextP ctx,
                        struct encode_state *encode_state,
                        int is_intra,
                        struct gen6_encoder_context *gen6_encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface;
    VAEncPictureParameterBufferH264 *pPicParameter = (VAEncPictureParameterBufferH264 *)encode_state->pic_param->buffer;

    /* Set up the surface states */
    /* current picture for encoding */
    obj_surface = SURFACE(encode_state->current_render_target);
    assert(obj_surface);
    gen75_vme_source_surface_state(ctx, 0, obj_surface, gen6_encoder_context);
    gen75_vme_media_source_surface_state(ctx, 4, obj_surface, gen6_encoder_context);

    if (!is_intra) {
        /* reference 0 */
        obj_surface = SURFACE(pPicParameter->reference_picture);
        assert(obj_surface);
        gen75_vme_source_surface_state(ctx, 1, obj_surface, gen6_encoder_context);
        /* reference 1, FIXME: */
        // obj_surface = SURFACE(pPicParameter->reference_picture);
        // assert(obj_surface);
        // gen7_vme_source_surface_state(ctx, 2, obj_surface);
    }

    /* VME output */
    gen75_vme_output_buffer_setup(ctx, encode_state, 3, gen6_encoder_context);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen75_vme_interface_setup(VADriverContextP ctx,
                          struct encode_state *encode_state,
                          struct gen6_encoder_context *gen6_encoder_context)
{
    struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
    struct gen6_interface_descriptor_data *desc;
    int i;
    dri_bo *bo;

    bo = vme_context->idrt.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);
    desc = bo->virtual;

    for (i = 0; i < GEN6_VME_KERNEL_NUMBER; i++) {
        struct i965_kernel *kernel;
        kernel = &vme_context->vme_kernels[i];
        assert(sizeof(*desc) == 32);
        /* set up the descriptor table */
        memset(desc, 0, sizeof(*desc));
        desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
        desc->desc2.sampler_count = 0; /* FIXME: */
        desc->desc2.sampler_state_pointer = 0;
        desc->desc3.binding_table_entry_count = 1; /* FIXME: */
        desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET >> 5);
        desc->desc4.constant_urb_entry_read_offset = 0;
        desc->desc4.constant_urb_entry_read_length = CURBE_URB_ENTRY_LENGTH;

        /* kernel start */
        dri_bo_emit_reloc(bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0,
                          i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
                          kernel->bo);
        desc++;
    }
    dri_bo_unmap(bo);

    return VA_STATUS_SUCCESS;
}
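
/*
 * Editorial note: since each gen6_interface_descriptor_data is 32 bytes,
 * descriptor i lives at offset i * 32 in idrt.bo, so a MEDIA_OBJECT
 * command can pick a kernel simply by passing VME_INTRA_SHADER (0) or
 * VME_INTER_SHADER (1) as its interface descriptor offset; that is
 * exactly what gen75_vme_media_object() does below.
 */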

static VAStatus
gen75_vme_constant_setup(VADriverContextP ctx,
                         struct encode_state *encode_state,
                         struct gen6_encoder_context *gen6_encoder_context)
{
    struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
    unsigned char *constant_buffer;

    dri_bo_map(vme_context->curbe.bo, 1);
    assert(vme_context->curbe.bo->virtual);
    constant_buffer = vme_context->curbe.bo->virtual;

    /*
     * The VME MV/MB cost table is passed through the constant buffer.
     * The search path, by contrast, is fixed, so it is constructed
     * directly in the GPU shader.
     */
    memcpy(constant_buffer, (char *)vme_context->vme_state_message, 32);

    dri_bo_unmap(vme_context->curbe.bo);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen75_vme_vme_state_setup(VADriverContextP ctx,
                          struct encode_state *encode_state,
                          int is_intra,
                          struct gen6_encoder_context *gen6_encoder_context)
{
    struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
    unsigned int *vme_state_message;
    int i;

    /* build the VME state message: on Haswell the MV/MB cost is passed
     * in the VME message itself */
    assert(vme_context->vme_state_message);
    vme_state_message = (unsigned int *)vme_context->vme_state_message;

    vme_state_message[0] = 0x4a4a4a4a;
    vme_state_message[1] = 0x4a4a4a4a;
    vme_state_message[2] = 0x4a4a4a4a;
    vme_state_message[3] = 0x22120200;
    vme_state_message[4] = 0x62524232;

    for (i = 5; i < 8; i++) {
        vme_state_message[i] = 0;
    }

    return VA_STATUS_SUCCESS;
}
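
/*
 * Editorial note: only dwords 0..7 of vme_state_message are initialized
 * here, and those 8 dwords (32 bytes) are precisely what
 * gen75_vme_constant_setup() above copies into the CURBE buffer for the
 * shaders to read.
 */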

static void gen75_vme_pipeline_select(VADriverContextP ctx,
                                      struct gen6_encoder_context *gen6_encoder_context,
                                      struct intel_batchbuffer *batch)
{
    if (batch == NULL)
        batch = gen6_encoder_context->base.batch;

    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
    ADVANCE_BATCH(batch);
}

static void gen75_vme_state_base_address(VADriverContextP ctx,
                                         struct gen6_encoder_context *gen6_encoder_context,
                                         struct intel_batchbuffer *batch)
{
    struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;

    if (batch == NULL)
        batch = gen6_encoder_context->base.batch;

    BEGIN_BATCH(batch, 10);

    OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 8);

    OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);          /* General State Base Address */
    OUT_RELOC(batch, vme_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface State Base Address */
    OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);          /* Dynamic State Base Address */
    OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);          /* Indirect Object Base Address */
    OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);          /* Instruction Base Address */

    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); /* General State Access Upper Bound */
    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); /* Dynamic State Access Upper Bound */
    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); /* Indirect Object Access Upper Bound */
    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY); /* Instruction Access Upper Bound */

    /*
      OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);            // LLC Coherent Base Address
      OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY);   // LLC Coherent Upper Bound
    */

    ADVANCE_BATCH(batch);
}

static void gen75_vme_vfe_state(VADriverContextP ctx,
                                struct gen6_encoder_context *gen6_encoder_context,
                                struct intel_batchbuffer *batch)
{
    struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;

    if (batch == NULL)
        batch = gen6_encoder_context->base.batch;

    BEGIN_BATCH(batch, 8);

    OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | 6);          /* Gen6 CMD_MEDIA_STATE_POINTERS = CMD_MEDIA_STATE */
    OUT_BATCH(batch, 0);                                /* Scratch Space Base Pointer and Space */
    OUT_BATCH(batch, (vme_context->vfe_state.max_num_threads << 16)
              | (vme_context->vfe_state.num_urb_entries << 8)
              | (vme_context->vfe_state.gpgpu_mode << 2));      /* Maximum Number of Threads, Number of URB Entries, MEDIA Mode */
    OUT_BATCH(batch, 0);                                /* Debug: Object ID */
    OUT_BATCH(batch, (vme_context->vfe_state.urb_entry_size << 16)
              | vme_context->vfe_state.curbe_allocation_size);  /* URB Entry Allocation Size, CURBE Allocation Size */
    OUT_BATCH(batch, 0);                                /* Disable Scoreboard */
    OUT_BATCH(batch, 0);                                /* Disable Scoreboard */
    OUT_BATCH(batch, 0);                                /* Disable Scoreboard */

    ADVANCE_BATCH(batch);
}

static void gen75_vme_curbe_load(VADriverContextP ctx,
                                 struct gen6_encoder_context *gen6_encoder_context,
                                 struct intel_batchbuffer *batch)
{
    struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;

    if (batch == NULL)
        batch = gen6_encoder_context->base.batch;

    BEGIN_BATCH(batch, 4);

    OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | 2);
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, CURBE_TOTAL_DATA_LENGTH);
    OUT_RELOC(batch, vme_context->curbe.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);

    ADVANCE_BATCH(batch);
}

static void gen75_vme_idrt(VADriverContextP ctx,
                           struct gen6_encoder_context *gen6_encoder_context,
                           struct intel_batchbuffer *batch)
{
    struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;

    if (batch == NULL)
        batch = gen6_encoder_context->base.batch;

    BEGIN_BATCH(batch, 4);

    OUT_BATCH(batch, CMD_MEDIA_INTERFACE_LOAD | 2);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, GEN6_VME_KERNEL_NUMBER * sizeof(struct gen6_interface_descriptor_data));
    OUT_RELOC(batch, vme_context->idrt.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);

    ADVANCE_BATCH(batch);
}

static int gen75_vme_media_object(VADriverContextP ctx,
                                  struct encode_state *encode_state,
                                  int mb_x, int mb_y,
                                  int kernel, unsigned int mb_intra_ub,
                                  struct gen6_encoder_context *gen6_encoder_context,
                                  struct intel_batchbuffer *batch)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct object_surface *obj_surface = SURFACE(encode_state->current_render_target);
    int mb_width = ALIGN(obj_surface->orig_width, 16) / 16;
    int len_in_dwords = 8;

    if (batch == NULL)
        batch = gen6_encoder_context->base.batch;

    BEGIN_BATCH(batch, len_in_dwords);

    OUT_BATCH(batch, CMD_MEDIA_OBJECT | (len_in_dwords - 2));
    OUT_BATCH(batch, kernel);           /* Interface Descriptor Offset */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);

    /* inline data */
    OUT_BATCH(batch, mb_width << 16 | mb_y << 8 | mb_x);    /* M0.0 Reference0 X,Y, not used for Intra */
    OUT_BATCH(batch, ((mb_intra_ub << 8) | 0));
    ADVANCE_BATCH(batch);

    return len_in_dwords * 4;
}

static void gen75_vme_media_init(VADriverContextP ctx, struct gen6_encoder_context *gen6_encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct gen6_vme_context *vme_context = &gen6_encoder_context->vme_context;
    dri_bo *bo;

    /* constant buffer */
    dri_bo_unreference(vme_context->curbe.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "Buffer",
                      CURBE_TOTAL_DATA_LENGTH, 64);
    assert(bo);
    vme_context->curbe.bo = bo;

    dri_bo_unreference(vme_context->surface_state_binding_table.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "surface state & binding table",
                      (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6,
                      4096);
    assert(bo);
    vme_context->surface_state_binding_table.bo = bo;

    /* interface descriptor remapping table */
    dri_bo_unreference(vme_context->idrt.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "Buffer",
                      MAX_INTERFACE_DESC_GEN6 * sizeof(struct gen6_interface_descriptor_data), 16);
    assert(bo);
    vme_context->idrt.bo = bo;

    /* VME output buffer */
    dri_bo_unreference(vme_context->vme_output.bo);
    vme_context->vme_output.bo = NULL;

    /* VME state */
    dri_bo_unreference(vme_context->vme_state.bo);
    vme_context->vme_state.bo = NULL;

    vme_context->vfe_state.max_num_threads = 60 - 1;
    vme_context->vfe_state.num_urb_entries = 16;
    vme_context->vfe_state.gpgpu_mode = 0;
    vme_context->vfe_state.urb_entry_size = 59 - 1;
    vme_context->vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1;
}
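
/*
 * Editorial note: the explicit "- 1" on max_num_threads, urb_entry_size
 * and curbe_allocation_size suggests these VFE fields use the hardware's
 * "value minus one" encoding (60 threads, 59 and CURBE_ALLOCATION_SIZE
 * 256-bit rows respectively); treat that reading as an assumption, not a
 * statement from the original authors.
 */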

#define         INTRA_PRED_AVAIL_FLAG_AE        0x60
#define         INTRA_PRED_AVAIL_FLAG_B         0x10
#define         INTRA_PRED_AVAIL_FLAG_C         0x8
#define         INTRA_PRED_AVAIL_FLAG_D         0x4
#define         INTRA_PRED_AVAIL_FLAG_BCD_MASK  0x1C
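
/*
 * Illustrative example (editorial): for an interior macroblock, i.e.
 * x > 0, y > 0 and x < width_in_mbs - 1, the loop below accumulates
 *
 *     mb_intra_ub = INTRA_PRED_AVAIL_FLAG_AE | INTRA_PRED_AVAIL_FLAG_B
 *                 | INTRA_PRED_AVAIL_FLAG_D  | INTRA_PRED_AVAIL_FLAG_C
 *                 = 0x60 | 0x10 | 0x4 | 0x8 = 0x7c,
 *
 * meaning the left (A/E), top (B), top-left (D) and top-right (C)
 * neighbours are all available for intra prediction.
 */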

static void gen75_vme_pipeline_programing(VADriverContextP ctx,
                                          struct encode_state *encode_state,
                                          struct gen6_encoder_context *gen6_encoder_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *main_batch = gen6_encoder_context->base.batch;
    VAEncSliceParameterBuffer *pSliceParameter = (VAEncSliceParameterBuffer *)encode_state->slice_params[0]->buffer;
    VAEncSequenceParameterBufferH264 *pSequenceParameter = (VAEncSequenceParameterBufferH264 *)encode_state->seq_param->buffer;
    int is_intra = pSliceParameter->slice_flags.bits.is_intra;
    int width_in_mbs = pSequenceParameter->picture_width_in_mbs;
    int height_in_mbs = pSequenceParameter->picture_height_in_mbs;
    int emit_new_state = 1, object_len_in_bytes;
    int x, y;
    unsigned int mb_intra_ub;
    struct intel_batchbuffer *batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_RENDER, width_in_mbs * height_in_mbs * 8 * 4 + 0x200);

    intel_batchbuffer_start_atomic(batch, width_in_mbs * height_in_mbs * 8 * 4 + 0x100);

    for (y = 0; y < height_in_mbs; y++) {
        for (x = 0; x < width_in_mbs; x++) {
            mb_intra_ub = 0;
            if (x != 0) {
                mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_AE;
            }
            if (y != 0) {
                mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_B;
                if (x != 0)
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_D;
                if (x != (width_in_mbs - 1))
                    mb_intra_ub |= INTRA_PRED_AVAIL_FLAG_C;
            }

            if (emit_new_state) {
                /* Step 1: MI_FLUSH/PIPE_CONTROL */
                intel_batchbuffer_emit_mi_flush(batch);

                /* Step 2: state command PIPELINE_SELECT */
                gen75_vme_pipeline_select(ctx, gen6_encoder_context, batch);

                /* Step 3: state commands configuring pipeline states */
                gen75_vme_state_base_address(ctx, gen6_encoder_context, batch);
                gen75_vme_vfe_state(ctx, gen6_encoder_context, batch);
                gen75_vme_curbe_load(ctx, gen6_encoder_context, batch);
                gen75_vme_idrt(ctx, gen6_encoder_context, batch);

                emit_new_state = 0;
            }

            /* Step 4: primitive commands */
            object_len_in_bytes = gen75_vme_media_object(ctx, encode_state, x, y, is_intra ? VME_INTRA_SHADER : VME_INTER_SHADER, mb_intra_ub, gen6_encoder_context, batch);

            if (intel_batchbuffer_check_free_space(batch, object_len_in_bytes) == 0) {
                assert(0);
                intel_batchbuffer_end_atomic(batch);
                intel_batchbuffer_flush(batch);
                emit_new_state = 1;
                intel_batchbuffer_start_atomic(batch, 0x1000);
            }
        }
    }

    intel_batchbuffer_align(batch, 8);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, MI_BATCH_BUFFER_END);
    ADVANCE_BATCH(batch);

    intel_batchbuffer_end_atomic(batch);

    /* chain to the main batch buffer */
    intel_batchbuffer_start_atomic(main_batch, 0x100);
    intel_batchbuffer_emit_mi_flush(main_batch);
    BEGIN_BATCH(main_batch, 2);
    OUT_BATCH(main_batch, MI_BATCH_BUFFER_START | (2 << 6));
    OUT_RELOC(main_batch,
              batch->buffer,
              I915_GEM_DOMAIN_COMMAND, 0,
              0);
    ADVANCE_BATCH(main_batch);
    intel_batchbuffer_end_atomic(main_batch);

    /* end of programming */
    intel_batchbuffer_free(batch);
}

static VAStatus gen75_vme_prepare(VADriverContextP ctx,
                                  struct encode_state *encode_state,
                                  struct gen6_encoder_context *gen6_encoder_context)
{
    VAStatus vaStatus = VA_STATUS_SUCCESS;
    VAEncSliceParameterBuffer *pSliceParameter = (VAEncSliceParameterBuffer *)encode_state->slice_params[0]->buffer;
    int is_intra = pSliceParameter->slice_flags.bits.is_intra;

    gen75_vme_surface_setup(ctx, encode_state, is_intra, gen6_encoder_context);
    gen75_vme_interface_setup(ctx, encode_state, gen6_encoder_context);
    gen75_vme_vme_state_setup(ctx, encode_state, is_intra, gen6_encoder_context);
    gen75_vme_constant_setup(ctx, encode_state, gen6_encoder_context);

    /* program the media pipeline */
    gen75_vme_pipeline_programing(ctx, encode_state, gen6_encoder_context);

    return vaStatus;
}

static VAStatus gen75_vme_run(VADriverContextP ctx,
                              struct encode_state *encode_state,
                              struct gen6_encoder_context *gen6_encoder_context)
{
    struct intel_batchbuffer *batch = gen6_encoder_context->base.batch;

    intel_batchbuffer_flush(batch);

    return VA_STATUS_SUCCESS;
}

static VAStatus gen75_vme_stop(VADriverContextP ctx,
                               struct encode_state *encode_state,
                               struct gen6_encoder_context *gen6_encoder_context)
{
    return VA_STATUS_SUCCESS;
}

VAStatus gen75_vme_pipeline(VADriverContextP ctx,
                            VAProfile profile,
                            struct encode_state *encode_state,
                            struct gen6_encoder_context *gen6_encoder_context)
{
    gen75_vme_media_init(ctx, gen6_encoder_context);
    gen75_vme_prepare(ctx, encode_state, gen6_encoder_context);
    gen75_vme_run(ctx, encode_state, gen6_encoder_context);
    gen75_vme_stop(ctx, encode_state, gen6_encoder_context);

    return VA_STATUS_SUCCESS;
}

Bool gen75_vme_context_init(VADriverContextP ctx, struct gen6_vme_context *vme_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    int i;

    memcpy(vme_context->vme_kernels, gen75_vme_kernels, sizeof(vme_context->vme_kernels));

    for (i = 0; i < GEN6_VME_KERNEL_NUMBER; i++) {
        /* load the kernel into GPU memory */
        struct i965_kernel *kernel = &vme_context->vme_kernels[i];

        kernel->bo = dri_bo_alloc(i965->intel.bufmgr,
                                  kernel->name,
                                  kernel->size,
                                  0x1000);
        assert(kernel->bo);
        dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin);
    }

    vme_context->vme_state_message = malloc(VME_MSG_LENGTH * sizeof(int));
    return True;
}

Bool gen75_vme_context_destroy(struct gen6_vme_context *vme_context)
{
    int i;

    dri_bo_unreference(vme_context->idrt.bo);
    vme_context->idrt.bo = NULL;

    dri_bo_unreference(vme_context->surface_state_binding_table.bo);
    vme_context->surface_state_binding_table.bo = NULL;

    dri_bo_unreference(vme_context->curbe.bo);
    vme_context->curbe.bo = NULL;

    dri_bo_unreference(vme_context->vme_output.bo);
    vme_context->vme_output.bo = NULL;

    dri_bo_unreference(vme_context->vme_state.bo);
    vme_context->vme_state.bo = NULL;

    for (i = 0; i < GEN6_VME_KERNEL_NUMBER; i++) {
        /* release the kernel bo */
        struct i965_kernel *kernel = &vme_context->vme_kernels[i];

        dri_bo_unreference(kernel->bo);
        kernel->bo = NULL;
    }

    if (vme_context->vme_state_message) {
        free(vme_context->vme_state_message);
        vme_context->vme_state_message = NULL;
    }

    return True;
}
  812.