/*
 * Copyright © 2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Xiang Haihao <haihao.xiang@intel.com>
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

#include "intel_batchbuffer.h"
#include "intel_driver.h"

#include "i965_gpe_utils.h"

static void
i965_gpe_select(VADriverContextP ctx,
                struct i965_gpe_context *gpe_context,
                struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA);
    ADVANCE_BATCH(batch);
}

static void
gen6_gpe_state_base_address(VADriverContextP ctx,
                            struct i965_gpe_context *gpe_context,
                            struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 10);

    OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY);              /* General State Base Address */
    OUT_RELOC(batch,
              gpe_context->surface_state_binding_table.bo,
              I915_GEM_DOMAIN_INSTRUCTION,
              0,
              BASE_ADDRESS_MODIFY);                     /* Surface state base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY);              /* Dynamic State Base Address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY);              /* Indirect Object Base Address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY);              /* Instruction Base Address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY);              /* General State Access Upper Bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY);              /* Dynamic State Access Upper Bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY);              /* Indirect Object Access Upper Bound */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY);              /* Instruction Access Upper Bound */

    ADVANCE_BATCH(batch);
}
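
/*
 * A note on the command headers above: the low bits of most of these
 * commands carry "total DWord count minus two", which is why the header is
 * written as CMD_STATE_BASE_ADDRESS | (10 - 2) for a 10-DWord packet.  A
 * minimal sketch of the same convention for a hypothetical 4-DWord command
 * (SOME_CMD and dw1..dw3 are placeholders, not real macros):
 *
 *     BEGIN_BATCH(batch, 4);
 *     OUT_BATCH(batch, SOME_CMD | (4 - 2));   // length bias: DWords - 2
 *     OUT_BATCH(batch, dw1);
 *     OUT_BATCH(batch, dw2);
 *     OUT_BATCH(batch, dw3);
 *     ADVANCE_BATCH(batch);
 */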

static void
gen6_gpe_vfe_state(VADriverContextP ctx,
                   struct i965_gpe_context *gpe_context,
                   struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 8);

    OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (8 - 2));
    OUT_BATCH(batch, 0);                                        /* Scratch Space Base Pointer and Space */
    OUT_BATCH(batch,
              gpe_context->vfe_state.max_num_threads << 16 |    /* Maximum Number of Threads */
              gpe_context->vfe_state.num_urb_entries << 8 |     /* Number of URB Entries */
              gpe_context->vfe_state.gpgpu_mode << 2);          /* MEDIA Mode */
    OUT_BATCH(batch, 0);                                        /* Debug: Object ID */
    OUT_BATCH(batch,
              gpe_context->vfe_state.urb_entry_size << 16 |     /* URB Entry Allocation Size */
              gpe_context->vfe_state.curbe_allocation_size);    /* CURBE Allocation Size */
    /* vfe_desc5/6/7 determine whether the scoreboard is used. */
    OUT_BATCH(batch, gpe_context->vfe_desc5.dword);
    OUT_BATCH(batch, gpe_context->vfe_desc6.dword);
    OUT_BATCH(batch, gpe_context->vfe_desc7.dword);

    ADVANCE_BATCH(batch);
}
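
/*
 * A worked example of the DW2 packing above, with illustrative values only:
 * max_num_threads = 60, num_urb_entries = 16 and gpgpu_mode = 0 give
 * (60 << 16) | (16 << 8) | (0 << 2) = 0x003C1000.
 */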

static void
gen6_gpe_curbe_load(VADriverContextP ctx,
                    struct i965_gpe_context *gpe_context,
                    struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 4);

    OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, gpe_context->curbe.length);
    OUT_RELOC(batch, gpe_context->curbe.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);

    ADVANCE_BATCH(batch);
}

static void
gen6_gpe_idrt(VADriverContextP ctx,
              struct i965_gpe_context *gpe_context,
              struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 4);

    OUT_BATCH(batch, CMD_MEDIA_INTERFACE_LOAD | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, gpe_context->idrt.max_entries * gpe_context->idrt.entry_size);
    OUT_RELOC(batch, gpe_context->idrt.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);

    ADVANCE_BATCH(batch);
}

void
i965_gpe_load_kernels(VADriverContextP ctx,
                      struct i965_gpe_context *gpe_context,
                      struct i965_kernel *kernel_list,
                      unsigned int num_kernels)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    int i;

    assert(num_kernels <= MAX_GPE_KERNELS);
    memcpy(gpe_context->kernels, kernel_list, sizeof(*kernel_list) * num_kernels);
    gpe_context->num_kernels = num_kernels;

    for (i = 0; i < num_kernels; i++) {
        struct i965_kernel *kernel = &gpe_context->kernels[i];

        kernel->bo = dri_bo_alloc(i965->intel.bufmgr,
                                  kernel->name,
                                  kernel->size,
                                  0x1000);
        assert(kernel->bo);
        dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin);
    }
}
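
/*
 * Sketch of a typical call, assuming the caller owns a kernel table whose
 * bin/size fields describe prebuilt shader blobs (my_kernel_list is a
 * hypothetical name):
 *
 *     i965_gpe_load_kernels(ctx, &gpe_context,
 *                           my_kernel_list, ARRAY_ELEMS(my_kernel_list));
 *
 * Each entry gets its own 4KB-aligned bo and the blob is uploaded with
 * dri_bo_subdata().
 */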

void
i965_gpe_context_destroy(struct i965_gpe_context *gpe_context)
{
    int i;

    dri_bo_unreference(gpe_context->surface_state_binding_table.bo);
    gpe_context->surface_state_binding_table.bo = NULL;

    dri_bo_unreference(gpe_context->idrt.bo);
    gpe_context->idrt.bo = NULL;

    dri_bo_unreference(gpe_context->curbe.bo);
    gpe_context->curbe.bo = NULL;

    for (i = 0; i < gpe_context->num_kernels; i++) {
        struct i965_kernel *kernel = &gpe_context->kernels[i];

        dri_bo_unreference(kernel->bo);
        kernel->bo = NULL;
    }
}

void
i965_gpe_context_init(VADriverContextP ctx,
                      struct i965_gpe_context *gpe_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    dri_bo *bo;

    dri_bo_unreference(gpe_context->surface_state_binding_table.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "surface state & binding table",
                      gpe_context->surface_state_binding_table.length,
                      4096);
    assert(bo);
    gpe_context->surface_state_binding_table.bo = bo;

    dri_bo_unreference(gpe_context->idrt.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "interface descriptor table",
                      gpe_context->idrt.entry_size * gpe_context->idrt.max_entries,
                      4096);
    assert(bo);
    gpe_context->idrt.bo = bo;

    dri_bo_unreference(gpe_context->curbe.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "curbe buffer",
                      gpe_context->curbe.length,
                      4096);
    assert(bo);
    gpe_context->curbe.bo = bo;
}
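
/*
 * Expected lifecycle, as a minimal sketch: the caller fills in the sizing
 * fields first, since the allocations above read them.  SSBT_LEN,
 * MAX_INTERFACE_DESC and CURBE_TOTAL_DATA_LENGTH stand in for caller-defined
 * constants, and the interface-descriptor struct name is an assumption:
 *
 *     gpe_context.surface_state_binding_table.length = SSBT_LEN;
 *     gpe_context.idrt.max_entries = MAX_INTERFACE_DESC;
 *     gpe_context.idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);
 *     gpe_context.curbe.length = CURBE_TOTAL_DATA_LENGTH;
 *     i965_gpe_context_init(ctx, &gpe_context);
 *     i965_gpe_load_kernels(ctx, &gpe_context, kernels, num_kernels);
 *     ...
 *     i965_gpe_context_destroy(&gpe_context);
 */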

void
gen6_gpe_pipeline_setup(VADriverContextP ctx,
                        struct i965_gpe_context *gpe_context,
                        struct intel_batchbuffer *batch)
{
    intel_batchbuffer_emit_mi_flush(batch);

    i965_gpe_select(ctx, gpe_context, batch);
    gen6_gpe_state_base_address(ctx, gpe_context, batch);
    gen6_gpe_vfe_state(ctx, gpe_context, batch);
    gen6_gpe_curbe_load(ctx, gpe_context, batch);
    gen6_gpe_idrt(ctx, gpe_context, batch);
}
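
/*
 * Per-submission usage sketch (hedged; the MEDIA_OBJECT emission is up to
 * the caller and is not part of this file):
 *
 *     intel_batchbuffer_start_atomic(batch, 0x1000);
 *     gen6_gpe_pipeline_setup(ctx, &gpe_context, batch);
 *     // ... emit MEDIA_OBJECT commands, one per thread/macroblock ...
 *     intel_batchbuffer_end_atomic(batch);
 *     intel_batchbuffer_flush(batch);
 */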

static void
i965_gpe_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss3.tiled_surface = 0;
        ss->ss3.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss3.tiled_surface = 1;
        ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss3.tiled_surface = 1;
        ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
i965_gpe_set_surface2_tiling(struct i965_surface_state2 *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss2.tiled_surface = 0;
        ss->ss2.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss2.tiled_surface = 1;
        ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss2.tiled_surface = 1;
        ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
gen7_gpe_set_surface_tiling(struct gen7_surface_state *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss0.tiled_surface = 0;
        ss->ss0.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
gen7_gpe_set_surface2_tiling(struct gen7_surface_state2 *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss2.tiled_surface = 0;
        ss->ss2.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss2.tiled_surface = 1;
        ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss2.tiled_surface = 1;
        ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
gen8_gpe_set_surface_tiling(struct gen8_surface_state *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss0.tiled_surface = 0;
        ss->ss0.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
gen8_gpe_set_surface2_tiling(struct gen8_surface_state2 *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss2.tiled_surface = 0;
        ss->ss2.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss2.tiled_surface = 1;
        ss->ss2.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss2.tiled_surface = 1;
        ss->ss2.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

static void
i965_gpe_set_surface2_state(VADriverContextP ctx,
                            struct object_surface *obj_surface,
                            struct i965_surface_state2 *ss)
{
    int w, h, w_pitch;
    unsigned int tiling, swizzle;

    assert(obj_surface->bo);
    assert(obj_surface->fourcc == VA_FOURCC_NV12);

    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
    w = obj_surface->orig_width;
    h = obj_surface->orig_height;
    w_pitch = obj_surface->width;

    memset(ss, 0, sizeof(*ss));
    /* ss0 */
    ss->ss0.surface_base_address = obj_surface->bo->offset;
    /* ss1 */
    ss->ss1.cbcr_pixel_offset_v_direction = 2;
    ss->ss1.width = w - 1;
    ss->ss1.height = h - 1;
    /* ss2 */
    ss->ss2.surface_format = MFX_SURFACE_PLANAR_420_8;
    ss->ss2.interleave_chroma = 1;
    ss->ss2.pitch = w_pitch - 1;
    ss->ss2.half_pitch_for_chroma = 0;
    i965_gpe_set_surface2_tiling(ss, tiling);
    /* ss3: UV offset for interleave mode */
    ss->ss3.x_offset_for_cb = obj_surface->x_cb_offset;
    ss->ss3.y_offset_for_cb = obj_surface->y_cb_offset;
}
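
/*
 * The hardware expects "minus one" encodings in the surface state: for an
 * illustrative 1280x720 NV12 surface whose bo rows are 1280 bytes apart,
 * the fields above become ss1.width = 1279, ss1.height = 719 and
 * ss2.pitch = 1279.
 */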

void
i965_gpe_surface2_setup(VADriverContextP ctx,
                        struct i965_gpe_context *gpe_context,
                        struct object_surface *obj_surface,
                        unsigned long binding_table_offset,
                        unsigned long surface_state_offset)
{
    struct i965_surface_state2 *ss;
    dri_bo *bo;

    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);

    ss = (struct i965_surface_state2 *)((char *)bo->virtual + surface_state_offset);
    i965_gpe_set_surface2_state(ctx, obj_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, 0,
                      0,
                      surface_state_offset + offsetof(struct i965_surface_state2, ss0),
                      obj_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}
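
/*
 * Layout sketch of the shared bo used above: the surface states and the
 * binding table live in the same buffer, and each binding-table slot simply
 * holds the byte offset of its surface state.  With illustrative offsets:
 *
 *     surface_state_offset = 0;      // surface state at the start of the bo
 *     binding_table_offset = 0x2000; // table slot further into the same bo
 *     // => *(unsigned int *)((char *)bo->virtual + 0x2000) = 0;
 *
 * The dri_bo_emit_reloc() call asks the kernel to patch the base address in
 * ss0 once the target bo receives its final GPU address at execbuf time.
 */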

static void
i965_gpe_set_media_rw_surface_state(VADriverContextP ctx,
                                    struct object_surface *obj_surface,
                                    struct i965_surface_state *ss)
{
    int w, h, w_pitch;
    unsigned int tiling, swizzle;

    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
    w = obj_surface->orig_width;
    h = obj_surface->orig_height;
    w_pitch = obj_surface->width;

    memset(ss, 0, sizeof(*ss));
    /* ss0 */
    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
    /* ss1 */
    ss->ss1.base_addr = obj_surface->bo->offset;
    /* ss2 */
    ss->ss2.width = w / 4 - 1;  /* in DWORDs for media read & write message */
    ss->ss2.height = h - 1;
    /* ss3 */
    ss->ss3.pitch = w_pitch - 1;
    i965_gpe_set_surface_tiling(ss, tiling);
}

void
i965_gpe_media_rw_surface_setup(VADriverContextP ctx,
                                struct i965_gpe_context *gpe_context,
                                struct object_surface *obj_surface,
                                unsigned long binding_table_offset,
                                unsigned long surface_state_offset)
{
    struct i965_surface_state *ss;
    dri_bo *bo;

    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);

    ss = (struct i965_surface_state *)((char *)bo->virtual + surface_state_offset);
    i965_gpe_set_media_rw_surface_state(ctx, obj_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, 0,
                      0,
                      surface_state_offset + offsetof(struct i965_surface_state, ss1),
                      obj_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}

static void
i965_gpe_set_buffer_surface_state(VADriverContextP ctx,
                                  struct i965_buffer_surface *buffer_surface,
                                  struct i965_surface_state *ss)
{
    int num_entries;

    assert(buffer_surface->bo);
    num_entries = buffer_surface->num_blocks * buffer_surface->size_block / buffer_surface->pitch;

    memset(ss, 0, sizeof(*ss));
    /* ss0 */
    ss->ss0.render_cache_read_mode = 1;
    ss->ss0.surface_type = I965_SURFACE_BUFFER;
    /* ss1 */
    ss->ss1.base_addr = buffer_surface->bo->offset;
    /* ss2 */
    ss->ss2.width = ((num_entries - 1) & 0x7f);
    ss->ss2.height = (((num_entries - 1) >> 7) & 0x1fff);
    /* ss3 */
    ss->ss3.depth = (((num_entries - 1) >> 20) & 0x7f);
    ss->ss3.pitch = buffer_surface->pitch - 1;
}
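
/*
 * Worked example of the width/height/depth split above, assuming
 * num_entries = 100000: num_entries - 1 = 99999 = 0x1869F, so
 * width = 0x1F (31), height = 0x30D (781) and depth = 0; indeed
 * 781 * 128 + 31 = 99999.
 */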

void
i965_gpe_buffer_suface_setup(VADriverContextP ctx,
                             struct i965_gpe_context *gpe_context,
                             struct i965_buffer_surface *buffer_surface,
                             unsigned long binding_table_offset,
                             unsigned long surface_state_offset)
{
    struct i965_surface_state *ss;
    dri_bo *bo;

    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);

    ss = (struct i965_surface_state *)((char *)bo->virtual + surface_state_offset);
    i965_gpe_set_buffer_surface_state(ctx, buffer_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                      0,
                      surface_state_offset + offsetof(struct i965_surface_state, ss1),
                      buffer_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}

static void
gen7_gpe_set_surface2_state(VADriverContextP ctx,
                            struct object_surface *obj_surface,
                            struct gen7_surface_state2 *ss)
{
    int w, h, w_pitch;
    unsigned int tiling, swizzle;

    assert(obj_surface->bo);
    assert(obj_surface->fourcc == VA_FOURCC_NV12);

    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
    w = obj_surface->orig_width;
    h = obj_surface->orig_height;
    w_pitch = obj_surface->width;

    memset(ss, 0, sizeof(*ss));
    /* ss0 */
    ss->ss0.surface_base_address = obj_surface->bo->offset;
    /* ss1 */
    ss->ss1.cbcr_pixel_offset_v_direction = 2;
    ss->ss1.width = w - 1;
    ss->ss1.height = h - 1;
    /* ss2 */
    ss->ss2.surface_format = MFX_SURFACE_PLANAR_420_8;
    ss->ss2.interleave_chroma = 1;
    ss->ss2.pitch = w_pitch - 1;
    ss->ss2.half_pitch_for_chroma = 0;
    gen7_gpe_set_surface2_tiling(ss, tiling);
    /* ss3: UV offset for interleave mode */
    ss->ss3.x_offset_for_cb = obj_surface->x_cb_offset;
    ss->ss3.y_offset_for_cb = obj_surface->y_cb_offset;
}

void
gen7_gpe_surface2_setup(VADriverContextP ctx,
                        struct i965_gpe_context *gpe_context,
                        struct object_surface *obj_surface,
                        unsigned long binding_table_offset,
                        unsigned long surface_state_offset)
{
    struct gen7_surface_state2 *ss;
    dri_bo *bo;

    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);

    ss = (struct gen7_surface_state2 *)((char *)bo->virtual + surface_state_offset);
    gen7_gpe_set_surface2_state(ctx, obj_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, 0,
                      0,
                      surface_state_offset + offsetof(struct gen7_surface_state2, ss0),
                      obj_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}

static void
gen7_gpe_set_media_rw_surface_state(VADriverContextP ctx,
                                    struct object_surface *obj_surface,
                                    struct gen7_surface_state *ss)
{
    int w, h, w_pitch;
    unsigned int tiling, swizzle;

    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
    w = obj_surface->orig_width;
    h = obj_surface->orig_height;
    w_pitch = obj_surface->width;

    memset(ss, 0, sizeof(*ss));
    /* ss0 */
    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
    /* ss1 */
    ss->ss1.base_addr = obj_surface->bo->offset;
    /* ss2 */
    ss->ss2.width = w / 4 - 1;  /* in DWORDs for media read & write message */
    ss->ss2.height = h - 1;
    /* ss3 */
    ss->ss3.pitch = w_pitch - 1;
    gen7_gpe_set_surface_tiling(ss, tiling);
}

static void
gen75_gpe_set_media_chroma_surface_state(VADriverContextP ctx,
                                         struct object_surface *obj_surface,
                                         struct gen7_surface_state *ss)
{
    int w, w_pitch;
    unsigned int tiling, swizzle;
    int cbcr_offset;

    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
    w = obj_surface->orig_width;
    w_pitch = obj_surface->width;

    cbcr_offset = obj_surface->height * obj_surface->width;
    memset(ss, 0, sizeof(*ss));
    /* ss0 */
    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
    /* ss1 */
    ss->ss1.base_addr = obj_surface->bo->offset + cbcr_offset;
    /* ss2 */
    ss->ss2.width = w / 4 - 1;  /* in DWORDs for media read & write message */
    ss->ss2.height = (obj_surface->height / 2) - 1;
    /* ss3 */
    ss->ss3.pitch = w_pitch - 1;
    gen7_gpe_set_surface_tiling(ss, tiling);
}
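
/*
 * NV12 arithmetic used above, with illustrative numbers: for a bo whose
 * (aligned) luma plane is 1280 wide and 720 tall, the interleaved CbCr
 * plane starts at cbcr_offset = 720 * 1280 = 921600 bytes and is half as
 * tall, hence ss2.height = 720 / 2 - 1 = 359.
 */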

void
gen7_gpe_media_rw_surface_setup(VADriverContextP ctx,
                                struct i965_gpe_context *gpe_context,
                                struct object_surface *obj_surface,
                                unsigned long binding_table_offset,
                                unsigned long surface_state_offset)
{
    struct gen7_surface_state *ss;
    dri_bo *bo;

    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);

    ss = (struct gen7_surface_state *)((char *)bo->virtual + surface_state_offset);
    gen7_gpe_set_media_rw_surface_state(ctx, obj_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, 0,
                      0,
                      surface_state_offset + offsetof(struct gen7_surface_state, ss1),
                      obj_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}

void
gen75_gpe_media_chroma_surface_setup(VADriverContextP ctx,
                                     struct i965_gpe_context *gpe_context,
                                     struct object_surface *obj_surface,
                                     unsigned long binding_table_offset,
                                     unsigned long surface_state_offset)
{
    struct gen7_surface_state *ss;
    dri_bo *bo;
    int cbcr_offset;

    assert(obj_surface->fourcc == VA_FOURCC_NV12);
    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);

    cbcr_offset = obj_surface->height * obj_surface->width;
    ss = (struct gen7_surface_state *)((char *)bo->virtual + surface_state_offset);
    gen75_gpe_set_media_chroma_surface_state(ctx, obj_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, 0,
                      cbcr_offset,
                      surface_state_offset + offsetof(struct gen7_surface_state, ss1),
                      obj_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}

static void
gen7_gpe_set_buffer_surface_state(VADriverContextP ctx,
                                  struct i965_buffer_surface *buffer_surface,
                                  struct gen7_surface_state *ss)
{
    int num_entries;

    assert(buffer_surface->bo);
    num_entries = buffer_surface->num_blocks * buffer_surface->size_block / buffer_surface->pitch;

    memset(ss, 0, sizeof(*ss));
    /* ss0 */
    ss->ss0.surface_type = I965_SURFACE_BUFFER;
    /* ss1 */
    ss->ss1.base_addr = buffer_surface->bo->offset;
    /* ss2 */
    ss->ss2.width = ((num_entries - 1) & 0x7f);
    ss->ss2.height = (((num_entries - 1) >> 7) & 0x3fff);
    /* ss3 */
    ss->ss3.depth = (((num_entries - 1) >> 21) & 0x3f);
    ss->ss3.pitch = buffer_surface->pitch - 1;
}

void
gen7_gpe_buffer_suface_setup(VADriverContextP ctx,
                             struct i965_gpe_context *gpe_context,
                             struct i965_buffer_surface *buffer_surface,
                             unsigned long binding_table_offset,
                             unsigned long surface_state_offset)
{
    struct gen7_surface_state *ss;
    dri_bo *bo;

    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);

    ss = (struct gen7_surface_state *)((char *)bo->virtual + surface_state_offset);
    gen7_gpe_set_buffer_surface_state(ctx, buffer_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                      0,
                      surface_state_offset + offsetof(struct gen7_surface_state, ss1),
                      buffer_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}

static void
gen8_gpe_set_surface2_state(VADriverContextP ctx,
                            struct object_surface *obj_surface,
                            struct gen8_surface_state2 *ss)
{
    int w, h, w_pitch;
    unsigned int tiling, swizzle;

    assert(obj_surface->bo);
    assert(obj_surface->fourcc == VA_FOURCC_NV12);

    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
    w = obj_surface->orig_width;
    h = obj_surface->orig_height;
    w_pitch = obj_surface->width;

    memset(ss, 0, sizeof(*ss));
    /* ss6: surface base address */
    ss->ss6.base_addr = obj_surface->bo->offset;
    /* ss1 */
    ss->ss1.cbcr_pixel_offset_v_direction = 2;
    ss->ss1.width = w - 1;
    ss->ss1.height = h - 1;
    /* ss2 */
    ss->ss2.surface_format = MFX_SURFACE_PLANAR_420_8;
    ss->ss2.interleave_chroma = 1;
    ss->ss2.pitch = w_pitch - 1;
    ss->ss2.half_pitch_for_chroma = 0;
    gen8_gpe_set_surface2_tiling(ss, tiling);
    /* ss3: UV offset for interleave mode */
    ss->ss3.x_offset_for_cb = obj_surface->x_cb_offset;
    ss->ss3.y_offset_for_cb = obj_surface->y_cb_offset;
}

void
gen8_gpe_surface2_setup(VADriverContextP ctx,
                        struct i965_gpe_context *gpe_context,
                        struct object_surface *obj_surface,
                        unsigned long binding_table_offset,
                        unsigned long surface_state_offset)
{
    struct gen8_surface_state2 *ss;
    dri_bo *bo;

    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);

    ss = (struct gen8_surface_state2 *)((char *)bo->virtual + surface_state_offset);
    gen8_gpe_set_surface2_state(ctx, obj_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, 0,
                      0,
                      surface_state_offset + offsetof(struct gen8_surface_state2, ss6),
                      obj_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}

static void
gen8_gpe_set_media_rw_surface_state(VADriverContextP ctx,
                                    struct object_surface *obj_surface,
                                    struct gen8_surface_state *ss)
{
    int w, h, w_pitch;
    unsigned int tiling, swizzle;

    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
    w = obj_surface->orig_width;
    h = obj_surface->orig_height;
    w_pitch = obj_surface->width;

    memset(ss, 0, sizeof(*ss));
    /* ss0 */
    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
    /* ss8: surface base address */
    ss->ss8.base_addr = obj_surface->bo->offset;
    /* ss2 */
    ss->ss2.width = w / 4 - 1;  /* in DWORDs for media read & write message */
    ss->ss2.height = h - 1;
    /* ss3 */
    ss->ss3.pitch = w_pitch - 1;
    gen8_gpe_set_surface_tiling(ss, tiling);
}

static void
gen8_gpe_set_media_chroma_surface_state(VADriverContextP ctx,
                                        struct object_surface *obj_surface,
                                        struct gen8_surface_state *ss)
{
    int w, w_pitch;
    unsigned int tiling, swizzle;
    int cbcr_offset;

    dri_bo_get_tiling(obj_surface->bo, &tiling, &swizzle);
    w = obj_surface->orig_width;
    w_pitch = obj_surface->width;

    cbcr_offset = obj_surface->height * obj_surface->width;
    memset(ss, 0, sizeof(*ss));
    /* ss0 */
    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = I965_SURFACEFORMAT_R8_UNORM;
    /* ss8: surface base address */
    ss->ss8.base_addr = obj_surface->bo->offset + cbcr_offset;
    /* ss2 */
    ss->ss2.width = w / 4 - 1;  /* in DWORDs for media read & write message */
    ss->ss2.height = (obj_surface->height / 2) - 1;
    /* ss3 */
    ss->ss3.pitch = w_pitch - 1;
    gen8_gpe_set_surface_tiling(ss, tiling);
}

void
gen8_gpe_media_rw_surface_setup(VADriverContextP ctx,
                                struct i965_gpe_context *gpe_context,
                                struct object_surface *obj_surface,
                                unsigned long binding_table_offset,
                                unsigned long surface_state_offset)
{
    struct gen8_surface_state *ss;
    dri_bo *bo;

    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);

    ss = (struct gen8_surface_state *)((char *)bo->virtual + surface_state_offset);
    gen8_gpe_set_media_rw_surface_state(ctx, obj_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, 0,
                      0,
                      surface_state_offset + offsetof(struct gen8_surface_state, ss8),
                      obj_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}

void
gen8_gpe_media_chroma_surface_setup(VADriverContextP ctx,
                                    struct i965_gpe_context *gpe_context,
                                    struct object_surface *obj_surface,
                                    unsigned long binding_table_offset,
                                    unsigned long surface_state_offset)
{
    struct gen8_surface_state *ss;
    dri_bo *bo;
    int cbcr_offset;

    assert(obj_surface->fourcc == VA_FOURCC_NV12);
    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);

    cbcr_offset = obj_surface->height * obj_surface->width;
    ss = (struct gen8_surface_state *)((char *)bo->virtual + surface_state_offset);
    gen8_gpe_set_media_chroma_surface_state(ctx, obj_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, 0,
                      cbcr_offset,
                      surface_state_offset + offsetof(struct gen8_surface_state, ss8),
                      obj_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}

static void
gen8_gpe_set_buffer_surface_state(VADriverContextP ctx,
                                  struct i965_buffer_surface *buffer_surface,
                                  struct gen8_surface_state *ss)
{
    int num_entries;

    assert(buffer_surface->bo);
    num_entries = buffer_surface->num_blocks * buffer_surface->size_block / buffer_surface->pitch;

    memset(ss, 0, sizeof(*ss));
    /* ss0 */
    ss->ss0.surface_type = I965_SURFACE_BUFFER;
    /* ss8: surface base address */
    ss->ss8.base_addr = buffer_surface->bo->offset;
    /* ss2 */
    ss->ss2.width = ((num_entries - 1) & 0x7f);
    ss->ss2.height = (((num_entries - 1) >> 7) & 0x3fff);
    /* ss3 */
    ss->ss3.depth = (((num_entries - 1) >> 21) & 0x3f);
    ss->ss3.pitch = buffer_surface->pitch - 1;
}

void
gen8_gpe_buffer_suface_setup(VADriverContextP ctx,
                             struct i965_gpe_context *gpe_context,
                             struct i965_buffer_surface *buffer_surface,
                             unsigned long binding_table_offset,
                             unsigned long surface_state_offset)
{
    struct gen8_surface_state *ss;
    dri_bo *bo;

    bo = gpe_context->surface_state_binding_table.bo;
    dri_bo_map(bo, 1);
    assert(bo->virtual);

    ss = (struct gen8_surface_state *)((char *)bo->virtual + surface_state_offset);
    gen8_gpe_set_buffer_surface_state(ctx, buffer_surface, ss);
    dri_bo_emit_reloc(bo,
                      I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                      0,
                      surface_state_offset + offsetof(struct gen8_surface_state, ss8),
                      buffer_surface->bo);

    *((unsigned int *)((char *)bo->virtual + binding_table_offset)) = surface_state_offset;
    dri_bo_unmap(bo);
}

static void
gen8_gpe_state_base_address(VADriverContextP ctx,
                            struct i965_gpe_context *gpe_context,
                            struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 16);

    OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (16 - 2));

    /* DW1-3. General state base address */
    OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);

    /* DW4-5. Surface state base address */
    OUT_RELOC(batch, gpe_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
    OUT_BATCH(batch, 0);

    /* DW6-7. Dynamic state base address */
    if (gpe_context->dynamic_state.bo)
        OUT_RELOC(batch, gpe_context->dynamic_state.bo,
                  I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_SAMPLER,
                  0, BASE_ADDRESS_MODIFY);
    else
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);

    OUT_BATCH(batch, 0);

    /* DW8-9. Indirect object base address */
    if (gpe_context->indirect_state.bo)
        OUT_RELOC(batch, gpe_context->indirect_state.bo,
                  I915_GEM_DOMAIN_SAMPLER,
                  0, BASE_ADDRESS_MODIFY);
    else
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);

    OUT_BATCH(batch, 0);

    /* DW10-11. Instruction base address */
    if (gpe_context->instruction_state.bo)
        OUT_RELOC(batch, gpe_context->instruction_state.bo,
                  I915_GEM_DOMAIN_INSTRUCTION,
                  0, BASE_ADDRESS_MODIFY);
    else
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);

    OUT_BATCH(batch, 0);

    /* DW12-15. Size limitations */
    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY);         /* General state access upper bound */
    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY);         /* Dynamic state access upper bound */
    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY);         /* Indirect object access upper bound */
    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY);         /* Instruction access upper bound */

    /*
      OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);                // LLC Coherent Base Address
      OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY);       // LLC Coherent Upper Bound
    */

    ADVANCE_BATCH(batch);
}

static void
gen8_gpe_vfe_state(VADriverContextP ctx,
                   struct i965_gpe_context *gpe_context,
                   struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 9);

    OUT_BATCH(batch, CMD_MEDIA_VFE_STATE | (9 - 2));
    /* Scratch Space Base Pointer and Space */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch,
              gpe_context->vfe_state.max_num_threads << 16 |    /* Maximum Number of Threads */
              gpe_context->vfe_state.num_urb_entries << 8 |     /* Number of URB Entries */
              gpe_context->vfe_state.gpgpu_mode << 2);          /* MEDIA Mode */
    OUT_BATCH(batch, 0);                                        /* Debug: Object ID */
    OUT_BATCH(batch,
              gpe_context->vfe_state.urb_entry_size << 16 |     /* URB Entry Allocation Size */
              gpe_context->vfe_state.curbe_allocation_size);    /* CURBE Allocation Size */

    /* vfe_desc5/6/7 determine whether the scoreboard is used. */
    OUT_BATCH(batch, gpe_context->vfe_desc5.dword);
    OUT_BATCH(batch, gpe_context->vfe_desc6.dword);
    OUT_BATCH(batch, gpe_context->vfe_desc7.dword);

    ADVANCE_BATCH(batch);
}

static void
gen8_gpe_curbe_load(VADriverContextP ctx,
                    struct i965_gpe_context *gpe_context,
                    struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 4);

    OUT_BATCH(batch, CMD_MEDIA_CURBE_LOAD | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, gpe_context->curbe_size);
    OUT_BATCH(batch, gpe_context->curbe_offset);    /* offset into the dynamic state bo */

    ADVANCE_BATCH(batch);
}

static void
gen8_gpe_idrt(VADriverContextP ctx,
              struct i965_gpe_context *gpe_context,
              struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 6);

    OUT_BATCH(batch, CMD_MEDIA_STATE_FLUSH);
    OUT_BATCH(batch, 0);

    OUT_BATCH(batch, CMD_MEDIA_INTERFACE_LOAD | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, gpe_context->idrt_size);
    OUT_BATCH(batch, gpe_context->idrt_offset);     /* offset into the dynamic state bo */

    ADVANCE_BATCH(batch);
}

void
gen8_gpe_pipeline_setup(VADriverContextP ctx,
                        struct i965_gpe_context *gpe_context,
                        struct intel_batchbuffer *batch)
{
    intel_batchbuffer_emit_mi_flush(batch);

    i965_gpe_select(ctx, gpe_context, batch);
    gen8_gpe_state_base_address(ctx, gpe_context, batch);
    gen8_gpe_vfe_state(ctx, gpe_context, batch);
    gen8_gpe_curbe_load(ctx, gpe_context, batch);
    gen8_gpe_idrt(ctx, gpe_context, batch);
}

void
gen8_gpe_context_init(VADriverContextP ctx,
                      struct i965_gpe_context *gpe_context)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    dri_bo *bo;
    int bo_size;
    unsigned int start_offset, end_offset;

    dri_bo_unreference(gpe_context->surface_state_binding_table.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "surface state & binding table",
                      gpe_context->surface_state_binding_table.length,
                      4096);
    assert(bo);
    gpe_context->surface_state_binding_table.bo = bo;

    bo_size = gpe_context->idrt_size + gpe_context->curbe_size + gpe_context->sampler_size + 192;
    dri_bo_unreference(gpe_context->dynamic_state.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "dynamic state",
                      bo_size,
                      4096);
    assert(bo);
    gpe_context->dynamic_state.bo = bo;
    gpe_context->dynamic_state.bo_size = bo_size;

    end_offset = 0;
    gpe_context->dynamic_state.end_offset = 0;

    /* Constant buffer offset */
    start_offset = ALIGN(end_offset, 64);
    gpe_context->curbe_offset = start_offset;
    end_offset = start_offset + gpe_context->curbe_size;

    /* Interface descriptor offset */
    start_offset = ALIGN(end_offset, 64);
    gpe_context->idrt_offset = start_offset;
    end_offset = start_offset + gpe_context->idrt_size;

    /* Sampler state offset */
    start_offset = ALIGN(end_offset, 64);
    gpe_context->sampler_offset = start_offset;
    end_offset = start_offset + gpe_context->sampler_size;

    /* update the end offset of dynamic_state */
    gpe_context->dynamic_state.end_offset = end_offset;
}
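
/*
 * Worked layout example for the dynamic-state bo above, assuming
 * curbe_size = 160, idrt_size = 128 and sampler_size = 96:
 * curbe_offset = 0, end = 160; idrt_offset = ALIGN(160, 64) = 192,
 * end = 320; sampler_offset = ALIGN(320, 64) = 320, and
 * end_offset = 320 + 96 = 416.  The extra 192 bytes in bo_size presumably
 * cover the worst-case 64-byte alignment padding of the three sections.
 */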

void
gen8_gpe_context_destroy(struct i965_gpe_context *gpe_context)
{
    dri_bo_unreference(gpe_context->surface_state_binding_table.bo);
    gpe_context->surface_state_binding_table.bo = NULL;

    dri_bo_unreference(gpe_context->instruction_state.bo);
    gpe_context->instruction_state.bo = NULL;

    dri_bo_unreference(gpe_context->dynamic_state.bo);
    gpe_context->dynamic_state.bo = NULL;

    dri_bo_unreference(gpe_context->indirect_state.bo);
    gpe_context->indirect_state.bo = NULL;
}

void
gen8_gpe_load_kernels(VADriverContextP ctx,
                      struct i965_gpe_context *gpe_context,
                      struct i965_kernel *kernel_list,
                      unsigned int num_kernels)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    int i, kernel_size;
    unsigned int kernel_offset, end_offset;
    unsigned char *kernel_ptr;
    struct i965_kernel *kernel;

    assert(num_kernels <= MAX_GPE_KERNELS);
    memcpy(gpe_context->kernels, kernel_list, sizeof(*kernel_list) * num_kernels);
    gpe_context->num_kernels = num_kernels;

    /* 64 bytes of alignment slack per kernel, plus the binaries themselves */
    kernel_size = num_kernels * 64;
    for (i = 0; i < num_kernels; i++) {
        kernel = &gpe_context->kernels[i];

        kernel_size += kernel->size;
    }

    gpe_context->instruction_state.bo = dri_bo_alloc(i965->intel.bufmgr,
                                                     "kernel shader",
                                                     kernel_size,
                                                     0x1000);
    if (gpe_context->instruction_state.bo == NULL) {
        WARN_ONCE("failed to allocate the buffer space for kernel shaders\n");
        return;
    }

    gpe_context->instruction_state.bo_size = kernel_size;
    gpe_context->instruction_state.end_offset = 0;
    end_offset = 0;

    dri_bo_map(gpe_context->instruction_state.bo, 1);
    kernel_ptr = (unsigned char *)(gpe_context->instruction_state.bo->virtual);
    for (i = 0; i < num_kernels; i++) {
        kernel_offset = ALIGN(end_offset, 64);
        kernel = &gpe_context->kernels[i];
        kernel->kernel_offset = kernel_offset;

        if (kernel->size) {
            memcpy(kernel_ptr + kernel_offset, kernel->bin, kernel->size);

            end_offset = kernel_offset + kernel->size;
        }
    }

    gpe_context->instruction_state.end_offset = end_offset;

    dri_bo_unmap(gpe_context->instruction_state.bo);
}
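
/*
 * Packing sketch for the instruction bo above, with assumed kernel sizes of
 * 1000 and 500 bytes: the first kernel lands at offset 0, the second at
 * ALIGN(1000, 64) = 1024, and end_offset finishes at 1524.  Each
 * kernel_offset is presumably what the caller later programs into its
 * interface descriptor, relative to the instruction base address.
 */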

static void
gen9_gpe_state_base_address(VADriverContextP ctx,
                            struct i965_gpe_context *gpe_context,
                            struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 19);

    OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (19 - 2));

    /* DW1-3. General state base address */
    OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);

    /* DW4-5. Surface state base address */
    OUT_RELOC(batch, gpe_context->surface_state_binding_table.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
    OUT_BATCH(batch, 0);

    /* DW6-7. Dynamic state base address */
    if (gpe_context->dynamic_state.bo)
        OUT_RELOC(batch, gpe_context->dynamic_state.bo,
                  I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_SAMPLER,
                  0, BASE_ADDRESS_MODIFY);
    else
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);

    OUT_BATCH(batch, 0);

    /* DW8-9. Indirect object base address */
    if (gpe_context->indirect_state.bo)
        OUT_RELOC(batch, gpe_context->indirect_state.bo,
                  I915_GEM_DOMAIN_SAMPLER,
                  0, BASE_ADDRESS_MODIFY);
    else
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);

    OUT_BATCH(batch, 0);

    /* DW10-11. Instruction base address */
    if (gpe_context->instruction_state.bo)
        OUT_RELOC(batch, gpe_context->instruction_state.bo,
                  I915_GEM_DOMAIN_INSTRUCTION,
                  0, BASE_ADDRESS_MODIFY);
    else
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);

    OUT_BATCH(batch, 0);

    /* DW12-15. Size limitations */
    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY);         /* General state access upper bound */
    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY);         /* Dynamic state access upper bound */
    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY);         /* Indirect object access upper bound */
    OUT_BATCH(batch, 0xFFFFF000 | BASE_ADDRESS_MODIFY);         /* Instruction access upper bound */

    /* DW16-18. Bindless surface state base address */
    OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0xFFFFF000);

    ADVANCE_BATCH(batch);
}

static void
gen9_gpe_select(VADriverContextP ctx,
                struct i965_gpe_context *gpe_context,
                struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA |
                     GEN9_PIPELINE_SELECTION_MASK |
                     GEN9_MEDIA_DOP_GATE_OFF |
                     GEN9_MEDIA_DOP_GATE_MASK |
                     GEN9_FORCE_MEDIA_AWAKE_ON |
                     GEN9_FORCE_MEDIA_AWAKE_MASK);
    ADVANCE_BATCH(batch);
}
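
/*
 * Gen9 PIPELINE_SELECT uses masked bits: each *_MASK constant opens the
 * corresponding field for writing, so GEN9_FORCE_MEDIA_AWAKE_ON only takes
 * effect because GEN9_FORCE_MEDIA_AWAKE_MASK is set alongside it.
 * gen9_gpe_pipeline_end() below emits the mirrored write (DOP gate on,
 * media awake off) to undo this selection at the end of the pipeline.
 */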

void
gen9_gpe_pipeline_setup(VADriverContextP ctx,
                        struct i965_gpe_context *gpe_context,
                        struct intel_batchbuffer *batch)
{
    intel_batchbuffer_emit_mi_flush(batch);

    gen9_gpe_select(ctx, gpe_context, batch);
    gen9_gpe_state_base_address(ctx, gpe_context, batch);
    gen8_gpe_vfe_state(ctx, gpe_context, batch);
    gen8_gpe_curbe_load(ctx, gpe_context, batch);
    gen8_gpe_idrt(ctx, gpe_context, batch);
}

void
gen9_gpe_pipeline_end(VADriverContextP ctx,
                      struct i965_gpe_context *gpe_context,
                      struct intel_batchbuffer *batch)
{
    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_MEDIA |
              GEN9_PIPELINE_SELECTION_MASK |
              GEN9_MEDIA_DOP_GATE_ON |
              GEN9_MEDIA_DOP_GATE_MASK |
              GEN9_FORCE_MEDIA_AWAKE_OFF |
              GEN9_FORCE_MEDIA_AWAKE_MASK);
    ADVANCE_BATCH(batch);
}