/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *   Li Xiaowei <xiaowei.a.li@intel.com>
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

#include "intel_batchbuffer.h"
#include "intel_driver.h"

#include "i965_structs.h"
#include "i965_defines.h"
#include "i965_drv_video.h"
#include "gen75_vpp_gpe.h"

#define MAX_INTERFACE_DESC_GEN6      MAX_GPE_KERNELS
#define MAX_MEDIA_SURFACES_GEN6      34

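/*
 * Layout of the surface-state/binding-table buffer used by the macros
 * below: the padded surface-state entries for up to MAX_MEDIA_SURFACES_GEN6
 * surfaces come first, followed by the binding table itself (one unsigned
 * int entry per surface). The GEN7 and GEN8 variants differ only in the
 * padded surface-state size.
 */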
#define SURFACE_STATE_OFFSET_GEN7(index)   (SURFACE_STATE_PADDED_SIZE_GEN7 * (index))
#define BINDING_TABLE_OFFSET_GEN7(index)   (SURFACE_STATE_OFFSET_GEN7(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index))

#define SURFACE_STATE_OFFSET_GEN8(index)   (SURFACE_STATE_PADDED_SIZE_GEN8 * (index))
#define BINDING_TABLE_OFFSET_GEN8(index)   (SURFACE_STATE_OFFSET_GEN8(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index))

#define CURBE_ALLOCATION_SIZE   37
#define CURBE_TOTAL_DATA_LENGTH (4 * 32)
#define CURBE_URB_ENTRY_LENGTH  4

/* Sharpening shader binaries and kernels for Haswell (Gen7.5) */
static const unsigned int gen75_gpe_sharpening_h_blur[][4] = {
   #include "shaders/post_processing/gen75/sharpening_h_blur.g75b"
};
static const unsigned int gen75_gpe_sharpening_v_blur[][4] = {
   #include "shaders/post_processing/gen75/sharpening_v_blur.g75b"
};
static const unsigned int gen75_gpe_sharpening_unmask[][4] = {
   #include "shaders/post_processing/gen75/sharpening_unmask.g75b"
};
static struct i965_kernel gen75_vpp_sharpening_kernels[] = {
    {
        "vpp: sharpening(horizontal blur)",
        VPP_GPE_SHARPENING,
        gen75_gpe_sharpening_h_blur,
        sizeof(gen75_gpe_sharpening_h_blur),
        NULL
    },
    {
        "vpp: sharpening(vertical blur)",
        VPP_GPE_SHARPENING,
        gen75_gpe_sharpening_v_blur,
        sizeof(gen75_gpe_sharpening_v_blur),
        NULL
    },
    {
        "vpp: sharpening(unmask)",
        VPP_GPE_SHARPENING,
        gen75_gpe_sharpening_unmask,
        sizeof(gen75_gpe_sharpening_unmask),
        NULL
    },
};

/* Sharpening shader binaries and kernels for Broadwell (Gen8) */
static const unsigned int gen8_gpe_sharpening_h_blur[][4] = {
   #include "shaders/post_processing/gen8/sharpening_h_blur.g8b"
};
static const unsigned int gen8_gpe_sharpening_v_blur[][4] = {
   #include "shaders/post_processing/gen8/sharpening_v_blur.g8b"
};
static const unsigned int gen8_gpe_sharpening_unmask[][4] = {
   #include "shaders/post_processing/gen8/sharpening_unmask.g8b"
};

static struct i965_kernel gen8_vpp_sharpening_kernels[] = {
    {
        "vpp: sharpening(horizontal blur)",
        VPP_GPE_SHARPENING,
        gen8_gpe_sharpening_h_blur,
        sizeof(gen8_gpe_sharpening_h_blur),
        NULL
    },
    {
        "vpp: sharpening(vertical blur)",
        VPP_GPE_SHARPENING,
        gen8_gpe_sharpening_v_blur,
        sizeof(gen8_gpe_sharpening_v_blur),
        NULL
    },
    {
        "vpp: sharpening(unmask)",
        VPP_GPE_SHARPENING,
        gen8_gpe_sharpening_unmask,
        sizeof(gen8_gpe_sharpening_unmask),
        NULL
    },
};

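/*
 * Bind all surfaces needed by one pass into the GPE binding table: a
 * luma/chroma pair of surface states for each input NV12 surface (current
 * frame plus forward/backward references), a luma/chroma pair for the
 * output NV12 surface, and finally the kernel return buffer.
 */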
static VAStatus
gen75_gpe_process_surfaces_setup(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct object_surface *obj_surface;
    unsigned int i = 0;
    unsigned char input_surface_sum = (1 + vpp_gpe_ctx->forward_surf_sum +
                                         vpp_gpe_ctx->backward_surf_sum) * 2;

    /* Bind the input NV12 surfaces (luma + chroma) */
    for( i = 0; i < input_surface_sum; i += 2){
         obj_surface = vpp_gpe_ctx->surface_input_object[i/2];
         assert(obj_surface);
         gen7_gpe_media_rw_surface_setup(ctx,
                                         &vpp_gpe_ctx->gpe_ctx,
                                          obj_surface,
                                          BINDING_TABLE_OFFSET_GEN7(i),
                                          SURFACE_STATE_OFFSET_GEN7(i));

         gen75_gpe_media_chroma_surface_setup(ctx,
                                          &vpp_gpe_ctx->gpe_ctx,
                                          obj_surface,
                                          BINDING_TABLE_OFFSET_GEN7(i + 1),
                                          SURFACE_STATE_OFFSET_GEN7(i + 1));
    }

    /* Bind the output NV12 surface (luma + chroma) */
    obj_surface = vpp_gpe_ctx->surface_output_object;
    assert(obj_surface);
    gen7_gpe_media_rw_surface_setup(ctx,
                                    &vpp_gpe_ctx->gpe_ctx,
                                    obj_surface,
                                    BINDING_TABLE_OFFSET_GEN7(input_surface_sum),
                                    SURFACE_STATE_OFFSET_GEN7(input_surface_sum));
    gen75_gpe_media_chroma_surface_setup(ctx,
                                    &vpp_gpe_ctx->gpe_ctx,
                                    obj_surface,
                                    BINDING_TABLE_OFFSET_GEN7(input_surface_sum + 1),
                                    SURFACE_STATE_OFFSET_GEN7(input_surface_sum + 1));
    /* Bind the kernel return buffer surface */
    gen7_gpe_buffer_suface_setup(ctx,
                                  &vpp_gpe_ctx->gpe_ctx,
                                  &vpp_gpe_ctx->vpp_kernel_return,
                                  BINDING_TABLE_OFFSET_GEN7(input_surface_sum + 2),
                                  SURFACE_STATE_OFFSET_GEN7(input_surface_sum + 2));

    return VA_STATUS_SUCCESS;
}

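/*
 * Fill the interface descriptor remap table (IDRT): one Gen6-style
 * descriptor per loaded sub-shader, pointing at the kernel start address
 * (via a relocation against the kernel bo) and at the shared binding table.
 */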
static VAStatus
gen75_gpe_process_interface_setup(VADriverContextP ctx,
                    struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct gen6_interface_descriptor_data *desc;
    dri_bo *bo = vpp_gpe_ctx->gpe_ctx.idrt.bo;
    int i;

    dri_bo_map(bo, 1);
    assert(bo->virtual);
    desc = bo->virtual;

    /* Set up the descriptor table */
    for(i = 0; i < vpp_gpe_ctx->sub_shader_sum; i++){
        struct i965_kernel *kernel = &vpp_gpe_ctx->gpe_ctx.kernels[i];
        assert(sizeof(*desc) == 32);
        memset(desc, 0, sizeof(*desc));
        desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
        desc->desc2.sampler_count = 0; /* FIXME: */
        desc->desc2.sampler_state_pointer = 0;
        desc->desc3.binding_table_entry_count = 6; /* FIXME: */
        desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET_GEN7(0) >> 5);
        desc->desc4.constant_urb_entry_read_offset = 0;
        desc->desc4.constant_urb_entry_read_length = 0;

        dri_bo_emit_reloc(bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0,
                          i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
                          kernel->bo);
        desc++;
    }

    dri_bo_unmap(bo);

    return VA_STATUS_SUCCESS;
}

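/*
 * Fill the second-level (vpp) batch buffer with one MEDIA_OBJECT command
 * per thread. Each command selects the current sub-shader via its interface
 * descriptor index and carries the per-thread parameters as inline data;
 * the buffer is terminated with MI_BATCH_BUFFER_END.
 */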
static VAStatus
gen75_gpe_process_parameters_fill(VADriverContextP ctx,
                           struct vpp_gpe_context *vpp_gpe_ctx)
{
    unsigned int *command_ptr;
    unsigned int i, size = vpp_gpe_ctx->thread_param_size;
    unsigned char* position = NULL;

    /* Set up the per-thread inline data */
    dri_bo_map(vpp_gpe_ctx->vpp_batchbuffer.bo, 1);
    command_ptr = vpp_gpe_ctx->vpp_batchbuffer.bo->virtual;

    for(i = 0; i < vpp_gpe_ctx->thread_num; i ++)
    {
         *command_ptr++ = (CMD_MEDIA_OBJECT | (size/sizeof(int) + 6 - 2));
         *command_ptr++ = vpp_gpe_ctx->sub_shader_index;
         *command_ptr++ = 0;
         *command_ptr++ = 0;
         *command_ptr++ = 0;
         *command_ptr++ = 0;

         /* copy thread inline data */
         position = (unsigned char*)(vpp_gpe_ctx->thread_param + size * i);
         memcpy(command_ptr, position, size);
         command_ptr += size/sizeof(int);
    }

    *command_ptr++ = 0;
    *command_ptr++ = MI_BATCH_BUFFER_END;

    dri_bo_unmap(vpp_gpe_ctx->vpp_batchbuffer.bo);

    return VA_STATUS_SUCCESS;
}

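/*
 * Build the main batch: flush, program the Gen6 media pipeline state, fill
 * the second-level batch with MEDIA_OBJECT commands, then chain to it with
 * MI_BATCH_BUFFER_START.
 */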
static VAStatus
gen75_gpe_process_pipeline_setup(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_start_atomic(vpp_gpe_ctx->batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(vpp_gpe_ctx->batch);

    gen6_gpe_pipeline_setup(ctx, &vpp_gpe_ctx->gpe_ctx, vpp_gpe_ctx->batch);

    gen75_gpe_process_parameters_fill(ctx, vpp_gpe_ctx);

    BEGIN_BATCH(vpp_gpe_ctx->batch, 2);
    OUT_BATCH(vpp_gpe_ctx->batch, MI_BATCH_BUFFER_START | (1 << 8));
    OUT_RELOC(vpp_gpe_ctx->batch,
              vpp_gpe_ctx->vpp_batchbuffer.bo,
              I915_GEM_DOMAIN_COMMAND, 0,
              0);
    ADVANCE_BATCH(vpp_gpe_ctx->batch);

    intel_batchbuffer_end_atomic(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}

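/*
 * Allocate the per-run resources: the second-level batch buffer (sized for
 * thread_num MEDIA_OBJECT commands plus the terminator) and the kernel
 * return buffer (16 bytes per thread), then (re)initialize the GPE context.
 */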
static VAStatus
gen75_gpe_process_init(VADriverContextP ctx,
                 struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    dri_bo *bo;

    unsigned int batch_buf_size = vpp_gpe_ctx->thread_num *
                 (vpp_gpe_ctx->thread_param_size + 6 * sizeof(int)) + 16;

    vpp_gpe_ctx->vpp_kernel_return.num_blocks = vpp_gpe_ctx->thread_num;
    vpp_gpe_ctx->vpp_kernel_return.size_block = 16;
    vpp_gpe_ctx->vpp_kernel_return.pitch = 1;
    unsigned int kernel_return_size = vpp_gpe_ctx->vpp_kernel_return.num_blocks
           * vpp_gpe_ctx->vpp_kernel_return.size_block;

    dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp batch buffer",
                       batch_buf_size, 0x1000);
    vpp_gpe_ctx->vpp_batchbuffer.bo = bo;

    dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp kernel return buffer",
                       kernel_return_size, 0x1000);
    vpp_gpe_ctx->vpp_kernel_return.bo = bo;

    vpp_gpe_ctx->gpe_context_init(ctx, &vpp_gpe_ctx->gpe_ctx);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen75_gpe_process_prepare(VADriverContextP ctx,
                    struct vpp_gpe_context *vpp_gpe_ctx)
{
    /* Set up all the memory objects */
    gen75_gpe_process_surfaces_setup(ctx, vpp_gpe_ctx);
    gen75_gpe_process_interface_setup(ctx, vpp_gpe_ctx);
    //gen75_gpe_process_constant_setup(ctx, vpp_gpe_ctx);

    /* Program the media pipeline */
    gen75_gpe_process_pipeline_setup(ctx, vpp_gpe_ctx);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen75_gpe_process_run(VADriverContextP ctx,
                struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_flush(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen75_gpe_process(VADriverContextP ctx,
                  struct vpp_gpe_context * vpp_gpe_ctx)
{
    VAStatus va_status = VA_STATUS_SUCCESS;

    va_status = gen75_gpe_process_init(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen75_gpe_process_prepare(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen75_gpe_process_run(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    return VA_STATUS_SUCCESS;
}

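/*
 * The gen8 variants below mirror the Haswell paths: surfaces are bound with
 * the Gen8 surface-state layout, interface descriptors live in the dynamic
 * state buffer at idrt_offset, and each MEDIA_OBJECT is followed by a
 * MEDIA_STATE_FLUSH.
 */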
static VAStatus
gen8_gpe_process_surfaces_setup(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct object_surface *obj_surface;
    unsigned int i = 0;
    unsigned char input_surface_sum = (1 + vpp_gpe_ctx->forward_surf_sum +
                                         vpp_gpe_ctx->backward_surf_sum) * 2;

    /* Bind the input NV12 surfaces (luma + chroma) */
    for( i = 0; i < input_surface_sum; i += 2){
         obj_surface = vpp_gpe_ctx->surface_input_object[i/2];
         assert(obj_surface);
         gen8_gpe_media_rw_surface_setup(ctx,
                                         &vpp_gpe_ctx->gpe_ctx,
                                          obj_surface,
                                          BINDING_TABLE_OFFSET_GEN8(i),
                                          SURFACE_STATE_OFFSET_GEN8(i));

         gen8_gpe_media_chroma_surface_setup(ctx,
                                          &vpp_gpe_ctx->gpe_ctx,
                                          obj_surface,
                                          BINDING_TABLE_OFFSET_GEN8(i + 1),
                                          SURFACE_STATE_OFFSET_GEN8(i + 1));
    }

    /* Bind the output NV12 surface (luma + chroma) */
    obj_surface = vpp_gpe_ctx->surface_output_object;
    assert(obj_surface);
    gen8_gpe_media_rw_surface_setup(ctx,
                                    &vpp_gpe_ctx->gpe_ctx,
                                    obj_surface,
                                    BINDING_TABLE_OFFSET_GEN8(input_surface_sum),
                                    SURFACE_STATE_OFFSET_GEN8(input_surface_sum));
    gen8_gpe_media_chroma_surface_setup(ctx,
                                    &vpp_gpe_ctx->gpe_ctx,
                                    obj_surface,
                                    BINDING_TABLE_OFFSET_GEN8(input_surface_sum + 1),
                                    SURFACE_STATE_OFFSET_GEN8(input_surface_sum + 1));
    /* Bind the kernel return buffer surface */
    gen7_gpe_buffer_suface_setup(ctx,
                                  &vpp_gpe_ctx->gpe_ctx,
                                  &vpp_gpe_ctx->vpp_kernel_return,
                                  BINDING_TABLE_OFFSET_GEN8(input_surface_sum + 2),
                                  SURFACE_STATE_OFFSET_GEN8(input_surface_sum + 2));

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_interface_setup(VADriverContextP ctx,
                    struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct gen8_interface_descriptor_data *desc;
    dri_bo *bo = vpp_gpe_ctx->gpe_ctx.dynamic_state.bo;
    int i;

    dri_bo_map(bo, 1);
    assert(bo->virtual);
    desc = (struct gen8_interface_descriptor_data *)(bo->virtual
                               + vpp_gpe_ctx->gpe_ctx.idrt_offset);

    /* Set up the descriptor table */
    for (i = 0; i < vpp_gpe_ctx->sub_shader_sum; i++){
        struct i965_kernel *kernel;
        kernel = &vpp_gpe_ctx->gpe_ctx.kernels[i];
        assert(sizeof(*desc) == 32);
        memset(desc, 0, sizeof(*desc));
        desc->desc0.kernel_start_pointer = kernel->kernel_offset >> 6;
        desc->desc3.sampler_count = 0; /* FIXME: */
        desc->desc3.sampler_state_pointer = 0;
        desc->desc4.binding_table_entry_count = 6; /* FIXME: */
        desc->desc4.binding_table_pointer = (BINDING_TABLE_OFFSET_GEN8(0) >> 5);
        desc->desc5.constant_urb_entry_read_offset = 0;
        desc->desc5.constant_urb_entry_read_length = 0;

        desc++;
    }

    dri_bo_unmap(bo);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_parameters_fill(VADriverContextP ctx,
                           struct vpp_gpe_context *vpp_gpe_ctx)
{
    unsigned int *command_ptr;
    unsigned int i, size = vpp_gpe_ctx->thread_param_size;
    unsigned char* position = NULL;

    /* Set up the per-thread inline data */
    dri_bo_map(vpp_gpe_ctx->vpp_batchbuffer.bo, 1);
    command_ptr = vpp_gpe_ctx->vpp_batchbuffer.bo->virtual;

    for(i = 0; i < vpp_gpe_ctx->thread_num; i ++)
    {
         *command_ptr++ = (CMD_MEDIA_OBJECT | (size/sizeof(int) + 6 - 2));
         *command_ptr++ = vpp_gpe_ctx->sub_shader_index;
         *command_ptr++ = 0;
         *command_ptr++ = 0;
         *command_ptr++ = 0;
         *command_ptr++ = 0;

         /* copy thread inline data */
         position = (unsigned char*)(vpp_gpe_ctx->thread_param + size * i);
         memcpy(command_ptr, position, size);
         command_ptr += size/sizeof(int);

         *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
         *command_ptr++ = 0;
    }

    *command_ptr++ = 0;
    *command_ptr++ = MI_BATCH_BUFFER_END;

    dri_bo_unmap(vpp_gpe_ctx->vpp_batchbuffer.bo);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_pipeline_setup(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_start_atomic(vpp_gpe_ctx->batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(vpp_gpe_ctx->batch);

    gen8_gpe_pipeline_setup(ctx, &vpp_gpe_ctx->gpe_ctx, vpp_gpe_ctx->batch);

    gen8_gpe_process_parameters_fill(ctx, vpp_gpe_ctx);

    BEGIN_BATCH(vpp_gpe_ctx->batch, 3);
    OUT_BATCH(vpp_gpe_ctx->batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
    OUT_RELOC(vpp_gpe_ctx->batch,
              vpp_gpe_ctx->vpp_batchbuffer.bo,
              I915_GEM_DOMAIN_COMMAND, 0,
              0);
    OUT_BATCH(vpp_gpe_ctx->batch, 0);

    ADVANCE_BATCH(vpp_gpe_ctx->batch);

    intel_batchbuffer_end_atomic(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_init(VADriverContextP ctx,
                 struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    dri_bo *bo;

    /* Each gen8 MEDIA_OBJECT is followed by a 2-dword MEDIA_STATE_FLUSH, so
     * reserve 8 command dwords per thread in addition to the inline data. */
    unsigned int batch_buf_size = vpp_gpe_ctx->thread_num *
                 (vpp_gpe_ctx->thread_param_size + 8 * sizeof(int)) + 16;

    vpp_gpe_ctx->vpp_kernel_return.num_blocks = vpp_gpe_ctx->thread_num;
    vpp_gpe_ctx->vpp_kernel_return.size_block = 16;
    vpp_gpe_ctx->vpp_kernel_return.pitch = 1;

    unsigned int kernel_return_size = vpp_gpe_ctx->vpp_kernel_return.num_blocks
           * vpp_gpe_ctx->vpp_kernel_return.size_block;

    dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp batch buffer",
                       batch_buf_size, 0x1000);
    vpp_gpe_ctx->vpp_batchbuffer.bo = bo;

    dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp kernel return buffer",
                       kernel_return_size, 0x1000);
    vpp_gpe_ctx->vpp_kernel_return.bo = bo;

    vpp_gpe_ctx->gpe_context_init(ctx, &vpp_gpe_ctx->gpe_ctx);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_prepare(VADriverContextP ctx,
                    struct vpp_gpe_context *vpp_gpe_ctx)
{
    /* Set up all the memory objects */
    gen8_gpe_process_surfaces_setup(ctx, vpp_gpe_ctx);
    gen8_gpe_process_interface_setup(ctx, vpp_gpe_ctx);
    //gen8_gpe_process_constant_setup(ctx, vpp_gpe_ctx);

    /* Program the media pipeline */
    gen8_gpe_process_pipeline_setup(ctx, vpp_gpe_ctx);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_run(VADriverContextP ctx,
                struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_flush(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process(VADriverContextP ctx,
                  struct vpp_gpe_context * vpp_gpe_ctx)
{
    VAStatus va_status = VA_STATUS_SUCCESS;

    va_status = gen8_gpe_process_init(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen8_gpe_process_prepare(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen8_gpe_process_run(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    return VA_STATUS_SUCCESS;
}

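/* Dispatch one GPE pass to the generation-specific implementation. */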
static VAStatus
vpp_gpe_process(VADriverContextP ctx,
                  struct vpp_gpe_context * vpp_gpe_ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    if (IS_HASWELL(i965->intel.device_info))
       return gen75_gpe_process(ctx, vpp_gpe_ctx);
    else if (IS_GEN8(i965->intel.device_info) ||
             IS_GEN9(i965->intel.device_info))
       return gen8_gpe_process(ctx, vpp_gpe_ctx);

    return VA_STATUS_ERROR_UNIMPLEMENTED;
}

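/*
 * Unsharp-mask sharpening in three GPE passes: (1) horizontal blur of the
 * input into the output surface, (2) vertical blur of that result into a
 * temporary surface, (3) blend the blurred temporary with the original
 * input into the output. Each pass splits the frame into strips (16 rows,
 * 16 columns, and 4 rows respectively), one hardware thread per strip.
 */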
static VAStatus
vpp_gpe_process_sharpening(VADriverContextP ctx,
                             struct vpp_gpe_context * vpp_gpe_ctx)
{
     VAStatus va_status = VA_STATUS_SUCCESS;
     struct i965_driver_data *i965 = i965_driver_data(ctx);
     struct object_surface *origin_in_obj_surface = vpp_gpe_ctx->surface_input_object[0];
     struct object_surface *origin_out_obj_surface = vpp_gpe_ctx->surface_output_object;

     VAProcPipelineParameterBuffer* pipe = vpp_gpe_ctx->pipeline_param;
     VABufferID *filter_ids = (VABufferID*)pipe->filters;
     struct object_buffer *obj_buf = BUFFER(*(filter_ids + 0));

     assert(obj_buf && obj_buf->buffer_store && obj_buf->buffer_store->buffer);

     if (!obj_buf ||
         !obj_buf->buffer_store ||
         !obj_buf->buffer_store->buffer)
         goto error;

     VAProcFilterParameterBuffer* filter =
                  (VAProcFilterParameterBuffer*)obj_buf->buffer_store->buffer;
     float sharpening_intensity = filter->value;

     ThreadParameterSharpening thr_param;
     unsigned int thr_param_size = sizeof(ThreadParameterSharpening);
     unsigned int i;
     unsigned char * pos;

     if(vpp_gpe_ctx->is_first_frame){
         vpp_gpe_ctx->sub_shader_sum = 3;
         struct i965_kernel * vpp_kernels;
         if (IS_HASWELL(i965->intel.device_info))
             vpp_kernels = gen75_vpp_sharpening_kernels;
         else if (IS_GEN8(i965->intel.device_info) ||
                  IS_GEN9(i965->intel.device_info)) // TODO: build the sharpening kernel for GEN9
             vpp_kernels = gen8_vpp_sharpening_kernels;
         else
             return VA_STATUS_ERROR_UNIMPLEMENTED;

         vpp_gpe_ctx->gpe_load_kernels(ctx,
                               &vpp_gpe_ctx->gpe_ctx,
                               vpp_kernels,
                               vpp_gpe_ctx->sub_shader_sum);
     }

     if(vpp_gpe_ctx->surface_tmp == VA_INVALID_ID){
        va_status = i965_CreateSurfaces(ctx,
                                       vpp_gpe_ctx->in_frame_w,
                                       vpp_gpe_ctx->in_frame_h,
                                       VA_RT_FORMAT_YUV420,
                                       1,
                                       &vpp_gpe_ctx->surface_tmp);
       assert(va_status == VA_STATUS_SUCCESS);

       struct object_surface * obj_surf = SURFACE(vpp_gpe_ctx->surface_tmp);
       assert(obj_surf);

       if (obj_surf) {
           i965_check_alloc_surface_bo(ctx, obj_surf, 1, VA_FOURCC_NV12,
                                       SUBSAMPLE_YUV420);
           vpp_gpe_ctx->surface_tmp_object = obj_surf;
       }
    }

    assert(sharpening_intensity >= 0.0 && sharpening_intensity <= 1.0);
    thr_param.l_amount = (unsigned int)(sharpening_intensity * 128);
    thr_param.d_amount = (unsigned int)(sharpening_intensity * 128);

    thr_param.base.pic_width = vpp_gpe_ctx->in_frame_w;
    thr_param.base.pic_height = vpp_gpe_ctx->in_frame_h;

    /* Step 1: horizontal blur pass */
    vpp_gpe_ctx->forward_surf_sum = 0;
    vpp_gpe_ctx->backward_surf_sum = 0;

    vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_h/16;
    vpp_gpe_ctx->thread_param_size = thr_param_size;
    vpp_gpe_ctx->thread_param = (unsigned char*) malloc(vpp_gpe_ctx->thread_param_size
                                                       *vpp_gpe_ctx->thread_num);
    pos = vpp_gpe_ctx->thread_param;

    if (!pos) {
        return VA_STATUS_ERROR_ALLOCATION_FAILED;
    }

    for( i = 0 ; i < vpp_gpe_ctx->thread_num; i++){
        thr_param.base.v_pos = 16 * i;
        thr_param.base.h_pos = 0;
        memcpy(pos, &thr_param, thr_param_size);
        pos += thr_param_size;
    }

    vpp_gpe_ctx->sub_shader_index = 0;
    va_status = vpp_gpe_process(ctx, vpp_gpe_ctx);
    free(vpp_gpe_ctx->thread_param);

    /* Step 2: vertical blur pass */
    vpp_gpe_ctx->surface_input_object[0] = vpp_gpe_ctx->surface_output_object;
    vpp_gpe_ctx->surface_output_object = vpp_gpe_ctx->surface_tmp_object;
    vpp_gpe_ctx->forward_surf_sum = 0;
    vpp_gpe_ctx->backward_surf_sum = 0;

    vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_w/16;
    vpp_gpe_ctx->thread_param_size = thr_param_size;
    vpp_gpe_ctx->thread_param = (unsigned char*) malloc(vpp_gpe_ctx->thread_param_size
                                                       *vpp_gpe_ctx->thread_num);
    pos = vpp_gpe_ctx->thread_param;

    if (!pos) {
        return VA_STATUS_ERROR_ALLOCATION_FAILED;
    }

    for( i = 0 ; i < vpp_gpe_ctx->thread_num; i++){
        thr_param.base.v_pos = 0;
        thr_param.base.h_pos = 16 * i;
        memcpy(pos, &thr_param, thr_param_size);
        pos += thr_param_size;
    }

    vpp_gpe_ctx->sub_shader_index = 1;
    vpp_gpe_process(ctx, vpp_gpe_ctx);
    free(vpp_gpe_ctx->thread_param);

    /* Step 3: apply the blurred result to the original surface (unsharp mask) */
    vpp_gpe_ctx->surface_input_object[0]  = origin_in_obj_surface;
    vpp_gpe_ctx->surface_input_object[1]  = vpp_gpe_ctx->surface_tmp_object;
    vpp_gpe_ctx->surface_output_object    = origin_out_obj_surface;
    vpp_gpe_ctx->forward_surf_sum  = 1;
    vpp_gpe_ctx->backward_surf_sum = 0;

    vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_h/4;
    vpp_gpe_ctx->thread_param_size = thr_param_size;
    vpp_gpe_ctx->thread_param = (unsigned char*) malloc(vpp_gpe_ctx->thread_param_size
                                                       *vpp_gpe_ctx->thread_num);
    pos = vpp_gpe_ctx->thread_param;

    if (!pos) {
        return VA_STATUS_ERROR_ALLOCATION_FAILED;
    }

    for( i = 0 ; i < vpp_gpe_ctx->thread_num; i++){
        thr_param.base.v_pos = 4 * i;
        thr_param.base.h_pos = 0;
        memcpy(pos, &thr_param, thr_param_size);
        pos += thr_param_size;
    }

    vpp_gpe_ctx->sub_shader_index = 2;
    va_status = vpp_gpe_process(ctx, vpp_gpe_ctx);
    free(vpp_gpe_ctx->thread_param);

    return va_status;

error:
    return VA_STATUS_ERROR_INVALID_PARAMETER;
}

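/*
 * Public entry point for the GPE-based video post-processing path. Collects
 * the input, forward and backward reference surfaces from the pipeline
 * parameter buffer and runs the requested filter; only the sharpening
 * filter is currently implemented.
 */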
VAStatus vpp_gpe_process_picture(VADriverContextP ctx,
                    struct vpp_gpe_context * vpp_gpe_ctx)
{
    VAStatus va_status = VA_STATUS_SUCCESS;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    VAProcPipelineParameterBuffer* pipe = vpp_gpe_ctx->pipeline_param;
    VAProcFilterParameterBuffer* filter = NULL;
    unsigned int i;
    struct object_surface *obj_surface = NULL;

    if (pipe->num_filters && !pipe->filters)
        goto error;

    for(i = 0; i < pipe->num_filters; i++){
        struct object_buffer *obj_buf = BUFFER(pipe->filters[i]);

        assert(obj_buf && obj_buf->buffer_store && obj_buf->buffer_store->buffer);

        if (!obj_buf ||
            !obj_buf->buffer_store ||
            !obj_buf->buffer_store->buffer)
            goto error;

        filter = (VAProcFilterParameterBuffer*)obj_buf->buffer_store->buffer;
        if(filter->type == VAProcFilterSharpening){
           break;
        }
    }

    assert(pipe->num_forward_references + pipe->num_backward_references <= 4);
    vpp_gpe_ctx->surface_input_object[0] = vpp_gpe_ctx->surface_pipeline_input_object;

    vpp_gpe_ctx->forward_surf_sum = 0;
    vpp_gpe_ctx->backward_surf_sum = 0;

    for(i = 0; i < pipe->num_forward_references; i ++)
    {
        obj_surface = SURFACE(pipe->forward_references[i]);

        assert(obj_surface);
        vpp_gpe_ctx->surface_input_object[i + 1] = obj_surface;
        vpp_gpe_ctx->forward_surf_sum++;
    }

    for(i = 0; i < pipe->num_backward_references; i ++)
    {
        obj_surface = SURFACE(pipe->backward_references[i]);

        assert(obj_surface);
        vpp_gpe_ctx->surface_input_object[vpp_gpe_ctx->forward_surf_sum + 1 + i] = obj_surface;
        vpp_gpe_ctx->backward_surf_sum++;
    }

    obj_surface = vpp_gpe_ctx->surface_input_object[0];
    vpp_gpe_ctx->in_frame_w = obj_surface->orig_width;
    vpp_gpe_ctx->in_frame_h = obj_surface->orig_height;

    if(filter && filter->type == VAProcFilterSharpening) {
       va_status = vpp_gpe_process_sharpening(ctx, vpp_gpe_ctx);
    } else {
       va_status = VA_STATUS_ERROR_ATTR_NOT_SUPPORTED;
    }

    vpp_gpe_ctx->is_first_frame = 0;

    return va_status;

error:
    return VA_STATUS_ERROR_INVALID_PARAMETER;
}

void
vpp_gpe_context_destroy(VADriverContextP ctx,
                               struct vpp_gpe_context *vpp_gpe_ctx)
{
    dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo);
    vpp_gpe_ctx->vpp_batchbuffer.bo = NULL;

    dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo);
    vpp_gpe_ctx->vpp_kernel_return.bo = NULL;

    vpp_gpe_ctx->gpe_context_destroy(&vpp_gpe_ctx->gpe_ctx);

    if(vpp_gpe_ctx->surface_tmp != VA_INVALID_ID){
        assert(vpp_gpe_ctx->surface_tmp_object != NULL);
        i965_DestroySurfaces(ctx, &vpp_gpe_ctx->surface_tmp, 1);
        vpp_gpe_ctx->surface_tmp = VA_INVALID_ID;
        vpp_gpe_ctx->surface_tmp_object = NULL;
    }

    if (vpp_gpe_ctx->batch)
        intel_batchbuffer_free(vpp_gpe_ctx->batch);

    free(vpp_gpe_ctx);
}

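/*
 * Allocate and initialize a vpp GPE context, selecting the Haswell or
 * Gen8/Gen9 GPE helpers and sizing the per-generation surface-state,
 * CURBE and interface-descriptor areas.
 */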
struct vpp_gpe_context *
vpp_gpe_context_init(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct vpp_gpe_context  *vpp_gpe_ctx = calloc(1, sizeof(struct vpp_gpe_context));
    assert(vpp_gpe_ctx);
    struct i965_gpe_context *gpe_ctx = &(vpp_gpe_ctx->gpe_ctx);

    assert(IS_HASWELL(i965->intel.device_info) ||
           IS_GEN8(i965->intel.device_info) ||
           IS_GEN9(i965->intel.device_info));

    vpp_gpe_ctx->surface_tmp = VA_INVALID_ID;
    vpp_gpe_ctx->surface_tmp_object = NULL;
    vpp_gpe_ctx->batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_RENDER, 0);
    vpp_gpe_ctx->is_first_frame = 1;

    gpe_ctx->vfe_state.max_num_threads = 60 - 1;
    gpe_ctx->vfe_state.num_urb_entries = 16;
    gpe_ctx->vfe_state.gpgpu_mode = 0;
    gpe_ctx->vfe_state.urb_entry_size = 59 - 1;
    gpe_ctx->vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1;

    if (IS_HASWELL(i965->intel.device_info)) {
        vpp_gpe_ctx->gpe_context_init     = i965_gpe_context_init;
        vpp_gpe_ctx->gpe_context_destroy  = i965_gpe_context_destroy;
        vpp_gpe_ctx->gpe_load_kernels     = i965_gpe_load_kernels;
        gpe_ctx->surface_state_binding_table.length =
               (SURFACE_STATE_PADDED_SIZE_GEN7 + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;

        gpe_ctx->curbe.length = CURBE_TOTAL_DATA_LENGTH;
        gpe_ctx->idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
        gpe_ctx->idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);

    } else if (IS_GEN8(i965->intel.device_info) ||
               IS_GEN9(i965->intel.device_info)) {
        vpp_gpe_ctx->gpe_context_init     = gen8_gpe_context_init;
        vpp_gpe_ctx->gpe_context_destroy  = gen8_gpe_context_destroy;
        vpp_gpe_ctx->gpe_load_kernels     = gen8_gpe_load_kernels;
        gpe_ctx->surface_state_binding_table.length =
               (SURFACE_STATE_PADDED_SIZE_GEN8 + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;

        gpe_ctx->curbe_size = CURBE_TOTAL_DATA_LENGTH;
        gpe_ctx->idrt_size  = sizeof(struct gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;

    }

    return vpp_gpe_ctx;
}