/*
 * Copyright © 2011 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *   Li Xiaowei <xiaowei.a.li@intel.com>
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

#include "intel_batchbuffer.h"
#include "intel_driver.h"

#include "i965_structs.h"
#include "i965_defines.h"
#include "i965_drv_video.h"
#include "gen75_vpp_gpe.h"

#define MAX_INTERFACE_DESC_GEN6      MAX_GPE_KERNELS
#define MAX_MEDIA_SURFACES_GEN6      34

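/*
 * Layout of the surface state / binding table heap used below: one padded
 * surface state slot per surface comes first, followed by the binding
 * table, whose 32-bit entries refer back to those slots. The GEN7 and GEN8
 * variants differ only in the padded surface state size.
 */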
#define SURFACE_STATE_OFFSET_GEN7(index)   (SURFACE_STATE_PADDED_SIZE_GEN7 * (index))
#define BINDING_TABLE_OFFSET_GEN7(index)   (SURFACE_STATE_OFFSET_GEN7(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index))

#define SURFACE_STATE_OFFSET_GEN8(index)   (SURFACE_STATE_PADDED_SIZE_GEN8 * (index))
#define BINDING_TABLE_OFFSET_GEN8(index)   (SURFACE_STATE_OFFSET_GEN8(MAX_MEDIA_SURFACES_GEN6) + sizeof(unsigned int) * (index))

#define CURBE_ALLOCATION_SIZE   37
#define CURBE_TOTAL_DATA_LENGTH (4 * 32)
#define CURBE_URB_ENTRY_LENGTH  4

/* Shaders information for sharpening */
static const unsigned int gen75_gpe_sharpening_h_blur[][4] = {
   #include "shaders/post_processing/gen75/sharpening_h_blur.g75b"
};
static const unsigned int gen75_gpe_sharpening_v_blur[][4] = {
   #include "shaders/post_processing/gen75/sharpening_v_blur.g75b"
};
static const unsigned int gen75_gpe_sharpening_unmask[][4] = {
   #include "shaders/post_processing/gen75/sharpening_unmask.g75b"
};
static struct i965_kernel gen75_vpp_sharpening_kernels[] = {
    {
        "vpp: sharpening(horizontal blur)",
        VPP_GPE_SHARPENING,
        gen75_gpe_sharpening_h_blur,
        sizeof(gen75_gpe_sharpening_h_blur),
        NULL
    },
    {
        "vpp: sharpening(vertical blur)",
        VPP_GPE_SHARPENING,
        gen75_gpe_sharpening_v_blur,
        sizeof(gen75_gpe_sharpening_v_blur),
        NULL
    },
    {
        "vpp: sharpening(unmask)",
        VPP_GPE_SHARPENING,
        gen75_gpe_sharpening_unmask,
        sizeof(gen75_gpe_sharpening_unmask),
        NULL
    },
};

/* sharpening kernels for Broadwell */
static const unsigned int gen8_gpe_sharpening_h_blur[][4] = {
   #include "shaders/post_processing/gen8/sharpening_h_blur.g8b"
};
static const unsigned int gen8_gpe_sharpening_v_blur[][4] = {
   #include "shaders/post_processing/gen8/sharpening_v_blur.g8b"
};
static const unsigned int gen8_gpe_sharpening_unmask[][4] = {
   #include "shaders/post_processing/gen8/sharpening_unmask.g8b"
};

static struct i965_kernel gen8_vpp_sharpening_kernels[] = {
    {
        "vpp: sharpening(horizontal blur)",
        VPP_GPE_SHARPENING,
        gen8_gpe_sharpening_h_blur,
        sizeof(gen8_gpe_sharpening_h_blur),
        NULL
    },
    {
        "vpp: sharpening(vertical blur)",
        VPP_GPE_SHARPENING,
        gen8_gpe_sharpening_v_blur,
        sizeof(gen8_gpe_sharpening_v_blur),
        NULL
    },
    {
        "vpp: sharpening(unmask)",
        VPP_GPE_SHARPENING,
        gen8_gpe_sharpening_unmask,
        sizeof(gen8_gpe_sharpening_unmask),
        NULL
    },
};

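/*
 * Bind all surfaces for the Haswell media pipeline: every input NV12
 * surface occupies two binding table slots (luma plane, then chroma
 * plane), followed by the two slots of the output surface and one slot
 * for the kernel return buffer.
 */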
static VAStatus
gen75_gpe_process_surfaces_setup(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct object_surface *obj_surface;
    unsigned int i = 0;
    unsigned char input_surface_sum = (1 + vpp_gpe_ctx->forward_surf_sum +
                                         vpp_gpe_ctx->backward_surf_sum) * 2;

    /* Binding input NV12 surfaces (Luma + Chroma) */
    for( i = 0; i < input_surface_sum; i += 2){
         obj_surface = vpp_gpe_ctx->surface_input_object[i/2];
         assert(obj_surface);
         gen7_gpe_media_rw_surface_setup(ctx,
                                         &vpp_gpe_ctx->gpe_ctx,
                                          obj_surface,
                                          BINDING_TABLE_OFFSET_GEN7(i),
                                          SURFACE_STATE_OFFSET_GEN7(i));

         gen75_gpe_media_chroma_surface_setup(ctx,
                                          &vpp_gpe_ctx->gpe_ctx,
                                          obj_surface,
                                          BINDING_TABLE_OFFSET_GEN7(i + 1),
                                          SURFACE_STATE_OFFSET_GEN7(i + 1));
    }

    /* Binding output NV12 surface (Luma + Chroma) */
    obj_surface = vpp_gpe_ctx->surface_output_object;
    assert(obj_surface);
    gen7_gpe_media_rw_surface_setup(ctx,
                                    &vpp_gpe_ctx->gpe_ctx,
                                    obj_surface,
                                    BINDING_TABLE_OFFSET_GEN7(input_surface_sum),
                                    SURFACE_STATE_OFFSET_GEN7(input_surface_sum));
    gen75_gpe_media_chroma_surface_setup(ctx,
                                    &vpp_gpe_ctx->gpe_ctx,
                                    obj_surface,
                                    BINDING_TABLE_OFFSET_GEN7(input_surface_sum + 1),
                                    SURFACE_STATE_OFFSET_GEN7(input_surface_sum + 1));
    /* Bind kernel return buffer surface */
    gen7_gpe_buffer_suface_setup(ctx,
                                  &vpp_gpe_ctx->gpe_ctx,
                                  &vpp_gpe_ctx->vpp_kernel_return,
                                  BINDING_TABLE_OFFSET_GEN7(input_surface_sum + 2),
                                  SURFACE_STATE_OFFSET_GEN7(input_surface_sum + 2));

    return VA_STATUS_SUCCESS;
}

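/*
 * Fill the interface descriptor remap table (IDRT): one 32-byte
 * gen6_interface_descriptor_data entry per loaded kernel, each pointing
 * at that kernel's instruction buffer and at the shared binding table.
 */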
static VAStatus
gen75_gpe_process_interface_setup(VADriverContextP ctx,
                    struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct gen6_interface_descriptor_data *desc;
    dri_bo *bo = vpp_gpe_ctx->gpe_ctx.idrt.bo;
    int i;

    dri_bo_map(bo, 1);
    assert(bo->virtual);
    desc = bo->virtual;

    /* Set up the descriptor table */
    for(i = 0; i < vpp_gpe_ctx->sub_shader_sum; i++){
        struct i965_kernel *kernel = &vpp_gpe_ctx->gpe_ctx.kernels[i];
        assert(sizeof(*desc) == 32);
        memset(desc, 0, sizeof(*desc));
        desc->desc0.kernel_start_pointer = (kernel->bo->offset >> 6);
        desc->desc2.sampler_count = 0; /* FIXME: */
        desc->desc2.sampler_state_pointer = 0;
        desc->desc3.binding_table_entry_count = 6; /* FIXME: */
        desc->desc3.binding_table_pointer = (BINDING_TABLE_OFFSET_GEN7(0) >> 5);
        desc->desc4.constant_urb_entry_read_offset = 0;
        desc->desc4.constant_urb_entry_read_length = 0;

        dri_bo_emit_reloc(bo,
                          I915_GEM_DOMAIN_INSTRUCTION, 0,
                          0,
                          i * sizeof(*desc) + offsetof(struct gen6_interface_descriptor_data, desc0),
                          kernel->bo);
        desc++;
    }

    dri_bo_unmap(bo);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen75_gpe_process_constant_fill(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    dri_bo_map(vpp_gpe_ctx->gpe_ctx.curbe.bo, 1);
    assert(vpp_gpe_ctx->gpe_ctx.curbe.bo->virtual);
    unsigned char* constant_buffer = vpp_gpe_ctx->gpe_ctx.curbe.bo->virtual;
    memcpy(constant_buffer, vpp_gpe_ctx->kernel_param,
                            vpp_gpe_ctx->kernel_param_size);
    dri_bo_unmap(vpp_gpe_ctx->gpe_ctx.curbe.bo);

    return VA_STATUS_SUCCESS;
}

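/*
 * Emit one MEDIA_OBJECT command per thread into the second-level batch
 * buffer. Each command carries a 6-dword header (the length field is the
 * total dword count minus 2) followed by the per-thread inline data, and
 * the buffer is terminated with MI_BATCH_BUFFER_END.
 */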
static VAStatus
gen75_gpe_process_parameters_fill(VADriverContextP ctx,
                           struct vpp_gpe_context *vpp_gpe_ctx)
{
    unsigned int *command_ptr;
    unsigned int i, size = vpp_gpe_ctx->thread_param_size;
    unsigned char* position = NULL;

    /* Thread inline data setting */
    dri_bo_map(vpp_gpe_ctx->vpp_batchbuffer.bo, 1);
    command_ptr = vpp_gpe_ctx->vpp_batchbuffer.bo->virtual;

    for(i = 0; i < vpp_gpe_ctx->thread_num; i ++)
    {
         *command_ptr++ = (CMD_MEDIA_OBJECT | (size/sizeof(int) + 6 - 2));
         *command_ptr++ = vpp_gpe_ctx->sub_shader_index;
         *command_ptr++ = 0;
         *command_ptr++ = 0;
         *command_ptr++ = 0;
         *command_ptr++ = 0;

         /* copy thread inline data */
         position = (unsigned char*)(vpp_gpe_ctx->thread_param + size * i);
         memcpy(command_ptr, position, size);
         command_ptr += size/sizeof(int);
    }

    *command_ptr++ = 0;
    *command_ptr++ = MI_BATCH_BUFFER_END;

    dri_bo_unmap(vpp_gpe_ctx->vpp_batchbuffer.bo);

    return VA_STATUS_SUCCESS;
}

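/*
 * Build the top-level batch: flush, program the GPE pipeline state, then
 * chain to the MEDIA_OBJECT batch filled above with MI_BATCH_BUFFER_START.
 */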
static VAStatus
gen75_gpe_process_pipeline_setup(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_start_atomic(vpp_gpe_ctx->batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(vpp_gpe_ctx->batch);

    gen6_gpe_pipeline_setup(ctx, &vpp_gpe_ctx->gpe_ctx, vpp_gpe_ctx->batch);

    gen75_gpe_process_parameters_fill(ctx, vpp_gpe_ctx);

    BEGIN_BATCH(vpp_gpe_ctx->batch, 2);
    OUT_BATCH(vpp_gpe_ctx->batch, MI_BATCH_BUFFER_START | (1 << 8));
    OUT_RELOC(vpp_gpe_ctx->batch,
              vpp_gpe_ctx->vpp_batchbuffer.bo,
              I915_GEM_DOMAIN_COMMAND, 0,
              0);
    ADVANCE_BATCH(vpp_gpe_ctx->batch);

    intel_batchbuffer_end_atomic(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}

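/*
 * Per-run (re)allocation: the MEDIA_OBJECT batch is sized from the thread
 * count and per-thread parameter size (6 command dwords per thread plus
 * room for the terminating MI_BATCH_BUFFER_END), and the kernel return
 * buffer gets one 16-byte block per thread.
 */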
static VAStatus
gen75_gpe_process_init(VADriverContextP ctx,
                 struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    dri_bo *bo;

    unsigned int batch_buf_size = vpp_gpe_ctx->thread_num *
                 (vpp_gpe_ctx->thread_param_size + 6 * sizeof(int)) + 16;

    vpp_gpe_ctx->vpp_kernel_return.num_blocks = vpp_gpe_ctx->thread_num;
    vpp_gpe_ctx->vpp_kernel_return.size_block = 16;
    vpp_gpe_ctx->vpp_kernel_return.pitch = 1;
    unsigned int kernel_return_size = vpp_gpe_ctx->vpp_kernel_return.num_blocks
           * vpp_gpe_ctx->vpp_kernel_return.size_block;

    dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp batch buffer",
                       batch_buf_size, 0x1000);
    vpp_gpe_ctx->vpp_batchbuffer.bo = bo;
    dri_bo_reference(vpp_gpe_ctx->vpp_batchbuffer.bo);

    dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp kernel return buffer",
                       kernel_return_size, 0x1000);
    vpp_gpe_ctx->vpp_kernel_return.bo = bo;
    dri_bo_reference(vpp_gpe_ctx->vpp_kernel_return.bo);

    vpp_gpe_ctx->gpe_context_init(ctx, &vpp_gpe_ctx->gpe_ctx);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen75_gpe_process_prepare(VADriverContextP ctx,
                    struct vpp_gpe_context *vpp_gpe_ctx)
{
    /* Set up all the memory objects */
    gen75_gpe_process_surfaces_setup(ctx, vpp_gpe_ctx);
    gen75_gpe_process_interface_setup(ctx, vpp_gpe_ctx);
    //gen75_gpe_process_constant_setup(ctx, vpp_gpe_ctx);

    /* Program the media pipeline */
    gen75_gpe_process_pipeline_setup(ctx, vpp_gpe_ctx);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen75_gpe_process_run(VADriverContextP ctx,
                struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_flush(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen75_gpe_process(VADriverContextP ctx,
                  struct vpp_gpe_context * vpp_gpe_ctx)
{
    VAStatus va_status = VA_STATUS_SUCCESS;

    va_status = gen75_gpe_process_init(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen75_gpe_process_prepare(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen75_gpe_process_run(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    return VA_STATUS_SUCCESS;
}

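/*
 * The gen8_* functions below mirror the Haswell path above, using the
 * Broadwell state layout: GEN8 surface state/binding table offsets and
 * interface descriptors placed in the dynamic state buffer.
 */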
static VAStatus
gen8_gpe_process_surfaces_setup(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct object_surface *obj_surface;
    unsigned int i = 0;
    unsigned char input_surface_sum = (1 + vpp_gpe_ctx->forward_surf_sum +
                                         vpp_gpe_ctx->backward_surf_sum) * 2;

    /* Binding input NV12 surfaces (Luma + Chroma) */
    for( i = 0; i < input_surface_sum; i += 2){
         obj_surface = vpp_gpe_ctx->surface_input_object[i/2];
         assert(obj_surface);
         gen8_gpe_media_rw_surface_setup(ctx,
                                         &vpp_gpe_ctx->gpe_ctx,
                                          obj_surface,
                                          BINDING_TABLE_OFFSET_GEN8(i),
                                          SURFACE_STATE_OFFSET_GEN8(i));

         gen8_gpe_media_chroma_surface_setup(ctx,
                                          &vpp_gpe_ctx->gpe_ctx,
                                          obj_surface,
                                          BINDING_TABLE_OFFSET_GEN8(i + 1),
                                          SURFACE_STATE_OFFSET_GEN8(i + 1));
    }

    /* Binding output NV12 surface (Luma + Chroma) */
    obj_surface = vpp_gpe_ctx->surface_output_object;
    assert(obj_surface);
    gen8_gpe_media_rw_surface_setup(ctx,
                                    &vpp_gpe_ctx->gpe_ctx,
                                    obj_surface,
                                    BINDING_TABLE_OFFSET_GEN8(input_surface_sum),
                                    SURFACE_STATE_OFFSET_GEN8(input_surface_sum));
    gen8_gpe_media_chroma_surface_setup(ctx,
                                    &vpp_gpe_ctx->gpe_ctx,
                                    obj_surface,
                                    BINDING_TABLE_OFFSET_GEN8(input_surface_sum + 1),
                                    SURFACE_STATE_OFFSET_GEN8(input_surface_sum + 1));
    /* Bind kernel return buffer surface */
    gen7_gpe_buffer_suface_setup(ctx,
                                  &vpp_gpe_ctx->gpe_ctx,
                                  &vpp_gpe_ctx->vpp_kernel_return,
                                  BINDING_TABLE_OFFSET_GEN8(input_surface_sum + 2),
                                  SURFACE_STATE_OFFSET_GEN8(input_surface_sum + 2));

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_interface_setup(VADriverContextP ctx,
                    struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct gen8_interface_descriptor_data *desc;
    dri_bo *bo = vpp_gpe_ctx->gpe_ctx.dynamic_state.bo;
    int i;

    dri_bo_map(bo, 1);
    assert(bo->virtual);
    desc = (struct gen8_interface_descriptor_data *)(bo->virtual
                               + vpp_gpe_ctx->gpe_ctx.idrt_offset);

    /* Set up the descriptor table */
    for (i = 0; i < vpp_gpe_ctx->sub_shader_sum; i++){
        struct i965_kernel *kernel;
        kernel = &vpp_gpe_ctx->gpe_ctx.kernels[i];
        assert(sizeof(*desc) == 32);
        memset(desc, 0, sizeof(*desc));
        desc->desc0.kernel_start_pointer = kernel->kernel_offset >> 6;
        desc->desc3.sampler_count = 0; /* FIXME: */
        desc->desc3.sampler_state_pointer = 0;
        desc->desc4.binding_table_entry_count = 6; /* FIXME: */
        desc->desc4.binding_table_pointer = (BINDING_TABLE_OFFSET_GEN8(0) >> 5);
        desc->desc5.constant_urb_entry_read_offset = 0;
        desc->desc5.constant_urb_entry_read_length = 0;

        desc++;
    }

    dri_bo_unmap(bo);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_constant_fill(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    dri_bo_map(vpp_gpe_ctx->gpe_ctx.dynamic_state.bo, 1);
    assert(vpp_gpe_ctx->gpe_ctx.dynamic_state.bo->virtual);
    unsigned char* constant_buffer = vpp_gpe_ctx->gpe_ctx.dynamic_state.bo->virtual;
    memcpy(constant_buffer, vpp_gpe_ctx->kernel_param,
                            vpp_gpe_ctx->kernel_param_size);
    dri_bo_unmap(vpp_gpe_ctx->gpe_ctx.dynamic_state.bo);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_parameters_fill(VADriverContextP ctx,
                           struct vpp_gpe_context *vpp_gpe_ctx)
{
    unsigned int *command_ptr;
    unsigned int i, size = vpp_gpe_ctx->thread_param_size;
    unsigned char* position = NULL;

    /* Thread inline data setting */
    dri_bo_map(vpp_gpe_ctx->vpp_batchbuffer.bo, 1);
    command_ptr = vpp_gpe_ctx->vpp_batchbuffer.bo->virtual;

    for(i = 0; i < vpp_gpe_ctx->thread_num; i ++)
    {
         *command_ptr++ = (CMD_MEDIA_OBJECT | (size/sizeof(int) + 6 - 2));
         *command_ptr++ = vpp_gpe_ctx->sub_shader_index;
         *command_ptr++ = 0;
         *command_ptr++ = 0;
         *command_ptr++ = 0;
         *command_ptr++ = 0;

         /* copy thread inline data */
         position = (unsigned char*)(vpp_gpe_ctx->thread_param + size * i);
         memcpy(command_ptr, position, size);
         command_ptr += size/sizeof(int);

         *command_ptr++ = CMD_MEDIA_STATE_FLUSH;
         *command_ptr++ = 0;
    }

    *command_ptr++ = 0;
    *command_ptr++ = MI_BATCH_BUFFER_END;

    dri_bo_unmap(vpp_gpe_ctx->vpp_batchbuffer.bo);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_pipeline_setup(VADriverContextP ctx,
                   struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_start_atomic(vpp_gpe_ctx->batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(vpp_gpe_ctx->batch);

    gen8_gpe_pipeline_setup(ctx, &vpp_gpe_ctx->gpe_ctx, vpp_gpe_ctx->batch);

    gen8_gpe_process_parameters_fill(ctx, vpp_gpe_ctx);

    BEGIN_BATCH(vpp_gpe_ctx->batch, 3);
    OUT_BATCH(vpp_gpe_ctx->batch, MI_BATCH_BUFFER_START | (1 << 8) | (1 << 0));
    OUT_RELOC(vpp_gpe_ctx->batch,
              vpp_gpe_ctx->vpp_batchbuffer.bo,
              I915_GEM_DOMAIN_COMMAND, 0,
              0);
    OUT_BATCH(vpp_gpe_ctx->batch, 0);

    ADVANCE_BATCH(vpp_gpe_ctx->batch);

    intel_batchbuffer_end_atomic(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_init(VADriverContextP ctx,
                 struct vpp_gpe_context *vpp_gpe_ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    dri_bo *bo;

    /* On GEN8 each thread emits 6 dwords of MEDIA_OBJECT header plus 2 dwords
     * of MEDIA_STATE_FLUSH in addition to its inline data, so reserve
     * 8 * sizeof(int) of command overhead per thread. */
    unsigned int batch_buf_size = vpp_gpe_ctx->thread_num *
                 (vpp_gpe_ctx->thread_param_size + 8 * sizeof(int)) + 16;

    vpp_gpe_ctx->vpp_kernel_return.num_blocks = vpp_gpe_ctx->thread_num;
    vpp_gpe_ctx->vpp_kernel_return.size_block = 16;
    vpp_gpe_ctx->vpp_kernel_return.pitch = 1;

    unsigned int kernel_return_size = vpp_gpe_ctx->vpp_kernel_return.num_blocks
           * vpp_gpe_ctx->vpp_kernel_return.size_block;

    dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp batch buffer",
                       batch_buf_size, 0x1000);
    vpp_gpe_ctx->vpp_batchbuffer.bo = bo;
    dri_bo_reference(vpp_gpe_ctx->vpp_batchbuffer.bo);

    dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vpp kernel return buffer",
                       kernel_return_size, 0x1000);
    vpp_gpe_ctx->vpp_kernel_return.bo = bo;
    dri_bo_reference(vpp_gpe_ctx->vpp_kernel_return.bo);

    vpp_gpe_ctx->gpe_context_init(ctx, &vpp_gpe_ctx->gpe_ctx);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_prepare(VADriverContextP ctx,
                    struct vpp_gpe_context *vpp_gpe_ctx)
{
    /* Set up all the memory objects */
    gen8_gpe_process_surfaces_setup(ctx, vpp_gpe_ctx);
    gen8_gpe_process_interface_setup(ctx, vpp_gpe_ctx);
    //gen8_gpe_process_constant_setup(ctx, vpp_gpe_ctx);

    /* Program the media pipeline */
    gen8_gpe_process_pipeline_setup(ctx, vpp_gpe_ctx);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process_run(VADriverContextP ctx,
                struct vpp_gpe_context *vpp_gpe_ctx)
{
    intel_batchbuffer_flush(vpp_gpe_ctx->batch);

    return VA_STATUS_SUCCESS;
}

static VAStatus
gen8_gpe_process(VADriverContextP ctx,
                  struct vpp_gpe_context * vpp_gpe_ctx)
{
    VAStatus va_status = VA_STATUS_SUCCESS;

    va_status = gen8_gpe_process_init(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen8_gpe_process_prepare(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    va_status = gen8_gpe_process_run(ctx, vpp_gpe_ctx);
    if (va_status != VA_STATUS_SUCCESS)
        return va_status;

    return VA_STATUS_SUCCESS;
}

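/* Dispatch to the generation-specific path; only Haswell and Broadwell are supported. */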
static VAStatus
vpp_gpe_process(VADriverContextP ctx,
                  struct vpp_gpe_context * vpp_gpe_ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    if (IS_HASWELL(i965->intel.device_info))
       return gen75_gpe_process(ctx, vpp_gpe_ctx);
    else if (IS_GEN8(i965->intel.device_info))
       return gen8_gpe_process(ctx, vpp_gpe_ctx);

    return VA_STATUS_ERROR_UNIMPLEMENTED;
}

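/*
 * Sharpening runs as a three-pass unsharp mask:
 *   1. horizontal blur of the input into the output surface,
 *   2. vertical blur of that result into a temporary NV12 surface,
 *   3. combining the blurred temporary with the original input to produce
 *      the sharpened output.
 * Each pass partitions the frame into stripes and launches one GPU thread
 * per stripe with its own ThreadParameterSharpening inline data.
 */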
static VAStatus
vpp_gpe_process_sharpening(VADriverContextP ctx,
                             struct vpp_gpe_context * vpp_gpe_ctx)
{
     VAStatus va_status = VA_STATUS_SUCCESS;
     struct i965_driver_data *i965 = i965_driver_data(ctx);
     struct object_surface *origin_in_obj_surface = vpp_gpe_ctx->surface_input_object[0];
     struct object_surface *origin_out_obj_surface = vpp_gpe_ctx->surface_output_object;

     VAProcPipelineParameterBuffer* pipe = vpp_gpe_ctx->pipeline_param;
     VABufferID *filter_ids = (VABufferID*)pipe->filters;
     struct object_buffer *obj_buf = BUFFER((*(filter_ids + 0)));

     assert(obj_buf && obj_buf->buffer_store && obj_buf->buffer_store->buffer);

     if (!obj_buf ||
         !obj_buf->buffer_store ||
         !obj_buf->buffer_store->buffer)
         goto error;

     VAProcFilterParameterBuffer* filter =
                  (VAProcFilterParameterBuffer*)obj_buf->buffer_store->buffer;
     float sharpening_intensity = filter->value;

     ThreadParameterSharpening thr_param;
     unsigned int thr_param_size = sizeof(ThreadParameterSharpening);
     unsigned int i;
     unsigned char * pos;

     if(vpp_gpe_ctx->is_first_frame){
         vpp_gpe_ctx->sub_shader_sum = 3;
         struct i965_kernel * vpp_kernels = NULL;
         if (IS_HASWELL(i965->intel.device_info))
             vpp_kernels = gen75_vpp_sharpening_kernels;
         else if (IS_GEN8(i965->intel.device_info))
             vpp_kernels = gen8_vpp_sharpening_kernels;

         /* vpp_gpe_process() only dispatches Haswell/Gen8, so this should not trigger */
         if (!vpp_kernels)
             return VA_STATUS_ERROR_UNIMPLEMENTED;

         vpp_gpe_ctx->gpe_load_kernels(ctx,
                               &vpp_gpe_ctx->gpe_ctx,
                               vpp_kernels,
                               vpp_gpe_ctx->sub_shader_sum);
     }

     if(vpp_gpe_ctx->surface_tmp == VA_INVALID_ID){
        va_status = i965_CreateSurfaces(ctx,
                                       vpp_gpe_ctx->in_frame_w,
                                       vpp_gpe_ctx->in_frame_h,
                                       VA_RT_FORMAT_YUV420,
                                       1,
                                       &vpp_gpe_ctx->surface_tmp);
       assert(va_status == VA_STATUS_SUCCESS);

       struct object_surface * obj_surf = SURFACE(vpp_gpe_ctx->surface_tmp);
       assert(obj_surf);

       if (obj_surf) {
           i965_check_alloc_surface_bo(ctx, obj_surf, 1, VA_FOURCC_NV12,
                                       SUBSAMPLE_YUV420);
           vpp_gpe_ctx->surface_tmp_object = obj_surf;
       }
    }

    assert(sharpening_intensity >= 0.0 && sharpening_intensity <= 1.0);
    thr_param.l_amount = (unsigned int)(sharpening_intensity * 128);
    thr_param.d_amount = (unsigned int)(sharpening_intensity * 128);

    thr_param.base.pic_width = vpp_gpe_ctx->in_frame_w;
    thr_param.base.pic_height = vpp_gpe_ctx->in_frame_h;

    /* Step 1: horizontal blur process */
    vpp_gpe_ctx->forward_surf_sum = 0;
    vpp_gpe_ctx->backward_surf_sum = 0;

    vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_h/16;
    vpp_gpe_ctx->thread_param_size = thr_param_size;
    vpp_gpe_ctx->thread_param = (unsigned char*) malloc(vpp_gpe_ctx->thread_param_size
                                                       *vpp_gpe_ctx->thread_num);
    pos = vpp_gpe_ctx->thread_param;

    if (!pos) {
        return VA_STATUS_ERROR_ALLOCATION_FAILED;
    }

    for( i = 0 ; i < vpp_gpe_ctx->thread_num; i++){
        thr_param.base.v_pos = 16 * i;
        thr_param.base.h_pos = 0;
        memcpy(pos, &thr_param, thr_param_size);
        pos += thr_param_size;
    }

    vpp_gpe_ctx->sub_shader_index = 0;
    va_status = vpp_gpe_process(ctx, vpp_gpe_ctx);
    free(vpp_gpe_ctx->thread_param);

    /* Step 2: vertical blur process */
    vpp_gpe_ctx->surface_input_object[0] = vpp_gpe_ctx->surface_output_object;
    vpp_gpe_ctx->surface_output_object = vpp_gpe_ctx->surface_tmp_object;
    vpp_gpe_ctx->forward_surf_sum = 0;
    vpp_gpe_ctx->backward_surf_sum = 0;

    vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_w/16;
    vpp_gpe_ctx->thread_param_size = thr_param_size;
    vpp_gpe_ctx->thread_param = (unsigned char*) malloc(vpp_gpe_ctx->thread_param_size
                                                       *vpp_gpe_ctx->thread_num);
    pos = vpp_gpe_ctx->thread_param;

    if (!pos) {
        return VA_STATUS_ERROR_ALLOCATION_FAILED;
    }

    for( i = 0 ; i < vpp_gpe_ctx->thread_num; i++){
        thr_param.base.v_pos = 0;
        thr_param.base.h_pos = 16 * i;
        memcpy(pos, &thr_param, thr_param_size);
        pos += thr_param_size;
    }

    vpp_gpe_ctx->sub_shader_index = 1;
    vpp_gpe_process(ctx, vpp_gpe_ctx);
    free(vpp_gpe_ctx->thread_param);

    /* Step 3: apply the blur to the original surface */
    vpp_gpe_ctx->surface_input_object[0]  = origin_in_obj_surface;
    vpp_gpe_ctx->surface_input_object[1]  = vpp_gpe_ctx->surface_tmp_object;
    vpp_gpe_ctx->surface_output_object    = origin_out_obj_surface;
    vpp_gpe_ctx->forward_surf_sum  = 1;
    vpp_gpe_ctx->backward_surf_sum = 0;

    vpp_gpe_ctx->thread_num = vpp_gpe_ctx->in_frame_h/4;
    vpp_gpe_ctx->thread_param_size = thr_param_size;
    vpp_gpe_ctx->thread_param = (unsigned char*) malloc(vpp_gpe_ctx->thread_param_size
                                                       *vpp_gpe_ctx->thread_num);
    pos = vpp_gpe_ctx->thread_param;

    if (!pos) {
        return VA_STATUS_ERROR_ALLOCATION_FAILED;
    }

    for( i = 0 ; i < vpp_gpe_ctx->thread_num; i++){
        thr_param.base.v_pos = 4 * i;
        thr_param.base.h_pos = 0;
        memcpy(pos, &thr_param, thr_param_size);
        pos += thr_param_size;
    }

    vpp_gpe_ctx->sub_shader_index = 2;
    va_status = vpp_gpe_process(ctx, vpp_gpe_ctx);
    free(vpp_gpe_ctx->thread_param);

    return va_status;

error:
    return VA_STATUS_ERROR_INVALID_PARAMETER;
}

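/*
 * Entry point called per frame: walk the pipeline's filter buffers looking
 * for a sharpening filter, collect the forward/backward reference surfaces
 * as additional inputs, then run the sharpening passes. Other filter types
 * are rejected as unsupported.
 */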
VAStatus vpp_gpe_process_picture(VADriverContextP ctx,
                    struct vpp_gpe_context * vpp_gpe_ctx)
{
    VAStatus va_status = VA_STATUS_SUCCESS;
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    VAProcPipelineParameterBuffer* pipe = vpp_gpe_ctx->pipeline_param;
    VAProcFilterParameterBuffer* filter = NULL;
    unsigned int i;
    struct object_surface *obj_surface = NULL;

    if (pipe->num_filters && !pipe->filters)
        goto error;

    for(i = 0; i < pipe->num_filters; i++){
        struct object_buffer *obj_buf = BUFFER(pipe->filters[i]);

        assert(obj_buf && obj_buf->buffer_store && obj_buf->buffer_store->buffer);

        if (!obj_buf ||
            !obj_buf->buffer_store ||
            !obj_buf->buffer_store->buffer)
            goto error;

        filter = (VAProcFilterParameterBuffer*)obj_buf->buffer_store->buffer;
        if(filter->type == VAProcFilterSharpening){
           break;
        }
    }

    assert(pipe->num_forward_references + pipe->num_backward_references <= 4);
    vpp_gpe_ctx->surface_input_object[0] = vpp_gpe_ctx->surface_pipeline_input_object;

    vpp_gpe_ctx->forward_surf_sum = 0;
    vpp_gpe_ctx->backward_surf_sum = 0;

    for(i = 0; i < pipe->num_forward_references; i ++)
    {
        obj_surface = SURFACE(pipe->forward_references[i]);

        assert(obj_surface);
        vpp_gpe_ctx->surface_input_object[i + 1] = obj_surface;
        vpp_gpe_ctx->forward_surf_sum++;
    }

    for(i = 0; i < pipe->num_backward_references; i ++)
    {
        obj_surface = SURFACE(pipe->backward_references[i]);

        assert(obj_surface);
        vpp_gpe_ctx->surface_input_object[vpp_gpe_ctx->forward_surf_sum + 1 + i] = obj_surface;
        vpp_gpe_ctx->backward_surf_sum++;
    }

    obj_surface = vpp_gpe_ctx->surface_input_object[0];
    vpp_gpe_ctx->in_frame_w = obj_surface->orig_width;
    vpp_gpe_ctx->in_frame_h = obj_surface->orig_height;

    if(filter && filter->type == VAProcFilterSharpening) {
       va_status = vpp_gpe_process_sharpening(ctx, vpp_gpe_ctx);
    } else {
       va_status = VA_STATUS_ERROR_ATTR_NOT_SUPPORTED;
    }

    vpp_gpe_ctx->is_first_frame = 0;

    return va_status;

error:
    return VA_STATUS_ERROR_INVALID_PARAMETER;
}

void
vpp_gpe_context_destroy(VADriverContextP ctx,
                               struct vpp_gpe_context *vpp_gpe_ctx)
{
    dri_bo_unreference(vpp_gpe_ctx->vpp_batchbuffer.bo);
    vpp_gpe_ctx->vpp_batchbuffer.bo = NULL;

    dri_bo_unreference(vpp_gpe_ctx->vpp_kernel_return.bo);
    vpp_gpe_ctx->vpp_kernel_return.bo = NULL;

    vpp_gpe_ctx->gpe_context_destroy(&vpp_gpe_ctx->gpe_ctx);

    if(vpp_gpe_ctx->surface_tmp != VA_INVALID_ID){
        assert(vpp_gpe_ctx->surface_tmp_object != NULL);
        i965_DestroySurfaces(ctx, &vpp_gpe_ctx->surface_tmp, 1);
        vpp_gpe_ctx->surface_tmp = VA_INVALID_ID;
        vpp_gpe_ctx->surface_tmp_object = NULL;
    }

    /* Release the batch buffer and its underlying bo, not just the wrapper struct */
    intel_batchbuffer_free(vpp_gpe_ctx->batch);

    free(vpp_gpe_ctx);
}

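/*
 * Create the VPP GPE context: pick the GPE callbacks and state sizes for
 * the detected generation, create the render batch buffer, and mark the
 * first-frame flag so the sharpening kernels get loaded on the first
 * process call.
 */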
struct vpp_gpe_context *
vpp_gpe_context_init(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct vpp_gpe_context  *vpp_gpe_ctx = calloc(1, sizeof(struct vpp_gpe_context));
    struct i965_gpe_context *gpe_ctx;

    if (!vpp_gpe_ctx)
        return NULL;

    gpe_ctx = &(vpp_gpe_ctx->gpe_ctx);

    assert(IS_HASWELL(i965->intel.device_info) ||
           IS_GEN8(i965->intel.device_info));

    vpp_gpe_ctx->surface_tmp = VA_INVALID_ID;
    vpp_gpe_ctx->surface_tmp_object = NULL;
    vpp_gpe_ctx->batch = intel_batchbuffer_new(&i965->intel, I915_EXEC_RENDER, 0);
    vpp_gpe_ctx->is_first_frame = 1;

    gpe_ctx->vfe_state.max_num_threads = 60 - 1;
    gpe_ctx->vfe_state.num_urb_entries = 16;
    gpe_ctx->vfe_state.gpgpu_mode = 0;
    gpe_ctx->vfe_state.urb_entry_size = 59 - 1;
    gpe_ctx->vfe_state.curbe_allocation_size = CURBE_ALLOCATION_SIZE - 1;

    if (IS_HASWELL(i965->intel.device_info)) {
        vpp_gpe_ctx->gpe_context_init     = i965_gpe_context_init;
        vpp_gpe_ctx->gpe_context_destroy  = i965_gpe_context_destroy;
        vpp_gpe_ctx->gpe_load_kernels     = i965_gpe_load_kernels;
        gpe_ctx->surface_state_binding_table.length =
               (SURFACE_STATE_PADDED_SIZE_GEN7 + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;

        gpe_ctx->curbe.length = CURBE_TOTAL_DATA_LENGTH;
        gpe_ctx->idrt.max_entries = MAX_INTERFACE_DESC_GEN6;
        gpe_ctx->idrt.entry_size = sizeof(struct gen6_interface_descriptor_data);

    } else if (IS_GEN8(i965->intel.device_info)) {
        vpp_gpe_ctx->gpe_context_init     = gen8_gpe_context_init;
        vpp_gpe_ctx->gpe_context_destroy  = gen8_gpe_context_destroy;
        vpp_gpe_ctx->gpe_load_kernels     = gen8_gpe_load_kernels;
        gpe_ctx->surface_state_binding_table.length =
               (SURFACE_STATE_PADDED_SIZE_GEN8 + sizeof(unsigned int)) * MAX_MEDIA_SURFACES_GEN6;

        gpe_ctx->curbe_size = CURBE_TOTAL_DATA_LENGTH;
        gpe_ctx->idrt_size  = sizeof(struct gen8_interface_descriptor_data) * MAX_INTERFACE_DESC_GEN6;

    }

    return vpp_gpe_ctx;
}