/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Keith Packard <keithp@keithp.com>
 *    Xiang Haihao <haihao.xiang@intel.com>
 *    Zhao Yakui <yakui.zhao@intel.com>
 *
 */

/*
 * Most of the rendering code is ported from xf86-video-intel/src/i965_video.c
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <math.h>

#include <va/va_drmcommon.h>

#include "intel_batchbuffer.h"
#include "intel_driver.h"
#include "i965_defines.h"
#include "i965_drv_video.h"
#include "i965_structs.h"
#include "i965_yuv_coefs.h"

#include "i965_render.h"

#define SF_KERNEL_NUM_GRF       16
#define SF_MAX_THREADS          1

#define PS_KERNEL_NUM_GRF       48
#define PS_MAX_THREADS          32

/* Programs for Gen8 */
static const uint32_t sf_kernel_static_gen8[][4] = {
    /* intentionally empty: this path does not use an SF kernel on Gen8 */
};
static const uint32_t ps_kernel_static_gen8[][4] = {
#include "shaders/render/exa_wm_src_affine.g8b"
#include "shaders/render/exa_wm_src_sample_planar.g8b"
#include "shaders/render/exa_wm_yuv_color_balance.g8b"
#include "shaders/render/exa_wm_yuv_rgb.g8b"
#include "shaders/render/exa_wm_write.g8b"
};

static const uint32_t ps_subpic_kernel_static_gen8[][4] = {
#include "shaders/render/exa_wm_src_affine.g8b"
#include "shaders/render/exa_wm_src_sample_argb.g8b"
#include "shaders/render/exa_wm_write.g8b"
};

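/*
 * The surface state/binding table BO holds MAX_RENDER_SURFACES padded
 * surface state blocks followed by the binding table, whose entry i
 * stores the byte offset of surface state i:
 *
 *   SURFACE_STATE_OFFSET(0) .. SURFACE_STATE_OFFSET(1) - 1   surface state 0
 *   ...
 *   BINDING_TABLE_OFFSET + 4 * i                             binding entry i
 */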
#define SURFACE_STATE_PADDED_SIZE       SURFACE_STATE_PADDED_SIZE_GEN8

#define SURFACE_STATE_OFFSET(index)     (SURFACE_STATE_PADDED_SIZE * index)
#define BINDING_TABLE_OFFSET            SURFACE_STATE_OFFSET(MAX_RENDER_SURFACES)

enum {
    SF_KERNEL = 0,
    PS_KERNEL,
    PS_SUBPIC_KERNEL
};

static struct i965_kernel render_kernels_gen8[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen8,
        sizeof(sf_kernel_static_gen8),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen8,
        sizeof(ps_kernel_static_gen8),
        NULL
    },
    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen8,
        sizeof(ps_subpic_kernel_static_gen8),
        NULL
    }
};

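/*
 * Legacy URB partitioning parameters; only URB_CS_ENTRY_SIZE is
 * referenced in this part of the file (by 3DSTATE_CONSTANT_PS below).
 */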
#define URB_VS_ENTRIES        8
#define URB_VS_ENTRY_SIZE     1

#define URB_GS_ENTRIES        0
#define URB_GS_ENTRY_SIZE     0

#define URB_CLIP_ENTRIES      0
#define URB_CLIP_ENTRY_SIZE   0

#define URB_SF_ENTRIES        1
#define URB_SF_ENTRY_SIZE     2

#define URB_CS_ENTRIES        4
#define URB_CS_ENTRY_SIZE     4

static void
gen8_render_set_surface_tiling(struct gen8_surface_state *ss, uint32_t tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss0.tiled_surface = 0;
        ss->ss0.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

/* Set "Shader Channel Select" for GEN8+ */
void
gen8_render_set_surface_scs(struct gen8_surface_state *ss)
{
    ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
    ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
    ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
    ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
}

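/*
 * Fill a Gen8 SURFACE_STATE for a 2D surface.  For field rendering
 * (VA_TOP_FIELD/VA_BOTTOM_FIELD) the vertical line stride is enabled
 * and the effective height is halved; width, height and pitch are
 * programmed minus one, as the hardware expects.  The base address
 * written here is provisional: callers patch it afterwards through a
 * relocation on ss8.
 */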
static void
gen8_render_set_surface_state(
    struct gen8_surface_state *ss,
    dri_bo                    *bo,
    unsigned long              offset,
    int                        width,
    int                        height,
    int                        pitch,
    int                        format,
    unsigned int               flags
)
{
    unsigned int tiling;
    unsigned int swizzle;

    memset(ss, 0, sizeof(*ss));

    switch (flags & (VA_TOP_FIELD|VA_BOTTOM_FIELD)) {
    case VA_BOTTOM_FIELD:
        ss->ss0.vert_line_stride_ofs = 1;
        /* fall-through */
    case VA_TOP_FIELD:
        ss->ss0.vert_line_stride = 1;
        height /= 2;
        break;
    }

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;

    ss->ss8.base_addr = bo->offset + offset;

    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;

    ss->ss3.pitch = pitch - 1;

    /* Always set to 1 (align-4 mode), per the B-spec */
    ss->ss0.vertical_alignment = 1;
    ss->ss0.horizontal_alignment = 1;

    dri_bo_get_tiling(bo, &tiling, &swizzle);
    gen8_render_set_surface_tiling(ss, tiling);
}

static void
gen8_render_src_surface_state(
    VADriverContextP ctx,
    int              index,
    dri_bo          *region,
    unsigned long    offset,
    int              w,
    int              h,
    int              pitch,
    int              format,
    unsigned int     flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;

    assert(index < MAX_RENDER_SURFACES);

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    gen8_render_set_surface_state(ss,
                                  region, offset,
                                  w, h,
                                  pitch, format, flags);
    gen8_render_set_surface_scs(ss);
    dri_bo_emit_reloc(ss_bo,
                      I915_GEM_DOMAIN_SAMPLER, 0,
                      offset,
                      SURFACE_STATE_OFFSET(index) + offsetof(struct gen8_surface_state, ss8),
                      region);

    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
    render_state->wm.sampler_count++;
}

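/*
 * Bind the source planes.  Each plane is bound twice, at two
 * consecutive binding-table slots (the planar sampling kernel appears
 * to read each plane through a pair of samplers): Y at indices 1-2,
 * then either the interleaved UV plane (NV12, R8G8) at 3-4, or
 * separate U at 3-4 and V at 5-6 for fully planar layouts.  Grayscale
 * (Y800) binds only the Y plane.
 */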
static void
gen8_render_src_surfaces_state(
    VADriverContextP ctx,
    struct object_surface *obj_surface,
    unsigned int     flags
)
{
    int region_pitch;
    int rw, rh;
    dri_bo *region;

    region_pitch = obj_surface->width;
    rw = obj_surface->orig_width;
    rh = obj_surface->orig_height;
    region = obj_surface->bo;

    gen8_render_src_surface_state(ctx, 1, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);     /* Y */
    gen8_render_src_surface_state(ctx, 2, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);

    if (obj_surface->fourcc == VA_FOURCC_Y800) /* single plane for grayscale */
        return;

    if (obj_surface->fourcc == VA_FOURCC_NV12) {
        gen8_render_src_surface_state(ctx, 3, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8G8_UNORM, flags); /* UV */
        gen8_render_src_surface_state(ctx, 4, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8G8_UNORM, flags);
    } else {
        gen8_render_src_surface_state(ctx, 3, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags); /* U */
        gen8_render_src_surface_state(ctx, 4, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags);
        gen8_render_src_surface_state(ctx, 5, region,
                                      region_pitch * obj_surface->y_cr_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags); /* V */
        gen8_render_src_surface_state(ctx, 6, region,
                                      region_pitch * obj_surface->y_cr_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags);
    }
}

static void
gen8_subpic_render_src_surfaces_state(VADriverContextP ctx,
                                      struct object_surface *obj_surface)
{
    dri_bo *subpic_region;
    unsigned int index;
    struct object_subpic *obj_subpic;
    struct object_image *obj_image;

    /* check the surface before dereferencing it */
    assert(obj_surface);
    assert(obj_surface->bo);

    index = obj_surface->subpic_render_idx;
    obj_subpic = obj_surface->obj_subpic[index];
    obj_image = obj_subpic->obj_image;
    subpic_region = obj_image->bo;
    /* subpicture surface */
    gen8_render_src_surface_state(ctx, 1, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);
    gen8_render_src_surface_state(ctx, 2, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);
}

static void
gen8_render_dest_surface_state(VADriverContextP ctx, int index)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;
    int format;

    assert(index < MAX_RENDER_SURFACES);

    if (dest_region->cpp == 2) {
        format = I965_SURFACEFORMAT_B5G6R5_UNORM;
    } else {
        format = I965_SURFACEFORMAT_B8G8R8A8_UNORM;
    }

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    gen8_render_set_surface_state(ss,
                                  dest_region->bo, 0,
                                  dest_region->width, dest_region->height,
                                  dest_region->pitch, format, 0);
    gen8_render_set_surface_scs(ss);
    dri_bo_emit_reloc(ss_bo,
                      I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                      0,
                      SURFACE_STATE_OFFSET(index) + offsetof(struct gen8_surface_state, ss8),
                      dest_region->bo);

    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
}

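/*
 * Upload three vertices of (u, v, x, y) describing the bottom-right,
 * bottom-left and top-left corners of the destination rectangle; the
 * RECTLIST topology infers the fourth corner.  Rotation is handled by
 * permuting which texture corner is fetched for each screen corner,
 * e.g. for VA_ROTATION_90 the bottom-right vertex samples (u2, v1).
 */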
static void
i965_fill_vertex_buffer(
    VADriverContextP ctx,
    float tex_coords[4], /* [(u1,v1);(u2,v2)] */
    float vid_coords[4]  /* [(x1,y1);(x2,y2)] */
)
{
    struct i965_driver_data * const i965 = i965_driver_data(ctx);
    float vb[12];

    enum { X1, Y1, X2, Y2 };

    static const unsigned int g_rotation_indices[][6] = {
        [VA_ROTATION_NONE] = { X2, Y2, X1, Y2, X1, Y1 },
        [VA_ROTATION_90]   = { X2, Y1, X2, Y2, X1, Y2 },
        [VA_ROTATION_180]  = { X1, Y1, X2, Y1, X2, Y2 },
        [VA_ROTATION_270]  = { X1, Y2, X1, Y1, X2, Y1 },
    };

    const unsigned int * const rotation_indices =
        g_rotation_indices[i965->rotation_attrib->value];

    vb[0]  = tex_coords[rotation_indices[0]]; /* bottom-right corner */
    vb[1]  = tex_coords[rotation_indices[1]];
    vb[2]  = vid_coords[X2];
    vb[3]  = vid_coords[Y2];

    vb[4]  = tex_coords[rotation_indices[2]]; /* bottom-left corner */
    vb[5]  = tex_coords[rotation_indices[3]];
    vb[6]  = vid_coords[X1];
    vb[7]  = vid_coords[Y2];

    vb[8]  = tex_coords[rotation_indices[4]]; /* top-left corner */
    vb[9]  = tex_coords[rotation_indices[5]];
    vb[10] = vid_coords[X1];
    vb[11] = vid_coords[Y1];

    dri_bo_subdata(i965->render_state.vb.vertex_buffer, 0, sizeof(vb), vb);
}

static void
i965_subpic_render_upload_vertex(VADriverContextP ctx,
                                 struct object_surface *obj_surface,
                                 const VARectangle *output_rect)
{
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic     *obj_subpic   = obj_surface->obj_subpic[index];
    float tex_coords[4], vid_coords[4];
    VARectangle dst_rect;

    if (obj_subpic->flags & VA_SUBPICTURE_DESTINATION_IS_SCREEN_COORD)
        dst_rect = obj_subpic->dst_rect;
    else {
        const float sx  = (float)output_rect->width  / obj_surface->orig_width;
        const float sy  = (float)output_rect->height / obj_surface->orig_height;
        dst_rect.x      = output_rect->x + sx * obj_subpic->dst_rect.x;
        dst_rect.y      = output_rect->y + sy * obj_subpic->dst_rect.y;
        dst_rect.width  = sx * obj_subpic->dst_rect.width;
        dst_rect.height = sy * obj_subpic->dst_rect.height;
    }

    tex_coords[0] = (float)obj_subpic->src_rect.x / obj_subpic->width;
    tex_coords[1] = (float)obj_subpic->src_rect.y / obj_subpic->height;
    tex_coords[2] = (float)(obj_subpic->src_rect.x + obj_subpic->src_rect.width) / obj_subpic->width;
    tex_coords[3] = (float)(obj_subpic->src_rect.y + obj_subpic->src_rect.height) / obj_subpic->height;

    vid_coords[0] = dst_rect.x;
    vid_coords[1] = dst_rect.y;
    vid_coords[2] = (float)(dst_rect.x + dst_rect.width);
    vid_coords[3] = (float)(dst_rect.y + dst_rect.height);

    i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
}

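/*
 * Map src_rect to normalized texture coordinates and dst_rect to screen
 * coordinates offset by the draw-region origin, then hand both to
 * i965_fill_vertex_buffer() above.
 */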
static void
i965_render_upload_vertex(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    float tex_coords[4], vid_coords[4];
    int width, height;

    width  = obj_surface->orig_width;
    height = obj_surface->orig_height;

    tex_coords[0] = (float)src_rect->x / width;
    tex_coords[1] = (float)src_rect->y / height;
    tex_coords[2] = (float)(src_rect->x + src_rect->width) / width;
    tex_coords[3] = (float)(src_rect->y + src_rect->height) / height;

    vid_coords[0] = dest_region->x + dst_rect->x;
    vid_coords[1] = dest_region->y + dst_rect->y;
    vid_coords[2] = vid_coords[0] + dst_rect->width;
    vid_coords[3] = vid_coords[1] + dst_rect->height;

    i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
}

static void
i965_render_drawing_rectangle(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;

    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, CMD_DRAWING_RECTANGLE | 2);
    OUT_BATCH(batch, 0x00000000);
    OUT_BATCH(batch, (dest_region->width - 1) | ((dest_region->height - 1) << 16));
    OUT_BATCH(batch, 0x00000000);
    ADVANCE_BATCH(batch);
}

static void
i965_render_upload_image_palette(
    VADriverContextP ctx,
    struct object_image *obj_image,
    unsigned int     alpha
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int i;

    assert(obj_image);

    if (!obj_image)
        return;

    if (obj_image->image.num_palette_entries == 0)
        return;

    BEGIN_BATCH(batch, 1 + obj_image->image.num_palette_entries);
    OUT_BATCH(batch, CMD_SAMPLER_PALETTE_LOAD | (obj_image->image.num_palette_entries - 1));
    /* fill the palette: bits 0-23 hold the color, bits 24-31 the alpha */
    for (i = 0; i < obj_image->image.num_palette_entries; i++)
        OUT_BATCH(batch, (alpha << 24) | obj_image->palette[i]);
    ADVANCE_BATCH(batch);
}

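/*
 * Clear the destination region to solid color 0 with an XY_COLOR_BLT
 * on the BLT engine before compositing into it.
 */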
static void
gen8_clear_dest_region(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    unsigned int blt_cmd, br13;
    int pitch;

    blt_cmd = GEN8_XY_COLOR_BLT_CMD;
    br13 = 0xf0 << 16; /* ROP: PATCOPY */
    pitch = dest_region->pitch;

    if (dest_region->cpp == 4) {
        br13 |= BR13_8888;
        blt_cmd |= (XY_COLOR_BLT_WRITE_RGB | XY_COLOR_BLT_WRITE_ALPHA);
    } else {
        assert(dest_region->cpp == 2);
        br13 |= BR13_565;
    }

    if (dest_region->tiling != I915_TILING_NONE) {
        blt_cmd |= XY_COLOR_BLT_DST_TILED;
        pitch /= 4; /* the BLT pitch of a tiled target is given in dwords */
    }

    br13 |= pitch;

    intel_batchbuffer_start_atomic_blt(batch, 24);
    BEGIN_BLT_BATCH(batch, 7);

    OUT_BATCH(batch, blt_cmd);
    OUT_BATCH(batch, br13);
    OUT_BATCH(batch, (dest_region->y << 16) | (dest_region->x));
    OUT_BATCH(batch, ((dest_region->y + dest_region->height) << 16) |
              (dest_region->x + dest_region->width));
    OUT_RELOC(batch, dest_region->bo,
              I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
              0);
    OUT_BATCH(batch, 0x0);
    OUT_BATCH(batch, 0x0);
    ADVANCE_BATCH(batch);
    intel_batchbuffer_end_atomic(batch);
}

/*
 * for GEN8
 */
#define ALIGNMENT       64

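/*
 * Allocate the per-frame buffers and sub-allocate one dynamic-state BO,
 * each section aligned to ALIGNMENT (64) bytes:
 *
 *   curbe_offset       = 0
 *   sampler_offset     = ALIGN(curbe_size, 64)
 *   cc_viewport_offset = sampler_offset + ALIGN(sampler_size, 64)
 *   ... and so on for CC state, blend state, SF_CLIP and scissor.
 */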
static void
gen8_render_initialize(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    dri_bo *bo;
    int size;
    unsigned int end_offset;

    /* VERTEX BUFFER */
    dri_bo_unreference(render_state->vb.vertex_buffer);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vertex buffer",
                      4096,
                      4096);
    assert(bo);
    render_state->vb.vertex_buffer = bo;

    /* WM */
    dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "surface state & binding table",
                      (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
                      4096);
    assert(bo);
    render_state->wm.surface_state_binding_table_bo = bo;

    render_state->curbe_size = 256;

    render_state->wm.sampler_count = 0;

    render_state->sampler_size = MAX_SAMPLERS * sizeof(struct gen8_sampler_state);

    render_state->cc_state_size = sizeof(struct gen6_color_calc_state);

    render_state->cc_viewport_size = sizeof(struct i965_cc_viewport);

    render_state->blend_state_size = sizeof(struct gen8_global_blend_state) +
                        16 * sizeof(struct gen8_blend_state_rt);

    render_state->sf_clip_size = 1024;

    render_state->scissor_size = 1024;

    size = ALIGN(render_state->curbe_size, ALIGNMENT) +
        ALIGN(render_state->sampler_size, ALIGNMENT) +
        ALIGN(render_state->cc_viewport_size, ALIGNMENT) +
        ALIGN(render_state->cc_state_size, ALIGNMENT) +
        ALIGN(render_state->blend_state_size, ALIGNMENT) +
        ALIGN(render_state->sf_clip_size, ALIGNMENT) +
        ALIGN(render_state->scissor_size, ALIGNMENT);

    dri_bo_unreference(render_state->dynamic_state.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "dynamic_state",
                      size,
                      4096);

    render_state->dynamic_state.bo = bo;

    end_offset = 0;
    render_state->dynamic_state.end_offset = 0;

    /* Constant buffer offset */
    render_state->curbe_offset = end_offset;
    end_offset += ALIGN(render_state->curbe_size, ALIGNMENT);

    /* SAMPLER_STATE */
    render_state->sampler_offset = end_offset;
    end_offset += ALIGN(render_state->sampler_size, ALIGNMENT);

    /* CC_VIEWPORT state */
    render_state->cc_viewport_offset = end_offset;
    end_offset += ALIGN(render_state->cc_viewport_size, ALIGNMENT);

    /* COLOR_CALC state */
    render_state->cc_state_offset = end_offset;
    end_offset += ALIGN(render_state->cc_state_size, ALIGNMENT);

    /* Blend state */
    render_state->blend_state_offset = end_offset;
    end_offset += ALIGN(render_state->blend_state_size, ALIGNMENT);

    /* SF_CLIP state */
    render_state->sf_clip_offset = end_offset;
    end_offset += ALIGN(render_state->sf_clip_size, ALIGNMENT);

    /* SCISSOR state */
    render_state->scissor_offset = end_offset;
    end_offset += ALIGN(render_state->scissor_size, ALIGNMENT);

    /* update the end offset of dynamic_state */
    render_state->dynamic_state.end_offset = end_offset;
}

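/*
 * Write one linear-filtered, clamp-addressed sampler for each source
 * binding created by gen8_render_src_surface_state() (which increments
 * sampler_count).
 */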
static void
gen8_render_sampler(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen8_sampler_state *sampler_state;
    int i;
    unsigned char *cc_ptr;

    assert(render_state->wm.sampler_count > 0);
    assert(render_state->wm.sampler_count <= MAX_SAMPLERS);

    dri_bo_map(render_state->dynamic_state.bo, 1);
    assert(render_state->dynamic_state.bo->virtual);

    cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
                        render_state->sampler_offset;

    sampler_state = (struct gen8_sampler_state *) cc_ptr;

    for (i = 0; i < render_state->wm.sampler_count; i++) {
        memset(sampler_state, 0, sizeof(*sampler_state));
        sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss3.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss3.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss3.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state++;
    }

    dri_bo_unmap(render_state->dynamic_state.bo);
}

static void
gen8_render_blend_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen8_global_blend_state *global_blend_state;
    struct gen8_blend_state_rt *blend_state;
    unsigned char *cc_ptr;

    dri_bo_map(render_state->dynamic_state.bo, 1);
    assert(render_state->dynamic_state.bo->virtual);

    cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
                        render_state->blend_state_offset;

    global_blend_state = (struct gen8_global_blend_state*) cc_ptr;

    memset(global_blend_state, 0, render_state->blend_state_size);
    /* Global blend state + blend_state for Render Target */
    blend_state = (struct gen8_blend_state_rt *)(global_blend_state + 1);
    blend_state->blend1.logic_op_enable = 1;
    blend_state->blend1.logic_op_func = 0xc; /* LOGICOP_COPY */
    blend_state->blend1.pre_blend_clamp_enable = 1;

    dri_bo_unmap(render_state->dynamic_state.bo);
}


static void
gen8_render_cc_viewport(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_viewport *cc_viewport;
    unsigned char *cc_ptr;

    dri_bo_map(render_state->dynamic_state.bo, 1);
    assert(render_state->dynamic_state.bo->virtual);

    cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
                        render_state->cc_viewport_offset;

    cc_viewport = (struct i965_cc_viewport *) cc_ptr;

    memset(cc_viewport, 0, sizeof(*cc_viewport));

    cc_viewport->min_depth = -1.e35;
    cc_viewport->max_depth = 1.e35;

    dri_bo_unmap(render_state->dynamic_state.bo);
}

static void
gen8_render_color_calc_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen6_color_calc_state *color_calc_state;
    unsigned char *cc_ptr;

    dri_bo_map(render_state->dynamic_state.bo, 1);
    assert(render_state->dynamic_state.bo->virtual);

    cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
                        render_state->cc_state_offset;

    color_calc_state = (struct gen6_color_calc_state *) cc_ptr;

    memset(color_calc_state, 0, sizeof(*color_calc_state));
    color_calc_state->constant_r = 1.0;
    color_calc_state->constant_g = 0.0;
    color_calc_state->constant_b = 1.0;
    color_calc_state->constant_a = 1.0;
    dri_bo_unmap(render_state->dynamic_state.bo);
}

#define PI  3.1415926

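/*
 * CURBE layout consumed by the WM kernel (one 256-byte constant buffer):
 *   word  0      plane selector: 2 = grayscale (Y800), 1 = NV12, 0 = planar
 *   word  1      1 = skip the color-balance transform, 0 = apply it
 *   float 4-7    contrast, brightness, cos(hue)*c*s, sin(hue)*c*s
 *   float 8+     YUV-to-RGB coefficient matrix for the selected standard
 * The hue attribute arrives in degrees and is converted to radians
 * (value / 180 * PI) before the cos/sin terms are computed.
 */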
static void
gen8_render_upload_constants(VADriverContextP ctx,
                             struct object_surface *obj_surface,
                             unsigned int flags)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    unsigned short *constant_buffer;
    unsigned char *cc_ptr;
    float *color_balance_base;
    float contrast = (float)i965->contrast_attrib->value / DEFAULT_CONTRAST;
    float brightness = (float)i965->brightness_attrib->value / 255; /* YUV is float in the shader */
    float hue = (float)i965->hue_attrib->value / 180 * PI;
    float saturation = (float)i965->saturation_attrib->value / DEFAULT_SATURATION;
    float *yuv_to_rgb;
    unsigned int color_flag;
    const float* yuv_coefs;
    size_t coefs_length;

    dri_bo_map(render_state->dynamic_state.bo, 1);
    assert(render_state->dynamic_state.bo->virtual);

    cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
                        render_state->curbe_offset;

    constant_buffer = (unsigned short *) cc_ptr;

    if (obj_surface->subsampling == SUBSAMPLE_YUV400) {
        assert(obj_surface->fourcc == VA_FOURCC_Y800);

        *constant_buffer = 2;
    } else {
        if (obj_surface->fourcc == VA_FOURCC_NV12)
            *constant_buffer = 1;
        else
            *constant_buffer = 0;
    }

    if (i965->contrast_attrib->value == DEFAULT_CONTRAST &&
        i965->brightness_attrib->value == DEFAULT_BRIGHTNESS &&
        i965->hue_attrib->value == DEFAULT_HUE &&
        i965->saturation_attrib->value == DEFAULT_SATURATION)
        constant_buffer[1] = 1; /* skip color balance transformation */
    else
        constant_buffer[1] = 0;

    color_balance_base = (float *)constant_buffer + 4;
    *color_balance_base++ = contrast;
    *color_balance_base++ = brightness;
    *color_balance_base++ = cos(hue) * contrast * saturation;
    *color_balance_base++ = sin(hue) * contrast * saturation;

    color_flag = flags & VA_SRC_COLOR_MASK;
    yuv_to_rgb = (float *)constant_buffer + 8;

    yuv_coefs = i915_color_standard_to_coefs(i915_filter_to_color_standard(color_flag),
                                             &coefs_length);
    memcpy(yuv_to_rgb, yuv_coefs, coefs_length);

    dri_bo_unmap(render_state->dynamic_state.bo);
}

static void
gen8_render_setup_states(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    gen8_render_dest_surface_state(ctx, 0);
    gen8_render_src_surfaces_state(ctx, obj_surface, flags);
    gen8_render_sampler(ctx);
    gen8_render_cc_viewport(ctx);
    gen8_render_color_calc_state(ctx);
    gen8_render_blend_state(ctx);
    gen8_render_upload_constants(ctx, obj_surface, flags);
    i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
}

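/*
 * Program the 16-dword Gen8 STATE_BASE_ADDRESS.  The surface-state,
 * dynamic-state and instruction bases are relocated to the BOs set up
 * above, so the offsets emitted by later state packets are relative to
 * those buffers.
 */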
static void
gen8_emit_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 16);
    OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (16 - 2));
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW4. Surface state base address */
    OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
    OUT_BATCH(batch, 0);
    /* DW6. Dynamic state base address */
    OUT_RELOC(batch, render_state->dynamic_state.bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_SAMPLER,
              0, BASE_ADDRESS_MODIFY);
    OUT_BATCH(batch, 0);
    /* DW8. Indirect object base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY);
    OUT_BATCH(batch, 0);
    /* DW10. Instruction base address */
    OUT_RELOC(batch, render_state->instruction_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
    OUT_BATCH(batch, 0);
    /* DW12 */
    OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* General state upper bound */
    OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
    OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
    OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
    ADVANCE_BATCH(batch);
}

static void
gen8_emit_cc_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (2 - 2));
    OUT_BATCH(batch, (render_state->cc_state_offset + 1)); /* bit 0: pointer valid */
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
    OUT_BATCH(batch, (render_state->blend_state_offset + 1)); /* bit 0: pointer valid */
    ADVANCE_BATCH(batch);
}

static void
gen8_emit_vertices(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, CMD_VERTEX_BUFFERS | (5 - 2));
    OUT_BATCH(batch,
              (0 << GEN8_VB0_BUFFER_INDEX_SHIFT) |
              (0 << GEN8_VB0_MOCS_SHIFT) |
              GEN7_VB0_ADDRESS_MODIFYENABLE |
              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 12 * 4);
    ADVANCE_BATCH(batch);

    /* The topology of the 3D primitive is overridden by the 3DSTATE_VF_TOPOLOGY command */
    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN8_3DSTATE_VF_TOPOLOGY | (2 - 2));
    OUT_BATCH(batch,
              _3DPRIM_RECTLIST);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, CMD_3DPRIMITIVE | (7 - 2));
    OUT_BATCH(batch,
              GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL);
    OUT_BATCH(batch, 3); /* vertex count per instance */
    OUT_BATCH(batch, 0); /* start vertex offset */
    OUT_BATCH(batch, 1); /* single instance */
    OUT_BATCH(batch, 0); /* start instance location */
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

static void
gen8_emit_vertex_element_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    int i;

    /*
     * The VUE layout
     * dword 0-3: pad (0, 0, 0, 0)
     * dword 4-7: position (x, y, 1.0, 1.0),
     * dword 8-11: texture coordinate 0 (u0, v0, 1.0, 1.0)
     */

    /* Set up our vertex elements, sourced from the single vertex buffer. */
    OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (7 - 2));

    /* Element state 0. These are 4 dwords of 0 required for the VUE format.
     * We don't really know or care what they do.
     */
    OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
              GEN8_VE0_VALID |
              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
              (0 << VE0_OFFSET_SHIFT));
    OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT) |
              (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) |
              (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) |
              (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT));

    /* offset 8: X, Y -> {x, y, 1.0, 1.0} */
    OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
              GEN8_VE0_VALID |
              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
              (8 << VE0_OFFSET_SHIFT));
    OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
              (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));

    /* offset 0: u,v -> {U, V, 1.0, 1.0} */
    OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
              GEN8_VE0_VALID |
              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
              (0 << VE0_OFFSET_SHIFT));
    OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
              (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));

    /* Disable instancing for all vertex elements. */
    for (i = 0; i < 3; i++) {
        OUT_BATCH(batch, GEN8_3DSTATE_VF_INSTANCING | (3 - 2));
        OUT_BATCH(batch, i);
        OUT_BATCH(batch, 0);
    }

    /* Disable system-generated values. */
    OUT_BATCH(batch, GEN8_3DSTATE_VF_SGVS | (2 - 2));
    OUT_BATCH(batch, 0);
}

static void
gen8_emit_vs_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* disable VS constant buffer */
    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (11 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* CS Buffer 0 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* CS Buffer 1 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* CS Buffer 2 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* CS Buffer 3 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 9);
    OUT_BATCH(batch, GEN6_3DSTATE_VS | (9 - 2));
    OUT_BATCH(batch, 0); /* without VS kernel */
    OUT_BATCH(batch, 0);
    /* VS shader dispatch flag */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW6. VS shader GRF and URB buffer definition */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

/*
 * URB layout on GEN8
 * ----------------------------------------
 * | PS Push Constants (8KB) | VS entries |
 * ----------------------------------------
 */
static void
gen8_emit_urb(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int num_urb_entries = 64;

    /* 64 is the minimum number of VS URB entries */

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* PS push-constant buffer: 8KB in size, base address 0KB */
    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
    OUT_BATCH(batch,
              (0 << GEN8_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT) |
              (8 << GEN8_PUSH_CONSTANT_BUFFER_SIZE_SHIFT));
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_URB_VS | (2 - 2));
    OUT_BATCH(batch,
              (num_urb_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) |
              ((4 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT) |
              (4 << GEN7_URB_STARTING_ADDRESS_SHIFT));
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_URB_GS | (2 - 2));
    OUT_BATCH(batch,
              (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
              (5 << GEN7_URB_STARTING_ADDRESS_SHIFT));
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_URB_HS | (2 - 2));
    OUT_BATCH(batch,
              (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
              (6 << GEN7_URB_STARTING_ADDRESS_SHIFT));
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_URB_DS | (2 - 2));
    OUT_BATCH(batch,
              (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
              (7 << GEN7_URB_STARTING_ADDRESS_SHIFT));
    ADVANCE_BATCH(batch);
}

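/*
 * Emit null constants and kernels for the GS/HS/TE/DS stages and
 * disable stream output, so the fixed-function pipeline simply passes
 * the RECTLIST vertices through to the rasterizer.
 */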
static void
gen8_emit_bypass_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* bypass GS */
    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (11 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 10);
    OUT_BATCH(batch, GEN6_3DSTATE_GS | (10 - 2));
    /* GS shader address */
    OUT_BATCH(batch, 0); /* without GS kernel */
    OUT_BATCH(batch, 0);
    /* DW3. GS shader dispatch flag */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW6. GS shader GRF and URB offset/length */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* disable HS */
    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_HS | (11 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 9);
    OUT_BATCH(batch, GEN7_3DSTATE_HS | (9 - 2));
    OUT_BATCH(batch, 0);
    /* DW2. HS pass-through */
    OUT_BATCH(batch, 0);
    /* DW3. HS shader address */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW5. HS shader flag. URB offset/length and so on */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable TE */
    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, GEN7_3DSTATE_TE | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable DS */
    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_DS | (11 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 9);
    OUT_BATCH(batch, GEN7_3DSTATE_DS | (9 - 2));
    /* DW1. DS shader pointer */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW3-5. DS shader dispatch flag. */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW6-7. DS shader pass-through, GRF, URB offset/length, thread number */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW8. DS shader output URB */
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable STREAMOUT */
    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, GEN7_3DSTATE_STREAMOUT | (5 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

static void
gen8_emit_invarient_states(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN8_3DSTATE_MULTISAMPLE | (2 - 2));
    OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
              GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
    ADVANCE_BATCH(batch);

    /* Update 3D Multisample pattern */
    BEGIN_BATCH(batch, 9);
    OUT_BATCH(batch, GEN8_3DSTATE_SAMPLE_PATTERN | (9 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
    OUT_BATCH(batch, 1);
    ADVANCE_BATCH(batch);

    /* Set system instruction pointer */
    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, CMD_STATE_SIP | 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

static void
gen8_emit_clip_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
    OUT_BATCH(batch, 0);
}

static void
gen8_emit_sf_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, GEN8_3DSTATE_RASTER | (5 - 2));
    OUT_BATCH(batch, GEN8_3DSTATE_RASTER_CULL_NONE);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, GEN7_3DSTATE_SBE | (4 - 2));
    OUT_BATCH(batch,
              (GEN8_SBE_FORCE_URB_ENTRY_READ_LENGTH) |
              (GEN8_SBE_FORCE_URB_ENTRY_READ_OFFSET) |
              (1 << GEN7_SBE_NUM_OUTPUTS_SHIFT) |
              (1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT) |
              (1 << GEN8_SBE_URB_ENTRY_READ_OFFSET_SHIFT));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* SBE for backend setup */
    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN8_3DSTATE_SBE_SWIZ | (11 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, GEN6_3DSTATE_SF | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT);
    ADVANCE_BATCH(batch);
}

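/*
 * Pixel shader setup.  PS_KERNEL writes the render target directly,
 * while PS_SUBPIC_KERNEL enables src-alpha/inv-src-alpha blending so
 * the subpicture is composited over the video.  max_threads is the
 * device's max_wm_threads minus two reserved threads.
 */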
static void
gen8_emit_wm_state(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    unsigned int num_samples = 0;
    unsigned int max_threads;

    max_threads = i965->intel.device_info->max_wm_threads - 2;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN8_3DSTATE_PSEXTRA | (2 - 2));
    OUT_BATCH(batch,
              (GEN8_PSX_PIXEL_SHADER_VALID | GEN8_PSX_ATTRIBUTE_ENABLE));
    ADVANCE_BATCH(batch);

    if (kernel == PS_KERNEL) {
        BEGIN_BATCH(batch, 2);
        OUT_BATCH(batch, GEN8_3DSTATE_PSBLEND | (2 - 2));
        OUT_BATCH(batch,
                GEN8_PS_BLEND_HAS_WRITEABLE_RT);
        ADVANCE_BATCH(batch);
    } else if (kernel == PS_SUBPIC_KERNEL) {
        BEGIN_BATCH(batch, 2);
        OUT_BATCH(batch, GEN8_3DSTATE_PSBLEND | (2 - 2));
        OUT_BATCH(batch,
                (GEN8_PS_BLEND_HAS_WRITEABLE_RT |
                 GEN8_PS_BLEND_COLOR_BUFFER_BLEND_ENABLE |
                 (I965_BLENDFACTOR_SRC_ALPHA << GEN8_PS_BLEND_SRC_ALPHA_BLEND_FACTOR_SHIFT) |
                 (I965_BLENDFACTOR_INV_SRC_ALPHA << GEN8_PS_BLEND_DST_ALPHA_BLEND_FACTOR_SHIFT) |
                 (I965_BLENDFACTOR_SRC_ALPHA << GEN8_PS_BLEND_SRC_BLEND_FACTOR_SHIFT) |
                 (I965_BLENDFACTOR_INV_SRC_ALPHA << GEN8_PS_BLEND_DST_BLEND_FACTOR_SHIFT)));
        ADVANCE_BATCH(batch);
    }

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN6_3DSTATE_WM | (2 - 2));
    OUT_BATCH(batch,
              GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS | (11 - 2));
    OUT_BATCH(batch, URB_CS_ENTRY_SIZE);
    OUT_BATCH(batch, 0);
    /* DW3-4. Constant buffer 0 */
    OUT_BATCH(batch, render_state->curbe_offset);
    OUT_BATCH(batch, 0);

    /* DW5-10. Constant buffer 1-3 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 12);
    OUT_BATCH(batch, GEN7_3DSTATE_PS | (12 - 2));
    /* PS shader address */
    OUT_BATCH(batch, render_state->render_kernels[kernel].kernel_offset);

    OUT_BATCH(batch, 0);
  1433.     /* DW3. PS shader flag .Binding table cnt/sample cnt */
  1434.     OUT_BATCH(batch,
  1435.               (1 << GEN7_PS_SAMPLER_COUNT_SHIFT) |
  1436.               (5 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT) |
  1437.               GEN7_PS_VECTOR_MASK_ENABLE);
  1438.     /* DW4-5. Scatch space */
  1439.     OUT_BATCH(batch, 0); /* scratch space base offset */
  1440.     OUT_BATCH(batch, 0);
  1441.     /* DW6. PS shader threads. */
  1442.     OUT_BATCH(batch,
  1443.               ((max_threads - 1) << GEN8_PS_MAX_THREADS_SHIFT) | num_samples |
  1444.               GEN7_PS_PUSH_CONSTANT_ENABLE |
  1445.               GEN7_PS_16_DISPATCH_ENABLE);
  1446.     /* DW7. PS shader GRF */
  1447.     OUT_BATCH(batch,
  1448.               (6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0));
  1449.     OUT_BATCH(batch, 0); /* kernel 1 pointer */
  1450.     OUT_BATCH(batch, 0);
  1451.     OUT_BATCH(batch, 0); /* kernel 2 pointer */
  1452.     OUT_BATCH(batch, 0);
  1453.     ADVANCE_BATCH(batch);
  1454.  
  1455.     BEGIN_BATCH(batch, 2);
  1456.     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
  1457.     OUT_BATCH(batch, BINDING_TABLE_OFFSET);
  1458.     ADVANCE_BATCH(batch);
  1459. }
  1460.  
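/*
 * Video rendering needs no depth buffer: program a null D32_FLOAT surface
 * and zero out the hierarchical-depth, stencil and clear-params packets.
 */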
static void
gen8_emit_depth_buffer_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 8);
    OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_BUFFER | (8 - 2));
    OUT_BATCH(batch,
              (I965_DEPTHFORMAT_D32_FLOAT << 18) |
              (I965_SURFACE_NULL << 29));
    /* DW2-3. Depth buffer address */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW4-7. Surface structure */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Update the hierarchical depth buffer */
    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, GEN7_3DSTATE_HIER_DEPTH_BUFFER | (5 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Update the stencil buffer */
    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, GEN7_3DSTATE_STENCIL_BUFFER | (5 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

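/* Leave depth and stencil tests disabled (all WM_DEPTH_STENCIL bits zero). */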
static void
gen8_emit_depth_stencil_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, GEN8_3DSTATE_WM_DEPTH_STENCIL | (3 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

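/* No HiZ operation is requested; emit the packet with all dwords zero. */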
static void
gen8_emit_wm_hz_op(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, GEN8_3DSTATE_WM_HZ_OP | (5 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

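/*
 * Point the hardware at the CC viewport written into the dynamic state
 * buffer; the combined SF/CLIP viewport pointer is left at zero.
 */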
static void
gen8_emit_viewport_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
    OUT_BATCH(batch, render_state->cc_viewport_offset);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

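/* Bind the PS sampler table that was written into the dynamic state buffer. */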
static void
gen8_emit_sampler_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
    OUT_BATCH(batch, render_state->sampler_offset);
    ADVANCE_BATCH(batch);
}

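/* The drawing rectangle is emitted the same way as on earlier generations. */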
static void
gen7_emit_drawing_rectangle(VADriverContextP ctx)
{
    i965_render_drawing_rectangle(ctx);
}

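/*
 * Emit the complete 3D pipeline for one blit inside an atomic batch
 * section: flush, state base addresses, fixed-function state (with the
 * unused shader stages disabled by gen8_emit_bypass_state), the selected
 * pixel shader, and finally the vertex data.
 */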
static void
gen8_render_emit_states(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_emit_invarient_states(ctx);
    gen8_emit_state_base_address(ctx);
    gen8_emit_viewport_state_pointers(ctx);
    gen8_emit_urb(ctx);
    gen8_emit_cc_state_pointers(ctx);
    gen8_emit_sampler_state_pointers(ctx);
    gen8_emit_wm_hz_op(ctx);
    gen8_emit_bypass_state(ctx);
    gen8_emit_vs_state(ctx);
    gen8_emit_clip_state(ctx);
    gen8_emit_sf_state(ctx);
    gen8_emit_depth_stencil_state(ctx);
    gen8_emit_wm_state(ctx, kernel);
    gen8_emit_depth_buffer_state(ctx);
    gen7_emit_drawing_rectangle(ctx);
    gen8_emit_vertex_element_state(ctx);
    gen8_emit_vertices(ctx);
    intel_batchbuffer_end_atomic(batch);
}

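/*
 * Entry point for rendering a video surface: build all state for the
 * scaling blit, clear the destination region, emit the batch with
 * PS_KERNEL and flush it to the hardware.
 */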
static void
gen8_render_put_surface(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    gen8_render_initialize(ctx);
    gen8_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags);
    gen8_clear_dest_region(ctx);
    gen8_render_emit_states(ctx, PS_KERNEL);
    intel_batchbuffer_flush(batch);
}

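/*
 * Write the blend state used for subpictures: straight source-alpha /
 * inverse-source-alpha blending with pre- and post-blend clamping to
 * the [0, 1] range.
 */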
static void
gen8_subpicture_render_blend_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen8_global_blend_state *global_blend_state;
    struct gen8_blend_state_rt *blend_state;
    unsigned char *cc_ptr;

    dri_bo_map(render_state->dynamic_state.bo, 1);
    assert(render_state->dynamic_state.bo->virtual);

    cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
                        render_state->blend_state_offset;

    global_blend_state = (struct gen8_global_blend_state *) cc_ptr;

    memset(global_blend_state, 0, render_state->blend_state_size);
    /* The global blend state is followed by the blend state for the render target */
    blend_state = (struct gen8_blend_state_rt *)(global_blend_state + 1);
    blend_state->blend0.color_blend_func = I965_BLENDFUNCTION_ADD;
    blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
    blend_state->blend0.src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    blend_state->blend0.alpha_blend_func = I965_BLENDFUNCTION_ADD;
    blend_state->blend0.ia_dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
    blend_state->blend0.ia_src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    blend_state->blend0.colorbuf_blend = 1;
    blend_state->blend1.post_blend_clamp_enable = 1;
    blend_state->blend1.pre_blend_clamp_enable = 1;
    blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */

    dri_bo_unmap(render_state->dynamic_state.bo);
}

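/*
 * Upload the single push constant consumed by the subpicture shader:
 * the global alpha value, 1.0 unless VA_SUBPICTURE_GLOBAL_ALPHA is set.
 */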
static void
gen8_subpic_render_upload_constants(VADriverContextP ctx,
                                    struct object_surface *obj_surface)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    float *constant_buffer;
    float global_alpha = 1.0;
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
    unsigned char *cc_ptr;

    if (obj_subpic->flags & VA_SUBPICTURE_GLOBAL_ALPHA) {
        global_alpha = obj_subpic->global_alpha;
    }

    dri_bo_map(render_state->dynamic_state.bo, 1);
    assert(render_state->dynamic_state.bo->virtual);

    cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
                                render_state->curbe_offset;

    constant_buffer = (float *) cc_ptr;
    *constant_buffer = global_alpha;

    dri_bo_unmap(render_state->dynamic_state.bo);
}

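/*
 * Prepare all state needed to composite a subpicture: destination and
 * source surfaces, sampler, CC viewport, color-calc and blend state,
 * push constants and vertex data.
 */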
static void
gen8_subpicture_render_setup_states(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    gen8_render_dest_surface_state(ctx, 0);
    gen8_subpic_render_src_surfaces_state(ctx, obj_surface);
    gen8_render_sampler(ctx);
    gen8_render_cc_viewport(ctx);
    gen8_render_color_calc_state(ctx);
    gen8_subpicture_render_blend_state(ctx);
    gen8_subpic_render_upload_constants(ctx, obj_surface);
    i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
}

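/*
 * Entry point for blending a subpicture onto a surface with the ARGB
 * kernel; the subpicture image's palette is uploaded as well.
 */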
static void
gen8_render_put_subpicture(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];

    assert(obj_subpic);
    gen8_render_initialize(ctx);
    gen8_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect);
    gen8_render_emit_states(ctx, PS_SUBPIC_KERNEL);
    i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
    intel_batchbuffer_flush(batch);
}

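/*
 * The render_terminate hook: release every buffer object owned by the
 * render state, including the draw region if one was allocated.
 */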
static void
gen8_render_terminate(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    dri_bo_unreference(render_state->vb.vertex_buffer);
    render_state->vb.vertex_buffer = NULL;

    dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
    render_state->wm.surface_state_binding_table_bo = NULL;

    if (render_state->instruction_state.bo) {
        dri_bo_unreference(render_state->instruction_state.bo);
        render_state->instruction_state.bo = NULL;
    }

    if (render_state->dynamic_state.bo) {
        dri_bo_unreference(render_state->dynamic_state.bo);
        render_state->dynamic_state.bo = NULL;
    }

    if (render_state->indirect_state.bo) {
        dri_bo_unreference(render_state->indirect_state.bo);
        render_state->indirect_state.bo = NULL;
    }

    if (render_state->draw_region) {
        dri_bo_unreference(render_state->draw_region->bo);
        free(render_state->draw_region);
        render_state->draw_region = NULL;
    }
}

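/*
 * Hook up the Gen8 render entry points and copy the shader kernels into
 * one instruction buffer object, packed back to back at ALIGNMENT-aligned
 * offsets (kernel_size starts at 4096 bytes to leave room for padding).
 */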
bool
gen8_render_init(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    int i, kernel_size;
    unsigned int kernel_offset, end_offset;
    unsigned char *kernel_ptr;
    struct i965_kernel *kernel;

    render_state->render_put_surface = gen8_render_put_surface;
    render_state->render_put_subpicture = gen8_render_put_subpicture;
    render_state->render_terminate = gen8_render_terminate;

    memcpy(render_state->render_kernels, render_kernels_gen8,
           sizeof(render_state->render_kernels));

    kernel_size = 4096;

    for (i = 0; i < NUM_RENDER_KERNEL; i++) {
        kernel = &render_state->render_kernels[i];

        if (!kernel->size)
            continue;

        kernel_size += kernel->size;
    }

    render_state->instruction_state.bo = dri_bo_alloc(i965->intel.bufmgr,
                                                      "kernel shader",
                                                      kernel_size,
                                                      0x1000);
    if (render_state->instruction_state.bo == NULL) {
        WARN_ONCE("failed to allocate buffer space for kernel shaders\n");
        return false;
    }

    render_state->instruction_state.bo_size = kernel_size;
    render_state->instruction_state.end_offset = 0;
    end_offset = 0;

    dri_bo_map(render_state->instruction_state.bo, 1);
    kernel_ptr = (unsigned char *)(render_state->instruction_state.bo->virtual);
    for (i = 0; i < NUM_RENDER_KERNEL; i++) {
        kernel = &render_state->render_kernels[i];
        kernel_offset = end_offset;
        kernel->kernel_offset = kernel_offset;

        if (!kernel->size)
            continue;

        memcpy(kernel_ptr + kernel_offset, kernel->bin, kernel->size);

        end_offset += ALIGN(kernel->size, ALIGNMENT);
    }

    render_state->instruction_state.end_offset = end_offset;

    dri_bo_unmap(render_state->instruction_state.bo);

    return true;
}