/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Keith Packard <keithp@keithp.com>
 *    Xiang Haihao <haihao.xiang@intel.com>
 *    Zhao Yakui <yakui.zhao@intel.com>
 *
 */

/*
 * Most of the rendering code is ported from xf86-video-intel/src/i965_video.c
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <math.h>

#include <va/va_drmcommon.h>

#include "intel_batchbuffer.h"
#include "intel_driver.h"
#include "i965_defines.h"
#include "i965_drv_video.h"
#include "i965_structs.h"

#include "i965_render.h"

#define SF_KERNEL_NUM_GRF       16
#define SF_MAX_THREADS          1

#define PS_KERNEL_NUM_GRF       48
#define PS_MAX_THREADS          32

/* Programs for Gen8 */
static const uint32_t sf_kernel_static_gen8[][4] = {

};
static const uint32_t ps_kernel_static_gen8[][4] = {
#include "shaders/render/exa_wm_src_affine.g8b"
#include "shaders/render/exa_wm_src_sample_planar.g8b"
#include "shaders/render/exa_wm_yuv_color_balance.g8b"
#include "shaders/render/exa_wm_yuv_rgb.g8b"
#include "shaders/render/exa_wm_write.g8b"
};

static const uint32_t ps_subpic_kernel_static_gen8[][4] = {
#include "shaders/render/exa_wm_src_affine.g8b"
#include "shaders/render/exa_wm_src_sample_argb.g8b"
#include "shaders/render/exa_wm_write.g8b"
};


#define SURFACE_STATE_PADDED_SIZE       SURFACE_STATE_PADDED_SIZE_GEN8

#define SURFACE_STATE_OFFSET(index)     (SURFACE_STATE_PADDED_SIZE * index)
#define BINDING_TABLE_OFFSET            SURFACE_STATE_OFFSET(MAX_RENDER_SURFACES)

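/*
 * The surface-state/binding-table BO holds MAX_RENDER_SURFACES padded
 * surface-state blocks back to back, followed by the binding table: entry
 * i of the table simply stores the byte offset SURFACE_STATE_OFFSET(i) of
 * the i-th surface state (see how both sides of that mapping are written
 * in gen8_render_src_surface_state() below).
 */
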
enum {
    SF_KERNEL = 0,
    PS_KERNEL,
    PS_SUBPIC_KERNEL
};

static struct i965_kernel render_kernels_gen8[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen8,
        sizeof(sf_kernel_static_gen8),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen8,
        sizeof(ps_kernel_static_gen8),
        NULL
    },

    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen8,
        sizeof(ps_subpic_kernel_static_gen8),
        NULL
    }
};

#define URB_VS_ENTRIES        8
#define URB_VS_ENTRY_SIZE     1

#define URB_GS_ENTRIES        0
#define URB_GS_ENTRY_SIZE     0

#define URB_CLIP_ENTRIES      0
#define URB_CLIP_ENTRY_SIZE   0

#define URB_SF_ENTRIES        1
#define URB_SF_ENTRY_SIZE     2

#define URB_CS_ENTRIES        4
#define URB_CS_ENTRY_SIZE     4

static float yuv_to_rgb_bt601[3][4] = {
{1.164,         0,      1.596,          -0.06275,},
{1.164,         -0.392, -0.813,         -0.50196,},
{1.164,         2.017,  0,              -0.50196,},
};

static float yuv_to_rgb_bt709[3][4] = {
{1.164,         0,      1.793,          -0.06275,},
{1.164,         -0.213, -0.533,         -0.50196,},
{1.164,         2.112,  0,              -0.50196,},
};

static float yuv_to_rgb_smpte_240[3][4] = {
{1.164,         0,      1.794,          -0.06275,},
{1.164,         -0.258, -0.5425,        -0.50196,},
{1.164,         2.078,  0,              -0.50196,},
};
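
/*
 * Each row {c0, c1, c2, bias} of these tables drives one output channel;
 * judging by the values, the fourth column is the bias subtracted from the
 * corresponding *input* channel (-16/255 for Y, -128/255 for U/V) rather
 * than an output offset, giving e.g. for BT.601:
 *     R = 1.164 * (Y - 16/255) + 1.596 * (V - 128/255)
 * (an assumption inferred from the coefficients and the exa_wm_yuv_rgb
 * kernel, not stated in this file).
 */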

static void
gen8_render_set_surface_tiling(struct gen8_surface_state *ss, uint32_t tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss0.tiled_surface = 0;
        ss->ss0.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

/* Set "Shader Channel Select" for GEN8+ (the "chanel" spelling below
 * matches the field names of struct gen8_surface_state) */
void
gen8_render_set_surface_scs(struct gen8_surface_state *ss)
{
    ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
    ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
    ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
    ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
}

static void
gen8_render_set_surface_state(
    struct gen8_surface_state *ss,
    dri_bo                    *bo,
    unsigned long              offset,
    int                        width,
    int                        height,
    int                        pitch,
    int                        format,
    unsigned int               flags
)
{
    unsigned int tiling;
    unsigned int swizzle;

    memset(ss, 0, sizeof(*ss));

    switch (flags & (I965_PP_FLAG_TOP_FIELD|I965_PP_FLAG_BOTTOM_FIELD)) {
    case I965_PP_FLAG_BOTTOM_FIELD:
        ss->ss0.vert_line_stride_ofs = 1;
        /* fall-through */
    case I965_PP_FLAG_TOP_FIELD:
        ss->ss0.vert_line_stride = 1;
        height /= 2;
        break;
    }

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;

    ss->ss8.base_addr = bo->offset + offset;

    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;

    ss->ss3.pitch = pitch - 1;

    /* Always set to 1 (align-4 mode) per the B-spec */
    ss->ss0.vertical_alignment = 1;
    ss->ss0.horizontal_alignment = 1;

    dri_bo_get_tiling(bo, &tiling, &swizzle);
    gen8_render_set_surface_tiling(ss, tiling);
}

static void
gen8_render_src_surface_state(
    VADriverContextP ctx,
    int              index,
    dri_bo          *region,
    unsigned long    offset,
    int              w,
    int              h,
    int              pitch,
    int              format,
    unsigned int     flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;

    assert(index < MAX_RENDER_SURFACES);

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    gen8_render_set_surface_state(ss,
                                  region, offset,
                                  w, h,
                                  pitch, format, flags);
    gen8_render_set_surface_scs(ss);
    dri_bo_emit_reloc(ss_bo,
                      I915_GEM_DOMAIN_SAMPLER, 0,
                      offset,
                      SURFACE_STATE_OFFSET(index) + offsetof(struct gen8_surface_state, ss8),
                      region);

    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
    render_state->wm.sampler_count++;
}

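/*
 * Binding-table slot usage for the video path (slot 0 is the destination,
 * bound in gen8_render_dest_surface_state()): slots 1-2 sample the Y
 * plane, slots 3-4 the U plane (or the interleaved UV plane for NV12),
 * slots 5-6 the V plane. Each plane is bound to two consecutive slots,
 * matching the binding-table indices the sampling kernels expect.
 */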
static void
gen8_render_src_surfaces_state(
    VADriverContextP ctx,
    struct object_surface *obj_surface,
    unsigned int     flags
)
{
    int region_pitch;
    int rw, rh;
    dri_bo *region;

    region_pitch = obj_surface->width;
    rw = obj_surface->orig_width;
    rh = obj_surface->orig_height;
    region = obj_surface->bo;

    gen8_render_src_surface_state(ctx, 1, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);     /* Y */
    gen8_render_src_surface_state(ctx, 2, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);

    if (obj_surface->fourcc == VA_FOURCC_Y800) /* single plane for grayscale */
        return;

    if (obj_surface->fourcc == VA_FOURCC_NV12) {
        gen8_render_src_surface_state(ctx, 3, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8G8_UNORM, flags); /* UV */
        gen8_render_src_surface_state(ctx, 4, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8G8_UNORM, flags);
    } else {
        gen8_render_src_surface_state(ctx, 3, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags); /* U */
        gen8_render_src_surface_state(ctx, 4, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags);
        gen8_render_src_surface_state(ctx, 5, region,
                                      region_pitch * obj_surface->y_cr_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags); /* V */
        gen8_render_src_surface_state(ctx, 6, region,
                                      region_pitch * obj_surface->y_cr_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags);
    }
}

static void
gen8_subpic_render_src_surfaces_state(VADriverContextP ctx,
                                      struct object_surface *obj_surface)
{
    dri_bo *subpic_region;
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
    struct object_image *obj_image = obj_subpic->obj_image;

    assert(obj_surface);
    assert(obj_surface->bo);
    subpic_region = obj_image->bo;
    /* subpicture surface */
    gen8_render_src_surface_state(ctx, 1, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);
    gen8_render_src_surface_state(ctx, 2, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);
}

static void
gen8_render_dest_surface_state(VADriverContextP ctx, int index)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;
    int format;
    assert(index < MAX_RENDER_SURFACES);

    if (dest_region->cpp == 2) {
        format = I965_SURFACEFORMAT_B5G6R5_UNORM;
    } else {
        format = I965_SURFACEFORMAT_B8G8R8A8_UNORM;
    }

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    gen8_render_set_surface_state(ss,
                                  dest_region->bo, 0,
                                  dest_region->width, dest_region->height,
                                  dest_region->pitch, format, 0);
    gen8_render_set_surface_scs(ss);
    dri_bo_emit_reloc(ss_bo,
                      I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                      0,
                      SURFACE_STATE_OFFSET(index) + offsetof(struct gen8_surface_state, ss8),
                      dest_region->bo);

    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
}

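/*
 * Fills the vertex buffer with the three vertices of a RECTLIST, each
 * vertex being four floats: (u, v) texture coordinates followed by (x, y)
 * screen coordinates. This matches the (4 * 4)-byte pitch and the 12 * 4
 * byte buffer bound in gen8_emit_vertices(). The rotation table picks
 * which texture corners map to the bottom-right, bottom-left and top-left
 * vertices.
 */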
static void
i965_fill_vertex_buffer(
    VADriverContextP ctx,
    float tex_coords[4], /* [(u1,v1);(u2,v2)] */
    float vid_coords[4]  /* [(x1,y1);(x2,y2)] */
)
{
    struct i965_driver_data * const i965 = i965_driver_data(ctx);
    float vb[12];

    enum { X1, Y1, X2, Y2 };

    static const unsigned int g_rotation_indices[][6] = {
        [VA_ROTATION_NONE] = { X2, Y2, X1, Y2, X1, Y1 },
        [VA_ROTATION_90]   = { X2, Y1, X2, Y2, X1, Y2 },
        [VA_ROTATION_180]  = { X1, Y1, X2, Y1, X2, Y2 },
        [VA_ROTATION_270]  = { X1, Y2, X1, Y1, X2, Y1 },
    };

    const unsigned int * const rotation_indices =
        g_rotation_indices[i965->rotation_attrib->value];

    vb[0]  = tex_coords[rotation_indices[0]]; /* bottom-right corner */
    vb[1]  = tex_coords[rotation_indices[1]];
    vb[2]  = vid_coords[X2];
    vb[3]  = vid_coords[Y2];

    vb[4]  = tex_coords[rotation_indices[2]]; /* bottom-left corner */
    vb[5]  = tex_coords[rotation_indices[3]];
    vb[6]  = vid_coords[X1];
    vb[7]  = vid_coords[Y2];

    vb[8]  = tex_coords[rotation_indices[4]]; /* top-left corner */
    vb[9]  = tex_coords[rotation_indices[5]];
    vb[10] = vid_coords[X1];
    vb[11] = vid_coords[Y1];

    dri_bo_subdata(i965->render_state.vb.vertex_buffer, 0, sizeof(vb), vb);
}

static void
i965_subpic_render_upload_vertex(VADriverContextP ctx,
                                 struct object_surface *obj_surface,
                                 const VARectangle *output_rect)
{
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic     *obj_subpic   = obj_surface->obj_subpic[index];
    float tex_coords[4], vid_coords[4];
    VARectangle dst_rect;

    if (obj_subpic->flags & VA_SUBPICTURE_DESTINATION_IS_SCREEN_COORD)
        dst_rect = obj_subpic->dst_rect;
    else {
        const float sx  = (float)output_rect->width  / obj_surface->orig_width;
        const float sy  = (float)output_rect->height / obj_surface->orig_height;
        dst_rect.x      = output_rect->x + sx * obj_subpic->dst_rect.x;
        dst_rect.y      = output_rect->y + sy * obj_subpic->dst_rect.y;
        dst_rect.width  = sx * obj_subpic->dst_rect.width;
        dst_rect.height = sy * obj_subpic->dst_rect.height;
    }

    tex_coords[0] = (float)obj_subpic->src_rect.x / obj_subpic->width;
    tex_coords[1] = (float)obj_subpic->src_rect.y / obj_subpic->height;
    tex_coords[2] = (float)(obj_subpic->src_rect.x + obj_subpic->src_rect.width) / obj_subpic->width;
    tex_coords[3] = (float)(obj_subpic->src_rect.y + obj_subpic->src_rect.height) / obj_subpic->height;

    vid_coords[0] = dst_rect.x;
    vid_coords[1] = dst_rect.y;
    vid_coords[2] = (float)(dst_rect.x + dst_rect.width);
    vid_coords[3] = (float)(dst_rect.y + dst_rect.height);

    i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
}

static void
i965_render_upload_vertex(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    float tex_coords[4], vid_coords[4];
    int width, height;

    width  = obj_surface->orig_width;
    height = obj_surface->orig_height;

    tex_coords[0] = (float)src_rect->x / width;
    tex_coords[1] = (float)src_rect->y / height;
    tex_coords[2] = (float)(src_rect->x + src_rect->width) / width;
    tex_coords[3] = (float)(src_rect->y + src_rect->height) / height;

    vid_coords[0] = dest_region->x + dst_rect->x;
    vid_coords[1] = dest_region->y + dst_rect->y;
    vid_coords[2] = vid_coords[0] + dst_rect->width;
    vid_coords[3] = vid_coords[1] + dst_rect->height;

    i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
}

static void
i965_render_drawing_rectangle(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;

    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, CMD_DRAWING_RECTANGLE | 2);
    OUT_BATCH(batch, 0x00000000);
    OUT_BATCH(batch, (dest_region->width - 1) | (dest_region->height - 1) << 16);
    OUT_BATCH(batch, 0x00000000);
    ADVANCE_BATCH(batch);
}

static void
i965_render_upload_image_palette(
    VADriverContextP ctx,
    struct object_image *obj_image,
    unsigned int     alpha
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int i;

    assert(obj_image);

    if (!obj_image)
        return;

    if (obj_image->image.num_palette_entries == 0)
        return;

    BEGIN_BATCH(batch, 1 + obj_image->image.num_palette_entries);
    OUT_BATCH(batch, CMD_SAMPLER_PALETTE_LOAD | (obj_image->image.num_palette_entries - 1));
    /* fill the palette: bits 0-23 carry the color, bits 24-31 the alpha */
    for (i = 0; i < obj_image->image.num_palette_entries; i++)
        OUT_BATCH(batch, (alpha << 24) | obj_image->palette[i]);
    ADVANCE_BATCH(batch);
}

static void
gen8_clear_dest_region(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    unsigned int blt_cmd, br13;
    int pitch;

    blt_cmd = GEN8_XY_COLOR_BLT_CMD;
    br13 = 0xf0 << 16;
    pitch = dest_region->pitch;

    if (dest_region->cpp == 4) {
        br13 |= BR13_8888;
        blt_cmd |= (XY_COLOR_BLT_WRITE_RGB | XY_COLOR_BLT_WRITE_ALPHA);
    } else {
        assert(dest_region->cpp == 2);
        br13 |= BR13_565;
    }

    if (dest_region->tiling != I915_TILING_NONE) {
        blt_cmd |= XY_COLOR_BLT_DST_TILED;
        pitch /= 4;
    }

    br13 |= pitch;

    intel_batchbuffer_start_atomic_blt(batch, 24);
    BEGIN_BLT_BATCH(batch, 7);

    OUT_BATCH(batch, blt_cmd);
    OUT_BATCH(batch, br13);
    OUT_BATCH(batch, (dest_region->y << 16) | (dest_region->x));
    OUT_BATCH(batch, ((dest_region->y + dest_region->height) << 16) |
              (dest_region->x + dest_region->width));
    OUT_RELOC(batch, dest_region->bo,
              I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
              0);
    OUT_BATCH(batch, 0x0);
    OUT_BATCH(batch, 0x0);
    ADVANCE_BATCH(batch);
    intel_batchbuffer_end_atomic(batch);
}


/*
 * for GEN8
 */
#define ALIGNMENT       64

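/*
 * gen8_render_initialize() packs all dynamic state into a single BO; each
 * section below is padded to ALIGNMENT (64) bytes and its offset recorded
 * in render_state:
 *
 *   curbe | samplers | CC viewport | CC state | blend | SF_CLIP | scissor
 *
 * These offsets are later handed to the hardware relative to the dynamic
 * state base address programmed in gen8_emit_state_base_address().
 */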
static void
gen8_render_initialize(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    dri_bo *bo;
    int size;
    unsigned int end_offset;

    /* VERTEX BUFFER */
    dri_bo_unreference(render_state->vb.vertex_buffer);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "vertex buffer",
                      4096,
                      4096);
    assert(bo);
    render_state->vb.vertex_buffer = bo;

    /* WM */
    dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "surface state & binding table",
                      (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
                      4096);
    assert(bo);
    render_state->wm.surface_state_binding_table_bo = bo;

    render_state->curbe_size = 256;

    render_state->wm.sampler_count = 0;

    render_state->sampler_size = MAX_SAMPLERS * sizeof(struct gen8_sampler_state);

    render_state->cc_state_size = sizeof(struct gen6_color_calc_state);

    render_state->cc_viewport_size = sizeof(struct i965_cc_viewport);

    render_state->blend_state_size = sizeof(struct gen8_global_blend_state) +
                        16 * sizeof(struct gen8_blend_state_rt);

    render_state->sf_clip_size = 1024;

    render_state->scissor_size = 1024;

    size = ALIGN(render_state->curbe_size, ALIGNMENT) +
        ALIGN(render_state->sampler_size, ALIGNMENT) +
        ALIGN(render_state->cc_viewport_size, ALIGNMENT) +
        ALIGN(render_state->cc_state_size, ALIGNMENT) +
        ALIGN(render_state->blend_state_size, ALIGNMENT) +
        ALIGN(render_state->sf_clip_size, ALIGNMENT) +
        ALIGN(render_state->scissor_size, ALIGNMENT);

    dri_bo_unreference(render_state->dynamic_state.bo);
    bo = dri_bo_alloc(i965->intel.bufmgr,
                      "dynamic_state",
                      size,
                      4096);

    render_state->dynamic_state.bo = bo;

    end_offset = 0;
    render_state->dynamic_state.end_offset = 0;

    /* Constant buffer offset */
    render_state->curbe_offset = end_offset;
    end_offset += ALIGN(render_state->curbe_size, ALIGNMENT);

    /* Sampler_state */
    render_state->sampler_offset = end_offset;
    end_offset += ALIGN(render_state->sampler_size, ALIGNMENT);

    /* CC_VIEWPORT_state */
    render_state->cc_viewport_offset = end_offset;
    end_offset += ALIGN(render_state->cc_viewport_size, ALIGNMENT);

    /* CC_STATE_state */
    render_state->cc_state_offset = end_offset;
    end_offset += ALIGN(render_state->cc_state_size, ALIGNMENT);

    /* Blend_state */
    render_state->blend_state_offset = end_offset;
    end_offset += ALIGN(render_state->blend_state_size, ALIGNMENT);

    /* SF_CLIP_state */
    render_state->sf_clip_offset = end_offset;
    end_offset += ALIGN(render_state->sf_clip_size, ALIGNMENT);

    /* SCISSOR_state */
    render_state->scissor_offset = end_offset;
    end_offset += ALIGN(render_state->scissor_size, ALIGNMENT);

    /* update the end offset of dynamic_state */
    render_state->dynamic_state.end_offset = end_offset;
}

static void
gen8_render_sampler(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen8_sampler_state *sampler_state;
    int i;
    unsigned char *cc_ptr;

    assert(render_state->wm.sampler_count > 0);
    assert(render_state->wm.sampler_count <= MAX_SAMPLERS);

    dri_bo_map(render_state->dynamic_state.bo, 1);
    assert(render_state->dynamic_state.bo->virtual);

    cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
                        render_state->sampler_offset;

    sampler_state = (struct gen8_sampler_state *) cc_ptr;

    for (i = 0; i < render_state->wm.sampler_count; i++) {
        memset(sampler_state, 0, sizeof(*sampler_state));
        sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss3.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss3.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss3.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state++;
    }

    dri_bo_unmap(render_state->dynamic_state.bo);
}

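/*
 * A single render-target blend state follows the global blend state in
 * memory. Logic op 0xc is COPY in the hardware logic-op encoding, so
 * blending is effectively a plain source write for the main video path;
 * real alpha blending for subpictures is enabled separately via
 * 3DSTATE_PS_BLEND in gen8_emit_wm_state().
 */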
static void
gen8_render_blend_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen8_global_blend_state *global_blend_state;
    struct gen8_blend_state_rt *blend_state;
    unsigned char *cc_ptr;

    dri_bo_map(render_state->dynamic_state.bo, 1);
    assert(render_state->dynamic_state.bo->virtual);

    cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
                        render_state->blend_state_offset;

    global_blend_state = (struct gen8_global_blend_state *) cc_ptr;

    memset(global_blend_state, 0, render_state->blend_state_size);
    /* Global blend state + blend_state for Render Target */
    blend_state = (struct gen8_blend_state_rt *)(global_blend_state + 1);
    blend_state->blend1.logic_op_enable = 1;
    blend_state->blend1.logic_op_func = 0xc;
    blend_state->blend1.pre_blend_clamp_enable = 1;

    dri_bo_unmap(render_state->dynamic_state.bo);
}


static void
gen8_render_cc_viewport(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_viewport *cc_viewport;
    unsigned char *cc_ptr;

    dri_bo_map(render_state->dynamic_state.bo, 1);
    assert(render_state->dynamic_state.bo->virtual);

    cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
                        render_state->cc_viewport_offset;

    cc_viewport = (struct i965_cc_viewport *) cc_ptr;

    memset(cc_viewport, 0, sizeof(*cc_viewport));

    cc_viewport->min_depth = -1.e35;
    cc_viewport->max_depth = 1.e35;

    dri_bo_unmap(render_state->dynamic_state.bo);
}

static void
gen8_render_color_calc_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen6_color_calc_state *color_calc_state;
    unsigned char *cc_ptr;

    dri_bo_map(render_state->dynamic_state.bo, 1);
    assert(render_state->dynamic_state.bo->virtual);

    cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
                        render_state->cc_state_offset;

    color_calc_state = (struct gen6_color_calc_state *) cc_ptr;

    memset(color_calc_state, 0, sizeof(*color_calc_state));
    color_calc_state->constant_r = 1.0;
    color_calc_state->constant_g = 0.0;
    color_calc_state->constant_b = 1.0;
    color_calc_state->constant_a = 1.0;
    dri_bo_unmap(render_state->dynamic_state.bo);
}

#define PI  3.1415926

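/*
 * CURBE layout written below and consumed by the PS kernels:
 *   ushort[0]    source format flag: 0 = planar YUV, 1 = NV12, 2 = Y800
 *   ushort[1]    1 = skip the color-balance transform, 0 = apply it
 *   float[4..7]  contrast, brightness, cos(hue)*contrast*saturation,
 *                sin(hue)*contrast*saturation
 *   float[8..19] the selected 3x4 YUV->RGB conversion matrix
 */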
static void
gen8_render_upload_constants(VADriverContextP ctx,
                             struct object_surface *obj_surface,
                             unsigned int flags)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    unsigned short *constant_buffer;
    unsigned char *cc_ptr;
    float *color_balance_base;
    float contrast = (float)i965->contrast_attrib->value / DEFAULT_CONTRAST;
    float brightness = (float)i965->brightness_attrib->value / 255; /* YUV is float in the shader */
    float hue = (float)i965->hue_attrib->value / 180 * PI;
    float saturation = (float)i965->saturation_attrib->value / DEFAULT_SATURATION;
    float *yuv_to_rgb;
    unsigned int color_flag;

    dri_bo_map(render_state->dynamic_state.bo, 1);
    assert(render_state->dynamic_state.bo->virtual);

    cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
                        render_state->curbe_offset;

    constant_buffer = (unsigned short *) cc_ptr;

    if (obj_surface->subsampling == SUBSAMPLE_YUV400) {
        assert(obj_surface->fourcc == VA_FOURCC_Y800);

        *constant_buffer = 2;
    } else {
        if (obj_surface->fourcc == VA_FOURCC_NV12)
            *constant_buffer = 1;
        else
            *constant_buffer = 0;
    }

    if (i965->contrast_attrib->value == DEFAULT_CONTRAST &&
        i965->brightness_attrib->value == DEFAULT_BRIGHTNESS &&
        i965->hue_attrib->value == DEFAULT_HUE &&
        i965->saturation_attrib->value == DEFAULT_SATURATION)
        constant_buffer[1] = 1; /* skip color balance transformation */
    else
        constant_buffer[1] = 0;

    color_balance_base = (float *)constant_buffer + 4;
    *color_balance_base++ = contrast;
    *color_balance_base++ = brightness;
    *color_balance_base++ = cos(hue) * contrast * saturation;
    *color_balance_base++ = sin(hue) * contrast * saturation;

    color_flag = flags & VA_SRC_COLOR_MASK;
    yuv_to_rgb = (float *)constant_buffer + 8;
    if (color_flag == VA_SRC_BT709)
        memcpy(yuv_to_rgb, yuv_to_rgb_bt709, sizeof(yuv_to_rgb_bt709));
    else if (color_flag == VA_SRC_SMPTE_240)
        memcpy(yuv_to_rgb, yuv_to_rgb_smpte_240, sizeof(yuv_to_rgb_smpte_240));
    else
        memcpy(yuv_to_rgb, yuv_to_rgb_bt601, sizeof(yuv_to_rgb_bt601));

    dri_bo_unmap(render_state->dynamic_state.bo);
}

static void
gen8_render_setup_states(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    gen8_render_dest_surface_state(ctx, 0);
    gen8_render_src_surfaces_state(ctx, obj_surface, flags);
    gen8_render_sampler(ctx);
    gen8_render_cc_viewport(ctx);
    gen8_render_color_calc_state(ctx);
    gen8_render_blend_state(ctx);
    gen8_render_upload_constants(ctx, obj_surface, flags);
    i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
}

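/*
 * CMD_STATE_BASE_ADDRESS is 16 dwords on Gen8: each base address is a
 * two-dword (64-bit) field. Surface state base points at the combined
 * surface-state/binding-table BO, so the BINDING_TABLE_OFFSET values
 * programmed elsewhere resolve relative to it; dynamic state base points
 * at the BO laid out in gen8_render_initialize().
 */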
static void
gen8_emit_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 16);
    OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (16 - 2));
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW4. Surface state base address */
    OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
    OUT_BATCH(batch, 0);

    /* DW6. Dynamic state base address */
    OUT_RELOC(batch, render_state->dynamic_state.bo, I915_GEM_DOMAIN_RENDER | I915_GEM_DOMAIN_SAMPLER,
              0, BASE_ADDRESS_MODIFY);
    OUT_BATCH(batch, 0);

    /* DW8. Indirect object base address */
    OUT_BATCH(batch, BASE_ADDRESS_MODIFY);
    OUT_BATCH(batch, 0);

    /* DW10. Instruction base address */
    OUT_RELOC(batch, render_state->instruction_state.bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
    OUT_BATCH(batch, 0);

    /* DW12 */
    OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* General state upper bound */
    OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
    OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
    OUT_BATCH(batch, 0xFFFF0000 | BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
    ADVANCE_BATCH(batch);
}

static void
gen8_emit_cc_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (2 - 2));
    OUT_BATCH(batch, (render_state->cc_state_offset + 1));
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
    OUT_BATCH(batch, (render_state->blend_state_offset + 1));
    ADVANCE_BATCH(batch);
}

static void
gen8_emit_vertices(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, CMD_VERTEX_BUFFERS | (5 - 2));
    OUT_BATCH(batch,
              (0 << GEN8_VB0_BUFFER_INDEX_SHIFT) |
              (0 << GEN8_VB0_MOCS_SHIFT) |
              GEN7_VB0_ADDRESS_MODIFYENABLE |
              ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
    OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 12 * 4);
    ADVANCE_BATCH(batch);

    /* The topology given in the 3DPRIMITIVE command is overridden by the
     * VF_TOPOLOGY command */
    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN8_3DSTATE_VF_TOPOLOGY | (2 - 2));
    OUT_BATCH(batch,
              _3DPRIM_RECTLIST);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, CMD_3DPRIMITIVE | (7 - 2));
    OUT_BATCH(batch,
              GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL);
    OUT_BATCH(batch, 3); /* vertex count per instance */
    OUT_BATCH(batch, 0); /* start vertex offset */
    OUT_BATCH(batch, 1); /* single instance */
    OUT_BATCH(batch, 0); /* start instance location */
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

static void
gen8_emit_vertex_element_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /*
     * The VUE layout
     * dword 0-3: pad (0, 0, 0, 0)
     * dword 4-7: position (x, y, 1.0, 1.0)
     * dword 8-11: texture coordinate 0 (u0, v0, 1.0, 1.0)
     */

    /* Set up our vertex elements, sourced from the single vertex buffer. */
    OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (7 - 2));

    /* Element state 0. These are 4 dwords of 0 required for the VUE format.
     * We don't really know or care what they do.
     */
    OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
              GEN8_VE0_VALID |
              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
              (0 << VE0_OFFSET_SHIFT));
    OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT) |
              (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) |
              (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) |
              (I965_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT));

    /* offset 8: X, Y -> {x, y, 1.0, 1.0} */
    OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
              GEN8_VE0_VALID |
              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
              (8 << VE0_OFFSET_SHIFT));
    OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
              (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));

    /* offset 0: u,v -> {U, V, 1.0, 1.0} */
    OUT_BATCH(batch, (0 << GEN8_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
              GEN8_VE0_VALID |
              (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
              (0 << VE0_OFFSET_SHIFT));
    OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
              (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
              (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
}

static void
gen8_emit_vs_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* disable VS constant buffer */
    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (11 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* CS Buffer 0 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* CS Buffer 1 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* CS Buffer 2 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* CS Buffer 3 */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 9);
    OUT_BATCH(batch, GEN6_3DSTATE_VS | (9 - 2));
    OUT_BATCH(batch, 0); /* without VS kernel */
    OUT_BATCH(batch, 0);
    /* VS shader dispatch flag */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW6. VS shader GRF and URB buffer definition */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_VS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_VS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

/*
 * URB layout on GEN8
 * ----------------------------------------
 * | PS Push Constants (8KB) | VS entries |
 * ----------------------------------------
 */
static void
gen8_emit_urb(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int num_urb_entries = 64;

    /* The minimum number of URB entries is 64 */

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_VS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_DS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_HS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_GS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
    /* PS push-constant size is 8KB, base address is 0KB */
    OUT_BATCH(batch,
              (0 << GEN8_PUSH_CONSTANT_BUFFER_OFFSET_SHIFT) |
              (8 << GEN8_PUSH_CONSTANT_BUFFER_SIZE_SHIFT));
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_URB_VS | (2 - 2));
    OUT_BATCH(batch,
              (num_urb_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) |
              (4 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
              (4 << GEN7_URB_STARTING_ADDRESS_SHIFT));
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_URB_GS | (2 - 2));
    OUT_BATCH(batch,
              (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
              (5 << GEN7_URB_STARTING_ADDRESS_SHIFT));
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_URB_HS | (2 - 2));
    OUT_BATCH(batch,
              (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
              (6 << GEN7_URB_STARTING_ADDRESS_SHIFT));
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_URB_DS | (2 - 2));
    OUT_BATCH(batch,
              (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
              (7 << GEN7_URB_STARTING_ADDRESS_SHIFT));
    ADVANCE_BATCH(batch);
}

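/*
 * All geometry stages other than VS are unused by the video render path;
 * the function below programs GS/HS/TE/DS and stream-out to zero so the
 * pipeline is effectively VS -> SF -> WM only.
 */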
static void
gen8_emit_bypass_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    /* bypass GS */
    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (11 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 10);
    OUT_BATCH(batch, GEN6_3DSTATE_GS | (10 - 2));
    /* GS shader address */
    OUT_BATCH(batch, 0); /* without GS kernel */
    OUT_BATCH(batch, 0);
    /* DW3. GS shader dispatch flag */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW6. GS shader GRF and URB offset/length */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_GS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* disable HS */
    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_HS | (11 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 9);
    OUT_BATCH(batch, GEN7_3DSTATE_HS | (9 - 2));
    OUT_BATCH(batch, 0);
    /* DW2. HS pass-through */
    OUT_BATCH(batch, 0);
    /* DW3. HS shader address */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW5. HS shader flag. URB offset/length and so on */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_HS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable TE */
    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, GEN7_3DSTATE_TE | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable DS */
    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_DS | (11 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 9);
    OUT_BATCH(batch, GEN7_3DSTATE_DS | (9 - 2));
    /* DW1. DS shader pointer */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW3-5. DS shader dispatch flag */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW6-7. DS shader pass-through, GRF, URB offset/length, thread number */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW8. DS shader output URB */
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_DS | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Disable STREAMOUT */
    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, GEN7_3DSTATE_STREAMOUT | (5 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

static void
gen8_emit_invarient_states(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN8_3DSTATE_MULTISAMPLE | (2 - 2));
    OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
              GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
    ADVANCE_BATCH(batch);

    /* Update 3D Multisample pattern */
    BEGIN_BATCH(batch, 9);
    OUT_BATCH(batch, GEN8_3DSTATE_SAMPLE_PATTERN | (9 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
    OUT_BATCH(batch, 1);
    ADVANCE_BATCH(batch);

    /* Set system instruction pointer */
    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, CMD_STATE_SIP | 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

static void
gen8_emit_clip_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* pass-through */
    OUT_BATCH(batch, 0);
}

static void
gen8_emit_sf_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, GEN8_3DSTATE_RASTER | (5 - 2));
    OUT_BATCH(batch, GEN8_3DSTATE_RASTER_CULL_NONE);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, GEN7_3DSTATE_SBE | (4 - 2));
    OUT_BATCH(batch,
              (GEN8_SBE_FORCE_URB_ENTRY_READ_LENGTH) |
              (GEN8_SBE_FORCE_URB_ENTRY_READ_OFFSET) |
              (1 << GEN7_SBE_NUM_OUTPUTS_SHIFT) |
              (1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT) |
              (1 << GEN8_SBE_URB_ENTRY_READ_OFFSET_SHIFT));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* SBE for backend setup */
    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN8_3DSTATE_SBE_SWIZ | (11 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 4);
    OUT_BATCH(batch, GEN6_3DSTATE_SF | (4 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT);
    ADVANCE_BATCH(batch);
}

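/*
 * PS blend setup below: the main video kernel (PS_KERNEL) only needs a
 * writeable render target, while the subpicture kernel additionally
 * enables src-alpha / inv-src-alpha color-buffer blending so subpictures
 * are composited over the already-rendered frame.
 */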
  1374. static void
  1375. gen8_emit_wm_state(VADriverContextP ctx, int kernel)
  1376. {
  1377.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1378.     struct intel_batchbuffer *batch = i965->batch;
  1379.     struct i965_render_state *render_state = &i965->render_state;
  1380.     unsigned int num_samples = 0;
  1381.     unsigned int max_threads;
  1382.  
  1383.     max_threads = i965->intel.device_info->max_wm_threads - 2;
  1384.  
  1385.     BEGIN_BATCH(batch, 2);
  1386.     OUT_BATCH(batch, GEN8_3DSTATE_PSEXTRA | (2 - 2));
  1387.     OUT_BATCH(batch,
  1388.               (GEN8_PSX_PIXEL_SHADER_VALID | GEN8_PSX_ATTRIBUTE_ENABLE));
  1389.     ADVANCE_BATCH(batch);
  1390.  
    if (kernel == PS_KERNEL) {
        BEGIN_BATCH(batch, 2);
        OUT_BATCH(batch, GEN8_3DSTATE_PSBLEND | (2 - 2));
        OUT_BATCH(batch,
                  GEN8_PS_BLEND_HAS_WRITEABLE_RT);
        ADVANCE_BATCH(batch);
    } else if (kernel == PS_SUBPIC_KERNEL) {
        BEGIN_BATCH(batch, 2);
        OUT_BATCH(batch, GEN8_3DSTATE_PSBLEND | (2 - 2));
        OUT_BATCH(batch,
                  (GEN8_PS_BLEND_HAS_WRITEABLE_RT |
                   GEN8_PS_BLEND_COLOR_BUFFER_BLEND_ENABLE |
                   (I965_BLENDFACTOR_SRC_ALPHA << GEN8_PS_BLEND_SRC_ALPHA_BLEND_FACTOR_SHIFT) |
                   (I965_BLENDFACTOR_INV_SRC_ALPHA << GEN8_PS_BLEND_DST_ALPHA_BLEND_FACTOR_SHIFT) |
                   (I965_BLENDFACTOR_SRC_ALPHA << GEN8_PS_BLEND_SRC_BLEND_FACTOR_SHIFT) |
                   (I965_BLENDFACTOR_INV_SRC_ALPHA << GEN8_PS_BLEND_DST_BLEND_FACTOR_SHIFT)));
        ADVANCE_BATCH(batch);
    }

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN6_3DSTATE_WM | (2 - 2));
    OUT_BATCH(batch,
              GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 11);
    OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS | (11 - 2));
    OUT_BATCH(batch, URB_CS_ENTRY_SIZE);
    OUT_BATCH(batch, 0);
    /* DW3-4. Constant buffer 0 */
    OUT_BATCH(batch, render_state->curbe_offset);
    OUT_BATCH(batch, 0);

    /* DW5-10. Constant buffers 1-3 (unused) */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

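    /*
     * 3DSTATE_PS: the kernel offset is relative to the instruction base
     * address programmed earlier; dispatch is SIMD16, with the push
     * constants (the CURBE above) enabled and the payload starting at
     * GRF 6.
     */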
    BEGIN_BATCH(batch, 12);
    OUT_BATCH(batch, GEN7_3DSTATE_PS | (12 - 2));
    /* PS shader address */
    OUT_BATCH(batch, render_state->render_kernels[kernel].kernel_offset);

    OUT_BATCH(batch, 0);
    /* DW3. PS shader flags: sampler count / binding table entry count */
    OUT_BATCH(batch,
              (1 << GEN7_PS_SAMPLER_COUNT_SHIFT) |
              (5 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
    /* DW4-5. Scratch space */
    OUT_BATCH(batch, 0); /* scratch space base offset */
    OUT_BATCH(batch, 0);
    /* DW6. PS shader threads */
    OUT_BATCH(batch,
              ((max_threads - 1) << GEN8_PS_MAX_THREADS_SHIFT) | num_samples |
              GEN7_PS_PUSH_CONSTANT_ENABLE |
              GEN7_PS_16_DISPATCH_ENABLE);
    /* DW7. PS shader GRF */
    OUT_BATCH(batch,
              (6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0));
    OUT_BATCH(batch, 0); /* kernel 1 pointer */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0); /* kernel 2 pointer */
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
    OUT_BATCH(batch, BINDING_TABLE_OFFSET);
    ADVANCE_BATCH(batch);
}

static void
gen8_emit_depth_buffer_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

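    /*
     * No depth test is needed for a screen-aligned video rectangle, so
     * a null depth surface is programmed and the hierarchical-depth,
     * stencil and clear-params packets below are zeroed to match.
     */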
    BEGIN_BATCH(batch, 8);
    OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_BUFFER | (8 - 2));
    OUT_BATCH(batch,
              (I965_DEPTHFORMAT_D32_FLOAT << 18) |
              (I965_SURFACE_NULL << 29));
    /* DW2-3. Depth Buffer Address */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    /* DW4-7. Surface structure */
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Update the hierarchical depth buffer */
    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, GEN7_3DSTATE_HIER_DEPTH_BUFFER | (5 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    /* Update the stencil buffer */
    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, GEN7_3DSTATE_STENCIL_BUFFER | (5 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

static void
gen8_emit_depth_stencil_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch, GEN8_3DSTATE_WM_DEPTH_STENCIL | (3 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

static void
gen8_emit_wm_hz_op(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

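    /* 3DSTATE_WM_HZ_OP left fully zeroed: no depth/stencil clear or
     * HiZ resolve operation is requested. */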
    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, GEN8_3DSTATE_WM_HZ_OP | (5 - 2));
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

static void
gen8_emit_viewport_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
    OUT_BATCH(batch, render_state->cc_viewport_offset);
    ADVANCE_BATCH(batch);

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2));
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

static void
gen8_emit_sampler_state_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
    OUT_BATCH(batch, render_state->sampler_offset);
    ADVANCE_BATCH(batch);
}

static void
gen7_emit_drawing_rectangle(VADriverContextP ctx)
{
    i965_render_drawing_rectangle(ctx);
}

static void
gen8_render_emit_states(VADriverContextP ctx, int kernel)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

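    /*
     * Emit the whole pipeline setup atomically so the batch cannot be
     * flushed in the middle of it: cache flush first, then the fixed
     * bypass/VS/clip/SF/WM state, and finally the vertex data.
     */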
    intel_batchbuffer_start_atomic(batch, 0x1000);
    intel_batchbuffer_emit_mi_flush(batch);
    gen8_emit_invarient_states(ctx);
    gen8_emit_state_base_address(ctx);
    gen8_emit_viewport_state_pointers(ctx);
    gen8_emit_urb(ctx);
    gen8_emit_cc_state_pointers(ctx);
    gen8_emit_sampler_state_pointers(ctx);
    gen8_emit_wm_hz_op(ctx);
    gen8_emit_bypass_state(ctx);
    gen8_emit_vs_state(ctx);
    gen8_emit_clip_state(ctx);
    gen8_emit_sf_state(ctx);
    gen8_emit_depth_stencil_state(ctx);
    gen8_emit_wm_state(ctx, kernel);
    gen8_emit_depth_buffer_state(ctx);
    gen7_emit_drawing_rectangle(ctx);
    gen8_emit_vertex_element_state(ctx);
    gen8_emit_vertices(ctx);
    intel_batchbuffer_end_atomic(batch);
}

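/*
 * Top-level entry point for rendering a video surface: build the
 * surface, sampler and dynamic state, clear the destination region,
 * emit the pipeline with the planar YUV->RGB kernel and flush.
 */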
static void
gen8_render_put_surface(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    gen8_render_initialize(ctx);
    gen8_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags);
    gen8_clear_dest_region(ctx);
    gen8_render_emit_states(ctx, PS_KERNEL);
    intel_batchbuffer_flush(batch);
}

static void
gen8_subpicture_render_blend_state(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct gen8_global_blend_state *global_blend_state;
    struct gen8_blend_state_rt *blend_state;
    unsigned char *cc_ptr;

    dri_bo_map(render_state->dynamic_state.bo, 1);
    assert(render_state->dynamic_state.bo->virtual);

    cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
                        render_state->blend_state_offset;

    global_blend_state = (struct gen8_global_blend_state*) cc_ptr;

    memset(global_blend_state, 0, render_state->blend_state_size);
    /* Global blend state + blend_state for Render Target */
    blend_state = (struct gen8_blend_state_rt *)(global_blend_state + 1);
    blend_state->blend0.color_blend_func = I965_BLENDFUNCTION_ADD;
    blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
    blend_state->blend0.src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    blend_state->blend0.alpha_blend_func = I965_BLENDFUNCTION_ADD;
    blend_state->blend0.ia_dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
    blend_state->blend0.ia_src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    blend_state->blend0.colorbuf_blend = 1;
    blend_state->blend1.post_blend_clamp_enable = 1;
    blend_state->blend1.pre_blend_clamp_enable = 1;
    blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */

    dri_bo_unmap(render_state->dynamic_state.bo);
}

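/*
 * Write the subpicture's global alpha into the first float of the CURBE
 * (constant buffer 0 of 3DSTATE_CONSTANT_PS); the ARGB sampling kernel
 * presumably scales the sampled alpha by this value.
 */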
static void
gen8_subpic_render_upload_constants(VADriverContextP ctx,
                                    struct object_surface *obj_surface)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    float *constant_buffer;
    float global_alpha = 1.0;
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
    unsigned char *cc_ptr;

    if (obj_subpic->flags & VA_SUBPICTURE_GLOBAL_ALPHA) {
        global_alpha = obj_subpic->global_alpha;
    }

    dri_bo_map(render_state->dynamic_state.bo, 1);
    assert(render_state->dynamic_state.bo->virtual);

    cc_ptr = (unsigned char *) render_state->dynamic_state.bo->virtual +
                                render_state->curbe_offset;

    constant_buffer = (float *) cc_ptr;
    *constant_buffer = global_alpha;

    dri_bo_unmap(render_state->dynamic_state.bo);
}

static void
gen8_subpicture_render_setup_states(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    gen8_render_dest_surface_state(ctx, 0);
    gen8_subpic_render_src_surfaces_state(ctx, obj_surface);
    gen8_render_sampler(ctx);
    gen8_render_cc_viewport(ctx);
    gen8_render_color_calc_state(ctx);
    gen8_subpicture_render_blend_state(ctx);
    gen8_subpic_render_upload_constants(ctx, obj_surface);
    i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
}

static void
gen8_render_put_subpicture(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];

    assert(obj_subpic);
    gen8_render_initialize(ctx);
    gen8_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect);
    gen8_render_emit_states(ctx, PS_SUBPIC_KERNEL);
    i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
    intel_batchbuffer_flush(batch);
}

static void
gen8_render_terminate(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;

    dri_bo_unreference(render_state->vb.vertex_buffer);
    render_state->vb.vertex_buffer = NULL;

    dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
    render_state->wm.surface_state_binding_table_bo = NULL;

    if (render_state->instruction_state.bo) {
        dri_bo_unreference(render_state->instruction_state.bo);
        render_state->instruction_state.bo = NULL;
    }

    if (render_state->dynamic_state.bo) {
        dri_bo_unreference(render_state->dynamic_state.bo);
        render_state->dynamic_state.bo = NULL;
    }

    if (render_state->indirect_state.bo) {
        dri_bo_unreference(render_state->indirect_state.bo);
        render_state->indirect_state.bo = NULL;
    }

    if (render_state->draw_region) {
        dri_bo_unreference(render_state->draw_region->bo);
        free(render_state->draw_region);
        render_state->draw_region = NULL;
    }
}

bool
gen8_render_init(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    int i, kernel_size;
    unsigned int kernel_offset, end_offset;
    unsigned char *kernel_ptr;
    struct i965_kernel *kernel;

    render_state->render_put_surface = gen8_render_put_surface;
    render_state->render_put_subpicture = gen8_render_put_subpicture;
    render_state->render_terminate = gen8_render_terminate;

    memcpy(render_state->render_kernels, render_kernels_gen8,
           sizeof(render_state->render_kernels));

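    /*
     * Size the instruction BO: a 4KB base, which presumably also leaves
     * room for the per-kernel ALIGNMENT padding applied in the copy
     * loop below, plus the raw size of every non-empty kernel.
     */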
    kernel_size = 4096;

    for (i = 0; i < NUM_RENDER_KERNEL; i++) {
        kernel = &render_state->render_kernels[i];

        if (!kernel->size)
            continue;

        kernel_size += kernel->size;
    }

    render_state->instruction_state.bo = dri_bo_alloc(i965->intel.bufmgr,
                                  "kernel shader",
                                  kernel_size,
                                  0x1000);
    if (render_state->instruction_state.bo == NULL) {
        WARN_ONCE("failed to allocate buffer space for the kernel shaders\n");
        return false;
    }

    assert(render_state->instruction_state.bo);

    render_state->instruction_state.bo_size = kernel_size;
    render_state->instruction_state.end_offset = 0;
    end_offset = 0;

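    /*
     * Copy each kernel into the instruction BO, recording its offset
     * (used as the shader address in 3DSTATE_PS) and aligning the next
     * kernel to an ALIGNMENT boundary.
     */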
    dri_bo_map(render_state->instruction_state.bo, 1);
    kernel_ptr = (unsigned char *)(render_state->instruction_state.bo->virtual);
    for (i = 0; i < NUM_RENDER_KERNEL; i++) {
        kernel = &render_state->render_kernels[i];
        kernel_offset = end_offset;
        kernel->kernel_offset = kernel_offset;

        if (!kernel->size)
            continue;

        memcpy(kernel_ptr + kernel_offset, kernel->bin, kernel->size);

        end_offset += ALIGN(kernel->size, ALIGNMENT);
    }

    render_state->instruction_state.end_offset = end_offset;

    dri_bo_unmap(render_state->instruction_state.bo);

    return true;
}