/*
 * Copyright © 2006 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Keith Packard <keithp@keithp.com>
 *    Xiang Haihao <haihao.xiang@intel.com>
 *
 */

/*
 * Most of the rendering code is ported from xf86-video-intel/src/i965_video.c
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <math.h>

#include <va/va_drmcommon.h>

#include "intel_batchbuffer.h"
#include "intel_driver.h"
#include "i965_defines.h"
#include "i965_drv_video.h"
#include "i965_structs.h"

#include "i965_render.h"

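/*
 * The render kernels below are prebuilt GEN shader binaries: each included
 * .g4b file expands to an initializer list of instructions, four uint32_t
 * words (one 128-bit instruction) per row of the array.
 */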
#define SF_KERNEL_NUM_GRF       16
#define SF_MAX_THREADS          1

static const uint32_t sf_kernel_static[][4] =
{
#include "shaders/render/exa_sf.g4b"
};

#define PS_KERNEL_NUM_GRF       48
#define PS_MAX_THREADS          32

#define I965_GRF_BLOCKS(nreg)   (((nreg) + 15) / 16 - 1)
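/*
 * The hardware wants the GRF count in blocks of 16 registers, minus one:
 * e.g. PS_KERNEL_NUM_GRF = 48 gives (48 + 15) / 16 - 1 = 2.
 */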

static const uint32_t ps_kernel_static[][4] =
{
#include "shaders/render/exa_wm_xy.g4b"
#include "shaders/render/exa_wm_src_affine.g4b"
#include "shaders/render/exa_wm_src_sample_planar.g4b"
#include "shaders/render/exa_wm_yuv_color_balance.g4b"
#include "shaders/render/exa_wm_yuv_rgb.g4b"
#include "shaders/render/exa_wm_write.g4b"
};
static const uint32_t ps_subpic_kernel_static[][4] =
{
#include "shaders/render/exa_wm_xy.g4b"
#include "shaders/render/exa_wm_src_affine.g4b"
#include "shaders/render/exa_wm_src_sample_argb.g4b"
#include "shaders/render/exa_wm_write.g4b"
};

/* Programs for Ironlake */
static const uint32_t sf_kernel_static_gen5[][4] =
{
#include "shaders/render/exa_sf.g4b.gen5"
};

static const uint32_t ps_kernel_static_gen5[][4] =
{
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_planar.g4b.gen5"
#include "shaders/render/exa_wm_yuv_color_balance.g4b.gen5"
#include "shaders/render/exa_wm_yuv_rgb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};
static const uint32_t ps_subpic_kernel_static_gen5[][4] =
{
#include "shaders/render/exa_wm_xy.g4b.gen5"
#include "shaders/render/exa_wm_src_affine.g4b.gen5"
#include "shaders/render/exa_wm_src_sample_argb.g4b.gen5"
#include "shaders/render/exa_wm_write.g4b.gen5"
};

/* Programs for Sandybridge */
static const uint32_t sf_kernel_static_gen6[][4] =
{
    /* No SF kernel is used on Sandybridge; the empty array keeps the
     * kernel table layout uniform. */
};

static const uint32_t ps_kernel_static_gen6[][4] = {
#include "shaders/render/exa_wm_src_affine.g6b"
#include "shaders/render/exa_wm_src_sample_planar.g6b"
#include "shaders/render/exa_wm_yuv_color_balance.g6b"
#include "shaders/render/exa_wm_yuv_rgb.g6b"
#include "shaders/render/exa_wm_write.g6b"
};

static const uint32_t ps_subpic_kernel_static_gen6[][4] = {
#include "shaders/render/exa_wm_src_affine.g6b"
#include "shaders/render/exa_wm_src_sample_argb.g6b"
#include "shaders/render/exa_wm_write.g6b"
};

/* Programs for Ivybridge */
static const uint32_t sf_kernel_static_gen7[][4] =
{
};

static const uint32_t ps_kernel_static_gen7[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_planar.g7b"
#include "shaders/render/exa_wm_yuv_color_balance.g7b"
#include "shaders/render/exa_wm_yuv_rgb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

static const uint32_t ps_subpic_kernel_static_gen7[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_argb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

/* Programs for Haswell */
static const uint32_t ps_kernel_static_gen7_haswell[][4] = {
#include "shaders/render/exa_wm_src_affine.g7b"
#include "shaders/render/exa_wm_src_sample_planar.g7b.haswell"
#include "shaders/render/exa_wm_yuv_color_balance.g7b.haswell"
#include "shaders/render/exa_wm_yuv_rgb.g7b"
#include "shaders/render/exa_wm_write.g7b"
};

#define SURFACE_STATE_PADDED_SIZE       MAX(SURFACE_STATE_PADDED_SIZE_GEN6, SURFACE_STATE_PADDED_SIZE_GEN7)

#define SURFACE_STATE_OFFSET(index)     (SURFACE_STATE_PADDED_SIZE * (index))
#define BINDING_TABLE_OFFSET            SURFACE_STATE_OFFSET(MAX_RENDER_SURFACES)
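
/*
 * Surface state and the binding table share one buffer object: the padded
 * surface state blocks for all MAX_RENDER_SURFACES entries come first, and
 * the binding table (one dword offset per surface) follows immediately after.
 */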

/* Reinterpret the bits of a float as a uint32_t (type punning through a
 * union) so float constants can be emitted into the batch as raw dwords. */
static uint32_t float_to_uint(float f)
{
    union {
        uint32_t i;
        float f;
    } x;

    x.f = f;
    return x.i;
}

enum
{
    SF_KERNEL = 0,
    PS_KERNEL,
    PS_SUBPIC_KERNEL
};

static struct i965_kernel render_kernels_gen4[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static,
        sizeof(sf_kernel_static),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static,
        sizeof(ps_kernel_static),
        NULL
    },
    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static,
        sizeof(ps_subpic_kernel_static),
        NULL
    }
};

static struct i965_kernel render_kernels_gen5[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen5,
        sizeof(sf_kernel_static_gen5),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen5,
        sizeof(ps_kernel_static_gen5),
        NULL
    },
    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen5,
        sizeof(ps_subpic_kernel_static_gen5),
        NULL
    }
};

static struct i965_kernel render_kernels_gen6[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen6,
        sizeof(sf_kernel_static_gen6),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen6,
        sizeof(ps_kernel_static_gen6),
        NULL
    },
    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen6,
        sizeof(ps_subpic_kernel_static_gen6),
        NULL
    }
};

static struct i965_kernel render_kernels_gen7[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen7,
        sizeof(sf_kernel_static_gen7),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen7,
        sizeof(ps_kernel_static_gen7),
        NULL
    },
    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen7,
        sizeof(ps_subpic_kernel_static_gen7),
        NULL
    }
};

static struct i965_kernel render_kernels_gen7_haswell[] = {
    {
        "SF",
        SF_KERNEL,
        sf_kernel_static_gen7,
        sizeof(sf_kernel_static_gen7),
        NULL
    },
    {
        "PS",
        PS_KERNEL,
        ps_kernel_static_gen7_haswell,
        sizeof(ps_kernel_static_gen7_haswell),
        NULL
    },
    {
        "PS_SUBPIC",
        PS_SUBPIC_KERNEL,
        ps_subpic_kernel_static_gen7,
        sizeof(ps_subpic_kernel_static_gen7),
        NULL
    }
};

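/*
 * Static URB partitioning for the fixed-function pipeline. Only VS, SF and
 * CS (constant/CURBE) entries are actually used; GS and CLIP are disabled,
 * so their allocations are zero.
 */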
#define URB_VS_ENTRIES        8
#define URB_VS_ENTRY_SIZE     1

#define URB_GS_ENTRIES        0
#define URB_GS_ENTRY_SIZE     0

#define URB_CLIP_ENTRIES      0
#define URB_CLIP_ENTRY_SIZE   0

#define URB_SF_ENTRIES        1
#define URB_SF_ENTRY_SIZE     2

#define URB_CS_ENTRIES        4
#define URB_CS_ENTRY_SIZE     4

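/*
 * YUV -> RGB coefficients, one 3x4 block per colorspace, passed to the
 * shader through the constant buffer. The first three columns of each row
 * hold the (Y, U, V) weights for one output channel, with 1.164 = 255/219
 * expanding video-range luma; the fourth column appears to hold the
 * per-component input offsets consumed by the kernel:
 * -0.06275 = -16/255 (luma) and -0.50196 = -128/255 (chroma).
 */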
static float yuv_to_rgb_bt601[3][4] = {
    { 1.164,     0,      1.596,   -0.06275, },
    { 1.164,    -0.392, -0.813,   -0.50196, },
    { 1.164,     2.017,  0,       -0.50196, },
};

static float yuv_to_rgb_bt709[3][4] = {
    { 1.164,     0,      1.793,   -0.06275, },
    { 1.164,    -0.213, -0.533,   -0.50196, },
    { 1.164,     2.112,  0,       -0.50196, },
};

static float yuv_to_rgb_smpte_240[3][4] = {
    { 1.164,     0,      1.794,   -0.06275, },
    { 1.164,    -0.258, -0.5425,  -0.50196, },
    { 1.164,     2.078,  0,       -0.50196, },
};

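/*
 * The VS is not used for rendering: vertices pass through, so the unit
 * state only sizes its URB allocation and disables the vertex cache.
 */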
static void
i965_render_vs_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_vs_unit_state *vs_state;

    dri_bo_map(render_state->vs.state, 1);
    assert(render_state->vs.state->virtual);
    vs_state = render_state->vs.state->virtual;
    memset(vs_state, 0, sizeof(*vs_state));

    if (IS_IRONLAKE(i965->intel.device_info))
        vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES >> 2;
    else
        vs_state->thread4.nr_urb_entries = URB_VS_ENTRIES;

    vs_state->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1;
    vs_state->vs6.vs_enable = 0;
    vs_state->vs6.vert_cache_disable = 1;

    dri_bo_unmap(render_state->vs.state);
}

static void
i965_render_sf_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_sf_unit_state *sf_state;

    dri_bo_map(render_state->sf.state, 1);
    assert(render_state->sf.state->virtual);
    sf_state = render_state->sf.state->virtual;
    memset(sf_state, 0, sizeof(*sf_state));

    sf_state->thread0.grf_reg_count = I965_GRF_BLOCKS(SF_KERNEL_NUM_GRF);
    sf_state->thread0.kernel_start_pointer = render_state->render_kernels[SF_KERNEL].bo->offset >> 6;

    sf_state->sf1.single_program_flow = 1; /* XXX */
    sf_state->sf1.binding_table_entry_count = 0;
    sf_state->sf1.thread_priority = 0;
    sf_state->sf1.floating_point_mode = 0; /* Mesa does this */
    sf_state->sf1.illegal_op_exception_enable = 1;
    sf_state->sf1.mask_stack_exception_enable = 1;
    sf_state->sf1.sw_exception_enable = 1;

    /* scratch space is not used in our kernel */
    sf_state->thread2.per_thread_scratch_space = 0;
    sf_state->thread2.scratch_space_base_pointer = 0;

    sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */
    sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */
    sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */
    sf_state->thread3.urb_entry_read_offset = 0;
    sf_state->thread3.dispatch_grf_start_reg = 3;

    sf_state->thread4.max_threads = SF_MAX_THREADS - 1;
    sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1;
    sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES;
    sf_state->thread4.stats_enable = 1;

    sf_state->sf5.viewport_transform = 0; /* skip viewport */

    sf_state->sf6.cull_mode = I965_CULLMODE_NONE;
    sf_state->sf6.scissor = 0;

    sf_state->sf7.trifan_pv = 2;

    sf_state->sf6.dest_org_vbias = 0x8;
    sf_state->sf6.dest_org_hbias = 0x8;

    dri_bo_emit_reloc(render_state->sf.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      sf_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_sf_unit_state, thread0),
                      render_state->render_kernels[SF_KERNEL].bo);

    dri_bo_unmap(render_state->sf.state);
}

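/*
 * One sampler is programmed per bound source surface (wm.sampler_count is
 * bumped once per source surface state), all using bilinear filtering and
 * clamped texture coordinates.
 */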
static void
i965_render_sampler(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_sampler_state *sampler_state;
    int i;

    assert(render_state->wm.sampler_count > 0);
    assert(render_state->wm.sampler_count <= MAX_SAMPLERS);

    dri_bo_map(render_state->wm.sampler, 1);
    assert(render_state->wm.sampler->virtual);
    sampler_state = render_state->wm.sampler->virtual;
    for (i = 0; i < render_state->wm.sampler_count; i++) {
        memset(sampler_state, 0, sizeof(*sampler_state));
        sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
        sampler_state->ss1.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss1.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state->ss1.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
        sampler_state++;
    }

    dri_bo_unmap(render_state->wm.sampler);
}
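
/*
 * WM (pixel shader) unit state for subpicture blending: points the unit at
 * the PS_SUBPIC kernel and the shared sampler block.
 */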
static void
i965_subpic_render_wm_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_wm_unit_state *wm_state;

    assert(render_state->wm.sampler);

    dri_bo_map(render_state->wm.state, 1);
    assert(render_state->wm.state->virtual);
    wm_state = render_state->wm.state->virtual;
    memset(wm_state, 0, sizeof(*wm_state));

    wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    wm_state->thread0.kernel_start_pointer = render_state->render_kernels[PS_SUBPIC_KERNEL].bo->offset >> 6;

    wm_state->thread1.single_program_flow = 1; /* XXX */

    if (IS_IRONLAKE(i965->intel.device_info))
        wm_state->thread1.binding_table_entry_count = 0; /* hardware requirement */
    else
        wm_state->thread1.binding_table_entry_count = 7;

    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */

    wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */
    wm_state->thread3.const_urb_entry_read_length = 4;
    wm_state->thread3.const_urb_entry_read_offset = 0;
    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */

    wm_state->wm4.stats_enable = 0;
    wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5;

    if (IS_IRONLAKE(i965->intel.device_info)) {
        wm_state->wm4.sampler_count = 0;        /* hardware requirement */
    } else {
        wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
    }

    wm_state->wm5.max_threads = i965->intel.device_info->max_wm_threads - 1;
    wm_state->wm5.thread_dispatch_enable = 1;
    wm_state->wm5.enable_16_pix = 1;
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_wm_unit_state, thread0),
                      render_state->render_kernels[PS_SUBPIC_KERNEL].bo);

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->wm4.sampler_count << 2,
                      offsetof(struct i965_wm_unit_state, wm4),
                      render_state->wm.sampler);

    dri_bo_unmap(render_state->wm.state);
}

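/*
 * WM unit state for the main video path; identical to the subpicture setup
 * except that it dispatches the planar-YUV PS kernel.
 */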
static void
i965_render_wm_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_wm_unit_state *wm_state;

    assert(render_state->wm.sampler);

    dri_bo_map(render_state->wm.state, 1);
    assert(render_state->wm.state->virtual);
    wm_state = render_state->wm.state->virtual;
    memset(wm_state, 0, sizeof(*wm_state));

    wm_state->thread0.grf_reg_count = I965_GRF_BLOCKS(PS_KERNEL_NUM_GRF);
    wm_state->thread0.kernel_start_pointer = render_state->render_kernels[PS_KERNEL].bo->offset >> 6;

    wm_state->thread1.single_program_flow = 1; /* XXX */

    if (IS_IRONLAKE(i965->intel.device_info))
        wm_state->thread1.binding_table_entry_count = 0;        /* hardware requirement */
    else
        wm_state->thread1.binding_table_entry_count = 7;

    wm_state->thread2.scratch_space_base_pointer = 0;
    wm_state->thread2.per_thread_scratch_space = 0; /* 1024 bytes */

    wm_state->thread3.dispatch_grf_start_reg = 2; /* XXX */
    wm_state->thread3.const_urb_entry_read_length = 4;
    wm_state->thread3.const_urb_entry_read_offset = 0;
    wm_state->thread3.urb_entry_read_length = 1; /* XXX */
    wm_state->thread3.urb_entry_read_offset = 0; /* XXX */

    wm_state->wm4.stats_enable = 0;
    wm_state->wm4.sampler_state_pointer = render_state->wm.sampler->offset >> 5;

    if (IS_IRONLAKE(i965->intel.device_info)) {
        wm_state->wm4.sampler_count = 0;        /* hardware requirement */
    } else {
        wm_state->wm4.sampler_count = (render_state->wm.sampler_count + 3) / 4;
    }

    wm_state->wm5.max_threads = i965->intel.device_info->max_wm_threads - 1;
    wm_state->wm5.thread_dispatch_enable = 1;
    wm_state->wm5.enable_16_pix = 1;
    wm_state->wm5.enable_8_pix = 0;
    wm_state->wm5.early_depth_test = 1;

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->thread0.grf_reg_count << 1,
                      offsetof(struct i965_wm_unit_state, thread0),
                      render_state->render_kernels[PS_KERNEL].bo);

    dri_bo_emit_reloc(render_state->wm.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      wm_state->wm4.sampler_count << 2,
                      offsetof(struct i965_wm_unit_state, wm4),
                      render_state->wm.sampler);

    dri_bo_unmap(render_state->wm.state);
}

static void
i965_render_cc_viewport(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_viewport *cc_viewport;

    dri_bo_map(render_state->cc.viewport, 1);
    assert(render_state->cc.viewport->virtual);
    cc_viewport = render_state->cc.viewport->virtual;
    memset(cc_viewport, 0, sizeof(*cc_viewport));

    cc_viewport->min_depth = -1.e35;
    cc_viewport->max_depth = 1.e35;

    dri_bo_unmap(render_state->cc.viewport);
}

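/*
 * CC unit for subpicture rendering: color blending is enabled with
 * src_alpha / inv_src_alpha factors so the subpicture is composited over
 * the video frame.
 */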
static void
i965_subpic_render_cc_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_unit_state *cc_state;

    assert(render_state->cc.viewport);

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    cc_state = render_state->cc.state->virtual;
    memset(cc_state, 0, sizeof(*cc_state));

    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 0;   /* disable logic op */
    cc_state->cc3.ia_blend_enable = 0;  /* blend alpha just like colors */
    cc_state->cc3.blend_enable = 1;     /* enable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    cc_state->cc3.alpha_test_format = 0; /* ALPHATEST_UNORM8: store alpha value as UNORM8 */
    cc_state->cc3.alpha_test_func = 5;   /* COMPAREFUNCTION_LESS: pass if less than the reference */
    cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    cc_state->cc5.logicop_func = 0xc;   /* COPY */
    cc_state->cc5.statistics_enable = 1;
    cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_DST_ALPHA;
    cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_DST_ALPHA;

    cc_state->cc6.clamp_post_alpha_blend = 0;
    cc_state->cc6.clamp_pre_alpha_blend = 0;

    /* final color = src_color * src_blend_factor +/- dst_color * dst_blend_factor */
    cc_state->cc6.blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc6.src_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
    cc_state->cc6.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;

    /* alpha test reference */
    cc_state->cc7.alpha_ref.f = 0.0;

    dri_bo_emit_reloc(render_state->cc.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_cc_unit_state, cc4),
                      render_state->cc.viewport);

    dri_bo_unmap(render_state->cc.state);
}

static void
i965_render_cc_unit(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct i965_cc_unit_state *cc_state;

    assert(render_state->cc.viewport);

    dri_bo_map(render_state->cc.state, 1);
    assert(render_state->cc.state->virtual);
    cc_state = render_state->cc.state->virtual;
    memset(cc_state, 0, sizeof(*cc_state));

    cc_state->cc0.stencil_enable = 0;   /* disable stencil */
    cc_state->cc2.depth_test = 0;       /* disable depth test */
    cc_state->cc2.logicop_enable = 1;   /* enable logic op */
    cc_state->cc3.ia_blend_enable = 0;  /* blend alpha just like colors */
    cc_state->cc3.blend_enable = 0;     /* disable color blend */
    cc_state->cc3.alpha_test = 0;       /* disable alpha test */
    cc_state->cc4.cc_viewport_state_offset = render_state->cc.viewport->offset >> 5;

    cc_state->cc5.dither_enable = 0;    /* disable dither */
    cc_state->cc5.logicop_func = 0xc;   /* COPY */
    cc_state->cc5.statistics_enable = 1;
    cc_state->cc5.ia_blend_function = I965_BLENDFUNCTION_ADD;
    cc_state->cc5.ia_src_blend_factor = I965_BLENDFACTOR_ONE;
    cc_state->cc5.ia_dest_blend_factor = I965_BLENDFACTOR_ONE;

    dri_bo_emit_reloc(render_state->cc.state,
                      I915_GEM_DOMAIN_INSTRUCTION, 0,
                      0,
                      offsetof(struct i965_cc_unit_state, cc4),
                      render_state->cc.viewport);

    dri_bo_unmap(render_state->cc.state);
}

static void
i965_render_set_surface_tiling(struct i965_surface_state *ss, unsigned int tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss3.tiled_surface = 0;
        ss->ss3.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss3.tiled_surface = 1;
        ss->ss3.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss3.tiled_surface = 1;
        ss->ss3.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

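/*
 * Fill a pre-Gen7 SURFACE_STATE. For field rendering the surface is
 * treated as a single field: the vertical line stride skips every other
 * line and the height is halved.
 */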
static void
i965_render_set_surface_state(
    struct i965_surface_state *ss,
    dri_bo                    *bo,
    unsigned long              offset,
    unsigned int               width,
    unsigned int               height,
    unsigned int               pitch,
    unsigned int               format,
    unsigned int               flags
)
{
    unsigned int tiling;
    unsigned int swizzle;

    memset(ss, 0, sizeof(*ss));

    switch (flags & (I965_PP_FLAG_TOP_FIELD|I965_PP_FLAG_BOTTOM_FIELD)) {
    case I965_PP_FLAG_BOTTOM_FIELD:
        ss->ss0.vert_line_stride_ofs = 1;
        /* fall-through */
    case I965_PP_FLAG_TOP_FIELD:
        ss->ss0.vert_line_stride = 1;
        height /= 2;
        break;
    }

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;
    ss->ss0.color_blend = 1;

    ss->ss1.base_addr = bo->offset + offset;

    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;

    ss->ss3.pitch = pitch - 1;

    dri_bo_get_tiling(bo, &tiling, &swizzle);
    i965_render_set_surface_tiling(ss, tiling);
}

static void
gen7_render_set_surface_tiling(struct gen7_surface_state *ss, uint32_t tiling)
{
    switch (tiling) {
    case I915_TILING_NONE:
        ss->ss0.tiled_surface = 0;
        ss->ss0.tile_walk = 0;
        break;
    case I915_TILING_X:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_XMAJOR;
        break;
    case I915_TILING_Y:
        ss->ss0.tiled_surface = 1;
        ss->ss0.tile_walk = I965_TILEWALK_YMAJOR;
        break;
    }
}

/* Set "Shader Channel Select" (the "chanel" spelling below matches the
 * field names in struct gen7_surface_state) */
void
gen7_render_set_surface_scs(struct gen7_surface_state *ss)
{
    ss->ss7.shader_chanel_select_r = HSW_SCS_RED;
    ss->ss7.shader_chanel_select_g = HSW_SCS_GREEN;
    ss->ss7.shader_chanel_select_b = HSW_SCS_BLUE;
    ss->ss7.shader_chanel_select_a = HSW_SCS_ALPHA;
}

static void
gen7_render_set_surface_state(
    struct gen7_surface_state *ss,
    dri_bo                    *bo,
    unsigned long              offset,
    int                        width,
    int                        height,
    int                        pitch,
    int                        format,
    unsigned int               flags
)
{
    unsigned int tiling;
    unsigned int swizzle;

    memset(ss, 0, sizeof(*ss));

    switch (flags & (I965_PP_FLAG_TOP_FIELD|I965_PP_FLAG_BOTTOM_FIELD)) {
    case I965_PP_FLAG_BOTTOM_FIELD:
        ss->ss0.vert_line_stride_ofs = 1;
        /* fall-through */
    case I965_PP_FLAG_TOP_FIELD:
        ss->ss0.vert_line_stride = 1;
        height /= 2;
        break;
    }

    ss->ss0.surface_type = I965_SURFACE_2D;
    ss->ss0.surface_format = format;

    ss->ss1.base_addr = bo->offset + offset;

    ss->ss2.width = width - 1;
    ss->ss2.height = height - 1;

    ss->ss3.pitch = pitch - 1;

    dri_bo_get_tiling(bo, &tiling, &swizzle);
    gen7_render_set_surface_tiling(ss, tiling);
}

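/*
 * Program surface state slot `index` for one source plane and point the
 * matching binding table entry at it. Each call also accounts for one
 * sampler, so wm.sampler_count ends up equal to the number of source
 * planes bound.
 */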
static void
i965_render_src_surface_state(
    VADriverContextP ctx,
    int              index,
    dri_bo          *region,
    unsigned long    offset,
    int              w,
    int              h,
    int              pitch,
    int              format,
    unsigned int     flags
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;

    assert(index < MAX_RENDER_SURFACES);

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    if (IS_GEN7(i965->intel.device_info)) {
        gen7_render_set_surface_state(ss,
                                      region, offset,
                                      w, h,
                                      pitch, format, flags);
        if (IS_HASWELL(i965->intel.device_info))
            gen7_render_set_surface_scs(ss);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_SAMPLER, 0,
                          offset,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
                          region);
    } else {
        i965_render_set_surface_state(ss,
                                      region, offset,
                                      w, h,
                                      pitch, format, flags);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_SAMPLER, 0,
                          offset,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
                          region);
    }

    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
    render_state->wm.sampler_count++;
}

static void
i965_render_src_surfaces_state(
    VADriverContextP ctx,
    struct object_surface *obj_surface,
    unsigned int     flags
)
{
    int region_pitch;
    int rw, rh;
    dri_bo *region;

    region_pitch = obj_surface->width;
    rw = obj_surface->orig_width;
    rh = obj_surface->orig_height;
    region = obj_surface->bo;

    i965_render_src_surface_state(ctx, 1, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);     /* Y */
    i965_render_src_surface_state(ctx, 2, region, 0, rw, rh, region_pitch, I965_SURFACEFORMAT_R8_UNORM, flags);

    if (obj_surface->fourcc == VA_FOURCC_Y800) /* single plane for grayscale */
        return;

    if (obj_surface->fourcc == VA_FOURCC_NV12) {
        i965_render_src_surface_state(ctx, 3, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8G8_UNORM, flags); /* UV */
        i965_render_src_surface_state(ctx, 4, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8G8_UNORM, flags);
    } else {
        i965_render_src_surface_state(ctx, 3, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags); /* U */
        i965_render_src_surface_state(ctx, 4, region,
                                      region_pitch * obj_surface->y_cb_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags);
        i965_render_src_surface_state(ctx, 5, region,
                                      region_pitch * obj_surface->y_cr_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags); /* V */
        i965_render_src_surface_state(ctx, 6, region,
                                      region_pitch * obj_surface->y_cr_offset,
                                      obj_surface->cb_cr_width, obj_surface->cb_cr_height, obj_surface->cb_cr_pitch,
                                      I965_SURFACEFORMAT_R8_UNORM, flags);
    }
}

static void
i965_subpic_render_src_surfaces_state(VADriverContextP ctx,
                                      struct object_surface *obj_surface)
{
    dri_bo *subpic_region;
    unsigned int index;
    struct object_subpic *obj_subpic;
    struct object_image *obj_image;

    /* check the surface before dereferencing it */
    assert(obj_surface);
    assert(obj_surface->bo);

    index = obj_surface->subpic_render_idx;
    obj_subpic = obj_surface->obj_subpic[index];
    obj_image = obj_subpic->obj_image;
    subpic_region = obj_image->bo;

    /* subpicture surface */
    i965_render_src_surface_state(ctx, 1, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);
    i965_render_src_surface_state(ctx, 2, subpic_region, 0, obj_subpic->width, obj_subpic->height, obj_subpic->pitch, obj_subpic->format, 0);
}

static void
i965_render_dest_surface_state(VADriverContextP ctx, int index)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    void *ss;
    dri_bo *ss_bo = render_state->wm.surface_state_binding_table_bo;
    int format;

    assert(index < MAX_RENDER_SURFACES);

    if (dest_region->cpp == 2) {
        format = I965_SURFACEFORMAT_B5G6R5_UNORM;
    } else {
        format = I965_SURFACEFORMAT_B8G8R8A8_UNORM;
    }

    dri_bo_map(ss_bo, 1);
    assert(ss_bo->virtual);
    ss = (char *)ss_bo->virtual + SURFACE_STATE_OFFSET(index);

    if (IS_GEN7(i965->intel.device_info)) {
        gen7_render_set_surface_state(ss,
                                      dest_region->bo, 0,
                                      dest_region->width, dest_region->height,
                                      dest_region->pitch, format, 0);
        if (IS_HASWELL(i965->intel.device_info))
            gen7_render_set_surface_scs(ss);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          0,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct gen7_surface_state, ss1),
                          dest_region->bo);
    } else {
        i965_render_set_surface_state(ss,
                                      dest_region->bo, 0,
                                      dest_region->width, dest_region->height,
                                      dest_region->pitch, format, 0);
        dri_bo_emit_reloc(ss_bo,
                          I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                          0,
                          SURFACE_STATE_OFFSET(index) + offsetof(struct i965_surface_state, ss1),
                          dest_region->bo);
    }

    ((unsigned int *)((char *)ss_bo->virtual + BINDING_TABLE_OFFSET))[index] = SURFACE_STATE_OFFSET(index);
    dri_bo_unmap(ss_bo);
}

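/*
 * Fill the vertex buffer with three (S, T, X, Y) vertices: the bottom-right,
 * bottom-left and top-left corners of the destination rectangle (the
 * rectangle primitive used for drawing derives the fourth corner). The
 * rotation table remaps which texture corner each screen corner samples from.
 */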
static void
i965_fill_vertex_buffer(
    VADriverContextP ctx,
    float tex_coords[4], /* [(u1,v1);(u2,v2)] */
    float vid_coords[4]  /* [(x1,y1);(x2,y2)] */
)
{
    struct i965_driver_data * const i965 = i965_driver_data(ctx);
    float vb[12];

    enum { X1, Y1, X2, Y2 };

    static const unsigned int g_rotation_indices[][6] = {
        [VA_ROTATION_NONE] = { X2, Y2, X1, Y2, X1, Y1 },
        [VA_ROTATION_90]   = { X2, Y1, X2, Y2, X1, Y2 },
        [VA_ROTATION_180]  = { X1, Y1, X2, Y1, X2, Y2 },
        [VA_ROTATION_270]  = { X1, Y2, X1, Y1, X2, Y1 },
    };

    const unsigned int * const rotation_indices =
        g_rotation_indices[i965->rotation_attrib->value];

    vb[0]  = tex_coords[rotation_indices[0]]; /* bottom-right corner */
    vb[1]  = tex_coords[rotation_indices[1]];
    vb[2]  = vid_coords[X2];
    vb[3]  = vid_coords[Y2];

    vb[4]  = tex_coords[rotation_indices[2]]; /* bottom-left corner */
    vb[5]  = tex_coords[rotation_indices[3]];
    vb[6]  = vid_coords[X1];
    vb[7]  = vid_coords[Y2];

    vb[8]  = tex_coords[rotation_indices[4]]; /* top-left corner */
    vb[9]  = tex_coords[rotation_indices[5]];
    vb[10] = vid_coords[X1];
    vb[11] = vid_coords[Y1];

    dri_bo_subdata(i965->render_state.vb.vertex_buffer, 0, sizeof(vb), vb);
}

static void
i965_subpic_render_upload_vertex(VADriverContextP ctx,
                                 struct object_surface *obj_surface,
                                 const VARectangle *output_rect)
{
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
    float tex_coords[4], vid_coords[4];
    VARectangle dst_rect;

    if (obj_subpic->flags & VA_SUBPICTURE_DESTINATION_IS_SCREEN_COORD)
        dst_rect = obj_subpic->dst_rect;
    else {
        const float sx  = (float)output_rect->width  / obj_surface->orig_width;
        const float sy  = (float)output_rect->height / obj_surface->orig_height;
        dst_rect.x      = output_rect->x + sx * obj_subpic->dst_rect.x;
        dst_rect.y      = output_rect->y + sy * obj_subpic->dst_rect.y;
        dst_rect.width  = sx * obj_subpic->dst_rect.width;
        dst_rect.height = sy * obj_subpic->dst_rect.height;
    }

    tex_coords[0] = (float)obj_subpic->src_rect.x / obj_subpic->width;
    tex_coords[1] = (float)obj_subpic->src_rect.y / obj_subpic->height;
    tex_coords[2] = (float)(obj_subpic->src_rect.x + obj_subpic->src_rect.width) / obj_subpic->width;
    tex_coords[3] = (float)(obj_subpic->src_rect.y + obj_subpic->src_rect.height) / obj_subpic->height;

    vid_coords[0] = dst_rect.x;
    vid_coords[1] = dst_rect.y;
    vid_coords[2] = (float)(dst_rect.x + dst_rect.width);
    vid_coords[3] = (float)(dst_rect.y + dst_rect.height);

    i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
}

static void
i965_render_upload_vertex(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    struct intel_region *dest_region = render_state->draw_region;
    float tex_coords[4], vid_coords[4];
    int width, height;

    width  = obj_surface->orig_width;
    height = obj_surface->orig_height;

    tex_coords[0] = (float)src_rect->x / width;
    tex_coords[1] = (float)src_rect->y / height;
    tex_coords[2] = (float)(src_rect->x + src_rect->width) / width;
    tex_coords[3] = (float)(src_rect->y + src_rect->height) / height;

    vid_coords[0] = dest_region->x + dst_rect->x;
    vid_coords[1] = dest_region->y + dst_rect->y;
    vid_coords[2] = vid_coords[0] + dst_rect->width;
    vid_coords[3] = vid_coords[1] + dst_rect->height;

    i965_fill_vertex_buffer(ctx, tex_coords, vid_coords);
}

#define PI  3.1415926

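/*
 * CURBE layout consumed by the PS kernel, a sketch of what the stores below
 * imply: 16-bit word 0 selects the source layout (0 = planar YUV, 1 = NV12,
 * 2 = grayscale), word 1 set to 1 skips color balance, floats 4..7 hold the
 * contrast, brightness and hue/saturation rotation terms, and floats 8..19
 * hold the 3x4 YUV -> RGB matrix.
 */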
static void
i965_render_upload_constants(VADriverContextP ctx,
                             struct object_surface *obj_surface,
                             unsigned int flags)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    unsigned short *constant_buffer;
    float *color_balance_base;
    float contrast = (float)i965->contrast_attrib->value / DEFAULT_CONTRAST;
    float brightness = (float)i965->brightness_attrib->value / 255; /* YUV is float in the shader */
    float hue = (float)i965->hue_attrib->value / 180 * PI;
    float saturation = (float)i965->saturation_attrib->value / DEFAULT_SATURATION;
    float *yuv_to_rgb;
    unsigned int color_flag;

    dri_bo_map(render_state->curbe.bo, 1);
    assert(render_state->curbe.bo->virtual);
    constant_buffer = render_state->curbe.bo->virtual;

    if (obj_surface->subsampling == SUBSAMPLE_YUV400) {
        assert(obj_surface->fourcc == VA_FOURCC_Y800);

        constant_buffer[0] = 2;
    } else {
        if (obj_surface->fourcc == VA_FOURCC_NV12)
            constant_buffer[0] = 1;
        else
            constant_buffer[0] = 0;
    }

    if (i965->contrast_attrib->value == DEFAULT_CONTRAST &&
        i965->brightness_attrib->value == DEFAULT_BRIGHTNESS &&
        i965->hue_attrib->value == DEFAULT_HUE &&
        i965->saturation_attrib->value == DEFAULT_SATURATION)
        constant_buffer[1] = 1; /* skip color balance transformation */
    else
        constant_buffer[1] = 0;

    color_balance_base = (float *)constant_buffer + 4;
    *color_balance_base++ = contrast;
    *color_balance_base++ = brightness;
    *color_balance_base++ = cos(hue) * contrast * saturation;
    *color_balance_base++ = sin(hue) * contrast * saturation;

    color_flag = flags & VA_SRC_COLOR_MASK;
    yuv_to_rgb = (float *)constant_buffer + 8;
    if (color_flag == VA_SRC_BT709)
        memcpy(yuv_to_rgb, yuv_to_rgb_bt709, sizeof(yuv_to_rgb_bt709));
    else if (color_flag == VA_SRC_SMPTE_240)
        memcpy(yuv_to_rgb, yuv_to_rgb_smpte_240, sizeof(yuv_to_rgb_smpte_240));
    else
        memcpy(yuv_to_rgb, yuv_to_rgb_bt601, sizeof(yuv_to_rgb_bt601));

    dri_bo_unmap(render_state->curbe.bo);
}

static void
i965_subpic_render_upload_constants(VADriverContextP ctx,
                                    struct object_surface *obj_surface)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct i965_render_state *render_state = &i965->render_state;
    float *constant_buffer;
    float global_alpha = 1.0;
    unsigned int index = obj_surface->subpic_render_idx;
    struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];

    if (obj_subpic->flags & VA_SUBPICTURE_GLOBAL_ALPHA) {
        global_alpha = obj_subpic->global_alpha;
    }

    dri_bo_map(render_state->curbe.bo, 1);

    assert(render_state->curbe.bo->virtual);
    constant_buffer = render_state->curbe.bo->virtual;
    *constant_buffer = global_alpha;

    dri_bo_unmap(render_state->curbe.bo);
}

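/*
 * Build all indirect state for one blit of a video surface: fixed-function
 * unit states, destination and source surface states, samplers, viewport,
 * vertex data and constants.
 */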
static void
i965_surface_render_state_setup(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect,
    unsigned int       flags
)
{
    i965_render_vs_unit(ctx);
    i965_render_sf_unit(ctx);
    i965_render_dest_surface_state(ctx, 0);
    i965_render_src_surfaces_state(ctx, obj_surface, flags);
    i965_render_sampler(ctx);
    i965_render_wm_unit(ctx);
    i965_render_cc_viewport(ctx);
    i965_render_cc_unit(ctx);
    i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
    i965_render_upload_constants(ctx, obj_surface, flags);
}

static void
i965_subpic_render_state_setup(
    VADriverContextP   ctx,
    struct object_surface *obj_surface,
    const VARectangle *src_rect,
    const VARectangle *dst_rect
)
{
    i965_render_vs_unit(ctx);
    i965_render_sf_unit(ctx);
    i965_render_dest_surface_state(ctx, 0);
    i965_subpic_render_src_surfaces_state(ctx, obj_surface);
    i965_render_sampler(ctx);
    i965_subpic_render_wm_unit(ctx);
    i965_render_cc_viewport(ctx);
    i965_subpic_render_cc_unit(ctx);
    i965_subpic_render_upload_constants(ctx, obj_surface);
    i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
}

static void
i965_render_pipeline_select(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 1);
    OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
    ADVANCE_BATCH(batch);
}

static void
i965_render_state_sip(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_STATE_SIP | 0);
    OUT_BATCH(batch, 0);
    ADVANCE_BATCH(batch);
}

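/*
 * STATE_BASE_ADDRESS: only the surface state base points at a real buffer
 * (the combined surface state / binding table BO); every other base is
 * left at zero. Ironlake's form of the command is two dwords longer.
 */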
static void
i965_render_state_base_address(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    if (IS_IRONLAKE(i965->intel.device_info)) {
        BEGIN_BATCH(batch, 8);
        OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 6);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        ADVANCE_BATCH(batch);
    } else {
        BEGIN_BATCH(batch, 6);
        OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | 4);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        OUT_BATCH(batch, 0 | BASE_ADDRESS_MODIFY);
        ADVANCE_BATCH(batch);
    }
}

static void
i965_render_binding_table_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 6);
    OUT_BATCH(batch, CMD_BINDING_TABLE_POINTERS | 4);
    OUT_BATCH(batch, 0); /* vs */
    OUT_BATCH(batch, 0); /* gs */
    OUT_BATCH(batch, 0); /* clip */
    OUT_BATCH(batch, 0); /* sf */
    OUT_BATCH(batch, BINDING_TABLE_OFFSET);
    ADVANCE_BATCH(batch);
}

static void
i965_render_constant_color(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 5);
    OUT_BATCH(batch, CMD_CONSTANT_COLOR | 3);
    OUT_BATCH(batch, float_to_uint(1.0));
    OUT_BATCH(batch, float_to_uint(0.0));
    OUT_BATCH(batch, float_to_uint(1.0));
    OUT_BATCH(batch, float_to_uint(1.0));
    ADVANCE_BATCH(batch);
}

static void
i965_render_pipelined_pointers(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 7);
    OUT_BATCH(batch, CMD_PIPELINED_POINTERS | 5);
    OUT_RELOC(batch, render_state->vs.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_BATCH(batch, 0);  /* disable GS */
    OUT_BATCH(batch, 0);  /* disable CLIP */
    OUT_RELOC(batch, render_state->sf.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_RELOC(batch, render_state->wm.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    OUT_RELOC(batch, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
    ADVANCE_BATCH(batch);
}

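/*
 * Carve the URB into consecutive VS/GS/CLIP/SF/CS regions using the entry
 * counts and sizes defined above; each fence value is the end offset of
 * its region.
 */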
static void
i965_render_urb_layout(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    int urb_vs_start, urb_vs_size;
    int urb_gs_start, urb_gs_size;
    int urb_clip_start, urb_clip_size;
    int urb_sf_start, urb_sf_size;
    int urb_cs_start, urb_cs_size;

    urb_vs_start = 0;
    urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE;
    urb_gs_start = urb_vs_start + urb_vs_size;
    urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE;
    urb_clip_start = urb_gs_start + urb_gs_size;
    urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE;
    urb_sf_start = urb_clip_start + urb_clip_size;
    urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE;
    urb_cs_start = urb_sf_start + urb_sf_size;
    urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE;

    BEGIN_BATCH(batch, 3);
    OUT_BATCH(batch,
              CMD_URB_FENCE |
              UF0_CS_REALLOC |
              UF0_SF_REALLOC |
              UF0_CLIP_REALLOC |
              UF0_GS_REALLOC |
              UF0_VS_REALLOC |
              1);
    OUT_BATCH(batch,
              ((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) |
              ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) |
              ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT));
    OUT_BATCH(batch,
              ((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) |
              ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT));
    ADVANCE_BATCH(batch);
}

static void
i965_render_cs_urb_layout(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_CS_URB_STATE | 0);
    OUT_BATCH(batch,
              ((URB_CS_ENTRY_SIZE - 1) << 4) |  /* URB Entry Allocation Size */
              (URB_CS_ENTRIES << 0));           /* Number of URB Entries */
    ADVANCE_BATCH(batch);
}

static void
i965_render_constant_buffer(VADriverContextP ctx)
{
    struct i965_driver_data *i965 = i965_driver_data(ctx);
    struct intel_batchbuffer *batch = i965->batch;
    struct i965_render_state *render_state = &i965->render_state;

    BEGIN_BATCH(batch, 2);
    OUT_BATCH(batch, CMD_CONSTANT_BUFFER | (1 << 8) | (2 - 2));
    OUT_RELOC(batch, render_state->curbe.bo,
              I915_GEM_DOMAIN_INSTRUCTION, 0,
              URB_CS_ENTRY_SIZE - 1);
    ADVANCE_BATCH(batch);
}
  1377.  
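/*
 * The drawing rectangle clips all output; set it to cover the whole
 * destination region.
 */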
  1378. static void
  1379. i965_render_drawing_rectangle(VADriverContextP ctx)
  1380. {
  1381.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1382.     struct intel_batchbuffer *batch = i965->batch;
  1383.     struct i965_render_state *render_state = &i965->render_state;
  1384.     struct intel_region *dest_region = render_state->draw_region;
  1385.  
  1386.     BEGIN_BATCH(batch, 4);
  1387.     OUT_BATCH(batch, CMD_DRAWING_RECTANGLE | 2);
  1388.     OUT_BATCH(batch, 0x00000000);
  1389.     OUT_BATCH(batch, (dest_region->width - 1) | (dest_region->height - 1) << 16);
  1390.     OUT_BATCH(batch, 0x00000000);        
  1391.     ADVANCE_BATCH(batch);
  1392. }
  1393.  
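/*
 * Tell the vertex fetcher how to unpack a vertex.  The buffer filled
 * by i965_render_upload_vertex() is assumed to hold three RECTLIST
 * corners of four floats each:
 *
 *     offset 0: float x, y;   fetched as {x, y, 1.0, 1.0}
 *     offset 8: float s, t;   fetched as {s, t, 1.0, 1.0}
 *
 * The two branches differ only in the destination element offset
 * fields, which IRONLAKE no longer uses.
 */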
  1394. static void
  1395. i965_render_vertex_elements(VADriverContextP ctx)
  1396. {
  1397.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1398.     struct intel_batchbuffer *batch = i965->batch;
  1399.  
  1400.     if (IS_IRONLAKE(i965->intel.device_info)) {
  1401.         BEGIN_BATCH(batch, 5);
  1402.         OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3);
  1403.         /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
  1404.         OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
  1405.                   VE0_VALID |
  1406.                   (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
  1407.                   (0 << VE0_OFFSET_SHIFT));
  1408.         OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
  1409.                   (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
  1410.                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
  1411.                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
  1412.         /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
  1413.         OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
  1414.                   VE0_VALID |
  1415.                   (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
  1416.                   (8 << VE0_OFFSET_SHIFT));
  1417.         OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
  1418.                   (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
  1419.                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
  1420.                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
  1421.         ADVANCE_BATCH(batch);
  1422.     } else {
  1423.         BEGIN_BATCH(batch, 5);
  1424.         OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | 3);
  1425.         /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
  1426.         OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
  1427.                   VE0_VALID |
  1428.                   (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
  1429.                   (0 << VE0_OFFSET_SHIFT));
  1430.         OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
  1431.                   (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
  1432.                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
  1433.                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
  1434.                   (0 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
  1435.         /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
  1436.         OUT_BATCH(batch, (0 << VE0_VERTEX_BUFFER_INDEX_SHIFT) |
  1437.                   VE0_VALID |
  1438.                   (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
  1439.                   (8 << VE0_OFFSET_SHIFT));
  1440.         OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
  1441.                   (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
  1442.                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
  1443.                   (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT) |
  1444.                   (4 << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT));
  1445.         ADVANCE_BATCH(batch);
  1446.     }
  1447. }
  1448.  
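/*
 * Load the subpicture palette with CMD_SAMPLER_PALETTE_LOAD, merging
 * the caller-supplied alpha into bits 24-31 of every entry.
 */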
  1449. static void
  1450. i965_render_upload_image_palette(
  1451.     VADriverContextP ctx,
  1452.     struct object_image *obj_image,
  1453.     unsigned int     alpha
  1454. )
  1455. {
  1456.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1457.     struct intel_batchbuffer *batch = i965->batch;
  1458.     unsigned int i;
  1459.  
  1460.     assert(obj_image);
  1461.  
  1462.     if (!obj_image) /* stay safe when asserts are compiled out */
  1463.         return;
  1464.  
  1465.     if (obj_image->image.num_palette_entries == 0)
  1466.         return;
  1467.  
  1468.     BEGIN_BATCH(batch, 1 + obj_image->image.num_palette_entries);
  1469.     OUT_BATCH(batch, CMD_SAMPLER_PALETTE_LOAD | (obj_image->image.num_palette_entries - 1));
  1470.     /* Fill the palette: bits 0-23 hold the RGB color, bits 24-31 the alpha. */
  1472.     for (i = 0; i < obj_image->image.num_palette_entries; i++)
  1473.         OUT_BATCH(batch, (alpha << 24) | obj_image->palette[i]);
  1474.     ADVANCE_BATCH(batch);
  1475. }
  1476.  
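/*
 * Bind the vertex buffer and issue the draw itself: one RECTLIST
 * primitive with three vertices, which the hardware expands to the
 * destination rectangle.
 */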
  1477. static void
  1478. i965_render_startup(VADriverContextP ctx)
  1479. {
  1480.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1481.     struct intel_batchbuffer *batch = i965->batch;
  1482.     struct i965_render_state *render_state = &i965->render_state;
  1483.  
  1484.     BEGIN_BATCH(batch, 11);
  1485.     OUT_BATCH(batch, CMD_VERTEX_BUFFERS | 3);
  1486.     OUT_BATCH(batch,
  1487.               (0 << VB0_BUFFER_INDEX_SHIFT) |
  1488.               VB0_VERTEXDATA |
  1489.               ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
  1490.     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
  1491.  
  1492.     if (IS_IRONLAKE(i965->intel.device_info))
  1493.         OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
  1494.     else
  1495.         OUT_BATCH(batch, 3);
  1496.  
  1497.     OUT_BATCH(batch, 0);
  1498.  
  1499.     OUT_BATCH(batch,
  1500.               CMD_3DPRIMITIVE |
  1501.               _3DPRIMITIVE_VERTEX_SEQUENTIAL |
  1502.               (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
  1503.               (0 << 9) |
  1504.               4);
  1505.     OUT_BATCH(batch, 3); /* vertex count per instance */
  1506.     OUT_BATCH(batch, 0); /* start vertex offset */
  1507.     OUT_BATCH(batch, 1); /* single instance */
  1508.     OUT_BATCH(batch, 0); /* start instance location */
  1509.     OUT_BATCH(batch, 0); /* index buffer offset, ignored */
  1510.     ADVANCE_BATCH(batch);
  1511. }
  1512.  
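/*
 * Clear the destination region to black with XY_COLOR_BLT before
 * compositing (0xf0 is the PATCOPY raster operation).  Tiled
 * destinations take the pitch in dwords, hence pitch / 4.
 */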
  1513. static void
  1514. i965_clear_dest_region(VADriverContextP ctx)
  1515. {
  1516.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1517.     struct intel_batchbuffer *batch = i965->batch;
  1518.     struct i965_render_state *render_state = &i965->render_state;
  1519.     struct intel_region *dest_region = render_state->draw_region;
  1520.     unsigned int blt_cmd, br13;
  1521.     int pitch;
  1522.  
  1523.     blt_cmd = XY_COLOR_BLT_CMD;
  1524.     br13 = 0xf0 << 16;
  1525.     pitch = dest_region->pitch;
  1526.  
  1527.     if (dest_region->cpp == 4) {
  1528.         br13 |= BR13_8888;
  1529.         blt_cmd |= (XY_COLOR_BLT_WRITE_RGB | XY_COLOR_BLT_WRITE_ALPHA);
  1530.     } else {
  1531.         assert(dest_region->cpp == 2);
  1532.         br13 |= BR13_565;
  1533.     }
  1534.  
  1535.     if (dest_region->tiling != I915_TILING_NONE) {
  1536.         blt_cmd |= XY_COLOR_BLT_DST_TILED;
  1537.         pitch /= 4;
  1538.     }
  1539.  
  1540.     br13 |= pitch;
  1541.  
  1542.     if (IS_GEN6(i965->intel.device_info) ||
  1543.         IS_GEN7(i965->intel.device_info)) {
  1544.         intel_batchbuffer_start_atomic_blt(batch, 24);
  1545.         BEGIN_BLT_BATCH(batch, 6);
  1546.     } else {
  1547.         intel_batchbuffer_start_atomic(batch, 24);
  1548.         BEGIN_BATCH(batch, 6);
  1549.     }
  1550.  
  1551.     OUT_BATCH(batch, blt_cmd);
  1552.     OUT_BATCH(batch, br13);
  1553.     OUT_BATCH(batch, (dest_region->y << 16) | (dest_region->x));
  1554.     OUT_BATCH(batch, ((dest_region->y + dest_region->height) << 16) |
  1555.               (dest_region->x + dest_region->width));
  1556.     OUT_RELOC(batch, dest_region->bo,
  1557.               I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
  1558.               0);
  1559.     OUT_BATCH(batch, 0x0);
  1560.     ADVANCE_BATCH(batch);
  1561.     intel_batchbuffer_end_atomic(batch);
  1562. }
  1563.  
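/*
 * Emit the complete GEN4/GEN5 3D pipeline for one frame.  The state
 * buffers referenced here were filled beforehand by
 * i965_surface_render_state_setup().
 */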
  1564. static void
  1565. i965_surface_render_pipeline_setup(VADriverContextP ctx)
  1566. {
  1567.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1568.     struct intel_batchbuffer *batch = i965->batch;
  1569.  
  1570.     i965_clear_dest_region(ctx);
  1571.     intel_batchbuffer_start_atomic(batch, 0x1000);
  1572.     intel_batchbuffer_emit_mi_flush(batch);
  1573.     i965_render_pipeline_select(ctx);
  1574.     i965_render_state_sip(ctx);
  1575.     i965_render_state_base_address(ctx);
  1576.     i965_render_binding_table_pointers(ctx);
  1577.     i965_render_constant_color(ctx);
  1578.     i965_render_pipelined_pointers(ctx);
  1579.     i965_render_urb_layout(ctx);
  1580.     i965_render_cs_urb_layout(ctx);
  1581.     i965_render_constant_buffer(ctx);
  1582.     i965_render_drawing_rectangle(ctx);
  1583.     i965_render_vertex_elements(ctx);
  1584.     i965_render_startup(ctx);
  1585.     intel_batchbuffer_end_atomic(batch);
  1586. }
  1587.  
  1588. static void
  1589. i965_subpic_render_pipeline_setup(VADriverContextP ctx)
  1590. {
  1591.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1592.     struct intel_batchbuffer *batch = i965->batch;
  1593.  
  1594.     intel_batchbuffer_start_atomic(batch, 0x1000);
  1595.     intel_batchbuffer_emit_mi_flush(batch);
  1596.     i965_render_pipeline_select(ctx);
  1597.     i965_render_state_sip(ctx);
  1598.     i965_render_state_base_address(ctx);
  1599.     i965_render_binding_table_pointers(ctx);
  1600.     i965_render_constant_color(ctx);
  1601.     i965_render_pipelined_pointers(ctx);
  1602.     i965_render_urb_layout(ctx);
  1603.     i965_render_cs_urb_layout(ctx);
  1604.     i965_render_constant_buffer(ctx);
  1605.     i965_render_drawing_rectangle(ctx);
  1606.     i965_render_vertex_elements(ctx);
  1607.     i965_render_startup(ctx);
  1608.     intel_batchbuffer_end_atomic(batch);
  1609. }
  1610.  
  1611.  
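/*
 * (Re)allocate the buffer objects backing the GEN4/GEN5 fixed-function
 * unit state, the surface/binding table, the samplers and the vertex
 * buffer.  Old buffers, if any, are dropped first.
 */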
  1612. static void
  1613. i965_render_initialize(VADriverContextP ctx)
  1614. {
  1615.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1616.     struct i965_render_state *render_state = &i965->render_state;
  1617.     dri_bo *bo;
  1618.  
  1619.     /* VERTEX BUFFER */
  1620.     dri_bo_unreference(render_state->vb.vertex_buffer);
  1621.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1622.                       "vertex buffer",
  1623.                       4096,
  1624.                       4096);
  1625.     assert(bo);
  1626.     render_state->vb.vertex_buffer = bo;
  1627.  
  1628.     /* VS */
  1629.     dri_bo_unreference(render_state->vs.state);
  1630.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1631.                       "vs state",
  1632.                       sizeof(struct i965_vs_unit_state),
  1633.                       64);
  1634.     assert(bo);
  1635.     render_state->vs.state = bo;
  1636.  
  1637.     /* GS */
  1638.     /* CLIP */
  1639.     /* SF */
  1640.     dri_bo_unreference(render_state->sf.state);
  1641.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1642.                       "sf state",
  1643.                       sizeof(struct i965_sf_unit_state),
  1644.                       64);
  1645.     assert(bo);
  1646.     render_state->sf.state = bo;
  1647.  
  1648.     /* WM */
  1649.     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
  1650.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1651.                       "surface state & binding table",
  1652.                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
  1653.                       4096);
  1654.     assert(bo);
  1655.     render_state->wm.surface_state_binding_table_bo = bo;
  1656.  
  1657.     dri_bo_unreference(render_state->wm.sampler);
  1658.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1659.                       "sampler state",
  1660.                       MAX_SAMPLERS * sizeof(struct i965_sampler_state),
  1661.                       64);
  1662.     assert(bo);
  1663.     render_state->wm.sampler = bo;
  1664.     render_state->wm.sampler_count = 0;
  1665.  
  1666.     dri_bo_unreference(render_state->wm.state);
  1667.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1668.                       "wm state",
  1669.                       sizeof(struct i965_wm_unit_state),
  1670.                       64);
  1671.     assert(bo);
  1672.     render_state->wm.state = bo;
  1673.  
  1674.     /* COLOR CALCULATOR */
  1675.     dri_bo_unreference(render_state->cc.state);
  1676.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1677.                       "color calc state",
  1678.                       sizeof(struct i965_cc_unit_state),
  1679.                       64);
  1680.     assert(bo);
  1681.     render_state->cc.state = bo;
  1682.  
  1683.     dri_bo_unreference(render_state->cc.viewport);
  1684.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1685.                       "cc viewport",
  1686.                       sizeof(struct i965_cc_viewport),
  1687.                       64);
  1688.     assert(bo);
  1689.     render_state->cc.viewport = bo;
  1690. }
  1691.  
  1692. static void
  1693. i965_render_put_surface(
  1694.     VADriverContextP   ctx,
  1695.     struct object_surface *obj_surface,
  1696.     const VARectangle *src_rect,
  1697.     const VARectangle *dst_rect,
  1698.     unsigned int       flags
  1699. )
  1700. {
  1701.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1702.     struct intel_batchbuffer *batch = i965->batch;
  1703.  
  1704.     i965_render_initialize(ctx);
  1705.     i965_surface_render_state_setup(ctx, obj_surface, src_rect, dst_rect, flags);
  1706.     i965_surface_render_pipeline_setup(ctx);
  1707.     intel_batchbuffer_flush(batch);
  1708. }
  1709.  
  1710. static void
  1711. i965_render_put_subpicture(
  1712.     VADriverContextP   ctx,
  1713.     struct object_surface *obj_surface,
  1714.     const VARectangle *src_rect,
  1715.     const VARectangle *dst_rect
  1716. )
  1717. {
  1718.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1719.     struct intel_batchbuffer *batch = i965->batch;
  1720.     unsigned int index = obj_surface->subpic_render_idx;
  1721.     struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
  1722.  
  1723.     assert(obj_subpic);
  1724.  
  1725.     i965_render_initialize(ctx);
  1726.     i965_subpic_render_state_setup(ctx, obj_surface, src_rect, dst_rect);
  1727.     i965_subpic_render_pipeline_setup(ctx);
  1728.     i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
  1729.     intel_batchbuffer_flush(batch);
  1730. }
  1731.  
  1732. /*
  1733.  * for GEN6+
  1734.  */
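/*
 * Unlike GEN4/GEN5, GEN6 keeps color-calc, blend and depth-stencil
 * state in separate objects referenced through
 * 3DSTATE_CC_STATE_POINTERS, so no per-unit state buffers are needed.
 */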
  1735. static void
  1736. gen6_render_initialize(VADriverContextP ctx)
  1737. {
  1738.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1739.     struct i965_render_state *render_state = &i965->render_state;
  1740.     dri_bo *bo;
  1741.  
  1742.     /* VERTEX BUFFER */
  1743.     dri_bo_unreference(render_state->vb.vertex_buffer);
  1744.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1745.                       "vertex buffer",
  1746.                       4096,
  1747.                       4096);
  1748.     assert(bo);
  1749.     render_state->vb.vertex_buffer = bo;
  1750.  
  1751.     /* WM */
  1752.     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
  1753.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1754.                       "surface state & binding table",
  1755.                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
  1756.                       4096);
  1757.     assert(bo);
  1758.     render_state->wm.surface_state_binding_table_bo = bo;
  1759.  
  1760.     dri_bo_unreference(render_state->wm.sampler);
  1761.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1762.                       "sampler state",
  1763.                       MAX_SAMPLERS * sizeof(struct i965_sampler_state),
  1764.                       4096);
  1765.     assert(bo);
  1766.     render_state->wm.sampler = bo;
  1767.     render_state->wm.sampler_count = 0;
  1768.  
  1769.     /* COLOR CALCULATOR */
  1770.     dri_bo_unreference(render_state->cc.state);
  1771.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1772.                       "color calc state",
  1773.                       sizeof(struct gen6_color_calc_state),
  1774.                       4096);
  1775.     assert(bo);
  1776.     render_state->cc.state = bo;
  1777.  
  1778.     /* CC VIEWPORT */
  1779.     dri_bo_unreference(render_state->cc.viewport);
  1780.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1781.                       "cc viewport",
  1782.                       sizeof(struct i965_cc_viewport),
  1783.                       4096);
  1784.     assert(bo);
  1785.     render_state->cc.viewport = bo;
  1786.  
  1787.     /* BLEND STATE */
  1788.     dri_bo_unreference(render_state->cc.blend);
  1789.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1790.                       "blend state",
  1791.                       sizeof(struct gen6_blend_state),
  1792.                       4096);
  1793.     assert(bo);
  1794.     render_state->cc.blend = bo;
  1795.  
  1796.     /* DEPTH & STENCIL STATE */
  1797.     dri_bo_unreference(render_state->cc.depth_stencil);
  1798.     bo = dri_bo_alloc(i965->intel.bufmgr,
  1799.                       "depth & stencil state",
  1800.                       sizeof(struct gen6_depth_stencil_state),
  1801.                       4096);
  1802.     assert(bo);
  1803.     render_state->cc.depth_stencil = bo;
  1804. }
  1805.  
  1806. static void
  1807. gen6_render_color_calc_state(VADriverContextP ctx)
  1808. {
  1809.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1810.     struct i965_render_state *render_state = &i965->render_state;
  1811.     struct gen6_color_calc_state *color_calc_state;
  1812.    
  1813.     dri_bo_map(render_state->cc.state, 1);
  1814.     assert(render_state->cc.state->virtual);
  1815.     color_calc_state = render_state->cc.state->virtual;
  1816.     memset(color_calc_state, 0, sizeof(*color_calc_state));
  1817.     color_calc_state->constant_r = 1.0;
  1818.     color_calc_state->constant_g = 0.0;
  1819.     color_calc_state->constant_b = 1.0;
  1820.     color_calc_state->constant_a = 1.0;
  1821.     dri_bo_unmap(render_state->cc.state);
  1822. }
  1823.  
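/*
 * For plain surface rendering blending stays disabled: logic op 0xc
 * is COPY, so the pixel-shader output is written unmodified.
 */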
  1824. static void
  1825. gen6_render_blend_state(VADriverContextP ctx)
  1826. {
  1827.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1828.     struct i965_render_state *render_state = &i965->render_state;
  1829.     struct gen6_blend_state *blend_state;
  1830.    
  1831.     dri_bo_map(render_state->cc.blend, 1);
  1832.     assert(render_state->cc.blend->virtual);
  1833.     blend_state = render_state->cc.blend->virtual;
  1834.     memset(blend_state, 0, sizeof(*blend_state));
  1835.     blend_state->blend1.logic_op_enable = 1;
  1836.     blend_state->blend1.logic_op_func = 0xc;
  1837.     dri_bo_unmap(render_state->cc.blend);
  1838. }
  1839.  
  1840. static void
  1841. gen6_render_depth_stencil_state(VADriverContextP ctx)
  1842. {
  1843.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1844.     struct i965_render_state *render_state = &i965->render_state;
  1845.     struct gen6_depth_stencil_state *depth_stencil_state;
  1846.    
  1847.     dri_bo_map(render_state->cc.depth_stencil, 1);
  1848.     assert(render_state->cc.depth_stencil->virtual);
  1849.     depth_stencil_state = render_state->cc.depth_stencil->virtual;
  1850.     memset(depth_stencil_state, 0, sizeof(*depth_stencil_state));
  1851.     dri_bo_unmap(render_state->cc.depth_stencil);
  1852. }
  1853.  
  1854. static void
  1855. gen6_render_setup_states(
  1856.     VADriverContextP   ctx,
  1857.     struct object_surface *obj_surface,
  1858.     const VARectangle *src_rect,
  1859.     const VARectangle *dst_rect,
  1860.     unsigned int       flags
  1861. )
  1862. {
  1863.     i965_render_dest_surface_state(ctx, 0);
  1864.     i965_render_src_surfaces_state(ctx, obj_surface, flags);
  1865.     i965_render_sampler(ctx);
  1866.     i965_render_cc_viewport(ctx);
  1867.     gen6_render_color_calc_state(ctx);
  1868.     gen6_render_blend_state(ctx);
  1869.     gen6_render_depth_stencil_state(ctx);
  1870.     i965_render_upload_constants(ctx, obj_surface, flags);
  1871.     i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
  1872. }
  1873.  
  1874. static void
  1875. gen6_emit_invarient_states(VADriverContextP ctx)
  1876. {
  1877.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1878.     struct intel_batchbuffer *batch = i965->batch;
  1879.  
  1880.     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
  1881.  
  1882.     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE | (3 - 2));
  1883.     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
  1884.               GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
  1885.     OUT_BATCH(batch, 0);
  1886.  
  1887.     OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
  1888.     OUT_BATCH(batch, 1);
  1889.  
  1890.     /* Set system instruction pointer */
  1891.     OUT_BATCH(batch, CMD_STATE_SIP | 0);
  1892.     OUT_BATCH(batch, 0);
  1893. }
  1894.  
  1895. static void
  1896. gen6_emit_state_base_address(VADriverContextP ctx)
  1897. {
  1898.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1899.     struct intel_batchbuffer *batch = i965->batch;
  1900.     struct i965_render_state *render_state = &i965->render_state;
  1901.  
  1902.     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
  1903.     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
  1904.     OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
  1905.     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
  1906.     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
  1907.     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */
  1908.     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state upper bound */
  1909.     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
  1910.     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
  1911.     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
  1912. }
  1913.  
  1914. static void
  1915. gen6_emit_viewport_state_pointers(VADriverContextP ctx)
  1916. {
  1917.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1918.     struct intel_batchbuffer *batch = i965->batch;
  1919.     struct i965_render_state *render_state = &i965->render_state;
  1920.  
  1921.     OUT_BATCH(batch, GEN6_3DSTATE_VIEWPORT_STATE_POINTERS |
  1922.               GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC |
  1923.               (4 - 2));
  1924.     OUT_BATCH(batch, 0);
  1925.     OUT_BATCH(batch, 0);
  1926.     OUT_RELOC(batch, render_state->cc.viewport, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
  1927. }
  1928.  
  1929. static void
  1930. gen6_emit_urb(VADriverContextP ctx)
  1931. {
  1932.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1933.     struct intel_batchbuffer *batch = i965->batch;
  1934.  
  1935.     OUT_BATCH(batch, GEN6_3DSTATE_URB | (3 - 2));
  1936.     OUT_BATCH(batch, ((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) |
  1937.               (24 << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */
  1938.     OUT_BATCH(batch, (0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) |
  1939.               (0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */
  1940. }
  1941.  
  1942. static void
  1943. gen6_emit_cc_state_pointers(VADriverContextP ctx)
  1944. {
  1945.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1946.     struct intel_batchbuffer *batch = i965->batch;
  1947.     struct i965_render_state *render_state = &i965->render_state;
  1948.  
  1949.     OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2));
  1950.     OUT_RELOC(batch, render_state->cc.blend, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
  1951.     OUT_RELOC(batch, render_state->cc.depth_stencil, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
  1952.     OUT_RELOC(batch, render_state->cc.state, I915_GEM_DOMAIN_INSTRUCTION, 0, 1);
  1953. }
  1954.  
  1955. static void
  1956. gen6_emit_sampler_state_pointers(VADriverContextP ctx)
  1957. {
  1958.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1959.     struct intel_batchbuffer *batch = i965->batch;
  1960.     struct i965_render_state *render_state = &i965->render_state;
  1961.  
  1962.     OUT_BATCH(batch, GEN6_3DSTATE_SAMPLER_STATE_POINTERS |
  1963.               GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS |
  1964.               (4 - 2));
  1965.     OUT_BATCH(batch, 0); /* VS */
  1966.     OUT_BATCH(batch, 0); /* GS */
  1967.     OUT_RELOC(batch,render_state->wm.sampler, I915_GEM_DOMAIN_INSTRUCTION, 0, 0);
  1968. }
  1969.  
  1970. static void
  1971. gen6_emit_binding_table(VADriverContextP ctx)
  1972. {
  1973.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1974.     struct intel_batchbuffer *batch = i965->batch;
  1975.  
  1976.     /* Binding table pointers */
  1977.     OUT_BATCH(batch, CMD_BINDING_TABLE_POINTERS |
  1978.               GEN6_BINDING_TABLE_MODIFY_PS |
  1979.               (4 - 2));
  1980.     OUT_BATCH(batch, 0);                /* vs */
  1981.     OUT_BATCH(batch, 0);                /* gs */
  1982.     /* Only the PS uses the binding table */
  1983.     OUT_BATCH(batch, BINDING_TABLE_OFFSET);
  1984. }
  1985.  
  1986. static void
  1987. gen6_emit_depth_buffer_state(VADriverContextP ctx)
  1988. {
  1989.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  1990.     struct intel_batchbuffer *batch = i965->batch;
  1991.  
  1992.     OUT_BATCH(batch, CMD_DEPTH_BUFFER | (7 - 2));
  1993.     OUT_BATCH(batch, (I965_SURFACE_NULL << CMD_DEPTH_BUFFER_TYPE_SHIFT) |
  1994.               (I965_DEPTHFORMAT_D32_FLOAT << CMD_DEPTH_BUFFER_FORMAT_SHIFT));
  1995.     OUT_BATCH(batch, 0);
  1996.     OUT_BATCH(batch, 0);
  1997.     OUT_BATCH(batch, 0);
  1998.     OUT_BATCH(batch, 0);
  1999.     OUT_BATCH(batch, 0);
  2000.  
  2001.     OUT_BATCH(batch, CMD_CLEAR_PARAMS | (2 - 2));
  2002.     OUT_BATCH(batch, 0);
  2003. }
  2004.  
  2005. static void
  2006. gen6_emit_drawing_rectangle(VADriverContextP ctx)
  2007. {
  2008.     i965_render_drawing_rectangle(ctx);
  2009. }
  2010.  
  2011. static void
  2012. gen6_emit_vs_state(VADriverContextP ctx)
  2013. {
  2014.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2015.     struct intel_batchbuffer *batch = i965->batch;
  2016.  
  2017.     /* disable VS constant buffer */
  2018.     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (5 - 2));
  2019.     OUT_BATCH(batch, 0);
  2020.     OUT_BATCH(batch, 0);
  2021.     OUT_BATCH(batch, 0);
  2022.     OUT_BATCH(batch, 0);
  2023.        
  2024.     OUT_BATCH(batch, GEN6_3DSTATE_VS | (6 - 2));
  2025.     OUT_BATCH(batch, 0); /* without VS kernel */
  2026.     OUT_BATCH(batch, 0);
  2027.     OUT_BATCH(batch, 0);
  2028.     OUT_BATCH(batch, 0);
  2029.     OUT_BATCH(batch, 0); /* pass-through */
  2030. }
  2031.  
  2032. static void
  2033. gen6_emit_gs_state(VADriverContextP ctx)
  2034. {
  2035.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2036.     struct intel_batchbuffer *batch = i965->batch;
  2037.  
  2038.     /* disable GS constant buffer */
  2039.     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (5 - 2));
  2040.     OUT_BATCH(batch, 0);
  2041.     OUT_BATCH(batch, 0);
  2042.     OUT_BATCH(batch, 0);
  2043.     OUT_BATCH(batch, 0);
  2044.        
  2045.     OUT_BATCH(batch, GEN6_3DSTATE_GS | (7 - 2));
  2046.     OUT_BATCH(batch, 0); /* without GS kernel */
  2047.     OUT_BATCH(batch, 0);
  2048.     OUT_BATCH(batch, 0);
  2049.     OUT_BATCH(batch, 0);
  2050.     OUT_BATCH(batch, 0);
  2051.     OUT_BATCH(batch, 0); /* pass-through */
  2052. }
  2053.  
  2054. static void
  2055. gen6_emit_clip_state(VADriverContextP ctx)
  2056. {
  2057.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2058.     struct intel_batchbuffer *batch = i965->batch;
  2059.  
  2060.     OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
  2061.     OUT_BATCH(batch, 0);
  2062.     OUT_BATCH(batch, 0); /* pass-through */
  2063.     OUT_BATCH(batch, 0);
  2064. }
  2065.  
  2066. static void
  2067. gen6_emit_sf_state(VADriverContextP ctx)
  2068. {
  2069.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2070.     struct intel_batchbuffer *batch = i965->batch;
  2071.  
  2072.     OUT_BATCH(batch, GEN6_3DSTATE_SF | (20 - 2));
  2073.     OUT_BATCH(batch, (1 << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT) |
  2074.               (1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT) |
  2075.               (0 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT));
  2076.     OUT_BATCH(batch, 0);
  2077.     OUT_BATCH(batch, GEN6_3DSTATE_SF_CULL_NONE);
  2078.     OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */
  2079.     OUT_BATCH(batch, 0);
  2080.     OUT_BATCH(batch, 0);
  2081.     OUT_BATCH(batch, 0);
  2082.     OUT_BATCH(batch, 0);
  2083.     OUT_BATCH(batch, 0); /* DW9 */
  2084.     OUT_BATCH(batch, 0);
  2085.     OUT_BATCH(batch, 0);
  2086.     OUT_BATCH(batch, 0);
  2087.     OUT_BATCH(batch, 0);
  2088.     OUT_BATCH(batch, 0); /* DW14 */
  2089.     OUT_BATCH(batch, 0);
  2090.     OUT_BATCH(batch, 0);
  2091.     OUT_BATCH(batch, 0);
  2092.     OUT_BATCH(batch, 0);
  2093.     OUT_BATCH(batch, 0); /* DW19 */
  2094. }
  2095.  
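/*
 * Program the pixel-shader stage: bind the CURBE buffer as push
 * constants and point 3DSTATE_WM at the selected kernel (PS_KERNEL or
 * PS_SUBPIC_KERNEL).
 */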
  2096. static void
  2097. gen6_emit_wm_state(VADriverContextP ctx, int kernel)
  2098. {
  2099.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2100.     struct intel_batchbuffer *batch = i965->batch;
  2101.     struct i965_render_state *render_state = &i965->render_state;
  2102.  
  2103.     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS |
  2104.               GEN6_3DSTATE_CONSTANT_BUFFER_0_ENABLE |
  2105.               (5 - 2));
  2106.     OUT_RELOC(batch,
  2107.               render_state->curbe.bo,
  2108.               I915_GEM_DOMAIN_INSTRUCTION, 0,
  2109.               (URB_CS_ENTRY_SIZE-1));
  2110.     OUT_BATCH(batch, 0);
  2111.     OUT_BATCH(batch, 0);
  2112.     OUT_BATCH(batch, 0);
  2113.  
  2114.     OUT_BATCH(batch, GEN6_3DSTATE_WM | (9 - 2));
  2115.     OUT_RELOC(batch, render_state->render_kernels[kernel].bo,
  2116.               I915_GEM_DOMAIN_INSTRUCTION, 0,
  2117.               0);
  2118.     OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF) | /* sic, matches the macro's definition */
  2119.               (5 << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT));
  2120.     OUT_BATCH(batch, 0);
  2121.     OUT_BATCH(batch, (6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT)); /* DW4 */
  2122.     OUT_BATCH(batch, ((i965->intel.device_info->max_wm_threads - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT) |
  2123.               GEN6_3DSTATE_WM_DISPATCH_ENABLE |
  2124.               GEN6_3DSTATE_WM_16_DISPATCH_ENABLE);
  2125.     OUT_BATCH(batch, (1 << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT) |
  2126.               GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
  2127.     OUT_BATCH(batch, 0);
  2128.     OUT_BATCH(batch, 0);
  2129. }
  2130.  
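/*
 * Same two-element vertex layout as the GEN4/GEN5 path: a position
 * pair at offset 0 and a texcoord pair at offset 8, each padded with
 * 1.0 components.
 */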
  2131. static void
  2132. gen6_emit_vertex_element_state(VADriverContextP ctx)
  2133. {
  2134.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2135.     struct intel_batchbuffer *batch = i965->batch;
  2136.  
  2137.     /* Set up our vertex elements, sourced from the single vertex buffer. */
  2138.     OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (5 - 2));
  2139.     /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
  2140.     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
  2141.               GEN6_VE0_VALID |
  2142.               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
  2143.               (0 << VE0_OFFSET_SHIFT));
  2144.     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
  2145.               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
  2146.               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
  2147.               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
  2148.     /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
  2149.     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
  2150.               GEN6_VE0_VALID |
  2151.               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
  2152.               (8 << VE0_OFFSET_SHIFT));
  2153.     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
  2154.               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
  2155.               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
  2156.               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
  2157. }
  2158.  
  2159. static void
  2160. gen6_emit_vertices(VADriverContextP ctx)
  2161. {
  2162.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2163.     struct intel_batchbuffer *batch = i965->batch;
  2164.     struct i965_render_state *render_state = &i965->render_state;
  2165.  
  2166.     BEGIN_BATCH(batch, 11);
  2167.     OUT_BATCH(batch, CMD_VERTEX_BUFFERS | 3);
  2168.     OUT_BATCH(batch,
  2169.               (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
  2170.               GEN6_VB0_VERTEXDATA |
  2171.               ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
  2172.     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0);
  2173.     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4);
  2174.     OUT_BATCH(batch, 0);
  2175.  
  2176.     OUT_BATCH(batch,
  2177.               CMD_3DPRIMITIVE |
  2178.               _3DPRIMITIVE_VERTEX_SEQUENTIAL |
  2179.               (_3DPRIM_RECTLIST << _3DPRIMITIVE_TOPOLOGY_SHIFT) |
  2180.               (0 << 9) |
  2181.               4);
  2182.     OUT_BATCH(batch, 3); /* vertex count per instance */
  2183.     OUT_BATCH(batch, 0); /* start vertex offset */
  2184.     OUT_BATCH(batch, 1); /* single instance */
  2185.     OUT_BATCH(batch, 0); /* start instance location */
  2186.     OUT_BATCH(batch, 0); /* index buffer offset, ignored */
  2187.     ADVANCE_BATCH(batch);
  2188. }
  2189.  
  2190. static void
  2191. gen6_render_emit_states(VADriverContextP ctx, int kernel)
  2192. {
  2193.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2194.     struct intel_batchbuffer *batch = i965->batch;
  2195.  
  2196.     intel_batchbuffer_start_atomic(batch, 0x1000);
  2197.     intel_batchbuffer_emit_mi_flush(batch);
  2198.     gen6_emit_invarient_states(ctx);
  2199.     gen6_emit_state_base_address(ctx);
  2200.     gen6_emit_viewport_state_pointers(ctx);
  2201.     gen6_emit_urb(ctx);
  2202.     gen6_emit_cc_state_pointers(ctx);
  2203.     gen6_emit_sampler_state_pointers(ctx);
  2204.     gen6_emit_vs_state(ctx);
  2205.     gen6_emit_gs_state(ctx);
  2206.     gen6_emit_clip_state(ctx);
  2207.     gen6_emit_sf_state(ctx);
  2208.     gen6_emit_wm_state(ctx, kernel);
  2209.     gen6_emit_binding_table(ctx);
  2210.     gen6_emit_depth_buffer_state(ctx);
  2211.     gen6_emit_drawing_rectangle(ctx);
  2212.     gen6_emit_vertex_element_state(ctx);
  2213.     gen6_emit_vertices(ctx);
  2214.     intel_batchbuffer_end_atomic(batch);
  2215. }
  2216.  
  2217. static void
  2218. gen6_render_put_surface(
  2219.     VADriverContextP   ctx,
  2220.     struct object_surface *obj_surface,
  2221.     const VARectangle *src_rect,
  2222.     const VARectangle *dst_rect,
  2223.     unsigned int       flags
  2224. )
  2225. {
  2226.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2227.     struct intel_batchbuffer *batch = i965->batch;
  2228.  
  2229.     gen6_render_initialize(ctx);
  2230.     gen6_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags);
  2231.     i965_clear_dest_region(ctx);
  2232.     gen6_render_emit_states(ctx, PS_KERNEL);
  2233.     intel_batchbuffer_flush(batch);
  2234. }
  2235.  
  2236. static void
  2237. gen6_subpicture_render_blend_state(VADriverContextP ctx)
  2238. {
  2239.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2240.     struct i965_render_state *render_state = &i965->render_state;
  2241.     struct gen6_blend_state *blend_state;
  2242.  
  2244.     dri_bo_map(render_state->cc.blend, 1);
  2245.     assert(render_state->cc.blend->virtual);
  2246.     blend_state = render_state->cc.blend->virtual;
  2247.     memset(blend_state, 0, sizeof(*blend_state));
  2248.     blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
  2249.     blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
  2250.     blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD;
  2251.     blend_state->blend0.blend_enable = 1;
  2252.     blend_state->blend1.post_blend_clamp_enable = 1;
  2253.     blend_state->blend1.pre_blend_clamp_enable = 1;
  2254.     blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
  2255.     dri_bo_unmap(render_state->cc.blend);
  2256. }
  2257.  
  2258. static void
  2259. gen6_subpicture_render_setup_states(
  2260.     VADriverContextP   ctx,
  2261.     struct object_surface *obj_surface,
  2262.     const VARectangle *src_rect,
  2263.     const VARectangle *dst_rect
  2264. )
  2265. {
  2266.     i965_render_dest_surface_state(ctx, 0);
  2267.     i965_subpic_render_src_surfaces_state(ctx, obj_surface);
  2268.     i965_render_sampler(ctx);
  2269.     i965_render_cc_viewport(ctx);
  2270.     gen6_render_color_calc_state(ctx);
  2271.     gen6_subpicture_render_blend_state(ctx);
  2272.     gen6_render_depth_stencil_state(ctx);
  2273.     i965_subpic_render_upload_constants(ctx, obj_surface);
  2274.     i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
  2275. }
  2276.  
  2277. static void
  2278. gen6_render_put_subpicture(
  2279.     VADriverContextP   ctx,
  2280.     struct object_surface *obj_surface,
  2281.     const VARectangle *src_rect,
  2282.     const VARectangle *dst_rect
  2283. )
  2284. {
  2285.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2286.     struct intel_batchbuffer *batch = i965->batch;
  2287.     unsigned int index = obj_surface->subpic_render_idx;
  2288.     struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
  2289.  
  2290.     assert(obj_subpic);
  2291.     gen6_render_initialize(ctx);
  2292.     gen6_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect);
  2293.     gen6_render_emit_states(ctx, PS_SUBPIC_KERNEL);
  2294.     i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
  2295.     intel_batchbuffer_flush(batch);
  2296. }
  2297.  
  2298. /*
  2299.  * for GEN7
  2300.  */
  2301. static void
  2302. gen7_render_initialize(VADriverContextP ctx)
  2303. {
  2304.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2305.     struct i965_render_state *render_state = &i965->render_state;
  2306.     dri_bo *bo;
  2307.  
  2308.     /* VERTEX BUFFER */
  2309.     dri_bo_unreference(render_state->vb.vertex_buffer);
  2310.     bo = dri_bo_alloc(i965->intel.bufmgr,
  2311.                       "vertex buffer",
  2312.                       4096,
  2313.                       4096);
  2314.     assert(bo);
  2315.     render_state->vb.vertex_buffer = bo;
  2316.  
  2317.     /* WM */
  2318.     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
  2319.     bo = dri_bo_alloc(i965->intel.bufmgr,
  2320.                       "surface state & binding table",
  2321.                       (SURFACE_STATE_PADDED_SIZE + sizeof(unsigned int)) * MAX_RENDER_SURFACES,
  2322.                       4096);
  2323.     assert(bo);
  2324.     render_state->wm.surface_state_binding_table_bo = bo;
  2325.  
  2326.     dri_bo_unreference(render_state->wm.sampler);
  2327.     bo = dri_bo_alloc(i965->intel.bufmgr,
  2328.                       "sampler state",
  2329.                       MAX_SAMPLERS * sizeof(struct gen7_sampler_state),
  2330.                       4096);
  2331.     assert(bo);
  2332.     render_state->wm.sampler = bo;
  2333.     render_state->wm.sampler_count = 0;
  2334.  
  2335.     /* COLOR CALCULATOR */
  2336.     dri_bo_unreference(render_state->cc.state);
  2337.     bo = dri_bo_alloc(i965->intel.bufmgr,
  2338.                       "color calc state",
  2339.                       sizeof(struct gen6_color_calc_state),
  2340.                       4096);
  2341.     assert(bo);
  2342.     render_state->cc.state = bo;
  2343.  
  2344.     /* CC VIEWPORT */
  2345.     dri_bo_unreference(render_state->cc.viewport);
  2346.     bo = dri_bo_alloc(i965->intel.bufmgr,
  2347.                       "cc viewport",
  2348.                       sizeof(struct i965_cc_viewport),
  2349.                       4096);
  2350.     assert(bo);
  2351.     render_state->cc.viewport = bo;
  2352.  
  2353.     /* BLEND STATE */
  2354.     dri_bo_unreference(render_state->cc.blend);
  2355.     bo = dri_bo_alloc(i965->intel.bufmgr,
  2356.                       "blend state",
  2357.                       sizeof(struct gen6_blend_state),
  2358.                       4096);
  2359.     assert(bo);
  2360.     render_state->cc.blend = bo;
  2361.  
  2362.     /* DEPTH & STENCIL STATE */
  2363.     dri_bo_unreference(render_state->cc.depth_stencil);
  2364.     bo = dri_bo_alloc(i965->intel.bufmgr,
  2365.                       "depth & stencil state",
  2366.                       sizeof(struct gen6_depth_stencil_state),
  2367.                       4096);
  2368.     assert(bo);
  2369.     render_state->cc.depth_stencil = bo;
  2370. }
  2371.  
  2372. /*
  2373.  * for GEN8 -- ALIGNMENT is presumably used by the GEN8 code later in this file
  2374.  */
  2375. #define ALIGNMENT       64
  2376.  
  2377. static void
  2378. gen7_render_color_calc_state(VADriverContextP ctx)
  2379. {
  2380.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2381.     struct i965_render_state *render_state = &i965->render_state;
  2382.     struct gen6_color_calc_state *color_calc_state;
  2383.    
  2384.     dri_bo_map(render_state->cc.state, 1);
  2385.     assert(render_state->cc.state->virtual);
  2386.     color_calc_state = render_state->cc.state->virtual;
  2387.     memset(color_calc_state, 0, sizeof(*color_calc_state));
  2388.     color_calc_state->constant_r = 1.0;
  2389.     color_calc_state->constant_g = 0.0;
  2390.     color_calc_state->constant_b = 1.0;
  2391.     color_calc_state->constant_a = 1.0;
  2392.     dri_bo_unmap(render_state->cc.state);
  2393. }
  2394.  
  2395. static void
  2396. gen7_render_blend_state(VADriverContextP ctx)
  2397. {
  2398.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2399.     struct i965_render_state *render_state = &i965->render_state;
  2400.     struct gen6_blend_state *blend_state;
  2401.    
  2402.     dri_bo_map(render_state->cc.blend, 1);
  2403.     assert(render_state->cc.blend->virtual);
  2404.     blend_state = render_state->cc.blend->virtual;
  2405.     memset(blend_state, 0, sizeof(*blend_state));
  2406.     blend_state->blend1.logic_op_enable = 1;
  2407.     blend_state->blend1.logic_op_func = 0xc;
  2408.     blend_state->blend1.pre_blend_clamp_enable = 1;
  2409.     dri_bo_unmap(render_state->cc.blend);
  2410. }
  2411.  
  2412. static void
  2413. gen7_render_depth_stencil_state(VADriverContextP ctx)
  2414. {
  2415.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2416.     struct i965_render_state *render_state = &i965->render_state;
  2417.     struct gen6_depth_stencil_state *depth_stencil_state;
  2418.    
  2419.     dri_bo_map(render_state->cc.depth_stencil, 1);
  2420.     assert(render_state->cc.depth_stencil->virtual);
  2421.     depth_stencil_state = render_state->cc.depth_stencil->virtual;
  2422.     memset(depth_stencil_state, 0, sizeof(*depth_stencil_state));
  2423.     dri_bo_unmap(render_state->cc.depth_stencil);
  2424. }
  2425.  
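/*
 * GEN7 uses the larger struct gen7_sampler_state, but the settings
 * match the older path: bilinear min/mag filtering with clamped
 * r/s/t texture coordinates.
 */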
  2426. static void
  2427. gen7_render_sampler(VADriverContextP ctx)
  2428. {
  2429.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2430.     struct i965_render_state *render_state = &i965->render_state;
  2431.     struct gen7_sampler_state *sampler_state;
  2432.     int i;
  2433.    
  2434.     assert(render_state->wm.sampler_count > 0);
  2435.     assert(render_state->wm.sampler_count <= MAX_SAMPLERS);
  2436.  
  2437.     dri_bo_map(render_state->wm.sampler, 1);
  2438.     assert(render_state->wm.sampler->virtual);
  2439.     sampler_state = render_state->wm.sampler->virtual;
  2440.     for (i = 0; i < render_state->wm.sampler_count; i++) {
  2441.         memset(sampler_state, 0, sizeof(*sampler_state));
  2442.         sampler_state->ss0.min_filter = I965_MAPFILTER_LINEAR;
  2443.         sampler_state->ss0.mag_filter = I965_MAPFILTER_LINEAR;
  2444.         sampler_state->ss3.r_wrap_mode = I965_TEXCOORDMODE_CLAMP;
  2445.         sampler_state->ss3.s_wrap_mode = I965_TEXCOORDMODE_CLAMP;
  2446.         sampler_state->ss3.t_wrap_mode = I965_TEXCOORDMODE_CLAMP;
  2447.         sampler_state++;
  2448.     }
  2449.  
  2450.     dri_bo_unmap(render_state->wm.sampler);
  2451. }
  2452.  
  2453.  
  2454. static void
  2455. gen7_render_setup_states(
  2456.     VADriverContextP   ctx,
  2457.     struct object_surface *obj_surface,
  2458.     const VARectangle *src_rect,
  2459.     const VARectangle *dst_rect,
  2460.     unsigned int       flags
  2461. )
  2462. {
  2463.     i965_render_dest_surface_state(ctx, 0);
  2464.     i965_render_src_surfaces_state(ctx, obj_surface, flags);
  2465.     gen7_render_sampler(ctx);
  2466.     i965_render_cc_viewport(ctx);
  2467.     gen7_render_color_calc_state(ctx);
  2468.     gen7_render_blend_state(ctx);
  2469.     gen7_render_depth_stencil_state(ctx);
  2470.     i965_render_upload_constants(ctx, obj_surface, flags);
  2471.     i965_render_upload_vertex(ctx, obj_surface, src_rect, dst_rect);
  2472. }
  2473.  
  2474.  
  2475. static void
  2476. gen7_emit_invarient_states(VADriverContextP ctx)
  2477. {
  2478.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2479.     struct intel_batchbuffer *batch = i965->batch;
  2480.  
  2481.     BEGIN_BATCH(batch, 1);
  2482.     OUT_BATCH(batch, CMD_PIPELINE_SELECT | PIPELINE_SELECT_3D);
  2483.     ADVANCE_BATCH(batch);
  2484.  
  2485.     BEGIN_BATCH(batch, 4);
  2486.     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE | (4 - 2));
  2487.     OUT_BATCH(batch, GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER |
  2488.               GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */
  2489.     OUT_BATCH(batch, 0);
  2490.     OUT_BATCH(batch, 0);
  2491.     ADVANCE_BATCH(batch);
  2492.  
  2493.     BEGIN_BATCH(batch, 2);
  2494.     OUT_BATCH(batch, GEN6_3DSTATE_SAMPLE_MASK | (2 - 2));
  2495.     OUT_BATCH(batch, 1);
  2496.     ADVANCE_BATCH(batch);
  2497.  
  2498.     /* Set system instruction pointer */
  2499.     BEGIN_BATCH(batch, 2);
  2500.     OUT_BATCH(batch, CMD_STATE_SIP | 0);
  2501.     OUT_BATCH(batch, 0);
  2502.     ADVANCE_BATCH(batch);
  2503. }
  2504.  
  2505. static void
  2506. gen7_emit_state_base_address(VADriverContextP ctx)
  2507. {
  2508.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2509.     struct intel_batchbuffer *batch = i965->batch;
  2510.     struct i965_render_state *render_state = &i965->render_state;
  2511.  
  2512.     OUT_BATCH(batch, CMD_STATE_BASE_ADDRESS | (10 - 2));
  2513.     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state base address */
  2514.     OUT_RELOC(batch, render_state->wm.surface_state_binding_table_bo, I915_GEM_DOMAIN_INSTRUCTION, 0, BASE_ADDRESS_MODIFY); /* Surface state base address */
  2515.     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state base address */
  2516.     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object base address */
  2517.     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction base address */
  2518.     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* General state upper bound */
  2519.     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Dynamic state upper bound */
  2520.     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Indirect object upper bound */
  2521.     OUT_BATCH(batch, BASE_ADDRESS_MODIFY); /* Instruction access upper bound */
  2522. }
  2523.  
  2524. static void
  2525. gen7_emit_viewport_state_pointers(VADriverContextP ctx)
  2526. {
  2527.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2528.     struct intel_batchbuffer *batch = i965->batch;
  2529.     struct i965_render_state *render_state = &i965->render_state;
  2530.  
  2531.     BEGIN_BATCH(batch, 2);
  2532.     OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_CC | (2 - 2));
  2533.     OUT_RELOC(batch,
  2534.               render_state->cc.viewport,
  2535.               I915_GEM_DOMAIN_INSTRUCTION, 0,
  2536.               0);
  2537.     ADVANCE_BATCH(batch);
  2538.  
  2539.     BEGIN_BATCH(batch, 2);
  2540.     OUT_BATCH(batch, GEN7_3DSTATE_VIEWPORT_STATE_POINTERS_SF_CL | (2 - 2));
  2541.     OUT_BATCH(batch, 0);
  2542.     ADVANCE_BATCH(batch);
  2543. }
  2544.  
  2545. /*
  2546.  * URB layout on GEN7
  2547.  * ----------------------------------------
  2548.  * | PS Push Constants (8KB) | VS entries |
  2549.  * ----------------------------------------
  2550.  */
  2551. static void
  2552. gen7_emit_urb(VADriverContextP ctx)
  2553. {
  2554.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2555.     struct intel_batchbuffer *batch = i965->batch;
  2556.     unsigned int num_urb_entries = 32;
  2557.  
  2558.     if (IS_HASWELL(i965->intel.device_info))
  2559.         num_urb_entries = 64;
  2560.  
  2561.     BEGIN_BATCH(batch, 2);
  2562.     OUT_BATCH(batch, GEN7_3DSTATE_PUSH_CONSTANT_ALLOC_PS | (2 - 2));
  2563.     OUT_BATCH(batch, 8); /* in 1KBs */
  2564.     ADVANCE_BATCH(batch);
  2565.  
  2566.     BEGIN_BATCH(batch, 2);
  2567.     OUT_BATCH(batch, GEN7_3DSTATE_URB_VS | (2 - 2));
  2568.     OUT_BATCH(batch,
  2569.               (num_urb_entries << GEN7_URB_ENTRY_NUMBER_SHIFT) |
  2570.               (2 - 1) << GEN7_URB_ENTRY_SIZE_SHIFT |
  2571.               (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
  2572.     ADVANCE_BATCH(batch);
  2573.  
  2574.     BEGIN_BATCH(batch, 2);
  2575.     OUT_BATCH(batch, GEN7_3DSTATE_URB_GS | (2 - 2));
  2576.     OUT_BATCH(batch,
  2577.               (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
  2578.               (1 << GEN7_URB_STARTING_ADDRESS_SHIFT));
  2579.     ADVANCE_BATCH(batch);
  2580.  
  2581.     BEGIN_BATCH(batch, 2);
  2582.     OUT_BATCH(batch, GEN7_3DSTATE_URB_HS | (2 - 2));
  2583.     OUT_BATCH(batch,
  2584.               (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
  2585.               (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
  2586.     ADVANCE_BATCH(batch);
  2587.  
  2588.     BEGIN_BATCH(batch, 2);
  2589.     OUT_BATCH(batch, GEN7_3DSTATE_URB_DS | (2 - 2));
  2590.     OUT_BATCH(batch,
  2591.               (0 << GEN7_URB_ENTRY_SIZE_SHIFT) |
  2592.               (2 << GEN7_URB_STARTING_ADDRESS_SHIFT));
  2593.     ADVANCE_BATCH(batch);
  2594. }
  2595.  
  2596. static void
  2597. gen7_emit_cc_state_pointers(VADriverContextP ctx)
  2598. {
  2599.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2600.     struct intel_batchbuffer *batch = i965->batch;
  2601.     struct i965_render_state *render_state = &i965->render_state;
  2602.  
  2603.     BEGIN_BATCH(batch, 2);
  2604.     OUT_BATCH(batch, GEN6_3DSTATE_CC_STATE_POINTERS | (2 - 2));
  2605.     OUT_RELOC(batch,
  2606.               render_state->cc.state,
  2607.               I915_GEM_DOMAIN_INSTRUCTION, 0,
  2608.               1);
  2609.     ADVANCE_BATCH(batch);
  2610.  
  2611.     BEGIN_BATCH(batch, 2);
  2612.     OUT_BATCH(batch, GEN7_3DSTATE_BLEND_STATE_POINTERS | (2 - 2));
  2613.     OUT_RELOC(batch,
  2614.               render_state->cc.blend,
  2615.               I915_GEM_DOMAIN_INSTRUCTION, 0,
  2616.               1);
  2617.     ADVANCE_BATCH(batch);
  2618.  
  2619.     BEGIN_BATCH(batch, 2);
  2620.     OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_STENCIL_STATE_POINTERS | (2 - 2));
  2621.     OUT_RELOC(batch,
  2622.               render_state->cc.depth_stencil,
  2623.               I915_GEM_DOMAIN_INSTRUCTION, 0,
  2624.               1);
  2625.     ADVANCE_BATCH(batch);
  2626. }
  2627.  
  2628. static void
  2629. gen7_emit_sampler_state_pointers(VADriverContextP ctx)
  2630. {
  2631.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2632.     struct intel_batchbuffer *batch = i965->batch;
  2633.     struct i965_render_state *render_state = &i965->render_state;
  2634.  
  2635.     BEGIN_BATCH(batch, 2);
  2636.     OUT_BATCH(batch, GEN7_3DSTATE_SAMPLER_STATE_POINTERS_PS | (2 - 2));
  2637.     OUT_RELOC(batch,
  2638.               render_state->wm.sampler,
  2639.               I915_GEM_DOMAIN_INSTRUCTION, 0,
  2640.               0);
  2641.     ADVANCE_BATCH(batch);
  2642. }
  2643.  
  2644. static void
  2645. gen7_emit_binding_table(VADriverContextP ctx)
  2646. {
  2647.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2648.     struct intel_batchbuffer *batch = i965->batch;
  2649.  
  2650.     BEGIN_BATCH(batch, 2);
  2651.     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_PS | (2 - 2));
  2652.     OUT_BATCH(batch, BINDING_TABLE_OFFSET);
  2653.     ADVANCE_BATCH(batch);
  2654. }
  2655.  
  2656. static void
  2657. gen7_emit_depth_buffer_state(VADriverContextP ctx)
  2658. {
  2659.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2660.     struct intel_batchbuffer *batch = i965->batch;
  2661.  
  2662.     BEGIN_BATCH(batch, 7);
  2663.     OUT_BATCH(batch, GEN7_3DSTATE_DEPTH_BUFFER | (7 - 2));
  2664.     OUT_BATCH(batch,
  2665.               (I965_DEPTHFORMAT_D32_FLOAT << 18) |
  2666.               (I965_SURFACE_NULL << 29));
  2667.     OUT_BATCH(batch, 0);
  2668.     OUT_BATCH(batch, 0);
  2669.     OUT_BATCH(batch, 0);
  2670.     OUT_BATCH(batch, 0);
  2671.     OUT_BATCH(batch, 0);
  2672.     ADVANCE_BATCH(batch);
  2673.  
  2674.     BEGIN_BATCH(batch, 3);
  2675.     OUT_BATCH(batch, GEN7_3DSTATE_CLEAR_PARAMS | (3 - 2));
  2676.     OUT_BATCH(batch, 0);
  2677.     OUT_BATCH(batch, 0);
  2678.     ADVANCE_BATCH(batch);
  2679. }
  2680.  
  2681. static void
  2682. gen7_emit_drawing_rectangle(VADriverContextP ctx)
  2683. {
  2684.     i965_render_drawing_rectangle(ctx);
  2685. }
  2686.  
  2687. static void
  2688. gen7_emit_vs_state(VADriverContextP ctx)
  2689. {
  2690.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2691.     struct intel_batchbuffer *batch = i965->batch;
  2692.  
  2693.     /* disable VS constant buffer */
  2694.     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_VS | (7 - 2));
  2695.     OUT_BATCH(batch, 0);
  2696.     OUT_BATCH(batch, 0);
  2697.     OUT_BATCH(batch, 0);
  2698.     OUT_BATCH(batch, 0);
  2699.     OUT_BATCH(batch, 0);
  2700.     OUT_BATCH(batch, 0);
  2701.        
  2702.     OUT_BATCH(batch, GEN6_3DSTATE_VS | (6 - 2));
  2703.     OUT_BATCH(batch, 0); /* without VS kernel */
  2704.     OUT_BATCH(batch, 0);
  2705.     OUT_BATCH(batch, 0);
  2706.     OUT_BATCH(batch, 0);
  2707.     OUT_BATCH(batch, 0); /* pass-through */
  2708. }
  2709.  
  2710. static void
  2711. gen7_emit_bypass_state(VADriverContextP ctx)
  2712. {
  2713.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2714.     struct intel_batchbuffer *batch = i965->batch;
  2715.  
  2716.     /* Disable GS */
  2717.     BEGIN_BATCH(batch, 7);
  2718.     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_GS | (7 - 2));
  2719.     OUT_BATCH(batch, 0);
  2720.     OUT_BATCH(batch, 0);
  2721.     OUT_BATCH(batch, 0);
  2722.     OUT_BATCH(batch, 0);
  2723.     OUT_BATCH(batch, 0);
  2724.     OUT_BATCH(batch, 0);
  2725.     ADVANCE_BATCH(batch);
  2726.  
  2727.     BEGIN_BATCH(batch, 7);     
  2728.     OUT_BATCH(batch, GEN6_3DSTATE_GS | (7 - 2));
  2729.     OUT_BATCH(batch, 0); /* without GS kernel */
  2730.     OUT_BATCH(batch, 0);
  2731.     OUT_BATCH(batch, 0);
  2732.     OUT_BATCH(batch, 0);
  2733.     OUT_BATCH(batch, 0);
  2734.     OUT_BATCH(batch, 0); /* pass-through */
  2735.     ADVANCE_BATCH(batch);
  2736.  
  2737.     BEGIN_BATCH(batch, 2);
  2738.     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_GS | (2 - 2));
  2739.     OUT_BATCH(batch, 0);
  2740.     ADVANCE_BATCH(batch);
  2741.  
  2742.     /* Disable HS */
  2743.     BEGIN_BATCH(batch, 7);
  2744.     OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_HS | (7 - 2));
  2745.     OUT_BATCH(batch, 0);
  2746.     OUT_BATCH(batch, 0);
  2747.     OUT_BATCH(batch, 0);
  2748.     OUT_BATCH(batch, 0);
  2749.     OUT_BATCH(batch, 0);
  2750.     OUT_BATCH(batch, 0);
  2751.     ADVANCE_BATCH(batch);
  2752.  
  2753.     BEGIN_BATCH(batch, 7);
  2754.     OUT_BATCH(batch, GEN7_3DSTATE_HS | (7 - 2));
  2755.     OUT_BATCH(batch, 0);
  2756.     OUT_BATCH(batch, 0);
  2757.     OUT_BATCH(batch, 0);
  2758.     OUT_BATCH(batch, 0);
  2759.     OUT_BATCH(batch, 0);
  2760.     OUT_BATCH(batch, 0);
  2761.     ADVANCE_BATCH(batch);
  2762.  
  2763.     BEGIN_BATCH(batch, 2);
  2764.     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_HS | (2 - 2));
  2765.     OUT_BATCH(batch, 0);
  2766.     ADVANCE_BATCH(batch);
  2767.  
  2768.     /* Disable TE */
  2769.     BEGIN_BATCH(batch, 4);
  2770.     OUT_BATCH(batch, GEN7_3DSTATE_TE | (4 - 2));
  2771.     OUT_BATCH(batch, 0);
  2772.     OUT_BATCH(batch, 0);
  2773.     OUT_BATCH(batch, 0);
  2774.     ADVANCE_BATCH(batch);
  2775.  
  2776.     /* Disable DS */
  2777.     BEGIN_BATCH(batch, 7);
  2778.     OUT_BATCH(batch, GEN7_3DSTATE_CONSTANT_DS | (7 - 2));
  2779.     OUT_BATCH(batch, 0);
  2780.     OUT_BATCH(batch, 0);
  2781.     OUT_BATCH(batch, 0);
  2782.     OUT_BATCH(batch, 0);
  2783.     OUT_BATCH(batch, 0);
  2784.     OUT_BATCH(batch, 0);
  2785.     ADVANCE_BATCH(batch);
  2786.  
  2787.     BEGIN_BATCH(batch, 6);
  2788.     OUT_BATCH(batch, GEN7_3DSTATE_DS | (6 - 2));
  2789.     OUT_BATCH(batch, 0);
  2790.     OUT_BATCH(batch, 0);
  2791.     OUT_BATCH(batch, 0);
  2792.     OUT_BATCH(batch, 0);
  2793.     OUT_BATCH(batch, 0);
  2794.     ADVANCE_BATCH(batch);
  2795.  
  2796.     BEGIN_BATCH(batch, 2);
  2797.     OUT_BATCH(batch, GEN7_3DSTATE_BINDING_TABLE_POINTERS_DS | (2 - 2));
  2798.     OUT_BATCH(batch, 0);
  2799.     ADVANCE_BATCH(batch);
  2800.  
  2801.     /* Disable STREAMOUT */
  2802.     BEGIN_BATCH(batch, 3);
  2803.     OUT_BATCH(batch, GEN7_3DSTATE_STREAMOUT | (3 - 2));
  2804.     OUT_BATCH(batch, 0);
  2805.     OUT_BATCH(batch, 0);
  2806.     ADVANCE_BATCH(batch);
  2807. }
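
        /*
         * Gen7 added tessellation (HS/TE/DS) and stream-out to the fixed
         * pipeline, and every stage this driver does not use must still be
         * programmed as explicitly disabled.  Each unused shader stage above
         * follows the same shape, sketched here as a hypothetical helper
         * (the code is kept unrolled so each packet can be checked
         * dword-for-dword against the PRM):
         *
         *     static void
         *     emit_disabled_stage(struct intel_batchbuffer *batch,
         *                         uint32_t cmd, int len)
         *     {
         *         int i;
         *
         *         BEGIN_BATCH(batch, len);
         *         OUT_BATCH(batch, cmd | (len - 2));
         *         for (i = 1; i < len; i++)
         *             OUT_BATCH(batch, 0); // no kernel, nothing enabled
         *         ADVANCE_BATCH(batch);
         *     }
         */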
  2808.  
  2809. static void
  2810. gen7_emit_clip_state(VADriverContextP ctx)
  2811. {
  2812.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2813.     struct intel_batchbuffer *batch = i965->batch;
  2814.  
            BEGIN_BATCH(batch, 4);
  2815.     OUT_BATCH(batch, GEN6_3DSTATE_CLIP | (4 - 2));
  2816.     OUT_BATCH(batch, 0);
  2817.     OUT_BATCH(batch, 0); /* pass-through */
  2818.     OUT_BATCH(batch, 0);
            ADVANCE_BATCH(batch);
  2819. }
  2820.  
  2821. static void
  2822. gen7_emit_sf_state(VADriverContextP ctx)
  2823. {
  2824.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2825.     struct intel_batchbuffer *batch = i965->batch;
  2826.  
  2827.     BEGIN_BATCH(batch, 14);
  2828.     OUT_BATCH(batch, GEN7_3DSTATE_SBE | (14 - 2));
  2829.     OUT_BATCH(batch,
  2830.               (1 << GEN7_SBE_NUM_OUTPUTS_SHIFT) |
  2831.               (1 << GEN7_SBE_URB_ENTRY_READ_LENGTH_SHIFT) |
  2832.               (0 << GEN7_SBE_URB_ENTRY_READ_OFFSET_SHIFT));
  2833.     OUT_BATCH(batch, 0);
  2834.     OUT_BATCH(batch, 0);
  2835.     OUT_BATCH(batch, 0); /* DW4 */
  2836.     OUT_BATCH(batch, 0);
  2837.     OUT_BATCH(batch, 0);
  2838.     OUT_BATCH(batch, 0);
  2839.     OUT_BATCH(batch, 0);
  2840.     OUT_BATCH(batch, 0); /* DW9 */
  2841.     OUT_BATCH(batch, 0);
  2842.     OUT_BATCH(batch, 0);
  2843.     OUT_BATCH(batch, 0);
  2844.     OUT_BATCH(batch, 0);
  2845.     ADVANCE_BATCH(batch);
  2846.  
  2847.     BEGIN_BATCH(batch, 7);
  2848.     OUT_BATCH(batch, GEN6_3DSTATE_SF | (7 - 2));
  2849.     OUT_BATCH(batch, 0);
  2850.     OUT_BATCH(batch, GEN6_3DSTATE_SF_CULL_NONE);
  2851.     OUT_BATCH(batch, 2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT);
  2852.     OUT_BATCH(batch, 0);
  2853.     OUT_BATCH(batch, 0);
  2854.     OUT_BATCH(batch, 0);
  2855.     ADVANCE_BATCH(batch);
  2856. }
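
        /*
         * 3DSTATE_SBE is the gen7 split of the old SF attribute setup: its
         * DW1 requests a single attribute (the texture coordinate) read from
         * one URB row at offset 0 and handed to the pixel shader,
         *
         *     DW1 = (1 << NUM_OUTPUTS) | (1 << URB_READ_LENGTH) | (0 << URB_READ_OFFSET)
         *
         * (shift names abbreviated).  The second packet above disables
         * culling, since the RECTLIST emitted by gen7_emit_vertices() has no
         * meaningful winding to cull against.
         */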
  2857.  
  2858. static void
  2859. gen7_emit_wm_state(VADriverContextP ctx, int kernel)
  2860. {
  2861.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2862.     struct intel_batchbuffer *batch = i965->batch;
  2863.     struct i965_render_state *render_state = &i965->render_state;
  2864.     unsigned int max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_IVB;
  2865.     unsigned int num_samples = 0;
  2866.  
  2867.     if (IS_HASWELL(i965->intel.device_info)) {
  2868.         max_threads_shift = GEN7_PS_MAX_THREADS_SHIFT_HSW;
  2869.         num_samples = 1 << GEN7_PS_SAMPLE_MASK_SHIFT_HSW;
  2870.     }
  2871.  
  2872.     BEGIN_BATCH(batch, 3);
  2873.     OUT_BATCH(batch, GEN6_3DSTATE_WM | (3 - 2));
  2874.     OUT_BATCH(batch,
  2875.               GEN7_WM_DISPATCH_ENABLE |
  2876.               GEN7_WM_PERSPECTIVE_PIXEL_BARYCENTRIC);
  2877.     OUT_BATCH(batch, 0);
  2878.     ADVANCE_BATCH(batch);
  2879.  
  2880.     BEGIN_BATCH(batch, 7);
  2881.     OUT_BATCH(batch, GEN6_3DSTATE_CONSTANT_PS | (7 - 2));
  2882.     OUT_BATCH(batch, URB_CS_ENTRY_SIZE);
  2883.     OUT_BATCH(batch, 0);
  2884.     OUT_RELOC(batch,
  2885.               render_state->curbe.bo,
  2886.               I915_GEM_DOMAIN_INSTRUCTION, 0,
  2887.               0);
  2888.     OUT_BATCH(batch, 0);
  2889.     OUT_BATCH(batch, 0);
  2890.     OUT_BATCH(batch, 0);
  2891.     ADVANCE_BATCH(batch);
  2892.  
  2893.     BEGIN_BATCH(batch, 8);
  2894.     OUT_BATCH(batch, GEN7_3DSTATE_PS | (8 - 2));
  2895.     OUT_RELOC(batch,
  2896.               render_state->render_kernels[kernel].bo,
  2897.               I915_GEM_DOMAIN_INSTRUCTION, 0,
  2898.               0);
  2899.     OUT_BATCH(batch,
  2900.               (1 << GEN7_PS_SAMPLER_COUNT_SHIFT) |
  2901.               (5 << GEN7_PS_BINDING_TABLE_ENTRY_COUNT_SHIFT));
  2902.     OUT_BATCH(batch, 0); /* scratch space base offset */
  2903.     OUT_BATCH(batch,
  2904.               ((i965->intel.device_info->max_wm_threads - 1) << max_threads_shift) | num_samples |
  2905.               GEN7_PS_PUSH_CONSTANT_ENABLE |
  2906.               GEN7_PS_ATTRIBUTE_ENABLE |
  2907.               GEN7_PS_16_DISPATCH_ENABLE);
  2908.     OUT_BATCH(batch,
  2909.               (6 << GEN7_PS_DISPATCH_START_GRF_SHIFT_0));
  2910.     OUT_BATCH(batch, 0); /* kernel 1 pointer */
  2911.     OUT_BATCH(batch, 0); /* kernel 2 pointer */
  2912.     ADVANCE_BATCH(batch);
  2913. }
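
        /*
         * The Haswell-specific handling above exists because Haswell moved
         * the PS maximum-thread-count field and requires a non-zero sample
         * mask:
         *
         *     IVB:  max threads at GEN7_PS_MAX_THREADS_SHIFT_IVB, num_samples = 0
         *     HSW:  max threads at GEN7_PS_MAX_THREADS_SHIFT_HSW,
         *           num_samples = 1 << GEN7_PS_SAMPLE_MASK_SHIFT_HSW (mask 0x1)
         *
         * 3DSTATE_CONSTANT_PS pushes a URB_CS_ENTRY_SIZE-sized block of
         * constants from curbe.bo (filled by the *_upload_constants()
         * helpers) into the kernel's register file, and the SIMD16 kernel is
         * dispatched with its payload starting at GRF 6.
         */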
  2914.  
  2915. static void
  2916. gen7_emit_vertex_element_state(VADriverContextP ctx)
  2917. {
  2918.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2919.     struct intel_batchbuffer *batch = i965->batch;
  2920.  
  2921.     /* Set up our vertex elements, sourced from the single vertex buffer. */
            BEGIN_BATCH(batch, 5);
  2922.     OUT_BATCH(batch, CMD_VERTEX_ELEMENTS | (5 - 2));
  2923.     /* offset 0: X,Y -> {X, Y, 1.0, 1.0} */
  2924.     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
  2925.               GEN6_VE0_VALID |
  2926.               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
  2927.               (0 << VE0_OFFSET_SHIFT));
  2928.     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
  2929.               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
  2930.               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
  2931.               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
  2932.     /* offset 8: S0, T0 -> {S0, T0, 1.0, 1.0} */
  2933.     OUT_BATCH(batch, (0 << GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT) |
  2934.               GEN6_VE0_VALID |
  2935.               (I965_SURFACEFORMAT_R32G32_FLOAT << VE0_FORMAT_SHIFT) |
  2936.               (8 << VE0_OFFSET_SHIFT));
  2937.     OUT_BATCH(batch, (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) |
  2938.               (I965_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) |
  2939.               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) |
  2940.               (I965_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT));
            ADVANCE_BATCH(batch);
  2941. }
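
        /*
         * The two elements above describe a 16-byte vertex: position and
         * texture coordinate, two floats each, with components 2 and 3
         * forced to 1.0 by STORE_1_FLT.  In C terms, the buffer that the
         * vertex-upload helpers fill looks like this (a sketch implied by
         * the VE0 offsets, not a struct the driver actually declares):
         *
         *     struct render_vertex {
         *         float x, y;   // element 0, offset 0
         *         float s, t;   // element 1, offset 8
         *     };
         */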
  2942.  
  2943. static void
  2944. gen7_emit_vertices(VADriverContextP ctx)
  2945. {
  2946.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2947.     struct intel_batchbuffer *batch = i965->batch;
  2948.     struct i965_render_state *render_state = &i965->render_state;
  2949.  
  2950.     BEGIN_BATCH(batch, 5);
  2951.     OUT_BATCH(batch, CMD_VERTEX_BUFFERS | (5 - 2));
  2952.     OUT_BATCH(batch,
  2953.               (0 << GEN6_VB0_BUFFER_INDEX_SHIFT) |
  2954.               GEN6_VB0_VERTEXDATA |
  2955.               GEN7_VB0_ADDRESS_MODIFYENABLE |
  2956.               ((4 * 4) << VB0_BUFFER_PITCH_SHIFT));
  2957.     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 0); /* buffer start address */
  2958.     OUT_RELOC(batch, render_state->vb.vertex_buffer, I915_GEM_DOMAIN_VERTEX, 0, 12 * 4); /* end address: 3 vertices * 16 bytes */
  2959.     OUT_BATCH(batch, 0);
  2960.     ADVANCE_BATCH(batch);
  2961.  
  2962.     BEGIN_BATCH(batch, 7);
  2963.     OUT_BATCH(batch, CMD_3DPRIMITIVE | (7 - 2));
  2964.     OUT_BATCH(batch,
  2965.               _3DPRIM_RECTLIST |
  2966.               GEN7_3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL);
  2967.     OUT_BATCH(batch, 3); /* vertex count per instance */
  2968.     OUT_BATCH(batch, 0); /* start vertex offset */
  2969.     OUT_BATCH(batch, 1); /* single instance */
  2970.     OUT_BATCH(batch, 0); /* start instance location */
  2971.     OUT_BATCH(batch, 0);
  2972.     ADVANCE_BATCH(batch);
  2973. }
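
        /*
         * A RECTLIST needs only three corners per rectangle; the hardware
         * infers the fourth.  That is why 3DPRIMITIVE draws exactly three
         * vertices, and it explains the two relocations in the vertex-buffer
         * packet:
         *
         *     pitch       = 4 floats * 4 bytes      = 16 bytes per vertex
         *     end address = start + 3 * 16 bytes    = start + 12 * 4
         */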
  2974.  
  2975. static void
  2976. gen7_render_emit_states(VADriverContextP ctx, int kernel)
  2977. {
  2978.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  2979.     struct intel_batchbuffer *batch = i965->batch;
  2980.  
  2981.     intel_batchbuffer_start_atomic(batch, 0x1000);
  2982.     intel_batchbuffer_emit_mi_flush(batch);
  2983.     gen7_emit_invarient_states(ctx);
  2984.     gen7_emit_state_base_address(ctx);
  2985.     gen7_emit_viewport_state_pointers(ctx);
  2986.     gen7_emit_urb(ctx);
  2987.     gen7_emit_cc_state_pointers(ctx);
  2988.     gen7_emit_sampler_state_pointers(ctx);
  2989.     gen7_emit_bypass_state(ctx);
  2990.     gen7_emit_vs_state(ctx);
  2991.     gen7_emit_clip_state(ctx);
  2992.     gen7_emit_sf_state(ctx);
  2993.     gen7_emit_wm_state(ctx, kernel);
  2994.     gen7_emit_binding_table(ctx);
  2995.     gen7_emit_depth_buffer_state(ctx);
  2996.     gen7_emit_drawing_rectangle(ctx);
  2997.     gen7_emit_vertex_element_state(ctx);
  2998.     gen7_emit_vertices(ctx);
  2999.     intel_batchbuffer_end_atomic(batch);
  3000. }
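
        /*
         * intel_batchbuffer_start_atomic(batch, 0x1000) reserves 0x1000
         * bytes up front so the whole state block plus the 3DPRIMITIVE is
         * guaranteed to land in a single batch buffer and can never be
         * split by an implicit flush.  The ordering matters on gen7: base
         * addresses and URB layout first, then per-stage state, then the
         * binding table, depth state and the draw itself.
         */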
  3001.  
  3002.  
  3003. static void
  3004. gen7_render_put_surface(
  3005.     VADriverContextP   ctx,
  3006.     struct object_surface *obj_surface,    
  3007.     const VARectangle *src_rect,
  3008.     const VARectangle *dst_rect,
  3009.     unsigned int       flags
  3010. )
  3011. {
  3012.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  3013.     struct intel_batchbuffer *batch = i965->batch;
  3014.  
  3015.     gen7_render_initialize(ctx);
  3016.     gen7_render_setup_states(ctx, obj_surface, src_rect, dst_rect, flags);
  3017.     i965_clear_dest_region(ctx);
  3018.     gen7_render_emit_states(ctx, PS_KERNEL);
  3019.     intel_batchbuffer_flush(batch);
  3020. }
  3021.  
  3022.  
  3023. static void
  3024. gen7_subpicture_render_blend_state(VADriverContextP ctx)
  3025. {
  3026.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  3027.     struct i965_render_state *render_state = &i965->render_state;
  3028.     struct gen6_blend_state *blend_state;
  3029.  
  3030.     dri_bo_unmap(render_state->cc.state);    
  3031.     dri_bo_map(render_state->cc.blend, 1);
  3032.     assert(render_state->cc.blend->virtual);
  3033.     blend_state = render_state->cc.blend->virtual;
  3034.     memset(blend_state, 0, sizeof(*blend_state));
  3035.     blend_state->blend0.dest_blend_factor = I965_BLENDFACTOR_INV_SRC_ALPHA;
  3036.     blend_state->blend0.source_blend_factor = I965_BLENDFACTOR_SRC_ALPHA;
  3037.     blend_state->blend0.blend_func = I965_BLENDFUNCTION_ADD;
  3038.     blend_state->blend0.blend_enable = 1;
  3039.     blend_state->blend1.post_blend_clamp_enable = 1;
  3040.     blend_state->blend1.pre_blend_clamp_enable = 1;
  3041.     blend_state->blend1.clamp_range = 0; /* clamp range [0, 1] */
  3042.     dri_bo_unmap(render_state->cc.blend);
  3043. }
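
        /*
         * This is standard "source over" compositing: with the factor pair
         * chosen above the hardware computes, per color channel,
         *
         *     dst = src * src_alpha + dst * (1 - src_alpha)
         *
         * and the pre/post-blend clamps keep every term in [0, 1], which is
         * what a subpicture overlay with per-pixel alpha needs.
         */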
  3044.  
  3045. static void
  3046. gen7_subpicture_render_setup_states(
  3047.     VADriverContextP   ctx,
  3048.     struct object_surface *obj_surface,
  3049.     const VARectangle *src_rect,
  3050.     const VARectangle *dst_rect
  3051. )
  3052. {
  3053.     i965_render_dest_surface_state(ctx, 0);
  3054.     i965_subpic_render_src_surfaces_state(ctx, obj_surface);
  3055.     i965_render_sampler(ctx);
  3056.     i965_render_cc_viewport(ctx);
  3057.     gen7_render_color_calc_state(ctx);
  3058.     gen7_subpicture_render_blend_state(ctx);
  3059.     gen7_render_depth_stencil_state(ctx);
  3060.     i965_subpic_render_upload_constants(ctx, obj_surface);
  3061.     i965_subpic_render_upload_vertex(ctx, obj_surface, dst_rect);
  3062. }
  3063.  
  3064. static void
  3065. gen7_render_put_subpicture(
  3066.     VADriverContextP   ctx,
  3067.     struct object_surface *obj_surface,
  3068.     const VARectangle *src_rect,
  3069.     const VARectangle *dst_rect
  3070. )
  3071. {
  3072.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  3073.     struct intel_batchbuffer *batch = i965->batch;
  3074.     unsigned int index = obj_surface->subpic_render_idx;
  3075.     struct object_subpic *obj_subpic = obj_surface->obj_subpic[index];
  3076.  
  3077.     assert(obj_subpic);
  3078.     gen7_render_initialize(ctx);
  3079.     gen7_subpicture_render_setup_states(ctx, obj_surface, src_rect, dst_rect);
  3080.     gen7_render_emit_states(ctx, PS_SUBPIC_KERNEL);
  3081.     i965_render_upload_image_palette(ctx, obj_subpic->obj_image, 0xff);
  3082.     intel_batchbuffer_flush(batch);
  3083. }
  3084.  
  3085.  
  3086. void
  3087. intel_render_put_surface(
  3088.     VADriverContextP   ctx,
  3089.     struct object_surface *obj_surface,
  3090.     const VARectangle *src_rect,
  3091.     const VARectangle *dst_rect,
  3092.     unsigned int       flags
  3093. )
  3094. {
  3095.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  3096.     struct i965_render_state *render_state = &i965->render_state;
  3097.     int has_done_scaling = 0;
  3098.     VASurfaceID out_surface_id = i965_post_processing(ctx,
  3099.                                                       obj_surface,
  3100.                                                       src_rect,
  3101.                                                       dst_rect,
  3102.                                                       flags,
  3103.                                                       &has_done_scaling);
  3104.  
  3105.     assert((!has_done_scaling) || (out_surface_id != VA_INVALID_ID));
  3106.  
  3107.     if (out_surface_id != VA_INVALID_ID) {
  3108.         struct object_surface *new_obj_surface = SURFACE(out_surface_id);
  3109.        
  3110.         if (new_obj_surface && new_obj_surface->bo)
  3111.             obj_surface = new_obj_surface;
  3112.  
  3113.         if (has_done_scaling)
  3114.             src_rect = dst_rect;
  3115.     }
  3116.  
  3117.     render_state->render_put_surface(ctx, obj_surface, src_rect, dst_rect, flags);
  3118.  
  3119.     if (out_surface_id != VA_INVALID_ID)
  3120.         i965_DestroySurfaces(ctx, &out_surface_id, 1);
  3121. }
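
        /*
         * If i965_post_processing() rendered into a new surface it may also
         * have scaled; the assert above captures that contract:
         *
         *     has_done_scaling  =>  out_surface_id != VA_INVALID_ID
         *
         * In that case the remaining blit must be 1:1, so src_rect is
         * replaced by dst_rect before calling the per-generation hook, and
         * the temporary surface is destroyed once the blit has been queued.
         */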
  3122.  
  3123. void
  3124. intel_render_put_subpicture(
  3125.     VADriverContextP   ctx,
  3126.     struct object_surface *obj_surface,
  3127.     const VARectangle *src_rect,
  3128.     const VARectangle *dst_rect
  3129. )
  3130. {
  3131.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  3132.     struct i965_render_state *render_state = &i965->render_state;
  3133.  
  3134.     render_state->render_put_subpicture(ctx, obj_surface, src_rect, dst_rect);
  3135. }
  3136.  
  3137. static void
  3138. genx_render_terminate(VADriverContextP ctx)
  3139. {
  3140.     int i;
  3141.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  3142.     struct i965_render_state *render_state = &i965->render_state;
  3143.  
  3144.     dri_bo_unreference(render_state->curbe.bo);
  3145.     render_state->curbe.bo = NULL;
  3146.  
  3147.     for (i = 0; i < NUM_RENDER_KERNEL; i++) {
  3148.         struct i965_kernel *kernel = &render_state->render_kernels[i];
  3149.  
  3150.         dri_bo_unreference(kernel->bo);
  3151.         kernel->bo = NULL;
  3152.     }
  3153.  
  3154.     dri_bo_unreference(render_state->vb.vertex_buffer);
  3155.     render_state->vb.vertex_buffer = NULL;
  3156.     dri_bo_unreference(render_state->vs.state);
  3157.     render_state->vs.state = NULL;
  3158.     dri_bo_unreference(render_state->sf.state);
  3159.     render_state->sf.state = NULL;
  3160.     dri_bo_unreference(render_state->wm.sampler);
  3161.     render_state->wm.sampler = NULL;
  3162.     dri_bo_unreference(render_state->wm.state);
  3163.     render_state->wm.state = NULL;
  3164.     dri_bo_unreference(render_state->wm.surface_state_binding_table_bo);
            render_state->wm.surface_state_binding_table_bo = NULL;
  3165.     dri_bo_unreference(render_state->cc.viewport);
  3166.     render_state->cc.viewport = NULL;
  3167.     dri_bo_unreference(render_state->cc.state);
  3168.     render_state->cc.state = NULL;
  3169.     dri_bo_unreference(render_state->cc.blend);
  3170.     render_state->cc.blend = NULL;
  3171.     dri_bo_unreference(render_state->cc.depth_stencil);
  3172.     render_state->cc.depth_stencil = NULL;
  3173.  
  3174.     if (render_state->draw_region) {
  3175.         dri_bo_unreference(render_state->draw_region->bo);
  3176.         free(render_state->draw_region);
  3177.         render_state->draw_region = NULL;
  3178.     }
  3179. }
  3180.  
  3181. bool
  3182. genx_render_init(VADriverContextP ctx)
  3183. {
  3184.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  3185.     struct i965_render_state *render_state = &i965->render_state;
  3186.     int i;
  3187.  
  3188.     /* kernel */
  3189.     assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen5) /
  3190.                                  sizeof(render_kernels_gen5[0])));
  3191.     assert(NUM_RENDER_KERNEL == (sizeof(render_kernels_gen6) /
  3192.                                  sizeof(render_kernels_gen6[0])));
  3193.  
  3194.     if (IS_GEN7(i965->intel.device_info)) {
  3195.         memcpy(render_state->render_kernels,
  3196.                (IS_HASWELL(i965->intel.device_info) ? render_kernels_gen7_haswell : render_kernels_gen7),
  3197.                sizeof(render_state->render_kernels));
  3198.         render_state->render_put_surface = gen7_render_put_surface;
  3199.         render_state->render_put_subpicture = gen7_render_put_subpicture;
  3200.     } else if (IS_GEN6(i965->intel.device_info)) {
  3201.         memcpy(render_state->render_kernels, render_kernels_gen6, sizeof(render_state->render_kernels));
  3202.         render_state->render_put_surface = gen6_render_put_surface;
  3203.         render_state->render_put_subpicture = gen6_render_put_subpicture;
  3204.     } else if (IS_IRONLAKE(i965->intel.device_info)) {
  3205.         memcpy(render_state->render_kernels, render_kernels_gen5, sizeof(render_state->render_kernels));
  3206.         render_state->render_put_surface = i965_render_put_surface;
  3207.         render_state->render_put_subpicture = i965_render_put_subpicture;
  3208.     } else {
  3209.         memcpy(render_state->render_kernels, render_kernels_gen4, sizeof(render_state->render_kernels));
  3210.         render_state->render_put_surface = i965_render_put_surface;
  3211.         render_state->render_put_subpicture = i965_render_put_subpicture;
  3212.     }
  3213.  
  3214.     render_state->render_terminate = genx_render_terminate;
  3215.  
  3216.     for (i = 0; i < NUM_RENDER_KERNEL; i++) {
  3217.         struct i965_kernel *kernel = &render_state->render_kernels[i];
  3218.  
  3219.         if (!kernel->size)
  3220.             continue;
  3221.  
  3222.         kernel->bo = dri_bo_alloc(i965->intel.bufmgr,
  3223.                                   kernel->name,
  3224.                                   kernel->size, 0x1000);
  3225.         assert(kernel->bo);
  3226.         dri_bo_subdata(kernel->bo, 0, kernel->size, kernel->bin);
  3227.     }
  3228.  
  3229.     /* constant buffer */
  3230.     render_state->curbe.bo = dri_bo_alloc(i965->intel.bufmgr,
  3231.                                           "constant buffer",
  3232.                                           4096, 64);
  3233.     assert(render_state->curbe.bo);
  3234.  
  3235.     return true;
  3236. }
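
        /*
         * genx_render_init() is not called directly: it is installed in the
         * per-generation codec_info dispatch table and reached through
         * i965_render_init() below.  A minimal sketch of the wiring (table
         * and field names assumed from the surrounding driver, most fields
         * elided):
         *
         *     struct hw_codec_info gen7_hw_codec_info = {
         *         ...
         *         .render_init = genx_render_init,
         *     };
         */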
  3237.  
  3238. bool
  3239. i965_render_init(VADriverContextP ctx)
  3240. {
  3241.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  3242.  
  3243.     return i965->codec_info->render_init(ctx);
  3244. }
  3245.  
  3246. void
  3247. i965_render_terminate(VADriverContextP ctx)
  3248. {
  3249.     struct i965_driver_data *i965 = i965_driver_data(ctx);
  3250.     struct i965_render_state *render_state = &i965->render_state;
  3251.  
  3252.     render_state->render_terminate(ctx);
  3253. }
  3254.